Re-add source files. I need coffee now
diff --git a/example.toml b/example.toml new file mode 100644 index 0000000..3d902f2 --- /dev/null +++ b/example.toml
@@ -0,0 +1,29 @@ +# This is a TOML document. Boom. + +title = "TOML Example" + +[owner] +name = "Tom Preston-Werner" +organization = "GitHub" +bio = "GitHub Cofounder & CEO\nLikes tater tots and beer." +dob = 1979-05-27T07:32:00Z # First class dates? Why not? + +[database] +server = "192.168.1.1" +ports = [ 8001, 8001, 8002 ] +connection_max = 5000 +enabled = true + +[servers] + + # You can indent as you please. Tabs or spaces. TOML don't care. + [servers.alpha] + ip = "10.0.0.1" + dc = "eqdc10" + + [servers.beta] + ip = "10.0.0.2" + dc = "eqdc10" + +[clients] +data = [ ["gamma", "delta"], [1, 2] ] # just an update to make sure parsers support it
diff --git a/lexer.go b/lexer.go new file mode 100644 index 0000000..586263b --- /dev/null +++ b/lexer.go
@@ -0,0 +1,400 @@ +// TOML lexer.// Written using the principles developped by Rob Pike in +// http://www.youtube.com/watch?v=HxaD_trXwRE + +package toml + +import ( + "fmt" + "regexp" + "strings" + "unicode/utf8" +) + +var dateRegexp *regexp.Regexp + +// Define tokens +type tokenType int + +const ( + eof = -(iota + 1) +) + +const ( + tokenError tokenType = iota + tokenEOF + tokenComment + tokenKey + tokenEqual + tokenString + tokenInteger + tokenTrue + tokenFalse + tokenFloat + tokenLeftBracket + tokenRightBracket + tokenDate + tokenKeyGroup + tokenComma + tokenEOL +) + +type token struct { + typ tokenType + val string +} + +func (i token) String() string { + switch i.typ { + case tokenEOF: + return "EOF" + case tokenError: + return i.val + } + + if len(i.val) > 10 { + return fmt.Sprintf("%.10q...", i.val) + } + return fmt.Sprintf("%q", i.val) +} + +func isSpace(r rune) bool { + return r == ' ' || r == '\t' +} + +func isAlpha(r rune) bool { + return r >= 'a' && r <= 'z' +} + +func isDigit(r rune) bool { + return r >= '0' && r <= '9' +} + +// Define lexer +type lexer struct { + input string + start int + pos int + width int + tokens chan token + depth int +} + +func (l *lexer) run() { + for state := lexVoid; state != nil; { + state = state(l) + } + close(l.tokens) +} + +func (l *lexer) emit(t tokenType) { + l.tokens <- token{t, l.input[l.start:l.pos]} + l.start = l.pos +} + +func (l *lexer) emitWithValue(t tokenType, value string) { + l.tokens <- token{t, value} + l.start = l.pos +} + +func (l *lexer) next() rune { + if l.pos >= len(l.input) { + l.width = 0 + return eof + } + var r rune + r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) + l.pos += l.width + return r +} + +func (l *lexer) ignore() { + l.start = l.pos +} + +func (l *lexer) backup() { + l.pos -= l.width +} + +func (l *lexer) errorf(format string, args ...interface{}) stateFn { + l.tokens <- token{ + tokenError, + fmt.Sprintf(format, args...), + } + return nil +} + +func (l *lexer) peek() rune { + 
r := l.next() + l.backup() + return r +} + +func (l *lexer) accept(valid string) bool { + if strings.IndexRune(valid, l.next()) >= 0 { + return true + } + l.backup() + return false +} + +func (l *lexer) follow(next string) bool { + return strings.HasPrefix(l.input[l.pos:], next) +} + +// Define state functions +type stateFn func(*lexer) stateFn + +func lexVoid(l *lexer) stateFn { + for { + next := l.peek() + switch next { + case '[': + return lexKeyGroup + case '#': + return lexComment + case '=': + return lexEqual + } + + if isAlpha(next) { + return lexKey + } + + if isSpace(next) { + l.ignore() + } + + if l.next() == eof { + break + } + } + + l.emit(tokenEOF) + return nil +} + +func lexRvalue(l *lexer) stateFn { + for { + next := l.peek() + switch next { + case '[': + l.depth += 1 + return lexLeftBracket + case ']': + l.depth -= 1 + return lexRightBracket + case '#': + return lexComment + case '"': + return lexString + case ',': + return lexComma + case '\n': + l.ignore() + l.pos += 1 + if l.depth == 0 { + return lexVoid + } else { + return lexRvalue + } + } + + if l.follow("true") { + return lexTrue + } + + if l.follow("false") { + return lexFalse + } + + if isAlpha(next) { + return lexKey + } + + if dateRegexp.FindString(l.input[l.pos:]) != "" { + return lexDate + } + + if next == '+' || next == '-' || isDigit(next) { + return lexNumber + } + + if isSpace(next) { + l.ignore() + } + + if l.next() == eof { + break + } + } + + l.emit(tokenEOF) + return nil +} + +func lexDate(l *lexer) stateFn { + l.ignore() + l.pos += 20 // Fixed size of a date in TOML + l.emit(tokenDate) + return lexRvalue +} + +func lexTrue(l *lexer) stateFn { + l.ignore() + l.pos += 4 + l.emit(tokenTrue) + return lexRvalue +} + +func lexFalse(l *lexer) stateFn { + l.ignore() + l.pos += 5 + l.emit(tokenFalse) + return lexRvalue +} + +func lexEqual(l *lexer) stateFn { + l.ignore() + l.accept("=") + l.emit(tokenEqual) + return lexRvalue +} + +func lexComma(l *lexer) stateFn { + l.ignore() + 
l.accept(",") + l.emit(tokenComma) + return lexRvalue +} + +func lexKey(l *lexer) stateFn { + l.ignore() + for isAlpha(l.next()) { + } + l.backup() + l.emit(tokenKey) + return lexVoid +} + +func lexComment(l *lexer) stateFn { + for { + next := l.next() + if next == '\n' || next == eof { + break + } + } + l.ignore() + return lexVoid +} + +func lexLeftBracket(l *lexer) stateFn { + l.ignore() + l.pos += 1 + l.emit(tokenLeftBracket) + return lexRvalue +} + +func lexString(l *lexer) stateFn { + l.pos += 1 + l.ignore() + growing_string := "" + + for { + if l.peek() == '"' { + l.emitWithValue(tokenString, growing_string) + l.pos += 1 + l.ignore() + return lexVoid + } + + if l.follow("\\\"") { + l.pos += 1 + growing_string += "\"" + } else { + growing_string += string(l.peek()) + } + + if l.next() == eof { + break + } + } + + return l.errorf("unclosed string") +} + +func lexKeyGroup(l *lexer) stateFn { + l.ignore() + l.pos += 1 + l.emit(tokenLeftBracket) + return lexInsideKeyGroup +} + +func lexInsideKeyGroup(l *lexer) stateFn { + for { + if l.peek() == ']' { + if l.pos > l.start { + l.emit(tokenKeyGroup) + } + l.ignore() + l.pos += 1 + l.emit(tokenRightBracket) + return lexVoid + } + + if l.next() == eof { + break + } + } + return l.errorf("unclosed key group") +} + +func lexRightBracket(l *lexer) stateFn { + l.ignore() + l.pos += 1 + l.emit(tokenRightBracket) + return lexRvalue +} + +func lexNumber(l *lexer) stateFn { + l.ignore() + if !l.accept("+") { + l.accept("-") + } + point_seen := false + digit_seen := false + for { + next := l.next() + if next == '.' 
{ + point_seen = true + } else if isDigit(next) { + digit_seen = true + } else { + l.backup() + break + } + } + + if !digit_seen { + return l.errorf("no digit in that number") + } + if point_seen { + l.emit(tokenFloat) + } else { + l.emit(tokenInteger) + } + return lexRvalue +} + +func init() { + dateRegexp = regexp.MustCompile("^\\d{1,4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z") +} + +// Entry point +func lex(input string) (*lexer, chan token) { + l := &lexer{ + input: input, + tokens: make(chan token), + } + go l.run() + return l, l.tokens +}
diff --git a/lexer_test.go b/lexer_test.go new file mode 100644 index 0000000..b232705 --- /dev/null +++ b/lexer_test.go
@@ -0,0 +1,231 @@ +package toml + +import "testing" + +func testFlow(t *testing.T, input string, expectedFlow []token) { + _, ch := lex(input) + for _, expected := range expectedFlow { + token := <-ch + if token != expected { + t.Log("compared", token, "to", expected) + t.Log(token.val, "<->", expected.val) + t.Log(token.typ, "<->", expected.typ) + t.FailNow() + } + } + + tok, ok := <-ch + if ok { + t.Log("channel is not closed!") + t.Log(len(ch)+1, "tokens remaining:") + + t.Log("token ->", tok) + for token := range ch { + t.Log("token ->", token) + } + t.FailNow() + } +} + +func TestValidKeyGroup(t *testing.T) { + testFlow(t, "[hello world]", []token{ + token{tokenLeftBracket, "["}, + token{tokenKeyGroup, "hello world"}, + token{tokenRightBracket, "]"}, + token{tokenEOF, ""}, + }) +} + +func TestUnclosedKeyGroup(t *testing.T) { + testFlow(t, "[hello world", []token{ + token{tokenLeftBracket, "["}, + token{tokenError, "unclosed key group"}, + }) +} + +func TestComment(t *testing.T) { + testFlow(t, "# blahblah", []token{ + token{tokenEOF, ""}, + }) +} + +func TestKeyGroupComment(t *testing.T) { + testFlow(t, "[hello world] # blahblah", []token{ + token{tokenLeftBracket, "["}, + token{tokenKeyGroup, "hello world"}, + token{tokenRightBracket, "]"}, + token{tokenEOF, ""}, + }) +} + +func TestMultipleKeyGroupsComment(t *testing.T) { + testFlow(t, "[hello world] # blahblah\n[test]", []token{ + token{tokenLeftBracket, "["}, + token{tokenKeyGroup, "hello world"}, + token{tokenRightBracket, "]"}, + token{tokenLeftBracket, "["}, + token{tokenKeyGroup, "test"}, + token{tokenRightBracket, "]"}, + token{tokenEOF, ""}, + }) +} + +func TestBasicKey(t *testing.T) { + testFlow(t, "hello", []token{ + token{tokenKey, "hello"}, + token{tokenEOF, ""}, + }) +} + +func TestBasicKeyAndEqual(t *testing.T) { + testFlow(t, "hello =", []token{ + token{tokenKey, "hello"}, + token{tokenEqual, "="}, + token{tokenEOF, ""}, + }) +} + +func TestKeyEqualStringEscape(t *testing.T) { + testFlow(t, 
"foo = \"hello\\\"\"", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenString, "hello\""}, + token{tokenEOF, ""}, + }) +} + +func TestKeyEqualStringUnfinished(t *testing.T) { + testFlow(t, "foo = \"bar", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenError, "unclosed string"}, + }) +} + +func TestKeyEqualString(t *testing.T) { + testFlow(t, "foo = \"bar\"", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenString, "bar"}, + token{tokenEOF, ""}, + }) +} + +func TestKeyEqualTrue(t *testing.T) { + testFlow(t, "foo = true", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenTrue, "true"}, + token{tokenEOF, ""}, + }) +} + +func TestKeyEqualFalse(t *testing.T) { + testFlow(t, "foo = false", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenFalse, "false"}, + token{tokenEOF, ""}, + }) +} + +func TestKeyEqualArrayBools(t *testing.T) { + testFlow(t, "foo = [true, false, true]", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenLeftBracket, "["}, + token{tokenTrue, "true"}, + token{tokenComma, ","}, + token{tokenFalse, "false"}, + token{tokenComma, ","}, + token{tokenTrue, "true"}, + token{tokenRightBracket, "]"}, + token{tokenEOF, ""}, + }) +} + +func TestKeyEqualArrayBoolsWithComments(t *testing.T) { + testFlow(t, "foo = [true, false, true] # YEAH", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenLeftBracket, "["}, + token{tokenTrue, "true"}, + token{tokenComma, ","}, + token{tokenFalse, "false"}, + token{tokenComma, ","}, + token{tokenTrue, "true"}, + token{tokenRightBracket, "]"}, + token{tokenEOF, ""}, + }) +} + +func TestDateRegexp(t *testing.T) { + if dateRegexp.FindString("1979-05-27T07:32:00Z") == "" { + t.Fail() + } +} + +func TestKeyEqualDate(t *testing.T) { + testFlow(t, "foo = 1979-05-27T07:32:00Z", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenDate, 
"1979-05-27T07:32:00Z"}, + token{tokenEOF, ""}, + }) +} + +func TestKeyEqualNumber(t *testing.T) { + testFlow(t, "foo = 42", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenInteger, "42"}, + token{tokenEOF, ""}, + }) + + testFlow(t, "foo = +42", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenInteger, "+42"}, + token{tokenEOF, ""}, + }) + + testFlow(t, "foo = -42", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenInteger, "-42"}, + token{tokenEOF, ""}, + }) + + testFlow(t, "foo = 4.2", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenFloat, "4.2"}, + token{tokenEOF, ""}, + }) + + testFlow(t, "foo = +4.2", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenFloat, "+4.2"}, + token{tokenEOF, ""}, + }) + + testFlow(t, "foo = -4.2", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenFloat, "-4.2"}, + token{tokenEOF, ""}, + }) +} + +func TestMultiline(t *testing.T) { + testFlow(t, "foo = 42\nbar=21", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenInteger, "42"}, + token{tokenKey, "bar"}, + token{tokenEqual, "="}, + token{tokenInteger, "21"}, + token{tokenEOF, ""}, + }) +}
diff --git a/parser.go b/parser.go new file mode 100644 index 0000000..fc6a6ff --- /dev/null +++ b/parser.go
@@ -0,0 +1,184 @@ +// TOML Parser. + +package toml + +import ( + "fmt" + "strconv" + "time" +) + +type parser struct { + flow chan token + tree *TomlTree + tokensBuffer []token + currentGroup string +} + +type parserStateFn func(*parser) parserStateFn + +func (p *parser) run() { + for state := parseStart; state != nil; { + state = state(p) + } +} + +func (p *parser) peek() *token { + if len(p.tokensBuffer) != 0 { + return &(p.tokensBuffer[0]) + } + + tok, ok := <-p.flow + if !ok { + return nil + } + p.tokensBuffer = append(p.tokensBuffer, tok) + return &tok +} + +func (p *parser) assume(typ tokenType) { + tok := p.getToken() + if tok == nil { + panic(fmt.Sprintf("was expecting token %s, but token stream is empty", typ)) + } + if tok.typ != typ { + panic(fmt.Sprintf("was expecting token %s, but got %s", typ, tok.typ)) + } +} + +func (p *parser) getToken() *token { + if len(p.tokensBuffer) != 0 { + tok := p.tokensBuffer[0] + p.tokensBuffer = p.tokensBuffer[1:] + return &tok + } + tok, ok := <-p.flow + if !ok { + return nil + } + return &tok +} + +func parseStart(p *parser) parserStateFn { + tok := p.peek() + + // end of stream, parsing is finished + if tok == nil { + return nil + } + + switch tok.typ { + case tokenLeftBracket: + return parseGroup + case tokenKey: + return parseAssign + case tokenEOF: + return nil + default: + panic("unexpected token") + } + return nil +} + +func parseGroup(p *parser) parserStateFn { + p.getToken() // discard the [ + key := p.getToken() + if key.typ != tokenKeyGroup { + panic(fmt.Sprintf("unexpected token %s, was expecting a key group", key)) + } + p.tree.createSubTree(key.val) + p.assume(tokenRightBracket) + p.currentGroup = key.val + return parseStart(p) +} + +func parseAssign(p *parser) parserStateFn { + key := p.getToken() + p.assume(tokenEqual) + value := parseRvalue(p) + final_key := key.val + if p.currentGroup != "" { + final_key = p.currentGroup + "." 
+ key.val + } + p.tree.Set(final_key, value) + return parseStart(p) +} + +func parseRvalue(p *parser) interface{} { + tok := p.getToken() + if tok == nil { + panic("expecting a value") + } + + switch tok.typ { + case tokenString: + return tok.val + case tokenTrue: + return true + case tokenFalse: + return false + case tokenInteger: + val, err := strconv.ParseInt(tok.val, 10, 64) + if err != nil { + panic(err) + } + return val + case tokenFloat: + val, err := strconv.ParseFloat(tok.val, 64) + if err != nil { + panic(err) + } + return val + case tokenDate: + val, err := time.Parse(time.RFC3339, tok.val) + if err != nil { + panic(err) + } + return val + case tokenLeftBracket: + return parseArray(p) + } + + panic("never reached") + + return nil +} + +func parseArray(p *parser) []interface{} { + array := make([]interface{}, 0) + for { + follow := p.peek() + if follow == nil { + panic("unterminated array") + } + if follow.typ == tokenRightBracket { + p.getToken() + return array + } + val := parseRvalue(p) + array = append(array, val) + follow = p.peek() + if follow == nil { + panic("unterminated array") + } + if follow.typ != tokenRightBracket && follow.typ != tokenComma { + panic("missing comma") + } + if follow.typ == tokenComma { + p.getToken() + } + } + return array +} + +func parse(flow chan token) *TomlTree { + result := make(TomlTree) + parser := &parser{ + flow: flow, + tree: &result, + tokensBuffer: make([]token, 0), + currentGroup: "", + } + parser.run() + return parser.tree +}
diff --git a/parser_test.go b/parser_test.go new file mode 100644 index 0000000..fd271ce --- /dev/null +++ b/parser_test.go
@@ -0,0 +1,103 @@ +package toml + +import ( + "fmt" + "testing" + "time" +) + +func assertTree(t *testing.T, tree *TomlTree, ref map[string]interface{}) { + for k, v := range ref { + if fmt.Sprintf("%v", tree.Get(k)) != fmt.Sprintf("%v", v) { + t.Log("was expecting", v, "at", k, "but got", tree.Get(k)) + t.Fail() + } + } +} + +func TestCreateSubTree(t *testing.T) { + tree := make(TomlTree) + tree.createSubTree("a.b.c") + tree.Set("a.b.c", 42) + if tree.Get("a.b.c") != 42 { + t.Fail() + } +} + +func TestSimpleKV(t *testing.T) { + tree := Load("a = 42") + assertTree(t, tree, map[string]interface{}{ + "a": int64(42), + }) + + tree = Load("a = 42\nb = 21") + assertTree(t, tree, map[string]interface{}{ + "a": int64(42), + "b": int64(21), + }) +} + +func TestSimpleNumbers(t *testing.T) { + tree := Load("a = +42\nb = -21\nc = +4.2\nd = -2.1") + assertTree(t, tree, map[string]interface{}{ + "a": int64(42), + "b": int64(-21), + "c": float64(4.2), + "d": float64(-2.1), + }) +} + +func TestSimpleDate(t *testing.T) { + tree := Load("a = 1979-05-27T07:32:00Z") + assertTree(t, tree, map[string]interface{}{ + "a": time.Date(1979, time.May, 27, 7, 32, 0, 0, time.UTC), + }) +} + +func TestSimpleString(t *testing.T) { + tree := Load("a = \"hello world\"") + assertTree(t, tree, map[string]interface{}{ + "a": "hello world", + }) +} + +func TestBools(t *testing.T) { + tree := Load("a = true\nb = false") + assertTree(t, tree, map[string]interface{}{ + "a": true, + "b": false, + }) +} + +func TestNestedKeys(t *testing.T) { + tree := Load("[a.b.c]\nd = 42") + assertTree(t, tree, map[string]interface{}{ + "a.b.c.d": int64(42), + }) +} + +func TestArraySimple(t *testing.T) { + tree := Load("a = [42, 21, 10]") + assertTree(t, tree, map[string]interface{}{ + "a": []int64{int64(42), int64(21), int64(10)}, + }) + + tree = Load("a = [42, 21, 10,]") + assertTree(t, tree, map[string]interface{}{ + "a": []int64{int64(42), int64(21), int64(10)}, + }) +} + +func TestArrayMultiline(t *testing.T) { + 
tree := Load("a = [42,\n21, 10,]") + assertTree(t, tree, map[string]interface{}{ + "a": []int64{int64(42), int64(21), int64(10)}, + }) +} + +func TestArrayNested(t *testing.T) { + tree := Load("a = [[42, 21], [10]]") + assertTree(t, tree, map[string]interface{}{ + "a": [][]int64{[]int64{int64(42), int64(21)}, []int64{int64(10)}}, + }) +}
diff --git a/toml.go b/toml.go new file mode 100644 index 0000000..e8f5556 --- /dev/null +++ b/toml.go
// TOML markup language parser.
//
// This version supports the specification as described in
// https://github.com/mojombo/toml/tree/e3656ad493400895f4460f1244a25f8f8e31a32a

// TomlTree is the result of parsing a TOML document: a map from key to
// value, in which nested key groups are stored as *TomlTree values.
type TomlTree map[string]interface{}

// Keys returns the keys of the toplevel tree.
// Warning: this is a costly operation.
func (t *TomlTree) Keys() []string {
	keys := make([]string, 0, len(*t))
	for k := range *t {
		keys = append(keys, k)
	}
	return keys
}

// Get returns the value at key in the TomlTree.
// Key is a dot-separated path (e.g. a.b.c).
// Returns nil if the path does not exist in the tree, or if an
// intermediate node holds a plain value rather than a subtree.
func (t *TomlTree) Get(key string) interface{} {
	subtree := t
	keys := strings.Split(key, ".")
	for _, intermediateKey := range keys[:len(keys)-1] {
		value, exists := (*subtree)[intermediateKey]
		if !exists {
			return nil
		}
		// Guard the assertion: traversing *through* a leaf value
		// (e.g. Get("a.b") when "a" holds an int) is "not found",
		// not a panic.
		next, ok := value.(*TomlTree)
		if !ok {
			return nil
		}
		subtree = next
	}
	return (*subtree)[keys[len(keys)-1]]
}

// Set sets an element in the tree.
// Key is a dot-separated path (e.g. a.b.c).
// Creates all necessary intermediate trees, if needed.
func (t *TomlTree) Set(key string, value interface{}) {
	subtree := t
	keys := strings.Split(key, ".")
	for _, intermediateKey := range keys[:len(keys)-1] {
		if _, exists := (*subtree)[intermediateKey]; !exists {
			nextTree := make(TomlTree)
			(*subtree)[intermediateKey] = &nextTree
		}
		subtree = (*subtree)[intermediateKey].(*TomlTree)
	}
	(*subtree)[keys[len(keys)-1]] = value
}

// createSubTree takes a tree and a key and creates the necessary
// intermediate subtrees at that point, in place.
//
// e.g.
passing a.b.c will create (assuming tree is empty) tree[a], tree[a][b] +// and tree[a][b][c] +func (t *TomlTree) createSubTree(key string) { + subtree := t + for _, intermediate_key := range strings.Split(key, ".") { + _, exists := (*subtree)[intermediate_key] + if !exists { + var new_tree TomlTree = make(TomlTree) + (*subtree)[intermediate_key] = &new_tree + } + subtree = ((*subtree)[intermediate_key]).(*TomlTree) + } +} + +// Create a TomlTree from a string. +func Load(content string) *TomlTree { + _, flow := lex(content) + return parse(flow) +}
diff --git a/toml_test.go b/toml_test.go new file mode 100644 index 0000000..f9fa173 --- /dev/null +++ b/toml_test.go
@@ -0,0 +1 @@ +package toml