Add lexer
diff --git a/README.md b/README.md new file mode 100644 index 0000000..d44c653 --- /dev/null +++ b/README.md
@@ -0,0 +1,3 @@ +# go-toml + +Go library for the [TOML](https://github.com/mojombo/toml) format.
diff --git a/src/toml/example.toml b/src/toml/example.toml new file mode 100644 index 0000000..3d902f2 --- /dev/null +++ b/src/toml/example.toml
@@ -0,0 +1,29 @@ +# This is a TOML document. Boom. + +title = "TOML Example" + +[owner] +name = "Tom Preston-Werner" +organization = "GitHub" +bio = "GitHub Cofounder & CEO\nLikes tater tots and beer." +dob = 1979-05-27T07:32:00Z # First class dates? Why not? + +[database] +server = "192.168.1.1" +ports = [ 8001, 8001, 8002 ] +connection_max = 5000 +enabled = true + +[servers] + + # You can indent as you please. Tabs or spaces. TOML don't care. + [servers.alpha] + ip = "10.0.0.1" + dc = "eqdc10" + + [servers.beta] + ip = "10.0.0.2" + dc = "eqdc10" + +[clients] +data = [ ["gamma", "delta"], [1, 2] ] # just an update to make sure parsers support it
diff --git a/src/toml/lexer.go b/src/toml/lexer.go new file mode 100644 index 0000000..d057294 --- /dev/null +++ b/src/toml/lexer.go
// TOML lexer.
//
// Written using the principles developed by Rob Pike in
// http://www.youtube.com/watch?v=HxaD_trXwRE

// dateRegexp matches an RFC 3339 "Zulu" datetime at the start of the
// remaining input (TOML's first-class date literal).
var dateRegexp = regexp.MustCompile(`^\d{1,4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z`)

// EOF is the sentinel rune returned by lexer.next at end of input.
const (
	EOF = -(iota + 1)
)

// tokenType identifies the lexical class of a token.
type tokenType int

const (
	tokenError tokenType = iota
	tokenEOF
	tokenComment // declared for completeness; comments are currently skipped, never emitted
	tokenKey
	tokenEqual
	tokenString
	tokenInteger
	tokenTrue
	tokenFalse
	tokenFloat
	tokenLeftBracket
	tokenRightBracket
	tokenDate
	tokenKeyGroup
	tokenComma
)

// token is one lexeme: its class plus its literal (or, for strings,
// unescaped) text.
type token struct {
	typ tokenType
	val string
}

// String renders a token for debugging; values longer than ten
// characters are truncated.
func (i token) String() string {
	switch i.typ {
	case tokenEOF:
		return "EOF"
	case tokenError:
		return i.val
	}
	if len(i.val) > 10 {
		return fmt.Sprintf("%.10q...", i.val)
	}
	return fmt.Sprintf("%q", i.val)
}

// isSpace reports whether r is TOML whitespace (space or tab).
func isSpace(r rune) bool {
	return r == ' ' || r == '\t'
}

// isAlpha reports whether r may appear in a bare key. Upper-case
// letters and underscores are accepted so keys like "connection_max"
// (used in example.toml) lex as a single token.
func isAlpha(r rune) bool {
	return (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || r == '_'
}

// isDigit reports whether r is an ASCII digit.
func isDigit(r rune) bool {
	return r >= '0' && r <= '9'
}

// lexer holds scanning state: the full input, the start of the token
// in progress, the current position, the width of the last rune read
// (so backup can un-read it), and the channel tokens are sent on.
type lexer struct {
	input  string
	start  int
	pos    int
	width  int
	tokens chan token
}

// run drives the state machine until a state function returns nil,
// then closes the channel so receivers know lexing is finished.
func (l *lexer) run() {
	for state := lexVoid; state != nil; {
		state = state(l)
	}
	close(l.tokens)
}

// emit sends the input accumulated since start as a token of type t.
func (l *lexer) emit(t tokenType) {
	l.tokens <- token{t, l.input[l.start:l.pos]}
	l.start = l.pos
}

// emitWithValue sends a token of type t carrying an explicit value
// (used for strings, whose escapes are decoded while scanning).
func (l *lexer) emitWithValue(t tokenType, value string) {
	l.tokens <- token{t, value}
	l.start = l.pos
}

// next consumes and returns the next rune, or EOF at end of input.
func (l *lexer) next() rune {
	if l.pos >= len(l.input) {
		l.width = 0
		return EOF
	}
	var r rune
	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += l.width
	return r
}

// ignore drops the pending input (everything since the last emit).
func (l *lexer) ignore() {
	l.start = l.pos
}

// backup un-reads the last rune returned by next (at most once).
func (l *lexer) backup() {
	l.pos -= l.width
}

// errorf emits an error token and terminates the state machine.
func (l *lexer) errorf(format string, args ...interface{}) stateFn {
	l.tokens <- token{
		tokenError,
		fmt.Sprintf(format, args...),
	}
	return nil
}

// peek returns the next rune without consuming it.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// accept consumes the next rune if it appears in valid.
func (l *lexer) accept(valid string) bool {
	if strings.IndexRune(valid, l.next()) >= 0 {
		return true
	}
	l.backup()
	return false
}

// follow reports whether the unconsumed input starts with next.
func (l *lexer) follow(next string) bool {
	return strings.HasPrefix(l.input[l.pos:], next)
}

// stateFn is one lexing state; it returns the next state, or nil to
// stop the machine.
type stateFn func(*lexer) stateFn

// lexVoid scans top-level context: key groups, comments, keys and the
// equal sign.
func lexVoid(l *lexer) stateFn {
	for {
		next := l.peek()
		switch next {
		case '[':
			return lexKeyGroup
		case '#':
			return lexComment
		case '=':
			return lexEqual
		}

		if isAlpha(next) {
			return lexKey
		}

		if isSpace(next) {
			l.ignore()
		}

		if l.next() == EOF {
			break
		}
	}

	l.emit(tokenEOF)
	return nil
}

// lexRvalue scans the right-hand side of an assignment: arrays,
// strings, booleans, dates and numbers.
func lexRvalue(l *lexer) stateFn {
	for {
		next := l.peek()
		switch next {
		case '[':
			return lexLeftBracket
		case ']':
			return lexRightBracket
		case '#':
			// NOTE(review): lexComment always resumes in lexVoid, so a
			// comment inside an array would leave rvalue context —
			// preserved as-is, confirm against the TOML spec.
			return lexComment
		case '"':
			return lexString
		case ',':
			return lexComma
		case '\n':
			return lexVoid
		}

		if l.follow("true") {
			return lexTrue
		}
		if l.follow("false") {
			return lexFalse
		}
		if isAlpha(next) {
			return lexKey
		}
		if dateRegexp.FindString(l.input[l.pos:]) != "" {
			return lexDate
		}
		if next == '+' || next == '-' || isDigit(next) {
			return lexNumber
		}

		if isSpace(next) {
			l.ignore()
		}

		if l.next() == EOF {
			break
		}
	}

	l.emit(tokenEOF)
	return nil
}

// lexDate consumes exactly the characters matched by dateRegexp
// (lexRvalue already verified a match starts here). Using the match
// length instead of a fixed 20 keeps this correct for short years,
// which the \d{1,4} in the pattern allows.
func lexDate(l *lexer) stateFn {
	l.ignore()
	l.pos += len(dateRegexp.FindString(l.input[l.pos:]))
	l.emit(tokenDate)
	return lexRvalue
}

// lexTrue consumes the literal "true".
func lexTrue(l *lexer) stateFn {
	l.ignore()
	l.pos += 4
	l.emit(tokenTrue)
	return lexRvalue
}

// lexFalse consumes the literal "false".
func lexFalse(l *lexer) stateFn {
	l.ignore()
	l.pos += 5
	l.emit(tokenFalse)
	return lexRvalue
}

// lexEqual consumes "=" and switches to rvalue context.
func lexEqual(l *lexer) stateFn {
	l.ignore()
	l.accept("=")
	l.emit(tokenEqual)
	return lexRvalue
}

// lexComma consumes "," between array elements.
func lexComma(l *lexer) stateFn {
	l.ignore()
	l.accept(",")
	l.emit(tokenComma)
	return lexRvalue
}

// lexKey scans a bare key. The leading ignore discards whitespace the
// caller's scan loop may already have consumed; without it a key after
// spaces (e.g. "  foo") would be emitted with the last space attached.
func lexKey(l *lexer) stateFn {
	l.ignore()
	for isAlpha(l.next()) {
	}
	l.backup()
	l.emit(tokenKey)
	return lexVoid
}

// lexComment discards everything up to the newline (or EOF).
func lexComment(l *lexer) stateFn {
	for {
		next := l.next()
		if next == '\n' || next == EOF {
			break
		}
	}
	l.ignore()
	return lexVoid
}

// lexLeftBracket consumes "[" opening an array.
func lexLeftBracket(l *lexer) stateFn {
	l.ignore()
	l.pos++
	l.emit(tokenLeftBracket)
	return lexRvalue
}

// lexString scans a double-quoted string, decoding \" escapes into the
// emitted value. Errors out if the closing quote is never found.
func lexString(l *lexer) stateFn {
	l.pos++ // skip the opening quote
	l.ignore()
	var sb strings.Builder

	for {
		if l.peek() == '"' {
			l.emitWithValue(tokenString, sb.String())
			l.pos++ // skip the closing quote
			l.ignore()
			// Resume in rvalue context so strings inside arrays are
			// correctly followed by commas/brackets (was lexVoid,
			// which broke ["a", "b"]).
			return lexRvalue
		}

		if l.follow("\\\"") {
			l.pos++ // drop the backslash; the quote is consumed below
			sb.WriteString("\"")
		} else {
			sb.WriteRune(l.peek())
		}

		if l.next() == EOF {
			break
		}
	}

	return l.errorf("unclosed string")
}

// lexKeyGroup consumes "[" opening a [key.group] header.
func lexKeyGroup(l *lexer) stateFn {
	l.ignore()
	l.pos++
	l.emit(tokenLeftBracket)
	return lexInsideKeyGroup
}

// lexInsideKeyGroup scans up to the closing "]", emitting the group
// name (if non-empty) and the bracket. Errors out at EOF.
func lexInsideKeyGroup(l *lexer) stateFn {
	for {
		if l.peek() == ']' {
			if l.pos > l.start {
				l.emit(tokenKeyGroup)
			}
			l.ignore()
			l.pos++
			l.emit(tokenRightBracket)
			return lexVoid
		}

		if l.next() == EOF {
			break
		}
	}
	return l.errorf("unclosed key group")
}

// lexRightBracket consumes "]" closing an array.
func lexRightBracket(l *lexer) stateFn {
	l.ignore()
	l.pos++
	l.emit(tokenRightBracket)
	return lexRvalue
}

// lexNumber scans an optionally-signed integer or float. The
// terminating rune (comma, bracket, newline, ...) is backed up so it
// is not swallowed into the emitted literal.
func lexNumber(l *lexer) stateFn {
	l.ignore()
	if !l.accept("+") {
		l.accept("-")
	}
	pointSeen := false
	digitSeen := false
	for {
		next := l.next()
		if next == '.' {
			pointSeen = true
		} else if isDigit(next) {
			digitSeen = true
		} else {
			l.backup() // terminator is not part of the number
			break
		}
	}

	if !digitSeen {
		return l.errorf("no digit in that number")
	}
	if pointSeen {
		l.emit(tokenFloat)
	} else {
		l.emit(tokenInteger)
	}
	return lexRvalue
}

// Entry point: lex starts scanning input in a goroutine. Tokens arrive
// on the returned channel, which is closed when lexing completes.
func lex(input string) (*lexer, chan token) {
	l := &lexer{
		input:  input,
		tokens: make(chan token),
	}
	go l.run()
	return l, l.tokens
}
diff --git a/src/toml/lexer_test.go b/src/toml/lexer_test.go new file mode 100644 index 0000000..5a67038 --- /dev/null +++ b/src/toml/lexer_test.go
@@ -0,0 +1,220 @@ +package toml + +import "testing" + +func testFlow(t *testing.T, input string, expectedFlow []token) { + _, ch := lex(input) + for _, expected := range expectedFlow { + token := <- ch + if token != expected { + t.Log("compared", token, "to", expected) + t.Log(token.val, "<->", expected.val) + t.Log(token.typ, "<->", expected.typ) + t.FailNow() + } + } + + tok, ok := <- ch + if ok { + t.Log("channel is not closed!") + t.Log(len(ch) + 1, "tokens remaining:") + + t.Log("token ->", tok) + for token := range ch { + t.Log("token ->", token) + } + t.FailNow() + } +} + +func TestValidKeyGroup(t *testing.T) { + testFlow(t, "[hello world]", []token{ + token{tokenLeftBracket, "["}, + token{tokenKeyGroup, "hello world"}, + token{tokenRightBracket, "]"}, + token{tokenEOF, ""}, + }) +} + +func TestUnclosedKeyGroup(t *testing.T) { + testFlow(t, "[hello world", []token{ + token{tokenLeftBracket, "["}, + token{tokenError, "unclosed key group"}, + }) +} + + +func TestComment(t *testing.T) { + testFlow(t, "# blahblah", []token{ + token{tokenEOF, ""}, + }) +} + +func TestKeyGroupComment(t *testing.T) { + testFlow(t, "[hello world] # blahblah", []token{ + token{tokenLeftBracket, "["}, + token{tokenKeyGroup, "hello world"}, + token{tokenRightBracket, "]"}, + token{tokenEOF, ""}, + }) +} + +func TestMultipleKeyGroupsComment(t *testing.T) { + testFlow(t, "[hello world] # blahblah\n[test]", []token{ + token{tokenLeftBracket, "["}, + token{tokenKeyGroup, "hello world"}, + token{tokenRightBracket, "]"}, + token{tokenLeftBracket, "["}, + token{tokenKeyGroup, "test"}, + token{tokenRightBracket, "]"}, + token{tokenEOF, ""}, + }) +} + +func TestBasicKey(t *testing.T) { + testFlow(t, "hello", []token{ + token{tokenKey, "hello"}, + token{tokenEOF, ""}, + }) +} + +func TestBasicKeyAndEqual(t *testing.T) { + testFlow(t, "hello =", []token{ + token{tokenKey, "hello"}, + token{tokenEqual, "="}, + token{tokenEOF, ""}, + }) +} + +func TestKeyEqualStringEscape(t *testing.T) { + 
testFlow(t, "foo = \"hello\\\"\"", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenString, "hello\""}, + token{tokenEOF, ""}, + }) +} + +func TestKeyEqualStringUnfinished(t *testing.T) { + testFlow(t, "foo = \"bar", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenError, "unclosed string"}, + }) +} + +func TestKeyEqualString(t *testing.T) { + testFlow(t, "foo = \"bar\"", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenString, "bar"}, + token{tokenEOF, ""}, + }) +} + +func TestKeyEqualTrue(t *testing.T) { + testFlow(t, "foo = true", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenTrue, "true"}, + token{tokenEOF, ""}, + }) +} + +func TestKeyEqualFalse(t *testing.T) { + testFlow(t, "foo = false", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenFalse, "false"}, + token{tokenEOF, ""}, + }) +} + +func TestKeyEqualArrayBools(t *testing.T) { + testFlow(t, "foo = [true, false, true]", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenLeftBracket, "["}, + token{tokenTrue, "true"}, + token{tokenComma, ","}, + token{tokenFalse, "false"}, + token{tokenComma, ","}, + token{tokenTrue, "true"}, + token{tokenRightBracket, "]"}, + token{tokenEOF, ""}, + }) +} + +func TestKeyEqualArrayBoolsWithComments(t *testing.T) { + testFlow(t, "foo = [true, false, true] # YEAH", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenLeftBracket, "["}, + token{tokenTrue, "true"}, + token{tokenComma, ","}, + token{tokenFalse, "false"}, + token{tokenComma, ","}, + token{tokenTrue, "true"}, + token{tokenRightBracket, "]"}, + token{tokenEOF, ""}, + }) +} + +func TestDateRegexp(t *testing.T) { + if dateRegexp.FindString("1979-05-27T07:32:00Z") == "" { + t.Fail() + } +} + +func TestKeyEqualDate(t *testing.T) { + testFlow(t, "foo = 1979-05-27T07:32:00Z", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenDate, 
"1979-05-27T07:32:00Z"}, + token{tokenEOF, ""}, + }) +} + +func TestKeyEqualNumber(t *testing.T) { + testFlow(t, "foo = 42", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenInteger, "42"}, + token{tokenEOF, ""}, + }) + + testFlow(t, "foo = +42", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenInteger, "+42"}, + token{tokenEOF, ""}, + }) + + testFlow(t, "foo = -42", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenInteger, "-42"}, + token{tokenEOF, ""}, + }) + + testFlow(t, "foo = 4.2", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenFloat, "4.2"}, + token{tokenEOF, ""}, + }) + + testFlow(t, "foo = +4.2", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenFloat, "+4.2"}, + token{tokenEOF, ""}, + }) + + testFlow(t, "foo = -4.2", []token{ + token{tokenKey, "foo"}, + token{tokenEqual, "="}, + token{tokenFloat, "-4.2"}, + token{tokenEOF, ""}, + }) +}
diff --git a/src/toml/parser.go b/src/toml/parser.go new file mode 100644 index 0000000..f9592b7 --- /dev/null +++ b/src/toml/parser.go
@@ -0,0 +1,3 @@ +// TOML Parser. + +package toml
diff --git a/src/toml/toml.go b/src/toml/toml.go new file mode 100644 index 0000000..1b558cc --- /dev/null +++ b/src/toml/toml.go
@@ -0,0 +1,8 @@ +// TOML interface. + +package toml + +func Load() map[string]interface{} { + result := make(map[string]interface{}) + return result +}
diff --git a/src/toml/toml_test.go b/src/toml/toml_test.go new file mode 100644 index 0000000..f9fa173 --- /dev/null +++ b/src/toml/toml_test.go
@@ -0,0 +1 @@ +package toml