Support underscores, uppercase letters, and international (non-ASCII) letters in key names. The TOML spec does not explicitly specify these characters, but they are implied by its Unicode and JSON compatibility. Fixes #3.
diff --git a/lexer.go b/lexer.go index 586263b..3db06eb 100644 --- a/lexer.go +++ b/lexer.go
@@ -7,6 +7,7 @@ "fmt" "regexp" "strings" + "unicode" "unicode/utf8" ) @@ -61,12 +62,12 @@ return r == ' ' || r == '\t' } -func isAlpha(r rune) bool { - return r >= 'a' && r <= 'z' +func isAlphanumeric(r rune) bool { + return unicode.IsLetter(r) || r == '_' } func isDigit(r rune) bool { - return r >= '0' && r <= '9' + return unicode.IsNumber(r) } // Define lexer @@ -156,7 +157,7 @@ return lexEqual } - if isAlpha(next) { + if isAlphanumeric(next) { return lexKey } @@ -207,7 +208,7 @@ return lexFalse } - if isAlpha(next) { + if isAlphanumeric(next) { return lexKey } @@ -269,7 +270,7 @@ func lexKey(l *lexer) stateFn { l.ignore() - for isAlpha(l.next()) { + for isAlphanumeric(l.next()) { } l.backup() l.emit(tokenKey)
diff --git a/lexer_test.go b/lexer_test.go index b232705..03a1740 100644 --- a/lexer_test.go +++ b/lexer_test.go
@@ -77,6 +77,27 @@ }) } +func TestBasicKeyWithUnderscore(t *testing.T) { + testFlow(t, "hello_hello", []token{ + token{tokenKey, "hello_hello"}, + token{tokenEOF, ""}, + }) +} + +func TestBasicKeyWithUppercaseMix(t *testing.T) { + testFlow(t, "helloHELLOHello", []token{ + token{tokenKey, "helloHELLOHello"}, + token{tokenEOF, ""}, + }) +} + +func TestBasicKeyWithInternationalCharacters(t *testing.T) { + testFlow(t, "héllÖ", []token{ + token{tokenKey, "héllÖ"}, + token{tokenEOF, ""}, + }) +} + func TestBasicKeyAndEqual(t *testing.T) { testFlow(t, "hello =", []token{ token{tokenKey, "hello"},