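Added line/col support to lexer

Every token, including error tokens, now carries the zero-based line and
column of the lexer's current token start. Columns are counted in runes
(not bytes) and reset to 0 after each newline. The expectations in
lexer_test.go are updated to include the position of every token.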
diff --git a/lexer.go b/lexer.go index 6b9311c..ce0f2f6 100644 --- a/lexer.go +++ b/lexer.go
@@ -46,6 +46,8 @@ type token struct { typ tokenType val string + line int + col int } func (i token) String() string { @@ -93,6 +95,8 @@ width int tokens chan token depth int + line int + col int } func (l *lexer) run() { @@ -102,14 +106,32 @@ close(l.tokens) } +func (l *lexer) nextStart() { + // iterate by runes (utf8 characters) + // search for newlines and advance line/col counts + for i:=l.start; i<l.pos; { + r, width := utf8.DecodeRuneInString(l.input[i:]) + if r == '\n' { + l.line += 1 + l.col = 0 + } else { + l.col += 1 + } + i += width +// fmt.Printf("'%c'\n", r) + } + // advance start position to next token + l.start = l.pos +} + func (l *lexer) emit(t tokenType) { - l.tokens <- token{t, l.input[l.start:l.pos]} - l.start = l.pos + l.tokens <- token{t, l.input[l.start:l.pos], l.line, l.col} + l.nextStart() } func (l *lexer) emitWithValue(t tokenType, value string) { - l.tokens <- token{t, value} - l.start = l.pos + l.tokens <- token{t, value, l.line, l.col} + l.nextStart() } func (l *lexer) next() rune { @@ -124,7 +146,7 @@ } func (l *lexer) ignore() { - l.start = l.pos + l.nextStart() } func (l *lexer) backup() { @@ -135,6 +157,8 @@ l.tokens <- token{ tokenError, fmt.Sprintf(format, args...), + l.line, + l.col, } return nil }
diff --git a/lexer_test.go b/lexer_test.go index ebd03a9..4d17e17 100644 --- a/lexer_test.go +++ b/lexer_test.go
@@ -7,9 +7,12 @@ for _, expected := range expectedFlow { token := <-ch if token != expected { + t.Log("While testing: ", input) t.Log("compared", token, "to", expected) - t.Log(token.val, "<->", expected.val) - t.Log(token.typ, "<->", expected.typ) + t.Log(token.val, "<->", expected.val) + t.Log(token.typ, "<->", expected.typ) + t.Log(token.line, "<->", expected.line) + t.Log(token.col, "<->", expected.col) t.FailNow() } } @@ -29,244 +32,246 @@ func TestValidKeyGroup(t *testing.T) { testFlow(t, "[hello world]", []token{ - token{tokenLeftBracket, "["}, - token{tokenKeyGroup, "hello world"}, - token{tokenRightBracket, "]"}, - token{tokenEOF, ""}, + token{tokenLeftBracket, "[", 0, 0}, + token{tokenKeyGroup, "hello world", 0, 1}, + token{tokenRightBracket, "]", 0, 12}, + token{tokenEOF, "", 0, 13}, }) } func TestUnclosedKeyGroup(t *testing.T) { testFlow(t, "[hello world", []token{ - token{tokenLeftBracket, "["}, - token{tokenError, "unclosed key group"}, + token{tokenLeftBracket, "[", 0, 0}, + token{tokenError, "unclosed key group", 0, 1}, }) } func TestComment(t *testing.T) { testFlow(t, "# blahblah", []token{ - token{tokenEOF, ""}, + token{tokenEOF, "", 0, 10}, }) } func TestKeyGroupComment(t *testing.T) { testFlow(t, "[hello world] # blahblah", []token{ - token{tokenLeftBracket, "["}, - token{tokenKeyGroup, "hello world"}, - token{tokenRightBracket, "]"}, - token{tokenEOF, ""}, + token{tokenLeftBracket, "[", 0, 0}, + token{tokenKeyGroup, "hello world", 0, 1}, + token{tokenRightBracket, "]", 0, 12}, + token{tokenEOF, "", 0, 24}, }) } func TestMultipleKeyGroupsComment(t *testing.T) { testFlow(t, "[hello world] # blahblah\n[test]", []token{ - token{tokenLeftBracket, "["}, - token{tokenKeyGroup, "hello world"}, - token{tokenRightBracket, "]"}, - token{tokenLeftBracket, "["}, - token{tokenKeyGroup, "test"}, - token{tokenRightBracket, "]"}, - token{tokenEOF, ""}, + token{tokenLeftBracket, "[", 0, 0}, + token{tokenKeyGroup, "hello world", 0, 1}, + token{tokenRightBracket, 
"]", 0, 12}, + token{tokenLeftBracket, "[", 1, 0}, + token{tokenKeyGroup, "test", 1, 1}, + token{tokenRightBracket, "]", 1, 5}, + token{tokenEOF, "", 1, 6}, }) } func TestBasicKey(t *testing.T) { testFlow(t, "hello", []token{ - token{tokenKey, "hello"}, - token{tokenEOF, ""}, + token{tokenKey, "hello", 0, 0}, + token{tokenEOF, "", 0, 5}, }) } func TestBasicKeyWithUnderscore(t *testing.T) { testFlow(t, "hello_hello", []token{ - token{tokenKey, "hello_hello"}, - token{tokenEOF, ""}, + token{tokenKey, "hello_hello", 0, 0}, + token{tokenEOF, "", 0, 11}, }) } func TestBasicKeyWithDash(t *testing.T) { testFlow(t, "hello-world", []token{ - token{tokenKey, "hello-world"}, - token{tokenEOF, ""}, + token{tokenKey, "hello-world", 0, 0}, + token{tokenEOF, "", 0, 11}, }) } func TestBasicKeyWithUppercaseMix(t *testing.T) { testFlow(t, "helloHELLOHello", []token{ - token{tokenKey, "helloHELLOHello"}, - token{tokenEOF, ""}, + token{tokenKey, "helloHELLOHello", 0, 0}, + token{tokenEOF, "", 0, 15}, }) } func TestBasicKeyWithInternationalCharacters(t *testing.T) { testFlow(t, "héllÖ", []token{ - token{tokenKey, "héllÖ"}, - token{tokenEOF, ""}, + token{tokenKey, "héllÖ", 0, 0}, + token{tokenEOF, "", 0, 5}, }) } func TestBasicKeyAndEqual(t *testing.T) { testFlow(t, "hello =", []token{ - token{tokenKey, "hello"}, - token{tokenEqual, "="}, - token{tokenEOF, ""}, + token{tokenKey, "hello", 0, 0}, + token{tokenEqual, "=", 0, 6}, + token{tokenEOF, "", 0, 7}, }) } func TestKeyWithSharpAndEqual(t *testing.T) { testFlow(t, "key#name = 5", []token{ - token{tokenKey, "key#name"}, - token{tokenEqual, "="}, - token{tokenInteger, "5"}, - token{tokenEOF, ""}, + token{tokenKey, "key#name", 0, 0}, + token{tokenEqual, "=", 0, 9}, + token{tokenInteger, "5", 0, 11}, + token{tokenEOF, "", 0, 12}, }) } + + func TestKeyWithSymbolsAndEqual(t *testing.T) { testFlow(t, "~!@#$^&*()_+-`1234567890[]\\|/?><.,;:' = 5", []token{ - token{tokenKey, "~!@#$^&*()_+-`1234567890[]\\|/?><.,;:'"}, - token{tokenEqual, "="}, - 
token{tokenInteger, "5"}, - token{tokenEOF, ""}, + token{tokenKey, "~!@#$^&*()_+-`1234567890[]\\|/?><.,;:'", 0, 0}, + token{tokenEqual, "=", 0, 38}, + token{tokenInteger, "5", 0, 40}, + token{tokenEOF, "", 0, 41}, }) } func TestKeyEqualStringEscape(t *testing.T) { - testFlow(t, "foo = \"hello\\\"\"", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenString, "hello\""}, - token{tokenEOF, ""}, + testFlow(t, `foo = "hello\""`, []token{ + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, "=", 0, 4}, + token{tokenString, "hello\"" ,0, 7}, + token{tokenEOF, "", 0, 15}, }) } func TestKeyEqualStringUnfinished(t *testing.T) { - testFlow(t, "foo = \"bar", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenError, "unclosed string"}, + testFlow(t, `foo = "bar`, []token{ + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, "=", 0, 4}, + token{tokenError, "unclosed string", 0, 7}, }) } func TestKeyEqualString(t *testing.T) { - testFlow(t, "foo = \"bar\"", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenString, "bar"}, - token{tokenEOF, ""}, + testFlow(t, `foo = "bar"`, []token{ + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, "=", 0, 4}, + token{tokenString, "bar", 0, 7}, + token{tokenEOF, "", 0, 11}, }) } func TestKeyEqualTrue(t *testing.T) { testFlow(t, "foo = true", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenTrue, "true"}, - token{tokenEOF, ""}, + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, "=", 0, 4}, + token{tokenTrue, "true", 0, 6}, + token{tokenEOF, "", 0, 10}, }) } func TestKeyEqualFalse(t *testing.T) { testFlow(t, "foo = false", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenFalse, "false"}, - token{tokenEOF, ""}, + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, "=", 0, 4}, + token{tokenFalse, "false", 0, 6}, + token{tokenEOF, "", 0, 11}, }) } func TestArrayNestedString(t *testing.T) { - testFlow(t, "a = [ [\"hello\", \"world\"] 
]", []token{ - token{tokenKey, "a"}, - token{tokenEqual, "="}, - token{tokenLeftBracket, "["}, - token{tokenLeftBracket, "["}, - token{tokenString, "hello"}, - token{tokenComma, ","}, - token{tokenString, "world"}, - token{tokenRightBracket, "]"}, - token{tokenRightBracket, "]"}, - token{tokenEOF, ""}, + testFlow(t, `a = [ ["hello", "world"] ]`, []token{ + token{tokenKey, "a", 0, 0}, + token{tokenEqual, "=", 0, 2}, + token{tokenLeftBracket, "[", 0, 4}, + token{tokenLeftBracket, "[", 0, 6}, + token{tokenString, "hello", 0, 8}, + token{tokenComma, ",", 0, 14}, + token{tokenString, "world", 0, 17}, + token{tokenRightBracket, "]", 0, 23}, + token{tokenRightBracket, "]", 0, 25}, + token{tokenEOF, "", 0, 26}, }) } func TestArrayNestedInts(t *testing.T) { testFlow(t, "a = [ [42, 21], [10] ]", []token{ - token{tokenKey, "a"}, - token{tokenEqual, "="}, - token{tokenLeftBracket, "["}, - token{tokenLeftBracket, "["}, - token{tokenInteger, "42"}, - token{tokenComma, ","}, - token{tokenInteger, "21"}, - token{tokenRightBracket, "]"}, - token{tokenComma, ","}, - token{tokenLeftBracket, "["}, - token{tokenInteger, "10"}, - token{tokenRightBracket, "]"}, - token{tokenRightBracket, "]"}, - token{tokenEOF, ""}, + token{tokenKey, "a", 0, 0}, + token{tokenEqual, "=", 0, 2}, + token{tokenLeftBracket, "[", 0, 4}, + token{tokenLeftBracket, "[", 0, 6}, + token{tokenInteger, "42", 0, 7}, + token{tokenComma, ",", 0, 9}, + token{tokenInteger, "21", 0, 11}, + token{tokenRightBracket, "]", 0, 13}, + token{tokenComma, ",", 0, 14}, + token{tokenLeftBracket, "[", 0, 16}, + token{tokenInteger, "10", 0, 17}, + token{tokenRightBracket, "]", 0, 19}, + token{tokenRightBracket, "]", 0, 21}, + token{tokenEOF, "", 0, 22}, }) } func TestArrayInts(t *testing.T) { testFlow(t, "a = [ 42, 21, 10, ]", []token{ - token{tokenKey, "a"}, - token{tokenEqual, "="}, - token{tokenLeftBracket, "["}, - token{tokenInteger, "42"}, - token{tokenComma, ","}, - token{tokenInteger, "21"}, - token{tokenComma, ","}, - 
token{tokenInteger, "10"}, - token{tokenComma, ","}, - token{tokenRightBracket, "]"}, - token{tokenEOF, ""}, + token{tokenKey, "a", 0, 0}, + token{tokenEqual, "=", 0, 2}, + token{tokenLeftBracket, "[", 0, 4}, + token{tokenInteger, "42", 0, 6}, + token{tokenComma, ",", 0, 8}, + token{tokenInteger, "21", 0, 10}, + token{tokenComma, ",", 0, 12}, + token{tokenInteger, "10", 0, 14}, + token{tokenComma, ",", 0, 16}, + token{tokenRightBracket, "]", 0, 18}, + token{tokenEOF, "", 0, 19}, }) } func TestMultilineArrayComments(t *testing.T) { testFlow(t, "a = [1, # wow\n2, # such items\n3, # so array\n]", []token{ - token{tokenKey, "a"}, - token{tokenEqual, "="}, - token{tokenLeftBracket, "["}, - token{tokenInteger, "1"}, - token{tokenComma, ","}, - token{tokenInteger, "2"}, - token{tokenComma, ","}, - token{tokenInteger, "3"}, - token{tokenComma, ","}, - token{tokenRightBracket, "]"}, - token{tokenEOF, ""}, + token{tokenKey, "a", 0, 0}, + token{tokenEqual, "=", 0, 2}, + token{tokenLeftBracket, "[", 0, 4}, + token{tokenInteger, "1", 0, 5}, + token{tokenComma, ",", 0, 6}, + token{tokenInteger, "2", 1, 0}, + token{tokenComma, ",", 1, 1}, + token{tokenInteger, "3", 2, 0}, + token{tokenComma, ",", 2, 1}, + token{tokenRightBracket, "]", 3, 0}, + token{tokenEOF, "", 3, 1}, }) } func TestKeyEqualArrayBools(t *testing.T) { testFlow(t, "foo = [true, false, true]", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenLeftBracket, "["}, - token{tokenTrue, "true"}, - token{tokenComma, ","}, - token{tokenFalse, "false"}, - token{tokenComma, ","}, - token{tokenTrue, "true"}, - token{tokenRightBracket, "]"}, - token{tokenEOF, ""}, + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, "=", 0, 4}, + token{tokenLeftBracket, "[", 0, 6}, + token{tokenTrue, "true", 0, 7}, + token{tokenComma, ",", 0, 11}, + token{tokenFalse, "false", 0, 13}, + token{tokenComma, ",", 0, 18}, + token{tokenTrue, "true", 0, 20}, + token{tokenRightBracket, "]", 0, 24}, + token{tokenEOF, "", 0, 25}, }) 
} func TestKeyEqualArrayBoolsWithComments(t *testing.T) { testFlow(t, "foo = [true, false, true] # YEAH", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenLeftBracket, "["}, - token{tokenTrue, "true"}, - token{tokenComma, ","}, - token{tokenFalse, "false"}, - token{tokenComma, ","}, - token{tokenTrue, "true"}, - token{tokenRightBracket, "]"}, - token{tokenEOF, ""}, + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, "=", 0, 4}, + token{tokenLeftBracket, "[", 0, 6}, + token{tokenTrue, "true", 0, 7}, + token{tokenComma, ",", 0, 11}, + token{tokenFalse, "false", 0, 13}, + token{tokenComma, ",", 0, 18}, + token{tokenTrue, "true", 0, 20}, + token{tokenRightBracket, "]", 0, 24}, + token{tokenEOF, "", 0, 32}, }) } @@ -278,138 +283,138 @@ func TestKeyEqualDate(t *testing.T) { testFlow(t, "foo = 1979-05-27T07:32:00Z", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenDate, "1979-05-27T07:32:00Z"}, - token{tokenEOF, ""}, + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, "=", 0, 4}, + token{tokenDate, "1979-05-27T07:32:00Z", 0, 6}, + token{tokenEOF, "", 0, 26}, }) } func TestFloatEndingWithDot(t *testing.T) { testFlow(t, "foo = 42.", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenError, "float cannot end with a dot"}, + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, "=", 0, 4}, + token{tokenError, "float cannot end with a dot", 0, 6}, }) } func TestFloatWithTwoDots(t *testing.T) { testFlow(t, "foo = 4.2.", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenError, "cannot have two dots in one float"}, + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, "=", 0, 4}, + token{tokenError, "cannot have two dots in one float", 0, 6}, }) } func TestDoubleEqualKey(t *testing.T) { testFlow(t, "foo= = 2", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenError, "cannot have multiple equals for the same key"}, + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, 
"=", 0, 3}, + token{tokenError, "cannot have multiple equals for the same key", 0, 4}, }) } func TestInvalidEsquapeSequence(t *testing.T) { - testFlow(t, "foo = \"\\x\"", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenError, "invalid escape sequence: \\x"}, + testFlow(t, `foo = "\x"`, []token{ + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, "=", 0, 4}, + token{tokenError, "invalid escape sequence: \\x", 0, 7}, }) } func TestNestedArrays(t *testing.T) { testFlow(t, "foo = [[[]]]", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenLeftBracket, "["}, - token{tokenLeftBracket, "["}, - token{tokenLeftBracket, "["}, - token{tokenRightBracket, "]"}, - token{tokenRightBracket, "]"}, - token{tokenRightBracket, "]"}, - token{tokenEOF, ""}, + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, "=", 0, 4}, + token{tokenLeftBracket, "[", 0, 6}, + token{tokenLeftBracket, "[", 0, 7}, + token{tokenLeftBracket, "[", 0, 8}, + token{tokenRightBracket, "]", 0, 9}, + token{tokenRightBracket, "]", 0, 10}, + token{tokenRightBracket, "]", 0, 11}, + token{tokenEOF, "", 0, 12}, }) } func TestKeyEqualNumber(t *testing.T) { testFlow(t, "foo = 42", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenInteger, "42"}, - token{tokenEOF, ""}, + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, "=", 0, 4}, + token{tokenInteger, "42", 0, 6}, + token{tokenEOF, "", 0, 8}, }) testFlow(t, "foo = +42", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenInteger, "+42"}, - token{tokenEOF, ""}, + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, "=", 0, 4}, + token{tokenInteger, "+42", 0, 6}, + token{tokenEOF, "", 0, 9}, }) testFlow(t, "foo = -42", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenInteger, "-42"}, - token{tokenEOF, ""}, + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, "=", 0, 4}, + token{tokenInteger, "-42", 0, 6}, + token{tokenEOF, "", 0, 9}, }) testFlow(t, "foo = 
4.2", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenFloat, "4.2"}, - token{tokenEOF, ""}, + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, "=", 0, 4}, + token{tokenFloat, "4.2", 0, 6}, + token{tokenEOF, "", 0, 9}, }) testFlow(t, "foo = +4.2", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenFloat, "+4.2"}, - token{tokenEOF, ""}, + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, "=", 0, 4}, + token{tokenFloat, "+4.2", 0, 6}, + token{tokenEOF, "", 0, 10}, }) testFlow(t, "foo = -4.2", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenFloat, "-4.2"}, - token{tokenEOF, ""}, + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, "=", 0, 4}, + token{tokenFloat, "-4.2", 0, 6}, + token{tokenEOF, "", 0, 10}, }) } func TestMultiline(t *testing.T) { testFlow(t, "foo = 42\nbar=21", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenInteger, "42"}, - token{tokenKey, "bar"}, - token{tokenEqual, "="}, - token{tokenInteger, "21"}, - token{tokenEOF, ""}, + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, "=", 0, 4}, + token{tokenInteger, "42", 0, 6}, + token{tokenKey, "bar", 1, 0}, + token{tokenEqual, "=", 1, 3}, + token{tokenInteger, "21", 1, 4}, + token{tokenEOF, "", 1, 6}, }) } func TestKeyEqualStringUnicodeEscape(t *testing.T) { - testFlow(t, "foo = \"hello \\u2665\"", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenString, "hello ♥"}, - token{tokenEOF, ""}, + testFlow(t, `foo = "hello \u2665"`, []token{ + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, "=", 0, 4}, + token{tokenString, "hello ♥", 0, 7}, + token{tokenEOF, "", 0, 20}, }) } func TestUnicodeString(t *testing.T) { - testFlow(t, "foo = \"hello ♥ world\"", []token{ - token{tokenKey, "foo"}, - token{tokenEqual, "="}, - token{tokenString, "hello ♥ world"}, - token{tokenEOF, ""}, + testFlow(t, `foo = "hello ♥ world"`, []token{ + token{tokenKey, "foo", 0, 0}, + token{tokenEqual, "=", 0, 4}, + 
token{tokenString, "hello ♥ world", 0, 7}, + token{tokenEOF, "", 0, 21}, }) } func TestKeyGroupArray(t *testing.T) { testFlow(t, "[[foo]]", []token{ - token{tokenDoubleLeftBracket, "[["}, - token{tokenKeyGroupArray, "foo"}, - token{tokenDoubleRightBracket, "]]"}, - token{tokenEOF, ""}, + token{tokenDoubleLeftBracket, "[[", 0, 0}, + token{tokenKeyGroupArray, "foo", 0, 2}, + token{tokenDoubleRightBracket, "]]", 0, 5}, + token{tokenEOF, "", 0, 7}, }) }