Add \U support to query lexer (#88)
diff --git a/querylexer.go b/querylexer.go index 61153cb..960681d 100644 --- a/querylexer.go +++ b/querylexer.go
@@ -272,6 +272,23 @@ return l.errorf("invalid unicode escape: \\u" + code) } growingString += string(rune(intcode)) + } else if l.follow("\\U") { + l.pos += 2 + code := "" + for i := 0; i < 8; i++ { + c := l.peek() + l.pos++ + if !isHexDigit(c) { + return l.errorf("unfinished unicode escape") + } + code = code + string(c) + } + l.pos-- + intcode, err := strconv.ParseInt(code, 16, 32) + if err != nil { + return l.errorf("invalid unicode escape: \\U" + code) + } + growingString += string(rune(intcode)) } else if l.follow("\\") { l.pos++ return l.errorf("invalid escape sequence: \\" + string(l.peek()))
diff --git a/querylexer_test.go b/querylexer_test.go index a9bd674..48338e5 100644 --- a/querylexer_test.go +++ b/querylexer_test.go
@@ -10,11 +10,13 @@ token := <-ch if token != expected { t.Log("While testing #", idx, ":", input) + t.Log("compared (got)", token, "to (expected)", expected) + t.Log("\tvalue:", token.val, "<->", expected.val) + t.Log("\tvalue as bytes:", []byte(token.val), "<->", []byte(expected.val)) + t.Log("\ttype:", token.typ.String(), "<->", expected.typ.String()) + t.Log("\tline:", token.Line, "<->", expected.Line) + t.Log("\tcolumn:", token.Col, "<->", expected.Col) t.Log("compared", token, "to", expected) - t.Log(token.val, "<->", expected.val) - t.Log(token.typ, "<->", expected.typ) - t.Log(token.Line, "<->", expected.Line) - t.Log(token.Col, "<->", expected.Col) t.FailNow() } } @@ -48,9 +50,9 @@ } func TestLexString(t *testing.T) { - testQLFlow(t, "'foo'", []token{ - token{Position{1, 2}, tokenString, "foo"}, - token{Position{1, 6}, tokenEOF, ""}, + testQLFlow(t, "'foo\n'", []token{ + token{Position{1, 2}, tokenString, "foo\n"}, + token{Position{2, 2}, tokenEOF, ""}, }) } @@ -61,6 +63,37 @@ }) } +func TestLexStringEscapes(t *testing.T) { + testQLFlow(t, `"foo \" \' \b \f \/ \t \r \\ \u03A9 \U00012345 \n bar"`, []token{ + token{Position{1, 2}, tokenString, "foo \" ' \b \f / \t \r \\ \u03A9 \U00012345 \n bar"}, + token{Position{1, 55}, tokenEOF, ""}, + }) +} + +func TestLexStringUnfinishedUnicode4(t *testing.T) { + testQLFlow(t, `"\u000"`, []token{ + token{Position{1, 2}, tokenError, "unfinished unicode escape"}, + }) +} + +func TestLexStringUnfinishedUnicode8(t *testing.T) { + testQLFlow(t, `"\U0000"`, []token{ + token{Position{1, 2}, tokenError, "unfinished unicode escape"}, + }) +} + +func TestLexStringInvalidEscape(t *testing.T) { + testQLFlow(t, `"\x"`, []token{ + token{Position{1, 2}, tokenError, "invalid escape sequence: \\x"}, + }) +} + +func TestLexStringUnfinished(t *testing.T) { + testQLFlow(t, `"bar`, []token{ + token{Position{1, 2}, tokenError, "unclosed string"}, + }) +} + func TestLexKey(t *testing.T) { testQLFlow(t, "foo", []token{ token{Position{1, 1}, 
tokenKey, "foo"}, @@ -95,3 +128,51 @@ token{Position{1, 12}, tokenEOF, ""}, }) } + +func TestLexInteger(t *testing.T) { + testQLFlow(t, "100 +200 -300", []token{ + token{Position{1, 1}, tokenInteger, "100"}, + token{Position{1, 5}, tokenInteger, "+200"}, + token{Position{1, 10}, tokenInteger, "-300"}, + token{Position{1, 14}, tokenEOF, ""}, + }) +} + +func TestLexFloat(t *testing.T) { + testQLFlow(t, "100.0 +200.0 -300.0", []token{ + token{Position{1, 1}, tokenFloat, "100.0"}, + token{Position{1, 7}, tokenFloat, "+200.0"}, + token{Position{1, 14}, tokenFloat, "-300.0"}, + token{Position{1, 20}, tokenEOF, ""}, + }) +} + +func TestLexFloatWithMultipleDots(t *testing.T) { + testQLFlow(t, "4.2.", []token{ + token{Position{1, 1}, tokenError, "cannot have two dots in one float"}, + }) +} + +func TestLexFloatLeadingDot(t *testing.T) { + testQLFlow(t, "+.1", []token{ + token{Position{1, 1}, tokenError, "cannot start float with a dot"}, + }) +} + +func TestLexFloatWithTrailingDot(t *testing.T) { + testQLFlow(t, "42.", []token{ + token{Position{1, 1}, tokenError, "float cannot end with a dot"}, + }) +} + +func TestLexNumberWithoutDigit(t *testing.T) { + testQLFlow(t, "+", []token{ + token{Position{1, 1}, tokenError, "no digit in that number"}, + }) +} + +func TestLexUnknown(t *testing.T) { + testQLFlow(t, "^", []token{ + token{Position{1, 1}, tokenError, "unexpected char: '94'"}, + }) +}