Add \U support to query lexer (#88)
diff --git a/querylexer.go b/querylexer.go
index 61153cb..960681d 100644
--- a/querylexer.go
+++ b/querylexer.go
@@ -272,6 +272,23 @@
return l.errorf("invalid unicode escape: \\u" + code)
}
growingString += string(rune(intcode))
+ } else if l.follow("\\U") {
+ l.pos += 2
+ code := ""
+ for i := 0; i < 8; i++ {
+ c := l.peek()
+ l.pos++
+ if !isHexDigit(c) {
+ return l.errorf("unfinished unicode escape")
+ }
+ code = code + string(c)
+ }
+ l.pos--
+ intcode, err := strconv.ParseInt(code, 16, 32)
+ if err != nil {
+ return l.errorf("invalid unicode escape: \\u" + code)
+ }
+ growingString += string(rune(intcode))
} else if l.follow("\\") {
l.pos++
return l.errorf("invalid escape sequence: \\" + string(l.peek()))
diff --git a/querylexer_test.go b/querylexer_test.go
index a9bd674..48338e5 100644
--- a/querylexer_test.go
+++ b/querylexer_test.go
@@ -10,11 +10,13 @@
token := <-ch
if token != expected {
t.Log("While testing #", idx, ":", input)
+ t.Log("compared (got)", token, "to (expected)", expected)
+ t.Log("\tvalue:", token.val, "<->", expected.val)
+ t.Log("\tvalue as bytes:", []byte(token.val), "<->", []byte(expected.val))
+ t.Log("\ttype:", token.typ.String(), "<->", expected.typ.String())
+ t.Log("\tline:", token.Line, "<->", expected.Line)
+ t.Log("\tcolumn:", token.Col, "<->", expected.Col)
t.Log("compared", token, "to", expected)
- t.Log(token.val, "<->", expected.val)
- t.Log(token.typ, "<->", expected.typ)
- t.Log(token.Line, "<->", expected.Line)
- t.Log(token.Col, "<->", expected.Col)
t.FailNow()
}
}
@@ -48,9 +50,9 @@
}
func TestLexString(t *testing.T) {
- testQLFlow(t, "'foo'", []token{
- token{Position{1, 2}, tokenString, "foo"},
- token{Position{1, 6}, tokenEOF, ""},
+ testQLFlow(t, "'foo\n'", []token{
+ token{Position{1, 2}, tokenString, "foo\n"},
+ token{Position{2, 2}, tokenEOF, ""},
})
}
@@ -61,6 +63,37 @@
})
}
+func TestLexStringEscapes(t *testing.T) {
+ testQLFlow(t, `"foo \" \' \b \f \/ \t \r \\ \u03A9 \U00012345 \n bar"`, []token{
+ token{Position{1, 2}, tokenString, "foo \" ' \b \f / \t \r \\ \u03A9 \U00012345 \n bar"},
+ token{Position{1, 55}, tokenEOF, ""},
+ })
+}
+
+func TestLexStringUnfinishedUnicode4(t *testing.T) {
+ testQLFlow(t, `"\u000"`, []token{
+ token{Position{1, 2}, tokenError, "unfinished unicode escape"},
+ })
+}
+
+func TestLexStringUnfinishedUnicode8(t *testing.T) {
+ testQLFlow(t, `"\U0000"`, []token{
+ token{Position{1, 2}, tokenError, "unfinished unicode escape"},
+ })
+}
+
+func TestLexStringInvalidEscape(t *testing.T) {
+ testQLFlow(t, `"\x"`, []token{
+ token{Position{1, 2}, tokenError, "invalid escape sequence: \\x"},
+ })
+}
+
+func TestLexStringUnfinished(t *testing.T) {
+ testQLFlow(t, `"bar`, []token{
+ token{Position{1, 2}, tokenError, "unclosed string"},
+ })
+}
+
func TestLexKey(t *testing.T) {
testQLFlow(t, "foo", []token{
token{Position{1, 1}, tokenKey, "foo"},
@@ -95,3 +128,51 @@
token{Position{1, 12}, tokenEOF, ""},
})
}
+
+func TestLexInteger(t *testing.T) {
+ testQLFlow(t, "100 +200 -300", []token{
+ token{Position{1, 1}, tokenInteger, "100"},
+ token{Position{1, 5}, tokenInteger, "+200"},
+ token{Position{1, 10}, tokenInteger, "-300"},
+ token{Position{1, 14}, tokenEOF, ""},
+ })
+}
+
+func TestLexFloat(t *testing.T) {
+ testQLFlow(t, "100.0 +200.0 -300.0", []token{
+ token{Position{1, 1}, tokenFloat, "100.0"},
+ token{Position{1, 7}, tokenFloat, "+200.0"},
+ token{Position{1, 14}, tokenFloat, "-300.0"},
+ token{Position{1, 20}, tokenEOF, ""},
+ })
+}
+
+func TestLexFloatWithMultipleDots(t *testing.T) {
+ testQLFlow(t, "4.2.", []token{
+ token{Position{1, 1}, tokenError, "cannot have two dots in one float"},
+ })
+}
+
+func TestLexFloatLeadingDot(t *testing.T) {
+ testQLFlow(t, "+.1", []token{
+ token{Position{1, 1}, tokenError, "cannot start float with a dot"},
+ })
+}
+
+func TestLexFloatWithTrailingDot(t *testing.T) {
+ testQLFlow(t, "42.", []token{
+ token{Position{1, 1}, tokenError, "float cannot end with a dot"},
+ })
+}
+
+func TestLexNumberWithoutDigit(t *testing.T) {
+ testQLFlow(t, "+", []token{
+ token{Position{1, 1}, tokenError, "no digit in that number"},
+ })
+}
+
+func TestLexUnknown(t *testing.T) {
+ testQLFlow(t, "^", []token{
+ token{Position{1, 1}, tokenError, "unexpected char: '94'"},
+ })
+}