Fixes #6: implement \uXXXX escaping
diff --git a/lexer.go b/lexer.go index ba18a9b..09598ec 100644 --- a/lexer.go +++ b/lexer.go
@@ -6,6 +6,7 @@ import ( "fmt" "regexp" + "strconv" "strings" "unicode" "unicode/utf8" @@ -70,6 +71,14 @@ return unicode.IsNumber(r) } +// isHexDigit reports whether r is an ASCII hexadecimal digit: 0-9, a-f or A-F. +// Both letter cases are accepted; strconv.ParseInt handles either in base 16. +func isHexDigit(r rune) bool { + return ('0' <= r && r <= '9') || + ('a' <= r && r <= 'f') || + ('A' <= r && r <= 'F') +} + // Define lexer type lexer struct { input string @@ -323,6 +332,23 @@ } else if l.follow("\\\\") { l.pos += 1 growing_string += "\\" + } else if l.follow("\\u") { + l.pos += 2 + code := "" + for i := 0; i < 4; i++ { + c := l.peek() + l.pos += 1 + if !isHexDigit(c) { + return l.errorf("unfinished unicode escape") + } + code = code + string(c) + } + l.pos -= 1 + intcode, err := strconv.ParseInt(code, 16, 32) + if err != nil { + return l.errorf("invalid unicode escape: \\u" + code) + } + growing_string += string(rune(intcode)) } else { growing_string += string(l.peek()) }
diff --git a/lexer_test.go b/lexer_test.go index de0ccc6..6723738 100644 --- a/lexer_test.go +++ b/lexer_test.go
@@ -300,3 +300,15 @@ token{tokenEOF, ""}, }) } + +// TestKeyEqualStringUnicodeEscape checks that a \uXXXX escape inside a string value is lexed into the rune it denotes. +func TestKeyEqualStringUnicodeEscape(t *testing.T) { + input := "foo = \"hello \\u2665\"" + want := []token{ + {tokenKey, "foo"}, + {tokenEqual, "="}, + {tokenString, "hello ♥"}, + {tokenEOF, ""}, + } + testFlow(t, input, want) +}