Final TOML-path solution * Refactored type names and file names to mesh more closely with the existing TOML library * Added QueryResult structure that provides values and position data * Added Query() method to the TomlTree type * Tests, tests, and more tests * Fixed a bug where positions returned from some tables were invalid * Added a test case for the bug fix. The bug fix was an interesting case: position information wasn't being set in cases where createSubTree was called, so table names like [foo.bar] would result in table 'foo' having no position.
diff --git a/jpath/parser.go b/jpath/parser.go deleted file mode 100644 index 8e93457..0000000 --- a/jpath/parser.go +++ /dev/null
@@ -1,289 +0,0 @@ -/* - Based on the "jsonpath" spec/concept. - - http://goessner.net/articles/JsonPath/ - https://code.google.com/p/json-path/ -*/ - -package jpath - -import ( - "fmt" - "math" -) - -type parser struct { - flow chan token - tokensBuffer []token - path *Query - union []PathFn -} - -type parserStateFn func(*parser) parserStateFn - -// Formats and panics an error message based on a token -func (p *parser) raiseError(tok *token, msg string, args ...interface{}) { - panic(tok.Position.String() + ": " + fmt.Sprintf(msg, args...)) -} - -func (p *parser) run() { - for state := parseStart; state != nil; { - state = state(p) - } -} - -func (p *parser) backup(tok *token) { - p.tokensBuffer = append(p.tokensBuffer, *tok) -} - -func (p *parser) peek() *token { - if len(p.tokensBuffer) != 0 { - return &(p.tokensBuffer[0]) - } - - tok, ok := <-p.flow - if !ok { - return nil - } - p.backup(&tok) - return &tok -} - -func (p *parser) lookahead(types ...tokenType) bool { - result := true - buffer := []token{} - - for _, typ := range types { - tok := p.getToken() - if tok == nil { - result = false - break - } - buffer = append(buffer, *tok) - if tok.typ != typ { - result = false - break - } - } - // add the tokens back to the buffer, and return - p.tokensBuffer = append(p.tokensBuffer, buffer...) - return result -} - -func (p *parser) getToken() *token { - if len(p.tokensBuffer) != 0 { - tok := p.tokensBuffer[0] - p.tokensBuffer = p.tokensBuffer[1:] - return &tok - } - tok, ok := <-p.flow - if !ok { - return nil - } - return &tok -} - -func parseStart(p *parser) parserStateFn { - tok := p.getToken() - - if tok == nil || tok.typ == tokenEOF { - return nil - } - - if tok.typ != tokenDollar { - p.raiseError(tok, "Expected '$' at start of expression") - } - - return parseMatchExpr -} - -// handle '.' prefix, '[]', and '..' 
-func parseMatchExpr(p *parser) parserStateFn { - tok := p.getToken() - switch tok.typ { - case tokenDotDot: - p.path.appendPath(&matchRecursiveFn{}) - // nested parse for '..' - tok := p.getToken() - switch tok.typ { - case tokenKey: - p.path.appendPath(newMatchKeyFn(tok.val)) - return parseMatchExpr - case tokenLBracket: - return parseBracketExpr - case tokenStar: - // do nothing - the recursive predicate is enough - return parseMatchExpr - } - - case tokenDot: - // nested parse for '.' - tok := p.getToken() - switch tok.typ { - case tokenKey: - p.path.appendPath(newMatchKeyFn(tok.val)) - return parseMatchExpr - case tokenStar: - p.path.appendPath(&matchAnyFn{}) - return parseMatchExpr - } - - case tokenLBracket: - return parseBracketExpr - - case tokenEOF: - return nil // allow EOF at this stage - } - p.raiseError(tok, "expected match expression") - return nil -} - -func parseBracketExpr(p *parser) parserStateFn { - if p.lookahead(tokenInteger, tokenColon) { - return parseSliceExpr - } - if p.peek().typ == tokenColon { - return parseSliceExpr - } - return parseUnionExpr -} - -func parseUnionExpr(p *parser) parserStateFn { - var tok *token - - // this state can be traversed after some sub-expressions - // so be careful when setting up state in the parser - if p.union == nil { - p.union = []PathFn{} - } - -loop: // labeled loop for easy breaking - for { - if len(p.union) > 0 { - // parse delimiter or terminator - tok = p.getToken() - switch tok.typ { - case tokenComma: - // do nothing - case tokenRBracket: - break loop - default: - p.raiseError(tok, "expected ',' or ']', not '%s'", tok.val) - } - } - - // parse sub expression - tok = p.getToken() - switch tok.typ { - case tokenInteger: - p.union = append(p.union, newMatchIndexFn(tok.Int())) - case tokenKey: - p.union = append(p.union, newMatchKeyFn(tok.val)) - case tokenString: - p.union = append(p.union, newMatchKeyFn(tok.val)) - case tokenQuestion: - return parseFilterExpr - case tokenLParen: - return 
parseScriptExpr - default: - p.raiseError(tok, "expected union sub expression, not '%s', %d", tok.val, len(p.union)) - } - } - - // if there is only one sub-expression, use that instead - if len(p.union) == 1 { - p.path.appendPath(p.union[0]) - } else { - p.path.appendPath(&matchUnionFn{p.union}) - } - - p.union = nil // clear out state - return parseMatchExpr -} - -func parseSliceExpr(p *parser) parserStateFn { - // init slice to grab all elements - start, end, step := 0, math.MaxInt64, 1 - - // parse optional start - tok := p.getToken() - if tok.typ == tokenInteger { - start = tok.Int() - tok = p.getToken() - } - if tok.typ != tokenColon { - p.raiseError(tok, "expected ':'") - } - - // parse optional end - tok = p.getToken() - if tok.typ == tokenInteger { - end = tok.Int() - tok = p.getToken() - } - if tok.typ == tokenRBracket { - p.path.appendPath(newMatchSliceFn(start, end, step)) - return parseMatchExpr - } - if tok.typ != tokenColon { - p.raiseError(tok, "expected ']' or ':'") - } - - // parse optional step - tok = p.getToken() - if tok.typ == tokenInteger { - step = tok.Int() - if step < 0 { - p.raiseError(tok, "step must be a positive value") - } - tok = p.getToken() - } - if tok.typ != tokenRBracket { - p.raiseError(tok, "expected ']'") - } - - p.path.appendPath(newMatchSliceFn(start, end, step)) - return parseMatchExpr -} - -func parseFilterExpr(p *parser) parserStateFn { - tok := p.getToken() - if tok.typ != tokenLParen { - p.raiseError(tok, "expected left-parenthesis for filter expression") - } - tok = p.getToken() - if tok.typ != tokenKey && tok.typ != tokenString { - p.raiseError(tok, "expected key or string for filter funciton name") - } - name := tok.val - tok = p.getToken() - if tok.typ != tokenRParen { - p.raiseError(tok, "expected right-parenthesis for filter expression") - } - p.union = append(p.union, newMatchFilterFn(name, tok.Position)) - return parseUnionExpr -} - -func parseScriptExpr(p *parser) parserStateFn { - tok := p.getToken() - if 
tok.typ != tokenKey && tok.typ != tokenString { - p.raiseError(tok, "expected key or string for script funciton name") - } - name := tok.val - tok = p.getToken() - if tok.typ != tokenRParen { - p.raiseError(tok, "expected right-parenthesis for script expression") - } - p.union = append(p.union, newMatchScriptFn(name, tok.Position)) - return parseUnionExpr -} - -func parse(flow chan token) *Query { - parser := &parser{ - flow: flow, - tokensBuffer: []token{}, - path: newQuery(), - } - parser.run() - return parser.path -}
diff --git a/jpath/parser_test.go b/jpath/parser_test.go deleted file mode 100644 index 61908de..0000000 --- a/jpath/parser_test.go +++ /dev/null
@@ -1,261 +0,0 @@ -package jpath - -import ( - "fmt" - . "github.com/pelletier/go-toml" - "testing" - "sort" - "strings" -) - -func valueString(root interface{}) string { - result := "" //fmt.Sprintf("%T:", root) - switch node := root.(type) { - case []interface{}: - items := []string{} - for _, v := range node { - items = append(items, valueString(v)) - } - sort.Strings(items) - result = "[" + strings.Join(items, ", ") + "]" - case *TomlTree: - // workaround for unreliable map key ordering - items := []string{} - for _, k := range node.Keys() { - v := node.GetPath([]string{k}) - items = append(items, k + ":" + valueString(v)) - } - sort.Strings(items) - result = "{" + strings.Join(items, ", ") + "}" - case map[string]interface{}: - // workaround for unreliable map key ordering - items := []string{} - for k, v := range node { - items = append(items, k + ":" + valueString(v)) - } - sort.Strings(items) - result = "{" + strings.Join(items, ", ") + "}" - case int64: - result += fmt.Sprintf("%d", node) - case string: - result += "'" + node + "'" - } - return result -} - -func assertValue(t *testing.T, result, ref interface{}) { - pathStr := valueString(result) - refStr := valueString(ref) - if pathStr != refStr { - t.Errorf("values do not match") - t.Log("test:", pathStr) - t.Log("ref: ", refStr) - } -} - -func assertQuery(t *testing.T, toml, query string, ref []interface{}) { - tree, err := Load(toml) - if err != nil { - t.Errorf("Non-nil toml parse error: %v", err) - return - } - results := Compile(query).Execute(tree) - assertValue(t, results.Values(), ref) -} - - -func TestQueryRoot(t *testing.T) { - assertQuery(t, - "a = 42", - "$", - []interface{}{ - map[string]interface{}{ - "a": int64(42), - }, - }) -} - -func TestQueryKey(t *testing.T) { - assertQuery(t, - "[foo]\na = 42", - "$.foo.a", - []interface{}{ - int64(42), - }) -} - -func TestQueryKeyString(t *testing.T) { - assertQuery(t, - "[foo]\na = 42", - "$.foo['a']", - []interface{}{ - int64(42), - }) -} - -func 
TestQueryIndex(t *testing.T) { - assertQuery(t, - "[foo]\na = [1,2,3,4,5,6,7,8,9,0]", - "$.foo.a[0]", - []interface{}{ - int64(1), - }) -} - -func TestQuerySliceRange(t *testing.T) { - assertQuery(t, - "[foo]\na = [1,2,3,4,5,6,7,8,9,0]", - "$.foo.a[0:5]", - []interface{}{ - int64(1), - int64(2), - int64(3), - int64(4), - int64(5), - }) -} - -func TestQuerySliceStep(t *testing.T) { - assertQuery(t, - "[foo]\na = [1,2,3,4,5,6,7,8,9,0]", - "$.foo.a[0:5:2]", - []interface{}{ - int64(1), - int64(3), - int64(5), - }) -} - -func TestQueryAny(t *testing.T) { - assertQuery(t, - "[foo.bar]\na=1\nb=2\n[foo.baz]\na=3\nb=4", - "$.foo.*", - []interface{}{ - map[string]interface{}{ - "a": int64(1), - "b": int64(2), - }, - map[string]interface{}{ - "a": int64(3), - "b": int64(4), - }, - }) -} -func TestQueryUnionSimple(t *testing.T) { - assertQuery(t, - "[foo.bar]\na=1\nb=2\n[baz.foo]\na=3\nb=4\n[gorf.foo]\na=5\nb=6", - "$.*[bar,foo]", - []interface{}{ - map[string]interface{}{ - "a": int64(1), - "b": int64(2), - }, - map[string]interface{}{ - "a": int64(3), - "b": int64(4), - }, - map[string]interface{}{ - "a": int64(5), - "b": int64(6), - }, - }) -} - -func TestQueryRecursionAll(t *testing.T) { - assertQuery(t, - "[foo.bar]\na=1\nb=2\n[baz.foo]\na=3\nb=4\n[gorf.foo]\na=5\nb=6", - "$..*", - []interface{}{ - map[string]interface{}{ - "bar": map[string]interface{}{ - "a": int64(1), - "b": int64(2), - }, - }, - map[string]interface{}{ - "a": int64(1), - "b": int64(2), - }, - int64(1), - int64(2), - map[string]interface{}{ - "foo": map[string]interface{}{ - "a": int64(3), - "b": int64(4), - }, - }, - map[string]interface{}{ - "a": int64(3), - "b": int64(4), - }, - int64(3), - int64(4), - map[string]interface{}{ - "foo": map[string]interface{}{ - "a": int64(5), - "b": int64(6), - }, - }, - map[string]interface{}{ - "a": int64(5), - "b": int64(6), - }, - int64(5), - int64(6), - }) -} - -func TestQueryRecursionUnionSimple(t *testing.T) { - assertQuery(t, - 
"[foo.bar]\na=1\nb=2\n[baz.foo]\na=3\nb=4\n[gorf.foo]\na=5\nb=6", - "$..['foo','bar']", - []interface{}{ - map[string]interface{}{ - "a": int64(1), - "b": int64(2), - }, - map[string]interface{}{ - "a": int64(3), - "b": int64(4), - }, - map[string]interface{}{ - "a": int64(5), - "b": int64(6), - }, - }) -} - -func TestQueryScriptFnLast(t *testing.T) { - assertQuery(t, - "[foo]\na = [0,1,2,3,4,5,6,7,8,9]", - "$.foo.a[(last)]", - []interface{}{ - int64(9), - }) -} - -func TestQueryFilterFnOdd(t *testing.T) { - assertQuery(t, - "[foo]\na = [0,1,2,3,4,5,6,7,8,9]", - "$.foo.a[?(odd)]", - []interface{}{ - int64(1), - int64(3), - int64(5), - int64(7), - int64(9), - }) -} - -func TestQueryFilterFnEven(t *testing.T) { - assertQuery(t, - "[foo]\na = [0,1,2,3,4,5,6,7,8,9]", - "$.foo.a[?(even)]", - []interface{}{ - int64(0), - int64(2), - int64(4), - int64(6), - int64(8), - }) -}
diff --git a/lexer.go b/lexer.go index b057da5..c1b4998 100644 --- a/lexer.go +++ b/lexer.go
@@ -10,115 +10,16 @@ "regexp" "strconv" "strings" - "unicode" "unicode/utf8" ) var dateRegexp *regexp.Regexp -// Define tokens -type tokenType int - -const ( - eof = -(iota + 1) -) - -const ( - tokenError tokenType = iota - tokenEOF - tokenComment - tokenKey - tokenEqual - tokenString - tokenInteger - tokenTrue - tokenFalse - tokenFloat - tokenLeftBracket - tokenRightBracket - tokenDoubleLeftBracket - tokenDoubleRightBracket - tokenDate - tokenKeyGroup - tokenKeyGroupArray - tokenComma - tokenEOL -) - -var tokenTypeNames = []string{ - "EOF", - "Comment", - "Key", - "=", - "\"", - "Integer", - "True", - "False", - "Float", - "[", - "[", - "]]", - "[[", - "Date", - "KeyGroup", - "KeyGroupArray", - ",", - "EOL", -} - -type token struct { - Position - typ tokenType - val string -} - -func (tt tokenType) String() string { - idx := int(tt) - if idx < len(tokenTypeNames) { - return tokenTypeNames[idx] - } - return "Unknown" -} - -func (i token) String() string { - switch i.typ { - case tokenEOF: - return "EOF" - case tokenError: - return i.val - } - - if len(i.val) > 10 { - return fmt.Sprintf("%.10q...", i.val) - } - return fmt.Sprintf("%q", i.val) -} - -func isSpace(r rune) bool { - return r == ' ' || r == '\t' -} - -func isAlphanumeric(r rune) bool { - return unicode.IsLetter(r) || r == '_' -} - -func isKeyChar(r rune) bool { - // "Keys start with the first non-whitespace character and end with the last - // non-whitespace character before the equals sign." 
- return !(isSpace(r) || r == '\r' || r == '\n' || r == eof || r == '=') -} - -func isDigit(r rune) bool { - return unicode.IsNumber(r) -} - -func isHexDigit(r rune) bool { - return isDigit(r) || - r == 'A' || r == 'B' || r == 'C' || r == 'D' || r == 'E' || r == 'F' -} +// Define state functions +type tomlLexStateFn func() tomlLexStateFn // Define lexer -type lexer struct { +type tomlLexer struct { input string start int pos int @@ -129,14 +30,14 @@ col int } -func (l *lexer) run() { - for state := lexVoid; state != nil; { - state = state(l) +func (l *tomlLexer) run() { + for state := l.lexVoid; state != nil; { + state = state() } close(l.tokens) } -func (l *lexer) nextStart() { +func (l *tomlLexer) nextStart() { // iterate by runes (utf8 characters) // search for newlines and advance line/col counts for i := l.start; i < l.pos; { @@ -153,7 +54,7 @@ l.start = l.pos } -func (l *lexer) emit(t tokenType) { +func (l *tomlLexer) emit(t tokenType) { l.tokens <- token{ Position: Position{l.line, l.col}, typ: t, @@ -162,7 +63,7 @@ l.nextStart() } -func (l *lexer) emitWithValue(t tokenType, value string) { +func (l *tomlLexer) emitWithValue(t tokenType, value string) { l.tokens <- token{ Position: Position{l.line, l.col}, typ: t, @@ -171,7 +72,7 @@ l.nextStart() } -func (l *lexer) next() rune { +func (l *tomlLexer) next() rune { if l.pos >= len(l.input) { l.width = 0 return eof @@ -182,15 +83,15 @@ return r } -func (l *lexer) ignore() { +func (l *tomlLexer) ignore() { l.nextStart() } -func (l *lexer) backup() { +func (l *tomlLexer) backup() { l.pos -= l.width } -func (l *lexer) errorf(format string, args ...interface{}) stateFn { +func (l *tomlLexer) errorf(format string, args ...interface{}) tomlLexStateFn { l.tokens <- token{ Position: Position{l.line, l.col}, typ: tokenError, @@ -199,13 +100,13 @@ return nil } -func (l *lexer) peek() rune { +func (l *tomlLexer) peek() rune { r := l.next() l.backup() return r } -func (l *lexer) accept(valid string) bool { +func (l 
*tomlLexer) accept(valid string) bool { if strings.IndexRune(valid, l.next()) >= 0 { return true } @@ -213,23 +114,20 @@ return false } -func (l *lexer) follow(next string) bool { +func (l *tomlLexer) follow(next string) bool { return strings.HasPrefix(l.input[l.pos:], next) } -// Define state functions -type stateFn func(*lexer) stateFn - -func lexVoid(l *lexer) stateFn { +func (l *tomlLexer) lexVoid() tomlLexStateFn { for { next := l.peek() switch next { case '[': - return lexKeyGroup + return l.lexKeyGroup case '#': - return lexComment + return l.lexComment case '=': - return lexEqual + return l.lexEqual } if isSpace(next) { @@ -237,11 +135,11 @@ } if l.depth > 0 { - return lexRvalue + return l.lexRvalue } if isKeyChar(next) { - return lexKey + return l.lexKey } if l.next() == eof { @@ -253,7 +151,7 @@ return nil } -func lexRvalue(l *lexer) stateFn { +func (l *tomlLexer) lexRvalue() tomlLexStateFn { for { next := l.peek() switch next { @@ -263,43 +161,43 @@ return l.errorf("cannot have multiple equals for the same key") case '[': l.depth++ - return lexLeftBracket + return l.lexLeftBracket case ']': l.depth-- - return lexRightBracket + return l.lexRightBracket case '#': - return lexComment + return l.lexComment case '"': - return lexString + return l.lexString case ',': - return lexComma + return l.lexComma case '\n': l.ignore() l.pos++ if l.depth == 0 { - return lexVoid + return l.lexVoid } - return lexRvalue + return l.lexRvalue } if l.follow("true") { - return lexTrue + return l.lexTrue } if l.follow("false") { - return lexFalse + return l.lexFalse } if isAlphanumeric(next) { - return lexKey + return l.lexKey } if dateRegexp.FindString(l.input[l.pos:]) != "" { - return lexDate + return l.lexDate } if next == '+' || next == '-' || isDigit(next) { - return lexNumber + return l.lexNumber } if isSpace(next) { @@ -315,51 +213,51 @@ return nil } -func lexDate(l *lexer) stateFn { +func (l *tomlLexer) lexDate() tomlLexStateFn { l.ignore() l.pos += 20 // Fixed size of 
a date in TOML l.emit(tokenDate) - return lexRvalue + return l.lexRvalue } -func lexTrue(l *lexer) stateFn { +func (l *tomlLexer) lexTrue() tomlLexStateFn { l.ignore() l.pos += 4 l.emit(tokenTrue) - return lexRvalue + return l.lexRvalue } -func lexFalse(l *lexer) stateFn { +func (l *tomlLexer) lexFalse() tomlLexStateFn { l.ignore() l.pos += 5 l.emit(tokenFalse) - return lexRvalue + return l.lexRvalue } -func lexEqual(l *lexer) stateFn { +func (l *tomlLexer) lexEqual() tomlLexStateFn { l.ignore() l.accept("=") l.emit(tokenEqual) - return lexRvalue + return l.lexRvalue } -func lexComma(l *lexer) stateFn { +func (l *tomlLexer) lexComma() tomlLexStateFn { l.ignore() l.accept(",") l.emit(tokenComma) - return lexRvalue + return l.lexRvalue } -func lexKey(l *lexer) stateFn { +func (l *tomlLexer) lexKey() tomlLexStateFn { l.ignore() for isKeyChar(l.next()) { } l.backup() l.emit(tokenKey) - return lexVoid + return l.lexVoid } -func lexComment(l *lexer) stateFn { +func (l *tomlLexer) lexComment() tomlLexStateFn { for { next := l.next() if next == '\n' || next == eof { @@ -367,17 +265,17 @@ } } l.ignore() - return lexVoid + return l.lexVoid } -func lexLeftBracket(l *lexer) stateFn { +func (l *tomlLexer) lexLeftBracket() tomlLexStateFn { l.ignore() l.pos++ l.emit(tokenLeftBracket) - return lexRvalue + return l.lexRvalue } -func lexString(l *lexer) stateFn { +func (l *tomlLexer) lexString() tomlLexStateFn { l.pos++ l.ignore() growingString := "" @@ -387,7 +285,7 @@ l.emitWithValue(tokenString, growingString) l.pos++ l.ignore() - return lexRvalue + return l.lexRvalue } if l.follow("\\\"") { @@ -446,7 +344,7 @@ return l.errorf("unclosed string") } -func lexKeyGroup(l *lexer) stateFn { +func (l *tomlLexer) lexKeyGroup() tomlLexStateFn { l.ignore() l.pos++ @@ -454,14 +352,14 @@ // token '[[' signifies an array of anonymous key groups l.pos++ l.emit(tokenDoubleLeftBracket) - return lexInsideKeyGroupArray + return l.lexInsideKeyGroupArray } // vanilla key group 
l.emit(tokenLeftBracket) - return lexInsideKeyGroup + return l.lexInsideKeyGroup } -func lexInsideKeyGroupArray(l *lexer) stateFn { +func (l *tomlLexer) lexInsideKeyGroupArray() tomlLexStateFn { for { if l.peek() == ']' { if l.pos > l.start { @@ -474,7 +372,7 @@ } l.pos++ l.emit(tokenDoubleRightBracket) - return lexVoid + return l.lexVoid } else if l.peek() == '[' { return l.errorf("group name cannot contain ']'") } @@ -486,7 +384,7 @@ return l.errorf("unclosed key group array") } -func lexInsideKeyGroup(l *lexer) stateFn { +func (l *tomlLexer) lexInsideKeyGroup() tomlLexStateFn { for { if l.peek() == ']' { if l.pos > l.start { @@ -495,7 +393,7 @@ l.ignore() l.pos++ l.emit(tokenRightBracket) - return lexVoid + return l.lexVoid } else if l.peek() == '[' { return l.errorf("group name cannot contain ']'") } @@ -507,14 +405,14 @@ return l.errorf("unclosed key group") } -func lexRightBracket(l *lexer) stateFn { +func (l *tomlLexer) lexRightBracket() tomlLexStateFn { l.ignore() l.pos++ l.emit(tokenRightBracket) - return lexRvalue + return l.lexRvalue } -func lexNumber(l *lexer) stateFn { +func (l *tomlLexer) lexNumber() tomlLexStateFn { l.ignore() if !l.accept("+") { l.accept("-") @@ -550,7 +448,7 @@ } else { l.emit(tokenInteger) } - return lexRvalue + return l.lexRvalue } func init() { @@ -558,13 +456,13 @@ } // Entry point -func lex(input string) (*lexer, chan token) { - l := &lexer{ +func lexToml(input string) chan token { + l := &tomlLexer{ input: input, tokens: make(chan token), line: 1, col: 1, } go l.run() - return l, l.tokens + return l.tokens }
diff --git a/lexer_test.go b/lexer_test.go index 20483d7..5114223 100644 --- a/lexer_test.go +++ b/lexer_test.go
@@ -3,7 +3,7 @@ import "testing" func testFlow(t *testing.T, input string, expectedFlow []token) { - _, ch := lex(input) + ch := lexToml(input) for _, expected := range expectedFlow { token := <-ch if token != expected {
diff --git a/jpath/match.go b/match.go similarity index 74% rename from jpath/match.go rename to match.go index bdc3db5..d4f5c01 100644 --- a/jpath/match.go +++ b/match.go
@@ -1,10 +1,27 @@ -package jpath +package toml import ( "fmt" - . "github.com/pelletier/go-toml" ) +// support function to set positions for tomlValues +// NOTE: this is done to allow ctx.lastPosition to indicate the start of any +// values returned by the query engines +func tomlValueCheck(node interface{}, ctx *queryContext) interface{} { + switch castNode := node.(type) { + case *tomlValue: + ctx.lastPosition = castNode.position + return castNode.value + case []*TomlTree: + if len(castNode) > 0 { + ctx.lastPosition = castNode[0].position + } + return node + default: + return node + } +} + // base match type matchBase struct { next PathFn @@ -28,12 +45,15 @@ } func (f *terminatingFn) Call(node interface{}, ctx *queryContext) { - ctx.result.appendResult(node) -} - -// shim to ease functor writing -func treeValue(tree *TomlTree, key string) interface{} { - return tree.GetPath([]string{key}) + switch castNode := node.(type) { + case *TomlTree: + ctx.result.appendResult(node, castNode.position) + case *tomlValue: + ctx.result.appendResult(node, castNode.position) + default: + // use last position for scalars + ctx.result.appendResult(node, ctx.lastPosition) + } } // match single key @@ -48,7 +68,7 @@ func (f *matchKeyFn) Call(node interface{}, ctx *queryContext) { if tree, ok := node.(*TomlTree); ok { - item := treeValue(tree, f.Name) + item := tree.values[f.Name] if item != nil { f.next.Call(item, ctx) } @@ -66,11 +86,11 @@ } func (f *matchIndexFn) Call(node interface{}, ctx *queryContext) { - if arr, ok := node.([]interface{}); ok { - if f.Idx < len(arr) && f.Idx >= 0 { - f.next.Call(arr[f.Idx], ctx) - } - } + if arr, ok := tomlValueCheck(node, ctx).([]interface{}); ok { + if f.Idx < len(arr) && f.Idx >= 0 { + f.next.Call(arr[f.Idx], ctx) + } + } } // filter by slicing @@ -84,7 +104,7 @@ } func (f *matchSliceFn) Call(node interface{}, ctx *queryContext) { - if arr, ok := node.([]interface{}); ok { + if arr, ok := tomlValueCheck(node, ctx).([]interface{}); ok { // 
adjust indexes for negative values, reverse ordering realStart, realEnd := f.Start, f.End if realStart < 0 { @@ -114,9 +134,8 @@ func (f *matchAnyFn) Call(node interface{}, ctx *queryContext) { if tree, ok := node.(*TomlTree); ok { - for _, key := range tree.Keys() { - item := treeValue(tree, key) - f.next.Call(item, ctx) + for _,v := range tree.values { + f.next.Call(v, ctx) } } } @@ -151,10 +170,9 @@ if tree, ok := node.(*TomlTree); ok { var visit func(tree *TomlTree) visit = func(tree *TomlTree) { - for _, key := range tree.Keys() { - item := treeValue(tree, key) - f.next.Call(item, ctx) - switch node := item.(type) { + for _, v := range tree.values { + f.next.Call(v, ctx) + switch node := v.(type) { case *TomlTree: visit(node) case []*TomlTree: @@ -185,10 +203,9 @@ panic(fmt.Sprintf("%s: query context does not have filter '%s'", f.Pos, f.Name)) } - switch castNode := node.(type) { + switch castNode := tomlValueCheck(node, ctx).(type) { case *TomlTree: - for _, k := range castNode.Keys() { - v := castNode.GetPath([]string{k}) + for _, v := range castNode.values { if fn(v) { f.next.Call(v, ctx) } @@ -219,7 +236,7 @@ panic(fmt.Sprintf("%s: query context does not have script '%s'", f.Pos, f.Name)) } - switch result := fn(node).(type) { + switch result := fn(tomlValueCheck(node, ctx)).(type) { case string: nextMatch := newMatchKeyFn(result) nextMatch.SetNext(f.next)
diff --git a/jpath/match_test.go b/match_test.go similarity index 97% rename from jpath/match_test.go rename to match_test.go index d7da647..c667030 100644 --- a/jpath/match_test.go +++ b/match_test.go
@@ -1,8 +1,7 @@ -package jpath +package toml import ( "fmt" - . "github.com/pelletier/go-toml" "math" "testing" ) @@ -58,8 +57,7 @@ } func assertPath(t *testing.T, query string, ref *Query) { - _, flow := lex(query) - path := parse(flow) + path, _:= parseQuery(lexQuery(query)) assertPathMatch(t, path, ref) }
diff --git a/parser.go b/parser.go index dcab890..a67a1e6 100644 --- a/parser.go +++ b/parser.go
@@ -10,7 +10,7 @@ "time" ) -type parser struct { +type tomlParser struct { flow chan token tree *TomlTree tokensBuffer []token @@ -18,20 +18,20 @@ seenGroupKeys []string } -type parserStateFn func(*parser) parserStateFn +type tomlParserStateFn func() tomlParserStateFn // Formats and panics an error message based on a token -func (p *parser) raiseError(tok *token, msg string, args ...interface{}) { +func (p *tomlParser) raiseError(tok *token, msg string, args ...interface{}) { panic(tok.Position.String() + ": " + fmt.Sprintf(msg, args...)) } -func (p *parser) run() { - for state := parseStart; state != nil; { - state = state(p) +func (p *tomlParser) run() { + for state := p.parseStart; state != nil; { + state = state() } } -func (p *parser) peek() *token { +func (p *tomlParser) peek() *token { if len(p.tokensBuffer) != 0 { return &(p.tokensBuffer[0]) } @@ -44,7 +44,7 @@ return &tok } -func (p *parser) assume(typ tokenType) { +func (p *tomlParser) assume(typ tokenType) { tok := p.getToken() if tok == nil { p.raiseError(tok, "was expecting token %s, but token stream is empty", tok) @@ -54,7 +54,7 @@ } } -func (p *parser) getToken() *token { +func (p *tomlParser) getToken() *token { if len(p.tokensBuffer) != 0 { tok := p.tokensBuffer[0] p.tokensBuffer = p.tokensBuffer[1:] @@ -67,7 +67,7 @@ return &tok } -func parseStart(p *parser) parserStateFn { +func (p *tomlParser) parseStart() tomlParserStateFn { tok := p.peek() // end of stream, parsing is finished @@ -77,11 +77,11 @@ switch tok.typ { case tokenDoubleLeftBracket: - return parseGroupArray + return p.parseGroupArray case tokenLeftBracket: - return parseGroup + return p.parseGroup case tokenKey: - return parseAssign + return p.parseAssign case tokenEOF: return nil default: @@ -90,7 +90,7 @@ return nil } -func parseGroupArray(p *parser) parserStateFn { +func (p *tomlParser) parseGroupArray() tomlParserStateFn { startToken := p.getToken() // discard the [[ key := p.getToken() if key.typ != tokenKeyGroupArray { @@ -99,7 
+99,7 @@ // get or create group array element at the indicated part in the path keys := strings.Split(key.val, ".") - p.tree.createSubTree(keys[:len(keys)-1]) // create parent entries + p.tree.createSubTree(keys[:len(keys)-1], startToken.Position) // create parent entries destTree := p.tree.GetPath(keys) var array []*TomlTree if destTree == nil { @@ -137,10 +137,10 @@ // move to next parser state p.assume(tokenDoubleRightBracket) - return parseStart(p) + return p.parseStart } -func parseGroup(p *parser) parserStateFn { +func (p *tomlParser) parseGroup() tomlParserStateFn { startToken := p.getToken() // discard the [ key := p.getToken() if key.typ != tokenKeyGroup { @@ -154,20 +154,18 @@ p.seenGroupKeys = append(p.seenGroupKeys, key.val) keys := strings.Split(key.val, ".") - if err := p.tree.createSubTree(keys); err != nil { + if err := p.tree.createSubTree(keys,startToken.Position); err != nil { p.raiseError(key, "%s", err) } p.assume(tokenRightBracket) p.currentGroup = keys - targetTree := p.tree.GetPath(p.currentGroup).(*TomlTree) - targetTree.position = startToken.Position - return parseStart(p) + return p.parseStart } -func parseAssign(p *parser) parserStateFn { +func (p *tomlParser) parseAssign() tomlParserStateFn { key := p.getToken() p.assume(tokenEqual) - value := parseRvalue(p) + value := p.parseRvalue() var groupKey []string if len(p.currentGroup) > 0 { groupKey = p.currentGroup @@ -195,10 +193,10 @@ strings.Join(finalKey, ".")) } targetNode.values[key.val] = &tomlValue{value, key.Position} - return parseStart(p) + return p.parseStart } -func parseRvalue(p *parser) interface{} { +func (p *tomlParser) parseRvalue() interface{} { tok := p.getToken() if tok == nil || tok.typ == tokenEOF { p.raiseError(tok, "expecting a value") @@ -230,7 +228,7 @@ } return val case tokenLeftBracket: - return parseArray(p) + return p.parseArray() case tokenError: p.raiseError(tok, "%s", tok) } @@ -240,7 +238,7 @@ return nil } -func parseArray(p *parser) []interface{} { +func 
(p *tomlParser) parseArray() []interface{} { var array []interface{} arrayType := reflect.TypeOf(nil) for { @@ -252,7 +250,7 @@ p.getToken() return array } - val := parseRvalue(p) + val := p.parseRvalue() if arrayType == nil { arrayType = reflect.TypeOf(val) } @@ -274,10 +272,10 @@ return array } -func parse(flow chan token) *TomlTree { +func parseToml(flow chan token) *TomlTree { result := newTomlTree() result.position = Position{1,1} - parser := &parser{ + parser := &tomlParser{ flow: flow, tree: result, tokensBuffer: make([]token, 0),
diff --git a/parser_test.go b/parser_test.go index 10ac6d7..b429b2f 100644 --- a/parser_test.go +++ b/parser_test.go
@@ -31,7 +31,7 @@ func TestCreateSubTree(t *testing.T) { tree := newTomlTree() - tree.createSubTree([]string{"a", "b", "c"}) + tree.createSubTree([]string{"a", "b", "c"}, Position{}) tree.Set("a.b.c", 42) if tree.Get("a.b.c") != 42 { t.Fail() @@ -385,7 +385,7 @@ for path, pos := range ref { testPos := tree.GetPosition(path) if testPos.Invalid() { - t.Errorf("Failed to query tree path: %s", path) + t.Errorf("Failed to query tree path or path has invalid position: %s", path) } else if pos != testPos { t.Errorf("Expected position %v, got %v instead", pos, testPos) } @@ -424,3 +424,15 @@ "foo.baz": Position{3, 1}, }) } + +func TestNestedTreePosition(t *testing.T) { + assertPosition(t, + "[foo.bar]\na=42\nb=69", + map[string]Position{ + "": Position{1, 1}, + "foo": Position{1, 1}, + "foo.bar": Position{1, 1}, + "foo.bar.a": Position{2, 1}, + "foo.bar.b": Position{3, 1}, + }) +}
diff --git a/jpath/query.go b/query.go similarity index 80% rename from jpath/query.go rename to query.go index 3435515..8bfb9ed 100644 --- a/jpath/query.go +++ b/query.go
@@ -1,8 +1,4 @@ -package jpath - -import ( - . "github.com/pelletier/go-toml" -) +package toml type nodeFilterFn func(node interface{}) bool type nodeFn func(node interface{}) interface{} @@ -12,18 +8,9 @@ positions []Position } -// TODO: modify after merging with rest of lib -func (r *QueryResult) appendResult(node interface{}) { +func (r *QueryResult) appendResult(node interface{}, pos Position) { r.items = append(r.items, node) - switch castNode := node.(type) { - case *TomlTree: - r.positions = append(r.positions, castNode.GetPosition("")) - //r.positions = append(r.positions, castNode.position) - //case *tomlValue: - //r.positions = append(r.positions, castNode.position) - default: - r.positions = append(r.positions, Position{}) - } + r.positions = append(r.positions, pos) } func (r *QueryResult) Values() []interface{} { @@ -39,6 +26,7 @@ result *QueryResult filters *map[string]nodeFilterFn scripts *map[string]nodeFn + lastPosition Position } // generic path functor interface @@ -74,9 +62,9 @@ next.SetNext(newTerminatingFn()) // init the next functor } -func Compile(path string) *Query { - _, flow := lex(path) - return parse(flow) +// TODO: return (err,query) instead +func Compile(path string) (*Query, error) { + return parseQuery(lexQuery(path)) } func (q *Query) Execute(tree *TomlTree) *QueryResult { @@ -85,7 +73,7 @@ positions: []Position{}, } if q.root == nil { - result.appendResult(tree) + result.appendResult(tree, tree.GetPosition("")) } else { ctx := &queryContext{ result: result,
diff --git a/jpath/lexer.go b/querylexer.go similarity index 62% rename from jpath/lexer.go rename to querylexer.go index 0e3af11..355838c 100644 --- a/jpath/lexer.go +++ b/querylexer.go
@@ -1,131 +1,22 @@ // TOML JSONPath lexer. // -// Written using the principles developped by Rob Pike in +// Written using the principles developed by Rob Pike in // http://www.youtube.com/watch?v=HxaD_trXwRE -package jpath +package toml import ( "fmt" - . "github.com/pelletier/go-toml" - "regexp" "strconv" "strings" - "unicode" "unicode/utf8" ) -var dateRegexp *regexp.Regexp +// Lexer state function +type queryLexStateFn func() queryLexStateFn -// Define tokens -type tokenType int - -const ( - eof = -(iota + 1) -) - -const ( - tokenError tokenType = iota - tokenEOF - tokenKey - tokenString - tokenFloat - tokenInteger - tokenAtCost - tokenDollar - tokenLBracket - tokenRBracket - tokenDot - tokenDotDot - tokenStar - tokenComma - tokenColon - tokenQuestion - tokenLParen - tokenRParen -) - -var tokenTypeNames = []string{ - "EOF", - "Key", - "String", - "Float", - "Integer", - "@", - "$", - "[", - "]", - ".", - "..", - "*", - ",", - ":", - "?", - "(", - ")", -} - -type token struct { - Position - typ tokenType - val string -} - -func (tt tokenType) String() string { - idx := int(tt) - if idx < len(tokenTypeNames) { - return tokenTypeNames[idx] - } - return "Unknown" -} - -func (t token) Int() int { - if result, err := strconv.Atoi(t.val); err != nil { - panic(err) - } else { - return result - } -} - -func (t token) String() string { - switch t.typ { - case tokenEOF: - return "EOF" - case tokenError: - return t.val - } - - if len(t.val) > 10 { - return fmt.Sprintf("%.10q...", t.val) - } - return fmt.Sprintf("%q", t.val) -} - -func isSpace(r rune) bool { - return r == ' ' || r == '\t' -} - -func isAlphanumeric(r rune) bool { - return unicode.IsLetter(r) || r == '_' -} - -func isKeyChar(r rune) bool { - // "Keys start with the first non-whitespace character and end with the last - // non-whitespace character before the equals sign." 
- return !(isSpace(r) || r == '\r' || r == '\n' || r == eof || r == '=') -} - -func isDigit(r rune) bool { - return unicode.IsNumber(r) -} - -func isHexDigit(r rune) bool { - return isDigit(r) || - r == 'A' || r == 'B' || r == 'C' || r == 'D' || r == 'E' || r == 'F' -} - -// Define lexer -type lexer struct { +// Lexer definition +type queryLexer struct { input string start int pos int @@ -137,14 +28,14 @@ stringTerm string } -func (l *lexer) run() { - for state := lexVoid; state != nil; { - state = state(l) +func (l *queryLexer) run() { + for state := l.lexVoid; state != nil; { + state = state() } close(l.tokens) } -func (l *lexer) nextStart() { +func (l *queryLexer) nextStart() { // iterate by runes (utf8 characters) // search for newlines and advance line/col counts for i := l.start; i < l.pos; { @@ -161,7 +52,7 @@ l.start = l.pos } -func (l *lexer) emit(t tokenType) { +func (l *queryLexer) emit(t tokenType) { l.tokens <- token{ Position: Position{l.line, l.col}, typ: t, @@ -170,7 +61,7 @@ l.nextStart() } -func (l *lexer) emitWithValue(t tokenType, value string) { +func (l *queryLexer) emitWithValue(t tokenType, value string) { l.tokens <- token{ Position: Position{l.line, l.col}, typ: t, @@ -179,7 +70,7 @@ l.nextStart() } -func (l *lexer) next() rune { +func (l *queryLexer) next() rune { if l.pos >= len(l.input) { l.width = 0 return eof @@ -190,15 +81,15 @@ return r } -func (l *lexer) ignore() { +func (l *queryLexer) ignore() { l.nextStart() } -func (l *lexer) backup() { +func (l *queryLexer) backup() { l.pos -= l.width } -func (l *lexer) errorf(format string, args ...interface{}) stateFn { +func (l *queryLexer) errorf(format string, args ...interface{}) queryLexStateFn { l.tokens <- token{ Position: Position{l.line, l.col}, typ: tokenError, @@ -207,13 +98,13 @@ return nil } -func (l *lexer) peek() rune { +func (l *queryLexer) peek() rune { r := l.next() l.backup() return r } -func (l *lexer) accept(valid string) bool { +func (l *queryLexer) accept(valid string) 
bool { if strings.IndexRune(valid, l.next()) >= 0 { return true } @@ -221,14 +112,12 @@ return false } -func (l *lexer) follow(next string) bool { +func (l *queryLexer) follow(next string) bool { return strings.HasPrefix(l.input[l.pos:], next) } -// Define state functions -type stateFn func(*lexer) stateFn -func lexVoid(l *lexer) stateFn { +func (l *queryLexer) lexVoid() queryLexStateFn { for { next := l.peek() switch next { @@ -245,17 +134,13 @@ l.emit(tokenDot) } continue - case '@': - l.pos++ - l.emit(tokenAtCost) - continue case '[': l.pos++ - l.emit(tokenLBracket) + l.emit(tokenLeftBracket) continue case ']': l.pos++ - l.emit(tokenRBracket) + l.emit(tokenRightBracket) continue case ',': l.pos++ @@ -267,11 +152,11 @@ continue case '(': l.pos++ - l.emit(tokenLParen) + l.emit(tokenLeftParen) continue case ')': l.pos++ - l.emit(tokenRParen) + l.emit(tokenRightParen) continue case '?': l.pos++ @@ -284,11 +169,11 @@ case '\'': l.ignore() l.stringTerm = string(next) - return lexString + return l.lexString case '"': l.ignore() l.stringTerm = string(next) - return lexString + return l.lexString } if isSpace(next) { @@ -298,11 +183,11 @@ } if isAlphanumeric(next) { - return lexKey + return l.lexKey } if next == '+' || next == '-' || isDigit(next) { - return lexNumber + return l.lexNumber } if l.next() == eof { @@ -315,12 +200,12 @@ return nil } -func lexKey(l *lexer) stateFn { +func (l *queryLexer) lexKey() queryLexStateFn { for { next := l.peek() if !isAlphanumeric(next) { l.emit(tokenKey) - return lexVoid + return l.lexVoid } if l.next() == eof { @@ -331,7 +216,7 @@ return nil } -func lexString(l *lexer) stateFn { +func (l *queryLexer) lexString() queryLexStateFn { l.pos++ l.ignore() growingString := "" @@ -341,7 +226,7 @@ l.emitWithValue(tokenString, growingString) l.pos++ l.ignore() - return lexVoid + return l.lexVoid } if l.follow("\\\"") { @@ -403,7 +288,7 @@ return l.errorf("unclosed string") } -func lexNumber(l *lexer) stateFn { +func (l *queryLexer) lexNumber() 
queryLexStateFn { l.ignore() if !l.accept("+") { l.accept("-") @@ -439,17 +324,17 @@ } else { l.emit(tokenInteger) } - return lexVoid + return l.lexVoid } // Entry point -func lex(input string) (*lexer, chan token) { - l := &lexer{ +func lexQuery(input string) chan token { + l := &queryLexer{ input: input, tokens: make(chan token), line: 1, col: 1, } go l.run() - return l, l.tokens + return l.tokens }
diff --git a/jpath/lexer_test.go b/querylexer_test.go similarity index 73% rename from jpath/lexer_test.go rename to querylexer_test.go index 4402abb..a9bd674 100644 --- a/jpath/lexer_test.go +++ b/querylexer_test.go
@@ -1,12 +1,11 @@ -package jpath +package toml import ( - . "github.com/pelletier/go-toml" "testing" ) -func testFlow(t *testing.T, input string, expectedFlow []token) { - _, ch := lex(input) +func testQLFlow(t *testing.T, input string, expectedFlow []token) { + ch := lexQuery(input) for idx, expected := range expectedFlow { token := <-ch if token != expected { @@ -34,15 +33,14 @@ } func TestLexSpecialChars(t *testing.T) { - testFlow(t, "@.$[]..()?*", []token{ - token{Position{1, 1}, tokenAtCost, "@"}, + testQLFlow(t, " .$[]..()?*", []token{ token{Position{1, 2}, tokenDot, "."}, token{Position{1, 3}, tokenDollar, "$"}, - token{Position{1, 4}, tokenLBracket, "["}, - token{Position{1, 5}, tokenRBracket, "]"}, + token{Position{1, 4}, tokenLeftBracket, "["}, + token{Position{1, 5}, tokenRightBracket, "]"}, token{Position{1, 6}, tokenDotDot, ".."}, - token{Position{1, 8}, tokenLParen, "("}, - token{Position{1, 9}, tokenRParen, ")"}, + token{Position{1, 8}, tokenLeftParen, "("}, + token{Position{1, 9}, tokenRightParen, ")"}, token{Position{1, 10}, tokenQuestion, "?"}, token{Position{1, 11}, tokenStar, "*"}, token{Position{1, 12}, tokenEOF, ""}, @@ -50,28 +48,28 @@ } func TestLexString(t *testing.T) { - testFlow(t, "'foo'", []token{ + testQLFlow(t, "'foo'", []token{ token{Position{1, 2}, tokenString, "foo"}, token{Position{1, 6}, tokenEOF, ""}, }) } func TestLexDoubleString(t *testing.T) { - testFlow(t, `"bar"`, []token{ + testQLFlow(t, `"bar"`, []token{ token{Position{1, 2}, tokenString, "bar"}, token{Position{1, 6}, tokenEOF, ""}, }) } func TestLexKey(t *testing.T) { - testFlow(t, "foo", []token{ + testQLFlow(t, "foo", []token{ token{Position{1, 1}, tokenKey, "foo"}, token{Position{1, 4}, tokenEOF, ""}, }) } func TestLexRecurse(t *testing.T) { - testFlow(t, "$..*", []token{ + testQLFlow(t, "$..*", []token{ token{Position{1, 1}, tokenDollar, "$"}, token{Position{1, 2}, tokenDotDot, ".."}, token{Position{1, 4}, tokenStar, "*"}, @@ -80,17 +78,17 @@ } func 
TestLexBracketKey(t *testing.T) { - testFlow(t, "$[foo]", []token{ + testQLFlow(t, "$[foo]", []token{ token{Position{1, 1}, tokenDollar, "$"}, - token{Position{1, 2}, tokenLBracket, "["}, + token{Position{1, 2}, tokenLeftBracket, "["}, token{Position{1, 3}, tokenKey, "foo"}, - token{Position{1, 6}, tokenRBracket, "]"}, + token{Position{1, 6}, tokenRightBracket, "]"}, token{Position{1, 7}, tokenEOF, ""}, }) } func TestLexSpace(t *testing.T) { - testFlow(t, "foo bar baz", []token{ + testQLFlow(t, "foo bar baz", []token{ token{Position{1, 1}, tokenKey, "foo"}, token{Position{1, 5}, tokenKey, "bar"}, token{Position{1, 9}, tokenKey, "baz"},
diff --git a/queryparser.go b/queryparser.go new file mode 100644 index 0000000..4973afb --- /dev/null +++ b/queryparser.go
@@ -0,0 +1,291 @@ +/* + Based on the "jsonpath" spec/concept. + + http://goessner.net/articles/JsonPath/ + https://code.google.com/p/json-path/ +*/ + +package toml + +import ( + "fmt" + "math" +) + +type queryParser struct { + flow chan token + tokensBuffer []token + query *Query + union []PathFn + err error +} + +type queryParserStateFn func() queryParserStateFn + +// Formats and panics an error message based on a token +func (p *queryParser) parseError(tok *token, msg string, args ...interface{}) queryParserStateFn { + p.err = fmt.Errorf(tok.Position.String() + ": " + msg, args...) + return nil // trigger parse to end +} + +func (p *queryParser) run() { + for state := p.parseStart; state != nil; { + state = state() + } +} + +func (p *queryParser) backup(tok *token) { + p.tokensBuffer = append(p.tokensBuffer, *tok) +} + +func (p *queryParser) peek() *token { + if len(p.tokensBuffer) != 0 { + return &(p.tokensBuffer[0]) + } + + tok, ok := <-p.flow + if !ok { + return nil + } + p.backup(&tok) + return &tok +} + +func (p *queryParser) lookahead(types ...tokenType) bool { + result := true + buffer := []token{} + + for _, typ := range types { + tok := p.getToken() + if tok == nil { + result = false + break + } + buffer = append(buffer, *tok) + if tok.typ != typ { + result = false + break + } + } + // add the tokens back to the buffer, and return + p.tokensBuffer = append(p.tokensBuffer, buffer...) + return result +} + +func (p *queryParser) getToken() *token { + if len(p.tokensBuffer) != 0 { + tok := p.tokensBuffer[0] + p.tokensBuffer = p.tokensBuffer[1:] + return &tok + } + tok, ok := <-p.flow + if !ok { + return nil + } + return &tok +} + +func (p *queryParser) parseStart() queryParserStateFn { + tok := p.getToken() + + if tok == nil || tok.typ == tokenEOF { + return nil + } + + if tok.typ != tokenDollar { + return p.parseError(tok, "Expected '$' at start of expression") + } + + return p.parseMatchExpr +} + +// handle '.' prefix, '[]', and '..' 
+func (p *queryParser) parseMatchExpr() queryParserStateFn { + tok := p.getToken() + switch tok.typ { + case tokenDotDot: + p.query.appendPath(&matchRecursiveFn{}) + // nested parse for '..' + tok := p.getToken() + switch tok.typ { + case tokenKey: + p.query.appendPath(newMatchKeyFn(tok.val)) + return p.parseMatchExpr + case tokenLeftBracket: + return p.parseBracketExpr + case tokenStar: + // do nothing - the recursive predicate is enough + return p.parseMatchExpr + } + + case tokenDot: + // nested parse for '.' + tok := p.getToken() + switch tok.typ { + case tokenKey: + p.query.appendPath(newMatchKeyFn(tok.val)) + return p.parseMatchExpr + case tokenStar: + p.query.appendPath(&matchAnyFn{}) + return p.parseMatchExpr + } + + case tokenLeftBracket: + return p.parseBracketExpr + + case tokenEOF: + return nil // allow EOF at this stage + } + return p.parseError(tok, "expected match expression") + return nil +} + +func (p *queryParser) parseBracketExpr() queryParserStateFn { + if p.lookahead(tokenInteger, tokenColon) { + return p.parseSliceExpr + } + if p.peek().typ == tokenColon { + return p.parseSliceExpr + } + return p.parseUnionExpr +} + +func (p *queryParser) parseUnionExpr() queryParserStateFn { + var tok *token + + // this state can be traversed after some sub-expressions + // so be careful when setting up state in the parser + if p.union == nil { + p.union = []PathFn{} + } + +loop: // labeled loop for easy breaking + for { + if len(p.union) > 0 { + // parse delimiter or terminator + tok = p.getToken() + switch tok.typ { + case tokenComma: + // do nothing + case tokenRightBracket: + break loop + default: + return p.parseError(tok, "expected ',' or ']', not '%s'", tok.val) + } + } + + // parse sub expression + tok = p.getToken() + switch tok.typ { + case tokenInteger: + p.union = append(p.union, newMatchIndexFn(tok.Int())) + case tokenKey: + p.union = append(p.union, newMatchKeyFn(tok.val)) + case tokenString: + p.union = append(p.union, newMatchKeyFn(tok.val)) + 
case tokenQuestion: + return p.parseFilterExpr + case tokenLeftParen: + return p.parseScriptExpr + default: + return p.parseError(tok, "expected union sub expression, not '%s', %d", tok.val, len(p.union)) + } + } + + // if there is only one sub-expression, use that instead + if len(p.union) == 1 { + p.query.appendPath(p.union[0]) + } else { + p.query.appendPath(&matchUnionFn{p.union}) + } + + p.union = nil // clear out state + return p.parseMatchExpr +} + +func (p *queryParser) parseSliceExpr() queryParserStateFn { + // init slice to grab all elements + start, end, step := 0, math.MaxInt64, 1 + + // parse optional start + tok := p.getToken() + if tok.typ == tokenInteger { + start = tok.Int() + tok = p.getToken() + } + if tok.typ != tokenColon { + return p.parseError(tok, "expected ':'") + } + + // parse optional end + tok = p.getToken() + if tok.typ == tokenInteger { + end = tok.Int() + tok = p.getToken() + } + if tok.typ == tokenRightBracket { + p.query.appendPath(newMatchSliceFn(start, end, step)) + return p.parseMatchExpr + } + if tok.typ != tokenColon { + return p.parseError(tok, "expected ']' or ':'") + } + + // parse optional step + tok = p.getToken() + if tok.typ == tokenInteger { + step = tok.Int() + if step < 0 { + return p.parseError(tok, "step must be a positive value") + } + tok = p.getToken() + } + if tok.typ != tokenRightBracket { + return p.parseError(tok, "expected ']'") + } + + p.query.appendPath(newMatchSliceFn(start, end, step)) + return p.parseMatchExpr +} + +func (p *queryParser) parseFilterExpr() queryParserStateFn { + tok := p.getToken() + if tok.typ != tokenLeftParen { + return p.parseError(tok, "expected left-parenthesis for filter expression") + } + tok = p.getToken() + if tok.typ != tokenKey && tok.typ != tokenString { + return p.parseError(tok, "expected key or string for filter function name") + } + name := tok.val + tok = p.getToken() + if tok.typ != tokenRightParen { + return p.parseError(tok, "expected right-parenthesis for filter 
expression") + } + p.union = append(p.union, newMatchFilterFn(name, tok.Position)) + return p.parseUnionExpr +} + +func (p *queryParser) parseScriptExpr() queryParserStateFn { + tok := p.getToken() + if tok.typ != tokenKey && tok.typ != tokenString { + return p.parseError(tok, "expected key or string for script function name") + } + name := tok.val + tok = p.getToken() + if tok.typ != tokenRightParen { + return p.parseError(tok, "expected right-parenthesis for script expression") + } + p.union = append(p.union, newMatchScriptFn(name, tok.Position)) + return p.parseUnionExpr +} + +func parseQuery(flow chan token) (*Query, error) { + parser := &queryParser{ + flow: flow, + tokensBuffer: []token{}, + query: newQuery(), + } + parser.run() + return parser.query, parser.err +}
diff --git a/queryparser_test.go b/queryparser_test.go new file mode 100644 index 0000000..96c88d0 --- /dev/null +++ b/queryparser_test.go
@@ -0,0 +1,368 @@ +package toml + +import ( + "fmt" + "testing" + "sort" + "strings" +) + +type queryTestNode struct { + value interface{} + position Position +} + +func valueString(root interface{}) string { + result := "" //fmt.Sprintf("%T:", root) + switch node := root.(type) { + case *tomlValue: + return valueString(node.value) + case *QueryResult: + items := []string{} + for i, v := range node.Values() { + items = append(items, fmt.Sprintf("%s:%s", + node.Positions()[i].String(), valueString(v))) + } + sort.Strings(items) + result = "[" + strings.Join(items, ", ") + "]" + case queryTestNode: + result = fmt.Sprintf("%s:%s", + node.position.String(), valueString(node.value)) + case []interface{}: + items := []string{} + for _, v := range node { + items = append(items, valueString(v)) + } + sort.Strings(items) + result = "[" + strings.Join(items, ", ") + "]" + case *TomlTree: + // workaround for unreliable map key ordering + items := []string{} + for _, k := range node.Keys() { + v := node.GetPath([]string{k}) + items = append(items, k + ":" + valueString(v)) + } + sort.Strings(items) + result = "{" + strings.Join(items, ", ") + "}" + case map[string]interface{}: + // workaround for unreliable map key ordering + items := []string{} + for k, v := range node { + items = append(items, k + ":" + valueString(v)) + } + sort.Strings(items) + result = "{" + strings.Join(items, ", ") + "}" + case int64: + result += fmt.Sprintf("%d", node) + case string: + result += "'" + node + "'" + } + return result +} + +func assertValue(t *testing.T, result, ref interface{}) { + pathStr := valueString(result) + refStr := valueString(ref) + if pathStr != refStr { + t.Errorf("values do not match") + t.Log("test:", pathStr) + t.Log("ref: ", refStr) + } +} + +func assertQueryPositions(t *testing.T, toml, query string, ref []interface{}) { + tree, err := Load(toml) + if err != nil { + t.Errorf("Non-nil toml parse error: %v", err) + return + } + q, err := Compile(query) + if err != nil { + 
t.Error(err) + return + } + results := q.Execute(tree) + assertValue(t, results, ref) +} + +func TestQueryRoot(t *testing.T) { + assertQueryPositions(t, + "a = 42", + "$", + []interface{}{ + queryTestNode{ + map[string]interface{}{ + "a": int64(42), + }, Position{1, 1}, + }, + }) +} + +func TestQueryKey(t *testing.T) { + assertQueryPositions(t, + "[foo]\na = 42", + "$.foo.a", + []interface{}{ + queryTestNode{ + int64(42), Position{2,1}, + }, + }) +} + +func TestQueryKeyString(t *testing.T) { + assertQueryPositions(t, + "[foo]\na = 42", + "$.foo['a']", + []interface{}{ + queryTestNode{ + int64(42), Position{2,1}, + }, + }) +} + +func TestQueryIndex(t *testing.T) { + assertQueryPositions(t, + "[foo]\na = [1,2,3,4,5,6,7,8,9,0]", + "$.foo.a[5]", + []interface{}{ + queryTestNode{ + int64(6), Position{2,1}, + }, + }) +} + +func TestQuerySliceRange(t *testing.T) { + assertQueryPositions(t, + "[foo]\na = [1,2,3,4,5,6,7,8,9,0]", + "$.foo.a[0:5]", + []interface{}{ + queryTestNode{ + int64(1), Position{2,1}, + }, + queryTestNode{ + int64(2), Position{2,1}, + }, + queryTestNode{ + int64(3), Position{2,1}, + }, + queryTestNode{ + int64(4), Position{2,1}, + }, + queryTestNode{ + int64(5), Position{2,1}, + }, + }) +} + +func TestQuerySliceStep(t *testing.T) { + assertQueryPositions(t, + "[foo]\na = [1,2,3,4,5,6,7,8,9,0]", + "$.foo.a[0:5:2]", + []interface{}{ + queryTestNode{ + int64(1), Position{2,1}, + }, + queryTestNode{ + int64(3), Position{2,1}, + }, + queryTestNode{ + int64(5), Position{2,1}, + }, + }) +} + +func TestQueryAny(t *testing.T) { + assertQueryPositions(t, + "[foo.bar]\na=1\nb=2\n[foo.baz]\na=3\nb=4", + "$.foo.*", + []interface{}{ + queryTestNode{ + map[string]interface{}{ + "a": int64(1), + "b": int64(2), + }, Position{1,1}, + }, + queryTestNode{ + map[string]interface{}{ + "a": int64(3), + "b": int64(4), + }, Position{4,1}, + }, + }) +} +func TestQueryUnionSimple(t *testing.T) { + assertQueryPositions(t, + 
"[foo.bar]\na=1\nb=2\n[baz.foo]\na=3\nb=4\n[gorf.foo]\na=5\nb=6", + "$.*[bar,foo]", + []interface{}{ + queryTestNode{ + map[string]interface{}{ + "a": int64(1), + "b": int64(2), + }, Position{1,1}, + }, + queryTestNode{ + map[string]interface{}{ + "a": int64(3), + "b": int64(4), + }, Position{4,1}, + }, + queryTestNode{ + map[string]interface{}{ + "a": int64(5), + "b": int64(6), + }, Position{7,1}, + }, + }) +} + +func TestQueryRecursionAll(t *testing.T) { + assertQueryPositions(t, + "[foo.bar]\na=1\nb=2\n[baz.foo]\na=3\nb=4\n[gorf.foo]\na=5\nb=6", + "$..*", + []interface{}{ + queryTestNode{ + map[string]interface{}{ + "bar": map[string]interface{}{ + "a": int64(1), + "b": int64(2), + }, + }, Position{1,1}, + }, + queryTestNode{ + map[string]interface{}{ + "a": int64(1), + "b": int64(2), + }, Position{1,1}, + }, + queryTestNode{ + int64(1), Position{2,1}, + }, + queryTestNode{ + int64(2), Position{3,1}, + }, + queryTestNode{ + map[string]interface{}{ + "foo": map[string]interface{}{ + "a": int64(3), + "b": int64(4), + }, + }, Position{4,1}, + }, + queryTestNode{ + map[string]interface{}{ + "a": int64(3), + "b": int64(4), + }, Position{4,1}, + }, + queryTestNode{ + int64(3), Position{5,1}, + }, + queryTestNode{ + int64(4), Position{6,1}, + }, + queryTestNode{ + map[string]interface{}{ + "foo": map[string]interface{}{ + "a": int64(5), + "b": int64(6), + }, + }, Position{7,1}, + }, + queryTestNode{ + map[string]interface{}{ + "a": int64(5), + "b": int64(6), + }, Position{7,1}, + }, + queryTestNode{ + int64(5), Position{8,1}, + }, + queryTestNode{ + int64(6), Position{9,1}, + }, + }) +} + +func TestQueryRecursionUnionSimple(t *testing.T) { + assertQueryPositions(t, + "[foo.bar]\na=1\nb=2\n[baz.foo]\na=3\nb=4\n[gorf.foo]\na=5\nb=6", + "$..['foo','bar']", + []interface{}{ + queryTestNode{ + map[string]interface{}{ + "a": int64(1), + "b": int64(2), + }, Position{1,1}, + }, + queryTestNode{ + map[string]interface{}{ + "a": int64(3), + "b": int64(4), + }, Position{4,1}, + 
}, + queryTestNode{ + map[string]interface{}{ + "a": int64(5), + "b": int64(6), + }, Position{7,1}, + }, + }) +} + +func TestQueryScriptFnLast(t *testing.T) { + assertQueryPositions(t, + "[foo]\na = [0,1,2,3,4,5,6,7,8,9]", + "$.foo.a[(last)]", + []interface{}{ + queryTestNode{ + int64(9), Position{2,1}, + }, + }) +} + +func TestQueryFilterFnOdd(t *testing.T) { + assertQueryPositions(t, + "[foo]\na = [0,1,2,3,4,5,6,7,8,9]", + "$.foo.a[?(odd)]", + []interface{}{ + queryTestNode{ + int64(1), Position{2,1}, + }, + queryTestNode{ + int64(3), Position{2,1}, + }, + queryTestNode{ + int64(5), Position{2,1}, + }, + queryTestNode{ + int64(7), Position{2,1}, + }, + queryTestNode{ + int64(9), Position{2,1}, + }, + }) +} + +func TestQueryFilterFnEven(t *testing.T) { + assertQueryPositions(t, + "[foo]\na = [0,1,2,3,4,5,6,7,8,9]", + "$.foo.a[?(even)]", + []interface{}{ + queryTestNode{ + int64(0), Position{2,1}, + }, + queryTestNode{ + int64(2), Position{2,1}, + }, + queryTestNode{ + int64(4), Position{2,1}, + }, + queryTestNode{ + int64(6), Position{2,1}, + }, + queryTestNode{ + int64(8), Position{2,1}, + }, + }) +}
diff --git a/test.sh b/test.sh index 80b27a7..07aa52e 100755 --- a/test.sh +++ b/test.sh
@@ -21,11 +21,8 @@ mkdir -p src/github.com/pelletier/go-toml/cmd cp *.go *.toml src/github.com/pelletier/go-toml cp cmd/*.go src/github.com/pelletier/go-toml/cmd -mkdir -p src/github.com/pelletier/go-toml/jpath -cp jpath/*.go src/github.com/pelletier/go-toml/jpath go build -o test_program_bin src/github.com/pelletier/go-toml/cmd/test_program.go # Run basic unit tests and then the BurntSushi test suite -go test -v github.com/pelletier/go-toml/jpath go test -v github.com/pelletier/go-toml ./toml-test ./test_program_bin | tee test_out
diff --git a/toml.go b/toml.go index 678959a..043d609 100644 --- a/toml.go +++ b/toml.go
@@ -199,7 +199,7 @@ // and tree[a][b][c] // // Returns nil on success, error object on failure -func (t *TomlTree) createSubTree(keys []string) error { +func (t *TomlTree) createSubTree(keys []string, pos Position) error { subtree := t for _, intermediateKey := range keys { if intermediateKey == "" { @@ -207,8 +207,10 @@ } nextTree, exists := subtree.values[intermediateKey] if !exists { - nextTree = newTomlTree() - subtree.values[intermediateKey] = nextTree + tree := newTomlTree() + tree.position = pos + subtree.values[intermediateKey] = tree + nextTree = tree } switch node := nextTree.(type) { @@ -317,6 +319,14 @@ return result } +func (t *TomlTree) Query(query string) (*QueryResult, error) { + if q, err := Compile(query); err != nil { + return nil, err + } else { + return q.Execute(t), nil + } +} + // ToString generates a human-readable representation of the current tree. // Output spans multiple lines, and is suitable for ingest by a TOML parser func (t *TomlTree) ToString() string { @@ -325,27 +335,26 @@ // Load creates a TomlTree from a string. func Load(content string) (tree *TomlTree, err error) { - defer func() { - if r := recover(); r != nil { - if _, ok := r.(runtime.Error); ok { - panic(r) - } - err = errors.New(r.(string)) - } - }() - _, flow := lex(content) - tree = parse(flow) - return + defer func() { + if r := recover(); r != nil { + if _, ok := r.(runtime.Error); ok { + panic(r) + } + err = errors.New(r.(string)) + } + }() + tree = parseToml(lexToml(content)) + return } // LoadFile creates a TomlTree from a file. func LoadFile(path string) (tree *TomlTree, err error) { - buff, ferr := ioutil.ReadFile(path) - if ferr != nil { - err = ferr - } else { - s := string(buff) - tree, err = Load(s) - } - return + buff, ferr := ioutil.ReadFile(path) + if ferr != nil { + err = ferr + } else { + s := string(buff) + tree, err = Load(s) + } + return }
diff --git a/toml_test.go b/toml_test.go index 09950b9..f14d046 100644 --- a/toml_test.go +++ b/toml_test.go
@@ -47,3 +47,28 @@ } } } + +func TestTomlQuery(t *testing.T) { + tree, err := Load("[foo.bar]\na=1\nb=2\n[baz.foo]\na=3\nb=4\n[gorf.foo]\na=5\nb=6") + if err != nil { + t.Error(err) + return + } + result, err := tree.Query("$.foo.bar") + if err != nil { + t.Error(err) + return + } + values := result.Values() + if len(values) != 1 { + t.Errorf("Expected resultset of 1, got %d instead: %v", len(values), values) + } + + if tt, ok := values[0].(*TomlTree); !ok { + t.Errorf("Expected type of TomlTree: %T %v", values[0], values[0]) + } else if tt.Get("a") != int64(1) { + t.Errorf("Expected 'a' with a value 1: %v", tt.Get("a")) + } else if tt.Get("b") != int64(2) { + t.Errorf("Expected 'b' with a value 2: %v", tt.Get("b")) + } +}