Changed match func strategy; added tests As it turns out, closures are very hard to validate without running them. Since table-driven tests tend to rely on value types that can be compared directly, using structs that adhere to a generic callback interface is more work, but is more easily tested. * Changed jsonpath match functions to structs with Call() methods * Added tests to verify the generation of jsonpath QueryPath data * Added tests to verify jsonpath lexer * Fixed jsonpath whitespace handling bug * Fixed numerous flaws in jsonpath parser
diff --git a/jpath/lexer.go b/jpath/lexer.go index dc830fe..2d30bb5 100644 --- a/jpath/lexer.go +++ b/jpath/lexer.go
@@ -79,18 +79,26 @@ return "Unknown" } -func (i token) String() string { - switch i.typ { +func (t token) Int() int { + if result, err := strconv.Atoi(t.val); err != nil { + panic(err) + } else { + return result + } +} + +func (t token) String() string { + switch t.typ { case tokenEOF: return "EOF" case tokenError: - return i.val + return t.val } - if len(i.val) > 10 { - return fmt.Sprintf("%.10q...", i.val) + if len(t.val) > 10 { + return fmt.Sprintf("%.10q...", t.val) } - return fmt.Sprintf("%q", i.val) + return fmt.Sprintf("%q", t.val) } func isSpace(r rune) bool { @@ -283,6 +291,12 @@ return lexString } + if isSpace(next) { + l.next() + l.ignore() + continue + } + if isAlphanumeric(next) { return lexKey } @@ -291,14 +305,6 @@ return lexNumber } - if isAlphanumeric(next) { - return lexKey - } - - if isSpace(next) { - l.ignore() - } - if l.next() == eof { break }
diff --git a/jpath/lexer_test.go b/jpath/lexer_test.go index 5e24497..c553e2a 100644 --- a/jpath/lexer_test.go +++ b/jpath/lexer_test.go
@@ -54,7 +54,9 @@ token{Position{1, 2}, tokenString, "foo"}, token{Position{1, 6}, tokenEOF, ""}, }) +} +func TestLexDoubleString(t *testing.T) { testFlow(t, `"bar"`, []token{ token{Position{1, 2}, tokenString, "bar"}, token{Position{1, 6}, tokenEOF, ""}, @@ -67,3 +69,32 @@ token{Position{1, 4}, tokenEOF, ""}, }) } + +func TestLexRecurse(t *testing.T) { + testFlow(t, "$..*", []token{ + token{Position{1, 1}, tokenDollar, "$"}, + token{Position{1, 2}, tokenDotDot, ".."}, + token{Position{1, 4}, tokenStar, "*"}, + token{Position{1, 5}, tokenEOF, ""}, + }) +} + +func TestLexBracketKey(t *testing.T) { + testFlow(t, "$[foo]", []token{ + token{Position{1, 1}, tokenDollar, "$"}, + token{Position{1, 2}, tokenLBracket, "["}, + token{Position{1, 3}, tokenKey, "foo"}, + token{Position{1, 6}, tokenRBracket, "]"}, + token{Position{1, 7}, tokenEOF, ""}, + }) +} + +func TestLexSpace(t *testing.T) { + testFlow(t, "foo bar baz", []token{ + token{Position{1, 1}, tokenKey, "foo"}, + token{Position{1, 5}, tokenKey, "bar"}, + token{Position{1, 9}, tokenKey, "baz"}, + token{Position{1, 12}, tokenEOF, ""}, + }) +} +
diff --git a/jpath/match.go b/jpath/match.go index 9496168..9a9ef0b 100644 --- a/jpath/match.go +++ b/jpath/match.go
@@ -4,111 +4,139 @@ . "github.com/pelletier/go-toml" ) -type QueryPath []PathFn - -type PathFn func(context interface{}, next QueryPath) - -func (path QueryPath) Fn(context interface{}) { - path[0](context, path[1:]) +type PathFn interface{ + Call(context interface{}, next QueryPath) } +type QueryPath []PathFn + +func (path QueryPath) Fn(context interface{}) { + path[0].Call(context, path[1:]) +} + +// shim to ease functor writing func treeValue(tree *TomlTree, key string) interface{} { return tree.GetPath([]string{key}) } -func matchKeyFn(name string) PathFn { - return func(context interface{}, next QueryPath) { - if tree, ok := context.(*TomlTree); ok { - item := treeValue(tree, name) - if item != nil { - next.Fn(item) - } - } - } +// match single key +type matchKeyFn struct { + Name string } -func matchIndexFn(idx int) PathFn { - return func(context interface{}, next QueryPath) { - if arr, ok := context.([]interface{}); ok { - if idx < len(arr) && idx >= 0 { - next.Fn(arr[idx]) - } - } - } +func (f *matchKeyFn) Call(context interface{}, next QueryPath) { + if tree, ok := context.(*TomlTree); ok { + item := treeValue(tree, f.Name) + if item != nil { + next.Fn(item) + } + } } -func matchSliceFn(start, end, step int) PathFn { - return func(context interface{}, next QueryPath) { - if arr, ok := context.([]interface{}); ok { - // adjust indexes for negative values, reverse ordering - realStart, realEnd := start, end - if realStart < 0 { - realStart = len(arr) + realStart - } - if realEnd < 0 { - realEnd = len(arr) + realEnd - } - if realEnd < realStart { - realEnd, realStart = realStart, realEnd // swap - } - // loop and gather - for idx := realStart; idx < realEnd; idx += step { - next.Fn(arr[idx]) - } - } - } +// match single index +type matchIndexFn struct { + Idx int } -func matchAnyFn() PathFn { - return func(context interface{}, next QueryPath) { - if tree, ok := context.(*TomlTree); ok { - for _, key := range tree.Keys() { - item := treeValue(tree, key) - 
next.Fn(item) - } - } - } +func (f *matchIndexFn) Call(context interface{}, next QueryPath) { + if arr, ok := context.([]interface{}); ok { + if f.Idx < len(arr) && f.Idx >= 0 { + next.Fn(arr[f.Idx]) + } + } } -func matchUnionFn(union QueryPath) PathFn { - return func(context interface{}, next QueryPath) { - for _, fn := range union { - fn(context, next) - } - } +// filter by slicing +type matchSliceFn struct { + Start, End, Step int } -func matchRecurseFn() PathFn { - return func(context interface{}, next QueryPath) { - if tree, ok := context.(*TomlTree); ok { - var visit func(tree *TomlTree) - visit = func(tree *TomlTree) { - for _, key := range tree.Keys() { - item := treeValue(tree, key) - next.Fn(item) - switch node := item.(type) { - case *TomlTree: - visit(node) - case []*TomlTree: - for _, subtree := range node { - visit(subtree) - } - } - } - } - visit(tree) - } - } +func (f *matchSliceFn) Call(context interface{}, next QueryPath) { + if arr, ok := context.([]interface{}); ok { + // adjust indexes for negative values, reverse ordering + realStart, realEnd := f.Start, f.End + if realStart < 0 { + realStart = len(arr) + realStart + } + if realEnd < 0 { + realEnd = len(arr) + realEnd + } + if realEnd < realStart { + realEnd, realStart = realStart, realEnd // swap + } + // loop and gather + for idx := realStart; idx < realEnd; idx += f.Step { + next.Fn(arr[idx]) + } + } +} + +// match anything +type matchAnyFn struct { + // empty +} + +func (f *matchAnyFn) Call(context interface{}, next QueryPath) { + if tree, ok := context.(*TomlTree); ok { + for _, key := range tree.Keys() { + item := treeValue(tree, key) + next.Fn(item) + } + } +} + +// filter through union +type matchUnionFn struct { + Union QueryPath +} + +func (f *matchUnionFn) Call(context interface{}, next QueryPath) { + for _, fn := range f.Union { + fn.Call(context, next) + } +} + +// match every single last node in the tree +type matchRecursiveFn struct { + // empty +} + +func (f *matchRecursiveFn) 
Call(context interface{}, next QueryPath) { + if tree, ok := context.(*TomlTree); ok { + var visit func(tree *TomlTree) + visit = func(tree *TomlTree) { + for _, key := range tree.Keys() { + item := treeValue(tree, key) + next.Fn(item) + switch node := item.(type) { + case *TomlTree: + visit(node) + case []*TomlTree: + for _, subtree := range node { + visit(subtree) + } + } + } + } + visit(tree) + } +} + +// terminating expression +type matchEndFn struct { + Result []interface{} +} + +func (f *matchEndFn) Call(context interface{}, next QueryPath) { + f.Result = append(f.Result, context) } func processPath(path QueryPath, context interface{}) []interface{} { // terminate the path with a collection funciton - result := []interface{}{} - newPath := append(path, func(context interface{}, next QueryPath) { - result = append(result, context) - }) + endFn := &matchEndFn{ []interface{}{} } + newPath := append(path, endFn) // execute the path newPath.Fn(context) - return result + return endFn.Result }
diff --git a/jpath/match_test.go b/jpath/match_test.go new file mode 100644 index 0000000..5e7f73c --- /dev/null +++ b/jpath/match_test.go
@@ -0,0 +1,186 @@ +package jpath + +import ( + "fmt" + "math" + "testing" +) + +func pathString(path QueryPath) string { + result := "[" + for _, v := range path { + result += fmt.Sprintf("%T:%v, ", v, v) + } + return result + "]" +} + +func assertPathMatch(t *testing.T, path, ref QueryPath) bool { + if len(path) != len(ref) { + t.Errorf("lengths do not match: %v vs %v", + pathString(path), pathString(ref)) + return false + } else { + for i, v := range ref { + pass := false + node := path[i] + // compare by value + switch refNode := v.(type) { + case *matchKeyFn: + castNode, ok := node.(*matchKeyFn) + pass = ok && (*refNode == *castNode) + case *matchIndexFn: + castNode, ok := node.(*matchIndexFn) + pass = ok && (*refNode == *castNode) + case *matchSliceFn: + castNode, ok := node.(*matchSliceFn) + pass = ok && (*refNode == *castNode) + case *matchAnyFn: + castNode, ok := node.(*matchAnyFn) + pass = ok && (*refNode == *castNode) + case *matchUnionFn: + castNode, ok := node.(*matchUnionFn) + // special case - compare all contents + pass = ok && assertPathMatch(t, castNode.Union, refNode.Union) + case *matchRecursiveFn: + castNode, ok := node.(*matchRecursiveFn) + pass = ok && (*refNode == *castNode) + } + if !pass { + t.Errorf("paths do not match at index %d: %v vs %v", + i, pathString(path), pathString(ref)) + return false + } + } + } + return true +} + +func assertPath(t *testing.T, query string, ref QueryPath) { + _, flow := lex(query) + path := parse(flow) + assertPathMatch(t, path, ref) +} + +func TestPathRoot(t *testing.T) { + assertPath(t, + "$", + QueryPath{ + // empty + }) +} + +func TestPathKey(t *testing.T) { + assertPath(t, + "$.foo", + QueryPath{ + &matchKeyFn{ "foo" }, + }) +} + +func TestPathBracketKey(t *testing.T) { + assertPath(t, + "$[foo]", + QueryPath{ + &matchKeyFn{ "foo" }, + }) +} + +func TestPathBracketStringKey(t *testing.T) { + assertPath(t, + "$['foo']", + QueryPath{ + &matchKeyFn{ "foo" }, + }) +} + +func TestPathIndex(t *testing.T) { + 
assertPath(t, + "$[123]", + QueryPath{ + &matchIndexFn{ 123 }, + }) +} + +func TestPathSliceStart(t *testing.T) { + assertPath(t, + "$[123:]", + QueryPath{ + &matchSliceFn{ 123, math.MaxInt64, 1 }, + }) +} + +func TestPathSliceStartEnd(t *testing.T) { + assertPath(t, + "$[123:456]", + QueryPath{ + &matchSliceFn{ 123, 456, 1 }, + }) +} + +func TestPathSliceStartEndColon(t *testing.T) { + assertPath(t, + "$[123:456:]", + QueryPath{ + &matchSliceFn{ 123, 456, 1 }, + }) +} + +func TestPathSliceStartStep(t *testing.T) { + assertPath(t, + "$[123::7]", + QueryPath{ + &matchSliceFn{ 123, math.MaxInt64, 7 }, + }) +} + +func TestPathSliceEndStep(t *testing.T) { + assertPath(t, + "$[:456:7]", + QueryPath{ + &matchSliceFn{ 0, 456, 7 }, + }) +} + +func TestPathSliceStep(t *testing.T) { + assertPath(t, + "$[::7]", + QueryPath{ + &matchSliceFn{ 0, math.MaxInt64, 7 }, + }) +} + +func TestPathSliceAll(t *testing.T) { + assertPath(t, + "$[123:456:7]", + QueryPath{ + &matchSliceFn{ 123, 456, 7 }, + }) +} + +func TestPathAny(t *testing.T) { + assertPath(t, + "$.*", + QueryPath{ + &matchAnyFn{}, + }) +} + +func TestPathUnion(t *testing.T) { + assertPath(t, + "$[foo, bar, baz]", + QueryPath{ + &matchUnionFn{ []PathFn { + &matchKeyFn{ "foo" }, + &matchKeyFn{ "bar" }, + &matchKeyFn{ "baz" }, + }}, + }) +} + +func TestPathRecurse(t *testing.T) { + assertPath(t, + "$..*", + QueryPath{ + &matchRecursiveFn{}, + }) +}
diff --git a/jpath/parser.go b/jpath/parser.go index 9fcd0c3..ce37081 100644 --- a/jpath/parser.go +++ b/jpath/parser.go
@@ -3,13 +3,13 @@ import ( "fmt" "math" - "strconv" ) type parser struct { flow chan token tokensBuffer []token path []PathFn + union []PathFn } type parserStateFn func(*parser) parserStateFn @@ -42,6 +42,27 @@ return &tok } +func (p *parser) lookahead(types... tokenType) bool { + result := true + buffer := []token{} + + for _, typ := range types { + tok := p.getToken() + if tok == nil { + result = false + break + } + buffer = append(buffer, *tok) + if tok.typ != typ { + result = false + break + } + } + // add the tokens back to the buffer, and return + p.tokensBuffer = append(p.tokensBuffer, buffer...) + return result +} + func (p *parser) getToken() *token { if len(p.tokensBuffer) != 0 { tok := p.tokensBuffer[0] @@ -73,20 +94,40 @@ return parseMatchExpr } +// handle '.' prefix, '[]', and '..' func parseMatchExpr(p *parser) parserStateFn { tok := p.getToken() switch tok.typ { - case tokenDot: - p.appendPath(matchKeyFn(tok.val)) - return parseMatchExpr case tokenDotDot: - p.appendPath(matchRecurseFn()) - return parseSimpleMatchExpr + p.appendPath(&matchRecursiveFn{}) + // nested parse for '..' + tok := p.getToken() + switch tok.typ { + case tokenKey: + p.appendPath(&matchKeyFn{tok.val}) + return parseMatchExpr + case tokenLBracket: + return parseBracketExpr + case tokenStar: + // do nothing - the recursive predicate is enough + return parseMatchExpr + } + + case tokenDot: + // nested parse for '.' 
+ tok := p.getToken() + switch tok.typ { + case tokenKey: + p.appendPath(&matchKeyFn{tok.val}) + return parseMatchExpr + case tokenStar: + p.appendPath(&matchAnyFn{}) + return parseMatchExpr + } + case tokenLBracket: return parseBracketExpr - case tokenStar: - p.appendPath(matchAnyFn()) - return parseMatchExpr + case tokenEOF: return nil // allow EOF at this stage } @@ -94,57 +135,40 @@ return nil } -func parseSimpleMatchExpr(p *parser) parserStateFn { - tok := p.getToken() - switch tok.typ { - case tokenLBracket: - return parseBracketExpr - case tokenKey: - p.appendPath(matchKeyFn(tok.val)) - return parseMatchExpr - case tokenStar: - p.appendPath(matchAnyFn()) - return parseMatchExpr - } - p.raiseError(tok, "expected match expression") - return nil -} - func parseBracketExpr(p *parser) parserStateFn { - tok := p.peek() - switch tok.typ { - case tokenInteger: - // look ahead for a ':' - p.getToken() - next := p.peek() - p.backup(tok) - if next.typ == tokenColon { - return parseSliceExpr - } - return parseUnionExpr - case tokenColon: - return parseSliceExpr - } + if p.lookahead(tokenInteger, tokenColon) { + return parseSliceExpr + } + if p.peek().typ == tokenColon { + return parseSliceExpr + } return parseUnionExpr } func parseUnionExpr(p *parser) parserStateFn { - union := []PathFn{} - for { + // this state can be traversed after some sub-expressions + // so be careful when setting up state in the parser + if p.union == nil { + p.union = []PathFn{} + } + +loop: // labeled loop for easy breaking + for { // parse sub expression tok := p.getToken() switch tok.typ { case tokenInteger: - idx, _ := strconv.Atoi(tok.val) - union = append(union, matchIndexFn(idx)) + p.union = append(p.union, &matchIndexFn{tok.Int()}) case tokenKey: - union = append(union, matchKeyFn(tok.val)) + p.union = append(p.union, &matchKeyFn{tok.val}) + case tokenString: + p.union = append(p.union, &matchKeyFn{tok.val}) case tokenQuestion: return parseFilterExpr case tokenLParen: return 
parseScriptExpr default: - p.raiseError(tok, "expected union sub expression") + p.raiseError(tok, "expected union sub expression, not '%s'", tok.val) } // parse delimiter or terminator tok = p.getToken() @@ -152,12 +176,20 @@ case tokenComma: continue case tokenRBracket: - break + break loop default: p.raiseError(tok, "expected ',' or ']'") } } - p.appendPath(matchUnionFn(union)) + + // if there is only one sub-expression, use that instead + if len(p.union) == 1 { + p.appendPath(p.union[0]) + } else { + p.appendPath(&matchUnionFn{p.union}) + } + + p.union = nil // clear out state return parseMatchExpr } @@ -168,7 +200,7 @@ // parse optional start tok := p.getToken() if tok.typ == tokenInteger { - start, _ = strconv.Atoi(tok.val) + start = tok.Int() tok = p.getToken() } if tok.typ != tokenColon { @@ -178,17 +210,21 @@ // parse optional end tok = p.getToken() if tok.typ == tokenInteger { - end, _ = strconv.Atoi(tok.val) + end = tok.Int() tok = p.getToken() } - if tok.typ != tokenColon || tok.typ != tokenRBracket { + if tok.typ == tokenRBracket { + p.appendPath(&matchSliceFn{start, end, step}) + return parseMatchExpr + } + if tok.typ != tokenColon { p.raiseError(tok, "expected ']' or ':'") } // parse optional step tok = p.getToken() if tok.typ == tokenInteger { - step, _ = strconv.Atoi(tok.val) + step = tok.Int() if step < 0 { p.raiseError(tok, "step must be a positive value") } @@ -198,7 +234,7 @@ p.raiseError(tok, "expected ']'") } - p.appendPath(matchSliceFn(start, end, step)) + p.appendPath(&matchSliceFn{start, end, step}) return parseMatchExpr } @@ -213,12 +249,11 @@ } func parse(flow chan token) []PathFn { - result := []PathFn{} parser := &parser{ flow: flow, tokensBuffer: []token{}, - path: []PathFn{}, + path: []PathFn{}, } parser.run() - return result + return parser.path }
diff --git a/jpath/parser_test.go b/jpath/parser_test.go index 1571f02..2e16a2f 100644 --- a/jpath/parser_test.go +++ b/jpath/parser_test.go
@@ -29,9 +29,14 @@ t.Errorf("{%s} result value not of type %T: %T", location, node, resultNode) } else { - for i, v := range node { - assertValue(t, resultNode[i], v, fmt.Sprintf("%s[%d]", location, i)) - } + if len(node) != len(resultNode) { + t.Errorf("{%s} lengths do not match: %v vs %v", + location, node, resultNode) + } else { + for i, v := range node { + assertValue(t, resultNode[i], v, fmt.Sprintf("%s[%d]", location, i)) + } + } } case map[string]interface{}: if resultNode, ok := result.(*TomlTree); !ok {