Fix case-insensitive unicode lookup
Updates #113
diff --git a/tree.go b/tree.go
index 48f25f9..808f1e4 100644
--- a/tree.go
+++ b/tree.go
@@ -457,49 +457,100 @@
)
}
+// shift bytes in array by n bytes left
+func shiftNRuneBytes(rb [4]byte, n int) [4]byte {
+ switch n {
+ case 0:
+ return rb
+ case 1:
+ return [4]byte{rb[1], rb[2], rb[3], 0}
+ case 2:
+ return [4]byte{rb[2], rb[3]}
+ case 3:
+ return [4]byte{rb[3]}
+ default:
+ return [4]byte{}
+ }
+}
+
func (n *node) findCaseInsensitivePathRec(path, loPath string, ciPath []byte, rb [4]byte, fixTrailingSlash bool) ([]byte, bool) {
loNPath := strings.ToLower(n.path)
- // Outer loop for walking the tree
- for len(loPath) >= len(loNPath) && loPath[:len(loNPath)] == loNPath {
+ for len(loPath) >= len(loNPath) && (len(loNPath) == 0 || loPath[1:len(loNPath)] == loNPath[1:]) {
// add common path to result
ciPath = append(ciPath, n.path...)
- path = path[len(n.path):]
- loPath = loPath[len(loNPath):]
+ if path = path[len(n.path):]; len(path) > 0 {
+ loOld := loPath
+ loPath = loPath[len(loNPath):]
- if len(path) > 0 {
// If this node does not have a wildcard (param or catchAll) child,
// we can just look up the next child node and continue to walk down
// the tree
if !n.wildChild {
- // get lowercase and uppercase runes
- var lr, ur [4]byte
+ // skip rune bytes already processed
+ rb = shiftNRuneBytes(rb, len(loNPath))
+
if rb[0] != 0 {
// old rune not finished
- lr = rb
+ for i := 0; i < len(n.indices); i++ {
+ if n.indices[i] == rb[0] {
+ if out, found := n.children[i].findCaseInsensitivePathRec(
+ path, loPath, ciPath, rb, fixTrailingSlash,
+ ); found {
+ return out, true
+ }
+ }
+ }
} else {
- // start a new rune
- rv, _ := utf8.DecodeRuneInString(loPath)
- utf8.EncodeRune(lr[:], rv)
- utf8.EncodeRune(ur[:], unicode.ToUpper(rv))
- }
+ // process a new rune
+ var rv rune
- for i := 0; i < len(n.indices); i++ {
- // must use recursive approach since both the uppercase
- // byte and the lowercase byte might exist as an index.
- if idx := n.indices[i]; idx == lr[0] {
- rb = lr // lowercase matches
- } else if idx == ur[0] {
- rb = ur // uppercase matches
- } else {
- continue // no match, try next index
+ // find rune start
+ // runes are up to 4 byte long,
+ // -4 would definitely be another rune
+ var off int
+ for max := min(len(loNPath), 3); off < max; off++ {
+ if i := len(loNPath) - off; utf8.RuneStart(loOld[i]) {
+ // read rune from cached lowercase path
+ rv, _ = utf8.DecodeRuneInString(loOld[i:])
+ break
+ }
}
- if out, found := n.children[i].findCaseInsensitivePathRec(
- path, loPath, ciPath,
- [4]byte{rb[1], rb[2], rb[3], 0}, fixTrailingSlash); found {
- return out, true
+ // calculate lowercase bytes of current rune
+ utf8.EncodeRune(rb[:], rv)
+ // skipp already processed bytes
+ rb = shiftNRuneBytes(rb, off)
+
+ // must use recursive approach since both the uppercase
+ // byte and the lowercase byte might exist as an index
+ for i := 0; i < len(n.indices); i++ {
+ // lowercase matches
+ if n.indices[i] == rb[0] {
+ if out, found := n.children[i].findCaseInsensitivePathRec(
+ path, loPath, ciPath, rb, fixTrailingSlash,
+ ); found {
+ return out, true
+ }
+ }
+ }
+
+ // same for uppercase rune, if it differs
+ if up := unicode.ToUpper(rv); up != rv {
+ utf8.EncodeRune(rb[:], up)
+ rb = shiftNRuneBytes(rb, off)
+
+ for i := 0; i < len(n.indices); i++ {
+ // uppercase matches
+ if n.indices[i] == rb[0] {
+ if out, found := n.children[i].findCaseInsensitivePathRec(
+ path, loPath, ciPath, rb, fixTrailingSlash,
+ ); found {
+ return out, true
+ }
+ }
+ }
}
}
@@ -524,10 +575,10 @@
if k < len(path) {
if len(n.children) > 0 {
// continue with child node
- path = path[k:]
- loPath = loPath[k:]
n = n.children[0]
loNPath = strings.ToLower(n.path)
+ loPath = loPath[k:]
+ path = path[k:]
continue
}
@@ -587,9 +638,8 @@
if path == "/" {
return ciPath, true
}
- if len(path)+1 == len(n.path) && n.path[len(path)] == '/' &&
- loPath == loNPath[:len(loPath)] &&
- n.handle != nil {
+ if len(loPath)+1 == len(loNPath) && loNPath[len(loPath)] == '/' &&
+ loPath[1:] == loNPath[1:len(loPath)] && n.handle != nil {
return append(ciPath, n.path...), true
}
}
diff --git a/tree_test.go b/tree_test.go
index e28e8f1..a49045c 100644
--- a/tree_test.go
+++ b/tree_test.go
@@ -503,9 +503,15 @@
"/no/a",
"/no/b",
"/Π",
- "/apfêl/",
- "/äpfêl/",
- "/öpfêl",
+ "/u/apfêl/",
+ "/u/äpfêl/",
+ "/u/öpfêl",
+ "/v/Äpfêl/",
+ "/v/Öpfêl",
+ "/w/♬", // 3 byte
+ "/w/♭/", // 3 byte, last byte differs
+ "/w/𠜎", // 4 byte
+ "/w/𠜏/", // 4 byte
}
for _, route := range routes {
@@ -586,10 +592,18 @@
{"/DOC/GO", "", false, true},
{"/π", "/Π", true, false},
{"/π/", "/Π", true, true},
- {"/ÄPFÊL/", "/äpfêl/", true, false},
- {"/ÄPFÊL", "/äpfêl/", true, true},
- {"/ÖPFÊL/", "/öpfêl", true, true},
- {"/ÖPFÊL", "/öpfêl", true, false},
+ {"/u/ÄPFÊL/", "/u/äpfêl/", true, false},
+ {"/u/ÄPFÊL", "/u/äpfêl/", true, true},
+ {"/u/ÖPFÊL/", "/u/öpfêl", true, true},
+ {"/u/ÖPFÊL", "/u/öpfêl", true, false},
+ {"/v/äpfêL/", "/v/Äpfêl/", true, false},
+ {"/v/äpfêL", "/v/Äpfêl/", true, true},
+ {"/v/öpfêL/", "/v/Öpfêl", true, true},
+ {"/v/öpfêL", "/v/Öpfêl", true, false},
+ {"/w/♬/", "/w/♬", true, true},
+ {"/w/♭", "/w/♭/", true, true},
+ {"/w/𠜎/", "/w/𠜎", true, true},
+ {"/w/𠜏", "/w/𠜏/", true, true},
}
// With fixTrailingSlash = true
for _, test := range tests {