tree: fix case-insensitive unicode lookup
Fixes #113
diff --git a/tree.go b/tree.go
index eeadec9..48f25f9 100644
--- a/tree.go
+++ b/tree.go
@@ -7,6 +7,7 @@
import (
"strings"
"unicode"
+ "unicode/utf8"
)
func min(a, b int) int {
@@ -447,34 +448,64 @@
// It returns the case-corrected path and a bool indicating whether the lookup
// was successful.
func (n *node) findCaseInsensitivePath(path string, fixTrailingSlash bool) (ciPath []byte, found bool) {
- ciPath = make([]byte, 0, len(path)+1) // preallocate enough memory
+ return n.findCaseInsensitivePathRec(
+ path,
+ strings.ToLower(path),
+ make([]byte, 0, len(path)+1), // preallocate enough memory for new path
+ [4]byte{}, // empty rune buffer
+ fixTrailingSlash,
+ )
+}
+
+func (n *node) findCaseInsensitivePathRec(path, loPath string, ciPath []byte, rb [4]byte, fixTrailingSlash bool) ([]byte, bool) {
+ loNPath := strings.ToLower(n.path)
// Outer loop for walking the tree
- for len(path) >= len(n.path) && strings.ToLower(path[:len(n.path)]) == strings.ToLower(n.path) {
- path = path[len(n.path):]
+ for len(loPath) >= len(loNPath) && loPath[:len(loNPath)] == loNPath {
+ // add common path to result
ciPath = append(ciPath, n.path...)
+ path = path[len(n.path):]
+ loPath = loPath[len(loNPath):]
+
if len(path) > 0 {
// If this node does not have a wildcard (param or catchAll) child,
// we can just look up the next child node and continue to walk down
// the tree
if !n.wildChild {
- r := unicode.ToLower(rune(path[0]))
- for i, index := range n.indices {
- // must use recursive approach since both index and
- // ToLower(index) could exist. We must check both.
- if r == unicode.ToLower(index) {
- out, found := n.children[i].findCaseInsensitivePath(path, fixTrailingSlash)
- if found {
- return append(ciPath, out...), true
- }
+ // get lowercase and uppercase runes
+ var lr, ur [4]byte
+ if rb[0] != 0 {
+ // old rune not finished
+ lr = rb
+ } else {
+ // start a new rune
+ rv, _ := utf8.DecodeRuneInString(loPath)
+ utf8.EncodeRune(lr[:], rv)
+ utf8.EncodeRune(ur[:], unicode.ToUpper(rv))
+ }
+
+ for i := 0; i < len(n.indices); i++ {
+ // must use recursive approach since both the uppercase
+ // byte and the lowercase byte might exist as an index.
+ if idx := n.indices[i]; idx == lr[0] {
+ rb = lr // lowercase matches
+ } else if idx == ur[0] {
+ rb = ur // uppercase matches
+ } else {
+ continue // no match, try next index
+ }
+
+ if out, found := n.children[i].findCaseInsensitivePathRec(
+ path, loPath, ciPath,
+ [4]byte{rb[1], rb[2], rb[3], 0}, fixTrailingSlash); found {
+ return out, true
}
}
// Nothing found. We can recommend to redirect to the same URL
// without a trailing slash if a leaf exists for that path
- found = (fixTrailingSlash && path == "/" && n.handle != nil)
- return
+ return ciPath, (fixTrailingSlash && path == "/" && n.handle != nil)
}
n = n.children[0]
@@ -492,8 +523,11 @@
// we need to go deeper!
if k < len(path) {
if len(n.children) > 0 {
+ // continue with child node
path = path[k:]
+ loPath = loPath[k:]
n = n.children[0]
+ loNPath = strings.ToLower(n.path)
continue
}
@@ -501,7 +535,7 @@
if fixTrailingSlash && len(path) == k+1 {
return ciPath, true
}
- return
+ return ciPath, false
}
if n.handle != nil {
@@ -514,7 +548,7 @@
return append(ciPath, '/'), true
}
}
- return
+ return ciPath, false
case catchAll:
return append(ciPath, path...), true
@@ -539,11 +573,11 @@
(n.nType == catchAll && n.children[0].handle != nil) {
return append(ciPath, '/'), true
}
- return
+ return ciPath, false
}
}
}
- return
+ return ciPath, false
}
}
@@ -554,10 +588,10 @@
return ciPath, true
}
if len(path)+1 == len(n.path) && n.path[len(path)] == '/' &&
- strings.ToLower(path) == strings.ToLower(n.path[:len(path)]) &&
+ loPath == loNPath[:len(loPath)] &&
n.handle != nil {
return append(ciPath, n.path...), true
}
}
- return
+ return ciPath, false
}
diff --git a/tree_test.go b/tree_test.go
index 531768d..e28e8f1 100644
--- a/tree_test.go
+++ b/tree_test.go
@@ -502,6 +502,10 @@
"/doc/go/away",
"/no/a",
"/no/b",
+ "/Π",
+ "/apfêl/",
+ "/äpfêl/",
+ "/öpfêl",
}
for _, route := range routes {
@@ -580,6 +584,12 @@
{"/DOC/", "/doc", true, true},
{"/NO", "", false, true},
{"/DOC/GO", "", false, true},
+ {"/π", "/Π", true, false},
+ {"/π/", "/Π", true, true},
+ {"/ÄPFÊL/", "/äpfêl/", true, false},
+ {"/ÄPFÊL", "/äpfêl/", true, true},
+ {"/ÖPFÊL/", "/öpfêl", true, true},
+ {"/ÖPFÊL", "/öpfêl", true, false},
}
// With fixTrailingSlash = true
for _, test := range tests {