Fix case-insensitive unicode lookup

Updates #113
diff --git a/tree.go b/tree.go
index 48f25f9..808f1e4 100644
--- a/tree.go
+++ b/tree.go
@@ -457,49 +457,100 @@
 	)
 }
 
+// shift bytes in array by n bytes left
+func shiftNRuneBytes(rb [4]byte, n int) [4]byte {
+	switch n {
+	case 0:
+		return rb
+	case 1:
+		return [4]byte{rb[1], rb[2], rb[3], 0}
+	case 2:
+		return [4]byte{rb[2], rb[3]}
+	case 3:
+		return [4]byte{rb[3]}
+	default:
+		return [4]byte{}
+	}
+}
+
 func (n *node) findCaseInsensitivePathRec(path, loPath string, ciPath []byte, rb [4]byte, fixTrailingSlash bool) ([]byte, bool) {
 	loNPath := strings.ToLower(n.path)
 
-	// Outer loop for walking the tree
-	for len(loPath) >= len(loNPath) && loPath[:len(loNPath)] == loNPath {
+	for len(loPath) >= len(loNPath) && (len(loNPath) == 0 || loPath[1:len(loNPath)] == loNPath[1:]) {
 		// add common path to result
 		ciPath = append(ciPath, n.path...)
 
-		path = path[len(n.path):]
-		loPath = loPath[len(loNPath):]
+		if path = path[len(n.path):]; len(path) > 0 {
+			loOld := loPath
+			loPath = loPath[len(loNPath):]
 
-		if len(path) > 0 {
 			// If this node does not have a wildcard (param or catchAll) child,
 			// we can just look up the next child node and continue to walk down
 			// the tree
 			if !n.wildChild {
-				// get lowercase and uppercase runes
-				var lr, ur [4]byte
+				// skip rune bytes already processed
+				rb = shiftNRuneBytes(rb, len(loNPath))
+
 				if rb[0] != 0 {
 					// old rune not finished
-					lr = rb
+					for i := 0; i < len(n.indices); i++ {
+						if n.indices[i] == rb[0] {
+							if out, found := n.children[i].findCaseInsensitivePathRec(
+								path, loPath, ciPath, rb, fixTrailingSlash,
+							); found {
+								return out, true
+							}
+						}
+					}
 				} else {
-					// start a new rune
-					rv, _ := utf8.DecodeRuneInString(loPath)
-					utf8.EncodeRune(lr[:], rv)
-					utf8.EncodeRune(ur[:], unicode.ToUpper(rv))
-				}
+					// process a new rune
+					var rv rune
 
-				for i := 0; i < len(n.indices); i++ {
-					// must use recursive approach since both the uppercase
-					// byte and the lowercase byte might exist as an index.
-					if idx := n.indices[i]; idx == lr[0] {
-						rb = lr // lowercase matches
-					} else if idx == ur[0] {
-						rb = ur // uppercase matches
-					} else {
-						continue // no match, try next index
+					// find rune start
+					// runes are up to 4 byte long,
+					// -4 would definitely be another rune
+					var off int
+					for max := min(len(loNPath), 3); off < max; off++ {
+						if i := len(loNPath) - off; utf8.RuneStart(loOld[i]) {
+							// read rune from cached lowercase path
+							rv, _ = utf8.DecodeRuneInString(loOld[i:])
+							break
+						}
 					}
 
-					if out, found := n.children[i].findCaseInsensitivePathRec(
-						path, loPath, ciPath,
-						[4]byte{rb[1], rb[2], rb[3], 0}, fixTrailingSlash); found {
-						return out, true
+					// calculate lowercase bytes of current rune
+					utf8.EncodeRune(rb[:], rv)
+					// skipp already processed bytes
+					rb = shiftNRuneBytes(rb, off)
+
+					// must use recursive approach since both the uppercase
+					// byte and the lowercase byte might exist as an index
+					for i := 0; i < len(n.indices); i++ {
+						// lowercase matches
+						if n.indices[i] == rb[0] {
+							if out, found := n.children[i].findCaseInsensitivePathRec(
+								path, loPath, ciPath, rb, fixTrailingSlash,
+							); found {
+								return out, true
+							}
+						}
+					}
+
+					// same for uppercase rune, if it differs
+					if up := unicode.ToUpper(rv); up != rv {
+						utf8.EncodeRune(rb[:], up)
+						rb = shiftNRuneBytes(rb, off)
+
+						for i := 0; i < len(n.indices); i++ {
+							// uppercase matches
+							if n.indices[i] == rb[0] {
+								if out, found := n.children[i].findCaseInsensitivePathRec(
+									path, loPath, ciPath, rb, fixTrailingSlash,
+								); found {
+									return out, true
+								}
+							}
+						}
 					}
 				}
 
@@ -524,10 +575,10 @@
 				if k < len(path) {
 					if len(n.children) > 0 {
 						// continue with child node
-						path = path[k:]
-						loPath = loPath[k:]
 						n = n.children[0]
 						loNPath = strings.ToLower(n.path)
+						loPath = loPath[k:]
+						path = path[k:]
 						continue
 					}
 
@@ -587,9 +638,8 @@
 		if path == "/" {
 			return ciPath, true
 		}
-		if len(path)+1 == len(n.path) && n.path[len(path)] == '/' &&
-			loPath == loNPath[:len(loPath)] &&
-			n.handle != nil {
+		if len(loPath)+1 == len(loNPath) && loNPath[len(loPath)] == '/' &&
+			loPath[1:] == loNPath[1:len(loPath)] && n.handle != nil {
 			return append(ciPath, n.path...), true
 		}
 	}
diff --git a/tree_test.go b/tree_test.go
index e28e8f1..a49045c 100644
--- a/tree_test.go
+++ b/tree_test.go
@@ -503,9 +503,15 @@
 		"/no/a",
 		"/no/b",
 		"/Π",
-		"/apfêl/",
-		"/äpfêl/",
-		"/öpfêl",
+		"/u/apfêl/",
+		"/u/äpfêl/",
+		"/u/öpfêl",
+		"/v/Äpfêl/",
+		"/v/Öpfêl",
+		"/w/♬",  // 3 byte
+		"/w/♭/", // 3 byte, last byte differs
+		"/w/𠜎",  // 4 byte
+		"/w/𠜏/", // 4 byte
 	}
 
 	for _, route := range routes {
@@ -586,10 +592,18 @@
 		{"/DOC/GO", "", false, true},
 		{"/π", "/Π", true, false},
 		{"/π/", "/Π", true, true},
-		{"/ÄPFÊL/", "/äpfêl/", true, false},
-		{"/ÄPFÊL", "/äpfêl/", true, true},
-		{"/ÖPFÊL/", "/öpfêl", true, true},
-		{"/ÖPFÊL", "/öpfêl", true, false},
+		{"/u/ÄPFÊL/", "/u/äpfêl/", true, false},
+		{"/u/ÄPFÊL", "/u/äpfêl/", true, true},
+		{"/u/ÖPFÊL/", "/u/öpfêl", true, true},
+		{"/u/ÖPFÊL", "/u/öpfêl", true, false},
+		{"/v/äpfêL/", "/v/Äpfêl/", true, false},
+		{"/v/äpfêL", "/v/Äpfêl/", true, true},
+		{"/v/öpfêL/", "/v/Öpfêl", true, true},
+		{"/v/öpfêL", "/v/Öpfêl", true, false},
+		{"/w/♬/", "/w/♬", true, true},
+		{"/w/♭", "/w/♭/", true, true},
+		{"/w/𠜎/", "/w/𠜎", true, true},
+		{"/w/𠜏", "/w/𠜏/", true, true},
 	}
 	// With fixTrailingSlash = true
 	for _, test := range tests {