|  | // Copyright 2014 The Go Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style | 
|  | // license that can be found in the LICENSE file. | 
|  |  | 
|  | package collate | 
|  |  | 
|  | import ( | 
|  | "sort" | 
|  |  | 
|  | "golang.org/x/text/internal/colltab" | 
|  | "golang.org/x/text/language" | 
|  | "golang.org/x/text/unicode/norm" | 
|  | ) | 
|  |  | 
|  | // newCollator creates a new collator with default options configured. | 
|  | func newCollator(t colltab.Weighter) *Collator { | 
|  | // Initialize a collator with default options. | 
|  | c := &Collator{ | 
|  | options: options{ | 
|  | ignore: [colltab.NumLevels]bool{ | 
|  | colltab.Quaternary: true, | 
|  | colltab.Identity:   true, | 
|  | }, | 
|  | f: norm.NFD, | 
|  | t: t, | 
|  | }, | 
|  | } | 
|  |  | 
|  | // TODO: store vt in tags or remove. | 
|  | c.variableTop = t.Top() | 
|  |  | 
|  | return c | 
|  | } | 
|  |  | 
// An Option is used to change the behavior of a Collator. Options override the
// settings passed through the locale identifier.
type Option struct {
	// priority orders the application of options: setOptions sorts options
	// by ascending priority before applying them, so an option with a higher
	// priority runs later and can overwrite earlier settings.
	priority int

	// f applies the option by mutating the given options value.
	f func(o *options)
}
|  |  | 
|  | type prioritizedOptions []Option | 
|  |  | 
|  | func (p prioritizedOptions) Len() int { | 
|  | return len(p) | 
|  | } | 
|  |  | 
|  | func (p prioritizedOptions) Swap(i, j int) { | 
|  | p[i], p[j] = p[j], p[i] | 
|  | } | 
|  |  | 
|  | func (p prioritizedOptions) Less(i, j int) bool { | 
|  | return p[i].priority < p[j].priority | 
|  | } | 
|  |  | 
// options holds the settings of a Collator. newCollator fills in the defaults
// and Option values mutate them afterwards.
type options struct {
	// ignore specifies which levels to ignore.
	ignore [colltab.NumLevels]bool

	// caseLevel is true if there is an additional level of case matching
	// between the secondary and tertiary levels.
	caseLevel bool

	// backwards specifies the order of sorting at the secondary level.
	// This option exists predominantly to support reverse sorting of accents in French.
	backwards bool

	// numeric specifies whether any sequence of decimal digits (category is Nd)
	// is sorted at a primary level with its numeric value.
	// For example, "A-21" < "A-123".
	// This option is set by wrapping the main Weighter with NewNumericWeighter.
	numeric bool

	// alternate specifies an alternative handling of variables.
	alternate alternateHandling

	// variableTop is the largest primary value that is considered to be
	// variable.
	variableTop uint32

	// t is the Weighter handed to newCollator; presumably it supplies the
	// collation elements used for comparison (its use is not shown here).
	t colltab.Weighter

	// f is the normalization form associated with the collator; newCollator
	// sets it to norm.NFD.
	f norm.Form
}
|  |  | 
|  | func (o *options) setOptions(opts []Option) { | 
|  | sort.Sort(prioritizedOptions(opts)) | 
|  | for _, x := range opts { | 
|  | x.f(o) | 
|  | } | 
|  | } | 
|  |  | 
|  | // OptionsFromTag extracts the BCP47 collation options from the tag and | 
|  | // configures a collator accordingly. These options are set before any other | 
|  | // option. | 
|  | func OptionsFromTag(t language.Tag) Option { | 
|  | return Option{0, func(o *options) { | 
|  | o.setFromTag(t) | 
|  | }} | 
|  | } | 
|  |  | 
|  | func (o *options) setFromTag(t language.Tag) { | 
|  | o.caseLevel = ldmlBool(t, o.caseLevel, "kc") | 
|  | o.backwards = ldmlBool(t, o.backwards, "kb") | 
|  | o.numeric = ldmlBool(t, o.numeric, "kn") | 
|  |  | 
|  | // Extract settings from the BCP47 u extension. | 
|  | switch t.TypeForKey("ks") { // strength | 
|  | case "level1": | 
|  | o.ignore[colltab.Secondary] = true | 
|  | o.ignore[colltab.Tertiary] = true | 
|  | case "level2": | 
|  | o.ignore[colltab.Tertiary] = true | 
|  | case "level3", "": | 
|  | // The default. | 
|  | case "level4": | 
|  | o.ignore[colltab.Quaternary] = false | 
|  | case "identic": | 
|  | o.ignore[colltab.Quaternary] = false | 
|  | o.ignore[colltab.Identity] = false | 
|  | } | 
|  |  | 
|  | switch t.TypeForKey("ka") { | 
|  | case "shifted": | 
|  | o.alternate = altShifted | 
|  | // The following two types are not official BCP47, but we support them to | 
|  | // give access to this otherwise hidden functionality. The name blanked is | 
|  | // derived from the LDML name blanked and posix reflects the main use of | 
|  | // the shift-trimmed option. | 
|  | case "blanked": | 
|  | o.alternate = altBlanked | 
|  | case "posix": | 
|  | o.alternate = altShiftTrimmed | 
|  | } | 
|  |  | 
|  | // TODO: caseFirst ("kf"), reorder ("kr"), and maybe variableTop ("vt"). | 
|  |  | 
|  | // Not used: | 
|  | // - normalization ("kk", not necessary for this implementation) | 
|  | // - hiraganaQuatenary ("kh", obsolete) | 
|  | } | 
|  |  | 
|  | func ldmlBool(t language.Tag, old bool, key string) bool { | 
|  | switch t.TypeForKey(key) { | 
|  | case "true": | 
|  | return true | 
|  | case "false": | 
|  | return false | 
|  | default: | 
|  | return old | 
|  | } | 
|  | } | 
|  |  | 
|  | var ( | 
|  | // IgnoreCase sets case-insensitive comparison. | 
|  | IgnoreCase Option = ignoreCase | 
|  | ignoreCase        = Option{3, ignoreCaseF} | 
|  |  | 
|  | // IgnoreDiacritics causes diacritical marks to be ignored. ("o" == "รถ"). | 
|  | IgnoreDiacritics Option = ignoreDiacritics | 
|  | ignoreDiacritics        = Option{3, ignoreDiacriticsF} | 
|  |  | 
|  | // IgnoreWidth causes full-width characters to match their half-width | 
|  | // equivalents. | 
|  | IgnoreWidth Option = ignoreWidth | 
|  | ignoreWidth        = Option{2, ignoreWidthF} | 
|  |  | 
|  | // Loose sets the collator to ignore diacritics, case and weight. | 
|  | Loose Option = loose | 
|  | loose        = Option{4, looseF} | 
|  |  | 
|  | // Force ordering if strings are equivalent but not equal. | 
|  | Force Option = force | 
|  | force        = Option{5, forceF} | 
|  |  | 
|  | // Numeric specifies that numbers should sort numerically ("2" < "12"). | 
|  | Numeric Option = numeric | 
|  | numeric        = Option{5, numericF} | 
|  | ) | 
|  |  | 
|  | func ignoreWidthF(o *options) { | 
|  | o.ignore[colltab.Tertiary] = true | 
|  | o.caseLevel = true | 
|  | } | 
|  |  | 
|  | func ignoreDiacriticsF(o *options) { | 
|  | o.ignore[colltab.Secondary] = true | 
|  | } | 
|  |  | 
|  | func ignoreCaseF(o *options) { | 
|  | o.ignore[colltab.Tertiary] = true | 
|  | o.caseLevel = false | 
|  | } | 
|  |  | 
|  | func looseF(o *options) { | 
|  | ignoreWidthF(o) | 
|  | ignoreDiacriticsF(o) | 
|  | ignoreCaseF(o) | 
|  | } | 
|  |  | 
|  | func forceF(o *options) { | 
|  | o.ignore[colltab.Identity] = false | 
|  | } | 
|  |  | 
|  | func numericF(o *options) { o.numeric = true } | 
|  |  | 
|  | // Reorder overrides the pre-defined ordering of scripts and character sets. | 
|  | func Reorder(s ...string) Option { | 
|  | // TODO: need fractional weights to implement this. | 
|  | panic("TODO: implement") | 
|  | } | 
|  |  | 
|  | // TODO: consider making these public again. These options cannot be fully | 
|  | // specified in BCP47, so an API interface seems warranted. Still a higher-level | 
|  | // interface would be nice (e.g. a POSIX option for enabling altShiftTrimmed) | 
|  |  | 
// alternateHandling selects how variables are treated during collation.
// Whether a rune counts as a variable is decided by comparing its primary
// weight against the variable top.
// See http://www.unicode.org/reports/tr10/#Variable_Weighting for details.
type alternateHandling int

const (
	// altNonIgnorable is the zero value: variables get no special handling.
	altNonIgnorable = alternateHandling(iota)

	// altBlanked makes variables, and every primary ignorable that follows
	// them, ignorable at all levels. The effect is the same as deleting those
	// runes from the input.
	altBlanked

	// altShifted makes variables ignorable at levels one through three and
	// adds a fourth level derived from the values of the ignored levels.
	altShifted

	// altShiftTrimmed is a slight variant of altShifted, used to emulate
	// POSIX.
	altShiftTrimmed
)