mirror of
https://github.com/kubernetes-sigs/prometheus-adapter.git
synced 2026-04-07 10:17:51 +00:00
Check in the vendor directory
Travis seems to be having issues pulling deps, so we'll have to check in the vendor directory and prevent the makefile from trying to regenerate it normally.
This commit is contained in:
parent
98e16bc315
commit
a293b2bf94
2526 changed files with 930931 additions and 4 deletions
702
vendor/golang.org/x/text/collate/build/builder.go
generated
vendored
Normal file
702
vendor/golang.org/x/text/collate/build/builder.go
generated
vendored
Normal file
|
|
@ -0,0 +1,702 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package build // import "golang.org/x/text/collate/build"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"sort"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
// TODO: optimizations:
|
||||
// - expandElem is currently 20K. By putting unique colElems in a separate
|
||||
// table and having a byte array of indexes into this table, we can reduce
|
||||
// the total size to about 7K. By also factoring out the length bytes, we
|
||||
// can reduce this to about 6K.
|
||||
// - trie valueBlocks are currently 100K. There are a lot of sparse blocks
|
||||
// and many consecutive values with the same stride. This can be further
|
||||
// compacted.
|
||||
// - Compress secondary weights into 8 bits.
|
||||
// - Some LDML specs specify a context element. Currently we simply concatenate
|
||||
// those. Context can be implemented using the contraction trie. If Builder
|
||||
// could analyze and detect when using a context makes sense, there is no
|
||||
// need to expose this construct in the API.
|
||||
|
||||
// A Builder builds a root collation table. The user must specify the
|
||||
// collation elements for each entry. A common use will be to base the weights
|
||||
// on those specified in the allkeys* file as provided by the UCA or CLDR.
|
||||
type Builder struct {
|
||||
index *trieBuilder
|
||||
root ordering
|
||||
locale []*Tailoring
|
||||
t *table
|
||||
err error
|
||||
built bool
|
||||
|
||||
minNonVar int // lowest primary recorded for a variable
|
||||
varTop int // highest primary recorded for a non-variable
|
||||
|
||||
// indexes used for reusing expansions and contractions
|
||||
expIndex map[string]int // positions of expansions keyed by their string representation
|
||||
ctHandle map[string]ctHandle // contraction handles keyed by a concatenation of the suffixes
|
||||
ctElem map[string]int // contraction elements keyed by their string representation
|
||||
}
|
||||
|
||||
// A Tailoring builds a collation table based on another collation table.
|
||||
// The table is defined by specifying tailorings to the underlying table.
|
||||
// See http://unicode.org/reports/tr35/ for an overview of tailoring
|
||||
// collation tables. The CLDR contains pre-defined tailorings for a variety
|
||||
// of languages (See http://www.unicode.org/Public/cldr/<version>/core.zip.)
|
||||
type Tailoring struct {
|
||||
id string
|
||||
builder *Builder
|
||||
index *ordering
|
||||
|
||||
anchor *entry
|
||||
before bool
|
||||
}
|
||||
|
||||
// NewBuilder returns a new Builder.
|
||||
func NewBuilder() *Builder {
|
||||
return &Builder{
|
||||
index: newTrieBuilder(),
|
||||
root: makeRootOrdering(),
|
||||
expIndex: make(map[string]int),
|
||||
ctHandle: make(map[string]ctHandle),
|
||||
ctElem: make(map[string]int),
|
||||
}
|
||||
}
|
||||
|
||||
// Tailoring returns a Tailoring for the given locale. One should
|
||||
// have completed all calls to Add before calling Tailoring.
|
||||
func (b *Builder) Tailoring(loc language.Tag) *Tailoring {
|
||||
t := &Tailoring{
|
||||
id: loc.String(),
|
||||
builder: b,
|
||||
index: b.root.clone(),
|
||||
}
|
||||
t.index.id = t.id
|
||||
b.locale = append(b.locale, t)
|
||||
return t
|
||||
}
|
||||
|
||||
// Add adds an entry to the collation element table, mapping
|
||||
// a slice of runes to a sequence of collation elements.
|
||||
// A collation element is specified as list of weights: []int{primary, secondary, ...}.
|
||||
// The entries are typically obtained from a collation element table
|
||||
// as defined in http://www.unicode.org/reports/tr10/#Data_Table_Format.
|
||||
// Note that the collation elements specified by colelems are only used
|
||||
// as a guide. The actual weights generated by Builder may differ.
|
||||
// The argument variables is a list of indices into colelems that should contain
|
||||
// a value for each colelem that is a variable. (See the reference above.)
|
||||
func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
|
||||
str := string(runes)
|
||||
elems := make([]rawCE, len(colelems))
|
||||
for i, ce := range colelems {
|
||||
if len(ce) == 0 {
|
||||
break
|
||||
}
|
||||
elems[i] = makeRawCE(ce, 0)
|
||||
if len(ce) == 1 {
|
||||
elems[i].w[1] = defaultSecondary
|
||||
}
|
||||
if len(ce) <= 2 {
|
||||
elems[i].w[2] = defaultTertiary
|
||||
}
|
||||
if len(ce) <= 3 {
|
||||
elems[i].w[3] = ce[0]
|
||||
}
|
||||
}
|
||||
for i, ce := range elems {
|
||||
p := ce.w[0]
|
||||
isvar := false
|
||||
for _, j := range variables {
|
||||
if i == j {
|
||||
isvar = true
|
||||
}
|
||||
}
|
||||
if isvar {
|
||||
if p >= b.minNonVar && b.minNonVar > 0 {
|
||||
return fmt.Errorf("primary value %X of variable is larger than the smallest non-variable %X", p, b.minNonVar)
|
||||
}
|
||||
if p > b.varTop {
|
||||
b.varTop = p
|
||||
}
|
||||
} else if p > 1 { // 1 is a special primary value reserved for FFFE
|
||||
if p <= b.varTop {
|
||||
return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", p, b.varTop)
|
||||
}
|
||||
if b.minNonVar == 0 || p < b.minNonVar {
|
||||
b.minNonVar = p
|
||||
}
|
||||
}
|
||||
}
|
||||
elems, err := convertLargeWeights(elems)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cccs := []uint8{}
|
||||
nfd := norm.NFD.String(str)
|
||||
for i := range nfd {
|
||||
cccs = append(cccs, norm.NFD.PropertiesString(nfd[i:]).CCC())
|
||||
}
|
||||
if len(cccs) < len(elems) {
|
||||
if len(cccs) > 2 {
|
||||
return fmt.Errorf("number of decomposed characters should be greater or equal to the number of collation elements for len(colelems) > 3 (%d < %d)", len(cccs), len(elems))
|
||||
}
|
||||
p := len(elems) - 1
|
||||
for ; p > 0 && elems[p].w[0] == 0; p-- {
|
||||
elems[p].ccc = cccs[len(cccs)-1]
|
||||
}
|
||||
for ; p >= 0; p-- {
|
||||
elems[p].ccc = cccs[0]
|
||||
}
|
||||
} else {
|
||||
for i := range elems {
|
||||
elems[i].ccc = cccs[i]
|
||||
}
|
||||
}
|
||||
// doNorm in collate.go assumes that the following conditions hold.
|
||||
if len(elems) > 1 && len(cccs) > 1 && cccs[0] != 0 && cccs[0] != cccs[len(cccs)-1] {
|
||||
return fmt.Errorf("incompatible CCC values for expansion %X (%d)", runes, cccs)
|
||||
}
|
||||
b.root.newEntry(str, elems)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *Tailoring) setAnchor(anchor string) error {
|
||||
anchor = norm.NFC.String(anchor)
|
||||
a := t.index.find(anchor)
|
||||
if a == nil {
|
||||
a = t.index.newEntry(anchor, nil)
|
||||
a.implicit = true
|
||||
a.modified = true
|
||||
for _, r := range []rune(anchor) {
|
||||
e := t.index.find(string(r))
|
||||
e.lock = true
|
||||
}
|
||||
}
|
||||
t.anchor = a
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetAnchor sets the point after which elements passed in subsequent calls to
|
||||
// Insert will be inserted. It is equivalent to the reset directive in an LDML
|
||||
// specification. See Insert for an example.
|
||||
// SetAnchor supports the following logical reset positions:
|
||||
// <first_tertiary_ignorable/>, <last_teriary_ignorable/>, <first_primary_ignorable/>,
|
||||
// and <last_non_ignorable/>.
|
||||
func (t *Tailoring) SetAnchor(anchor string) error {
|
||||
if err := t.setAnchor(anchor); err != nil {
|
||||
return err
|
||||
}
|
||||
t.before = false
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetAnchorBefore is similar to SetAnchor, except that subsequent calls to
|
||||
// Insert will insert entries before the anchor.
|
||||
func (t *Tailoring) SetAnchorBefore(anchor string) error {
|
||||
if err := t.setAnchor(anchor); err != nil {
|
||||
return err
|
||||
}
|
||||
t.before = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// Insert sets the ordering of str relative to the entry set by the previous
|
||||
// call to SetAnchor or Insert. The argument extend corresponds
|
||||
// to the extend elements as defined in LDML. A non-empty value for extend
|
||||
// will cause the collation elements corresponding to extend to be appended
|
||||
// to the collation elements generated for the entry added by Insert.
|
||||
// This has the same net effect as sorting str after the string anchor+extend.
|
||||
// See http://www.unicode.org/reports/tr10/#Tailoring_Example for details
|
||||
// on parametric tailoring and http://unicode.org/reports/tr35/#Collation_Elements
|
||||
// for full details on LDML.
|
||||
//
|
||||
// Examples: create a tailoring for Swedish, where "ä" is ordered after "z"
|
||||
// at the primary sorting level:
|
||||
// t := b.Tailoring("se")
|
||||
// t.SetAnchor("z")
|
||||
// t.Insert(colltab.Primary, "ä", "")
|
||||
// Order "ü" after "ue" at the secondary sorting level:
|
||||
// t.SetAnchor("ue")
|
||||
// t.Insert(colltab.Secondary, "ü","")
|
||||
// or
|
||||
// t.SetAnchor("u")
|
||||
// t.Insert(colltab.Secondary, "ü", "e")
|
||||
// Order "q" afer "ab" at the secondary level and "Q" after "q"
|
||||
// at the tertiary level:
|
||||
// t.SetAnchor("ab")
|
||||
// t.Insert(colltab.Secondary, "q", "")
|
||||
// t.Insert(colltab.Tertiary, "Q", "")
|
||||
// Order "b" before "a":
|
||||
// t.SetAnchorBefore("a")
|
||||
// t.Insert(colltab.Primary, "b", "")
|
||||
// Order "0" after the last primary ignorable:
|
||||
// t.SetAnchor("<last_primary_ignorable/>")
|
||||
// t.Insert(colltab.Primary, "0", "")
|
||||
func (t *Tailoring) Insert(level colltab.Level, str, extend string) error {
|
||||
if t.anchor == nil {
|
||||
return fmt.Errorf("%s:Insert: no anchor point set for tailoring of %s", t.id, str)
|
||||
}
|
||||
str = norm.NFC.String(str)
|
||||
e := t.index.find(str)
|
||||
if e == nil {
|
||||
e = t.index.newEntry(str, nil)
|
||||
} else if e.logical != noAnchor {
|
||||
return fmt.Errorf("%s:Insert: cannot reinsert logical reset position %q", t.id, e.str)
|
||||
}
|
||||
if e.lock {
|
||||
return fmt.Errorf("%s:Insert: cannot reinsert element %q", t.id, e.str)
|
||||
}
|
||||
a := t.anchor
|
||||
// Find the first element after the anchor which differs at a level smaller or
|
||||
// equal to the given level. Then insert at this position.
|
||||
// See http://unicode.org/reports/tr35/#Collation_Elements, Section 5.14.5 for details.
|
||||
e.before = t.before
|
||||
if t.before {
|
||||
t.before = false
|
||||
if a.prev == nil {
|
||||
a.insertBefore(e)
|
||||
} else {
|
||||
for a = a.prev; a.level > level; a = a.prev {
|
||||
}
|
||||
a.insertAfter(e)
|
||||
}
|
||||
e.level = level
|
||||
} else {
|
||||
for ; a.level > level; a = a.next {
|
||||
}
|
||||
e.level = a.level
|
||||
if a != e {
|
||||
a.insertAfter(e)
|
||||
a.level = level
|
||||
} else {
|
||||
// We don't set a to prev itself. This has the effect of the entry
|
||||
// getting new collation elements that are an increment of itself.
|
||||
// This is intentional.
|
||||
a.prev.level = level
|
||||
}
|
||||
}
|
||||
e.extend = norm.NFD.String(extend)
|
||||
e.exclude = false
|
||||
e.modified = true
|
||||
e.elems = nil
|
||||
t.anchor = e
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o *ordering) getWeight(e *entry) []rawCE {
|
||||
if len(e.elems) == 0 && e.logical == noAnchor {
|
||||
if e.implicit {
|
||||
for _, r := range e.runes {
|
||||
e.elems = append(e.elems, o.getWeight(o.find(string(r)))...)
|
||||
}
|
||||
} else if e.before {
|
||||
count := [colltab.Identity + 1]int{}
|
||||
a := e
|
||||
for ; a.elems == nil && !a.implicit; a = a.next {
|
||||
count[a.level]++
|
||||
}
|
||||
e.elems = []rawCE{makeRawCE(a.elems[0].w, a.elems[0].ccc)}
|
||||
for i := colltab.Primary; i < colltab.Quaternary; i++ {
|
||||
if count[i] != 0 {
|
||||
e.elems[0].w[i] -= count[i]
|
||||
break
|
||||
}
|
||||
}
|
||||
if e.prev != nil {
|
||||
o.verifyWeights(e.prev, e, e.prev.level)
|
||||
}
|
||||
} else {
|
||||
prev := e.prev
|
||||
e.elems = nextWeight(prev.level, o.getWeight(prev))
|
||||
o.verifyWeights(e, e.next, e.level)
|
||||
}
|
||||
}
|
||||
return e.elems
|
||||
}
|
||||
|
||||
func (o *ordering) addExtension(e *entry) {
|
||||
if ex := o.find(e.extend); ex != nil {
|
||||
e.elems = append(e.elems, ex.elems...)
|
||||
} else {
|
||||
for _, r := range []rune(e.extend) {
|
||||
e.elems = append(e.elems, o.find(string(r)).elems...)
|
||||
}
|
||||
}
|
||||
e.extend = ""
|
||||
}
|
||||
|
||||
func (o *ordering) verifyWeights(a, b *entry, level colltab.Level) error {
|
||||
if level == colltab.Identity || b == nil || b.elems == nil || a.elems == nil {
|
||||
return nil
|
||||
}
|
||||
for i := colltab.Primary; i < level; i++ {
|
||||
if a.elems[0].w[i] < b.elems[0].w[i] {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
if a.elems[0].w[level] >= b.elems[0].w[level] {
|
||||
err := fmt.Errorf("%s:overflow: collation elements of %q (%X) overflows those of %q (%X) at level %d (%X >= %X)", o.id, a.str, a.runes, b.str, b.runes, level, a.elems, b.elems)
|
||||
log.Println(err)
|
||||
// TODO: return the error instead, or better, fix the conflicting entry by making room.
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *Builder) error(e error) {
|
||||
if e != nil {
|
||||
b.err = e
|
||||
}
|
||||
}
|
||||
|
||||
func (b *Builder) errorID(locale string, e error) {
|
||||
if e != nil {
|
||||
b.err = fmt.Errorf("%s:%v", locale, e)
|
||||
}
|
||||
}
|
||||
|
||||
// patchNorm ensures that NFC and NFD counterparts are consistent.
|
||||
func (o *ordering) patchNorm() {
|
||||
// Insert the NFD counterparts, if necessary.
|
||||
for _, e := range o.ordered {
|
||||
nfd := norm.NFD.String(e.str)
|
||||
if nfd != e.str {
|
||||
if e0 := o.find(nfd); e0 != nil && !e0.modified {
|
||||
e0.elems = e.elems
|
||||
} else if e.modified && !equalCEArrays(o.genColElems(nfd), e.elems) {
|
||||
e := o.newEntry(nfd, e.elems)
|
||||
e.modified = true
|
||||
}
|
||||
}
|
||||
}
|
||||
// Update unchanged composed forms if one of their parts changed.
|
||||
for _, e := range o.ordered {
|
||||
nfd := norm.NFD.String(e.str)
|
||||
if e.modified || nfd == e.str {
|
||||
continue
|
||||
}
|
||||
if e0 := o.find(nfd); e0 != nil {
|
||||
e.elems = e0.elems
|
||||
} else {
|
||||
e.elems = o.genColElems(nfd)
|
||||
if norm.NFD.LastBoundary([]byte(nfd)) == 0 {
|
||||
r := []rune(nfd)
|
||||
head := string(r[0])
|
||||
tail := ""
|
||||
for i := 1; i < len(r); i++ {
|
||||
s := norm.NFC.String(head + string(r[i]))
|
||||
if e0 := o.find(s); e0 != nil && e0.modified {
|
||||
head = s
|
||||
} else {
|
||||
tail += string(r[i])
|
||||
}
|
||||
}
|
||||
e.elems = append(o.genColElems(head), o.genColElems(tail)...)
|
||||
}
|
||||
}
|
||||
}
|
||||
// Exclude entries for which the individual runes generate the same collation elements.
|
||||
for _, e := range o.ordered {
|
||||
if len(e.runes) > 1 && equalCEArrays(o.genColElems(e.str), e.elems) {
|
||||
e.exclude = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (b *Builder) buildOrdering(o *ordering) {
|
||||
for _, e := range o.ordered {
|
||||
o.getWeight(e)
|
||||
}
|
||||
for _, e := range o.ordered {
|
||||
o.addExtension(e)
|
||||
}
|
||||
o.patchNorm()
|
||||
o.sort()
|
||||
simplify(o)
|
||||
b.processExpansions(o) // requires simplify
|
||||
b.processContractions(o) // requires simplify
|
||||
|
||||
t := newNode()
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
if !e.skip() {
|
||||
ce, err := e.encode()
|
||||
b.errorID(o.id, err)
|
||||
t.insert(e.runes[0], ce)
|
||||
}
|
||||
}
|
||||
o.handle = b.index.addTrie(t)
|
||||
}
|
||||
|
||||
func (b *Builder) build() (*table, error) {
|
||||
if b.built {
|
||||
return b.t, b.err
|
||||
}
|
||||
b.built = true
|
||||
b.t = &table{
|
||||
Table: colltab.Table{
|
||||
MaxContractLen: utf8.UTFMax,
|
||||
VariableTop: uint32(b.varTop),
|
||||
},
|
||||
}
|
||||
|
||||
b.buildOrdering(&b.root)
|
||||
b.t.root = b.root.handle
|
||||
for _, t := range b.locale {
|
||||
b.buildOrdering(t.index)
|
||||
if b.err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
i, err := b.index.generate()
|
||||
b.t.trie = *i
|
||||
b.t.Index = colltab.Trie{
|
||||
Index: i.index,
|
||||
Values: i.values,
|
||||
Index0: i.index[blockSize*b.t.root.lookupStart:],
|
||||
Values0: i.values[blockSize*b.t.root.valueStart:],
|
||||
}
|
||||
b.error(err)
|
||||
return b.t, b.err
|
||||
}
|
||||
|
||||
// Build builds the root Collator.
|
||||
func (b *Builder) Build() (colltab.Weighter, error) {
|
||||
table, err := b.build()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return table, nil
|
||||
}
|
||||
|
||||
// Build builds a Collator for Tailoring t.
|
||||
func (t *Tailoring) Build() (colltab.Weighter, error) {
|
||||
// TODO: implement.
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Print prints the tables for b and all its Tailorings as a Go file
|
||||
// that can be included in the Collate package.
|
||||
func (b *Builder) Print(w io.Writer) (n int, err error) {
|
||||
p := func(nn int, e error) {
|
||||
n += nn
|
||||
if err == nil {
|
||||
err = e
|
||||
}
|
||||
}
|
||||
t, err := b.build()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
p(fmt.Fprintf(w, `var availableLocales = "und`))
|
||||
for _, loc := range b.locale {
|
||||
if loc.id != "und" {
|
||||
p(fmt.Fprintf(w, ",%s", loc.id))
|
||||
}
|
||||
}
|
||||
p(fmt.Fprint(w, "\"\n\n"))
|
||||
p(fmt.Fprintf(w, "const varTop = 0x%x\n\n", b.varTop))
|
||||
p(fmt.Fprintln(w, "var locales = [...]tableIndex{"))
|
||||
for _, loc := range b.locale {
|
||||
if loc.id == "und" {
|
||||
p(t.fprintIndex(w, loc.index.handle, loc.id))
|
||||
}
|
||||
}
|
||||
for _, loc := range b.locale {
|
||||
if loc.id != "und" {
|
||||
p(t.fprintIndex(w, loc.index.handle, loc.id))
|
||||
}
|
||||
}
|
||||
p(fmt.Fprint(w, "}\n\n"))
|
||||
n, _, err = t.fprint(w, "main")
|
||||
return
|
||||
}
|
||||
|
||||
// reproducibleFromNFKD checks whether the given expansion could be generated
|
||||
// from an NFKD expansion.
|
||||
func reproducibleFromNFKD(e *entry, exp, nfkd []rawCE) bool {
|
||||
// Length must be equal.
|
||||
if len(exp) != len(nfkd) {
|
||||
return false
|
||||
}
|
||||
for i, ce := range exp {
|
||||
// Primary and secondary values should be equal.
|
||||
if ce.w[0] != nfkd[i].w[0] || ce.w[1] != nfkd[i].w[1] {
|
||||
return false
|
||||
}
|
||||
// Tertiary values should be equal to maxTertiary for third element onwards.
|
||||
// TODO: there seem to be a lot of cases in CLDR (e.g. ㏭ in zh.xml) that can
|
||||
// simply be dropped. Try this out by dropping the following code.
|
||||
if i >= 2 && ce.w[2] != maxTertiary {
|
||||
return false
|
||||
}
|
||||
if _, err := makeCE(ce); err != nil {
|
||||
// Simply return false. The error will be caught elsewhere.
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func simplify(o *ordering) {
|
||||
// Runes that are a starter of a contraction should not be removed.
|
||||
// (To date, there is only Kannada character 0CCA.)
|
||||
keep := make(map[rune]bool)
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
if len(e.runes) > 1 {
|
||||
keep[e.runes[0]] = true
|
||||
}
|
||||
}
|
||||
// Tag entries for which the runes NFKD decompose to identical values.
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
s := e.str
|
||||
nfkd := norm.NFKD.String(s)
|
||||
nfd := norm.NFD.String(s)
|
||||
if e.decompose || len(e.runes) > 1 || len(e.elems) == 1 || keep[e.runes[0]] || nfkd == nfd {
|
||||
continue
|
||||
}
|
||||
if reproducibleFromNFKD(e, e.elems, o.genColElems(nfkd)) {
|
||||
e.decompose = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// appendExpansion converts the given collation sequence to
|
||||
// collation elements and adds them to the expansion table.
|
||||
// It returns an index to the expansion table.
|
||||
func (b *Builder) appendExpansion(e *entry) int {
|
||||
t := b.t
|
||||
i := len(t.ExpandElem)
|
||||
ce := uint32(len(e.elems))
|
||||
t.ExpandElem = append(t.ExpandElem, ce)
|
||||
for _, w := range e.elems {
|
||||
ce, err := makeCE(w)
|
||||
if err != nil {
|
||||
b.error(err)
|
||||
return -1
|
||||
}
|
||||
t.ExpandElem = append(t.ExpandElem, ce)
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
// processExpansions extracts data necessary to generate
|
||||
// the extraction tables.
|
||||
func (b *Builder) processExpansions(o *ordering) {
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
if !e.expansion() {
|
||||
continue
|
||||
}
|
||||
key := fmt.Sprintf("%v", e.elems)
|
||||
i, ok := b.expIndex[key]
|
||||
if !ok {
|
||||
i = b.appendExpansion(e)
|
||||
b.expIndex[key] = i
|
||||
}
|
||||
e.expansionIndex = i
|
||||
}
|
||||
}
|
||||
|
||||
func (b *Builder) processContractions(o *ordering) {
|
||||
// Collate contractions per starter rune.
|
||||
starters := []rune{}
|
||||
cm := make(map[rune][]*entry)
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
if e.contraction() {
|
||||
if len(e.str) > b.t.MaxContractLen {
|
||||
b.t.MaxContractLen = len(e.str)
|
||||
}
|
||||
r := e.runes[0]
|
||||
if _, ok := cm[r]; !ok {
|
||||
starters = append(starters, r)
|
||||
}
|
||||
cm[r] = append(cm[r], e)
|
||||
}
|
||||
}
|
||||
// Add entries of single runes that are at a start of a contraction.
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
if !e.contraction() {
|
||||
r := e.runes[0]
|
||||
if _, ok := cm[r]; ok {
|
||||
cm[r] = append(cm[r], e)
|
||||
}
|
||||
}
|
||||
}
|
||||
// Build the tries for the contractions.
|
||||
t := b.t
|
||||
for _, r := range starters {
|
||||
l := cm[r]
|
||||
// Compute suffix strings. There are 31 different contraction suffix
|
||||
// sets for 715 contractions and 82 contraction starter runes as of
|
||||
// version 6.0.0.
|
||||
sufx := []string{}
|
||||
hasSingle := false
|
||||
for _, e := range l {
|
||||
if len(e.runes) > 1 {
|
||||
sufx = append(sufx, string(e.runes[1:]))
|
||||
} else {
|
||||
hasSingle = true
|
||||
}
|
||||
}
|
||||
if !hasSingle {
|
||||
b.error(fmt.Errorf("no single entry for starter rune %U found", r))
|
||||
continue
|
||||
}
|
||||
// Unique the suffix set.
|
||||
sort.Strings(sufx)
|
||||
key := strings.Join(sufx, "\n")
|
||||
handle, ok := b.ctHandle[key]
|
||||
if !ok {
|
||||
var err error
|
||||
handle, err = appendTrie(&t.ContractTries, sufx)
|
||||
if err != nil {
|
||||
b.error(err)
|
||||
}
|
||||
b.ctHandle[key] = handle
|
||||
}
|
||||
// Bucket sort entries in index order.
|
||||
es := make([]*entry, len(l))
|
||||
for _, e := range l {
|
||||
var p, sn int
|
||||
if len(e.runes) > 1 {
|
||||
str := []byte(string(e.runes[1:]))
|
||||
p, sn = lookup(&t.ContractTries, handle, str)
|
||||
if sn != len(str) {
|
||||
log.Fatalf("%s: processContractions: unexpected length for '%X'; len=%d; want %d", o.id, e.runes, sn, len(str))
|
||||
}
|
||||
}
|
||||
if es[p] != nil {
|
||||
log.Fatalf("%s: multiple contractions for position %d for rune %U", o.id, p, e.runes[0])
|
||||
}
|
||||
es[p] = e
|
||||
}
|
||||
// Create collation elements for contractions.
|
||||
elems := []uint32{}
|
||||
for _, e := range es {
|
||||
ce, err := e.encodeBase()
|
||||
b.errorID(o.id, err)
|
||||
elems = append(elems, ce)
|
||||
}
|
||||
key = fmt.Sprintf("%v", elems)
|
||||
i, ok := b.ctElem[key]
|
||||
if !ok {
|
||||
i = len(t.ContractElem)
|
||||
b.ctElem[key] = i
|
||||
t.ContractElem = append(t.ContractElem, elems...)
|
||||
}
|
||||
// Store info in entry for starter rune.
|
||||
es[0].contractionIndex = i
|
||||
es[0].contractionHandle = handle
|
||||
}
|
||||
}
|
||||
294
vendor/golang.org/x/text/collate/build/colelem.go
generated
vendored
Normal file
294
vendor/golang.org/x/text/collate/build/colelem.go
generated
vendored
Normal file
|
|
@ -0,0 +1,294 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package build
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/internal/colltab"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultSecondary = 0x20
|
||||
defaultTertiary = 0x2
|
||||
maxTertiary = 0x1F
|
||||
)
|
||||
|
||||
type rawCE struct {
|
||||
w []int
|
||||
ccc uint8
|
||||
}
|
||||
|
||||
func makeRawCE(w []int, ccc uint8) rawCE {
|
||||
ce := rawCE{w: make([]int, 4), ccc: ccc}
|
||||
copy(ce.w, w)
|
||||
return ce
|
||||
}
|
||||
|
||||
// A collation element is represented as an uint32.
|
||||
// In the typical case, a rune maps to a single collation element. If a rune
|
||||
// can be the start of a contraction or expands into multiple collation elements,
|
||||
// then the collation element that is associated with a rune will have a special
|
||||
// form to represent such m to n mappings. Such special collation elements
|
||||
// have a value >= 0x80000000.
|
||||
|
||||
const (
|
||||
maxPrimaryBits = 21
|
||||
maxSecondaryBits = 12
|
||||
maxTertiaryBits = 8
|
||||
)
|
||||
|
||||
func makeCE(ce rawCE) (uint32, error) {
|
||||
v, e := colltab.MakeElem(ce.w[0], ce.w[1], ce.w[2], ce.ccc)
|
||||
return uint32(v), e
|
||||
}
|
||||
|
||||
// For contractions, collation elements are of the form
|
||||
// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where
|
||||
// - n* is the size of the first node in the contraction trie.
|
||||
// - i* is the index of the first node in the contraction trie.
|
||||
// - b* is the offset into the contraction collation element table.
|
||||
// See contract.go for details on the contraction trie.
|
||||
const (
|
||||
contractID = 0xC0000000
|
||||
maxNBits = 4
|
||||
maxTrieIndexBits = 12
|
||||
maxContractOffsetBits = 13
|
||||
)
|
||||
|
||||
func makeContractIndex(h ctHandle, offset int) (uint32, error) {
|
||||
if h.n >= 1<<maxNBits {
|
||||
return 0, fmt.Errorf("size of contraction trie node too large: %d >= %d", h.n, 1<<maxNBits)
|
||||
}
|
||||
if h.index >= 1<<maxTrieIndexBits {
|
||||
return 0, fmt.Errorf("size of contraction trie offset too large: %d >= %d", h.index, 1<<maxTrieIndexBits)
|
||||
}
|
||||
if offset >= 1<<maxContractOffsetBits {
|
||||
return 0, fmt.Errorf("contraction offset out of bounds: %x >= %x", offset, 1<<maxContractOffsetBits)
|
||||
}
|
||||
ce := uint32(contractID)
|
||||
ce += uint32(offset << (maxNBits + maxTrieIndexBits))
|
||||
ce += uint32(h.index << maxNBits)
|
||||
ce += uint32(h.n)
|
||||
return ce, nil
|
||||
}
|
||||
|
||||
// For expansions, collation elements are of the form
|
||||
// 11100000 00000000 bbbbbbbb bbbbbbbb,
|
||||
// where b* is the index into the expansion sequence table.
|
||||
const (
|
||||
expandID = 0xE0000000
|
||||
maxExpandIndexBits = 16
|
||||
)
|
||||
|
||||
func makeExpandIndex(index int) (uint32, error) {
|
||||
if index >= 1<<maxExpandIndexBits {
|
||||
return 0, fmt.Errorf("expansion index out of bounds: %x >= %x", index, 1<<maxExpandIndexBits)
|
||||
}
|
||||
return expandID + uint32(index), nil
|
||||
}
|
||||
|
||||
// Each list of collation elements corresponding to an expansion starts with
|
||||
// a header indicating the length of the sequence.
|
||||
func makeExpansionHeader(n int) (uint32, error) {
|
||||
return uint32(n), nil
|
||||
}
|
||||
|
||||
// Some runes can be expanded using NFKD decomposition. Instead of storing the full
|
||||
// sequence of collation elements, we decompose the rune and lookup the collation
|
||||
// elements for each rune in the decomposition and modify the tertiary weights.
|
||||
// The collation element, in this case, is of the form
|
||||
// 11110000 00000000 wwwwwwww vvvvvvvv, where
|
||||
// - v* is the replacement tertiary weight for the first rune,
|
||||
// - w* is the replacement tertiary weight for the second rune,
|
||||
// Tertiary weights of subsequent runes should be replaced with maxTertiary.
|
||||
// See http://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.
|
||||
const (
|
||||
decompID = 0xF0000000
|
||||
)
|
||||
|
||||
func makeDecompose(t1, t2 int) (uint32, error) {
|
||||
if t1 >= 256 || t1 < 0 {
|
||||
return 0, fmt.Errorf("first tertiary weight out of bounds: %d >= 256", t1)
|
||||
}
|
||||
if t2 >= 256 || t2 < 0 {
|
||||
return 0, fmt.Errorf("second tertiary weight out of bounds: %d >= 256", t2)
|
||||
}
|
||||
return uint32(t2<<8+t1) + decompID, nil
|
||||
}
|
||||
|
||||
const (
|
||||
// These constants were taken from http://www.unicode.org/versions/Unicode6.0.0/ch12.pdf.
|
||||
minUnified rune = 0x4E00
|
||||
maxUnified = 0x9FFF
|
||||
minCompatibility = 0xF900
|
||||
maxCompatibility = 0xFAFF
|
||||
minRare = 0x3400
|
||||
maxRare = 0x4DBF
|
||||
)
|
||||
const (
|
||||
commonUnifiedOffset = 0x10000
|
||||
rareUnifiedOffset = 0x20000 // largest rune in common is U+FAFF
|
||||
otherOffset = 0x50000 // largest rune in rare is U+2FA1D
|
||||
illegalOffset = otherOffset + int(unicode.MaxRune)
|
||||
maxPrimary = illegalOffset + 1
|
||||
)
|
||||
|
||||
// implicitPrimary returns the primary weight for the a rune
|
||||
// for which there is no entry for the rune in the collation table.
|
||||
// We take a different approach from the one specified in
|
||||
// http://unicode.org/reports/tr10/#Implicit_Weights,
|
||||
// but preserve the resulting relative ordering of the runes.
|
||||
func implicitPrimary(r rune) int {
|
||||
if unicode.Is(unicode.Ideographic, r) {
|
||||
if r >= minUnified && r <= maxUnified {
|
||||
// The most common case for CJK.
|
||||
return int(r) + commonUnifiedOffset
|
||||
}
|
||||
if r >= minCompatibility && r <= maxCompatibility {
|
||||
// This will typically not hit. The DUCET explicitly specifies mappings
|
||||
// for all characters that do not decompose.
|
||||
return int(r) + commonUnifiedOffset
|
||||
}
|
||||
return int(r) + rareUnifiedOffset
|
||||
}
|
||||
return int(r) + otherOffset
|
||||
}
|
||||
|
||||
// convertLargeWeights converts collation elements with large
|
||||
// primaries (either double primaries or for illegal runes)
|
||||
// to our own representation.
|
||||
// A CJK character C is represented in the DUCET as
|
||||
// [.FBxx.0020.0002.C][.BBBB.0000.0000.C]
|
||||
// We will rewrite these characters to a single CE.
|
||||
// We assume the CJK values start at 0x8000.
|
||||
// See http://unicode.org/reports/tr10/#Implicit_Weights
|
||||
func convertLargeWeights(elems []rawCE) (res []rawCE, err error) {
|
||||
const (
|
||||
cjkPrimaryStart = 0xFB40
|
||||
rarePrimaryStart = 0xFB80
|
||||
otherPrimaryStart = 0xFBC0
|
||||
illegalPrimary = 0xFFFE
|
||||
highBitsMask = 0x3F
|
||||
lowBitsMask = 0x7FFF
|
||||
lowBitsFlag = 0x8000
|
||||
shiftBits = 15
|
||||
)
|
||||
for i := 0; i < len(elems); i++ {
|
||||
ce := elems[i].w
|
||||
p := ce[0]
|
||||
if p < cjkPrimaryStart {
|
||||
continue
|
||||
}
|
||||
if p > 0xFFFF {
|
||||
return elems, fmt.Errorf("found primary weight %X; should be <= 0xFFFF", p)
|
||||
}
|
||||
if p >= illegalPrimary {
|
||||
ce[0] = illegalOffset + p - illegalPrimary
|
||||
} else {
|
||||
if i+1 >= len(elems) {
|
||||
return elems, fmt.Errorf("second part of double primary weight missing: %v", elems)
|
||||
}
|
||||
if elems[i+1].w[0]&lowBitsFlag == 0 {
|
||||
return elems, fmt.Errorf("malformed second part of double primary weight: %v", elems)
|
||||
}
|
||||
np := ((p & highBitsMask) << shiftBits) + elems[i+1].w[0]&lowBitsMask
|
||||
switch {
|
||||
case p < rarePrimaryStart:
|
||||
np += commonUnifiedOffset
|
||||
case p < otherPrimaryStart:
|
||||
np += rareUnifiedOffset
|
||||
default:
|
||||
p += otherOffset
|
||||
}
|
||||
ce[0] = np
|
||||
for j := i + 1; j+1 < len(elems); j++ {
|
||||
elems[j] = elems[j+1]
|
||||
}
|
||||
elems = elems[:len(elems)-1]
|
||||
}
|
||||
}
|
||||
return elems, nil
|
||||
}
|
||||
|
||||
// nextWeight computes the first possible collation weights following elems
|
||||
// for the given level.
|
||||
func nextWeight(level colltab.Level, elems []rawCE) []rawCE {
|
||||
if level == colltab.Identity {
|
||||
next := make([]rawCE, len(elems))
|
||||
copy(next, elems)
|
||||
return next
|
||||
}
|
||||
next := []rawCE{makeRawCE(elems[0].w, elems[0].ccc)}
|
||||
next[0].w[level]++
|
||||
if level < colltab.Secondary {
|
||||
next[0].w[colltab.Secondary] = defaultSecondary
|
||||
}
|
||||
if level < colltab.Tertiary {
|
||||
next[0].w[colltab.Tertiary] = defaultTertiary
|
||||
}
|
||||
// Filter entries that cannot influence ordering.
|
||||
for _, ce := range elems[1:] {
|
||||
skip := true
|
||||
for i := colltab.Primary; i < level; i++ {
|
||||
skip = skip && ce.w[i] == 0
|
||||
}
|
||||
if !skip {
|
||||
next = append(next, ce)
|
||||
}
|
||||
}
|
||||
return next
|
||||
}
|
||||
|
||||
func nextVal(elems []rawCE, i int, level colltab.Level) (index, value int) {
|
||||
for ; i < len(elems) && elems[i].w[level] == 0; i++ {
|
||||
}
|
||||
if i < len(elems) {
|
||||
return i, elems[i].w[level]
|
||||
}
|
||||
return i, 0
|
||||
}
|
||||
|
||||
// compareWeights returns -1 if a < b, 1 if a > b, or 0 otherwise.
|
||||
// It also returns the collation level at which the difference is found.
|
||||
func compareWeights(a, b []rawCE) (result int, level colltab.Level) {
|
||||
for level := colltab.Primary; level < colltab.Identity; level++ {
|
||||
var va, vb int
|
||||
for ia, ib := 0, 0; ia < len(a) || ib < len(b); ia, ib = ia+1, ib+1 {
|
||||
ia, va = nextVal(a, ia, level)
|
||||
ib, vb = nextVal(b, ib, level)
|
||||
if va != vb {
|
||||
if va < vb {
|
||||
return -1, level
|
||||
} else {
|
||||
return 1, level
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0, colltab.Identity
|
||||
}
|
||||
|
||||
func equalCE(a, b rawCE) bool {
|
||||
for i := 0; i < 3; i++ {
|
||||
if b.w[i] != a.w[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func equalCEArrays(a, b []rawCE) bool {
|
||||
if len(a) != len(b) {
|
||||
return false
|
||||
}
|
||||
for i := range a {
|
||||
if !equalCE(a[i], b[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
309
vendor/golang.org/x/text/collate/build/contract.go
generated
vendored
Normal file
309
vendor/golang.org/x/text/collate/build/contract.go
generated
vendored
Normal file
|
|
@ -0,0 +1,309 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package build
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/colltab"
|
||||
)
|
||||
|
||||
// This file contains code for detecting contractions and generating
|
||||
// the necessary tables.
|
||||
// Any Unicode Collation Algorithm (UCA) table entry that has more than
|
||||
// one rune one the left-hand side is called a contraction.
|
||||
// See http://www.unicode.org/reports/tr10/#Contractions for more details.
|
||||
//
|
||||
// We define the following terms:
|
||||
// initial: a rune that appears as the first rune in a contraction.
|
||||
// suffix: a sequence of runes succeeding the initial rune
|
||||
// in a given contraction.
|
||||
// non-initial: a rune that appears in a suffix.
|
||||
//
|
||||
// A rune may be both an initial and a non-initial and may be so in
|
||||
// many contractions. An initial may typically also appear by itself.
|
||||
// In case of ambiguities, the UCA requires we match the longest
|
||||
// contraction.
|
||||
//
|
||||
// Many contraction rules share the same set of possible suffixes.
|
||||
// We store sets of suffixes in a trie that associates an index with
|
||||
// each suffix in the set. This index can be used to look up a
|
||||
// collation element associated with the (starter rune, suffix) pair.
|
||||
//
|
||||
// The trie is defined on a UTF-8 byte sequence.
|
||||
// The overall trie is represented as an array of ctEntries. Each node of the trie
|
||||
// is represented as a subsequence of ctEntries, where each entry corresponds to
|
||||
// a possible match of a next character in the search string. An entry
|
||||
// also includes the length and offset to the next sequence of entries
|
||||
// to check in case of a match.
|
||||
|
||||
const (
|
||||
final = 0
|
||||
noIndex = 0xFF
|
||||
)
|
||||
|
||||
// ctEntry associates to a matching byte an offset and/or next sequence of
|
||||
// bytes to check. A ctEntry c is called final if a match means that the
|
||||
// longest suffix has been found. An entry c is final if c.N == 0.
|
||||
// A single final entry can match a range of characters to an offset.
|
||||
// A non-final entry always matches a single byte. Note that a non-final
|
||||
// entry might still resemble a completed suffix.
|
||||
// Examples:
|
||||
// The suffix strings "ab" and "ac" can be represented as:
|
||||
// []ctEntry{
|
||||
// {'a', 1, 1, noIndex}, // 'a' by itself does not match, so i is 0xFF.
|
||||
// {'b', 'c', 0, 1}, // "ab" -> 1, "ac" -> 2
|
||||
// }
|
||||
//
|
||||
// The suffix strings "ab", "abc", "abd", and "abcd" can be represented as:
|
||||
// []ctEntry{
|
||||
// {'a', 1, 1, noIndex}, // 'a' must be followed by 'b'.
|
||||
// {'b', 1, 2, 1}, // "ab" -> 1, may be followed by 'c' or 'd'.
|
||||
// {'d', 'd', final, 3}, // "abd" -> 3
|
||||
// {'c', 4, 1, 2}, // "abc" -> 2, may be followed by 'd'.
|
||||
// {'d', 'd', final, 4}, // "abcd" -> 4
|
||||
// }
|
||||
// See genStateTests in contract_test.go for more examples.
|
||||
type ctEntry struct {
|
||||
L uint8 // non-final: byte value to match; final: lowest match in range.
|
||||
H uint8 // non-final: relative index to next block; final: highest match in range.
|
||||
N uint8 // non-final: length of next block; final: final
|
||||
I uint8 // result offset. Will be noIndex if more bytes are needed to complete.
|
||||
}
|
||||
|
||||
// contractTrieSet holds a set of contraction tries. The tries are stored
|
||||
// consecutively in the entry field.
|
||||
type contractTrieSet []struct{ l, h, n, i uint8 }
|
||||
|
||||
// ctHandle is used to identify a trie in the trie set, consisting in an offset
|
||||
// in the array and the size of the first node.
|
||||
type ctHandle struct {
|
||||
index, n int
|
||||
}
|
||||
|
||||
// appendTrie adds a new trie for the given suffixes to the trie set and returns
|
||||
// a handle to it. The handle will be invalid on error.
|
||||
func appendTrie(ct *colltab.ContractTrieSet, suffixes []string) (ctHandle, error) {
|
||||
es := make([]stridx, len(suffixes))
|
||||
for i, s := range suffixes {
|
||||
es[i].str = s
|
||||
}
|
||||
sort.Sort(offsetSort(es))
|
||||
for i := range es {
|
||||
es[i].index = i + 1
|
||||
}
|
||||
sort.Sort(genidxSort(es))
|
||||
i := len(*ct)
|
||||
n, err := genStates(ct, es)
|
||||
if err != nil {
|
||||
*ct = (*ct)[:i]
|
||||
return ctHandle{}, err
|
||||
}
|
||||
return ctHandle{i, n}, nil
|
||||
}
|
||||
|
||||
// genStates generates ctEntries for a given suffix set and returns
|
||||
// the number of entries for the first node.
|
||||
func genStates(ct *colltab.ContractTrieSet, sis []stridx) (int, error) {
|
||||
if len(sis) == 0 {
|
||||
return 0, fmt.Errorf("genStates: list of suffices must be non-empty")
|
||||
}
|
||||
start := len(*ct)
|
||||
// create entries for differing first bytes.
|
||||
for _, si := range sis {
|
||||
s := si.str
|
||||
if len(s) == 0 {
|
||||
continue
|
||||
}
|
||||
added := false
|
||||
c := s[0]
|
||||
if len(s) > 1 {
|
||||
for j := len(*ct) - 1; j >= start; j-- {
|
||||
if (*ct)[j].L == c {
|
||||
added = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !added {
|
||||
*ct = append(*ct, ctEntry{L: c, I: noIndex})
|
||||
}
|
||||
} else {
|
||||
for j := len(*ct) - 1; j >= start; j-- {
|
||||
// Update the offset for longer suffixes with the same byte.
|
||||
if (*ct)[j].L == c {
|
||||
(*ct)[j].I = uint8(si.index)
|
||||
added = true
|
||||
}
|
||||
// Extend range of final ctEntry, if possible.
|
||||
if (*ct)[j].H+1 == c {
|
||||
(*ct)[j].H = c
|
||||
added = true
|
||||
}
|
||||
}
|
||||
if !added {
|
||||
*ct = append(*ct, ctEntry{L: c, H: c, N: final, I: uint8(si.index)})
|
||||
}
|
||||
}
|
||||
}
|
||||
n := len(*ct) - start
|
||||
// Append nodes for the remainder of the suffixes for each ctEntry.
|
||||
sp := 0
|
||||
for i, end := start, len(*ct); i < end; i++ {
|
||||
fe := (*ct)[i]
|
||||
if fe.H == 0 { // uninitialized non-final
|
||||
ln := len(*ct) - start - n
|
||||
if ln > 0xFF {
|
||||
return 0, fmt.Errorf("genStates: relative block offset too large: %d > 255", ln)
|
||||
}
|
||||
fe.H = uint8(ln)
|
||||
// Find first non-final strings with same byte as current entry.
|
||||
for ; sis[sp].str[0] != fe.L; sp++ {
|
||||
}
|
||||
se := sp + 1
|
||||
for ; se < len(sis) && len(sis[se].str) > 1 && sis[se].str[0] == fe.L; se++ {
|
||||
}
|
||||
sl := sis[sp:se]
|
||||
sp = se
|
||||
for i, si := range sl {
|
||||
sl[i].str = si.str[1:]
|
||||
}
|
||||
nn, err := genStates(ct, sl)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
fe.N = uint8(nn)
|
||||
(*ct)[i] = fe
|
||||
}
|
||||
}
|
||||
sort.Sort(entrySort((*ct)[start : start+n]))
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// There may be both a final and non-final entry for a byte if the byte
|
||||
// is implied in a range of matches in the final entry.
|
||||
// We need to ensure that the non-final entry comes first in that case.
|
||||
type entrySort colltab.ContractTrieSet
|
||||
|
||||
func (fe entrySort) Len() int { return len(fe) }
|
||||
func (fe entrySort) Swap(i, j int) { fe[i], fe[j] = fe[j], fe[i] }
|
||||
func (fe entrySort) Less(i, j int) bool {
|
||||
return fe[i].L > fe[j].L
|
||||
}
|
||||
|
||||
// stridx is used for sorting suffixes and their associated offsets.
|
||||
type stridx struct {
|
||||
str string
|
||||
index int
|
||||
}
|
||||
|
||||
// For computing the offsets, we first sort by size, and then by string.
|
||||
// This ensures that strings that only differ in the last byte by 1
|
||||
// are sorted consecutively in increasing order such that they can
|
||||
// be packed as a range in a final ctEntry.
|
||||
type offsetSort []stridx
|
||||
|
||||
func (si offsetSort) Len() int { return len(si) }
|
||||
func (si offsetSort) Swap(i, j int) { si[i], si[j] = si[j], si[i] }
|
||||
func (si offsetSort) Less(i, j int) bool {
|
||||
if len(si[i].str) != len(si[j].str) {
|
||||
return len(si[i].str) > len(si[j].str)
|
||||
}
|
||||
return si[i].str < si[j].str
|
||||
}
|
||||
|
||||
// For indexing, we want to ensure that strings are sorted in string order, where
|
||||
// for strings with the same prefix, we put longer strings before shorter ones.
|
||||
type genidxSort []stridx
|
||||
|
||||
func (si genidxSort) Len() int { return len(si) }
|
||||
func (si genidxSort) Swap(i, j int) { si[i], si[j] = si[j], si[i] }
|
||||
func (si genidxSort) Less(i, j int) bool {
|
||||
if strings.HasPrefix(si[j].str, si[i].str) {
|
||||
return false
|
||||
}
|
||||
if strings.HasPrefix(si[i].str, si[j].str) {
|
||||
return true
|
||||
}
|
||||
return si[i].str < si[j].str
|
||||
}
|
||||
|
||||
// lookup matches the longest suffix in str and returns the associated offset
|
||||
// and the number of bytes consumed.
|
||||
func lookup(ct *colltab.ContractTrieSet, h ctHandle, str []byte) (index, ns int) {
|
||||
states := (*ct)[h.index:]
|
||||
p := 0
|
||||
n := h.n
|
||||
for i := 0; i < n && p < len(str); {
|
||||
e := states[i]
|
||||
c := str[p]
|
||||
if c >= e.L {
|
||||
if e.L == c {
|
||||
p++
|
||||
if e.I != noIndex {
|
||||
index, ns = int(e.I), p
|
||||
}
|
||||
if e.N != final {
|
||||
// set to new state
|
||||
i, states, n = 0, states[int(e.H)+n:], int(e.N)
|
||||
} else {
|
||||
return
|
||||
}
|
||||
continue
|
||||
} else if e.N == final && c <= e.H {
|
||||
p++
|
||||
return int(c-e.L) + int(e.I), p
|
||||
}
|
||||
}
|
||||
i++
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// print writes the contractTrieSet t as compilable Go code to w. It returns
|
||||
// the total number of bytes written and the size of the resulting data structure in bytes.
|
||||
func print(t *colltab.ContractTrieSet, w io.Writer, name string) (n, size int, err error) {
|
||||
update3 := func(nn, sz int, e error) {
|
||||
n += nn
|
||||
if err == nil {
|
||||
err = e
|
||||
}
|
||||
size += sz
|
||||
}
|
||||
update2 := func(nn int, e error) { update3(nn, 0, e) }
|
||||
|
||||
update3(printArray(*t, w, name))
|
||||
update2(fmt.Fprintf(w, "var %sContractTrieSet = ", name))
|
||||
update3(printStruct(*t, w, name))
|
||||
update2(fmt.Fprintln(w))
|
||||
return
|
||||
}
|
||||
|
||||
func printArray(ct colltab.ContractTrieSet, w io.Writer, name string) (n, size int, err error) {
|
||||
p := func(f string, a ...interface{}) {
|
||||
nn, e := fmt.Fprintf(w, f, a...)
|
||||
n += nn
|
||||
if err == nil {
|
||||
err = e
|
||||
}
|
||||
}
|
||||
size = len(ct) * 4
|
||||
p("// %sCTEntries: %d entries, %d bytes\n", name, len(ct), size)
|
||||
p("var %sCTEntries = [%d]struct{L,H,N,I uint8}{\n", name, len(ct))
|
||||
for _, fe := range ct {
|
||||
p("\t{0x%X, 0x%X, %d, %d},\n", fe.L, fe.H, fe.N, fe.I)
|
||||
}
|
||||
p("}\n")
|
||||
return
|
||||
}
|
||||
|
||||
func printStruct(ct colltab.ContractTrieSet, w io.Writer, name string) (n, size int, err error) {
|
||||
n, err = fmt.Fprintf(w, "colltab.ContractTrieSet( %sCTEntries[:] )", name)
|
||||
size = int(reflect.TypeOf(ct).Size())
|
||||
return
|
||||
}
|
||||
393
vendor/golang.org/x/text/collate/build/order.go
generated
vendored
Normal file
393
vendor/golang.org/x/text/collate/build/order.go
generated
vendored
Normal file
|
|
@ -0,0 +1,393 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package build
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"sort"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
type logicalAnchor int
|
||||
|
||||
const (
|
||||
firstAnchor logicalAnchor = -1
|
||||
noAnchor = 0
|
||||
lastAnchor = 1
|
||||
)
|
||||
|
||||
// entry is used to keep track of a single entry in the collation element table
|
||||
// during building. Examples of entries can be found in the Default Unicode
|
||||
// Collation Element Table.
|
||||
// See http://www.unicode.org/Public/UCA/6.0.0/allkeys.txt.
|
||||
type entry struct {
|
||||
str string // same as string(runes)
|
||||
runes []rune
|
||||
elems []rawCE // the collation elements
|
||||
extend string // weights of extend to be appended to elems
|
||||
before bool // weights relative to next instead of previous.
|
||||
lock bool // entry is used in extension and can no longer be moved.
|
||||
|
||||
// prev, next, and level are used to keep track of tailorings.
|
||||
prev, next *entry
|
||||
level colltab.Level // next differs at this level
|
||||
skipRemove bool // do not unlink when removed
|
||||
|
||||
decompose bool // can use NFKD decomposition to generate elems
|
||||
exclude bool // do not include in table
|
||||
implicit bool // derived, is not included in the list
|
||||
modified bool // entry was modified in tailoring
|
||||
logical logicalAnchor
|
||||
|
||||
expansionIndex int // used to store index into expansion table
|
||||
contractionHandle ctHandle
|
||||
contractionIndex int // index into contraction elements
|
||||
}
|
||||
|
||||
func (e *entry) String() string {
|
||||
return fmt.Sprintf("%X (%q) -> %X (ch:%x; ci:%d, ei:%d)",
|
||||
e.runes, e.str, e.elems, e.contractionHandle, e.contractionIndex, e.expansionIndex)
|
||||
}
|
||||
|
||||
func (e *entry) skip() bool {
|
||||
return e.contraction()
|
||||
}
|
||||
|
||||
func (e *entry) expansion() bool {
|
||||
return !e.decompose && len(e.elems) > 1
|
||||
}
|
||||
|
||||
func (e *entry) contraction() bool {
|
||||
return len(e.runes) > 1
|
||||
}
|
||||
|
||||
func (e *entry) contractionStarter() bool {
|
||||
return e.contractionHandle.n != 0
|
||||
}
|
||||
|
||||
// nextIndexed gets the next entry that needs to be stored in the table.
|
||||
// It returns the entry and the collation level at which the next entry differs
|
||||
// from the current entry.
|
||||
// Entries that can be explicitly derived and logical reset positions are
|
||||
// examples of entries that will not be indexed.
|
||||
func (e *entry) nextIndexed() (*entry, colltab.Level) {
|
||||
level := e.level
|
||||
for e = e.next; e != nil && (e.exclude || len(e.elems) == 0); e = e.next {
|
||||
if e.level < level {
|
||||
level = e.level
|
||||
}
|
||||
}
|
||||
return e, level
|
||||
}
|
||||
|
||||
// remove unlinks entry e from the sorted chain and clears the collation
|
||||
// elements. e may not be at the front or end of the list. This should always
|
||||
// be the case, as the front and end of the list are always logical anchors,
|
||||
// which may not be removed.
|
||||
func (e *entry) remove() {
|
||||
if e.logical != noAnchor {
|
||||
log.Fatalf("may not remove anchor %q", e.str)
|
||||
}
|
||||
// TODO: need to set e.prev.level to e.level if e.level is smaller?
|
||||
e.elems = nil
|
||||
if !e.skipRemove {
|
||||
if e.prev != nil {
|
||||
e.prev.next = e.next
|
||||
}
|
||||
if e.next != nil {
|
||||
e.next.prev = e.prev
|
||||
}
|
||||
}
|
||||
e.skipRemove = false
|
||||
}
|
||||
|
||||
// insertAfter inserts n after e.
|
||||
func (e *entry) insertAfter(n *entry) {
|
||||
if e == n {
|
||||
panic("e == anchor")
|
||||
}
|
||||
if e == nil {
|
||||
panic("unexpected nil anchor")
|
||||
}
|
||||
n.remove()
|
||||
n.decompose = false // redo decomposition test
|
||||
|
||||
n.next = e.next
|
||||
n.prev = e
|
||||
if e.next != nil {
|
||||
e.next.prev = n
|
||||
}
|
||||
e.next = n
|
||||
}
|
||||
|
||||
// insertBefore inserts n before e.
|
||||
func (e *entry) insertBefore(n *entry) {
|
||||
if e == n {
|
||||
panic("e == anchor")
|
||||
}
|
||||
if e == nil {
|
||||
panic("unexpected nil anchor")
|
||||
}
|
||||
n.remove()
|
||||
n.decompose = false // redo decomposition test
|
||||
|
||||
n.prev = e.prev
|
||||
n.next = e
|
||||
if e.prev != nil {
|
||||
e.prev.next = n
|
||||
}
|
||||
e.prev = n
|
||||
}
|
||||
|
||||
func (e *entry) encodeBase() (ce uint32, err error) {
|
||||
switch {
|
||||
case e.expansion():
|
||||
ce, err = makeExpandIndex(e.expansionIndex)
|
||||
default:
|
||||
if e.decompose {
|
||||
log.Fatal("decompose should be handled elsewhere")
|
||||
}
|
||||
ce, err = makeCE(e.elems[0])
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (e *entry) encode() (ce uint32, err error) {
|
||||
if e.skip() {
|
||||
log.Fatal("cannot build colElem for entry that should be skipped")
|
||||
}
|
||||
switch {
|
||||
case e.decompose:
|
||||
t1 := e.elems[0].w[2]
|
||||
t2 := 0
|
||||
if len(e.elems) > 1 {
|
||||
t2 = e.elems[1].w[2]
|
||||
}
|
||||
ce, err = makeDecompose(t1, t2)
|
||||
case e.contractionStarter():
|
||||
ce, err = makeContractIndex(e.contractionHandle, e.contractionIndex)
|
||||
default:
|
||||
if len(e.runes) > 1 {
|
||||
log.Fatal("colElem: contractions are handled in contraction trie")
|
||||
}
|
||||
ce, err = e.encodeBase()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// entryLess returns true if a sorts before b and false otherwise.
|
||||
func entryLess(a, b *entry) bool {
|
||||
if res, _ := compareWeights(a.elems, b.elems); res != 0 {
|
||||
return res == -1
|
||||
}
|
||||
if a.logical != noAnchor {
|
||||
return a.logical == firstAnchor
|
||||
}
|
||||
if b.logical != noAnchor {
|
||||
return b.logical == lastAnchor
|
||||
}
|
||||
return a.str < b.str
|
||||
}
|
||||
|
||||
type sortedEntries []*entry
|
||||
|
||||
func (s sortedEntries) Len() int {
|
||||
return len(s)
|
||||
}
|
||||
|
||||
func (s sortedEntries) Swap(i, j int) {
|
||||
s[i], s[j] = s[j], s[i]
|
||||
}
|
||||
|
||||
func (s sortedEntries) Less(i, j int) bool {
|
||||
return entryLess(s[i], s[j])
|
||||
}
|
||||
|
||||
type ordering struct {
|
||||
id string
|
||||
entryMap map[string]*entry
|
||||
ordered []*entry
|
||||
handle *trieHandle
|
||||
}
|
||||
|
||||
// insert inserts e into both entryMap and ordered.
|
||||
// Note that insert simply appends e to ordered. To reattain a sorted
|
||||
// order, o.sort() should be called.
|
||||
func (o *ordering) insert(e *entry) {
|
||||
if e.logical == noAnchor {
|
||||
o.entryMap[e.str] = e
|
||||
} else {
|
||||
// Use key format as used in UCA rules.
|
||||
o.entryMap[fmt.Sprintf("[%s]", e.str)] = e
|
||||
// Also add index entry for XML format.
|
||||
o.entryMap[fmt.Sprintf("<%s/>", strings.Replace(e.str, " ", "_", -1))] = e
|
||||
}
|
||||
o.ordered = append(o.ordered, e)
|
||||
}
|
||||
|
||||
// newEntry creates a new entry for the given info and inserts it into
|
||||
// the index.
|
||||
func (o *ordering) newEntry(s string, ces []rawCE) *entry {
|
||||
e := &entry{
|
||||
runes: []rune(s),
|
||||
elems: ces,
|
||||
str: s,
|
||||
}
|
||||
o.insert(e)
|
||||
return e
|
||||
}
|
||||
|
||||
// find looks up and returns the entry for the given string.
|
||||
// It returns nil if str is not in the index and if an implicit value
|
||||
// cannot be derived, that is, if str represents more than one rune.
|
||||
func (o *ordering) find(str string) *entry {
|
||||
e := o.entryMap[str]
|
||||
if e == nil {
|
||||
r := []rune(str)
|
||||
if len(r) == 1 {
|
||||
const (
|
||||
firstHangul = 0xAC00
|
||||
lastHangul = 0xD7A3
|
||||
)
|
||||
if r[0] >= firstHangul && r[0] <= lastHangul {
|
||||
ce := []rawCE{}
|
||||
nfd := norm.NFD.String(str)
|
||||
for _, r := range nfd {
|
||||
ce = append(ce, o.find(string(r)).elems...)
|
||||
}
|
||||
e = o.newEntry(nfd, ce)
|
||||
} else {
|
||||
e = o.newEntry(string(r[0]), []rawCE{
|
||||
{w: []int{
|
||||
implicitPrimary(r[0]),
|
||||
defaultSecondary,
|
||||
defaultTertiary,
|
||||
int(r[0]),
|
||||
},
|
||||
},
|
||||
})
|
||||
e.modified = true
|
||||
}
|
||||
e.exclude = true // do not index implicits
|
||||
}
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
// makeRootOrdering returns a newly initialized ordering value and populates
|
||||
// it with a set of logical reset points that can be used as anchors.
|
||||
// The anchors first_tertiary_ignorable and __END__ will always sort at
|
||||
// the beginning and end, respectively. This means that prev and next are non-nil
|
||||
// for any indexed entry.
|
||||
func makeRootOrdering() ordering {
|
||||
const max = unicode.MaxRune
|
||||
o := ordering{
|
||||
entryMap: make(map[string]*entry),
|
||||
}
|
||||
insert := func(typ logicalAnchor, s string, ce []int) {
|
||||
e := &entry{
|
||||
elems: []rawCE{{w: ce}},
|
||||
str: s,
|
||||
exclude: true,
|
||||
logical: typ,
|
||||
}
|
||||
o.insert(e)
|
||||
}
|
||||
insert(firstAnchor, "first tertiary ignorable", []int{0, 0, 0, 0})
|
||||
insert(lastAnchor, "last tertiary ignorable", []int{0, 0, 0, max})
|
||||
insert(lastAnchor, "last primary ignorable", []int{0, defaultSecondary, defaultTertiary, max})
|
||||
insert(lastAnchor, "last non ignorable", []int{maxPrimary, defaultSecondary, defaultTertiary, max})
|
||||
insert(lastAnchor, "__END__", []int{1 << maxPrimaryBits, defaultSecondary, defaultTertiary, max})
|
||||
return o
|
||||
}
|
||||
|
||||
// patchForInsert eleminates entries from the list with more than one collation element.
|
||||
// The next and prev fields of the eliminated entries still point to appropriate
|
||||
// values in the newly created list.
|
||||
// It requires that sort has been called.
|
||||
func (o *ordering) patchForInsert() {
|
||||
for i := 0; i < len(o.ordered)-1; {
|
||||
e := o.ordered[i]
|
||||
lev := e.level
|
||||
n := e.next
|
||||
for ; n != nil && len(n.elems) > 1; n = n.next {
|
||||
if n.level < lev {
|
||||
lev = n.level
|
||||
}
|
||||
n.skipRemove = true
|
||||
}
|
||||
for ; o.ordered[i] != n; i++ {
|
||||
o.ordered[i].level = lev
|
||||
o.ordered[i].next = n
|
||||
o.ordered[i+1].prev = e
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// clone copies all ordering of es into a new ordering value.
|
||||
func (o *ordering) clone() *ordering {
|
||||
o.sort()
|
||||
oo := ordering{
|
||||
entryMap: make(map[string]*entry),
|
||||
}
|
||||
for _, e := range o.ordered {
|
||||
ne := &entry{
|
||||
runes: e.runes,
|
||||
elems: e.elems,
|
||||
str: e.str,
|
||||
decompose: e.decompose,
|
||||
exclude: e.exclude,
|
||||
logical: e.logical,
|
||||
}
|
||||
oo.insert(ne)
|
||||
}
|
||||
oo.sort() // link all ordering.
|
||||
oo.patchForInsert()
|
||||
return &oo
|
||||
}
|
||||
|
||||
// front returns the first entry to be indexed.
|
||||
// It assumes that sort() has been called.
|
||||
func (o *ordering) front() *entry {
|
||||
e := o.ordered[0]
|
||||
if e.prev != nil {
|
||||
log.Panicf("unexpected first entry: %v", e)
|
||||
}
|
||||
// The first entry is always a logical position, which should not be indexed.
|
||||
e, _ = e.nextIndexed()
|
||||
return e
|
||||
}
|
||||
|
||||
// sort sorts all ordering based on their collation elements and initializes
|
||||
// the prev, next, and level fields accordingly.
|
||||
func (o *ordering) sort() {
|
||||
sort.Sort(sortedEntries(o.ordered))
|
||||
l := o.ordered
|
||||
for i := 1; i < len(l); i++ {
|
||||
k := i - 1
|
||||
l[k].next = l[i]
|
||||
_, l[k].level = compareWeights(l[k].elems, l[i].elems)
|
||||
l[i].prev = l[k]
|
||||
}
|
||||
}
|
||||
|
||||
// genColElems generates a collation element array from the runes in str. This
|
||||
// assumes that all collation elements have already been added to the Builder.
|
||||
func (o *ordering) genColElems(str string) []rawCE {
|
||||
elems := []rawCE{}
|
||||
for _, r := range []rune(str) {
|
||||
for _, ce := range o.find(string(r)).elems {
|
||||
if ce.w[0] != 0 || ce.w[1] != 0 || ce.w[2] != 0 {
|
||||
elems = append(elems, ce)
|
||||
}
|
||||
}
|
||||
}
|
||||
return elems
|
||||
}
|
||||
81
vendor/golang.org/x/text/collate/build/table.go
generated
vendored
Normal file
81
vendor/golang.org/x/text/collate/build/table.go
generated
vendored
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package build
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"reflect"
|
||||
|
||||
"golang.org/x/text/internal/colltab"
|
||||
)
|
||||
|
||||
// table is an intermediate structure that roughly resembles the table in collate.
|
||||
type table struct {
|
||||
colltab.Table
|
||||
trie trie
|
||||
root *trieHandle
|
||||
}
|
||||
|
||||
// print writes the table as Go compilable code to w. It prefixes the
|
||||
// variable names with name. It returns the number of bytes written
|
||||
// and the size of the resulting table.
|
||||
func (t *table) fprint(w io.Writer, name string) (n, size int, err error) {
|
||||
update := func(nn, sz int, e error) {
|
||||
n += nn
|
||||
if err == nil {
|
||||
err = e
|
||||
}
|
||||
size += sz
|
||||
}
|
||||
// Write arrays needed for the structure.
|
||||
update(printColElems(w, t.ExpandElem, name+"ExpandElem"))
|
||||
update(printColElems(w, t.ContractElem, name+"ContractElem"))
|
||||
update(t.trie.printArrays(w, name))
|
||||
update(printArray(t.ContractTries, w, name))
|
||||
|
||||
nn, e := fmt.Fprintf(w, "// Total size of %sTable is %d bytes\n", name, size)
|
||||
update(nn, 0, e)
|
||||
return
|
||||
}
|
||||
|
||||
func (t *table) fprintIndex(w io.Writer, h *trieHandle, id string) (n int, err error) {
|
||||
p := func(f string, a ...interface{}) {
|
||||
nn, e := fmt.Fprintf(w, f, a...)
|
||||
n += nn
|
||||
if err == nil {
|
||||
err = e
|
||||
}
|
||||
}
|
||||
p("\t{ // %s\n", id)
|
||||
p("\t\tlookupOffset: 0x%x,\n", h.lookupStart)
|
||||
p("\t\tvaluesOffset: 0x%x,\n", h.valueStart)
|
||||
p("\t},\n")
|
||||
return
|
||||
}
|
||||
|
||||
func printColElems(w io.Writer, a []uint32, name string) (n, sz int, err error) {
|
||||
p := func(f string, a ...interface{}) {
|
||||
nn, e := fmt.Fprintf(w, f, a...)
|
||||
n += nn
|
||||
if err == nil {
|
||||
err = e
|
||||
}
|
||||
}
|
||||
sz = len(a) * int(reflect.TypeOf(uint32(0)).Size())
|
||||
p("// %s: %d entries, %d bytes\n", name, len(a), sz)
|
||||
p("var %s = [%d]uint32 {", name, len(a))
|
||||
for i, c := range a {
|
||||
switch {
|
||||
case i%64 == 0:
|
||||
p("\n\t// Block %d, offset 0x%x\n", i/64, i)
|
||||
case (i%64)%6 == 0:
|
||||
p("\n\t")
|
||||
}
|
||||
p("0x%.8X, ", c)
|
||||
}
|
||||
p("\n}\n\n")
|
||||
return
|
||||
}
|
||||
290
vendor/golang.org/x/text/collate/build/trie.go
generated
vendored
Normal file
290
vendor/golang.org/x/text/collate/build/trie.go
generated
vendored
Normal file
|
|
@ -0,0 +1,290 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// The trie in this file is used to associate the first full character
|
||||
// in a UTF-8 string to a collation element.
|
||||
// All but the last byte in a UTF-8 byte sequence are
|
||||
// used to look up offsets in the index table to be used for the next byte.
|
||||
// The last byte is used to index into a table of collation elements.
|
||||
// This file contains the code for the generation of the trie.
|
||||
|
||||
package build
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"io"
|
||||
"reflect"
|
||||
)
|
||||
|
||||
const (
|
||||
blockSize = 64
|
||||
blockOffset = 2 // Subtract 2 blocks to compensate for the 0x80 added to continuation bytes.
|
||||
)
|
||||
|
||||
type trieHandle struct {
|
||||
lookupStart uint16 // offset in table for first byte
|
||||
valueStart uint16 // offset in table for first byte
|
||||
}
|
||||
|
||||
type trie struct {
|
||||
index []uint16
|
||||
values []uint32
|
||||
}
|
||||
|
||||
// trieNode is the intermediate trie structure used for generating a trie.
|
||||
type trieNode struct {
|
||||
index []*trieNode
|
||||
value []uint32
|
||||
b byte
|
||||
refValue uint16
|
||||
refIndex uint16
|
||||
}
|
||||
|
||||
func newNode() *trieNode {
|
||||
return &trieNode{
|
||||
index: make([]*trieNode, 64),
|
||||
value: make([]uint32, 128), // root node size is 128 instead of 64
|
||||
}
|
||||
}
|
||||
|
||||
func (n *trieNode) isInternal() bool {
|
||||
return n.value != nil
|
||||
}
|
||||
|
||||
func (n *trieNode) insert(r rune, value uint32) {
|
||||
const maskx = 0x3F // mask out two most-significant bits
|
||||
str := string(r)
|
||||
if len(str) == 1 {
|
||||
n.value[str[0]] = value
|
||||
return
|
||||
}
|
||||
for i := 0; i < len(str)-1; i++ {
|
||||
b := str[i] & maskx
|
||||
if n.index == nil {
|
||||
n.index = make([]*trieNode, blockSize)
|
||||
}
|
||||
nn := n.index[b]
|
||||
if nn == nil {
|
||||
nn = &trieNode{}
|
||||
nn.b = b
|
||||
n.index[b] = nn
|
||||
}
|
||||
n = nn
|
||||
}
|
||||
if n.value == nil {
|
||||
n.value = make([]uint32, blockSize)
|
||||
}
|
||||
b := str[len(str)-1] & maskx
|
||||
n.value[b] = value
|
||||
}
|
||||
|
||||
type trieBuilder struct {
|
||||
t *trie
|
||||
|
||||
roots []*trieHandle
|
||||
|
||||
lookupBlocks []*trieNode
|
||||
valueBlocks []*trieNode
|
||||
|
||||
lookupBlockIdx map[uint32]*trieNode
|
||||
valueBlockIdx map[uint32]*trieNode
|
||||
}
|
||||
|
||||
func newTrieBuilder() *trieBuilder {
|
||||
index := &trieBuilder{}
|
||||
index.lookupBlocks = make([]*trieNode, 0)
|
||||
index.valueBlocks = make([]*trieNode, 0)
|
||||
index.lookupBlockIdx = make(map[uint32]*trieNode)
|
||||
index.valueBlockIdx = make(map[uint32]*trieNode)
|
||||
// The third nil is the default null block. The other two blocks
|
||||
// are used to guarantee an offset of at least 3 for each block.
|
||||
index.lookupBlocks = append(index.lookupBlocks, nil, nil, nil)
|
||||
index.t = &trie{}
|
||||
return index
|
||||
}
|
||||
|
||||
func (b *trieBuilder) computeOffsets(n *trieNode) *trieNode {
|
||||
hasher := fnv.New32()
|
||||
if n.index != nil {
|
||||
for i, nn := range n.index {
|
||||
var vi, vv uint16
|
||||
if nn != nil {
|
||||
nn = b.computeOffsets(nn)
|
||||
n.index[i] = nn
|
||||
vi = nn.refIndex
|
||||
vv = nn.refValue
|
||||
}
|
||||
hasher.Write([]byte{byte(vi >> 8), byte(vi)})
|
||||
hasher.Write([]byte{byte(vv >> 8), byte(vv)})
|
||||
}
|
||||
h := hasher.Sum32()
|
||||
nn, ok := b.lookupBlockIdx[h]
|
||||
if !ok {
|
||||
n.refIndex = uint16(len(b.lookupBlocks)) - blockOffset
|
||||
b.lookupBlocks = append(b.lookupBlocks, n)
|
||||
b.lookupBlockIdx[h] = n
|
||||
} else {
|
||||
n = nn
|
||||
}
|
||||
} else {
|
||||
for _, v := range n.value {
|
||||
hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)})
|
||||
}
|
||||
h := hasher.Sum32()
|
||||
nn, ok := b.valueBlockIdx[h]
|
||||
if !ok {
|
||||
n.refValue = uint16(len(b.valueBlocks)) - blockOffset
|
||||
n.refIndex = n.refValue
|
||||
b.valueBlocks = append(b.valueBlocks, n)
|
||||
b.valueBlockIdx[h] = n
|
||||
} else {
|
||||
n = nn
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func (b *trieBuilder) addStartValueBlock(n *trieNode) uint16 {
|
||||
hasher := fnv.New32()
|
||||
for _, v := range n.value[:2*blockSize] {
|
||||
hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)})
|
||||
}
|
||||
h := hasher.Sum32()
|
||||
nn, ok := b.valueBlockIdx[h]
|
||||
if !ok {
|
||||
n.refValue = uint16(len(b.valueBlocks))
|
||||
n.refIndex = n.refValue
|
||||
b.valueBlocks = append(b.valueBlocks, n)
|
||||
// Add a dummy block to accommodate the double block size.
|
||||
b.valueBlocks = append(b.valueBlocks, nil)
|
||||
b.valueBlockIdx[h] = n
|
||||
} else {
|
||||
n = nn
|
||||
}
|
||||
return n.refValue
|
||||
}
|
||||
|
||||
func genValueBlock(t *trie, n *trieNode) {
|
||||
if n != nil {
|
||||
for _, v := range n.value {
|
||||
t.values = append(t.values, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func genLookupBlock(t *trie, n *trieNode) {
|
||||
for _, nn := range n.index {
|
||||
v := uint16(0)
|
||||
if nn != nil {
|
||||
if n.index != nil {
|
||||
v = nn.refIndex
|
||||
} else {
|
||||
v = nn.refValue
|
||||
}
|
||||
}
|
||||
t.index = append(t.index, v)
|
||||
}
|
||||
}
|
||||
|
||||
func (b *trieBuilder) addTrie(n *trieNode) *trieHandle {
|
||||
h := &trieHandle{}
|
||||
b.roots = append(b.roots, h)
|
||||
h.valueStart = b.addStartValueBlock(n)
|
||||
if len(b.roots) == 1 {
|
||||
// We insert a null block after the first start value block.
|
||||
// This ensures that continuation bytes UTF-8 sequences of length
|
||||
// greater than 2 will automatically hit a null block if there
|
||||
// was an undefined entry.
|
||||
b.valueBlocks = append(b.valueBlocks, nil)
|
||||
}
|
||||
n = b.computeOffsets(n)
|
||||
// Offset by one extra block as the first byte starts at 0xC0 instead of 0x80.
|
||||
h.lookupStart = n.refIndex - 1
|
||||
return h
|
||||
}
|
||||
|
||||
// generate generates and returns the trie for n.
|
||||
func (b *trieBuilder) generate() (t *trie, err error) {
|
||||
t = b.t
|
||||
if len(b.valueBlocks) >= 1<<16 {
|
||||
return nil, fmt.Errorf("maximum number of value blocks exceeded (%d > %d)", len(b.valueBlocks), 1<<16)
|
||||
}
|
||||
if len(b.lookupBlocks) >= 1<<16 {
|
||||
return nil, fmt.Errorf("maximum number of lookup blocks exceeded (%d > %d)", len(b.lookupBlocks), 1<<16)
|
||||
}
|
||||
genValueBlock(t, b.valueBlocks[0])
|
||||
genValueBlock(t, &trieNode{value: make([]uint32, 64)})
|
||||
for i := 2; i < len(b.valueBlocks); i++ {
|
||||
genValueBlock(t, b.valueBlocks[i])
|
||||
}
|
||||
n := &trieNode{index: make([]*trieNode, 64)}
|
||||
genLookupBlock(t, n)
|
||||
genLookupBlock(t, n)
|
||||
genLookupBlock(t, n)
|
||||
for i := 3; i < len(b.lookupBlocks); i++ {
|
||||
genLookupBlock(t, b.lookupBlocks[i])
|
||||
}
|
||||
return b.t, nil
|
||||
}
|
||||
|
||||
func (t *trie) printArrays(w io.Writer, name string) (n, size int, err error) {
|
||||
p := func(f string, a ...interface{}) {
|
||||
nn, e := fmt.Fprintf(w, f, a...)
|
||||
n += nn
|
||||
if err == nil {
|
||||
err = e
|
||||
}
|
||||
}
|
||||
nv := len(t.values)
|
||||
p("// %sValues: %d entries, %d bytes\n", name, nv, nv*4)
|
||||
p("// Block 2 is the null block.\n")
|
||||
p("var %sValues = [%d]uint32 {", name, nv)
|
||||
var printnewline bool
|
||||
for i, v := range t.values {
|
||||
if i%blockSize == 0 {
|
||||
p("\n\t// Block %#x, offset %#x", i/blockSize, i)
|
||||
}
|
||||
if i%4 == 0 {
|
||||
printnewline = true
|
||||
}
|
||||
if v != 0 {
|
||||
if printnewline {
|
||||
p("\n\t")
|
||||
printnewline = false
|
||||
}
|
||||
p("%#04x:%#08x, ", i, v)
|
||||
}
|
||||
}
|
||||
p("\n}\n\n")
|
||||
ni := len(t.index)
|
||||
p("// %sLookup: %d entries, %d bytes\n", name, ni, ni*2)
|
||||
p("// Block 0 is the null block.\n")
|
||||
p("var %sLookup = [%d]uint16 {", name, ni)
|
||||
printnewline = false
|
||||
for i, v := range t.index {
|
||||
if i%blockSize == 0 {
|
||||
p("\n\t// Block %#x, offset %#x", i/blockSize, i)
|
||||
}
|
||||
if i%8 == 0 {
|
||||
printnewline = true
|
||||
}
|
||||
if v != 0 {
|
||||
if printnewline {
|
||||
p("\n\t")
|
||||
printnewline = false
|
||||
}
|
||||
p("%#03x:%#02x, ", i, v)
|
||||
}
|
||||
}
|
||||
p("\n}\n\n")
|
||||
return n, nv*4 + ni*2, err
|
||||
}
|
||||
|
||||
func (t *trie) printStruct(w io.Writer, handle *trieHandle, name string) (n, sz int, err error) {
|
||||
const msg = "trie{ %sLookup[%d:], %sValues[%d:], %sLookup[:], %sValues[:]}"
|
||||
n, err = fmt.Fprintf(w, msg, name, handle.lookupStart*blockSize, name, handle.valueStart*blockSize, name, name)
|
||||
sz += int(reflect.TypeOf(trie{}).Size())
|
||||
return
|
||||
}
|
||||
403
vendor/golang.org/x/text/collate/collate.go
generated
vendored
Normal file
403
vendor/golang.org/x/text/collate/collate.go
generated
vendored
Normal file
|
|
@ -0,0 +1,403 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// TODO: remove hard-coded versions when we have implemented fractional weights.
|
||||
// The current implementation is incompatible with later CLDR versions.
|
||||
//go:generate go run maketables.go -cldr=23 -unicode=6.2.0
|
||||
|
||||
// Package collate contains types for comparing and sorting Unicode strings
|
||||
// according to a given collation order.
|
||||
package collate // import "golang.org/x/text/collate"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
// Collator provides functionality for comparing strings for a given
|
||||
// collation order.
|
||||
type Collator struct {
|
||||
options
|
||||
|
||||
sorter sorter
|
||||
|
||||
_iter [2]iter
|
||||
}
|
||||
|
||||
func (c *Collator) iter(i int) *iter {
|
||||
// TODO: evaluate performance for making the second iterator optional.
|
||||
return &c._iter[i]
|
||||
}
|
||||
|
||||
// Supported returns the list of languages for which collating differs from its parent.
|
||||
func Supported() []language.Tag {
|
||||
// TODO: use language.Coverage instead.
|
||||
|
||||
t := make([]language.Tag, len(tags))
|
||||
copy(t, tags)
|
||||
return t
|
||||
}
|
||||
|
||||
func init() {
|
||||
ids := strings.Split(availableLocales, ",")
|
||||
tags = make([]language.Tag, len(ids))
|
||||
for i, s := range ids {
|
||||
tags[i] = language.Raw.MustParse(s)
|
||||
}
|
||||
}
|
||||
|
||||
var tags []language.Tag
|
||||
|
||||
// New returns a new Collator initialized for the given locale.
|
||||
func New(t language.Tag, o ...Option) *Collator {
|
||||
index := colltab.MatchLang(t, tags)
|
||||
c := newCollator(getTable(locales[index]))
|
||||
|
||||
// Set options from the user-supplied tag.
|
||||
c.setFromTag(t)
|
||||
|
||||
// Set the user-supplied options.
|
||||
c.setOptions(o)
|
||||
|
||||
c.init()
|
||||
return c
|
||||
}
|
||||
|
||||
// NewFromTable returns a new Collator for the given Weighter.
|
||||
func NewFromTable(w colltab.Weighter, o ...Option) *Collator {
|
||||
c := newCollator(w)
|
||||
c.setOptions(o)
|
||||
c.init()
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *Collator) init() {
|
||||
if c.numeric {
|
||||
c.t = colltab.NewNumericWeighter(c.t)
|
||||
}
|
||||
c._iter[0].init(c)
|
||||
c._iter[1].init(c)
|
||||
}
|
||||
|
||||
// Buffer holds keys generated by Key and KeyString.
|
||||
type Buffer struct {
|
||||
buf [4096]byte
|
||||
key []byte
|
||||
}
|
||||
|
||||
func (b *Buffer) init() {
|
||||
if b.key == nil {
|
||||
b.key = b.buf[:0]
|
||||
}
|
||||
}
|
||||
|
||||
// Reset clears the buffer from previous results generated by Key and KeyString.
|
||||
func (b *Buffer) Reset() {
|
||||
b.key = b.key[:0]
|
||||
}
|
||||
|
||||
// Compare returns an integer comparing the two byte slices.
|
||||
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
|
||||
func (c *Collator) Compare(a, b []byte) int {
|
||||
// TODO: skip identical prefixes once we have a fast way to detect if a rune is
|
||||
// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
|
||||
c.iter(0).SetInput(a)
|
||||
c.iter(1).SetInput(b)
|
||||
if res := c.compare(); res != 0 {
|
||||
return res
|
||||
}
|
||||
if !c.ignore[colltab.Identity] {
|
||||
return bytes.Compare(a, b)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// CompareString returns an integer comparing the two strings.
|
||||
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
|
||||
func (c *Collator) CompareString(a, b string) int {
|
||||
// TODO: skip identical prefixes once we have a fast way to detect if a rune is
|
||||
// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
|
||||
c.iter(0).SetInputString(a)
|
||||
c.iter(1).SetInputString(b)
|
||||
if res := c.compare(); res != 0 {
|
||||
return res
|
||||
}
|
||||
if !c.ignore[colltab.Identity] {
|
||||
if a < b {
|
||||
return -1
|
||||
} else if a > b {
|
||||
return 1
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func compareLevel(f func(i *iter) int, a, b *iter) int {
|
||||
a.pce = 0
|
||||
b.pce = 0
|
||||
for {
|
||||
va := f(a)
|
||||
vb := f(b)
|
||||
if va != vb {
|
||||
if va < vb {
|
||||
return -1
|
||||
}
|
||||
return 1
|
||||
} else if va == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (c *Collator) compare() int {
|
||||
ia, ib := c.iter(0), c.iter(1)
|
||||
// Process primary level
|
||||
if c.alternate != altShifted {
|
||||
// TODO: implement script reordering
|
||||
if res := compareLevel((*iter).nextPrimary, ia, ib); res != 0 {
|
||||
return res
|
||||
}
|
||||
} else {
|
||||
// TODO: handle shifted
|
||||
}
|
||||
if !c.ignore[colltab.Secondary] {
|
||||
f := (*iter).nextSecondary
|
||||
if c.backwards {
|
||||
f = (*iter).prevSecondary
|
||||
}
|
||||
if res := compareLevel(f, ia, ib); res != 0 {
|
||||
return res
|
||||
}
|
||||
}
|
||||
// TODO: special case handling (Danish?)
|
||||
if !c.ignore[colltab.Tertiary] || c.caseLevel {
|
||||
if res := compareLevel((*iter).nextTertiary, ia, ib); res != 0 {
|
||||
return res
|
||||
}
|
||||
if !c.ignore[colltab.Quaternary] {
|
||||
if res := compareLevel((*iter).nextQuaternary, ia, ib); res != 0 {
|
||||
return res
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// Key returns the collation key for str.
|
||||
// Passing the buffer buf may avoid memory allocations.
|
||||
// The returned slice will point to an allocation in Buffer and will remain
|
||||
// valid until the next call to buf.Reset().
|
||||
func (c *Collator) Key(buf *Buffer, str []byte) []byte {
|
||||
// See http://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
|
||||
buf.init()
|
||||
return c.key(buf, c.getColElems(str))
|
||||
}
|
||||
|
||||
// KeyFromString returns the collation key for str.
|
||||
// Passing the buffer buf may avoid memory allocations.
|
||||
// The returned slice will point to an allocation in Buffer and will retain
|
||||
// valid until the next call to buf.ResetKeys().
|
||||
func (c *Collator) KeyFromString(buf *Buffer, str string) []byte {
|
||||
// See http://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
|
||||
buf.init()
|
||||
return c.key(buf, c.getColElemsString(str))
|
||||
}
|
||||
|
||||
func (c *Collator) key(buf *Buffer, w []colltab.Elem) []byte {
|
||||
processWeights(c.alternate, c.t.Top(), w)
|
||||
kn := len(buf.key)
|
||||
c.keyFromElems(buf, w)
|
||||
return buf.key[kn:]
|
||||
}
|
||||
|
||||
func (c *Collator) getColElems(str []byte) []colltab.Elem {
|
||||
i := c.iter(0)
|
||||
i.SetInput(str)
|
||||
for i.Next() {
|
||||
}
|
||||
return i.Elems
|
||||
}
|
||||
|
||||
func (c *Collator) getColElemsString(str string) []colltab.Elem {
|
||||
i := c.iter(0)
|
||||
i.SetInputString(str)
|
||||
for i.Next() {
|
||||
}
|
||||
return i.Elems
|
||||
}
|
||||
|
||||
type iter struct {
|
||||
wa [512]colltab.Elem
|
||||
|
||||
colltab.Iter
|
||||
pce int
|
||||
}
|
||||
|
||||
func (i *iter) init(c *Collator) {
|
||||
i.Weighter = c.t
|
||||
i.Elems = i.wa[:0]
|
||||
}
|
||||
|
||||
func (i *iter) nextPrimary() int {
|
||||
for {
|
||||
for ; i.pce < i.N; i.pce++ {
|
||||
if v := i.Elems[i.pce].Primary(); v != 0 {
|
||||
i.pce++
|
||||
return v
|
||||
}
|
||||
}
|
||||
if !i.Next() {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
panic("should not reach here")
|
||||
}
|
||||
|
||||
func (i *iter) nextSecondary() int {
|
||||
for ; i.pce < len(i.Elems); i.pce++ {
|
||||
if v := i.Elems[i.pce].Secondary(); v != 0 {
|
||||
i.pce++
|
||||
return v
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (i *iter) prevSecondary() int {
|
||||
for ; i.pce < len(i.Elems); i.pce++ {
|
||||
if v := i.Elems[len(i.Elems)-i.pce-1].Secondary(); v != 0 {
|
||||
i.pce++
|
||||
return v
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (i *iter) nextTertiary() int {
|
||||
for ; i.pce < len(i.Elems); i.pce++ {
|
||||
if v := i.Elems[i.pce].Tertiary(); v != 0 {
|
||||
i.pce++
|
||||
return int(v)
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (i *iter) nextQuaternary() int {
|
||||
for ; i.pce < len(i.Elems); i.pce++ {
|
||||
if v := i.Elems[i.pce].Quaternary(); v != 0 {
|
||||
i.pce++
|
||||
return v
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func appendPrimary(key []byte, p int) []byte {
|
||||
// Convert to variable length encoding; supports up to 23 bits.
|
||||
if p <= 0x7FFF {
|
||||
key = append(key, uint8(p>>8), uint8(p))
|
||||
} else {
|
||||
key = append(key, uint8(p>>16)|0x80, uint8(p>>8), uint8(p))
|
||||
}
|
||||
return key
|
||||
}
|
||||
|
||||
// keyFromElems converts the weights ws to a compact sequence of bytes.
|
||||
// The result will be appended to the byte buffer in buf.
|
||||
func (c *Collator) keyFromElems(buf *Buffer, ws []colltab.Elem) {
|
||||
for _, v := range ws {
|
||||
if w := v.Primary(); w > 0 {
|
||||
buf.key = appendPrimary(buf.key, w)
|
||||
}
|
||||
}
|
||||
if !c.ignore[colltab.Secondary] {
|
||||
buf.key = append(buf.key, 0, 0)
|
||||
// TODO: we can use one 0 if we can guarantee that all non-zero weights are > 0xFF.
|
||||
if !c.backwards {
|
||||
for _, v := range ws {
|
||||
if w := v.Secondary(); w > 0 {
|
||||
buf.key = append(buf.key, uint8(w>>8), uint8(w))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for i := len(ws) - 1; i >= 0; i-- {
|
||||
if w := ws[i].Secondary(); w > 0 {
|
||||
buf.key = append(buf.key, uint8(w>>8), uint8(w))
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if c.caseLevel {
|
||||
buf.key = append(buf.key, 0, 0)
|
||||
}
|
||||
if !c.ignore[colltab.Tertiary] || c.caseLevel {
|
||||
buf.key = append(buf.key, 0, 0)
|
||||
for _, v := range ws {
|
||||
if w := v.Tertiary(); w > 0 {
|
||||
buf.key = append(buf.key, uint8(w))
|
||||
}
|
||||
}
|
||||
// Derive the quaternary weights from the options and other levels.
|
||||
// Note that we represent MaxQuaternary as 0xFF. The first byte of the
|
||||
// representation of a primary weight is always smaller than 0xFF,
|
||||
// so using this single byte value will compare correctly.
|
||||
if !c.ignore[colltab.Quaternary] && c.alternate >= altShifted {
|
||||
if c.alternate == altShiftTrimmed {
|
||||
lastNonFFFF := len(buf.key)
|
||||
buf.key = append(buf.key, 0)
|
||||
for _, v := range ws {
|
||||
if w := v.Quaternary(); w == colltab.MaxQuaternary {
|
||||
buf.key = append(buf.key, 0xFF)
|
||||
} else if w > 0 {
|
||||
buf.key = appendPrimary(buf.key, w)
|
||||
lastNonFFFF = len(buf.key)
|
||||
}
|
||||
}
|
||||
buf.key = buf.key[:lastNonFFFF]
|
||||
} else {
|
||||
buf.key = append(buf.key, 0)
|
||||
for _, v := range ws {
|
||||
if w := v.Quaternary(); w == colltab.MaxQuaternary {
|
||||
buf.key = append(buf.key, 0xFF)
|
||||
} else if w > 0 {
|
||||
buf.key = appendPrimary(buf.key, w)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func processWeights(vw alternateHandling, top uint32, wa []colltab.Elem) {
|
||||
ignore := false
|
||||
vtop := int(top)
|
||||
switch vw {
|
||||
case altShifted, altShiftTrimmed:
|
||||
for i := range wa {
|
||||
if p := wa[i].Primary(); p <= vtop && p != 0 {
|
||||
wa[i] = colltab.MakeQuaternary(p)
|
||||
ignore = true
|
||||
} else if p == 0 {
|
||||
if ignore {
|
||||
wa[i] = colltab.Ignore
|
||||
}
|
||||
} else {
|
||||
ignore = false
|
||||
}
|
||||
}
|
||||
case altBlanked:
|
||||
for i := range wa {
|
||||
if p := wa[i].Primary(); p <= vtop && (ignore || p != 0) {
|
||||
wa[i] = colltab.Ignore
|
||||
ignore = true
|
||||
} else {
|
||||
ignore = false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
32
vendor/golang.org/x/text/collate/index.go
generated
vendored
Normal file
32
vendor/golang.org/x/text/collate/index.go
generated
vendored
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package collate
|
||||
|
||||
import "golang.org/x/text/internal/colltab"
|
||||
|
||||
const blockSize = 64
|
||||
|
||||
func getTable(t tableIndex) *colltab.Table {
|
||||
return &colltab.Table{
|
||||
Index: colltab.Trie{
|
||||
Index0: mainLookup[:][blockSize*t.lookupOffset:],
|
||||
Values0: mainValues[:][blockSize*t.valuesOffset:],
|
||||
Index: mainLookup[:],
|
||||
Values: mainValues[:],
|
||||
},
|
||||
ExpandElem: mainExpandElem[:],
|
||||
ContractTries: colltab.ContractTrieSet(mainCTEntries[:]),
|
||||
ContractElem: mainContractElem[:],
|
||||
MaxContractLen: 18,
|
||||
VariableTop: varTop,
|
||||
}
|
||||
}
|
||||
|
||||
// tableIndex holds information for constructing a table
|
||||
// for a certain locale based on the main table.
|
||||
type tableIndex struct {
|
||||
lookupOffset uint32
|
||||
valuesOffset uint32
|
||||
}
|
||||
553
vendor/golang.org/x/text/collate/maketables.go
generated
vendored
Normal file
553
vendor/golang.org/x/text/collate/maketables.go
generated
vendored
Normal file
|
|
@ -0,0 +1,553 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// Collation table generator.
|
||||
// Data read from the web.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"bufio"
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/collate"
|
||||
"golang.org/x/text/collate/build"
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/unicode/cldr"
|
||||
)
|
||||
|
||||
var (
|
||||
test = flag.Bool("test", false,
|
||||
"test existing tables; can be used to compare web data with package data.")
|
||||
short = flag.Bool("short", false, `Use "short" alternatives, when available.`)
|
||||
draft = flag.Bool("draft", false, `Use draft versions, when available.`)
|
||||
tags = flag.String("tags", "", "build tags to be included after +build directive")
|
||||
pkg = flag.String("package", "collate",
|
||||
"the name of the package in which the generated file is to be included")
|
||||
|
||||
tables = flagStringSetAllowAll("tables", "collate", "collate,chars",
|
||||
"comma-spearated list of tables to generate.")
|
||||
exclude = flagStringSet("exclude", "zh2", "",
|
||||
"comma-separated list of languages to exclude.")
|
||||
include = flagStringSet("include", "", "",
|
||||
"comma-separated list of languages to include. Include trumps exclude.")
|
||||
// TODO: Not included: unihan gb2312han zhuyin big5han (for size reasons)
|
||||
// TODO: Not included: traditional (buggy for Bengali)
|
||||
types = flagStringSetAllowAll("types", "standard,phonebook,phonetic,reformed,pinyin,stroke", "",
|
||||
"comma-separated list of types that should be included.")
|
||||
)
|
||||
|
||||
// stringSet implements an ordered set based on a list. It implements flag.Value
|
||||
// to allow a set to be specified as a comma-separated list.
|
||||
type stringSet struct {
|
||||
s []string
|
||||
allowed *stringSet
|
||||
dirty bool // needs compaction if true
|
||||
all bool
|
||||
allowAll bool
|
||||
}
|
||||
|
||||
func flagStringSet(name, def, allowed, usage string) *stringSet {
|
||||
ss := &stringSet{}
|
||||
if allowed != "" {
|
||||
usage += fmt.Sprintf(" (allowed values: any of %s)", allowed)
|
||||
ss.allowed = &stringSet{}
|
||||
failOnError(ss.allowed.Set(allowed))
|
||||
}
|
||||
ss.Set(def)
|
||||
flag.Var(ss, name, usage)
|
||||
return ss
|
||||
}
|
||||
|
||||
func flagStringSetAllowAll(name, def, allowed, usage string) *stringSet {
|
||||
ss := &stringSet{allowAll: true}
|
||||
if allowed == "" {
|
||||
flag.Var(ss, name, usage+fmt.Sprintf(` Use "all" to select all.`))
|
||||
} else {
|
||||
ss.allowed = &stringSet{}
|
||||
failOnError(ss.allowed.Set(allowed))
|
||||
flag.Var(ss, name, usage+fmt.Sprintf(` (allowed values: "all" or any of %s)`, allowed))
|
||||
}
|
||||
ss.Set(def)
|
||||
return ss
|
||||
}
|
||||
|
||||
func (ss stringSet) Len() int {
|
||||
return len(ss.s)
|
||||
}
|
||||
|
||||
func (ss stringSet) String() string {
|
||||
return strings.Join(ss.s, ",")
|
||||
}
|
||||
|
||||
func (ss *stringSet) Set(s string) error {
|
||||
if ss.allowAll && s == "all" {
|
||||
ss.s = nil
|
||||
ss.all = true
|
||||
return nil
|
||||
}
|
||||
ss.s = ss.s[:0]
|
||||
for _, s := range strings.Split(s, ",") {
|
||||
if s := strings.TrimSpace(s); s != "" {
|
||||
if ss.allowed != nil && !ss.allowed.contains(s) {
|
||||
return fmt.Errorf("unsupported value %q; must be one of %s", s, ss.allowed)
|
||||
}
|
||||
ss.add(s)
|
||||
}
|
||||
}
|
||||
ss.compact()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ss *stringSet) add(s string) {
|
||||
ss.s = append(ss.s, s)
|
||||
ss.dirty = true
|
||||
}
|
||||
|
||||
func (ss *stringSet) values() []string {
|
||||
ss.compact()
|
||||
return ss.s
|
||||
}
|
||||
|
||||
func (ss *stringSet) contains(s string) bool {
|
||||
if ss.all {
|
||||
return true
|
||||
}
|
||||
for _, v := range ss.s {
|
||||
if v == s {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (ss *stringSet) compact() {
|
||||
if !ss.dirty {
|
||||
return
|
||||
}
|
||||
a := ss.s
|
||||
sort.Strings(a)
|
||||
k := 0
|
||||
for i := 1; i < len(a); i++ {
|
||||
if a[k] != a[i] {
|
||||
a[k+1] = a[i]
|
||||
k++
|
||||
}
|
||||
}
|
||||
ss.s = a[:k+1]
|
||||
ss.dirty = false
|
||||
}
|
||||
|
||||
func skipLang(l string) bool {
|
||||
if include.Len() > 0 {
|
||||
return !include.contains(l)
|
||||
}
|
||||
return exclude.contains(l)
|
||||
}
|
||||
|
||||
// altInclude returns a list of alternatives (for the LDML alt attribute)
|
||||
// in order of preference. An empty string in this list indicates the
|
||||
// default entry.
|
||||
func altInclude() []string {
|
||||
l := []string{}
|
||||
if *short {
|
||||
l = append(l, "short")
|
||||
}
|
||||
l = append(l, "")
|
||||
// TODO: handle draft using cldr.SetDraftLevel
|
||||
if *draft {
|
||||
l = append(l, "proposed")
|
||||
}
|
||||
return l
|
||||
}
|
||||
|
||||
func failOnError(e error) {
|
||||
if e != nil {
|
||||
log.Panic(e)
|
||||
}
|
||||
}
|
||||
|
||||
func openArchive() *zip.Reader {
|
||||
f := gen.OpenCLDRCoreZip()
|
||||
buffer, err := ioutil.ReadAll(f)
|
||||
f.Close()
|
||||
failOnError(err)
|
||||
archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
|
||||
failOnError(err)
|
||||
return archive
|
||||
}
|
||||
|
||||
// parseUCA parses a Default Unicode Collation Element Table of the format
|
||||
// specified in http://www.unicode.org/reports/tr10/#File_Format.
|
||||
// It returns the variable top.
|
||||
func parseUCA(builder *build.Builder) {
|
||||
var r io.ReadCloser
|
||||
var err error
|
||||
for _, f := range openArchive().File {
|
||||
if strings.HasSuffix(f.Name, "allkeys_CLDR.txt") {
|
||||
r, err = f.Open()
|
||||
}
|
||||
}
|
||||
if r == nil {
|
||||
log.Fatal("File allkeys_CLDR.txt not found in archive.")
|
||||
}
|
||||
failOnError(err)
|
||||
defer r.Close()
|
||||
scanner := bufio.NewScanner(r)
|
||||
colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`)
|
||||
for i := 1; scanner.Scan(); i++ {
|
||||
line := scanner.Text()
|
||||
if len(line) == 0 || line[0] == '#' {
|
||||
continue
|
||||
}
|
||||
if line[0] == '@' {
|
||||
// parse properties
|
||||
switch {
|
||||
case strings.HasPrefix(line[1:], "version "):
|
||||
a := strings.Split(line[1:], " ")
|
||||
if a[1] != gen.UnicodeVersion() {
|
||||
log.Fatalf("incompatible version %s; want %s", a[1], gen.UnicodeVersion())
|
||||
}
|
||||
case strings.HasPrefix(line[1:], "backwards "):
|
||||
log.Fatalf("%d: unsupported option backwards", i)
|
||||
default:
|
||||
log.Printf("%d: unknown option %s", i, line[1:])
|
||||
}
|
||||
} else {
|
||||
// parse entries
|
||||
part := strings.Split(line, " ; ")
|
||||
if len(part) != 2 {
|
||||
log.Fatalf("%d: production rule without ';': %v", i, line)
|
||||
}
|
||||
lhs := []rune{}
|
||||
for _, v := range strings.Split(part[0], " ") {
|
||||
if v == "" {
|
||||
continue
|
||||
}
|
||||
lhs = append(lhs, rune(convHex(i, v)))
|
||||
}
|
||||
var n int
|
||||
var vars []int
|
||||
rhs := [][]int{}
|
||||
for i, m := range colelem.FindAllStringSubmatch(part[1], -1) {
|
||||
n += len(m[0])
|
||||
elem := []int{}
|
||||
for _, h := range strings.Split(m[2], ".") {
|
||||
elem = append(elem, convHex(i, h))
|
||||
}
|
||||
if m[1] == "*" {
|
||||
vars = append(vars, i)
|
||||
}
|
||||
rhs = append(rhs, elem)
|
||||
}
|
||||
if len(part[1]) < n+3 || part[1][n+1] != '#' {
|
||||
log.Fatalf("%d: expected comment; found %s", i, part[1][n:])
|
||||
}
|
||||
if *test {
|
||||
testInput.add(string(lhs))
|
||||
}
|
||||
failOnError(builder.Add(lhs, rhs, vars))
|
||||
}
|
||||
}
|
||||
if scanner.Err() != nil {
|
||||
log.Fatal(scanner.Err())
|
||||
}
|
||||
}
|
||||
|
||||
func convHex(line int, s string) int {
|
||||
r, e := strconv.ParseInt(s, 16, 32)
|
||||
if e != nil {
|
||||
log.Fatalf("%d: %v", line, e)
|
||||
}
|
||||
return int(r)
|
||||
}
|
||||
|
||||
var testInput = stringSet{}
|
||||
|
||||
var charRe = regexp.MustCompile(`&#x([0-9A-F]*);`)
|
||||
var tagRe = regexp.MustCompile(`<([a-z_]*) */>`)
|
||||
|
||||
var mainLocales = []string{}
|
||||
|
||||
// charsets holds a list of exemplar characters per category.
|
||||
type charSets map[string][]string
|
||||
|
||||
func (p charSets) fprint(w io.Writer) {
|
||||
fmt.Fprintln(w, "[exN]string{")
|
||||
for i, k := range []string{"", "contractions", "punctuation", "auxiliary", "currencySymbol", "index"} {
|
||||
if set := p[k]; len(set) != 0 {
|
||||
fmt.Fprintf(w, "\t\t%d: %q,\n", i, strings.Join(set, " "))
|
||||
}
|
||||
}
|
||||
fmt.Fprintln(w, "\t},")
|
||||
}
|
||||
|
||||
var localeChars = make(map[string]charSets)
|
||||
|
||||
const exemplarHeader = `
|
||||
type exemplarType int
|
||||
const (
|
||||
exCharacters exemplarType = iota
|
||||
exContractions
|
||||
exPunctuation
|
||||
exAuxiliary
|
||||
exCurrency
|
||||
exIndex
|
||||
exN
|
||||
)
|
||||
`
|
||||
|
||||
func printExemplarCharacters(w io.Writer) {
|
||||
fmt.Fprintln(w, exemplarHeader)
|
||||
fmt.Fprintln(w, "var exemplarCharacters = map[string][exN]string{")
|
||||
for _, loc := range mainLocales {
|
||||
fmt.Fprintf(w, "\t%q: ", loc)
|
||||
localeChars[loc].fprint(w)
|
||||
}
|
||||
fmt.Fprintln(w, "}")
|
||||
}
|
||||
|
||||
func decodeCLDR(d *cldr.Decoder) *cldr.CLDR {
|
||||
r := gen.OpenCLDRCoreZip()
|
||||
data, err := d.DecodeZip(r)
|
||||
failOnError(err)
|
||||
return data
|
||||
}
|
||||
|
||||
// parseMain parses XML files in the main directory of the CLDR core.zip file.
|
||||
func parseMain() {
|
||||
d := &cldr.Decoder{}
|
||||
d.SetDirFilter("main")
|
||||
d.SetSectionFilter("characters")
|
||||
data := decodeCLDR(d)
|
||||
for _, loc := range data.Locales() {
|
||||
x := data.RawLDML(loc)
|
||||
if skipLang(x.Identity.Language.Type) {
|
||||
continue
|
||||
}
|
||||
if x.Characters != nil {
|
||||
x, _ = data.LDML(loc)
|
||||
loc = language.Make(loc).String()
|
||||
for _, ec := range x.Characters.ExemplarCharacters {
|
||||
if ec.Draft != "" {
|
||||
continue
|
||||
}
|
||||
if _, ok := localeChars[loc]; !ok {
|
||||
mainLocales = append(mainLocales, loc)
|
||||
localeChars[loc] = make(charSets)
|
||||
}
|
||||
localeChars[loc][ec.Type] = parseCharacters(ec.Data())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func parseCharacters(chars string) []string {
|
||||
parseSingle := func(s string) (r rune, tail string, escaped bool) {
|
||||
if s[0] == '\\' {
|
||||
return rune(s[1]), s[2:], true
|
||||
}
|
||||
r, sz := utf8.DecodeRuneInString(s)
|
||||
return r, s[sz:], false
|
||||
}
|
||||
chars = strings.TrimSpace(chars)
|
||||
if n := len(chars) - 1; chars[n] == ']' && chars[0] == '[' {
|
||||
chars = chars[1:n]
|
||||
}
|
||||
list := []string{}
|
||||
var r, last, end rune
|
||||
for len(chars) > 0 {
|
||||
if chars[0] == '{' { // character sequence
|
||||
buf := []rune{}
|
||||
for chars = chars[1:]; len(chars) > 0; {
|
||||
r, chars, _ = parseSingle(chars)
|
||||
if r == '}' {
|
||||
break
|
||||
}
|
||||
if r == ' ' {
|
||||
log.Fatalf("space not supported in sequence %q", chars)
|
||||
}
|
||||
buf = append(buf, r)
|
||||
}
|
||||
list = append(list, string(buf))
|
||||
last = 0
|
||||
} else { // single character
|
||||
escaped := false
|
||||
r, chars, escaped = parseSingle(chars)
|
||||
if r != ' ' {
|
||||
if r == '-' && !escaped {
|
||||
if last == 0 {
|
||||
log.Fatal("'-' should be preceded by a character")
|
||||
}
|
||||
end, chars, _ = parseSingle(chars)
|
||||
for ; last <= end; last++ {
|
||||
list = append(list, string(last))
|
||||
}
|
||||
last = 0
|
||||
} else {
|
||||
list = append(list, string(r))
|
||||
last = r
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return list
|
||||
}
|
||||
|
||||
var fileRe = regexp.MustCompile(`.*/collation/(.*)\.xml`)
|
||||
|
||||
// typeMap translates legacy type keys to their BCP47 equivalent.
|
||||
var typeMap = map[string]string{
|
||||
"phonebook": "phonebk",
|
||||
"traditional": "trad",
|
||||
}
|
||||
|
||||
// parseCollation parses XML files in the collation directory of the CLDR core.zip file.
|
||||
func parseCollation(b *build.Builder) {
|
||||
d := &cldr.Decoder{}
|
||||
d.SetDirFilter("collation")
|
||||
data := decodeCLDR(d)
|
||||
for _, loc := range data.Locales() {
|
||||
x, err := data.LDML(loc)
|
||||
failOnError(err)
|
||||
if skipLang(x.Identity.Language.Type) {
|
||||
continue
|
||||
}
|
||||
cs := x.Collations.Collation
|
||||
sl := cldr.MakeSlice(&cs)
|
||||
if len(types.s) == 0 {
|
||||
sl.SelectAnyOf("type", x.Collations.Default())
|
||||
} else if !types.all {
|
||||
sl.SelectAnyOf("type", types.s...)
|
||||
}
|
||||
sl.SelectOnePerGroup("alt", altInclude())
|
||||
|
||||
for _, c := range cs {
|
||||
id, err := language.Parse(loc)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "invalid locale: %q", err)
|
||||
continue
|
||||
}
|
||||
// Support both old- and new-style defaults.
|
||||
d := c.Type
|
||||
if x.Collations.DefaultCollation == nil {
|
||||
d = x.Collations.Default()
|
||||
} else {
|
||||
d = x.Collations.DefaultCollation.Data()
|
||||
}
|
||||
// We assume tables are being built either for search or collation,
|
||||
// but not both. For search the default is always "search".
|
||||
if d != c.Type && c.Type != "search" {
|
||||
typ := c.Type
|
||||
if len(c.Type) > 8 {
|
||||
typ = typeMap[c.Type]
|
||||
}
|
||||
id, err = id.SetTypeForKey("co", typ)
|
||||
failOnError(err)
|
||||
}
|
||||
t := b.Tailoring(id)
|
||||
c.Process(processor{t})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type processor struct {
|
||||
t *build.Tailoring
|
||||
}
|
||||
|
||||
func (p processor) Reset(anchor string, before int) (err error) {
|
||||
if before != 0 {
|
||||
err = p.t.SetAnchorBefore(anchor)
|
||||
} else {
|
||||
err = p.t.SetAnchor(anchor)
|
||||
}
|
||||
failOnError(err)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p processor) Insert(level int, str, context, extend string) error {
|
||||
str = context + str
|
||||
if *test {
|
||||
testInput.add(str)
|
||||
}
|
||||
// TODO: mimic bug in old maketables: remove.
|
||||
err := p.t.Insert(colltab.Level(level-1), str, context+extend)
|
||||
failOnError(err)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p processor) Index(id string) {
|
||||
}
|
||||
|
||||
func testCollator(c *collate.Collator) {
|
||||
c0 := collate.New(language.Und)
|
||||
|
||||
// iterator over all characters for all locales and check
|
||||
// whether Key is equal.
|
||||
buf := collate.Buffer{}
|
||||
|
||||
// Add all common and not too uncommon runes to the test set.
|
||||
for i := rune(0); i < 0x30000; i++ {
|
||||
testInput.add(string(i))
|
||||
}
|
||||
for i := rune(0xE0000); i < 0xF0000; i++ {
|
||||
testInput.add(string(i))
|
||||
}
|
||||
for _, str := range testInput.values() {
|
||||
k0 := c0.KeyFromString(&buf, str)
|
||||
k := c.KeyFromString(&buf, str)
|
||||
if !bytes.Equal(k0, k) {
|
||||
failOnError(fmt.Errorf("test:%U: keys differ (%x vs %x)", []rune(str), k0, k))
|
||||
}
|
||||
buf.Reset()
|
||||
}
|
||||
fmt.Println("PASS")
|
||||
}
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
b := build.NewBuilder()
|
||||
parseUCA(b)
|
||||
if tables.contains("chars") {
|
||||
parseMain()
|
||||
}
|
||||
parseCollation(b)
|
||||
|
||||
c, err := b.Build()
|
||||
failOnError(err)
|
||||
|
||||
if *test {
|
||||
testCollator(collate.NewFromTable(c))
|
||||
} else {
|
||||
w := &bytes.Buffer{}
|
||||
|
||||
gen.WriteUnicodeVersion(w)
|
||||
gen.WriteCLDRVersion(w)
|
||||
|
||||
if tables.contains("collate") {
|
||||
_, err = b.Print(w)
|
||||
failOnError(err)
|
||||
}
|
||||
if tables.contains("chars") {
|
||||
printExemplarCharacters(w)
|
||||
}
|
||||
gen.WriteGoFile("tables.go", *pkg, w.Bytes())
|
||||
}
|
||||
}
|
||||
239
vendor/golang.org/x/text/collate/option.go
generated
vendored
Normal file
239
vendor/golang.org/x/text/collate/option.go
generated
vendored
Normal file
|
|
@ -0,0 +1,239 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package collate
|
||||
|
||||
import (
|
||||
"sort"
|
||||
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
// newCollator creates a new collator with default options configured.
|
||||
func newCollator(t colltab.Weighter) *Collator {
|
||||
// Initialize a collator with default options.
|
||||
c := &Collator{
|
||||
options: options{
|
||||
ignore: [colltab.NumLevels]bool{
|
||||
colltab.Quaternary: true,
|
||||
colltab.Identity: true,
|
||||
},
|
||||
f: norm.NFD,
|
||||
t: t,
|
||||
},
|
||||
}
|
||||
|
||||
// TODO: store vt in tags or remove.
|
||||
c.variableTop = t.Top()
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
// An Option is used to change the behavior of a Collator. Options override the
|
||||
// settings passed through the locale identifier.
|
||||
type Option struct {
|
||||
priority int
|
||||
f func(o *options)
|
||||
}
|
||||
|
||||
type prioritizedOptions []Option
|
||||
|
||||
func (p prioritizedOptions) Len() int {
|
||||
return len(p)
|
||||
}
|
||||
|
||||
func (p prioritizedOptions) Swap(i, j int) {
|
||||
p[i], p[j] = p[j], p[i]
|
||||
}
|
||||
|
||||
func (p prioritizedOptions) Less(i, j int) bool {
|
||||
return p[i].priority < p[j].priority
|
||||
}
|
||||
|
||||
type options struct {
|
||||
// ignore specifies which levels to ignore.
|
||||
ignore [colltab.NumLevels]bool
|
||||
|
||||
// caseLevel is true if there is an additional level of case matching
|
||||
// between the secondary and tertiary levels.
|
||||
caseLevel bool
|
||||
|
||||
// backwards specifies the order of sorting at the secondary level.
|
||||
// This option exists predominantly to support reverse sorting of accents in French.
|
||||
backwards bool
|
||||
|
||||
// numeric specifies whether any sequence of decimal digits (category is Nd)
|
||||
// is sorted at a primary level with its numeric value.
|
||||
// For example, "A-21" < "A-123".
|
||||
// This option is set by wrapping the main Weighter with NewNumericWeighter.
|
||||
numeric bool
|
||||
|
||||
// alternate specifies an alternative handling of variables.
|
||||
alternate alternateHandling
|
||||
|
||||
// variableTop is the largest primary value that is considered to be
|
||||
// variable.
|
||||
variableTop uint32
|
||||
|
||||
t colltab.Weighter
|
||||
|
||||
f norm.Form
|
||||
}
|
||||
|
||||
func (o *options) setOptions(opts []Option) {
|
||||
sort.Sort(prioritizedOptions(opts))
|
||||
for _, x := range opts {
|
||||
x.f(o)
|
||||
}
|
||||
}
|
||||
|
||||
// OptionsFromTag extracts the BCP47 collation options from the tag and
|
||||
// configures a collator accordingly. These options are set before any other
|
||||
// option.
|
||||
func OptionsFromTag(t language.Tag) Option {
|
||||
return Option{0, func(o *options) {
|
||||
o.setFromTag(t)
|
||||
}}
|
||||
}
|
||||
|
||||
func (o *options) setFromTag(t language.Tag) {
|
||||
o.caseLevel = ldmlBool(t, o.caseLevel, "kc")
|
||||
o.backwards = ldmlBool(t, o.backwards, "kb")
|
||||
o.numeric = ldmlBool(t, o.numeric, "kn")
|
||||
|
||||
// Extract settings from the BCP47 u extension.
|
||||
switch t.TypeForKey("ks") { // strength
|
||||
case "level1":
|
||||
o.ignore[colltab.Secondary] = true
|
||||
o.ignore[colltab.Tertiary] = true
|
||||
case "level2":
|
||||
o.ignore[colltab.Tertiary] = true
|
||||
case "level3", "":
|
||||
// The default.
|
||||
case "level4":
|
||||
o.ignore[colltab.Quaternary] = false
|
||||
case "identic":
|
||||
o.ignore[colltab.Quaternary] = false
|
||||
o.ignore[colltab.Identity] = false
|
||||
}
|
||||
|
||||
switch t.TypeForKey("ka") {
|
||||
case "shifted":
|
||||
o.alternate = altShifted
|
||||
// The following two types are not official BCP47, but we support them to
|
||||
// give access to this otherwise hidden functionality. The name blanked is
|
||||
// derived from the LDML name blanked and posix reflects the main use of
|
||||
// the shift-trimmed option.
|
||||
case "blanked":
|
||||
o.alternate = altBlanked
|
||||
case "posix":
|
||||
o.alternate = altShiftTrimmed
|
||||
}
|
||||
|
||||
// TODO: caseFirst ("kf"), reorder ("kr"), and maybe variableTop ("vt").
|
||||
|
||||
// Not used:
|
||||
// - normalization ("kk", not necessary for this implementation)
|
||||
// - hiraganaQuatenary ("kh", obsolete)
|
||||
}
|
||||
|
||||
func ldmlBool(t language.Tag, old bool, key string) bool {
|
||||
switch t.TypeForKey(key) {
|
||||
case "true":
|
||||
return true
|
||||
case "false":
|
||||
return false
|
||||
default:
|
||||
return old
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
// IgnoreCase sets case-insensitive comparison.
|
||||
IgnoreCase Option = ignoreCase
|
||||
ignoreCase = Option{3, ignoreCaseF}
|
||||
|
||||
// IgnoreDiacritics causes diacritical marks to be ignored. ("o" == "ö").
|
||||
IgnoreDiacritics Option = ignoreDiacritics
|
||||
ignoreDiacritics = Option{3, ignoreDiacriticsF}
|
||||
|
||||
// IgnoreWidth causes full-width characters to match their half-width
|
||||
// equivalents.
|
||||
IgnoreWidth Option = ignoreWidth
|
||||
ignoreWidth = Option{2, ignoreWidthF}
|
||||
|
||||
// Loose sets the collator to ignore diacritics, case and weight.
|
||||
Loose Option = loose
|
||||
loose = Option{4, looseF}
|
||||
|
||||
// Force ordering if strings are equivalent but not equal.
|
||||
Force Option = force
|
||||
force = Option{5, forceF}
|
||||
|
||||
// Numeric specifies that numbers should sort numerically ("2" < "12").
|
||||
Numeric Option = numeric
|
||||
numeric = Option{5, numericF}
|
||||
)
|
||||
|
||||
func ignoreWidthF(o *options) {
|
||||
o.ignore[colltab.Tertiary] = true
|
||||
o.caseLevel = true
|
||||
}
|
||||
|
||||
func ignoreDiacriticsF(o *options) {
|
||||
o.ignore[colltab.Secondary] = true
|
||||
}
|
||||
|
||||
func ignoreCaseF(o *options) {
|
||||
o.ignore[colltab.Tertiary] = true
|
||||
o.caseLevel = false
|
||||
}
|
||||
|
||||
func looseF(o *options) {
|
||||
ignoreWidthF(o)
|
||||
ignoreDiacriticsF(o)
|
||||
ignoreCaseF(o)
|
||||
}
|
||||
|
||||
func forceF(o *options) {
|
||||
o.ignore[colltab.Identity] = false
|
||||
}
|
||||
|
||||
func numericF(o *options) { o.numeric = true }
|
||||
|
||||
// Reorder overrides the pre-defined ordering of scripts and character sets.
|
||||
func Reorder(s ...string) Option {
|
||||
// TODO: need fractional weights to implement this.
|
||||
panic("TODO: implement")
|
||||
}
|
||||
|
||||
// TODO: consider making these public again. These options cannot be fully
|
||||
// specified in BCP47, so an API interface seems warranted. Still a higher-level
|
||||
// interface would be nice (e.g. a POSIX option for enabling altShiftTrimmed)
|
||||
|
||||
// alternateHandling identifies the various ways in which variables are handled.
|
||||
// A rune with a primary weight lower than the variable top is considered a
|
||||
// variable.
|
||||
// See http://www.unicode.org/reports/tr10/#Variable_Weighting for details.
|
||||
type alternateHandling int
|
||||
|
||||
const (
|
||||
// altNonIgnorable turns off special handling of variables.
|
||||
altNonIgnorable alternateHandling = iota
|
||||
|
||||
// altBlanked sets variables and all subsequent primary ignorables to be
|
||||
// ignorable at all levels. This is identical to removing all variables
|
||||
// and subsequent primary ignorables from the input.
|
||||
altBlanked
|
||||
|
||||
// altShifted sets variables to be ignorable for levels one through three and
|
||||
// adds a fourth level based on the values of the ignored levels.
|
||||
altShifted
|
||||
|
||||
// altShiftTrimmed is a slight variant of altShifted that is used to
|
||||
// emulate POSIX.
|
||||
altShiftTrimmed
|
||||
)
|
||||
81
vendor/golang.org/x/text/collate/sort.go
generated
vendored
Normal file
81
vendor/golang.org/x/text/collate/sort.go
generated
vendored
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package collate
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"sort"
|
||||
)
|
||||
|
||||
const (
|
||||
maxSortBuffer = 40960
|
||||
maxSortEntries = 4096
|
||||
)
|
||||
|
||||
type swapper interface {
|
||||
Swap(i, j int)
|
||||
}
|
||||
|
||||
type sorter struct {
|
||||
buf *Buffer
|
||||
keys [][]byte
|
||||
src swapper
|
||||
}
|
||||
|
||||
func (s *sorter) init(n int) {
|
||||
if s.buf == nil {
|
||||
s.buf = &Buffer{}
|
||||
s.buf.init()
|
||||
}
|
||||
if cap(s.keys) < n {
|
||||
s.keys = make([][]byte, n)
|
||||
}
|
||||
s.keys = s.keys[0:n]
|
||||
}
|
||||
|
||||
func (s *sorter) sort(src swapper) {
|
||||
s.src = src
|
||||
sort.Sort(s)
|
||||
}
|
||||
|
||||
func (s sorter) Len() int {
|
||||
return len(s.keys)
|
||||
}
|
||||
|
||||
func (s sorter) Less(i, j int) bool {
|
||||
return bytes.Compare(s.keys[i], s.keys[j]) == -1
|
||||
}
|
||||
|
||||
func (s sorter) Swap(i, j int) {
|
||||
s.keys[i], s.keys[j] = s.keys[j], s.keys[i]
|
||||
s.src.Swap(i, j)
|
||||
}
|
||||
|
||||
// A Lister can be sorted by Collator's Sort method.
|
||||
type Lister interface {
|
||||
Len() int
|
||||
Swap(i, j int)
|
||||
// Bytes returns the bytes of the text at index i.
|
||||
Bytes(i int) []byte
|
||||
}
|
||||
|
||||
// Sort uses sort.Sort to sort the strings represented by x using the rules of c.
|
||||
func (c *Collator) Sort(x Lister) {
|
||||
n := x.Len()
|
||||
c.sorter.init(n)
|
||||
for i := 0; i < n; i++ {
|
||||
c.sorter.keys[i] = c.Key(c.sorter.buf, x.Bytes(i))
|
||||
}
|
||||
c.sorter.sort(x)
|
||||
}
|
||||
|
||||
// SortStrings uses sort.Sort to sort the strings in x using the rules of c.
|
||||
func (c *Collator) SortStrings(x []string) {
|
||||
c.sorter.init(len(x))
|
||||
for i, s := range x {
|
||||
c.sorter.keys[i] = c.KeyFromString(c.sorter.buf, s)
|
||||
}
|
||||
c.sorter.sort(sort.StringSlice(x))
|
||||
}
|
||||
73789
vendor/golang.org/x/text/collate/tables.go
generated
vendored
Normal file
73789
vendor/golang.org/x/text/collate/tables.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue