Check in the vendor directory

Travis seems to be having issues pulling deps, so we'll have to check in
the vendor directory and prevent the makefile from trying to regenerate
it normally.
This commit is contained in:
Solly Ross 2018-07-13 17:31:57 -04:00
parent 98e16bc315
commit a293b2bf94
2526 changed files with 930931 additions and 4 deletions

702
vendor/golang.org/x/text/collate/build/builder.go generated vendored Normal file
View file

@ -0,0 +1,702 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build // import "golang.org/x/text/collate/build"
import (
"fmt"
"io"
"log"
"sort"
"strings"
"unicode/utf8"
"golang.org/x/text/internal/colltab"
"golang.org/x/text/language"
"golang.org/x/text/unicode/norm"
)
// TODO: optimizations:
// - expandElem is currently 20K. By putting unique colElems in a separate
// table and having a byte array of indexes into this table, we can reduce
// the total size to about 7K. By also factoring out the length bytes, we
// can reduce this to about 6K.
// - trie valueBlocks are currently 100K. There are a lot of sparse blocks
// and many consecutive values with the same stride. This can be further
// compacted.
// - Compress secondary weights into 8 bits.
// - Some LDML specs specify a context element. Currently we simply concatenate
// those. Context can be implemented using the contraction trie. If Builder
// could analyze and detect when using a context makes sense, there is no
// need to expose this construct in the API.
// A Builder builds a root collation table. The user must specify the
// collation elements for each entry. A common use will be to base the weights
// on those specified in the allkeys* file as provided by the UCA or CLDR.
type Builder struct {
index *trieBuilder
root ordering
locale []*Tailoring
t *table
err error
built bool
minNonVar int // lowest primary recorded for a variable
varTop int // highest primary recorded for a non-variable
// indexes used for reusing expansions and contractions
expIndex map[string]int // positions of expansions keyed by their string representation
ctHandle map[string]ctHandle // contraction handles keyed by a concatenation of the suffixes
ctElem map[string]int // contraction elements keyed by their string representation
}
// A Tailoring builds a collation table based on another collation table.
// The table is defined by specifying tailorings to the underlying table.
// See http://unicode.org/reports/tr35/ for an overview of tailoring
// collation tables. The CLDR contains pre-defined tailorings for a variety
// of languages (See http://www.unicode.org/Public/cldr/<version>/core.zip.)
type Tailoring struct {
id string
builder *Builder
index *ordering
anchor *entry
before bool
}
// NewBuilder returns a new Builder.
func NewBuilder() *Builder {
return &Builder{
index: newTrieBuilder(),
root: makeRootOrdering(),
expIndex: make(map[string]int),
ctHandle: make(map[string]ctHandle),
ctElem: make(map[string]int),
}
}
// Tailoring returns a Tailoring for the given locale. One should
// have completed all calls to Add before calling Tailoring.
func (b *Builder) Tailoring(loc language.Tag) *Tailoring {
t := &Tailoring{
id: loc.String(),
builder: b,
index: b.root.clone(),
}
t.index.id = t.id
b.locale = append(b.locale, t)
return t
}
// Add adds an entry to the collation element table, mapping
// a slice of runes to a sequence of collation elements.
// A collation element is specified as list of weights: []int{primary, secondary, ...}.
// The entries are typically obtained from a collation element table
// as defined in http://www.unicode.org/reports/tr10/#Data_Table_Format.
// Note that the collation elements specified by colelems are only used
// as a guide. The actual weights generated by Builder may differ.
// The argument variables is a list of indices into colelems that should contain
// a value for each colelem that is a variable. (See the reference above.)
func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
str := string(runes)
elems := make([]rawCE, len(colelems))
for i, ce := range colelems {
if len(ce) == 0 {
break
}
elems[i] = makeRawCE(ce, 0)
if len(ce) == 1 {
elems[i].w[1] = defaultSecondary
}
if len(ce) <= 2 {
elems[i].w[2] = defaultTertiary
}
if len(ce) <= 3 {
elems[i].w[3] = ce[0]
}
}
for i, ce := range elems {
p := ce.w[0]
isvar := false
for _, j := range variables {
if i == j {
isvar = true
}
}
if isvar {
if p >= b.minNonVar && b.minNonVar > 0 {
return fmt.Errorf("primary value %X of variable is larger than the smallest non-variable %X", p, b.minNonVar)
}
if p > b.varTop {
b.varTop = p
}
} else if p > 1 { // 1 is a special primary value reserved for FFFE
if p <= b.varTop {
return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", p, b.varTop)
}
if b.minNonVar == 0 || p < b.minNonVar {
b.minNonVar = p
}
}
}
elems, err := convertLargeWeights(elems)
if err != nil {
return err
}
cccs := []uint8{}
nfd := norm.NFD.String(str)
for i := range nfd {
cccs = append(cccs, norm.NFD.PropertiesString(nfd[i:]).CCC())
}
if len(cccs) < len(elems) {
if len(cccs) > 2 {
return fmt.Errorf("number of decomposed characters should be greater or equal to the number of collation elements for len(colelems) > 3 (%d < %d)", len(cccs), len(elems))
}
p := len(elems) - 1
for ; p > 0 && elems[p].w[0] == 0; p-- {
elems[p].ccc = cccs[len(cccs)-1]
}
for ; p >= 0; p-- {
elems[p].ccc = cccs[0]
}
} else {
for i := range elems {
elems[i].ccc = cccs[i]
}
}
// doNorm in collate.go assumes that the following conditions hold.
if len(elems) > 1 && len(cccs) > 1 && cccs[0] != 0 && cccs[0] != cccs[len(cccs)-1] {
return fmt.Errorf("incompatible CCC values for expansion %X (%d)", runes, cccs)
}
b.root.newEntry(str, elems)
return nil
}
func (t *Tailoring) setAnchor(anchor string) error {
anchor = norm.NFC.String(anchor)
a := t.index.find(anchor)
if a == nil {
a = t.index.newEntry(anchor, nil)
a.implicit = true
a.modified = true
for _, r := range []rune(anchor) {
e := t.index.find(string(r))
e.lock = true
}
}
t.anchor = a
return nil
}
// SetAnchor sets the point after which elements passed in subsequent calls to
// Insert will be inserted. It is equivalent to the reset directive in an LDML
// specification. See Insert for an example.
// SetAnchor supports the following logical reset positions:
// <first_tertiary_ignorable/>, <last_teriary_ignorable/>, <first_primary_ignorable/>,
// and <last_non_ignorable/>.
func (t *Tailoring) SetAnchor(anchor string) error {
if err := t.setAnchor(anchor); err != nil {
return err
}
t.before = false
return nil
}
// SetAnchorBefore is similar to SetAnchor, except that subsequent calls to
// Insert will insert entries before the anchor.
func (t *Tailoring) SetAnchorBefore(anchor string) error {
if err := t.setAnchor(anchor); err != nil {
return err
}
t.before = true
return nil
}
// Insert sets the ordering of str relative to the entry set by the previous
// call to SetAnchor or Insert. The argument extend corresponds
// to the extend elements as defined in LDML. A non-empty value for extend
// will cause the collation elements corresponding to extend to be appended
// to the collation elements generated for the entry added by Insert.
// This has the same net effect as sorting str after the string anchor+extend.
// See http://www.unicode.org/reports/tr10/#Tailoring_Example for details
// on parametric tailoring and http://unicode.org/reports/tr35/#Collation_Elements
// for full details on LDML.
//
// Examples: create a tailoring for Swedish, where "ä" is ordered after "z"
// at the primary sorting level:
// t := b.Tailoring("se")
// t.SetAnchor("z")
// t.Insert(colltab.Primary, "ä", "")
// Order "ü" after "ue" at the secondary sorting level:
// t.SetAnchor("ue")
// t.Insert(colltab.Secondary, "ü","")
// or
// t.SetAnchor("u")
// t.Insert(colltab.Secondary, "ü", "e")
// Order "q" afer "ab" at the secondary level and "Q" after "q"
// at the tertiary level:
// t.SetAnchor("ab")
// t.Insert(colltab.Secondary, "q", "")
// t.Insert(colltab.Tertiary, "Q", "")
// Order "b" before "a":
// t.SetAnchorBefore("a")
// t.Insert(colltab.Primary, "b", "")
// Order "0" after the last primary ignorable:
// t.SetAnchor("<last_primary_ignorable/>")
// t.Insert(colltab.Primary, "0", "")
func (t *Tailoring) Insert(level colltab.Level, str, extend string) error {
if t.anchor == nil {
return fmt.Errorf("%s:Insert: no anchor point set for tailoring of %s", t.id, str)
}
str = norm.NFC.String(str)
e := t.index.find(str)
if e == nil {
e = t.index.newEntry(str, nil)
} else if e.logical != noAnchor {
return fmt.Errorf("%s:Insert: cannot reinsert logical reset position %q", t.id, e.str)
}
if e.lock {
return fmt.Errorf("%s:Insert: cannot reinsert element %q", t.id, e.str)
}
a := t.anchor
// Find the first element after the anchor which differs at a level smaller or
// equal to the given level. Then insert at this position.
// See http://unicode.org/reports/tr35/#Collation_Elements, Section 5.14.5 for details.
e.before = t.before
if t.before {
t.before = false
if a.prev == nil {
a.insertBefore(e)
} else {
for a = a.prev; a.level > level; a = a.prev {
}
a.insertAfter(e)
}
e.level = level
} else {
for ; a.level > level; a = a.next {
}
e.level = a.level
if a != e {
a.insertAfter(e)
a.level = level
} else {
// We don't set a to prev itself. This has the effect of the entry
// getting new collation elements that are an increment of itself.
// This is intentional.
a.prev.level = level
}
}
e.extend = norm.NFD.String(extend)
e.exclude = false
e.modified = true
e.elems = nil
t.anchor = e
return nil
}
func (o *ordering) getWeight(e *entry) []rawCE {
if len(e.elems) == 0 && e.logical == noAnchor {
if e.implicit {
for _, r := range e.runes {
e.elems = append(e.elems, o.getWeight(o.find(string(r)))...)
}
} else if e.before {
count := [colltab.Identity + 1]int{}
a := e
for ; a.elems == nil && !a.implicit; a = a.next {
count[a.level]++
}
e.elems = []rawCE{makeRawCE(a.elems[0].w, a.elems[0].ccc)}
for i := colltab.Primary; i < colltab.Quaternary; i++ {
if count[i] != 0 {
e.elems[0].w[i] -= count[i]
break
}
}
if e.prev != nil {
o.verifyWeights(e.prev, e, e.prev.level)
}
} else {
prev := e.prev
e.elems = nextWeight(prev.level, o.getWeight(prev))
o.verifyWeights(e, e.next, e.level)
}
}
return e.elems
}
func (o *ordering) addExtension(e *entry) {
if ex := o.find(e.extend); ex != nil {
e.elems = append(e.elems, ex.elems...)
} else {
for _, r := range []rune(e.extend) {
e.elems = append(e.elems, o.find(string(r)).elems...)
}
}
e.extend = ""
}
func (o *ordering) verifyWeights(a, b *entry, level colltab.Level) error {
if level == colltab.Identity || b == nil || b.elems == nil || a.elems == nil {
return nil
}
for i := colltab.Primary; i < level; i++ {
if a.elems[0].w[i] < b.elems[0].w[i] {
return nil
}
}
if a.elems[0].w[level] >= b.elems[0].w[level] {
err := fmt.Errorf("%s:overflow: collation elements of %q (%X) overflows those of %q (%X) at level %d (%X >= %X)", o.id, a.str, a.runes, b.str, b.runes, level, a.elems, b.elems)
log.Println(err)
// TODO: return the error instead, or better, fix the conflicting entry by making room.
}
return nil
}
func (b *Builder) error(e error) {
if e != nil {
b.err = e
}
}
func (b *Builder) errorID(locale string, e error) {
if e != nil {
b.err = fmt.Errorf("%s:%v", locale, e)
}
}
// patchNorm ensures that NFC and NFD counterparts are consistent.
func (o *ordering) patchNorm() {
// Insert the NFD counterparts, if necessary.
for _, e := range o.ordered {
nfd := norm.NFD.String(e.str)
if nfd != e.str {
if e0 := o.find(nfd); e0 != nil && !e0.modified {
e0.elems = e.elems
} else if e.modified && !equalCEArrays(o.genColElems(nfd), e.elems) {
e := o.newEntry(nfd, e.elems)
e.modified = true
}
}
}
// Update unchanged composed forms if one of their parts changed.
for _, e := range o.ordered {
nfd := norm.NFD.String(e.str)
if e.modified || nfd == e.str {
continue
}
if e0 := o.find(nfd); e0 != nil {
e.elems = e0.elems
} else {
e.elems = o.genColElems(nfd)
if norm.NFD.LastBoundary([]byte(nfd)) == 0 {
r := []rune(nfd)
head := string(r[0])
tail := ""
for i := 1; i < len(r); i++ {
s := norm.NFC.String(head + string(r[i]))
if e0 := o.find(s); e0 != nil && e0.modified {
head = s
} else {
tail += string(r[i])
}
}
e.elems = append(o.genColElems(head), o.genColElems(tail)...)
}
}
}
// Exclude entries for which the individual runes generate the same collation elements.
for _, e := range o.ordered {
if len(e.runes) > 1 && equalCEArrays(o.genColElems(e.str), e.elems) {
e.exclude = true
}
}
}
func (b *Builder) buildOrdering(o *ordering) {
for _, e := range o.ordered {
o.getWeight(e)
}
for _, e := range o.ordered {
o.addExtension(e)
}
o.patchNorm()
o.sort()
simplify(o)
b.processExpansions(o) // requires simplify
b.processContractions(o) // requires simplify
t := newNode()
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
if !e.skip() {
ce, err := e.encode()
b.errorID(o.id, err)
t.insert(e.runes[0], ce)
}
}
o.handle = b.index.addTrie(t)
}
func (b *Builder) build() (*table, error) {
if b.built {
return b.t, b.err
}
b.built = true
b.t = &table{
Table: colltab.Table{
MaxContractLen: utf8.UTFMax,
VariableTop: uint32(b.varTop),
},
}
b.buildOrdering(&b.root)
b.t.root = b.root.handle
for _, t := range b.locale {
b.buildOrdering(t.index)
if b.err != nil {
break
}
}
i, err := b.index.generate()
b.t.trie = *i
b.t.Index = colltab.Trie{
Index: i.index,
Values: i.values,
Index0: i.index[blockSize*b.t.root.lookupStart:],
Values0: i.values[blockSize*b.t.root.valueStart:],
}
b.error(err)
return b.t, b.err
}
// Build builds the root Collator.
func (b *Builder) Build() (colltab.Weighter, error) {
table, err := b.build()
if err != nil {
return nil, err
}
return table, nil
}
// Build builds a Collator for Tailoring t.
func (t *Tailoring) Build() (colltab.Weighter, error) {
// TODO: implement.
return nil, nil
}
// Print prints the tables for b and all its Tailorings as a Go file
// that can be included in the Collate package.
func (b *Builder) Print(w io.Writer) (n int, err error) {
p := func(nn int, e error) {
n += nn
if err == nil {
err = e
}
}
t, err := b.build()
if err != nil {
return 0, err
}
p(fmt.Fprintf(w, `var availableLocales = "und`))
for _, loc := range b.locale {
if loc.id != "und" {
p(fmt.Fprintf(w, ",%s", loc.id))
}
}
p(fmt.Fprint(w, "\"\n\n"))
p(fmt.Fprintf(w, "const varTop = 0x%x\n\n", b.varTop))
p(fmt.Fprintln(w, "var locales = [...]tableIndex{"))
for _, loc := range b.locale {
if loc.id == "und" {
p(t.fprintIndex(w, loc.index.handle, loc.id))
}
}
for _, loc := range b.locale {
if loc.id != "und" {
p(t.fprintIndex(w, loc.index.handle, loc.id))
}
}
p(fmt.Fprint(w, "}\n\n"))
n, _, err = t.fprint(w, "main")
return
}
// reproducibleFromNFKD checks whether the given expansion could be generated
// from an NFKD expansion.
func reproducibleFromNFKD(e *entry, exp, nfkd []rawCE) bool {
// Length must be equal.
if len(exp) != len(nfkd) {
return false
}
for i, ce := range exp {
// Primary and secondary values should be equal.
if ce.w[0] != nfkd[i].w[0] || ce.w[1] != nfkd[i].w[1] {
return false
}
// Tertiary values should be equal to maxTertiary for third element onwards.
// TODO: there seem to be a lot of cases in CLDR (e.g. ㏭ in zh.xml) that can
// simply be dropped. Try this out by dropping the following code.
if i >= 2 && ce.w[2] != maxTertiary {
return false
}
if _, err := makeCE(ce); err != nil {
// Simply return false. The error will be caught elsewhere.
return false
}
}
return true
}
func simplify(o *ordering) {
// Runes that are a starter of a contraction should not be removed.
// (To date, there is only Kannada character 0CCA.)
keep := make(map[rune]bool)
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
if len(e.runes) > 1 {
keep[e.runes[0]] = true
}
}
// Tag entries for which the runes NFKD decompose to identical values.
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
s := e.str
nfkd := norm.NFKD.String(s)
nfd := norm.NFD.String(s)
if e.decompose || len(e.runes) > 1 || len(e.elems) == 1 || keep[e.runes[0]] || nfkd == nfd {
continue
}
if reproducibleFromNFKD(e, e.elems, o.genColElems(nfkd)) {
e.decompose = true
}
}
}
// appendExpansion converts the given collation sequence to
// collation elements and adds them to the expansion table.
// It returns an index to the expansion table.
func (b *Builder) appendExpansion(e *entry) int {
t := b.t
i := len(t.ExpandElem)
ce := uint32(len(e.elems))
t.ExpandElem = append(t.ExpandElem, ce)
for _, w := range e.elems {
ce, err := makeCE(w)
if err != nil {
b.error(err)
return -1
}
t.ExpandElem = append(t.ExpandElem, ce)
}
return i
}
// processExpansions extracts data necessary to generate
// the extraction tables.
func (b *Builder) processExpansions(o *ordering) {
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
if !e.expansion() {
continue
}
key := fmt.Sprintf("%v", e.elems)
i, ok := b.expIndex[key]
if !ok {
i = b.appendExpansion(e)
b.expIndex[key] = i
}
e.expansionIndex = i
}
}
func (b *Builder) processContractions(o *ordering) {
// Collate contractions per starter rune.
starters := []rune{}
cm := make(map[rune][]*entry)
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
if e.contraction() {
if len(e.str) > b.t.MaxContractLen {
b.t.MaxContractLen = len(e.str)
}
r := e.runes[0]
if _, ok := cm[r]; !ok {
starters = append(starters, r)
}
cm[r] = append(cm[r], e)
}
}
// Add entries of single runes that are at a start of a contraction.
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
if !e.contraction() {
r := e.runes[0]
if _, ok := cm[r]; ok {
cm[r] = append(cm[r], e)
}
}
}
// Build the tries for the contractions.
t := b.t
for _, r := range starters {
l := cm[r]
// Compute suffix strings. There are 31 different contraction suffix
// sets for 715 contractions and 82 contraction starter runes as of
// version 6.0.0.
sufx := []string{}
hasSingle := false
for _, e := range l {
if len(e.runes) > 1 {
sufx = append(sufx, string(e.runes[1:]))
} else {
hasSingle = true
}
}
if !hasSingle {
b.error(fmt.Errorf("no single entry for starter rune %U found", r))
continue
}
// Unique the suffix set.
sort.Strings(sufx)
key := strings.Join(sufx, "\n")
handle, ok := b.ctHandle[key]
if !ok {
var err error
handle, err = appendTrie(&t.ContractTries, sufx)
if err != nil {
b.error(err)
}
b.ctHandle[key] = handle
}
// Bucket sort entries in index order.
es := make([]*entry, len(l))
for _, e := range l {
var p, sn int
if len(e.runes) > 1 {
str := []byte(string(e.runes[1:]))
p, sn = lookup(&t.ContractTries, handle, str)
if sn != len(str) {
log.Fatalf("%s: processContractions: unexpected length for '%X'; len=%d; want %d", o.id, e.runes, sn, len(str))
}
}
if es[p] != nil {
log.Fatalf("%s: multiple contractions for position %d for rune %U", o.id, p, e.runes[0])
}
es[p] = e
}
// Create collation elements for contractions.
elems := []uint32{}
for _, e := range es {
ce, err := e.encodeBase()
b.errorID(o.id, err)
elems = append(elems, ce)
}
key = fmt.Sprintf("%v", elems)
i, ok := b.ctElem[key]
if !ok {
i = len(t.ContractElem)
b.ctElem[key] = i
t.ContractElem = append(t.ContractElem, elems...)
}
// Store info in entry for starter rune.
es[0].contractionIndex = i
es[0].contractionHandle = handle
}
}

294
vendor/golang.org/x/text/collate/build/colelem.go generated vendored Normal file
View file

@ -0,0 +1,294 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import (
"fmt"
"unicode"
"golang.org/x/text/internal/colltab"
)
const (
defaultSecondary = 0x20
defaultTertiary = 0x2
maxTertiary = 0x1F
)
type rawCE struct {
w []int
ccc uint8
}
func makeRawCE(w []int, ccc uint8) rawCE {
ce := rawCE{w: make([]int, 4), ccc: ccc}
copy(ce.w, w)
return ce
}
// A collation element is represented as an uint32.
// In the typical case, a rune maps to a single collation element. If a rune
// can be the start of a contraction or expands into multiple collation elements,
// then the collation element that is associated with a rune will have a special
// form to represent such m to n mappings. Such special collation elements
// have a value >= 0x80000000.
const (
maxPrimaryBits = 21
maxSecondaryBits = 12
maxTertiaryBits = 8
)
func makeCE(ce rawCE) (uint32, error) {
v, e := colltab.MakeElem(ce.w[0], ce.w[1], ce.w[2], ce.ccc)
return uint32(v), e
}
// For contractions, collation elements are of the form
// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where
// - n* is the size of the first node in the contraction trie.
// - i* is the index of the first node in the contraction trie.
// - b* is the offset into the contraction collation element table.
// See contract.go for details on the contraction trie.
const (
contractID = 0xC0000000
maxNBits = 4
maxTrieIndexBits = 12
maxContractOffsetBits = 13
)
func makeContractIndex(h ctHandle, offset int) (uint32, error) {
if h.n >= 1<<maxNBits {
return 0, fmt.Errorf("size of contraction trie node too large: %d >= %d", h.n, 1<<maxNBits)
}
if h.index >= 1<<maxTrieIndexBits {
return 0, fmt.Errorf("size of contraction trie offset too large: %d >= %d", h.index, 1<<maxTrieIndexBits)
}
if offset >= 1<<maxContractOffsetBits {
return 0, fmt.Errorf("contraction offset out of bounds: %x >= %x", offset, 1<<maxContractOffsetBits)
}
ce := uint32(contractID)
ce += uint32(offset << (maxNBits + maxTrieIndexBits))
ce += uint32(h.index << maxNBits)
ce += uint32(h.n)
return ce, nil
}
// For expansions, collation elements are of the form
// 11100000 00000000 bbbbbbbb bbbbbbbb,
// where b* is the index into the expansion sequence table.
const (
expandID = 0xE0000000
maxExpandIndexBits = 16
)
func makeExpandIndex(index int) (uint32, error) {
if index >= 1<<maxExpandIndexBits {
return 0, fmt.Errorf("expansion index out of bounds: %x >= %x", index, 1<<maxExpandIndexBits)
}
return expandID + uint32(index), nil
}
// Each list of collation elements corresponding to an expansion starts with
// a header indicating the length of the sequence.
func makeExpansionHeader(n int) (uint32, error) {
return uint32(n), nil
}
// Some runes can be expanded using NFKD decomposition. Instead of storing the full
// sequence of collation elements, we decompose the rune and lookup the collation
// elements for each rune in the decomposition and modify the tertiary weights.
// The collation element, in this case, is of the form
// 11110000 00000000 wwwwwwww vvvvvvvv, where
// - v* is the replacement tertiary weight for the first rune,
// - w* is the replacement tertiary weight for the second rune,
// Tertiary weights of subsequent runes should be replaced with maxTertiary.
// See http://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.
const (
decompID = 0xF0000000
)
func makeDecompose(t1, t2 int) (uint32, error) {
if t1 >= 256 || t1 < 0 {
return 0, fmt.Errorf("first tertiary weight out of bounds: %d >= 256", t1)
}
if t2 >= 256 || t2 < 0 {
return 0, fmt.Errorf("second tertiary weight out of bounds: %d >= 256", t2)
}
return uint32(t2<<8+t1) + decompID, nil
}
const (
// These constants were taken from http://www.unicode.org/versions/Unicode6.0.0/ch12.pdf.
minUnified rune = 0x4E00
maxUnified = 0x9FFF
minCompatibility = 0xF900
maxCompatibility = 0xFAFF
minRare = 0x3400
maxRare = 0x4DBF
)
const (
commonUnifiedOffset = 0x10000
rareUnifiedOffset = 0x20000 // largest rune in common is U+FAFF
otherOffset = 0x50000 // largest rune in rare is U+2FA1D
illegalOffset = otherOffset + int(unicode.MaxRune)
maxPrimary = illegalOffset + 1
)
// implicitPrimary returns the primary weight for the a rune
// for which there is no entry for the rune in the collation table.
// We take a different approach from the one specified in
// http://unicode.org/reports/tr10/#Implicit_Weights,
// but preserve the resulting relative ordering of the runes.
func implicitPrimary(r rune) int {
if unicode.Is(unicode.Ideographic, r) {
if r >= minUnified && r <= maxUnified {
// The most common case for CJK.
return int(r) + commonUnifiedOffset
}
if r >= minCompatibility && r <= maxCompatibility {
// This will typically not hit. The DUCET explicitly specifies mappings
// for all characters that do not decompose.
return int(r) + commonUnifiedOffset
}
return int(r) + rareUnifiedOffset
}
return int(r) + otherOffset
}
// convertLargeWeights converts collation elements with large
// primaries (either double primaries or for illegal runes)
// to our own representation.
// A CJK character C is represented in the DUCET as
// [.FBxx.0020.0002.C][.BBBB.0000.0000.C]
// We will rewrite these characters to a single CE.
// We assume the CJK values start at 0x8000.
// See http://unicode.org/reports/tr10/#Implicit_Weights
func convertLargeWeights(elems []rawCE) (res []rawCE, err error) {
const (
cjkPrimaryStart = 0xFB40
rarePrimaryStart = 0xFB80
otherPrimaryStart = 0xFBC0
illegalPrimary = 0xFFFE
highBitsMask = 0x3F
lowBitsMask = 0x7FFF
lowBitsFlag = 0x8000
shiftBits = 15
)
for i := 0; i < len(elems); i++ {
ce := elems[i].w
p := ce[0]
if p < cjkPrimaryStart {
continue
}
if p > 0xFFFF {
return elems, fmt.Errorf("found primary weight %X; should be <= 0xFFFF", p)
}
if p >= illegalPrimary {
ce[0] = illegalOffset + p - illegalPrimary
} else {
if i+1 >= len(elems) {
return elems, fmt.Errorf("second part of double primary weight missing: %v", elems)
}
if elems[i+1].w[0]&lowBitsFlag == 0 {
return elems, fmt.Errorf("malformed second part of double primary weight: %v", elems)
}
np := ((p & highBitsMask) << shiftBits) + elems[i+1].w[0]&lowBitsMask
switch {
case p < rarePrimaryStart:
np += commonUnifiedOffset
case p < otherPrimaryStart:
np += rareUnifiedOffset
default:
p += otherOffset
}
ce[0] = np
for j := i + 1; j+1 < len(elems); j++ {
elems[j] = elems[j+1]
}
elems = elems[:len(elems)-1]
}
}
return elems, nil
}
// nextWeight computes the first possible collation weights following elems
// for the given level.
func nextWeight(level colltab.Level, elems []rawCE) []rawCE {
if level == colltab.Identity {
next := make([]rawCE, len(elems))
copy(next, elems)
return next
}
next := []rawCE{makeRawCE(elems[0].w, elems[0].ccc)}
next[0].w[level]++
if level < colltab.Secondary {
next[0].w[colltab.Secondary] = defaultSecondary
}
if level < colltab.Tertiary {
next[0].w[colltab.Tertiary] = defaultTertiary
}
// Filter entries that cannot influence ordering.
for _, ce := range elems[1:] {
skip := true
for i := colltab.Primary; i < level; i++ {
skip = skip && ce.w[i] == 0
}
if !skip {
next = append(next, ce)
}
}
return next
}
func nextVal(elems []rawCE, i int, level colltab.Level) (index, value int) {
for ; i < len(elems) && elems[i].w[level] == 0; i++ {
}
if i < len(elems) {
return i, elems[i].w[level]
}
return i, 0
}
// compareWeights returns -1 if a < b, 1 if a > b, or 0 otherwise.
// It also returns the collation level at which the difference is found.
func compareWeights(a, b []rawCE) (result int, level colltab.Level) {
for level := colltab.Primary; level < colltab.Identity; level++ {
var va, vb int
for ia, ib := 0, 0; ia < len(a) || ib < len(b); ia, ib = ia+1, ib+1 {
ia, va = nextVal(a, ia, level)
ib, vb = nextVal(b, ib, level)
if va != vb {
if va < vb {
return -1, level
} else {
return 1, level
}
}
}
}
return 0, colltab.Identity
}
func equalCE(a, b rawCE) bool {
for i := 0; i < 3; i++ {
if b.w[i] != a.w[i] {
return false
}
}
return true
}
func equalCEArrays(a, b []rawCE) bool {
if len(a) != len(b) {
return false
}
for i := range a {
if !equalCE(a[i], b[i]) {
return false
}
}
return true
}

309
vendor/golang.org/x/text/collate/build/contract.go generated vendored Normal file
View file

@ -0,0 +1,309 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import (
"fmt"
"io"
"reflect"
"sort"
"strings"
"golang.org/x/text/internal/colltab"
)
// This file contains code for detecting contractions and generating
// the necessary tables.
// Any Unicode Collation Algorithm (UCA) table entry that has more than
// one rune one the left-hand side is called a contraction.
// See http://www.unicode.org/reports/tr10/#Contractions for more details.
//
// We define the following terms:
// initial: a rune that appears as the first rune in a contraction.
// suffix: a sequence of runes succeeding the initial rune
// in a given contraction.
// non-initial: a rune that appears in a suffix.
//
// A rune may be both an initial and a non-initial and may be so in
// many contractions. An initial may typically also appear by itself.
// In case of ambiguities, the UCA requires we match the longest
// contraction.
//
// Many contraction rules share the same set of possible suffixes.
// We store sets of suffixes in a trie that associates an index with
// each suffix in the set. This index can be used to look up a
// collation element associated with the (starter rune, suffix) pair.
//
// The trie is defined on a UTF-8 byte sequence.
// The overall trie is represented as an array of ctEntries. Each node of the trie
// is represented as a subsequence of ctEntries, where each entry corresponds to
// a possible match of a next character in the search string. An entry
// also includes the length and offset to the next sequence of entries
// to check in case of a match.
const (
final = 0
noIndex = 0xFF
)
// ctEntry associates to a matching byte an offset and/or next sequence of
// bytes to check. A ctEntry c is called final if a match means that the
// longest suffix has been found. An entry c is final if c.N == 0.
// A single final entry can match a range of characters to an offset.
// A non-final entry always matches a single byte. Note that a non-final
// entry might still resemble a completed suffix.
// Examples:
// The suffix strings "ab" and "ac" can be represented as:
// []ctEntry{
// {'a', 1, 1, noIndex}, // 'a' by itself does not match, so i is 0xFF.
// {'b', 'c', 0, 1}, // "ab" -> 1, "ac" -> 2
// }
//
// The suffix strings "ab", "abc", "abd", and "abcd" can be represented as:
// []ctEntry{
// {'a', 1, 1, noIndex}, // 'a' must be followed by 'b'.
// {'b', 1, 2, 1}, // "ab" -> 1, may be followed by 'c' or 'd'.
// {'d', 'd', final, 3}, // "abd" -> 3
// {'c', 4, 1, 2}, // "abc" -> 2, may be followed by 'd'.
// {'d', 'd', final, 4}, // "abcd" -> 4
// }
// See genStateTests in contract_test.go for more examples.
type ctEntry struct {
L uint8 // non-final: byte value to match; final: lowest match in range.
H uint8 // non-final: relative index to next block; final: highest match in range.
N uint8 // non-final: length of next block; final: final
I uint8 // result offset. Will be noIndex if more bytes are needed to complete.
}
// contractTrieSet holds a set of contraction tries. The tries are stored
// consecutively in the entry field.
type contractTrieSet []struct{ l, h, n, i uint8 }
// ctHandle is used to identify a trie in the trie set, consisting in an offset
// in the array and the size of the first node.
type ctHandle struct {
index, n int
}
// appendTrie adds a new trie for the given suffixes to the trie set and returns
// a handle to it. The handle will be invalid on error.
func appendTrie(ct *colltab.ContractTrieSet, suffixes []string) (ctHandle, error) {
es := make([]stridx, len(suffixes))
for i, s := range suffixes {
es[i].str = s
}
sort.Sort(offsetSort(es))
for i := range es {
es[i].index = i + 1
}
sort.Sort(genidxSort(es))
i := len(*ct)
n, err := genStates(ct, es)
if err != nil {
*ct = (*ct)[:i]
return ctHandle{}, err
}
return ctHandle{i, n}, nil
}
// genStates generates ctEntries for a given suffix set and returns
// the number of entries for the first node.
func genStates(ct *colltab.ContractTrieSet, sis []stridx) (int, error) {
if len(sis) == 0 {
return 0, fmt.Errorf("genStates: list of suffices must be non-empty")
}
start := len(*ct)
// create entries for differing first bytes.
for _, si := range sis {
s := si.str
if len(s) == 0 {
continue
}
added := false
c := s[0]
if len(s) > 1 {
for j := len(*ct) - 1; j >= start; j-- {
if (*ct)[j].L == c {
added = true
break
}
}
if !added {
*ct = append(*ct, ctEntry{L: c, I: noIndex})
}
} else {
for j := len(*ct) - 1; j >= start; j-- {
// Update the offset for longer suffixes with the same byte.
if (*ct)[j].L == c {
(*ct)[j].I = uint8(si.index)
added = true
}
// Extend range of final ctEntry, if possible.
if (*ct)[j].H+1 == c {
(*ct)[j].H = c
added = true
}
}
if !added {
*ct = append(*ct, ctEntry{L: c, H: c, N: final, I: uint8(si.index)})
}
}
}
n := len(*ct) - start
// Append nodes for the remainder of the suffixes for each ctEntry.
sp := 0
for i, end := start, len(*ct); i < end; i++ {
fe := (*ct)[i]
if fe.H == 0 { // uninitialized non-final
ln := len(*ct) - start - n
if ln > 0xFF {
return 0, fmt.Errorf("genStates: relative block offset too large: %d > 255", ln)
}
fe.H = uint8(ln)
// Find first non-final strings with same byte as current entry.
for ; sis[sp].str[0] != fe.L; sp++ {
}
se := sp + 1
for ; se < len(sis) && len(sis[se].str) > 1 && sis[se].str[0] == fe.L; se++ {
}
sl := sis[sp:se]
sp = se
for i, si := range sl {
sl[i].str = si.str[1:]
}
nn, err := genStates(ct, sl)
if err != nil {
return 0, err
}
fe.N = uint8(nn)
(*ct)[i] = fe
}
}
sort.Sort(entrySort((*ct)[start : start+n]))
return n, nil
}
// There may be both a final and non-final entry for a byte if the byte
// is implied in a range of matches in the final entry.
// We need to ensure that the non-final entry comes first in that case.
type entrySort colltab.ContractTrieSet
func (fe entrySort) Len() int { return len(fe) }
func (fe entrySort) Swap(i, j int) { fe[i], fe[j] = fe[j], fe[i] }
func (fe entrySort) Less(i, j int) bool {
return fe[i].L > fe[j].L
}
// stridx is used for sorting suffixes and their associated offsets.
type stridx struct {
str string
index int
}
// For computing the offsets, we first sort by size, and then by string.
// This ensures that strings that only differ in the last byte by 1
// are sorted consecutively in increasing order such that they can
// be packed as a range in a final ctEntry.
type offsetSort []stridx
func (si offsetSort) Len() int { return len(si) }
func (si offsetSort) Swap(i, j int) { si[i], si[j] = si[j], si[i] }
func (si offsetSort) Less(i, j int) bool {
if len(si[i].str) != len(si[j].str) {
return len(si[i].str) > len(si[j].str)
}
return si[i].str < si[j].str
}
// For indexing, we want to ensure that strings are sorted in string order, where
// for strings with the same prefix, we put longer strings before shorter ones.
type genidxSort []stridx
func (si genidxSort) Len() int { return len(si) }
func (si genidxSort) Swap(i, j int) { si[i], si[j] = si[j], si[i] }
func (si genidxSort) Less(i, j int) bool {
if strings.HasPrefix(si[j].str, si[i].str) {
return false
}
if strings.HasPrefix(si[i].str, si[j].str) {
return true
}
return si[i].str < si[j].str
}
// lookup matches the longest suffix in str and returns the associated offset
// and the number of bytes consumed.
func lookup(ct *colltab.ContractTrieSet, h ctHandle, str []byte) (index, ns int) {
states := (*ct)[h.index:]
p := 0
n := h.n
for i := 0; i < n && p < len(str); {
e := states[i]
c := str[p]
if c >= e.L {
if e.L == c {
p++
if e.I != noIndex {
index, ns = int(e.I), p
}
if e.N != final {
// set to new state
i, states, n = 0, states[int(e.H)+n:], int(e.N)
} else {
return
}
continue
} else if e.N == final && c <= e.H {
p++
return int(c-e.L) + int(e.I), p
}
}
i++
}
return
}
// print writes the contractTrieSet t as compilable Go code to w. It returns
// the total number of bytes written and the size of the resulting data structure in bytes.
func print(t *colltab.ContractTrieSet, w io.Writer, name string) (n, size int, err error) {
update3 := func(nn, sz int, e error) {
n += nn
if err == nil {
err = e
}
size += sz
}
update2 := func(nn int, e error) { update3(nn, 0, e) }
update3(printArray(*t, w, name))
update2(fmt.Fprintf(w, "var %sContractTrieSet = ", name))
update3(printStruct(*t, w, name))
update2(fmt.Fprintln(w))
return
}
func printArray(ct colltab.ContractTrieSet, w io.Writer, name string) (n, size int, err error) {
p := func(f string, a ...interface{}) {
nn, e := fmt.Fprintf(w, f, a...)
n += nn
if err == nil {
err = e
}
}
size = len(ct) * 4
p("// %sCTEntries: %d entries, %d bytes\n", name, len(ct), size)
p("var %sCTEntries = [%d]struct{L,H,N,I uint8}{\n", name, len(ct))
for _, fe := range ct {
p("\t{0x%X, 0x%X, %d, %d},\n", fe.L, fe.H, fe.N, fe.I)
}
p("}\n")
return
}
func printStruct(ct colltab.ContractTrieSet, w io.Writer, name string) (n, size int, err error) {
n, err = fmt.Fprintf(w, "colltab.ContractTrieSet( %sCTEntries[:] )", name)
size = int(reflect.TypeOf(ct).Size())
return
}

393
vendor/golang.org/x/text/collate/build/order.go generated vendored Normal file
View file

@ -0,0 +1,393 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import (
"fmt"
"log"
"sort"
"strings"
"unicode"
"golang.org/x/text/internal/colltab"
"golang.org/x/text/unicode/norm"
)
type logicalAnchor int
const (
firstAnchor logicalAnchor = -1
noAnchor = 0
lastAnchor = 1
)
// entry is used to keep track of a single entry in the collation element table
// during building. Examples of entries can be found in the Default Unicode
// Collation Element Table.
// See http://www.unicode.org/Public/UCA/6.0.0/allkeys.txt.
type entry struct {
str string // same as string(runes)
runes []rune
elems []rawCE // the collation elements
extend string // weights of extend to be appended to elems
before bool // weights relative to next instead of previous.
lock bool // entry is used in extension and can no longer be moved.
// prev, next, and level are used to keep track of tailorings.
prev, next *entry
level colltab.Level // next differs at this level
skipRemove bool // do not unlink when removed
decompose bool // can use NFKD decomposition to generate elems
exclude bool // do not include in table
implicit bool // derived, is not included in the list
modified bool // entry was modified in tailoring
logical logicalAnchor
expansionIndex int // used to store index into expansion table
contractionHandle ctHandle
contractionIndex int // index into contraction elements
}
func (e *entry) String() string {
return fmt.Sprintf("%X (%q) -> %X (ch:%x; ci:%d, ei:%d)",
e.runes, e.str, e.elems, e.contractionHandle, e.contractionIndex, e.expansionIndex)
}
func (e *entry) skip() bool {
return e.contraction()
}
func (e *entry) expansion() bool {
return !e.decompose && len(e.elems) > 1
}
func (e *entry) contraction() bool {
return len(e.runes) > 1
}
func (e *entry) contractionStarter() bool {
return e.contractionHandle.n != 0
}
// nextIndexed gets the next entry that needs to be stored in the table.
// It returns the entry and the collation level at which the next entry differs
// from the current entry.
// Entries that can be explicitly derived and logical reset positions are
// examples of entries that will not be indexed.
func (e *entry) nextIndexed() (*entry, colltab.Level) {
level := e.level
for e = e.next; e != nil && (e.exclude || len(e.elems) == 0); e = e.next {
if e.level < level {
level = e.level
}
}
return e, level
}
// remove unlinks entry e from the sorted chain and clears the collation
// elements. e may not be at the front or end of the list. This should always
// be the case, as the front and end of the list are always logical anchors,
// which may not be removed.
func (e *entry) remove() {
if e.logical != noAnchor {
log.Fatalf("may not remove anchor %q", e.str)
}
// TODO: need to set e.prev.level to e.level if e.level is smaller?
e.elems = nil
if !e.skipRemove {
if e.prev != nil {
e.prev.next = e.next
}
if e.next != nil {
e.next.prev = e.prev
}
}
e.skipRemove = false
}
// insertAfter inserts n after e.
func (e *entry) insertAfter(n *entry) {
if e == n {
panic("e == anchor")
}
if e == nil {
panic("unexpected nil anchor")
}
n.remove()
n.decompose = false // redo decomposition test
n.next = e.next
n.prev = e
if e.next != nil {
e.next.prev = n
}
e.next = n
}
// insertBefore inserts n before e.
func (e *entry) insertBefore(n *entry) {
if e == n {
panic("e == anchor")
}
if e == nil {
panic("unexpected nil anchor")
}
n.remove()
n.decompose = false // redo decomposition test
n.prev = e.prev
n.next = e
if e.prev != nil {
e.prev.next = n
}
e.prev = n
}
func (e *entry) encodeBase() (ce uint32, err error) {
switch {
case e.expansion():
ce, err = makeExpandIndex(e.expansionIndex)
default:
if e.decompose {
log.Fatal("decompose should be handled elsewhere")
}
ce, err = makeCE(e.elems[0])
}
return
}
func (e *entry) encode() (ce uint32, err error) {
if e.skip() {
log.Fatal("cannot build colElem for entry that should be skipped")
}
switch {
case e.decompose:
t1 := e.elems[0].w[2]
t2 := 0
if len(e.elems) > 1 {
t2 = e.elems[1].w[2]
}
ce, err = makeDecompose(t1, t2)
case e.contractionStarter():
ce, err = makeContractIndex(e.contractionHandle, e.contractionIndex)
default:
if len(e.runes) > 1 {
log.Fatal("colElem: contractions are handled in contraction trie")
}
ce, err = e.encodeBase()
}
return
}
// entryLess returns true if a sorts before b and false otherwise.
func entryLess(a, b *entry) bool {
if res, _ := compareWeights(a.elems, b.elems); res != 0 {
return res == -1
}
if a.logical != noAnchor {
return a.logical == firstAnchor
}
if b.logical != noAnchor {
return b.logical == lastAnchor
}
return a.str < b.str
}
type sortedEntries []*entry
func (s sortedEntries) Len() int {
return len(s)
}
func (s sortedEntries) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}
func (s sortedEntries) Less(i, j int) bool {
return entryLess(s[i], s[j])
}
type ordering struct {
id string
entryMap map[string]*entry
ordered []*entry
handle *trieHandle
}
// insert inserts e into both entryMap and ordered.
// Note that insert simply appends e to ordered. To reattain a sorted
// order, o.sort() should be called.
func (o *ordering) insert(e *entry) {
if e.logical == noAnchor {
o.entryMap[e.str] = e
} else {
// Use key format as used in UCA rules.
o.entryMap[fmt.Sprintf("[%s]", e.str)] = e
// Also add index entry for XML format.
o.entryMap[fmt.Sprintf("<%s/>", strings.Replace(e.str, " ", "_", -1))] = e
}
o.ordered = append(o.ordered, e)
}
// newEntry creates a new entry for the given info and inserts it into
// the index.
func (o *ordering) newEntry(s string, ces []rawCE) *entry {
e := &entry{
runes: []rune(s),
elems: ces,
str: s,
}
o.insert(e)
return e
}
// find looks up and returns the entry for the given string.
// It returns nil if str is not in the index and if an implicit value
// cannot be derived, that is, if str represents more than one rune.
func (o *ordering) find(str string) *entry {
e := o.entryMap[str]
if e == nil {
r := []rune(str)
if len(r) == 1 {
const (
firstHangul = 0xAC00
lastHangul = 0xD7A3
)
if r[0] >= firstHangul && r[0] <= lastHangul {
ce := []rawCE{}
nfd := norm.NFD.String(str)
for _, r := range nfd {
ce = append(ce, o.find(string(r)).elems...)
}
e = o.newEntry(nfd, ce)
} else {
e = o.newEntry(string(r[0]), []rawCE{
{w: []int{
implicitPrimary(r[0]),
defaultSecondary,
defaultTertiary,
int(r[0]),
},
},
})
e.modified = true
}
e.exclude = true // do not index implicits
}
}
return e
}
// makeRootOrdering returns a newly initialized ordering value and populates
// it with a set of logical reset points that can be used as anchors.
// The anchors first_tertiary_ignorable and __END__ will always sort at
// the beginning and end, respectively. This means that prev and next are non-nil
// for any indexed entry.
func makeRootOrdering() ordering {
const max = unicode.MaxRune
o := ordering{
entryMap: make(map[string]*entry),
}
insert := func(typ logicalAnchor, s string, ce []int) {
e := &entry{
elems: []rawCE{{w: ce}},
str: s,
exclude: true,
logical: typ,
}
o.insert(e)
}
insert(firstAnchor, "first tertiary ignorable", []int{0, 0, 0, 0})
insert(lastAnchor, "last tertiary ignorable", []int{0, 0, 0, max})
insert(lastAnchor, "last primary ignorable", []int{0, defaultSecondary, defaultTertiary, max})
insert(lastAnchor, "last non ignorable", []int{maxPrimary, defaultSecondary, defaultTertiary, max})
insert(lastAnchor, "__END__", []int{1 << maxPrimaryBits, defaultSecondary, defaultTertiary, max})
return o
}
// patchForInsert eleminates entries from the list with more than one collation element.
// The next and prev fields of the eliminated entries still point to appropriate
// values in the newly created list.
// It requires that sort has been called.
func (o *ordering) patchForInsert() {
for i := 0; i < len(o.ordered)-1; {
e := o.ordered[i]
lev := e.level
n := e.next
for ; n != nil && len(n.elems) > 1; n = n.next {
if n.level < lev {
lev = n.level
}
n.skipRemove = true
}
for ; o.ordered[i] != n; i++ {
o.ordered[i].level = lev
o.ordered[i].next = n
o.ordered[i+1].prev = e
}
}
}
// clone copies all ordering of es into a new ordering value.
func (o *ordering) clone() *ordering {
o.sort()
oo := ordering{
entryMap: make(map[string]*entry),
}
for _, e := range o.ordered {
ne := &entry{
runes: e.runes,
elems: e.elems,
str: e.str,
decompose: e.decompose,
exclude: e.exclude,
logical: e.logical,
}
oo.insert(ne)
}
oo.sort() // link all ordering.
oo.patchForInsert()
return &oo
}
// front returns the first entry to be indexed.
// It assumes that sort() has been called.
func (o *ordering) front() *entry {
e := o.ordered[0]
if e.prev != nil {
log.Panicf("unexpected first entry: %v", e)
}
// The first entry is always a logical position, which should not be indexed.
e, _ = e.nextIndexed()
return e
}
// sort sorts all ordering based on their collation elements and initializes
// the prev, next, and level fields accordingly.
func (o *ordering) sort() {
sort.Sort(sortedEntries(o.ordered))
l := o.ordered
for i := 1; i < len(l); i++ {
k := i - 1
l[k].next = l[i]
_, l[k].level = compareWeights(l[k].elems, l[i].elems)
l[i].prev = l[k]
}
}
// genColElems generates a collation element array from the runes in str. This
// assumes that all collation elements have already been added to the Builder.
func (o *ordering) genColElems(str string) []rawCE {
elems := []rawCE{}
for _, r := range []rune(str) {
for _, ce := range o.find(string(r)).elems {
if ce.w[0] != 0 || ce.w[1] != 0 || ce.w[2] != 0 {
elems = append(elems, ce)
}
}
}
return elems
}

81
vendor/golang.org/x/text/collate/build/table.go generated vendored Normal file
View file

@ -0,0 +1,81 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import (
"fmt"
"io"
"reflect"
"golang.org/x/text/internal/colltab"
)
// table is an intermediate structure that roughly resembles the table in collate.
type table struct {
colltab.Table
trie trie
root *trieHandle
}
// print writes the table as Go compilable code to w. It prefixes the
// variable names with name. It returns the number of bytes written
// and the size of the resulting table.
func (t *table) fprint(w io.Writer, name string) (n, size int, err error) {
update := func(nn, sz int, e error) {
n += nn
if err == nil {
err = e
}
size += sz
}
// Write arrays needed for the structure.
update(printColElems(w, t.ExpandElem, name+"ExpandElem"))
update(printColElems(w, t.ContractElem, name+"ContractElem"))
update(t.trie.printArrays(w, name))
update(printArray(t.ContractTries, w, name))
nn, e := fmt.Fprintf(w, "// Total size of %sTable is %d bytes\n", name, size)
update(nn, 0, e)
return
}
func (t *table) fprintIndex(w io.Writer, h *trieHandle, id string) (n int, err error) {
p := func(f string, a ...interface{}) {
nn, e := fmt.Fprintf(w, f, a...)
n += nn
if err == nil {
err = e
}
}
p("\t{ // %s\n", id)
p("\t\tlookupOffset: 0x%x,\n", h.lookupStart)
p("\t\tvaluesOffset: 0x%x,\n", h.valueStart)
p("\t},\n")
return
}
func printColElems(w io.Writer, a []uint32, name string) (n, sz int, err error) {
p := func(f string, a ...interface{}) {
nn, e := fmt.Fprintf(w, f, a...)
n += nn
if err == nil {
err = e
}
}
sz = len(a) * int(reflect.TypeOf(uint32(0)).Size())
p("// %s: %d entries, %d bytes\n", name, len(a), sz)
p("var %s = [%d]uint32 {", name, len(a))
for i, c := range a {
switch {
case i%64 == 0:
p("\n\t// Block %d, offset 0x%x\n", i/64, i)
case (i%64)%6 == 0:
p("\n\t")
}
p("0x%.8X, ", c)
}
p("\n}\n\n")
return
}

290
vendor/golang.org/x/text/collate/build/trie.go generated vendored Normal file
View file

@ -0,0 +1,290 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// The trie in this file is used to associate the first full character
// in a UTF-8 string to a collation element.
// All but the last byte in a UTF-8 byte sequence are
// used to look up offsets in the index table to be used for the next byte.
// The last byte is used to index into a table of collation elements.
// This file contains the code for the generation of the trie.
package build
import (
"fmt"
"hash/fnv"
"io"
"reflect"
)
const (
blockSize = 64
blockOffset = 2 // Subtract 2 blocks to compensate for the 0x80 added to continuation bytes.
)
type trieHandle struct {
lookupStart uint16 // offset in table for first byte
valueStart uint16 // offset in table for first byte
}
type trie struct {
index []uint16
values []uint32
}
// trieNode is the intermediate trie structure used for generating a trie.
type trieNode struct {
index []*trieNode
value []uint32
b byte
refValue uint16
refIndex uint16
}
func newNode() *trieNode {
return &trieNode{
index: make([]*trieNode, 64),
value: make([]uint32, 128), // root node size is 128 instead of 64
}
}
func (n *trieNode) isInternal() bool {
return n.value != nil
}
func (n *trieNode) insert(r rune, value uint32) {
const maskx = 0x3F // mask out two most-significant bits
str := string(r)
if len(str) == 1 {
n.value[str[0]] = value
return
}
for i := 0; i < len(str)-1; i++ {
b := str[i] & maskx
if n.index == nil {
n.index = make([]*trieNode, blockSize)
}
nn := n.index[b]
if nn == nil {
nn = &trieNode{}
nn.b = b
n.index[b] = nn
}
n = nn
}
if n.value == nil {
n.value = make([]uint32, blockSize)
}
b := str[len(str)-1] & maskx
n.value[b] = value
}
type trieBuilder struct {
t *trie
roots []*trieHandle
lookupBlocks []*trieNode
valueBlocks []*trieNode
lookupBlockIdx map[uint32]*trieNode
valueBlockIdx map[uint32]*trieNode
}
func newTrieBuilder() *trieBuilder {
index := &trieBuilder{}
index.lookupBlocks = make([]*trieNode, 0)
index.valueBlocks = make([]*trieNode, 0)
index.lookupBlockIdx = make(map[uint32]*trieNode)
index.valueBlockIdx = make(map[uint32]*trieNode)
// The third nil is the default null block. The other two blocks
// are used to guarantee an offset of at least 3 for each block.
index.lookupBlocks = append(index.lookupBlocks, nil, nil, nil)
index.t = &trie{}
return index
}
func (b *trieBuilder) computeOffsets(n *trieNode) *trieNode {
hasher := fnv.New32()
if n.index != nil {
for i, nn := range n.index {
var vi, vv uint16
if nn != nil {
nn = b.computeOffsets(nn)
n.index[i] = nn
vi = nn.refIndex
vv = nn.refValue
}
hasher.Write([]byte{byte(vi >> 8), byte(vi)})
hasher.Write([]byte{byte(vv >> 8), byte(vv)})
}
h := hasher.Sum32()
nn, ok := b.lookupBlockIdx[h]
if !ok {
n.refIndex = uint16(len(b.lookupBlocks)) - blockOffset
b.lookupBlocks = append(b.lookupBlocks, n)
b.lookupBlockIdx[h] = n
} else {
n = nn
}
} else {
for _, v := range n.value {
hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)})
}
h := hasher.Sum32()
nn, ok := b.valueBlockIdx[h]
if !ok {
n.refValue = uint16(len(b.valueBlocks)) - blockOffset
n.refIndex = n.refValue
b.valueBlocks = append(b.valueBlocks, n)
b.valueBlockIdx[h] = n
} else {
n = nn
}
}
return n
}
func (b *trieBuilder) addStartValueBlock(n *trieNode) uint16 {
hasher := fnv.New32()
for _, v := range n.value[:2*blockSize] {
hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)})
}
h := hasher.Sum32()
nn, ok := b.valueBlockIdx[h]
if !ok {
n.refValue = uint16(len(b.valueBlocks))
n.refIndex = n.refValue
b.valueBlocks = append(b.valueBlocks, n)
// Add a dummy block to accommodate the double block size.
b.valueBlocks = append(b.valueBlocks, nil)
b.valueBlockIdx[h] = n
} else {
n = nn
}
return n.refValue
}
func genValueBlock(t *trie, n *trieNode) {
if n != nil {
for _, v := range n.value {
t.values = append(t.values, v)
}
}
}
func genLookupBlock(t *trie, n *trieNode) {
for _, nn := range n.index {
v := uint16(0)
if nn != nil {
if n.index != nil {
v = nn.refIndex
} else {
v = nn.refValue
}
}
t.index = append(t.index, v)
}
}
func (b *trieBuilder) addTrie(n *trieNode) *trieHandle {
h := &trieHandle{}
b.roots = append(b.roots, h)
h.valueStart = b.addStartValueBlock(n)
if len(b.roots) == 1 {
// We insert a null block after the first start value block.
// This ensures that continuation bytes UTF-8 sequences of length
// greater than 2 will automatically hit a null block if there
// was an undefined entry.
b.valueBlocks = append(b.valueBlocks, nil)
}
n = b.computeOffsets(n)
// Offset by one extra block as the first byte starts at 0xC0 instead of 0x80.
h.lookupStart = n.refIndex - 1
return h
}
// generate generates and returns the trie for n.
func (b *trieBuilder) generate() (t *trie, err error) {
t = b.t
if len(b.valueBlocks) >= 1<<16 {
return nil, fmt.Errorf("maximum number of value blocks exceeded (%d > %d)", len(b.valueBlocks), 1<<16)
}
if len(b.lookupBlocks) >= 1<<16 {
return nil, fmt.Errorf("maximum number of lookup blocks exceeded (%d > %d)", len(b.lookupBlocks), 1<<16)
}
genValueBlock(t, b.valueBlocks[0])
genValueBlock(t, &trieNode{value: make([]uint32, 64)})
for i := 2; i < len(b.valueBlocks); i++ {
genValueBlock(t, b.valueBlocks[i])
}
n := &trieNode{index: make([]*trieNode, 64)}
genLookupBlock(t, n)
genLookupBlock(t, n)
genLookupBlock(t, n)
for i := 3; i < len(b.lookupBlocks); i++ {
genLookupBlock(t, b.lookupBlocks[i])
}
return b.t, nil
}
func (t *trie) printArrays(w io.Writer, name string) (n, size int, err error) {
p := func(f string, a ...interface{}) {
nn, e := fmt.Fprintf(w, f, a...)
n += nn
if err == nil {
err = e
}
}
nv := len(t.values)
p("// %sValues: %d entries, %d bytes\n", name, nv, nv*4)
p("// Block 2 is the null block.\n")
p("var %sValues = [%d]uint32 {", name, nv)
var printnewline bool
for i, v := range t.values {
if i%blockSize == 0 {
p("\n\t// Block %#x, offset %#x", i/blockSize, i)
}
if i%4 == 0 {
printnewline = true
}
if v != 0 {
if printnewline {
p("\n\t")
printnewline = false
}
p("%#04x:%#08x, ", i, v)
}
}
p("\n}\n\n")
ni := len(t.index)
p("// %sLookup: %d entries, %d bytes\n", name, ni, ni*2)
p("// Block 0 is the null block.\n")
p("var %sLookup = [%d]uint16 {", name, ni)
printnewline = false
for i, v := range t.index {
if i%blockSize == 0 {
p("\n\t// Block %#x, offset %#x", i/blockSize, i)
}
if i%8 == 0 {
printnewline = true
}
if v != 0 {
if printnewline {
p("\n\t")
printnewline = false
}
p("%#03x:%#02x, ", i, v)
}
}
p("\n}\n\n")
return n, nv*4 + ni*2, err
}
func (t *trie) printStruct(w io.Writer, handle *trieHandle, name string) (n, sz int, err error) {
const msg = "trie{ %sLookup[%d:], %sValues[%d:], %sLookup[:], %sValues[:]}"
n, err = fmt.Fprintf(w, msg, name, handle.lookupStart*blockSize, name, handle.valueStart*blockSize, name, name)
sz += int(reflect.TypeOf(trie{}).Size())
return
}

403
vendor/golang.org/x/text/collate/collate.go generated vendored Normal file
View file

@ -0,0 +1,403 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// TODO: remove hard-coded versions when we have implemented fractional weights.
// The current implementation is incompatible with later CLDR versions.
//go:generate go run maketables.go -cldr=23 -unicode=6.2.0
// Package collate contains types for comparing and sorting Unicode strings
// according to a given collation order.
package collate // import "golang.org/x/text/collate"
import (
"bytes"
"strings"
"golang.org/x/text/internal/colltab"
"golang.org/x/text/language"
)
// Collator provides functionality for comparing strings for a given
// collation order.
type Collator struct {
options
sorter sorter
_iter [2]iter
}
func (c *Collator) iter(i int) *iter {
// TODO: evaluate performance for making the second iterator optional.
return &c._iter[i]
}
// Supported returns the list of languages for which collating differs from its parent.
func Supported() []language.Tag {
// TODO: use language.Coverage instead.
t := make([]language.Tag, len(tags))
copy(t, tags)
return t
}
func init() {
ids := strings.Split(availableLocales, ",")
tags = make([]language.Tag, len(ids))
for i, s := range ids {
tags[i] = language.Raw.MustParse(s)
}
}
var tags []language.Tag
// New returns a new Collator initialized for the given locale.
func New(t language.Tag, o ...Option) *Collator {
index := colltab.MatchLang(t, tags)
c := newCollator(getTable(locales[index]))
// Set options from the user-supplied tag.
c.setFromTag(t)
// Set the user-supplied options.
c.setOptions(o)
c.init()
return c
}
// NewFromTable returns a new Collator for the given Weighter.
func NewFromTable(w colltab.Weighter, o ...Option) *Collator {
c := newCollator(w)
c.setOptions(o)
c.init()
return c
}
func (c *Collator) init() {
if c.numeric {
c.t = colltab.NewNumericWeighter(c.t)
}
c._iter[0].init(c)
c._iter[1].init(c)
}
// Buffer holds keys generated by Key and KeyString.
type Buffer struct {
buf [4096]byte
key []byte
}
func (b *Buffer) init() {
if b.key == nil {
b.key = b.buf[:0]
}
}
// Reset clears the buffer from previous results generated by Key and KeyString.
func (b *Buffer) Reset() {
b.key = b.key[:0]
}
// Compare returns an integer comparing the two byte slices.
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
func (c *Collator) Compare(a, b []byte) int {
// TODO: skip identical prefixes once we have a fast way to detect if a rune is
// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
c.iter(0).SetInput(a)
c.iter(1).SetInput(b)
if res := c.compare(); res != 0 {
return res
}
if !c.ignore[colltab.Identity] {
return bytes.Compare(a, b)
}
return 0
}
// CompareString returns an integer comparing the two strings.
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
func (c *Collator) CompareString(a, b string) int {
// TODO: skip identical prefixes once we have a fast way to detect if a rune is
// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
c.iter(0).SetInputString(a)
c.iter(1).SetInputString(b)
if res := c.compare(); res != 0 {
return res
}
if !c.ignore[colltab.Identity] {
if a < b {
return -1
} else if a > b {
return 1
}
}
return 0
}
func compareLevel(f func(i *iter) int, a, b *iter) int {
a.pce = 0
b.pce = 0
for {
va := f(a)
vb := f(b)
if va != vb {
if va < vb {
return -1
}
return 1
} else if va == 0 {
break
}
}
return 0
}
func (c *Collator) compare() int {
ia, ib := c.iter(0), c.iter(1)
// Process primary level
if c.alternate != altShifted {
// TODO: implement script reordering
if res := compareLevel((*iter).nextPrimary, ia, ib); res != 0 {
return res
}
} else {
// TODO: handle shifted
}
if !c.ignore[colltab.Secondary] {
f := (*iter).nextSecondary
if c.backwards {
f = (*iter).prevSecondary
}
if res := compareLevel(f, ia, ib); res != 0 {
return res
}
}
// TODO: special case handling (Danish?)
if !c.ignore[colltab.Tertiary] || c.caseLevel {
if res := compareLevel((*iter).nextTertiary, ia, ib); res != 0 {
return res
}
if !c.ignore[colltab.Quaternary] {
if res := compareLevel((*iter).nextQuaternary, ia, ib); res != 0 {
return res
}
}
}
return 0
}
// Key returns the collation key for str.
// Passing the buffer buf may avoid memory allocations.
// The returned slice will point to an allocation in Buffer and will remain
// valid until the next call to buf.Reset().
func (c *Collator) Key(buf *Buffer, str []byte) []byte {
// See http://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
buf.init()
return c.key(buf, c.getColElems(str))
}
// KeyFromString returns the collation key for str.
// Passing the buffer buf may avoid memory allocations.
// The returned slice will point to an allocation in Buffer and will retain
// valid until the next call to buf.ResetKeys().
func (c *Collator) KeyFromString(buf *Buffer, str string) []byte {
// See http://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
buf.init()
return c.key(buf, c.getColElemsString(str))
}
func (c *Collator) key(buf *Buffer, w []colltab.Elem) []byte {
processWeights(c.alternate, c.t.Top(), w)
kn := len(buf.key)
c.keyFromElems(buf, w)
return buf.key[kn:]
}
func (c *Collator) getColElems(str []byte) []colltab.Elem {
i := c.iter(0)
i.SetInput(str)
for i.Next() {
}
return i.Elems
}
func (c *Collator) getColElemsString(str string) []colltab.Elem {
i := c.iter(0)
i.SetInputString(str)
for i.Next() {
}
return i.Elems
}
type iter struct {
wa [512]colltab.Elem
colltab.Iter
pce int
}
func (i *iter) init(c *Collator) {
i.Weighter = c.t
i.Elems = i.wa[:0]
}
func (i *iter) nextPrimary() int {
for {
for ; i.pce < i.N; i.pce++ {
if v := i.Elems[i.pce].Primary(); v != 0 {
i.pce++
return v
}
}
if !i.Next() {
return 0
}
}
panic("should not reach here")
}
func (i *iter) nextSecondary() int {
for ; i.pce < len(i.Elems); i.pce++ {
if v := i.Elems[i.pce].Secondary(); v != 0 {
i.pce++
return v
}
}
return 0
}
func (i *iter) prevSecondary() int {
for ; i.pce < len(i.Elems); i.pce++ {
if v := i.Elems[len(i.Elems)-i.pce-1].Secondary(); v != 0 {
i.pce++
return v
}
}
return 0
}
func (i *iter) nextTertiary() int {
for ; i.pce < len(i.Elems); i.pce++ {
if v := i.Elems[i.pce].Tertiary(); v != 0 {
i.pce++
return int(v)
}
}
return 0
}
func (i *iter) nextQuaternary() int {
for ; i.pce < len(i.Elems); i.pce++ {
if v := i.Elems[i.pce].Quaternary(); v != 0 {
i.pce++
return v
}
}
return 0
}
func appendPrimary(key []byte, p int) []byte {
// Convert to variable length encoding; supports up to 23 bits.
if p <= 0x7FFF {
key = append(key, uint8(p>>8), uint8(p))
} else {
key = append(key, uint8(p>>16)|0x80, uint8(p>>8), uint8(p))
}
return key
}
// keyFromElems converts the weights ws to a compact sequence of bytes.
// The result will be appended to the byte buffer in buf.
func (c *Collator) keyFromElems(buf *Buffer, ws []colltab.Elem) {
for _, v := range ws {
if w := v.Primary(); w > 0 {
buf.key = appendPrimary(buf.key, w)
}
}
if !c.ignore[colltab.Secondary] {
buf.key = append(buf.key, 0, 0)
// TODO: we can use one 0 if we can guarantee that all non-zero weights are > 0xFF.
if !c.backwards {
for _, v := range ws {
if w := v.Secondary(); w > 0 {
buf.key = append(buf.key, uint8(w>>8), uint8(w))
}
}
} else {
for i := len(ws) - 1; i >= 0; i-- {
if w := ws[i].Secondary(); w > 0 {
buf.key = append(buf.key, uint8(w>>8), uint8(w))
}
}
}
} else if c.caseLevel {
buf.key = append(buf.key, 0, 0)
}
if !c.ignore[colltab.Tertiary] || c.caseLevel {
buf.key = append(buf.key, 0, 0)
for _, v := range ws {
if w := v.Tertiary(); w > 0 {
buf.key = append(buf.key, uint8(w))
}
}
// Derive the quaternary weights from the options and other levels.
// Note that we represent MaxQuaternary as 0xFF. The first byte of the
// representation of a primary weight is always smaller than 0xFF,
// so using this single byte value will compare correctly.
if !c.ignore[colltab.Quaternary] && c.alternate >= altShifted {
if c.alternate == altShiftTrimmed {
lastNonFFFF := len(buf.key)
buf.key = append(buf.key, 0)
for _, v := range ws {
if w := v.Quaternary(); w == colltab.MaxQuaternary {
buf.key = append(buf.key, 0xFF)
} else if w > 0 {
buf.key = appendPrimary(buf.key, w)
lastNonFFFF = len(buf.key)
}
}
buf.key = buf.key[:lastNonFFFF]
} else {
buf.key = append(buf.key, 0)
for _, v := range ws {
if w := v.Quaternary(); w == colltab.MaxQuaternary {
buf.key = append(buf.key, 0xFF)
} else if w > 0 {
buf.key = appendPrimary(buf.key, w)
}
}
}
}
}
}
func processWeights(vw alternateHandling, top uint32, wa []colltab.Elem) {
ignore := false
vtop := int(top)
switch vw {
case altShifted, altShiftTrimmed:
for i := range wa {
if p := wa[i].Primary(); p <= vtop && p != 0 {
wa[i] = colltab.MakeQuaternary(p)
ignore = true
} else if p == 0 {
if ignore {
wa[i] = colltab.Ignore
}
} else {
ignore = false
}
}
case altBlanked:
for i := range wa {
if p := wa[i].Primary(); p <= vtop && (ignore || p != 0) {
wa[i] = colltab.Ignore
ignore = true
} else {
ignore = false
}
}
}
}

32
vendor/golang.org/x/text/collate/index.go generated vendored Normal file
View file

@ -0,0 +1,32 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package collate
import "golang.org/x/text/internal/colltab"
const blockSize = 64
func getTable(t tableIndex) *colltab.Table {
return &colltab.Table{
Index: colltab.Trie{
Index0: mainLookup[:][blockSize*t.lookupOffset:],
Values0: mainValues[:][blockSize*t.valuesOffset:],
Index: mainLookup[:],
Values: mainValues[:],
},
ExpandElem: mainExpandElem[:],
ContractTries: colltab.ContractTrieSet(mainCTEntries[:]),
ContractElem: mainContractElem[:],
MaxContractLen: 18,
VariableTop: varTop,
}
}
// tableIndex holds information for constructing a table
// for a certain locale based on the main table.
type tableIndex struct {
lookupOffset uint32
valuesOffset uint32
}

553
vendor/golang.org/x/text/collate/maketables.go generated vendored Normal file
View file

@ -0,0 +1,553 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Collation table generator.
// Data read from the web.
package main
import (
"archive/zip"
"bufio"
"bytes"
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"regexp"
"sort"
"strconv"
"strings"
"unicode/utf8"
"golang.org/x/text/collate"
"golang.org/x/text/collate/build"
"golang.org/x/text/internal/colltab"
"golang.org/x/text/internal/gen"
"golang.org/x/text/language"
"golang.org/x/text/unicode/cldr"
)
var (
test = flag.Bool("test", false,
"test existing tables; can be used to compare web data with package data.")
short = flag.Bool("short", false, `Use "short" alternatives, when available.`)
draft = flag.Bool("draft", false, `Use draft versions, when available.`)
tags = flag.String("tags", "", "build tags to be included after +build directive")
pkg = flag.String("package", "collate",
"the name of the package in which the generated file is to be included")
tables = flagStringSetAllowAll("tables", "collate", "collate,chars",
"comma-spearated list of tables to generate.")
exclude = flagStringSet("exclude", "zh2", "",
"comma-separated list of languages to exclude.")
include = flagStringSet("include", "", "",
"comma-separated list of languages to include. Include trumps exclude.")
// TODO: Not included: unihan gb2312han zhuyin big5han (for size reasons)
// TODO: Not included: traditional (buggy for Bengali)
types = flagStringSetAllowAll("types", "standard,phonebook,phonetic,reformed,pinyin,stroke", "",
"comma-separated list of types that should be included.")
)
// stringSet implements an ordered set based on a list. It implements flag.Value
// to allow a set to be specified as a comma-separated list.
type stringSet struct {
s []string
allowed *stringSet
dirty bool // needs compaction if true
all bool
allowAll bool
}
func flagStringSet(name, def, allowed, usage string) *stringSet {
ss := &stringSet{}
if allowed != "" {
usage += fmt.Sprintf(" (allowed values: any of %s)", allowed)
ss.allowed = &stringSet{}
failOnError(ss.allowed.Set(allowed))
}
ss.Set(def)
flag.Var(ss, name, usage)
return ss
}
func flagStringSetAllowAll(name, def, allowed, usage string) *stringSet {
ss := &stringSet{allowAll: true}
if allowed == "" {
flag.Var(ss, name, usage+fmt.Sprintf(` Use "all" to select all.`))
} else {
ss.allowed = &stringSet{}
failOnError(ss.allowed.Set(allowed))
flag.Var(ss, name, usage+fmt.Sprintf(` (allowed values: "all" or any of %s)`, allowed))
}
ss.Set(def)
return ss
}
func (ss stringSet) Len() int {
return len(ss.s)
}
func (ss stringSet) String() string {
return strings.Join(ss.s, ",")
}
func (ss *stringSet) Set(s string) error {
if ss.allowAll && s == "all" {
ss.s = nil
ss.all = true
return nil
}
ss.s = ss.s[:0]
for _, s := range strings.Split(s, ",") {
if s := strings.TrimSpace(s); s != "" {
if ss.allowed != nil && !ss.allowed.contains(s) {
return fmt.Errorf("unsupported value %q; must be one of %s", s, ss.allowed)
}
ss.add(s)
}
}
ss.compact()
return nil
}
func (ss *stringSet) add(s string) {
ss.s = append(ss.s, s)
ss.dirty = true
}
func (ss *stringSet) values() []string {
ss.compact()
return ss.s
}
func (ss *stringSet) contains(s string) bool {
if ss.all {
return true
}
for _, v := range ss.s {
if v == s {
return true
}
}
return false
}
func (ss *stringSet) compact() {
if !ss.dirty {
return
}
a := ss.s
sort.Strings(a)
k := 0
for i := 1; i < len(a); i++ {
if a[k] != a[i] {
a[k+1] = a[i]
k++
}
}
ss.s = a[:k+1]
ss.dirty = false
}
func skipLang(l string) bool {
if include.Len() > 0 {
return !include.contains(l)
}
return exclude.contains(l)
}
// altInclude returns a list of alternatives (for the LDML alt attribute)
// in order of preference. An empty string in this list indicates the
// default entry.
func altInclude() []string {
l := []string{}
if *short {
l = append(l, "short")
}
l = append(l, "")
// TODO: handle draft using cldr.SetDraftLevel
if *draft {
l = append(l, "proposed")
}
return l
}
func failOnError(e error) {
if e != nil {
log.Panic(e)
}
}
func openArchive() *zip.Reader {
f := gen.OpenCLDRCoreZip()
buffer, err := ioutil.ReadAll(f)
f.Close()
failOnError(err)
archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
failOnError(err)
return archive
}
// parseUCA parses a Default Unicode Collation Element Table of the format
// specified in http://www.unicode.org/reports/tr10/#File_Format.
// It returns the variable top.
func parseUCA(builder *build.Builder) {
var r io.ReadCloser
var err error
for _, f := range openArchive().File {
if strings.HasSuffix(f.Name, "allkeys_CLDR.txt") {
r, err = f.Open()
}
}
if r == nil {
log.Fatal("File allkeys_CLDR.txt not found in archive.")
}
failOnError(err)
defer r.Close()
scanner := bufio.NewScanner(r)
colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`)
for i := 1; scanner.Scan(); i++ {
line := scanner.Text()
if len(line) == 0 || line[0] == '#' {
continue
}
if line[0] == '@' {
// parse properties
switch {
case strings.HasPrefix(line[1:], "version "):
a := strings.Split(line[1:], " ")
if a[1] != gen.UnicodeVersion() {
log.Fatalf("incompatible version %s; want %s", a[1], gen.UnicodeVersion())
}
case strings.HasPrefix(line[1:], "backwards "):
log.Fatalf("%d: unsupported option backwards", i)
default:
log.Printf("%d: unknown option %s", i, line[1:])
}
} else {
// parse entries
part := strings.Split(line, " ; ")
if len(part) != 2 {
log.Fatalf("%d: production rule without ';': %v", i, line)
}
lhs := []rune{}
for _, v := range strings.Split(part[0], " ") {
if v == "" {
continue
}
lhs = append(lhs, rune(convHex(i, v)))
}
var n int
var vars []int
rhs := [][]int{}
for i, m := range colelem.FindAllStringSubmatch(part[1], -1) {
n += len(m[0])
elem := []int{}
for _, h := range strings.Split(m[2], ".") {
elem = append(elem, convHex(i, h))
}
if m[1] == "*" {
vars = append(vars, i)
}
rhs = append(rhs, elem)
}
if len(part[1]) < n+3 || part[1][n+1] != '#' {
log.Fatalf("%d: expected comment; found %s", i, part[1][n:])
}
if *test {
testInput.add(string(lhs))
}
failOnError(builder.Add(lhs, rhs, vars))
}
}
if scanner.Err() != nil {
log.Fatal(scanner.Err())
}
}
func convHex(line int, s string) int {
r, e := strconv.ParseInt(s, 16, 32)
if e != nil {
log.Fatalf("%d: %v", line, e)
}
return int(r)
}
var testInput = stringSet{}
var charRe = regexp.MustCompile(`&#x([0-9A-F]*);`)
var tagRe = regexp.MustCompile(`<([a-z_]*) */>`)
var mainLocales = []string{}
// charsets holds a list of exemplar characters per category.
type charSets map[string][]string
func (p charSets) fprint(w io.Writer) {
fmt.Fprintln(w, "[exN]string{")
for i, k := range []string{"", "contractions", "punctuation", "auxiliary", "currencySymbol", "index"} {
if set := p[k]; len(set) != 0 {
fmt.Fprintf(w, "\t\t%d: %q,\n", i, strings.Join(set, " "))
}
}
fmt.Fprintln(w, "\t},")
}
var localeChars = make(map[string]charSets)
const exemplarHeader = `
type exemplarType int
const (
exCharacters exemplarType = iota
exContractions
exPunctuation
exAuxiliary
exCurrency
exIndex
exN
)
`
func printExemplarCharacters(w io.Writer) {
fmt.Fprintln(w, exemplarHeader)
fmt.Fprintln(w, "var exemplarCharacters = map[string][exN]string{")
for _, loc := range mainLocales {
fmt.Fprintf(w, "\t%q: ", loc)
localeChars[loc].fprint(w)
}
fmt.Fprintln(w, "}")
}
func decodeCLDR(d *cldr.Decoder) *cldr.CLDR {
r := gen.OpenCLDRCoreZip()
data, err := d.DecodeZip(r)
failOnError(err)
return data
}
// parseMain parses XML files in the main directory of the CLDR core.zip file.
func parseMain() {
d := &cldr.Decoder{}
d.SetDirFilter("main")
d.SetSectionFilter("characters")
data := decodeCLDR(d)
for _, loc := range data.Locales() {
x := data.RawLDML(loc)
if skipLang(x.Identity.Language.Type) {
continue
}
if x.Characters != nil {
x, _ = data.LDML(loc)
loc = language.Make(loc).String()
for _, ec := range x.Characters.ExemplarCharacters {
if ec.Draft != "" {
continue
}
if _, ok := localeChars[loc]; !ok {
mainLocales = append(mainLocales, loc)
localeChars[loc] = make(charSets)
}
localeChars[loc][ec.Type] = parseCharacters(ec.Data())
}
}
}
}
func parseCharacters(chars string) []string {
parseSingle := func(s string) (r rune, tail string, escaped bool) {
if s[0] == '\\' {
return rune(s[1]), s[2:], true
}
r, sz := utf8.DecodeRuneInString(s)
return r, s[sz:], false
}
chars = strings.TrimSpace(chars)
if n := len(chars) - 1; chars[n] == ']' && chars[0] == '[' {
chars = chars[1:n]
}
list := []string{}
var r, last, end rune
for len(chars) > 0 {
if chars[0] == '{' { // character sequence
buf := []rune{}
for chars = chars[1:]; len(chars) > 0; {
r, chars, _ = parseSingle(chars)
if r == '}' {
break
}
if r == ' ' {
log.Fatalf("space not supported in sequence %q", chars)
}
buf = append(buf, r)
}
list = append(list, string(buf))
last = 0
} else { // single character
escaped := false
r, chars, escaped = parseSingle(chars)
if r != ' ' {
if r == '-' && !escaped {
if last == 0 {
log.Fatal("'-' should be preceded by a character")
}
end, chars, _ = parseSingle(chars)
for ; last <= end; last++ {
list = append(list, string(last))
}
last = 0
} else {
list = append(list, string(r))
last = r
}
}
}
}
return list
}
var fileRe = regexp.MustCompile(`.*/collation/(.*)\.xml`)
// typeMap translates legacy type keys to their BCP47 equivalent.
var typeMap = map[string]string{
"phonebook": "phonebk",
"traditional": "trad",
}
// parseCollation parses XML files in the collation directory of the CLDR core.zip file.
func parseCollation(b *build.Builder) {
d := &cldr.Decoder{}
d.SetDirFilter("collation")
data := decodeCLDR(d)
for _, loc := range data.Locales() {
x, err := data.LDML(loc)
failOnError(err)
if skipLang(x.Identity.Language.Type) {
continue
}
cs := x.Collations.Collation
sl := cldr.MakeSlice(&cs)
if len(types.s) == 0 {
sl.SelectAnyOf("type", x.Collations.Default())
} else if !types.all {
sl.SelectAnyOf("type", types.s...)
}
sl.SelectOnePerGroup("alt", altInclude())
for _, c := range cs {
id, err := language.Parse(loc)
if err != nil {
fmt.Fprintf(os.Stderr, "invalid locale: %q", err)
continue
}
// Support both old- and new-style defaults.
d := c.Type
if x.Collations.DefaultCollation == nil {
d = x.Collations.Default()
} else {
d = x.Collations.DefaultCollation.Data()
}
// We assume tables are being built either for search or collation,
// but not both. For search the default is always "search".
if d != c.Type && c.Type != "search" {
typ := c.Type
if len(c.Type) > 8 {
typ = typeMap[c.Type]
}
id, err = id.SetTypeForKey("co", typ)
failOnError(err)
}
t := b.Tailoring(id)
c.Process(processor{t})
}
}
}
type processor struct {
t *build.Tailoring
}
func (p processor) Reset(anchor string, before int) (err error) {
if before != 0 {
err = p.t.SetAnchorBefore(anchor)
} else {
err = p.t.SetAnchor(anchor)
}
failOnError(err)
return nil
}
func (p processor) Insert(level int, str, context, extend string) error {
str = context + str
if *test {
testInput.add(str)
}
// TODO: mimic bug in old maketables: remove.
err := p.t.Insert(colltab.Level(level-1), str, context+extend)
failOnError(err)
return nil
}
func (p processor) Index(id string) {
}
func testCollator(c *collate.Collator) {
c0 := collate.New(language.Und)
// iterator over all characters for all locales and check
// whether Key is equal.
buf := collate.Buffer{}
// Add all common and not too uncommon runes to the test set.
for i := rune(0); i < 0x30000; i++ {
testInput.add(string(i))
}
for i := rune(0xE0000); i < 0xF0000; i++ {
testInput.add(string(i))
}
for _, str := range testInput.values() {
k0 := c0.KeyFromString(&buf, str)
k := c.KeyFromString(&buf, str)
if !bytes.Equal(k0, k) {
failOnError(fmt.Errorf("test:%U: keys differ (%x vs %x)", []rune(str), k0, k))
}
buf.Reset()
}
fmt.Println("PASS")
}
func main() {
gen.Init()
b := build.NewBuilder()
parseUCA(b)
if tables.contains("chars") {
parseMain()
}
parseCollation(b)
c, err := b.Build()
failOnError(err)
if *test {
testCollator(collate.NewFromTable(c))
} else {
w := &bytes.Buffer{}
gen.WriteUnicodeVersion(w)
gen.WriteCLDRVersion(w)
if tables.contains("collate") {
_, err = b.Print(w)
failOnError(err)
}
if tables.contains("chars") {
printExemplarCharacters(w)
}
gen.WriteGoFile("tables.go", *pkg, w.Bytes())
}
}

239
vendor/golang.org/x/text/collate/option.go generated vendored Normal file
View file

@ -0,0 +1,239 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package collate
import (
"sort"
"golang.org/x/text/internal/colltab"
"golang.org/x/text/language"
"golang.org/x/text/unicode/norm"
)
// newCollator creates a new collator with default options configured.
func newCollator(t colltab.Weighter) *Collator {
// Initialize a collator with default options.
c := &Collator{
options: options{
ignore: [colltab.NumLevels]bool{
colltab.Quaternary: true,
colltab.Identity: true,
},
f: norm.NFD,
t: t,
},
}
// TODO: store vt in tags or remove.
c.variableTop = t.Top()
return c
}
// An Option is used to change the behavior of a Collator. Options override the
// settings passed through the locale identifier.
type Option struct {
priority int
f func(o *options)
}
type prioritizedOptions []Option
func (p prioritizedOptions) Len() int {
return len(p)
}
func (p prioritizedOptions) Swap(i, j int) {
p[i], p[j] = p[j], p[i]
}
func (p prioritizedOptions) Less(i, j int) bool {
return p[i].priority < p[j].priority
}
type options struct {
// ignore specifies which levels to ignore.
ignore [colltab.NumLevels]bool
// caseLevel is true if there is an additional level of case matching
// between the secondary and tertiary levels.
caseLevel bool
// backwards specifies the order of sorting at the secondary level.
// This option exists predominantly to support reverse sorting of accents in French.
backwards bool
// numeric specifies whether any sequence of decimal digits (category is Nd)
// is sorted at a primary level with its numeric value.
// For example, "A-21" < "A-123".
// This option is set by wrapping the main Weighter with NewNumericWeighter.
numeric bool
// alternate specifies an alternative handling of variables.
alternate alternateHandling
// variableTop is the largest primary value that is considered to be
// variable.
variableTop uint32
t colltab.Weighter
f norm.Form
}
func (o *options) setOptions(opts []Option) {
sort.Sort(prioritizedOptions(opts))
for _, x := range opts {
x.f(o)
}
}
// OptionsFromTag extracts the BCP47 collation options from the tag and
// configures a collator accordingly. These options are set before any other
// option.
func OptionsFromTag(t language.Tag) Option {
return Option{0, func(o *options) {
o.setFromTag(t)
}}
}
func (o *options) setFromTag(t language.Tag) {
o.caseLevel = ldmlBool(t, o.caseLevel, "kc")
o.backwards = ldmlBool(t, o.backwards, "kb")
o.numeric = ldmlBool(t, o.numeric, "kn")
// Extract settings from the BCP47 u extension.
switch t.TypeForKey("ks") { // strength
case "level1":
o.ignore[colltab.Secondary] = true
o.ignore[colltab.Tertiary] = true
case "level2":
o.ignore[colltab.Tertiary] = true
case "level3", "":
// The default.
case "level4":
o.ignore[colltab.Quaternary] = false
case "identic":
o.ignore[colltab.Quaternary] = false
o.ignore[colltab.Identity] = false
}
switch t.TypeForKey("ka") {
case "shifted":
o.alternate = altShifted
// The following two types are not official BCP47, but we support them to
// give access to this otherwise hidden functionality. The name blanked is
// derived from the LDML name blanked and posix reflects the main use of
// the shift-trimmed option.
case "blanked":
o.alternate = altBlanked
case "posix":
o.alternate = altShiftTrimmed
}
// TODO: caseFirst ("kf"), reorder ("kr"), and maybe variableTop ("vt").
// Not used:
// - normalization ("kk", not necessary for this implementation)
// - hiraganaQuatenary ("kh", obsolete)
}
func ldmlBool(t language.Tag, old bool, key string) bool {
switch t.TypeForKey(key) {
case "true":
return true
case "false":
return false
default:
return old
}
}
var (
// IgnoreCase sets case-insensitive comparison.
IgnoreCase Option = ignoreCase
ignoreCase = Option{3, ignoreCaseF}
// IgnoreDiacritics causes diacritical marks to be ignored. ("o" == "ö").
IgnoreDiacritics Option = ignoreDiacritics
ignoreDiacritics = Option{3, ignoreDiacriticsF}
// IgnoreWidth causes full-width characters to match their half-width
// equivalents.
IgnoreWidth Option = ignoreWidth
ignoreWidth = Option{2, ignoreWidthF}
// Loose sets the collator to ignore diacritics, case and weight.
Loose Option = loose
loose = Option{4, looseF}
// Force ordering if strings are equivalent but not equal.
Force Option = force
force = Option{5, forceF}
// Numeric specifies that numbers should sort numerically ("2" < "12").
Numeric Option = numeric
numeric = Option{5, numericF}
)
func ignoreWidthF(o *options) {
o.ignore[colltab.Tertiary] = true
o.caseLevel = true
}
func ignoreDiacriticsF(o *options) {
o.ignore[colltab.Secondary] = true
}
func ignoreCaseF(o *options) {
o.ignore[colltab.Tertiary] = true
o.caseLevel = false
}
func looseF(o *options) {
ignoreWidthF(o)
ignoreDiacriticsF(o)
ignoreCaseF(o)
}
func forceF(o *options) {
o.ignore[colltab.Identity] = false
}
func numericF(o *options) { o.numeric = true }
// Reorder overrides the pre-defined ordering of scripts and character sets.
func Reorder(s ...string) Option {
// TODO: need fractional weights to implement this.
panic("TODO: implement")
}
// TODO: consider making these public again. These options cannot be fully
// specified in BCP47, so an API interface seems warranted. Still a higher-level
// interface would be nice (e.g. a POSIX option for enabling altShiftTrimmed)
// alternateHandling identifies the various ways in which variables are handled.
// A rune with a primary weight lower than the variable top is considered a
// variable.
// See http://www.unicode.org/reports/tr10/#Variable_Weighting for details.
type alternateHandling int
const (
// altNonIgnorable turns off special handling of variables.
altNonIgnorable alternateHandling = iota
// altBlanked sets variables and all subsequent primary ignorables to be
// ignorable at all levels. This is identical to removing all variables
// and subsequent primary ignorables from the input.
altBlanked
// altShifted sets variables to be ignorable for levels one through three and
// adds a fourth level based on the values of the ignored levels.
altShifted
// altShiftTrimmed is a slight variant of altShifted that is used to
// emulate POSIX.
altShiftTrimmed
)

81
vendor/golang.org/x/text/collate/sort.go generated vendored Normal file
View file

@ -0,0 +1,81 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package collate
import (
"bytes"
"sort"
)
const (
maxSortBuffer = 40960
maxSortEntries = 4096
)
type swapper interface {
Swap(i, j int)
}
type sorter struct {
buf *Buffer
keys [][]byte
src swapper
}
func (s *sorter) init(n int) {
if s.buf == nil {
s.buf = &Buffer{}
s.buf.init()
}
if cap(s.keys) < n {
s.keys = make([][]byte, n)
}
s.keys = s.keys[0:n]
}
func (s *sorter) sort(src swapper) {
s.src = src
sort.Sort(s)
}
func (s sorter) Len() int {
return len(s.keys)
}
func (s sorter) Less(i, j int) bool {
return bytes.Compare(s.keys[i], s.keys[j]) == -1
}
func (s sorter) Swap(i, j int) {
s.keys[i], s.keys[j] = s.keys[j], s.keys[i]
s.src.Swap(i, j)
}
// A Lister can be sorted by Collator's Sort method.
type Lister interface {
Len() int
Swap(i, j int)
// Bytes returns the bytes of the text at index i.
Bytes(i int) []byte
}
// Sort uses sort.Sort to sort the strings represented by x using the rules of c.
func (c *Collator) Sort(x Lister) {
n := x.Len()
c.sorter.init(n)
for i := 0; i < n; i++ {
c.sorter.keys[i] = c.Key(c.sorter.buf, x.Bytes(i))
}
c.sorter.sort(x)
}
// SortStrings uses sort.Sort to sort the strings in x using the rules of c.
func (c *Collator) SortStrings(x []string) {
c.sorter.init(len(x))
for i, s := range x {
c.sorter.keys[i] = c.KeyFromString(c.sorter.buf, s)
}
c.sorter.sort(sort.StringSlice(x))
}

73789
vendor/golang.org/x/text/collate/tables.go generated vendored Normal file

File diff suppressed because it is too large Load diff