mirror of
https://github.com/kubernetes-sigs/prometheus-adapter.git
synced 2026-04-06 17:57:51 +00:00
Travis seems to be having issues pulling deps, so we'll have to check in the vendor directory and prevent the makefile from trying to regenerate it normally.
702 lines
20 KiB
Go
702 lines
20 KiB
Go
// Copyright 2012 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package build // import "golang.org/x/text/collate/build"
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"log"
|
|
"sort"
|
|
"strings"
|
|
"unicode/utf8"
|
|
|
|
"golang.org/x/text/internal/colltab"
|
|
"golang.org/x/text/language"
|
|
"golang.org/x/text/unicode/norm"
|
|
)
|
|
|
|
// TODO: optimizations:
|
|
// - expandElem is currently 20K. By putting unique colElems in a separate
|
|
// table and having a byte array of indexes into this table, we can reduce
|
|
// the total size to about 7K. By also factoring out the length bytes, we
|
|
// can reduce this to about 6K.
|
|
// - trie valueBlocks are currently 100K. There are a lot of sparse blocks
|
|
// and many consecutive values with the same stride. This can be further
|
|
// compacted.
|
|
// - Compress secondary weights into 8 bits.
|
|
// - Some LDML specs specify a context element. Currently we simply concatenate
|
|
// those. Context can be implemented using the contraction trie. If Builder
|
|
// could analyze and detect when using a context makes sense, there is no
|
|
// need to expose this construct in the API.
|
|
|
|
// A Builder builds a root collation table. The user must specify the
// collation elements for each entry. A common use will be to base the weights
// on those specified in the allkeys* file as provided by the UCA or CLDR.
type Builder struct {
	index  *trieBuilder // receives one trie per ordering (see buildOrdering) and generates the final index
	root   ordering     // entries of the root table, populated by Add
	locale []*Tailoring // tailorings created through Tailoring, in creation order
	t      *table       // table under construction; set by build
	err    error        // most recent error recorded through error or errorID
	built  bool         // set once build has run, so that later calls return the cached result

	minNonVar int // lowest primary recorded for a non-variable (0 while none has been seen)
	varTop    int // highest primary recorded for a variable

	// indexes used for reusing expansions and contractions
	expIndex map[string]int      // positions of expansions keyed by their string representation
	ctHandle map[string]ctHandle // contraction handles keyed by a concatenation of the suffixes
	ctElem   map[string]int      // contraction elements keyed by their string representation
}
|
|
|
|
// A Tailoring builds a collation table based on another collation table.
// The table is defined by specifying tailorings to the underlying table.
// See http://unicode.org/reports/tr35/ for an overview of tailoring
// collation tables. The CLDR contains pre-defined tailorings for a variety
// of languages (See http://www.unicode.org/Public/cldr/<version>/core.zip.)
type Tailoring struct {
	id      string    // string form of the language tag this tailoring was created for
	builder *Builder  // Builder that created this tailoring
	index   *ordering // clone of the builder's root ordering, modified by SetAnchor and Insert

	anchor *entry // entry relative to which the next Insert is placed; nil until an anchor is set
	before bool   // if true, the next Insert places its entry before anchor
}
|
|
|
|
// NewBuilder returns a new Builder.
|
|
func NewBuilder() *Builder {
|
|
return &Builder{
|
|
index: newTrieBuilder(),
|
|
root: makeRootOrdering(),
|
|
expIndex: make(map[string]int),
|
|
ctHandle: make(map[string]ctHandle),
|
|
ctElem: make(map[string]int),
|
|
}
|
|
}
|
|
|
|
// Tailoring returns a Tailoring for the given locale. One should
|
|
// have completed all calls to Add before calling Tailoring.
|
|
func (b *Builder) Tailoring(loc language.Tag) *Tailoring {
|
|
t := &Tailoring{
|
|
id: loc.String(),
|
|
builder: b,
|
|
index: b.root.clone(),
|
|
}
|
|
t.index.id = t.id
|
|
b.locale = append(b.locale, t)
|
|
return t
|
|
}
|
|
|
|
// Add adds an entry to the collation element table, mapping
|
|
// a slice of runes to a sequence of collation elements.
|
|
// A collation element is specified as list of weights: []int{primary, secondary, ...}.
|
|
// The entries are typically obtained from a collation element table
|
|
// as defined in http://www.unicode.org/reports/tr10/#Data_Table_Format.
|
|
// Note that the collation elements specified by colelems are only used
|
|
// as a guide. The actual weights generated by Builder may differ.
|
|
// The argument variables is a list of indices into colelems that should contain
|
|
// a value for each colelem that is a variable. (See the reference above.)
|
|
func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
|
|
str := string(runes)
|
|
elems := make([]rawCE, len(colelems))
|
|
for i, ce := range colelems {
|
|
if len(ce) == 0 {
|
|
break
|
|
}
|
|
elems[i] = makeRawCE(ce, 0)
|
|
if len(ce) == 1 {
|
|
elems[i].w[1] = defaultSecondary
|
|
}
|
|
if len(ce) <= 2 {
|
|
elems[i].w[2] = defaultTertiary
|
|
}
|
|
if len(ce) <= 3 {
|
|
elems[i].w[3] = ce[0]
|
|
}
|
|
}
|
|
for i, ce := range elems {
|
|
p := ce.w[0]
|
|
isvar := false
|
|
for _, j := range variables {
|
|
if i == j {
|
|
isvar = true
|
|
}
|
|
}
|
|
if isvar {
|
|
if p >= b.minNonVar && b.minNonVar > 0 {
|
|
return fmt.Errorf("primary value %X of variable is larger than the smallest non-variable %X", p, b.minNonVar)
|
|
}
|
|
if p > b.varTop {
|
|
b.varTop = p
|
|
}
|
|
} else if p > 1 { // 1 is a special primary value reserved for FFFE
|
|
if p <= b.varTop {
|
|
return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", p, b.varTop)
|
|
}
|
|
if b.minNonVar == 0 || p < b.minNonVar {
|
|
b.minNonVar = p
|
|
}
|
|
}
|
|
}
|
|
elems, err := convertLargeWeights(elems)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
cccs := []uint8{}
|
|
nfd := norm.NFD.String(str)
|
|
for i := range nfd {
|
|
cccs = append(cccs, norm.NFD.PropertiesString(nfd[i:]).CCC())
|
|
}
|
|
if len(cccs) < len(elems) {
|
|
if len(cccs) > 2 {
|
|
return fmt.Errorf("number of decomposed characters should be greater or equal to the number of collation elements for len(colelems) > 3 (%d < %d)", len(cccs), len(elems))
|
|
}
|
|
p := len(elems) - 1
|
|
for ; p > 0 && elems[p].w[0] == 0; p-- {
|
|
elems[p].ccc = cccs[len(cccs)-1]
|
|
}
|
|
for ; p >= 0; p-- {
|
|
elems[p].ccc = cccs[0]
|
|
}
|
|
} else {
|
|
for i := range elems {
|
|
elems[i].ccc = cccs[i]
|
|
}
|
|
}
|
|
// doNorm in collate.go assumes that the following conditions hold.
|
|
if len(elems) > 1 && len(cccs) > 1 && cccs[0] != 0 && cccs[0] != cccs[len(cccs)-1] {
|
|
return fmt.Errorf("incompatible CCC values for expansion %X (%d)", runes, cccs)
|
|
}
|
|
b.root.newEntry(str, elems)
|
|
return nil
|
|
}
|
|
|
|
func (t *Tailoring) setAnchor(anchor string) error {
|
|
anchor = norm.NFC.String(anchor)
|
|
a := t.index.find(anchor)
|
|
if a == nil {
|
|
a = t.index.newEntry(anchor, nil)
|
|
a.implicit = true
|
|
a.modified = true
|
|
for _, r := range []rune(anchor) {
|
|
e := t.index.find(string(r))
|
|
e.lock = true
|
|
}
|
|
}
|
|
t.anchor = a
|
|
return nil
|
|
}
|
|
|
|
// SetAnchor sets the point after which elements passed in subsequent calls to
|
|
// Insert will be inserted. It is equivalent to the reset directive in an LDML
|
|
// specification. See Insert for an example.
|
|
// SetAnchor supports the following logical reset positions:
|
|
// <first_tertiary_ignorable/>, <last_teriary_ignorable/>, <first_primary_ignorable/>,
|
|
// and <last_non_ignorable/>.
|
|
func (t *Tailoring) SetAnchor(anchor string) error {
|
|
if err := t.setAnchor(anchor); err != nil {
|
|
return err
|
|
}
|
|
t.before = false
|
|
return nil
|
|
}
|
|
|
|
// SetAnchorBefore is similar to SetAnchor, except that subsequent calls to
|
|
// Insert will insert entries before the anchor.
|
|
func (t *Tailoring) SetAnchorBefore(anchor string) error {
|
|
if err := t.setAnchor(anchor); err != nil {
|
|
return err
|
|
}
|
|
t.before = true
|
|
return nil
|
|
}
|
|
|
|
// Insert sets the ordering of str relative to the entry set by the previous
// call to SetAnchor or Insert. The argument extend corresponds
// to the extend elements as defined in LDML. A non-empty value for extend
// will cause the collation elements corresponding to extend to be appended
// to the collation elements generated for the entry added by Insert.
// This has the same net effect as sorting str after the string anchor+extend.
// See http://www.unicode.org/reports/tr10/#Tailoring_Example for details
// on parametric tailoring and http://unicode.org/reports/tr35/#Collation_Elements
// for full details on LDML.
//
// Insert returns an error if no anchor has been set, if str is a logical
// reset position, or if the entry for str is locked. After a successful call
// the inserted entry becomes the new anchor.
//
// Examples: create a tailoring for Swedish, where "ä" is ordered after "z"
// at the primary sorting level:
//
//	t := b.Tailoring(language.MustParse("se"))
//	t.SetAnchor("z")
//	t.Insert(colltab.Primary, "ä", "")
//
// Order "ü" after "ue" at the secondary sorting level:
//
//	t.SetAnchor("ue")
//	t.Insert(colltab.Secondary, "ü","")
//
// or
//
//	t.SetAnchor("u")
//	t.Insert(colltab.Secondary, "ü", "e")
//
// Order "q" after "ab" at the secondary level and "Q" after "q"
// at the tertiary level:
//
//	t.SetAnchor("ab")
//	t.Insert(colltab.Secondary, "q", "")
//	t.Insert(colltab.Tertiary, "Q", "")
//
// Order "b" before "a":
//
//	t.SetAnchorBefore("a")
//	t.Insert(colltab.Primary, "b", "")
//
// Order "0" after the last primary ignorable:
//
//	t.SetAnchor("<last_primary_ignorable/>")
//	t.Insert(colltab.Primary, "0", "")
func (t *Tailoring) Insert(level colltab.Level, str, extend string) error {
	if t.anchor == nil {
		return fmt.Errorf("%s:Insert: no anchor point set for tailoring of %s", t.id, str)
	}
	// Entries are keyed by their NFC form; reuse an existing entry if present.
	str = norm.NFC.String(str)
	e := t.index.find(str)
	if e == nil {
		e = t.index.newEntry(str, nil)
	} else if e.logical != noAnchor {
		return fmt.Errorf("%s:Insert: cannot reinsert logical reset position %q", t.id, e.str)
	}
	// Entries are locked by setAnchor when they are a rune of an implicitly
	// created anchor.
	if e.lock {
		return fmt.Errorf("%s:Insert: cannot reinsert element %q", t.id, e.str)
	}
	a := t.anchor
	// Find the first element after the anchor which differs at a level smaller or
	// equal to the given level. Then insert at this position.
	// See http://unicode.org/reports/tr35/#Collation_Elements, Section 5.14.5 for details.
	e.before = t.before // remembered on the entry; getWeight treats such entries differently
	if t.before {
		// Only the first Insert after SetAnchorBefore goes before the anchor.
		t.before = false
		if a.prev == nil {
			a.insertBefore(e)
		} else {
			// Walk backwards from the anchor's predecessor to the first entry
			// whose level is not greater than the requested one.
			for a = a.prev; a.level > level; a = a.prev {
			}
			a.insertAfter(e)
		}
		e.level = level
	} else {
		// Walk forwards from the anchor to the first entry whose level is not
		// greater than the requested one.
		for ; a.level > level; a = a.next {
		}
		// The new entry takes over the level of the entry it is placed after,
		// which in turn gets the requested level.
		e.level = a.level
		if a != e {
			a.insertAfter(e)
			a.level = level
		} else {
			// We don't set a to prev itself. This has the effect of the entry
			// getting new collation elements that are an increment of itself.
			// This is intentional.
			a.prev.level = level
		}
	}
	e.extend = norm.NFD.String(extend)
	e.exclude = false
	e.modified = true
	e.elems = nil // forces the weights to be recomputed by getWeight
	t.anchor = e  // subsequent Inserts are relative to the entry just inserted
	return nil
}
|
|
|
|
// getWeight returns the raw collation elements of e. If e has no elements yet
// and is not a logical anchor (e.logical == noAnchor), they are first computed
// and stored in e.elems:
//   - an implicit entry gets the concatenation of the weights of its runes;
//   - an entry inserted before its anchor (e.before) gets a copy of the first
//     element of the next entry that has elements or is implicit, with one
//     weight decremented;
//   - any other entry gets the result of nextWeight applied to the level and
//     weights of its predecessor.
//
// The result of verifyWeights is ignored.
func (o *ordering) getWeight(e *entry) []rawCE {
	if len(e.elems) == 0 && e.logical == noAnchor {
		if e.implicit {
			for _, r := range e.runes {
				e.elems = append(e.elems, o.getWeight(o.find(string(r)))...)
			}
		} else if e.before {
			// count[l] is the number of entries with level l between e
			// (inclusive) and the entry a at which the walk below stops.
			count := [colltab.Identity + 1]int{}
			a := e
			for ; a.elems == nil && !a.implicit; a = a.next {
				count[a.level]++
			}
			// NOTE(review): if the walk stops at an implicit entry whose elems
			// are still nil, a.elems[0] would panic — confirm that cannot happen.
			e.elems = []rawCE{makeRawCE(a.elems[0].w, a.elems[0].ccc)}
			// Decrement the weight at the first level (below Quaternary) for
			// which entries were counted, so that e sorts before a.
			for i := colltab.Primary; i < colltab.Quaternary; i++ {
				if count[i] != 0 {
					e.elems[0].w[i] -= count[i]
					break
				}
			}
			if e.prev != nil {
				o.verifyWeights(e.prev, e, e.prev.level)
			}
		} else {
			prev := e.prev
			e.elems = nextWeight(prev.level, o.getWeight(prev))
			o.verifyWeights(e, e.next, e.level)
		}
	}
	return e.elems
}
|
|
|
|
func (o *ordering) addExtension(e *entry) {
|
|
if ex := o.find(e.extend); ex != nil {
|
|
e.elems = append(e.elems, ex.elems...)
|
|
} else {
|
|
for _, r := range []rune(e.extend) {
|
|
e.elems = append(e.elems, o.find(string(r)).elems...)
|
|
}
|
|
}
|
|
e.extend = ""
|
|
}
|
|
|
|
func (o *ordering) verifyWeights(a, b *entry, level colltab.Level) error {
|
|
if level == colltab.Identity || b == nil || b.elems == nil || a.elems == nil {
|
|
return nil
|
|
}
|
|
for i := colltab.Primary; i < level; i++ {
|
|
if a.elems[0].w[i] < b.elems[0].w[i] {
|
|
return nil
|
|
}
|
|
}
|
|
if a.elems[0].w[level] >= b.elems[0].w[level] {
|
|
err := fmt.Errorf("%s:overflow: collation elements of %q (%X) overflows those of %q (%X) at level %d (%X >= %X)", o.id, a.str, a.runes, b.str, b.runes, level, a.elems, b.elems)
|
|
log.Println(err)
|
|
// TODO: return the error instead, or better, fix the conflicting entry by making room.
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (b *Builder) error(e error) {
|
|
if e != nil {
|
|
b.err = e
|
|
}
|
|
}
|
|
|
|
func (b *Builder) errorID(locale string, e error) {
|
|
if e != nil {
|
|
b.err = fmt.Errorf("%s:%v", locale, e)
|
|
}
|
|
}
|
|
|
|
// patchNorm ensures that NFC and NFD counterparts are consistent.
// It makes three passes over o.ordered:
//  1. propagate the elements of entries to the entries of their NFD form,
//     creating such entries where needed;
//  2. refresh unmodified entries whose NFD form differs from their string,
//     using the elements of the NFD form;
//  3. mark multi-rune entries as excluded if genColElems already produces
//     their elements from the string itself.
func (o *ordering) patchNorm() {
	// Insert the NFD counterparts, if necessary.
	for _, e := range o.ordered {
		nfd := norm.NFD.String(e.str)
		if nfd != e.str {
			if e0 := o.find(nfd); e0 != nil && !e0.modified {
				// An existing, unmodified NFD entry takes over the elements of e.
				e0.elems = e.elems
			} else if e.modified && !equalCEArrays(o.genColElems(nfd), e.elems) {
				// Reached when there is no NFD entry or it was modified itself.
				// The decomposed form would otherwise generate different
				// elements than the modified entry, so add an explicit entry.
				// Note that this e shadows the loop variable.
				e := o.newEntry(nfd, e.elems)
				e.modified = true
			}
		}
	}
	// Update unchanged composed forms if one of their parts changed.
	for _, e := range o.ordered {
		nfd := norm.NFD.String(e.str)
		if e.modified || nfd == e.str {
			continue
		}
		if e0 := o.find(nfd); e0 != nil {
			e.elems = e0.elems
		} else {
			e.elems = o.genColElems(nfd)
			if norm.NFD.LastBoundary([]byte(nfd)) == 0 {
				// The last boundary of nfd is at its start. Greedily recompose
				// the first rune with each following rune for which a modified
				// entry exists (head); all other runes are collected in tail.
				r := []rune(nfd)
				head := string(r[0])
				tail := ""
				for i := 1; i < len(r); i++ {
					s := norm.NFC.String(head + string(r[i]))
					if e0 := o.find(s); e0 != nil && e0.modified {
						head = s
					} else {
						tail += string(r[i])
					}
				}
				e.elems = append(o.genColElems(head), o.genColElems(tail)...)
			}
		}
	}
	// Exclude entries for which the individual runes generate the same collation elements.
	for _, e := range o.ordered {
		if len(e.runes) > 1 && equalCEArrays(o.genColElems(e.str), e.elems) {
			e.exclude = true
		}
	}
}
|
|
|
|
func (b *Builder) buildOrdering(o *ordering) {
|
|
for _, e := range o.ordered {
|
|
o.getWeight(e)
|
|
}
|
|
for _, e := range o.ordered {
|
|
o.addExtension(e)
|
|
}
|
|
o.patchNorm()
|
|
o.sort()
|
|
simplify(o)
|
|
b.processExpansions(o) // requires simplify
|
|
b.processContractions(o) // requires simplify
|
|
|
|
t := newNode()
|
|
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
|
if !e.skip() {
|
|
ce, err := e.encode()
|
|
b.errorID(o.id, err)
|
|
t.insert(e.runes[0], ce)
|
|
}
|
|
}
|
|
o.handle = b.index.addTrie(t)
|
|
}
|
|
|
|
func (b *Builder) build() (*table, error) {
|
|
if b.built {
|
|
return b.t, b.err
|
|
}
|
|
b.built = true
|
|
b.t = &table{
|
|
Table: colltab.Table{
|
|
MaxContractLen: utf8.UTFMax,
|
|
VariableTop: uint32(b.varTop),
|
|
},
|
|
}
|
|
|
|
b.buildOrdering(&b.root)
|
|
b.t.root = b.root.handle
|
|
for _, t := range b.locale {
|
|
b.buildOrdering(t.index)
|
|
if b.err != nil {
|
|
break
|
|
}
|
|
}
|
|
i, err := b.index.generate()
|
|
b.t.trie = *i
|
|
b.t.Index = colltab.Trie{
|
|
Index: i.index,
|
|
Values: i.values,
|
|
Index0: i.index[blockSize*b.t.root.lookupStart:],
|
|
Values0: i.values[blockSize*b.t.root.valueStart:],
|
|
}
|
|
b.error(err)
|
|
return b.t, b.err
|
|
}
|
|
|
|
// Build builds the root Collator.
|
|
func (b *Builder) Build() (colltab.Weighter, error) {
|
|
table, err := b.build()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return table, nil
|
|
}
|
|
|
|
// Build builds a Collator for Tailoring t.
|
|
func (t *Tailoring) Build() (colltab.Weighter, error) {
|
|
// TODO: implement.
|
|
return nil, nil
|
|
}
|
|
|
|
// Print prints the tables for b and all its Tailorings as a Go file
|
|
// that can be included in the Collate package.
|
|
func (b *Builder) Print(w io.Writer) (n int, err error) {
|
|
p := func(nn int, e error) {
|
|
n += nn
|
|
if err == nil {
|
|
err = e
|
|
}
|
|
}
|
|
t, err := b.build()
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
p(fmt.Fprintf(w, `var availableLocales = "und`))
|
|
for _, loc := range b.locale {
|
|
if loc.id != "und" {
|
|
p(fmt.Fprintf(w, ",%s", loc.id))
|
|
}
|
|
}
|
|
p(fmt.Fprint(w, "\"\n\n"))
|
|
p(fmt.Fprintf(w, "const varTop = 0x%x\n\n", b.varTop))
|
|
p(fmt.Fprintln(w, "var locales = [...]tableIndex{"))
|
|
for _, loc := range b.locale {
|
|
if loc.id == "und" {
|
|
p(t.fprintIndex(w, loc.index.handle, loc.id))
|
|
}
|
|
}
|
|
for _, loc := range b.locale {
|
|
if loc.id != "und" {
|
|
p(t.fprintIndex(w, loc.index.handle, loc.id))
|
|
}
|
|
}
|
|
p(fmt.Fprint(w, "}\n\n"))
|
|
n, _, err = t.fprint(w, "main")
|
|
return
|
|
}
|
|
|
|
// reproducibleFromNFKD checks whether the given expansion could be generated
|
|
// from an NFKD expansion.
|
|
func reproducibleFromNFKD(e *entry, exp, nfkd []rawCE) bool {
|
|
// Length must be equal.
|
|
if len(exp) != len(nfkd) {
|
|
return false
|
|
}
|
|
for i, ce := range exp {
|
|
// Primary and secondary values should be equal.
|
|
if ce.w[0] != nfkd[i].w[0] || ce.w[1] != nfkd[i].w[1] {
|
|
return false
|
|
}
|
|
// Tertiary values should be equal to maxTertiary for third element onwards.
|
|
// TODO: there seem to be a lot of cases in CLDR (e.g. ㏭ in zh.xml) that can
|
|
// simply be dropped. Try this out by dropping the following code.
|
|
if i >= 2 && ce.w[2] != maxTertiary {
|
|
return false
|
|
}
|
|
if _, err := makeCE(ce); err != nil {
|
|
// Simply return false. The error will be caught elsewhere.
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
func simplify(o *ordering) {
|
|
// Runes that are a starter of a contraction should not be removed.
|
|
// (To date, there is only Kannada character 0CCA.)
|
|
keep := make(map[rune]bool)
|
|
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
|
if len(e.runes) > 1 {
|
|
keep[e.runes[0]] = true
|
|
}
|
|
}
|
|
// Tag entries for which the runes NFKD decompose to identical values.
|
|
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
|
s := e.str
|
|
nfkd := norm.NFKD.String(s)
|
|
nfd := norm.NFD.String(s)
|
|
if e.decompose || len(e.runes) > 1 || len(e.elems) == 1 || keep[e.runes[0]] || nfkd == nfd {
|
|
continue
|
|
}
|
|
if reproducibleFromNFKD(e, e.elems, o.genColElems(nfkd)) {
|
|
e.decompose = true
|
|
}
|
|
}
|
|
}
|
|
|
|
// appendExpansion converts the given collation sequence to
|
|
// collation elements and adds them to the expansion table.
|
|
// It returns an index to the expansion table.
|
|
func (b *Builder) appendExpansion(e *entry) int {
|
|
t := b.t
|
|
i := len(t.ExpandElem)
|
|
ce := uint32(len(e.elems))
|
|
t.ExpandElem = append(t.ExpandElem, ce)
|
|
for _, w := range e.elems {
|
|
ce, err := makeCE(w)
|
|
if err != nil {
|
|
b.error(err)
|
|
return -1
|
|
}
|
|
t.ExpandElem = append(t.ExpandElem, ce)
|
|
}
|
|
return i
|
|
}
|
|
|
|
// processExpansions extracts data necessary to generate
|
|
// the extraction tables.
|
|
func (b *Builder) processExpansions(o *ordering) {
|
|
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
|
if !e.expansion() {
|
|
continue
|
|
}
|
|
key := fmt.Sprintf("%v", e.elems)
|
|
i, ok := b.expIndex[key]
|
|
if !ok {
|
|
i = b.appendExpansion(e)
|
|
b.expIndex[key] = i
|
|
}
|
|
e.expansionIndex = i
|
|
}
|
|
}
|
|
|
|
// processContractions builds the contraction tries and contraction element
// tables for the ordering o. Contractions are grouped by their first rune
// (the starter). For every starter a trie over the suffixes of its
// contractions is created or reused, the collation elements of the group are
// appended to the table's ContractElem (or reused), and the resulting index
// and trie handle are stored in the single-rune entry of the starter.
// Errors are recorded on b; inconsistent trie lookups terminate the program
// through log.Fatalf.
func (b *Builder) processContractions(o *ordering) {
	// Collate contractions per starter rune.
	starters := []rune{} // starter runes in order of first appearance
	cm := make(map[rune][]*entry)
	for e := o.front(); e != nil; e, _ = e.nextIndexed() {
		if e.contraction() {
			// Track the longest contraction, measured in bytes.
			if len(e.str) > b.t.MaxContractLen {
				b.t.MaxContractLen = len(e.str)
			}
			r := e.runes[0]
			if _, ok := cm[r]; !ok {
				starters = append(starters, r)
			}
			cm[r] = append(cm[r], e)
		}
	}
	// Add entries of single runes that are at a start of a contraction.
	for e := o.front(); e != nil; e, _ = e.nextIndexed() {
		if !e.contraction() {
			r := e.runes[0]
			if _, ok := cm[r]; ok {
				cm[r] = append(cm[r], e)
			}
		}
	}
	// Build the tries for the contractions.
	t := b.t
	for _, r := range starters {
		l := cm[r]
		// Compute suffix strings. There are 31 different contraction suffix
		// sets for 715 contractions and 82 contraction starter runes as of
		// version 6.0.0.
		sufx := []string{}
		hasSingle := false
		for _, e := range l {
			if len(e.runes) > 1 {
				sufx = append(sufx, string(e.runes[1:]))
			} else {
				hasSingle = true
			}
		}
		if !hasSingle {
			b.error(fmt.Errorf("no single entry for starter rune %U found", r))
			continue
		}
		// Unique the suffix set.
		sort.Strings(sufx)
		key := strings.Join(sufx, "\n")
		handle, ok := b.ctHandle[key]
		if !ok {
			var err error
			handle, err = appendTrie(&t.ContractTries, sufx)
			if err != nil {
				// NOTE(review): the error is recorded, but the handle is
				// still cached and used below.
				b.error(err)
			}
			b.ctHandle[key] = handle
		}
		// Bucket sort entries in index order.
		es := make([]*entry, len(l))
		for _, e := range l {
			// p stays 0 for the single-rune entry, which therefore ends up
			// in es[0]; contractions get the position found in the trie.
			var p, sn int
			if len(e.runes) > 1 {
				str := []byte(string(e.runes[1:]))
				p, sn = lookup(&t.ContractTries, handle, str)
				if sn != len(str) {
					log.Fatalf("%s: processContractions: unexpected length for '%X'; len=%d; want %d", o.id, e.runes, sn, len(str))
				}
			}
			if es[p] != nil {
				log.Fatalf("%s: multiple contractions for position %d for rune %U", o.id, p, e.runes[0])
			}
			es[p] = e
		}
		// Create collation elements for contractions.
		// NOTE(review): this presumes every slot of es was filled above;
		// encodeBase would be called on a nil entry otherwise.
		elems := []uint32{}
		for _, e := range es {
			ce, err := e.encodeBase()
			b.errorID(o.id, err)
			elems = append(elems, ce)
		}
		// Reuse an identical run of contraction elements if one was stored before.
		key = fmt.Sprintf("%v", elems)
		i, ok := b.ctElem[key]
		if !ok {
			i = len(t.ContractElem)
			b.ctElem[key] = i
			t.ContractElem = append(t.ContractElem, elems...)
		}
		// Store info in entry for starter rune.
		es[0].contractionIndex = i
		es[0].contractionHandle = handle
	}
}
|