mirror of
https://github.com/kubernetes-sigs/prometheus-adapter.git
synced 2026-04-06 09:47:54 +00:00
Travis seems to be having issues pulling deps, so we'll have to check in the vendor directory and prevent the makefile from trying to regenerate it normally.
275 lines
7.1 KiB
Go
275 lines
7.1 KiB
Go
// Copyright 2012 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package colltab
|
|
|
|
import (
|
|
"unicode/utf8"
|
|
|
|
"golang.org/x/text/unicode/norm"
|
|
)
|
|
|
|
// Table holds all collation data for a given collation ordering.
|
|
type Table struct {
|
|
Index Trie // main trie
|
|
|
|
// expansion info
|
|
ExpandElem []uint32
|
|
|
|
// contraction info
|
|
ContractTries ContractTrieSet
|
|
ContractElem []uint32
|
|
MaxContractLen int
|
|
VariableTop uint32
|
|
}
|
|
|
|
func (t *Table) AppendNext(w []Elem, b []byte) (res []Elem, n int) {
|
|
return t.appendNext(w, source{bytes: b})
|
|
}
|
|
|
|
func (t *Table) AppendNextString(w []Elem, s string) (res []Elem, n int) {
|
|
return t.appendNext(w, source{str: s})
|
|
}
|
|
|
|
func (t *Table) Start(p int, b []byte) int {
|
|
// TODO: implement
|
|
panic("not implemented")
|
|
}
|
|
|
|
func (t *Table) StartString(p int, s string) int {
|
|
// TODO: implement
|
|
panic("not implemented")
|
|
}
|
|
|
|
func (t *Table) Domain() []string {
|
|
// TODO: implement
|
|
panic("not implemented")
|
|
}
|
|
|
|
func (t *Table) Top() uint32 {
|
|
return t.VariableTop
|
|
}
|
|
|
|
type source struct {
|
|
str string
|
|
bytes []byte
|
|
}
|
|
|
|
func (src *source) lookup(t *Table) (ce Elem, sz int) {
|
|
if src.bytes == nil {
|
|
return t.Index.lookupString(src.str)
|
|
}
|
|
return t.Index.lookup(src.bytes)
|
|
}
|
|
|
|
func (src *source) tail(sz int) {
|
|
if src.bytes == nil {
|
|
src.str = src.str[sz:]
|
|
} else {
|
|
src.bytes = src.bytes[sz:]
|
|
}
|
|
}
|
|
|
|
func (src *source) nfd(buf []byte, end int) []byte {
|
|
if src.bytes == nil {
|
|
return norm.NFD.AppendString(buf[:0], src.str[:end])
|
|
}
|
|
return norm.NFD.Append(buf[:0], src.bytes[:end]...)
|
|
}
|
|
|
|
func (src *source) rune() (r rune, sz int) {
|
|
if src.bytes == nil {
|
|
return utf8.DecodeRuneInString(src.str)
|
|
}
|
|
return utf8.DecodeRune(src.bytes)
|
|
}
|
|
|
|
func (src *source) properties(f norm.Form) norm.Properties {
|
|
if src.bytes == nil {
|
|
return f.PropertiesString(src.str)
|
|
}
|
|
return f.Properties(src.bytes)
|
|
}
|
|
|
|
// appendNext appends the weights corresponding to the next rune or
|
|
// contraction in s. If a contraction is matched to a discontinuous
|
|
// sequence of runes, the weights for the interstitial runes are
|
|
// appended as well. It returns a new slice that includes the appended
|
|
// weights and the number of bytes consumed from s.
|
|
func (t *Table) appendNext(w []Elem, src source) (res []Elem, n int) {
|
|
ce, sz := src.lookup(t)
|
|
tp := ce.ctype()
|
|
if tp == ceNormal {
|
|
if ce == 0 {
|
|
r, _ := src.rune()
|
|
const (
|
|
hangulSize = 3
|
|
firstHangul = 0xAC00
|
|
lastHangul = 0xD7A3
|
|
)
|
|
if r >= firstHangul && r <= lastHangul {
|
|
// TODO: performance can be considerably improved here.
|
|
n = sz
|
|
var buf [16]byte // Used for decomposing Hangul.
|
|
for b := src.nfd(buf[:0], hangulSize); len(b) > 0; b = b[sz:] {
|
|
ce, sz = t.Index.lookup(b)
|
|
w = append(w, ce)
|
|
}
|
|
return w, n
|
|
}
|
|
ce = makeImplicitCE(implicitPrimary(r))
|
|
}
|
|
w = append(w, ce)
|
|
} else if tp == ceExpansionIndex {
|
|
w = t.appendExpansion(w, ce)
|
|
} else if tp == ceContractionIndex {
|
|
n := 0
|
|
src.tail(sz)
|
|
if src.bytes == nil {
|
|
w, n = t.matchContractionString(w, ce, src.str)
|
|
} else {
|
|
w, n = t.matchContraction(w, ce, src.bytes)
|
|
}
|
|
sz += n
|
|
} else if tp == ceDecompose {
|
|
// Decompose using NFKD and replace tertiary weights.
|
|
t1, t2 := splitDecompose(ce)
|
|
i := len(w)
|
|
nfkd := src.properties(norm.NFKD).Decomposition()
|
|
for p := 0; len(nfkd) > 0; nfkd = nfkd[p:] {
|
|
w, p = t.appendNext(w, source{bytes: nfkd})
|
|
}
|
|
w[i] = w[i].updateTertiary(t1)
|
|
if i++; i < len(w) {
|
|
w[i] = w[i].updateTertiary(t2)
|
|
for i++; i < len(w); i++ {
|
|
w[i] = w[i].updateTertiary(maxTertiary)
|
|
}
|
|
}
|
|
}
|
|
return w, sz
|
|
}
|
|
|
|
func (t *Table) appendExpansion(w []Elem, ce Elem) []Elem {
|
|
i := splitExpandIndex(ce)
|
|
n := int(t.ExpandElem[i])
|
|
i++
|
|
for _, ce := range t.ExpandElem[i : i+n] {
|
|
w = append(w, Elem(ce))
|
|
}
|
|
return w
|
|
}
|
|
|
|
func (t *Table) matchContraction(w []Elem, ce Elem, suffix []byte) ([]Elem, int) {
|
|
index, n, offset := splitContractIndex(ce)
|
|
|
|
scan := t.ContractTries.scanner(index, n, suffix)
|
|
buf := [norm.MaxSegmentSize]byte{}
|
|
bufp := 0
|
|
p := scan.scan(0)
|
|
|
|
if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
|
|
// By now we should have filtered most cases.
|
|
p0 := p
|
|
bufn := 0
|
|
rune := norm.NFD.Properties(suffix[p:])
|
|
p += rune.Size()
|
|
if rune.LeadCCC() != 0 {
|
|
prevCC := rune.TrailCCC()
|
|
// A gap may only occur in the last normalization segment.
|
|
// This also ensures that len(scan.s) < norm.MaxSegmentSize.
|
|
if end := norm.NFD.FirstBoundary(suffix[p:]); end != -1 {
|
|
scan.s = suffix[:p+end]
|
|
}
|
|
for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
|
|
rune = norm.NFD.Properties(suffix[p:])
|
|
if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
|
|
break
|
|
}
|
|
prevCC = rune.TrailCCC()
|
|
if pp := scan.scan(p); pp != p {
|
|
// Copy the interstitial runes for later processing.
|
|
bufn += copy(buf[bufn:], suffix[p0:p])
|
|
if scan.pindex == pp {
|
|
bufp = bufn
|
|
}
|
|
p, p0 = pp, pp
|
|
} else {
|
|
p += rune.Size()
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// Append weights for the matched contraction, which may be an expansion.
|
|
i, n := scan.result()
|
|
ce = Elem(t.ContractElem[i+offset])
|
|
if ce.ctype() == ceNormal {
|
|
w = append(w, ce)
|
|
} else {
|
|
w = t.appendExpansion(w, ce)
|
|
}
|
|
// Append weights for the runes in the segment not part of the contraction.
|
|
for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
|
|
w, p = t.appendNext(w, source{bytes: b})
|
|
}
|
|
return w, n
|
|
}
|
|
|
|
// TODO: unify the two implementations. This is best done after first simplifying
|
|
// the algorithm taking into account the inclusion of both NFC and NFD forms
|
|
// in the table.
|
|
func (t *Table) matchContractionString(w []Elem, ce Elem, suffix string) ([]Elem, int) {
|
|
index, n, offset := splitContractIndex(ce)
|
|
|
|
scan := t.ContractTries.scannerString(index, n, suffix)
|
|
buf := [norm.MaxSegmentSize]byte{}
|
|
bufp := 0
|
|
p := scan.scan(0)
|
|
|
|
if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
|
|
// By now we should have filtered most cases.
|
|
p0 := p
|
|
bufn := 0
|
|
rune := norm.NFD.PropertiesString(suffix[p:])
|
|
p += rune.Size()
|
|
if rune.LeadCCC() != 0 {
|
|
prevCC := rune.TrailCCC()
|
|
// A gap may only occur in the last normalization segment.
|
|
// This also ensures that len(scan.s) < norm.MaxSegmentSize.
|
|
if end := norm.NFD.FirstBoundaryInString(suffix[p:]); end != -1 {
|
|
scan.s = suffix[:p+end]
|
|
}
|
|
for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
|
|
rune = norm.NFD.PropertiesString(suffix[p:])
|
|
if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
|
|
break
|
|
}
|
|
prevCC = rune.TrailCCC()
|
|
if pp := scan.scan(p); pp != p {
|
|
// Copy the interstitial runes for later processing.
|
|
bufn += copy(buf[bufn:], suffix[p0:p])
|
|
if scan.pindex == pp {
|
|
bufp = bufn
|
|
}
|
|
p, p0 = pp, pp
|
|
} else {
|
|
p += rune.Size()
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// Append weights for the matched contraction, which may be an expansion.
|
|
i, n := scan.result()
|
|
ce = Elem(t.ContractElem[i+offset])
|
|
if ce.ctype() == ceNormal {
|
|
w = append(w, ce)
|
|
} else {
|
|
w = t.appendExpansion(w, ce)
|
|
}
|
|
// Append weights for the runes in the segment not part of the contraction.
|
|
for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
|
|
w, p = t.appendNext(w, source{bytes: b})
|
|
}
|
|
return w, n
|
|
}
|