prometheus-adapter/vendor/golang.org/x/text/collate/maketables.go
Solly Ross a293b2bf94 Check in the vendor directory
Travis seems to be having issues pulling deps, so we'll have to check in
the vendor directory and prevent the makefile from trying to regenerate
it normally.
2018-07-13 17:32:49 -04:00

553 lines
13 KiB
Go

// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Collation table generator.
// Data read from the web.
package main
import (
"archive/zip"
"bufio"
"bytes"
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"regexp"
"sort"
"strconv"
"strings"
"unicode/utf8"
"golang.org/x/text/collate"
"golang.org/x/text/collate/build"
"golang.org/x/text/internal/colltab"
"golang.org/x/text/internal/gen"
"golang.org/x/text/language"
"golang.org/x/text/unicode/cldr"
)
// Command-line flags selecting the input data and the tables to generate.
var (
	test = flag.Bool("test", false,
		"test existing tables; can be used to compare web data with package data.")
	short = flag.Bool("short", false, `Use "short" alternatives, when available.`)
	draft = flag.Bool("draft", false, `Use draft versions, when available.`)
	tags  = flag.String("tags", "", "build tags to be included after +build directive")
	pkg   = flag.String("package", "collate",
		"the name of the package in which the generated file is to be included")

	// tables accepts "collate" and/or "chars" (or "all").
	tables = flagStringSetAllowAll("tables", "collate", "collate,chars",
		"comma-separated list of tables to generate.")
	// exclude lists languages to skip; see skipLang.
	exclude = flagStringSet("exclude", "zh2", "",
		"comma-separated list of languages to exclude.")
	// include, when non-empty, takes precedence over exclude; see skipLang.
	include = flagStringSet("include", "", "",
		"comma-separated list of languages to include. Include trumps exclude.")
	// TODO: Not included: unihan gb2312han zhuyin big5han (for size reasons)
	// TODO: Not included: traditional (buggy for Bengali)
	types = flagStringSetAllowAll("types", "standard,phonebook,phonetic,reformed,pinyin,stroke", "",
		"comma-separated list of types that should be included.")
)
// stringSet implements an ordered set based on a list. It implements flag.Value
// to allow a set to be specified as a comma-separated list.
type stringSet struct {
	// s holds the elements; compact sorts it and removes duplicates.
	s []string
	// allowed, when non-nil, is the set of values that Set accepts.
	allowed *stringSet
	dirty bool // needs compaction if true
	// all is set when Set receives "all"; contains then reports true for
	// every value.
	all bool
	// allowAll makes Set treat the value "all" specially.
	allowAll bool
}
// flagStringSet registers a command-line flag called name whose value is a
// comma-separated set of strings, and returns the set backing it.
//
// def is the default value. If allowed is non-empty it is a comma-separated
// list of the only values the flag accepts, and it is appended to usage.
// An invalid allowed or def value is a programming error and panics through
// failOnError.
func flagStringSet(name, def, allowed, usage string) *stringSet {
	ss := &stringSet{}
	if allowed != "" {
		usage += fmt.Sprintf(" (allowed values: any of %s)", allowed)
		ss.allowed = &stringSet{}
		failOnError(ss.allowed.Set(allowed))
	}
	// Set the default before registering so that the flag package records it
	// as the flag's default, and do not let a rejected default go unnoticed.
	failOnError(ss.Set(def))
	flag.Var(ss, name, usage)
	return ss
}
// flagStringSetAllowAll is like flagStringSet, but the resulting flag also
// accepts the special value "all", which selects every value.
//
// def is the default value. If allowed is non-empty it is a comma-separated
// list of the only (non-"all") values the flag accepts. An invalid allowed or
// def value is a programming error and panics through failOnError.
func flagStringSetAllowAll(name, def, allowed, usage string) *stringSet {
	ss := &stringSet{allowAll: true}
	if allowed == "" {
		usage += ` Use "all" to select all.`
	} else {
		ss.allowed = &stringSet{}
		failOnError(ss.allowed.Set(allowed))
		usage += fmt.Sprintf(` (allowed values: "all" or any of %s)`, allowed)
	}
	// flag.Var captures the value's String() as the flag's default at
	// registration time, so the default has to be in place first.
	failOnError(ss.Set(def))
	flag.Var(ss, name, usage)
	return ss
}
// Len reports how many elements are currently stored in the set.
func (ss stringSet) Len() int {
	elems := ss.s
	return len(elems)
}
// String renders the elements as a comma-separated list, the same format
// that Set accepts.
func (ss stringSet) String() string {
	const sep = ","
	return strings.Join(ss.s, sep)
}
// Set replaces the contents of the set with the comma-separated values in s.
// Surrounding white space is trimmed from each value and empty values are
// dropped.
//
// If the set was created with allowAll, the value "all" selects everything.
// Any other value replaces a previous "all". If an allowed set is configured,
// a value outside of it yields an error and leaves the set unchanged.
func (ss *stringSet) Set(s string) error {
	if ss.allowAll && s == "all" {
		ss.s = nil
		ss.all = true
		return nil
	}
	// Collect into a fresh slice so that a rejected value cannot leave the
	// set half-updated.
	var vals []string
	for _, v := range strings.Split(s, ",") {
		v = strings.TrimSpace(v)
		if v == "" {
			continue
		}
		if ss.allowed != nil && !ss.allowed.contains(v) {
			return fmt.Errorf("unsupported value %q; must be one of %s", v, ss.allowed)
		}
		vals = append(vals, v)
	}
	ss.s = vals
	// An explicit list overrides an earlier "all"; flag.Value.Set may be
	// called more than once for the same flag.
	ss.all = false
	// Only request compaction when there is something to compact.
	ss.dirty = len(vals) > 0
	ss.compact()
	return nil
}
// add appends s to the list and flags the set as needing compaction; sorting
// and duplicate removal are deferred to compact.
func (ss *stringSet) add(s string) {
	ss.dirty = true
	ss.s = append(ss.s, s)
}
// values returns the elements of the set after making sure any pending
// compaction has been carried out. The returned slice is the set's own
// storage, not a copy.
func (ss *stringSet) values() []string {
	if ss.dirty {
		ss.compact()
	}
	return ss.s
}
// contains reports whether s is a member of the set. A set that was given
// the value "all" contains every string.
func (ss *stringSet) contains(s string) bool {
	found := ss.all
	for i := 0; !found && i < len(ss.s); i++ {
		found = ss.s[i] == s
	}
	return found
}
// compact sorts the elements and removes duplicates in place. It does
// nothing unless the set has been marked dirty, and clears that mark when it
// is done.
func (ss *stringSet) compact() {
	if !ss.dirty {
		return
	}
	ss.dirty = false
	a := ss.s
	// Zero or one element is already compact. Returning here also keeps an
	// empty slice from being resliced to length one below.
	if len(a) < 2 {
		return
	}
	sort.Strings(a)
	// k is the index of the last element kept so far.
	k := 0
	for _, v := range a[1:] {
		if a[k] != v {
			k++
			a[k] = v
		}
	}
	ss.s = a[:k+1]
}
// skipLang reports whether language l should be left out. When the include
// set is non-empty it alone decides; otherwise l is skipped only if it is in
// the exclude set.
func skipLang(l string) bool {
	if include.Len() == 0 {
		return exclude.contains(l)
	}
	return !include.contains(l)
}
// altInclude lists the acceptable values of the LDML alt attribute, most
// preferred first. The empty string stands for the default entry (no alt
// attribute); "short" precedes it when -short is given and "proposed"
// follows it when -draft is given.
func altInclude() []string {
	alts := make([]string, 0, 3)
	if *short {
		alts = append(alts, "short")
	}
	alts = append(alts, "")
	// TODO: handle draft using cldr.SetDraftLevel
	if *draft {
		alts = append(alts, "proposed")
	}
	return alts
}
// failOnError logs e and panics when e is non-nil; a nil error is ignored.
func failOnError(e error) {
	if e == nil {
		return
	}
	log.Panic(e)
}
// openArchive loads the complete CLDR core zip into memory and returns a
// zip reader over it. Any failure panics through failOnError.
func openArchive() *zip.Reader {
	src := gen.OpenCLDRCoreZip()
	data, readErr := ioutil.ReadAll(src)
	// The source is closed before the read error is inspected so that it is
	// released on the failure path too.
	src.Close()
	failOnError(readErr)

	size := int64(len(data))
	zr, zipErr := zip.NewReader(bytes.NewReader(data), size)
	failOnError(zipErr)
	return zr
}
// parseUCA parses a Default Unicode Collation Element Table of the format
// specified in http://www.unicode.org/reports/tr10/#File_Format and adds
// every entry to builder.
//
// The table is read from the archive member whose name ends in
// allkeys_CLDR.txt. A missing file, a version that differs from
// gen.UnicodeVersion, an unsupported option or a malformed line terminates
// the program.
func parseUCA(builder *build.Builder) {
	// Pick the archive entry first and open it once. If several entries
	// match, the last one wins.
	var uca *zip.File
	for _, f := range openArchive().File {
		if strings.HasSuffix(f.Name, "allkeys_CLDR.txt") {
			uca = f
		}
	}
	if uca == nil {
		log.Fatal("File allkeys_CLDR.txt not found in archive.")
	}
	r, err := uca.Open()
	// Report a failed Open as such instead of as a missing file.
	failOnError(err)
	defer r.Close()

	scanner := bufio.NewScanner(r)
	colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`)
	for lineNo := 1; scanner.Scan(); lineNo++ {
		line := scanner.Text()
		if len(line) == 0 || line[0] == '#' {
			continue
		}
		if line[0] == '@' {
			// parse properties
			switch {
			case strings.HasPrefix(line[1:], "version "):
				a := strings.Split(line[1:], " ")
				if a[1] != gen.UnicodeVersion() {
					log.Fatalf("incompatible version %s; want %s", a[1], gen.UnicodeVersion())
				}
			case strings.HasPrefix(line[1:], "backwards "):
				log.Fatalf("%d: unsupported option backwards", lineNo)
			default:
				log.Printf("%d: unknown option %s", lineNo, line[1:])
			}
			continue
		}

		// parse entries: "<hex code points> ; <collation elements> # comment"
		part := strings.Split(line, " ; ")
		if len(part) != 2 {
			log.Fatalf("%d: production rule without ';': %v", lineNo, line)
		}
		lhs := []rune{}
		for _, v := range strings.Split(part[0], " ") {
			if v == "" {
				continue
			}
			lhs = append(lhs, rune(convHex(lineNo, v)))
		}
		var n int      // total length of the matched collation elements
		var vars []int // indices of the elements marked with '*'
		rhs := [][]int{}
		for idx, m := range colelem.FindAllStringSubmatch(part[1], -1) {
			n += len(m[0])
			elem := []int{}
			for _, h := range strings.Split(m[2], ".") {
				// Pass the line number, not the element index, so that a
				// conversion error points at the offending line.
				elem = append(elem, convHex(lineNo, h))
			}
			if m[1] == "*" {
				vars = append(vars, idx)
			}
			rhs = append(rhs, elem)
		}
		// The elements must be followed by " # comment".
		if len(part[1]) < n+3 || part[1][n+1] != '#' {
			log.Fatalf("%d: expected comment; found %s", lineNo, part[1][n:])
		}
		if *test {
			testInput.add(string(lhs))
		}
		failOnError(builder.Add(lhs, rhs, vars))
	}
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
}
// convHex parses s as a hexadecimal number that fits in 32 bits. If s is not
// valid, the program exits with a message prefixed by the given line number.
func convHex(line int, s string) int {
	v, err := strconv.ParseInt(s, 16, 32)
	if err == nil {
		return int(v)
	}
	log.Fatalf("%d: %v", line, err)
	return int(v) // not reached: log.Fatalf exits
}
// testInput collects the strings whose collation keys testCollator compares.
// It is filled while parsing when the -test flag is set.
var testInput = stringSet{}
// charRe matches a hexadecimal XML character reference such as &#x1F; and
// captures its hex digits.
var charRe = regexp.MustCompile(`&#x([0-9A-F]*);`)
// tagRe matches a self-closing tag whose name consists of lowercase letters
// and underscores, capturing the name.
var tagRe = regexp.MustCompile(`<([a-z_]*) */>`)
// mainLocales lists the locales that have an entry in localeChars, in the
// order parseMain first encountered them.
var mainLocales = []string{}
// charSets holds a list of exemplar characters per category.
type charSets map[string][]string

// fprint writes p to w as an [exN]string composite literal followed by a
// comma. Each non-empty category is emitted as "index: value", where the
// index is the category's position in the fixed category order and the value
// is its entries joined by spaces.
func (p charSets) fprint(w io.Writer) {
	categories := [...]string{"", "contractions", "punctuation", "auxiliary", "currencySymbol", "index"}

	fmt.Fprint(w, "[exN]string{\n")
	for idx := 0; idx < len(categories); idx++ {
		entries := p[categories[idx]]
		if len(entries) == 0 {
			continue
		}
		fmt.Fprintf(w, "\t\t%d: %q,\n", idx, strings.Join(entries, " "))
	}
	fmt.Fprint(w, "\t},\n")
}
// localeChars maps a locale to the exemplar character sets that parseMain
// collected for it.
var localeChars = make(map[string]charSets)
// exemplarHeader is the Go source that printExemplarCharacters emits ahead of
// the exemplarCharacters table. It declares the exemplarType constants; their
// order matches the category order used by charSets.fprint, and exN is the
// number of categories.
const exemplarHeader = `
type exemplarType int
const (
exCharacters exemplarType = iota
exContractions
exPunctuation
exAuxiliary
exCurrency
exIndex
exN
)
`
// printExemplarCharacters writes to w the exemplarType declarations followed
// by the exemplarCharacters map, with one entry per locale in mainLocales
// order.
func printExemplarCharacters(w io.Writer) {
	fmt.Fprintln(w, exemplarHeader)
	fmt.Fprintln(w, "var exemplarCharacters = map[string][exN]string{")
	for i := range mainLocales {
		name := mainLocales[i]
		fmt.Fprintf(w, "\t%q: ", name)
		sets := localeChars[name]
		sets.fprint(w)
	}
	fmt.Fprintln(w, "}")
}
// decodeCLDR decodes the CLDR core zip file using d, whose filters determine
// which parts are read, and returns the decoded data. A decoding error
// panics through failOnError.
func decodeCLDR(d *cldr.Decoder) *cldr.CLDR {
	r := gen.OpenCLDRCoreZip()
	// Release the archive once decoding is finished, as openArchive does;
	// the deferred call also runs when failOnError panics.
	defer r.Close()
	data, err := d.DecodeZip(r)
	failOnError(err)
	return data
}
// parseMain parses XML files in the main directory of the CLDR core.zip file.
//
// For every locale that is not skipped by skipLang and that has a characters
// section, it stores the non-draft exemplar character sets in localeChars,
// keyed by the string form of the locale's language tag and then by exemplar
// type. A locale is appended to mainLocales the first time a set is stored
// for it.
func parseMain() {
	d := &cldr.Decoder{}
	d.SetDirFilter("main")
	d.SetSectionFilter("characters")
	data := decodeCLDR(d)
	for _, loc := range data.Locales() {
		raw := data.RawLDML(loc)
		if skipLang(raw.Identity.Language.Type) {
			continue
		}
		if raw.Characters == nil {
			continue
		}
		// Fail on a resolution error, as parseCollation does, instead of
		// dereferencing whatever LDML returned alongside it.
		x, err := data.LDML(loc)
		failOnError(err)
		tag := language.Make(loc).String()
		for _, ec := range x.Characters.ExemplarCharacters {
			if ec.Draft != "" {
				continue
			}
			if _, ok := localeChars[tag]; !ok {
				mainLocales = append(mainLocales, tag)
				localeChars[tag] = make(charSets)
			}
			localeChars[tag][ec.Type] = parseCharacters(ec.Data())
		}
	}
}
// parseCharacters expands an exemplar character set such as "[a b-d {ch} \-]"
// into a list of strings, one per character or character sequence.
//
// An enclosing pair of square brackets is removed. Inside, "{...}" denotes a
// multi-character sequence, "x-y" denotes the inclusive range from x to y, a
// backslash escapes the next character and spaces separate entries. Empty
// input yields an empty list. Malformed input (a space inside a sequence, a
// '-' without a character on both sides, a trailing backslash) terminates
// the program.
func parseCharacters(chars string) []string {
	// parseSingle consumes one rune, or one backslash-escaped rune, from the
	// front of the non-empty string s. escaped reports whether the rune was
	// preceded by a backslash.
	parseSingle := func(s string) (r rune, tail string, escaped bool) {
		if s[0] == '\\' {
			if len(s) == 1 {
				log.Fatal("'\\' should be followed by a character")
			}
			s, escaped = s[1:], true
		}
		// Decode a full rune so that an escaped non-ASCII character is not
		// cut off after its first byte.
		r, sz := utf8.DecodeRuneInString(s)
		return r, s[sz:], escaped
	}
	chars = strings.TrimSpace(chars)
	// Strip the enclosing brackets. Requiring at least two bytes keeps empty
	// input from being indexed.
	if n := len(chars) - 1; n > 0 && chars[0] == '[' && chars[n] == ']' {
		chars = chars[1:n]
	}
	list := []string{}
	// last is the most recent single character, or 0 if a range cannot start
	// at the current position.
	var r, last, end rune
	for len(chars) > 0 {
		if chars[0] == '{' { // character sequence
			buf := []rune{}
			for chars = chars[1:]; len(chars) > 0; {
				r, chars, _ = parseSingle(chars)
				if r == '}' {
					break
				}
				if r == ' ' {
					log.Fatalf("space not supported in sequence %q", chars)
				}
				buf = append(buf, r)
			}
			list = append(list, string(buf))
			last = 0
			continue
		}
		// single character or range
		var escaped bool
		r, chars, escaped = parseSingle(chars)
		switch {
		case r == ' ':
			// Spaces only separate entries.
		case r == '-' && !escaped:
			if last == 0 {
				log.Fatal("'-' should be preceded by a character")
			}
			if len(chars) == 0 {
				log.Fatal("'-' should be followed by a character")
			}
			end, chars, _ = parseSingle(chars)
			// The first character of the range is already in the list, so
			// continue with its successor.
			for c := last + 1; c <= end; c++ {
				list = append(list, string(c))
			}
			last = 0
		default:
			list = append(list, string(r))
			last = r
		}
	}
	return list
}
// fileRe matches the path of an XML file inside a collation directory and
// captures the file name without its .xml extension.
var fileRe = regexp.MustCompile(`.*/collation/(.*)\.xml`)
// typeMap translates legacy type keys to their BCP47 equivalent.
// parseCollation consults it for collation types longer than 8 bytes.
var typeMap = map[string]string{
	"phonebook": "phonebk",
	"traditional": "trad",
}
// parseCollation parses XML files in the collation directory of the CLDR core.zip file.
//
// For every locale not skipped by skipLang it selects the collations
// requested through the -types flag (the locale's default type if none are
// given, every type for "all"), keeps one alternative per group according to
// altInclude, and feeds the rules of each selected collation into a
// tailoring of b.
func parseCollation(b *build.Builder) {
	d := &cldr.Decoder{}
	d.SetDirFilter("collation")
	data := decodeCLDR(d)
	for _, loc := range data.Locales() {
		x, err := data.LDML(loc)
		failOnError(err)
		if skipLang(x.Identity.Language.Type) {
			continue
		}
		cs := x.Collations.Collation
		sl := cldr.MakeSlice(&cs)
		switch {
		case types.all:
			// "all" leaves the list empty as well, so it has to be tested
			// first: every collation type is kept.
		case len(types.s) == 0:
			sl.SelectAnyOf("type", x.Collations.Default())
		default:
			sl.SelectAnyOf("type", types.s...)
		}
		sl.SelectOnePerGroup("alt", altInclude())

		for _, c := range cs {
			id, err := language.Parse(loc)
			if err != nil {
				fmt.Fprintf(os.Stderr, "invalid locale %q: %v\n", loc, err)
				continue
			}
			// Support both old- and new-style defaults.
			var def string
			if x.Collations.DefaultCollation == nil {
				def = x.Collations.Default()
			} else {
				def = x.Collations.DefaultCollation.Data()
			}
			// We assume tables are being built either for search or collation,
			// but not both. For search the default is always "search".
			if def != c.Type && c.Type != "search" {
				typ := c.Type
				if len(c.Type) > 8 {
					typ = typeMap[c.Type]
				}
				id, err = id.SetTypeForKey("co", typ)
				failOnError(err)
			}
			t := b.Tailoring(id)
			c.Process(processor{t})
		}
	}
}
// processor applies collation rules to a tailoring. parseCollation passes it
// to a collation's Process method, which is expected to call back into
// Reset, Insert and Index.
type processor struct {
	// t is the tailoring that receives the anchors and insertions.
	t *build.Tailoring
}
// Reset positions the tailoring at anchor. A non-zero before selects
// SetAnchorBefore, zero selects SetAnchor. A failure panics through
// failOnError, so the returned error is always nil.
func (p processor) Reset(anchor string, before int) (err error) {
	switch before {
	case 0:
		err = p.t.SetAnchor(anchor)
	default:
		err = p.t.SetAnchorBefore(anchor)
	}
	failOnError(err)
	return nil
}
// Insert adds context+str to the tailoring at level-1, passing context+extend
// as the extension. With -test set, the inserted string is also recorded in
// testInput. A failure panics through failOnError, so the returned error is
// always nil.
func (p processor) Insert(level int, str, context, extend string) error {
	full := context + str
	if *test {
		testInput.add(full)
	}
	// TODO: mimic bug in old maketables: remove.
	extension := context + extend
	failOnError(p.t.Insert(colltab.Level(level-1), full, extension))
	return nil
}
// Index does nothing; id is ignored.
func (p processor) Index(id string) {
}
// testCollator checks that c produces the same collation keys as the
// collator returned by collate.New(language.Und). It compares every string
// in testInput, after adding all code points in [0, 0x30000) and
// [0xE0000, 0xF0000). The first mismatch panics through failOnError;
// otherwise "PASS" is printed.
func testCollator(c *collate.Collator) {
	reference := collate.New(language.Und)
	var buf collate.Buffer

	// Add all common and not too uncommon runes to the test set.
	addRange := func(lo, hi rune) {
		for r := lo; r < hi; r++ {
			testInput.add(string(r))
		}
	}
	addRange(0, 0x30000)
	addRange(0xE0000, 0xF0000)

	// Iterate over all test strings and check whether the keys are equal.
	for _, s := range testInput.values() {
		want := reference.KeyFromString(&buf, s)
		got := c.KeyFromString(&buf, s)
		if !bytes.Equal(want, got) {
			failOnError(fmt.Errorf("test:%U: keys differ (%x vs %x)", []rune(s), want, got))
		}
		buf.Reset()
	}
	fmt.Println("PASS")
}
// main builds the collation tables from the UCA and CLDR data. With -test it
// checks the freshly built collator via testCollator; otherwise it writes
// the tables selected by -tables to tables.go in the package named by
// -package.
func main() {
	gen.Init()

	builder := build.NewBuilder()
	parseUCA(builder)
	if tables.contains("chars") {
		parseMain()
	}
	parseCollation(builder)

	table, err := builder.Build()
	failOnError(err)

	if *test {
		testCollator(collate.NewFromTable(table))
		return
	}

	out := new(bytes.Buffer)
	gen.WriteUnicodeVersion(out)
	gen.WriteCLDRVersion(out)
	if tables.contains("collate") {
		_, err = builder.Print(out)
		failOnError(err)
	}
	if tables.contains("chars") {
		printExemplarCharacters(out)
	}
	gen.WriteGoFile("tables.go", *pkg, out.Bytes())
}