vendored changes

This commit is contained in:
Sergii Koshel 2020-02-12 17:56:04 +02:00
parent d091fff18b
commit 128f9a29f5
522 changed files with 29974 additions and 25705 deletions

View file

@ -159,6 +159,10 @@ func bytesToKey(b []byte, pasteActive bool) (rune, []byte) {
return keyClearScreen, b[1:]
case 23: // ^W
return keyDeleteWord, b[1:]
case 14: // ^N
return keyDown, b[1:]
case 16: // ^P
return keyUp, b[1:]
}
}

View file

@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build darwin dragonfly freebsd linux,!appengine netbsd openbsd
// +build aix darwin dragonfly freebsd linux,!appengine netbsd openbsd
// Package terminal provides support functions for dealing with terminals, as
// commonly found on UNIX systems.
@ -25,7 +25,7 @@ type State struct {
termios unix.Termios
}
// IsTerminal returns true if the given file descriptor is a terminal.
// IsTerminal returns whether the given file descriptor is a terminal.
func IsTerminal(fd int) bool {
_, err := unix.IoctlGetTermios(fd, ioctlReadTermios)
return err == nil

12
vendor/golang.org/x/crypto/ssh/terminal/util_aix.go generated vendored Normal file
View file

@ -0,0 +1,12 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build aix
package terminal
import "golang.org/x/sys/unix"
// ioctlReadTermios is the ioctl request code used on AIX to read the
// terminal attributes (unix.TCGETS).
const ioctlReadTermios = unix.TCGETS
// ioctlWriteTermios is the matching request code (unix.TCSETS).
// NOTE(review): presumably used to write terminal attributes back; its
// callers are not visible in this file.
const ioctlWriteTermios = unix.TCSETS

View file

@ -21,7 +21,7 @@ import (
type State struct{}
// IsTerminal returns true if the given file descriptor is a terminal.
// IsTerminal returns whether the given file descriptor is a terminal.
func IsTerminal(fd int) bool {
// This variant ignores fd and never reports a terminal.
return false
}

View file

@ -17,7 +17,7 @@ type State struct {
termios unix.Termios
}
// IsTerminal returns true if the given file descriptor is a terminal.
// IsTerminal returns whether the given file descriptor is a terminal.
func IsTerminal(fd int) bool {
_, err := unix.IoctlGetTermio(fd, unix.TCGETA)
return err == nil

View file

@ -26,7 +26,7 @@ type State struct {
mode uint32
}
// IsTerminal returns true if the given file descriptor is a terminal.
// IsTerminal returns whether the given file descriptor is a terminal.
func IsTerminal(fd int) bool {
var st uint32
err := windows.GetConsoleMode(windows.Handle(fd), &st)
@ -64,13 +64,15 @@ func Restore(fd int, state *State) error {
return windows.SetConsoleMode(windows.Handle(fd), state.mode)
}
// GetSize returns the dimensions of the given terminal.
// GetSize returns the visible dimensions of the given terminal.
//
// These dimensions don't include any scrollback buffer height.
func GetSize(fd int) (width, height int, err error) {
var info windows.ConsoleScreenBufferInfo
if err := windows.GetConsoleScreenBufferInfo(windows.Handle(fd), &info); err != nil {
return 0, 0, err
}
return int(info.Size.X), int(info.Size.Y), nil
return int(info.Window.Right - info.Window.Left + 1), int(info.Window.Bottom - info.Window.Top + 1), nil
}
// ReadPassword reads a line of input from a terminal without local echo. This

View file

@ -1,712 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
//go:generate go run gen.go
//go:generate go run gen.go -test
package main
import (
"bytes"
"flag"
"fmt"
"go/format"
"io/ioutil"
"math/rand"
"os"
"sort"
"strings"
)
// identifier converts s to a Go exported identifier.
// It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
//
// Every '-' is dropped. The first character of s and the character following
// each '-' are upper-cased when they are ASCII lowercase letters; all other
// characters are copied through unchanged. Runes outside the ASCII range are
// preserved as whole runes rather than being truncated to a single byte.
func identifier(s string) string {
	out := make([]rune, 0, len(s))
	// upperNext is true at the start of s and right after a '-'.
	upperNext := true
	for _, c := range s {
		if c == '-' {
			upperNext = true
			continue
		}
		if upperNext && 'a' <= c && c <= 'z' {
			c -= 'a' - 'A'
		}
		upperNext = false
		out = append(out, c)
	}
	return string(out)
}
// test selects the output file: when set, main writes table_test.go and
// returns instead of generating table.go.
var test = flag.Bool("test", false, "generate table_test.go")
// genFile runs the Go source held in buf through go/format and writes the
// formatted bytes to the file called name with mode 0644. If formatting or
// writing fails, the error is printed to standard error and the program
// exits with status 1.
func genFile(name string, buf *bytes.Buffer) {
	formatted, err := format.Source(buf.Bytes())
	if err == nil {
		err = ioutil.WriteFile(name, formatted, 0644)
	}
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
// main builds the atom tables from the elements, attributes, eventHandlers
// and extra name lists and writes the result with genFile.
//
// With -test it only writes table_test.go, which holds the sorted,
// de-duplicated name list. Otherwise it searches for a hash seed and table
// size that fit every name, packs all names into a single overlapping text
// string, and writes table.go containing the Atom constants, the hash seed,
// the lookup table and the packed text. A size summary is printed to
// standard output at the end.
func main() {
flag.Parse()
var all []string
all = append(all, elements...)
all = append(all, attributes...)
all = append(all, eventHandlers...)
all = append(all, extra...)
sort.Strings(all)
// uniq - lists have dups
// The slice is sorted, so duplicates are adjacent; compact it in place.
w := 0
for _, s := range all {
if w == 0 || all[w-1] != s {
all[w] = s
w++
}
}
all = all[:w]
if *test {
var buf bytes.Buffer
fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n")
fmt.Fprintln(&buf, "//go:generate go run gen.go -test\n")
fmt.Fprintln(&buf, "package atom\n")
fmt.Fprintln(&buf, "var testAtomList = []string{")
for _, s := range all {
fmt.Fprintf(&buf, "\t%q,\n", s)
}
fmt.Fprintln(&buf, "}")
genFile("table_test.go", &buf)
return
}
// Find hash that minimizes table size.
// Up to 1,000,000 random seeds are tried. For each seed, k (the table has
// 1<<k slots) is tried from 0 upwards, capped at 16 and kept strictly below
// the best k found so far, so best.k never grows. The search stops early
// once a table half the size of the current best would have fewer slots
// than there are strings.
var best *table
for i := 0; i < 1000000; i++ {
if best != nil && 1<<(best.k-1) < len(all) {
break
}
h := rand.Uint32()
for k := uint(0); k <= 16; k++ {
if best != nil && k >= best.k {
break
}
var t table
if t.init(h, k, all) {
best = &t
break
}
}
}
if best == nil {
fmt.Fprintf(os.Stderr, "failed to construct string table\n")
os.Exit(1)
}
// Lay out strings, using overlaps when possible.
// layout is a private copy of all; entries are blanked as they are absorbed.
layout := append([]string{}, all...)
// Remove strings that are substrings of other strings
for changed := true; changed; {
changed = false
for i, s := range layout {
if s == "" {
continue
}
for j, t := range layout {
if i != j && t != "" && strings.Contains(s, t) {
changed = true
layout[j] = ""
}
}
}
}
// Join strings where one suffix matches another prefix.
for {
// Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
// maximizing overlap length k.
besti := -1
bestj := -1
bestk := 0
for i, s := range layout {
if s == "" {
continue
}
for j, t := range layout {
if i == j {
continue
}
// Start at bestk+1 so only a strictly longer overlap replaces the
// current best candidate.
for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
if s[len(s)-k:] == t[:k] {
besti = i
bestj = j
bestk = k
}
}
}
}
if bestk > 0 {
// Merge j into i, sharing the overlapping bytes, then retire j.
layout[besti] += layout[bestj][bestk:]
layout[bestj] = ""
continue
}
break
}
text := strings.Join(layout, "")
// Each atom value packs the string's offset into text (shifted left by 8)
// together with its length in the low 8 bits.
// NOTE(review): this assumes every name is shorter than 256 bytes — confirm.
atom := map[string]uint32{}
for _, s := range all {
off := strings.Index(text, s)
if off < 0 {
panic("lost string " + s)
}
atom[s] = uint32(off<<8 | len(s))
}
var buf bytes.Buffer
// Generate the Go code.
fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n")
fmt.Fprintln(&buf, "//go:generate go run gen.go\n")
fmt.Fprintln(&buf, "package atom\n\nconst (")
// compute max len
maxLen := 0
for _, s := range all {
if maxLen < len(s) {
maxLen = len(s)
}
fmt.Fprintf(&buf, "\t%s Atom = %#x\n", identifier(s), atom[s])
}
fmt.Fprintln(&buf, ")\n")
fmt.Fprintf(&buf, "const hash0 = %#x\n\n", best.h0)
fmt.Fprintf(&buf, "const maxAtomLen = %d\n\n", maxLen)
fmt.Fprintf(&buf, "var table = [1<<%d]Atom{\n", best.k)
// Only occupied slots are emitted, as index: value pairs.
for i, s := range best.tab {
if s == "" {
continue
}
fmt.Fprintf(&buf, "\t%#x: %#x, // %s\n", i, atom[s], s)
}
fmt.Fprintf(&buf, "}\n")
// 4 bytes are counted per table slot.
// NOTE(review): presumably the size of an Atom — confirm.
datasize := (1 << best.k) * 4
fmt.Fprintln(&buf, "const atomText =")
// Remember the full length before text is consumed by the chunking loop.
textsize := len(text)
// Emit the packed text as concatenated string literals of 60 bytes each.
for len(text) > 60 {
fmt.Fprintf(&buf, "\t%q +\n", text[:60])
text = text[60:]
}
fmt.Fprintf(&buf, "\t%q\n\n", text)
genFile("table.go", &buf)
fmt.Fprintf(os.Stdout, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
}
// byLen orders a slice of strings from longest to shortest. Its Len, Less
// and Swap methods have the shape required by sort.Interface.
type byLen []string

// Len reports the number of strings in the slice.
func (s byLen) Len() int {
	return len(s)
}

// Less reports whether the string at index i is strictly longer than the
// string at index j.
func (s byLen) Less(i, j int) bool {
	return len(s[j]) < len(s[i])
}

// Swap exchanges the strings at indexes i and j.
func (s byLen) Swap(i, j int) {
	tmp := s[i]
	s[i] = s[j]
	s[j] = tmp
}
// fnv folds the bytes of s into the running hash h and returns the result.
// For every byte the hash is XORed with the byte and then multiplied by the
// 32-bit FNV prime 16777619; h supplies the arbitrary starting value.
func fnv(h uint32, s string) uint32 {
	const prime = 16777619
	for _, b := range []byte(s) {
		h = (h ^ uint32(b)) * prime
	}
	return h
}
// A table represents an attempt at constructing the lookup table.
// The lookup table uses cuckoo hashing, meaning that each string
// can be found in one of two positions.
type table struct {
// h0 is the starting value passed to fnv when hashing a string.
h0 uint32
// k is the number of hash bits used; the table has 1<<k slots.
k uint
// mask is 1<<k - 1 and reduces a hash value to a slot index.
mask uint32
// tab holds the stored strings; the empty string marks a free slot.
tab []string
}
// hash computes the two candidate slot indexes for s. Both come from a
// single fnv hash seeded with t.h0: h1 is the hash masked with t.mask, and
// h2 is the hash shifted right by 16 bits and then masked.
func (t *table) hash(s string) (h1, h2 uint32) {
	sum := fnv(t.h0, s)
	return sum & t.mask, (sum >> 16) & t.mask
}
// init resets the table for a fresh construction attempt and inserts every
// string of x into it.
//
// h0 is the initial hash value, k is the number of hash bits to use (the
// table gets 1<<k slots), and x is the list of strings to store.
// It returns false as soon as one string cannot be inserted, and true when
// all of them fit.
func (t *table) init(h0 uint32, k uint, x []string) bool {
	*t = table{
		h0:   h0,
		k:    k,
		mask: 1<<k - 1,
		tab:  make([]string, 1<<k),
	}
	for i := range x {
		if !t.insert(x[i]) {
			return false
		}
	}
	return true
}
// insert stores s in one of its two candidate slots and reports whether it
// succeeded. A free slot is preferred, first candidate before second; if
// both are taken it tries to push the current occupant aside, again first
// candidate before second.
func (t *table) insert(s string) bool {
	h1, h2 := t.hash(s)
	candidates := [2]uint32{h1, h2}

	// Take a candidate slot that is already free.
	for _, slot := range candidates {
		if t.tab[slot] == "" {
			t.tab[slot] = s
			return true
		}
	}

	// Both are occupied: try to relocate an occupant to make room.
	for _, slot := range candidates {
		if t.push(slot, 0) {
			t.tab[slot] = s
			return true
		}
	}
	return false
}
// push tries to move the string stored in slot i to its other candidate
// slot, recursively pushing aside whatever lives there. It reports whether
// the move happened. depth counts the chain of relocations so far; once it
// exceeds the number of slots the attempt is abandoned.
func (t *table) push(i uint32, depth int) bool {
	if depth > len(t.tab) {
		return false
	}
	occupant := t.tab[i]
	a, b := t.hash(occupant)
	// i is one of the two hashes, so a+b-i is the other one.
	other := a + b - i
	if t.tab[other] != "" {
		if !t.push(other, depth+1) {
			return false
		}
	}
	t.tab[other] = occupant
	return true
}
// The lists of element names and attribute keys were taken from
// https://html.spec.whatwg.org/multipage/indices.html#index
// as of the "HTML Living Standard - Last Updated 16 April 2018" version.
// "command", "keygen" and "menuitem" have been removed from the spec,
// but are kept here for backwards compatibility.
var elements = []string{
"a",
"abbr",
"address",
"area",
"article",
"aside",
"audio",
"b",
"base",
"bdi",
"bdo",
"blockquote",
"body",
"br",
"button",
"canvas",
"caption",
"cite",
"code",
"col",
"colgroup",
"command",
"data",
"datalist",
"dd",
"del",
"details",
"dfn",
"dialog",
"div",
"dl",
"dt",
"em",
"embed",
"fieldset",
"figcaption",
"figure",
"footer",
"form",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"head",
"header",
"hgroup",
"hr",
"html",
"i",
"iframe",
"img",
"input",
"ins",
"kbd",
"keygen",
"label",
"legend",
"li",
"link",
"main",
"map",
"mark",
"menu",
"menuitem",
"meta",
"meter",
"nav",
"noscript",
"object",
"ol",
"optgroup",
"option",
"output",
"p",
"param",
"picture",
"pre",
"progress",
"q",
"rp",
"rt",
"ruby",
"s",
"samp",
"script",
"section",
"select",
"slot",
"small",
"source",
"span",
"strong",
"style",
"sub",
"summary",
"sup",
"table",
"tbody",
"td",
"template",
"textarea",
"tfoot",
"th",
"thead",
"time",
"title",
"tr",
"track",
"u",
"ul",
"var",
"video",
"wbr",
}
// https://html.spec.whatwg.org/multipage/indices.html#attributes-3
//
// "challenge", "command", "contextmenu", "dropzone", "icon", "keytype", "mediagroup",
// "radiogroup", "spellcheck", "scoped", "seamless", "sortable" and "sorted" have been removed from the spec,
// but are kept here for backwards compatibility.
var attributes = []string{
"abbr",
"accept",
"accept-charset",
"accesskey",
"action",
"allowfullscreen",
"allowpaymentrequest",
"allowusermedia",
"alt",
"as",
"async",
"autocomplete",
"autofocus",
"autoplay",
"challenge",
"charset",
"checked",
"cite",
"class",
"color",
"cols",
"colspan",
"command",
"content",
"contenteditable",
"contextmenu",
"controls",
"coords",
"crossorigin",
"data",
"datetime",
"default",
"defer",
"dir",
"dirname",
"disabled",
"download",
"draggable",
"dropzone",
"enctype",
"for",
"form",
"formaction",
"formenctype",
"formmethod",
"formnovalidate",
"formtarget",
"headers",
"height",
"hidden",
"high",
"href",
"hreflang",
"http-equiv",
"icon",
"id",
"inputmode",
"integrity",
"is",
"ismap",
"itemid",
"itemprop",
"itemref",
"itemscope",
"itemtype",
"keytype",
"kind",
"label",
"lang",
"list",
"loop",
"low",
"manifest",
"max",
"maxlength",
"media",
"mediagroup",
"method",
"min",
"minlength",
"multiple",
"muted",
"name",
"nomodule",
"nonce",
"novalidate",
"open",
"optimum",
"pattern",
"ping",
"placeholder",
"playsinline",
"poster",
"preload",
"radiogroup",
"readonly",
"referrerpolicy",
"rel",
"required",
"reversed",
"rows",
"rowspan",
"sandbox",
"spellcheck",
"scope",
"scoped",
"seamless",
"selected",
"shape",
"size",
"sizes",
"sortable",
"sorted",
"slot",
"span",
"spellcheck",
"src",
"srcdoc",
"srclang",
"srcset",
"start",
"step",
"style",
"tabindex",
"target",
"title",
"translate",
"type",
"typemustmatch",
"updateviacache",
"usemap",
"value",
"width",
"workertype",
"wrap",
}
// "onautocomplete", "onautocompleteerror", "onmousewheel",
// "onshow" and "onsort" have been removed from the spec,
// but are kept here for backwards compatibility.
var eventHandlers = []string{
"onabort",
"onautocomplete",
"onautocompleteerror",
"onauxclick",
"onafterprint",
"onbeforeprint",
"onbeforeunload",
"onblur",
"oncancel",
"oncanplay",
"oncanplaythrough",
"onchange",
"onclick",
"onclose",
"oncontextmenu",
"oncopy",
"oncuechange",
"oncut",
"ondblclick",
"ondrag",
"ondragend",
"ondragenter",
"ondragexit",
"ondragleave",
"ondragover",
"ondragstart",
"ondrop",
"ondurationchange",
"onemptied",
"onended",
"onerror",
"onfocus",
"onhashchange",
"oninput",
"oninvalid",
"onkeydown",
"onkeypress",
"onkeyup",
"onlanguagechange",
"onload",
"onloadeddata",
"onloadedmetadata",
"onloadend",
"onloadstart",
"onmessage",
"onmessageerror",
"onmousedown",
"onmouseenter",
"onmouseleave",
"onmousemove",
"onmouseout",
"onmouseover",
"onmouseup",
"onmousewheel",
"onwheel",
"onoffline",
"ononline",
"onpagehide",
"onpageshow",
"onpaste",
"onpause",
"onplay",
"onplaying",
"onpopstate",
"onprogress",
"onratechange",
"onreset",
"onresize",
"onrejectionhandled",
"onscroll",
"onsecuritypolicyviolation",
"onseeked",
"onseeking",
"onselect",
"onshow",
"onsort",
"onstalled",
"onstorage",
"onsubmit",
"onsuspend",
"ontimeupdate",
"ontoggle",
"onunhandledrejection",
"onunload",
"onvolumechange",
"onwaiting",
}
// extra are ad-hoc values not covered by any of the lists above.
var extra = []string{
"acronym",
"align",
"annotation",
"annotation-xml",
"applet",
"basefont",
"bgsound",
"big",
"blink",
"center",
"color",
"desc",
"face",
"font",
"foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.
"foreignobject",
"frame",
"frameset",
"image",
"isindex",
"listing",
"malignmark",
"marquee",
"math",
"mglyph",
"mi",
"mn",
"mo",
"ms",
"mtext",
"nobr",
"noembed",
"noframes",
"plaintext",
"prompt",
"public",
"rb",
"rtc",
"spacer",
"strike",
"svg",
"system",
"tt",
"xmp",
}

View file

@ -177,7 +177,7 @@ func (s *nodeStack) index(n *Node) int {
// contains returns whether a is within s.
func (s *nodeStack) contains(a atom.Atom) bool {
for _, n := range *s {
if n.DataAtom == a {
if n.DataAtom == a && n.Namespace == "" {
return true
}
}

View file

@ -1719,8 +1719,12 @@ func inSelectIM(p *parser) bool {
}
p.addElement()
case a.Select:
p.tok.Type = EndTagToken
return false
if p.popUntil(selectScope, a.Select) {
p.resetInsertionMode()
} else {
// Ignore the token.
return true
}
case a.Input, a.Keygen, a.Textarea:
if p.elementInScope(selectScope, a.Select) {
p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
@ -1750,6 +1754,9 @@ func inSelectIM(p *parser) bool {
case a.Select:
if p.popUntil(selectScope, a.Select) {
p.resetInsertionMode()
} else {
// Ignore the token.
return true
}
case a.Template:
return inHeadIM(p)
@ -1775,13 +1782,22 @@ func inSelectInTableIM(p *parser) bool {
case StartTagToken, EndTagToken:
switch p.tok.DataAtom {
case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.DataAtom) {
p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
return false
} else {
if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) {
// Ignore the token.
return true
}
// This is like p.popUntil(selectScope, a.Select), but it also
// matches <math select>, not just <select>. Matching the MathML
// tag is arguably incorrect (conceptually), but it mimics what
// Chromium does.
for i := len(p.oe) - 1; i >= 0; i-- {
if n := p.oe[i]; n.DataAtom == a.Select {
p.oe = p.oe[:i]
break
}
}
p.resetInsertionMode()
return false
}
}
return inSelectIM(p)

View file

@ -52,10 +52,11 @@ import (
)
const (
prefaceTimeout = 10 * time.Second
firstSettingsTimeout = 2 * time.Second // should be in-flight with preface anyway
handlerChunkWriteSize = 4 << 10
defaultMaxStreams = 250 // TODO: make this 100 as the GFE seems to?
prefaceTimeout = 10 * time.Second
firstSettingsTimeout = 2 * time.Second // should be in-flight with preface anyway
handlerChunkWriteSize = 4 << 10
defaultMaxStreams = 250 // TODO: make this 100 as the GFE seems to?
maxQueuedControlFrames = 10000
)
var (
@ -163,6 +164,15 @@ func (s *Server) maxConcurrentStreams() uint32 {
return defaultMaxStreams
}
// maxQueuedControlFrames is the maximum number of control frames like
// SETTINGS, PING and RST_STREAM that will be queued for writing before
// the connection is closed to prevent memory exhaustion attacks.
func (s *Server) maxQueuedControlFrames() int {
// TODO: if anybody asks, add a Server field, and remember to define the
// behavior of negative values.
// For now the limit is not configurable: the package-level constant of the
// same name is returned regardless of s.
return maxQueuedControlFrames
}
type serverInternalState struct {
mu sync.Mutex
activeConns map[*serverConn]struct{}
@ -482,6 +492,7 @@ type serverConn struct {
sawFirstSettings bool // got the initial SETTINGS frame after the preface
needToSendSettingsAck bool
unackedSettings int // how many SETTINGS have we sent without ACKs?
queuedControlFrames int // control frames in the writeSched queue
clientMaxStreams uint32 // SETTINGS_MAX_CONCURRENT_STREAMS from client (our PUSH_PROMISE limit)
advMaxStreams uint32 // our SETTINGS_MAX_CONCURRENT_STREAMS advertised the client
curClientStreams uint32 // number of open streams initiated by the client
@ -870,6 +881,14 @@ func (sc *serverConn) serve() {
}
}
// If the peer is causing us to generate a lot of control frames,
// but not reading them from us, assume they are trying to make us
// run out of memory.
if sc.queuedControlFrames > sc.srv.maxQueuedControlFrames() {
sc.vlogf("http2: too many control frames in send queue, closing connection")
return
}
// Start the shutdown timer after sending a GOAWAY. When sending GOAWAY
// with no error code (graceful shutdown), don't start the timer until
// all open streams have been completed.
@ -1069,6 +1088,14 @@ func (sc *serverConn) writeFrame(wr FrameWriteRequest) {
}
if !ignoreWrite {
if wr.isControl() {
sc.queuedControlFrames++
// For extra safety, detect wraparounds, which should not happen,
// and pull the plug.
if sc.queuedControlFrames < 0 {
sc.conn.Close()
}
}
sc.writeSched.Push(wr)
}
sc.scheduleFrameWrite()
@ -1186,10 +1213,8 @@ func (sc *serverConn) wroteFrame(res frameWriteResult) {
// If a frame is already being written, nothing happens. This will be called again
// when the frame is done being written.
//
// If a frame isn't being written we need to send one, the best frame
// to send is selected, preferring first things that aren't
// stream-specific (e.g. ACKing settings), and then finding the
// highest priority stream.
// If a frame isn't being written and we need to send one, the best frame
// to send is selected by writeSched.
//
// If a frame isn't being written and there's nothing else to send, we
// flush the write buffer.
@ -1217,6 +1242,9 @@ func (sc *serverConn) scheduleFrameWrite() {
}
if !sc.inGoAway || sc.goAwayCode == ErrCodeNo {
if wr, ok := sc.writeSched.Pop(); ok {
if wr.isControl() {
sc.queuedControlFrames--
}
sc.startFrameWrite(wr)
continue
}
@ -1509,6 +1537,8 @@ func (sc *serverConn) processSettings(f *SettingsFrame) error {
if err := f.ForeachSetting(sc.processSetting); err != nil {
return err
}
// TODO: judging by RFC 7540, Section 6.5.3 each SETTINGS frame should be
// acknowledged individually, even if multiple are received before the ACK.
sc.needToSendSettingsAck = true
sc.scheduleFrameWrite()
return nil
@ -1594,12 +1624,6 @@ func (sc *serverConn) processData(f *DataFrame) error {
// type PROTOCOL_ERROR."
return ConnectionError(ErrCodeProtocol)
}
// RFC 7540, sec 6.1: If a DATA frame is received whose stream is not in
// "open" or "half-closed (local)" state, the recipient MUST respond with a
// stream error (Section 5.4.2) of type STREAM_CLOSED.
if state == stateClosed {
return streamError(id, ErrCodeStreamClosed)
}
if st == nil || state != stateOpen || st.gotTrailerHeader || st.resetQueued {
// This includes sending a RST_STREAM if the stream is
// in stateHalfClosedLocal (which currently means that

View file

@ -32,7 +32,7 @@ type WriteScheduler interface {
// Pop dequeues the next frame to write. Returns false if no frames can
// be written. Frames with a given wr.StreamID() are Pop'd in the same
// order they are Push'd.
// order they are Push'd. No frames should be discarded except by CloseStream.
Pop() (wr FrameWriteRequest, ok bool)
}
@ -76,6 +76,12 @@ func (wr FrameWriteRequest) StreamID() uint32 {
return wr.stream.id
}
// isControl reports whether wr is a control frame for MaxQueuedControlFrames
// purposes. That includes non-stream frames and RST_STREAM frames.
func (wr FrameWriteRequest) isControl() bool {
// A request with no associated stream counts as a control frame.
// NOTE(review): RST_STREAM frames are presumably queued with a nil stream;
// confirm against the call sites.
return wr.stream == nil
}
// DataSize returns the number of flow control bytes that must be consumed
// to write this entire frame. This is 0 for non-DATA frames.
func (wr FrameWriteRequest) DataSize() int {

View file

@ -86,6 +86,12 @@ import (
// FOR DEBUGGING ONLY. This will slow down the program.
var DebugUseAfterFinish = false
// HTTP ServeMux paths.
const (
debugRequestsPath = "/debug/requests"
debugEventsPath = "/debug/events"
)
// AuthRequest determines whether a specific request is permitted to load the
// /debug/requests or /debug/events pages.
//
@ -112,8 +118,8 @@ var AuthRequest = func(req *http.Request) (any, sensitive bool) {
}
func init() {
_, pat := http.DefaultServeMux.Handler(&http.Request{URL: &url.URL{Path: "/debug/requests"}})
if pat != "" {
_, pat := http.DefaultServeMux.Handler(&http.Request{URL: &url.URL{Path: debugRequestsPath}})
if pat == debugRequestsPath {
panic("/debug/requests is already registered. You may have two independent copies of " +
"golang.org/x/net/trace in your binary, trying to maintain separate state. This may " +
"involve a vendored copy of golang.org/x/net/trace.")
@ -121,8 +127,8 @@ func init() {
// TODO(jbd): Serve Traces from /debug/traces in the future?
// There is no requirement for a request to be present to have traces.
http.HandleFunc("/debug/requests", Traces)
http.HandleFunc("/debug/events", Events)
http.HandleFunc(debugRequestsPath, Traces)
http.HandleFunc(debugEventsPath, Events)
}
// NewContext returns a copy of the parent context

View file

@ -1,61 +0,0 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// mkasm_darwin.go generates assembly trampolines to call libSystem routines from Go.
// This program must be run after mksyscall.go.
package main
import (
"bytes"
"fmt"
"io/ioutil"
"log"
"os"
"strings"
)
// main generates zsyscall_darwin_<arch>.s, where <arch> is the first
// command-line argument.
//
// It concatenates syscall_darwin.go, syscall_darwin_<arch>.go and
// zsyscall_darwin_<arch>.go, finds every line of the form
// "func <name>_trampoline()", and emits one assembly stub per distinct name
// that jumps to <name>. Any read or write failure, or a missing argument,
// terminates the program through log.Fatal.
func main() {
	// The architecture is required; report a usage error instead of letting
	// os.Args[1] panic with an index-out-of-range.
	if len(os.Args) < 2 {
		log.Fatal("usage: go run mkasm_darwin.go <arch>")
	}
	arch := os.Args[1]

	in1, err := ioutil.ReadFile("syscall_darwin.go")
	if err != nil {
		log.Fatalf("can't open syscall_darwin.go: %s", err)
	}
	in2, err := ioutil.ReadFile(fmt.Sprintf("syscall_darwin_%s.go", arch))
	if err != nil {
		log.Fatalf("can't open syscall_darwin_%s.go: %s", arch, err)
	}
	in3, err := ioutil.ReadFile(fmt.Sprintf("zsyscall_darwin_%s.go", arch))
	if err != nil {
		log.Fatalf("can't open zsyscall_darwin_%s.go: %s", arch, err)
	}
	in := string(in1) + string(in2) + string(in3)

	// trampolines records the names already emitted so that each stub is
	// written only once even if it is declared in several input files.
	trampolines := map[string]bool{}

	var out bytes.Buffer

	fmt.Fprintf(&out, "// go run mkasm_darwin.go %s\n", strings.Join(os.Args[1:], " "))
	fmt.Fprintf(&out, "// Code generated by the command above; DO NOT EDIT.\n")
	fmt.Fprintf(&out, "\n")
	fmt.Fprintf(&out, "// +build go1.12\n")
	fmt.Fprintf(&out, "\n")
	fmt.Fprintf(&out, "#include \"textflag.h\"\n")
	for _, line := range strings.Split(in, "\n") {
		if !strings.HasPrefix(line, "func ") || !strings.HasSuffix(line, "_trampoline()") {
			continue
		}
		// Strip the "func " prefix and "_trampoline()" suffix to get the
		// name of the routine the stub jumps to.
		fn := strings.TrimSuffix(strings.TrimPrefix(line, "func "), "_trampoline()")
		if !trampolines[fn] {
			trampolines[fn] = true
			fmt.Fprintf(&out, "TEXT ·%s_trampoline(SB),NOSPLIT,$0-0\n", fn)
			fmt.Fprintf(&out, "\tJMP\t%s(SB)\n", fn)
		}
	}
	err = ioutil.WriteFile(fmt.Sprintf("zsyscall_darwin_%s.s", arch), out.Bytes(), 0644)
	if err != nil {
		log.Fatalf("can't write zsyscall_darwin_%s.s: %s", arch, err)
	}
}

View file

@ -1,106 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// mkpost processes the output of cgo -godefs to
// modify the generated types. It is used to clean up
// the sys API in an architecture specific manner.
//
// mkpost is run after cgo -godefs; see README.md.
package main
import (
"bytes"
"fmt"
"go/format"
"io/ioutil"
"log"
"os"
"regexp"
)
// main reads cgo -godefs output from standard input, applies a fixed
// sequence of regular-expression rewrites to the generated types, rewrites
// the header command line to mention mkpost and carry a build tag for the
// target GOARCH/GOOS, gofmt-formats the result and writes it to standard
// output.
//
// The target architecture comes from GOARCH_TARGET, falling back to GOARCH.
// When GOOS is "linux" the program refuses to run unless GOLANG_SYS_BUILD is
// "docker". Read, format and write failures terminate the program through
// log.Fatal.
func main() {
	// Get the OS and architecture (using GOARCH_TARGET if it exists)
	goos := os.Getenv("GOOS")
	goarch := os.Getenv("GOARCH_TARGET")
	if goarch == "" {
		goarch = os.Getenv("GOARCH")
	}
	// Check that we are using the Docker-based build system if we should be.
	if goos == "linux" {
		if os.Getenv("GOLANG_SYS_BUILD") != "docker" {
			os.Stderr.WriteString("In the Docker-based build system, mkpost should not be called directly.\n")
			os.Stderr.WriteString("See README.md\n")
			os.Exit(1)
		}
	}

	b, err := ioutil.ReadAll(os.Stdin)
	if err != nil {
		log.Fatal(err)
	}

	// Intentionally export __val fields in Fsid and Sigset_t
	valRegex := regexp.MustCompile(`type (Fsid|Sigset_t) struct {(\s+)X__val(\s+\S+\s+)}`)
	b = valRegex.ReplaceAll(b, []byte("type $1 struct {${2}Val$3}"))

	// Intentionally export __fds_bits field in FdSet
	fdSetRegex := regexp.MustCompile(`type (FdSet) struct {(\s+)X__fds_bits(\s+\S+\s+)}`)
	b = fdSetRegex.ReplaceAll(b, []byte("type $1 struct {${2}Bits$3}"))

	// If we have empty Ptrace structs, we should delete them. Only s390x emits
	// nonempty Ptrace structs.
	ptraceRegexp := regexp.MustCompile(`type Ptrace((Psw|Fpregs|Per) struct {\s*})`)
	b = ptraceRegexp.ReplaceAll(b, nil)

	// Replace the control_regs union with a blank identifier for now.
	controlRegsRegex := regexp.MustCompile(`(Control_regs)\s+\[0\]uint64`)
	b = controlRegsRegex.ReplaceAll(b, []byte("_ [0]uint64"))

	// Remove fields that are added by glibc
	// Note that this is unstable as the identifiers are private.
	removeFieldsRegex := regexp.MustCompile(`X__glibc\S*`)
	b = removeFieldsRegex.ReplaceAll(b, []byte("_"))

	// Convert [65]int8 to [65]byte in Utsname members to simplify
	// conversion to string; see golang.org/issue/20753
	convertUtsnameRegex := regexp.MustCompile(`((Sys|Node|Domain)name|Release|Version|Machine)(\s+)\[(\d+)\]u?int8`)
	b = convertUtsnameRegex.ReplaceAll(b, []byte("$1$3[$4]byte"))

	// Convert [1024]int8 to [1024]byte in Ptmget members
	convertPtmget := regexp.MustCompile(`([SC]n)(\s+)\[(\d+)\]u?int8`)
	b = convertPtmget.ReplaceAll(b, []byte("$1[$3]byte"))

	// Remove spare fields (e.g. in Statx_t)
	spareFieldsRegex := regexp.MustCompile(`X__spare\S*`)
	b = spareFieldsRegex.ReplaceAll(b, []byte("_"))

	// Remove cgo padding fields
	removePaddingFieldsRegex := regexp.MustCompile(`Pad_cgo_\d+`)
	b = removePaddingFieldsRegex.ReplaceAll(b, []byte("_"))

	// Remove padding, hidden, or unused fields
	removeFieldsRegex = regexp.MustCompile(`\b(X_\S+|Padding)`)
	b = removeFieldsRegex.ReplaceAll(b, []byte("_"))

	// Remove the first line of warning from cgo
	// (if there is no newline, IndexByte returns -1 and nothing is removed).
	b = b[bytes.IndexByte(b, '\n')+1:]

	// Modify the command in the header to include:
	// mkpost, our own warning, and a build tag.
	replacement := fmt.Sprintf(`$1 | go run mkpost.go
// Code generated by the command above; see README.md. DO NOT EDIT.
// +build %s,%s`, goarch, goos)
	cgoCommandRegex := regexp.MustCompile(`(cgo -godefs .*)`)
	b = cgoCommandRegex.ReplaceAll(b, []byte(replacement))

	// gofmt
	b, err = format.Source(b)
	if err != nil {
		log.Fatal(err)
	}

	// A failed or short write would leave a truncated generated file behind,
	// so report it instead of exiting successfully.
	if _, err := os.Stdout.Write(b); err != nil {
		log.Fatal(err)
	}
}

View file

@ -1,407 +0,0 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
/*
This program reads a file containing function prototypes
(like syscall_darwin.go) and generates system call bodies.
The prototypes are marked by lines beginning with "//sys"
and read like func declarations if //sys is replaced by func, but:
* The parameter lists must give a name for each argument.
This includes return parameters.
* The parameter lists must give a type for each argument:
the (x, y, z int) shorthand is not allowed.
* If the return parameter is an error number, it must be named errno.
A line beginning with //sysnb is like //sys, except that the
goroutine will not be suspended during the execution of the system
call. This must only be used for system calls which can never
block, as otherwise the system call could cause all goroutines to
hang.
*/
package main
import (
"bufio"
"flag"
"fmt"
"os"
"regexp"
"strings"
)
// Command-line flags. b32 and l32 select the endianness label used by main,
// and tags is the value returned by buildTags.
// NOTE(review): the remaining flags are consumed in parts of main that are
// not visible here; their descriptions below are the flag help strings.
var (
b32 = flag.Bool("b32", false, "32bit big-endian")
l32 = flag.Bool("l32", false, "32bit little-endian")
plan9 = flag.Bool("plan9", false, "plan9")
openbsd = flag.Bool("openbsd", false, "openbsd")
netbsd = flag.Bool("netbsd", false, "netbsd")
dragonfly = flag.Bool("dragonfly", false, "dragonfly")
arm = flag.Bool("arm", false, "arm") // 64-bit value should use (even, odd)-pair
tags = flag.String("tags", "", "build tags")
filename = flag.String("output", "", "output file name (standard output if omitted)")
)
// cmdLine returns the string "go run mksyscall.go " followed by this
// program's command-line arguments joined with single spaces.
func cmdLine() string {
	args := strings.Join(os.Args[1:], " ")
	return "go run mksyscall.go " + args
}
// buildTags returns the current value of the -tags command-line flag.
func buildTags() string {
	value := *tags
	return value
}
// Param is a single function parameter, split into its name and type
// as produced by parseParam.
type Param struct {
// Name is the parameter name (the text before the space).
Name string
// Type is the parameter type (the text after the space).
Type string
}
// usage writes the one-line usage message to standard error and terminates
// the program with exit status 1.
func usage() {
	const msg = "usage: go run mksyscall.go [-b32 | -l32] [-tags x,y] [file ...]\n"
	fmt.Fprint(os.Stderr, msg)
	os.Exit(1)
}
// paramListSepRE matches the comma separating two parameters together with
// any whitespace around it. It is compiled once here rather than on every
// parseParamList call.
var paramListSepRE = regexp.MustCompile(`\s*,\s*`)

// parseParamList parses a comma-separated parameter list and returns a
// slice of parameters. Leading and trailing whitespace of the list, and
// whitespace around each comma, is discarded. A list that is empty after
// trimming yields an empty, non-nil slice.
func parseParamList(list string) []string {
	list = strings.TrimSpace(list)
	if list == "" {
		return []string{}
	}
	return paramListSepRE.Split(list, -1)
}
// paramNameTypeRE matches a parameter of the form "name type": two runs of
// non-space characters separated by exactly one space. It is compiled once
// here rather than on every parseParam call.
var paramNameTypeRE = regexp.MustCompile(`^(\S*) (\S*)$`)

// parseParam splits a parameter into name and type.
// If p does not have the form "name type", a "malformed parameter" message
// is printed to standard error and the program exits with status 1.
func parseParam(p string) Param {
	ps := paramNameTypeRE.FindStringSubmatch(p)
	if ps == nil {
		fmt.Fprintf(os.Stderr, "malformed parameter: %s\n", p)
		os.Exit(1)
	}
	return Param{ps[1], ps[2]}
}
// main reads the files named on the command line, turns every //sys and
// //sysnb prototype line into the source of a Go wrapper around the matching
// Syscall/RawSyscall variant, and prints the generated file to standard
// output. Any input or parse error is reported on standard error and
// terminates the program with exit status 1.
func main() {
	// Get the OS and architecture (using GOARCH_TARGET if it exists)
	goos := os.Getenv("GOOS")
	if goos == "" {
		fmt.Fprintln(os.Stderr, "GOOS not defined in environment")
		os.Exit(1)
	}
	goarch := os.Getenv("GOARCH_TARGET")
	if goarch == "" {
		goarch = os.Getenv("GOARCH")
	}

	// Check that we are using the Docker-based build system if we should
	if goos == "linux" {
		if os.Getenv("GOLANG_SYS_BUILD") != "docker" {
			fmt.Fprintf(os.Stderr, "In the Docker-based build system, mksyscall should not be called directly.\n")
			fmt.Fprintf(os.Stderr, "See README.md\n")
			os.Exit(1)
		}
	}

	flag.Usage = usage
	flag.Parse()
	if len(flag.Args()) <= 0 {
		fmt.Fprintf(os.Stderr, "no files to parse provided\n")
		usage()
	}

	endianness := ""
	if *b32 {
		endianness = "big-endian"
	} else if *l32 {
		endianness = "little-endian"
	}

	libc := false
	if goos == "darwin" && strings.Contains(buildTags(), ",go1.12") {
		libc = true
	}
	trampolines := map[string]bool{}

	// Compile every pattern once instead of once per input line/parameter.
	var (
		spaceRE = regexp.MustCompile(`\s+`)
		declRE  = regexp.MustCompile(`^\/\/sys(nb)? (\w+)\(([^()]*)\)\s*(?:\(([^()]+)\))?\s*(?:=\s*((?i)SYS_[A-Z0-9_]+))?$`)
		sliceRE = regexp.MustCompile(`^\[\](.*)`)
		extpRE  = regexp.MustCompile(`^(?i)extp(read|write)`)
		camelRE = regexp.MustCompile(`([a-z])([A-Z])`)
	)

	text := ""
	for _, path := range flag.Args() {
		file, err := os.Open(path)
		if err != nil {
			// Print the message verbatim; it must not be used as a format string.
			fmt.Fprint(os.Stderr, err.Error())
			os.Exit(1)
		}
		s := bufio.NewScanner(file)
		for s.Scan() {
			// Normalize whitespace so the declaration pattern can assume single spaces.
			t := s.Text()
			t = strings.TrimSpace(t)
			t = spaceRE.ReplaceAllString(t, ` `)
			nonblock := strings.HasPrefix(t, "//sysnb ")
			if !strings.HasPrefix(t, "//sys ") && !nonblock {
				continue
			}

			// Line must be of the form
			//	func Open(path string, mode int, perm int) (fd int, errno error)
			// Split into name, in params, out params.
			f := declRE.FindStringSubmatch(t)
			if f == nil {
				fmt.Fprintf(os.Stderr, "%s:%s\nmalformed //sys declaration\n", path, t)
				os.Exit(1)
			}
			funct, inps, outps, sysname := f[2], f[3], f[4], f[5]

			// ClockGettime doesn't have a syscall number on Darwin, only generate libc wrappers.
			if goos == "darwin" && !libc && funct == "ClockGettime" {
				continue
			}

			// Split argument lists on comma.
			in := parseParamList(inps)
			out := parseParamList(outps)

			// Try in vain to keep people from editing this file.
			// The theory is that they jump into the middle of the file
			// without reading the header.
			text += "// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT\n\n"

			// Go function header.
			outDecl := ""
			if len(out) > 0 {
				outDecl = fmt.Sprintf(" (%s)", strings.Join(out, ", "))
			}
			text += fmt.Sprintf("func %s(%s)%s {\n", funct, strings.Join(in, ", "), outDecl)

			// Check if err return available
			errvar := ""
			for _, param := range out {
				p := parseParam(param)
				if p.Type == "error" {
					errvar = p.Name
					break
				}
			}

			// Prepare arguments to Syscall.
			var args []string
			n := 0
			for _, param := range in {
				p := parseParam(param)
				if strings.HasPrefix(p.Type, "*") {
					args = append(args, "uintptr(unsafe.Pointer("+p.Name+"))")
				} else if p.Type == "string" && errvar != "" {
					text += fmt.Sprintf("\tvar _p%d *byte\n", n)
					text += fmt.Sprintf("\t_p%d, %s = BytePtrFromString(%s)\n", n, errvar, p.Name)
					text += fmt.Sprintf("\tif %s != nil {\n\t\treturn\n\t}\n", errvar)
					args = append(args, fmt.Sprintf("uintptr(unsafe.Pointer(_p%d))", n))
					n++
				} else if p.Type == "string" {
					// Warn only: the conversion error is discarded in the generated code.
					fmt.Fprintf(os.Stderr, "%s:%s uses string arguments, but has no error return\n", path, funct)
					text += fmt.Sprintf("\tvar _p%d *byte\n", n)
					text += fmt.Sprintf("\t_p%d, _ = BytePtrFromString(%s)\n", n, p.Name)
					args = append(args, fmt.Sprintf("uintptr(unsafe.Pointer(_p%d))", n))
					n++
				} else if sliceRE.MatchString(p.Type) {
					// Convert slice into pointer, length.
					// Have to be careful not to take address of &a[0] if len == 0:
					// pass dummy pointer in that case.
					// Used to pass nil, but some OSes or simulators reject write(fd, nil, 0).
					text += fmt.Sprintf("\tvar _p%d unsafe.Pointer\n", n)
					text += fmt.Sprintf("\tif len(%s) > 0 {\n\t\t_p%d = unsafe.Pointer(&%s[0])\n\t}", p.Name, n, p.Name)
					text += fmt.Sprintf(" else {\n\t\t_p%d = unsafe.Pointer(&_zero)\n\t}\n", n)
					args = append(args, fmt.Sprintf("uintptr(_p%d)", n), fmt.Sprintf("uintptr(len(%s))", p.Name))
					n++
				} else if p.Type == "int64" && (*openbsd || *netbsd) {
					// A literal 0 argument always precedes the int64 value here.
					args = append(args, "0")
					if endianness == "big-endian" {
						args = append(args, fmt.Sprintf("uintptr(%s>>32)", p.Name), fmt.Sprintf("uintptr(%s)", p.Name))
					} else if endianness == "little-endian" {
						args = append(args, fmt.Sprintf("uintptr(%s)", p.Name), fmt.Sprintf("uintptr(%s>>32)", p.Name))
					} else {
						args = append(args, fmt.Sprintf("uintptr(%s)", p.Name))
					}
				} else if p.Type == "int64" && *dragonfly {
					// Same as above, except extpread/extpwrite get no leading 0.
					if !extpRE.MatchString(funct) {
						args = append(args, "0")
					}
					if endianness == "big-endian" {
						args = append(args, fmt.Sprintf("uintptr(%s>>32)", p.Name), fmt.Sprintf("uintptr(%s)", p.Name))
					} else if endianness == "little-endian" {
						args = append(args, fmt.Sprintf("uintptr(%s)", p.Name), fmt.Sprintf("uintptr(%s>>32)", p.Name))
					} else {
						args = append(args, fmt.Sprintf("uintptr(%s)", p.Name))
					}
				} else if (p.Type == "int64" || p.Type == "uint64") && endianness != "" {
					if len(args)%2 == 1 && *arm {
						// arm abi specifies 64-bit argument uses
						// (even, odd) pair
						args = append(args, "0")
					}
					if endianness == "big-endian" {
						args = append(args, fmt.Sprintf("uintptr(%s>>32)", p.Name), fmt.Sprintf("uintptr(%s)", p.Name))
					} else {
						args = append(args, fmt.Sprintf("uintptr(%s)", p.Name), fmt.Sprintf("uintptr(%s>>32)", p.Name))
					}
				} else {
					args = append(args, fmt.Sprintf("uintptr(%s)", p.Name))
				}
			}

			// Determine which form to use; pad args with zeros.
			asm := "Syscall"
			if nonblock {
				if errvar == "" && goos == "linux" {
					asm = "RawSyscallNoError"
				} else {
					asm = "RawSyscall"
				}
			} else {
				if errvar == "" && goos == "linux" {
					asm = "SyscallNoError"
				}
			}
			if len(args) <= 3 {
				for len(args) < 3 {
					args = append(args, "0")
				}
			} else if len(args) <= 6 {
				asm += "6"
				for len(args) < 6 {
					args = append(args, "0")
				}
			} else if len(args) <= 9 {
				asm += "9"
				for len(args) < 9 {
					args = append(args, "0")
				}
			} else {
				// Only a warning: generation continues with the oversized argument list.
				fmt.Fprintf(os.Stderr, "%s:%s too many arguments to system call\n", path, funct)
			}

			// System call number.
			if sysname == "" {
				sysname = "SYS_" + funct
				sysname = camelRE.ReplaceAllString(sysname, `${1}_$2`)
				sysname = strings.ToUpper(sysname)
			}

			var libcFn string
			if libc {
				asm = "syscall_" + strings.ToLower(asm[:1]) + asm[1:] // internal syscall call
				sysname = strings.TrimPrefix(sysname, "SYS_")         // remove SYS_
				sysname = strings.ToLower(sysname)                    // lowercase
				if sysname == "getdirentries64" {
					// Special case - libSystem name and
					// raw syscall name don't match.
					sysname = "__getdirentries64"
				}
				libcFn = sysname
				sysname = "funcPC(libc_" + sysname + "_trampoline)"
			}

			// Actual call.
			arglist := strings.Join(args, ", ")
			call := fmt.Sprintf("%s(%s, %s)", asm, sysname, arglist)

			// Assign return values.
			body := ""
			ret := []string{"_", "_", "_"}
			doErrno := false
			for i := 0; i < len(out); i++ {
				p := parseParam(out[i])
				reg := ""
				if p.Name == "err" && !*plan9 {
					reg = "e1"
					ret[2] = reg
					doErrno = true
				} else if p.Name == "err" && *plan9 {
					ret[0] = "r0"
					ret[2] = "e1"
					break
				} else {
					reg = fmt.Sprintf("r%d", i)
					ret[i] = reg
				}
				if p.Type == "bool" {
					reg = fmt.Sprintf("%s != 0", reg)
				}
				if p.Type == "int64" && endianness != "" {
					// 64-bit number in r1:r0 or r0:r1.
					if i+2 > len(out) {
						fmt.Fprintf(os.Stderr, "%s:%s not enough registers for int64 return\n", path, funct)
					}
					if endianness == "big-endian" {
						reg = fmt.Sprintf("int64(r%d)<<32 | int64(r%d)", i, i+1)
					} else {
						reg = fmt.Sprintf("int64(r%d)<<32 | int64(r%d)", i+1, i)
					}
					ret[i] = fmt.Sprintf("r%d", i)
					ret[i+1] = fmt.Sprintf("r%d", i+1)
				}
				if reg != "e1" || *plan9 {
					body += fmt.Sprintf("\t%s = %s(%s)\n", p.Name, p.Type, reg)
				}
			}
			if ret[0] == "_" && ret[1] == "_" && ret[2] == "_" {
				text += fmt.Sprintf("\t%s\n", call)
			} else {
				if errvar == "" && goos == "linux" {
					// raw syscall without error on Linux, see golang.org/issue/22924
					text += fmt.Sprintf("\t%s, %s := %s\n", ret[0], ret[1], call)
				} else {
					text += fmt.Sprintf("\t%s, %s, %s := %s\n", ret[0], ret[1], ret[2], call)
				}
			}
			text += body

			if *plan9 && ret[2] == "e1" {
				text += "\tif int32(r0) == -1 {\n"
				text += "\t\terr = e1\n"
				text += "\t}\n"
			} else if doErrno {
				text += "\tif e1 != 0 {\n"
				text += "\t\terr = errnoErr(e1)\n"
				text += "\t}\n"
			}
			text += "\treturn\n"
			text += "}\n\n"

			if libc && !trampolines[libcFn] {
				// some system calls share a trampoline, like read and readlen.
				trampolines[libcFn] = true
				// Declare assembly trampoline.
				text += fmt.Sprintf("func libc_%s_trampoline()\n", libcFn)
				// Assembly trampoline calls the libc_* function, which this magic
				// redirects to use the function from libSystem.
				text += fmt.Sprintf("//go:linkname libc_%s libc_%s\n", libcFn, libcFn)
				text += fmt.Sprintf("//go:cgo_import_dynamic libc_%s %s \"/usr/lib/libSystem.B.dylib\"\n", libcFn, libcFn)
				text += "\n"
			}
		}
		if err := s.Err(); err != nil {
			fmt.Fprint(os.Stderr, err.Error())
			os.Exit(1)
		}
		file.Close()
	}
	fmt.Printf(srcTemplate, cmdLine(), buildTags(), text)
}
// srcTemplate is the fmt.Printf format of the generated file. main fills its
// three %s verbs, in order, with the command line, the build tags and the
// accumulated text of the generated functions.
const srcTemplate = `// %s
// Code generated by the command above; see README.md. DO NOT EDIT.
// +build %s
package unix
import (
"syscall"
"unsafe"
)
var _ syscall.Errno
%s
`

View file

@ -1,415 +0,0 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
/*
This program reads a file containing function prototypes
(like syscall_aix.go) and generates system call bodies.
The prototypes are marked by lines beginning with "//sys"
and read like func declarations if //sys is replaced by func, but:
* The parameter lists must give a name for each argument.
This includes return parameters.
* The parameter lists must give a type for each argument:
the (x, y, z int) shorthand is not allowed.
* If the return parameter is an error number, it must be named err.
* If the Go func name needs to differ from its libc name,
  or the function is not in libc, the name can be specified
  at the end, after an "=" sign, like
//sys getsockopt(s int, level int, name int, val uintptr, vallen *_Socklen) (err error) = libsocket.getsockopt
*/
package main
import (
"bufio"
"flag"
"fmt"
"os"
"regexp"
"strings"
)
// Command-line flags of the generator.
var (
// b32 and l32 select the "big-endian" / "little-endian" setting in main;
// b32 wins when both are given.
b32 = flag.Bool("b32", false, "32bit big-endian")
l32 = flag.Bool("l32", false, "32bit little-endian")
// aix makes main reject prototypes that name a library other than libc.
aix = flag.Bool("aix", false, "aix")
// tags is returned by buildTags and ends up in the "+build" line of the output.
tags = flag.String("tags", "", "build tags")
)
// cmdLine reconstructs the command used to run this program: the fixed
// "go run" prefix followed by the space-separated command-line arguments.
func cmdLine() string {
	args := strings.Join(os.Args[1:], " ")
	return "go run mksyscall_aix_ppc.go " + args
}
// buildTags reports the value given to the -tags flag.
func buildTags() string {
	value := *tags
	return value
}
// Param describes one function parameter: its name and its Go type,
// both kept as the strings found in the prototype.
type Param struct {
	Name, Type string
}
// usage writes the synopsis to standard error and exits with status 1.
func usage() {
	const synopsis = "usage: go run mksyscall_aix_ppc.go [-b32 | -l32] [-tags x,y] [file ...]\n"
	fmt.Fprint(os.Stderr, synopsis)
	os.Exit(1)
}
// parseParamList splits a comma-separated parameter list into its entries,
// dropping the whitespace around each comma. A blank list yields an empty,
// non-nil slice.
func parseParamList(list string) []string {
	trimmed := strings.TrimSpace(list)
	if len(trimmed) == 0 {
		return make([]string, 0)
	}
	separator := regexp.MustCompile(`\s*,\s*`)
	return separator.Split(trimmed, -1)
}
// parseParam breaks a "<name> <type>" parameter into a Param. Input that is
// not exactly two space-separated fields is reported on standard error and
// terminates the program with status 1.
func parseParam(p string) Param {
	matches := regexp.MustCompile(`^(\S*) (\S*)$`).FindStringSubmatch(p)
	if len(matches) == 0 {
		fmt.Fprintf(os.Stderr, "malformed parameter: %s\n", p)
		os.Exit(1)
	}
	name, typ := matches[1], matches[2]
	return Param{name, typ}
}
// main reads the files named on the command line, turns every //sys and
// //sysnb prototype line into the source of a cgo-based Go wrapper that calls
// the matching libc function, and prints the generated file to standard
// output. Any input or parse error is reported on standard error and
// terminates the program with exit status 1.
func main() {
	flag.Usage = usage
	flag.Parse()
	if len(flag.Args()) <= 0 {
		fmt.Fprintf(os.Stderr, "no files to parse provided\n")
		usage()
	}

	endianness := ""
	if *b32 {
		endianness = "big-endian"
	} else if *l32 {
		endianness = "little-endian"
	}

	// Compile every pattern once instead of once per input line/parameter.
	var (
		spaceRE   = regexp.MustCompile(`\s+`)
		packageRE = regexp.MustCompile(`^package (\S+)$`)
		declRE    = regexp.MustCompile(`^\/\/sys(nb)? (\w+)\(([^()]*)\)\s*(?:\(([^()]+)\))?\s*(?:=\s*(?:(\w*)\.)?(\w*))?$`)
		camelRE   = regexp.MustCompile(`([a-z])([A-Z])`)
		sliceRE   = regexp.MustCompile(`^\[\](.*)`)
	)

	pack := ""
	text := ""
	// Opens the cgo preamble comment; srcTemplate supplies the closing "*/".
	cExtern := "/*\n#include <stdint.h>\n#include <stddef.h>\n"
	for _, path := range flag.Args() {
		file, err := os.Open(path)
		if err != nil {
			// Print the message verbatim; it must not be used as a format string.
			fmt.Fprint(os.Stderr, err.Error())
			os.Exit(1)
		}
		s := bufio.NewScanner(file)
		for s.Scan() {
			// Normalize whitespace so the patterns can assume single spaces.
			t := s.Text()
			t = strings.TrimSpace(t)
			t = spaceRE.ReplaceAllString(t, ` `)
			// The first package clause seen names the package of the output.
			if p := packageRE.FindStringSubmatch(t); p != nil && pack == "" {
				pack = p[1]
			}
			if !strings.HasPrefix(t, "//sys ") && !strings.HasPrefix(t, "//sysnb ") {
				continue
			}

			// Line must be of the form
			//	func Open(path string, mode int, perm int) (fd int, err error)
			// Split into name, in params, out params.
			f := declRE.FindStringSubmatch(t)
			if f == nil {
				fmt.Fprintf(os.Stderr, "%s:%s\nmalformed //sys declaration\n", path, t)
				os.Exit(1)
			}
			funct, inps, outps, modname, sysname := f[2], f[3], f[4], f[5], f[6]

			// Split argument lists on comma.
			in := parseParamList(inps)
			out := parseParamList(outps)

			inps = strings.Join(in, ", ")
			outps = strings.Join(out, ", ")

			// Try in vain to keep people from editing this file.
			// The theory is that they jump into the middle of the file
			// without reading the header.
			text += "// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT\n\n"

			// Check if value return, err return available
			errvar := ""
			retvar := ""
			rettype := ""
			for _, param := range out {
				p := parseParam(param)
				if p.Type == "error" {
					errvar = p.Name
				} else {
					retvar = p.Name
					rettype = p.Type
				}
			}

			// System call name.
			if sysname == "" {
				sysname = funct
			}
			sysname = camelRE.ReplaceAllString(sysname, `${1}_$2`)
			sysname = strings.ToLower(sysname) // All libc functions are lowercase.

			// C return type used in the extern prototype.
			cRettype := ""
			switch {
			case rettype == "unsafe.Pointer", rettype == "uintptr", strings.HasPrefix(rettype, "_"):
				cRettype = "uintptr_t"
			case rettype == "int64":
				cRettype = "long long"
			case rettype == "uint32":
				cRettype = "unsigned int"
			case rettype == "uint64":
				cRettype = "unsigned long long"
			default:
				// int, int32 and every other type map to int.
				cRettype = "int"
			}
			if sysname == "exit" {
				cRettype = "void"
			}

			// Change p.Types to c
			var cIn []string
			for _, param := range in {
				p := parseParam(param)
				switch {
				case sliceRE.MatchString(p.Type):
					// A slice becomes a (pointer, length) pair.
					cIn = append(cIn, "uintptr_t", "size_t")
				case strings.HasPrefix(p.Type, "*"),
					p.Type == "string",
					p.Type == "unsafe.Pointer",
					p.Type == "uintptr",
					strings.HasPrefix(p.Type, "_"):
					cIn = append(cIn, "uintptr_t")
				case p.Type == "int64":
					cIn = append(cIn, "long long")
				case p.Type == "uint32":
					cIn = append(cIn, "unsigned int")
				case p.Type == "uint64":
					cIn = append(cIn, "unsigned long long")
				default:
					// int, int32 and every other type map to int.
					cIn = append(cIn, "int")
				}
			}

			if funct != "fcntl" && funct != "FcntlInt" && funct != "readlen" && funct != "writelen" {
				if sysname == "select" {
					// select is a keyword of Go. Its name is
					// changed to c_select.
					cExtern += "#define c_select select\n"
				}
				// Imports of system calls from libc
				cExtern += fmt.Sprintf("%s %s", cRettype, sysname)
				cExtern += fmt.Sprintf("(%s);\n", strings.Join(cIn, ", "))
			}

			// Shared-object (library) name.
			if *aix {
				if modname == "" {
					modname = "libc.a/shr_64.o"
				} else {
					fmt.Fprintf(os.Stderr, "%s: only syscall using libc are available\n", funct)
					os.Exit(1)
				}
			}

			strconvfunc := "C.CString"

			// Go function header.
			if outps != "" {
				outps = fmt.Sprintf(" (%s)", outps)
			}
			if text != "" {
				text += "\n"
			}

			text += fmt.Sprintf("func %s(%s)%s {\n", funct, strings.Join(in, ", "), outps)

			// Prepare arguments to Syscall.
			var args []string
			n := 0    // index of the next _pN temporary in the generated body
			argN := 0 // position of the current Go parameter
			for _, param := range in {
				p := parseParam(param)
				if strings.HasPrefix(p.Type, "*") {
					args = append(args, "C.uintptr_t(uintptr(unsafe.Pointer("+p.Name+")))")
				} else if p.Type == "string" && errvar != "" {
					text += fmt.Sprintf("\t_p%d := uintptr(unsafe.Pointer(%s(%s)))\n", n, strconvfunc, p.Name)
					args = append(args, fmt.Sprintf("C.uintptr_t(_p%d)", n))
					n++
				} else if p.Type == "string" {
					// Warn only; the generated conversion is the same as above.
					fmt.Fprintf(os.Stderr, "%s:%s uses string arguments, but has no error return\n", path, funct)
					text += fmt.Sprintf("\t_p%d := uintptr(unsafe.Pointer(%s(%s)))\n", n, strconvfunc, p.Name)
					args = append(args, fmt.Sprintf("C.uintptr_t(_p%d)", n))
					n++
				} else if m := sliceRE.FindStringSubmatch(p.Type); m != nil {
					// Convert slice into pointer, length.
					// Have to be careful not to take address of &a[0] if len == 0:
					// pass nil in that case.
					text += fmt.Sprintf("\tvar _p%d *%s\n", n, m[1])
					text += fmt.Sprintf("\tif len(%s) > 0 {\n\t\t_p%d = &%s[0]\n\t}\n", p.Name, n, p.Name)
					args = append(args, fmt.Sprintf("C.uintptr_t(uintptr(unsafe.Pointer(_p%d)))", n))
					n++
					text += fmt.Sprintf("\tvar _p%d int\n", n)
					text += fmt.Sprintf("\t_p%d = len(%s)\n", n, p.Name)
					args = append(args, fmt.Sprintf("C.size_t(_p%d)", n))
					n++
				} else if p.Type == "int64" && endianness != "" {
					if endianness == "big-endian" {
						args = append(args, fmt.Sprintf("uintptr(%s>>32)", p.Name), fmt.Sprintf("uintptr(%s)", p.Name))
					} else {
						args = append(args, fmt.Sprintf("uintptr(%s)", p.Name), fmt.Sprintf("uintptr(%s>>32)", p.Name))
					}
					n++
				} else if p.Type == "bool" {
					// NOTE(review): n is not advanced after this temporary, so a later
					// temporary in the same function would reuse the name - confirm intended.
					text += fmt.Sprintf("\tvar _p%d uint32\n", n)
					text += fmt.Sprintf("\tif %s {\n\t\t_p%d = 1\n\t} else {\n\t\t_p%d = 0\n\t}\n", p.Name, n, n)
					args = append(args, fmt.Sprintf("_p%d", n))
				} else if strings.HasPrefix(p.Type, "_") {
					args = append(args, fmt.Sprintf("C.uintptr_t(uintptr(%s))", p.Name))
				} else if p.Type == "unsafe.Pointer" {
					args = append(args, fmt.Sprintf("C.uintptr_t(uintptr(%s))", p.Name))
				} else if p.Type == "int" {
					if (argN == 2) && ((funct == "readlen") || (funct == "writelen")) {
						args = append(args, fmt.Sprintf("C.size_t(%s)", p.Name))
					} else if argN == 0 && funct == "fcntl" {
						args = append(args, fmt.Sprintf("C.uintptr_t(%s)", p.Name))
					} else if (argN == 2) && ((funct == "fcntl") || (funct == "FcntlInt")) {
						args = append(args, fmt.Sprintf("C.uintptr_t(%s)", p.Name))
					} else {
						args = append(args, fmt.Sprintf("C.int(%s)", p.Name))
					}
				} else if p.Type == "int32" {
					args = append(args, fmt.Sprintf("C.int(%s)", p.Name))
				} else if p.Type == "int64" {
					args = append(args, fmt.Sprintf("C.longlong(%s)", p.Name))
				} else if p.Type == "uint32" {
					args = append(args, fmt.Sprintf("C.uint(%s)", p.Name))
				} else if p.Type == "uint64" {
					args = append(args, fmt.Sprintf("C.ulonglong(%s)", p.Name))
				} else if p.Type == "uintptr" {
					args = append(args, fmt.Sprintf("C.uintptr_t(%s)", p.Name))
				} else {
					args = append(args, fmt.Sprintf("C.int(%s)", p.Name))
				}
				argN++
			}

			// Actual call.
			arglist := strings.Join(args, ", ")
			call := ""
			if sysname == "exit" {
				if errvar != "" {
					call += "er :="
				}
			} else if errvar != "" {
				call += "r0,er :="
			} else if retvar != "" {
				call += "r0,_ :="
			}
			if sysname == "select" {
				// select is a keyword of Go. Its name is
				// changed to c_select.
				call += fmt.Sprintf("C.c_%s(%s)", sysname, arglist)
			} else {
				call += fmt.Sprintf("C.%s(%s)", sysname, arglist)
			}

			// Assign return values.
			body := ""
			for i := 0; i < len(out); i++ {
				p := parseParam(out[i])
				reg := ""
				if p.Name == "err" {
					reg = "e1"
				} else {
					reg = "r0"
				}
				if reg != "e1" {
					body += fmt.Sprintf("\t%s = %s(%s)\n", p.Name, p.Type, reg)
				}
			}

			// verify return
			if sysname != "exit" && errvar != "" {
				if strings.HasPrefix(cRettype, "uintptr") {
					body += "\tif (uintptr(r0) ==^uintptr(0) && er != nil) {\n"
					body += fmt.Sprintf("\t\t%s = er\n", errvar)
					body += "\t}\n"
				} else {
					body += "\tif (r0 ==-1 && er != nil) {\n"
					body += fmt.Sprintf("\t\t%s = er\n", errvar)
					body += "\t}\n"
				}
			} else if errvar != "" {
				body += "\tif (er != nil) {\n"
				body += fmt.Sprintf("\t\t%s = er\n", errvar)
				body += "\t}\n"
			}

			text += fmt.Sprintf("\t%s\n", call)
			text += body

			text += "\treturn\n"
			text += "}\n"
		}
		if err := s.Err(); err != nil {
			fmt.Fprint(os.Stderr, err.Error())
			os.Exit(1)
		}
		file.Close()
	}
	// The generated file imports golang.org/x/sys/unix unless it is itself in package unix.
	imp := ""
	if pack != "unix" {
		imp = "import \"golang.org/x/sys/unix\"\n"
	}
	fmt.Printf(srcTemplate, cmdLine(), buildTags(), pack, cExtern, imp, text)
}
// srcTemplate is the fmt.Printf format of the generated file. main fills its
// six %s verbs, in order, with the command line, the build tags, the package
// name, the cgo preamble (which opens the comment closed by the "*/" below),
// the optional golang.org/x/sys/unix import and the generated function text.
const srcTemplate = `// %s
// Code generated by the command above; see README.md. DO NOT EDIT.
// +build %s
package %s
%s
*/
import "C"
import (
"unsafe"
)
%s
%s
`

View file

@ -1,614 +0,0 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
/*
This program reads a file containing function prototypes
(like syscall_aix.go) and generates system call bodies.
The prototypes are marked by lines beginning with "//sys"
and read like func declarations if //sys is replaced by func, but:
* The parameter lists must give a name for each argument.
This includes return parameters.
* The parameter lists must give a type for each argument:
the (x, y, z int) shorthand is not allowed.
* If the return parameter is an error number, it must be named err.
* If the Go func name needs to differ from its libc name,
  or the function is not in libc, the name can be specified
  at the end, after an "=" sign, like
//sys getsockopt(s int, level int, name int, val uintptr, vallen *_Socklen) (err error) = libsocket.getsockopt
This program will generate three files and handle both gc and gccgo implementation:
- zsyscall_aix_ppc64.go: the common part of each implementation (error handler, pointer creation)
- zsyscall_aix_ppc64_gc.go: gc part with //go:cgo_import_dynamic and a call to syscall6
- zsyscall_aix_ppc64_gccgo.go: gccgo part with C function and conversion to C type.
The generated code looks like this
zsyscall_aix_ppc64.go
func asyscall(...) (n int, err error) {
// Pointer Creation
r1, e1 := callasyscall(...)
// Type Conversion
// Error Handler
return
}
zsyscall_aix_ppc64_gc.go
//go:cgo_import_dynamic libc_asyscall asyscall "libc.a/shr_64.o"
//go:linkname libc_asyscall libc_asyscall
var asyscall syscallFunc
func callasyscall(...) (r1 uintptr, e1 Errno) {
r1, _, e1 = syscall6(uintptr(unsafe.Pointer(&libc_asyscall)), "nb_args", ... )
return
}
zsyscall_aix_ppc64_gccgo.go
// int asyscall(...)
import "C"
func callasyscall(...) (r1 uintptr, e1 Errno) {
r1 = uintptr(C.asyscall(...))
e1 = syscall.GetErrno()
return
}
*/
package main
import (
"bufio"
"flag"
"fmt"
"io/ioutil"
"os"
"regexp"
"strings"
)
// Command-line flags of the generator.
var (
// b32 and l32 select the "big-endian" / "little-endian" setting in main;
// b32 wins when both are given.
b32 = flag.Bool("b32", false, "32bit big-endian")
l32 = flag.Bool("l32", false, "32bit little-endian")
aix = flag.Bool("aix", false, "aix")
// tags is the value returned by buildTags.
tags = flag.String("tags", "", "build tags")
)
// cmdLine reconstructs the command used to run this program: the fixed
// "go run" prefix followed by the space-separated command-line arguments.
func cmdLine() string {
	args := strings.Join(os.Args[1:], " ")
	return "go run mksyscall_aix_ppc64.go " + args
}
// buildTags reports the value given to the -tags flag.
func buildTags() string {
	value := *tags
	return value
}
// Param describes one function parameter: its name and its Go type,
// both kept as the strings found in the prototype.
type Param struct {
	Name, Type string
}
// usage writes the synopsis to standard error and exits with status 1.
func usage() {
	const synopsis = "usage: go run mksyscall_aix_ppc64.go [-b32 | -l32] [-tags x,y] [file ...]\n"
	fmt.Fprint(os.Stderr, synopsis)
	os.Exit(1)
}
// parseParamList splits a comma-separated parameter list into its entries,
// dropping the whitespace around each comma. A blank list yields an empty,
// non-nil slice.
func parseParamList(list string) []string {
	if list = strings.TrimSpace(list); len(list) > 0 {
		commas := regexp.MustCompile(`\s*,\s*`)
		return commas.Split(list, -1)
	}
	return []string{}
}
// parseParam breaks a "<name> <type>" parameter into a Param. Input that is
// not exactly two space-separated fields is reported on standard error and
// terminates the program with status 1.
func parseParam(p string) Param {
	pattern := regexp.MustCompile(`^(\S*) (\S*)$`)
	fields := pattern.FindStringSubmatch(p)
	if len(fields) == 0 {
		fmt.Fprintf(os.Stderr, "malformed parameter: %s\n", p)
		os.Exit(1)
	}
	return Param{fields[1], fields[2]}
}
func main() {
flag.Usage = usage
flag.Parse()
if len(flag.Args()) <= 0 {
fmt.Fprintf(os.Stderr, "no files to parse provided\n")
usage()
}
endianness := ""
if *b32 {
endianness = "big-endian"
} else if *l32 {
endianness = "little-endian"
}
pack := ""
// GCCGO
textgccgo := ""
cExtern := "/*\n#include <stdint.h>\n"
// GC
textgc := ""
dynimports := ""
linknames := ""
var vars []string
// COMMON
textcommon := ""
for _, path := range flag.Args() {
file, err := os.Open(path)
if err != nil {
fmt.Fprintf(os.Stderr, err.Error())
os.Exit(1)
}
s := bufio.NewScanner(file)
for s.Scan() {
t := s.Text()
t = strings.TrimSpace(t)
t = regexp.MustCompile(`\s+`).ReplaceAllString(t, ` `)
if p := regexp.MustCompile(`^package (\S+)$`).FindStringSubmatch(t); p != nil && pack == "" {
pack = p[1]
}
nonblock := regexp.MustCompile(`^\/\/sysnb `).FindStringSubmatch(t)
if regexp.MustCompile(`^\/\/sys `).FindStringSubmatch(t) == nil && nonblock == nil {
continue
}
// Line must be of the form
// func Open(path string, mode int, perm int) (fd int, err error)
// Split into name, in params, out params.
f := regexp.MustCompile(`^\/\/sys(nb)? (\w+)\(([^()]*)\)\s*(?:\(([^()]+)\))?\s*(?:=\s*(?:(\w*)\.)?(\w*))?$`).FindStringSubmatch(t)
if f == nil {
fmt.Fprintf(os.Stderr, "%s:%s\nmalformed //sys declaration\n", path, t)
os.Exit(1)
}
funct, inps, outps, modname, sysname := f[2], f[3], f[4], f[5], f[6]
// Split argument lists on comma.
in := parseParamList(inps)
out := parseParamList(outps)
inps = strings.Join(in, ", ")
outps = strings.Join(out, ", ")
if sysname == "" {
sysname = funct
}
onlyCommon := false
if funct == "readlen" || funct == "writelen" || funct == "FcntlInt" || funct == "FcntlFlock" {
// This function call another syscall which is already implemented.
// Therefore, the gc and gccgo part must not be generated.
onlyCommon = true
}
// Try in vain to keep people from editing this file.
// The theory is that they jump into the middle of the file
// without reading the header.
textcommon += "// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT\n\n"
if !onlyCommon {
textgccgo += "// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT\n\n"
textgc += "// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT\n\n"
}
// Check if value return, err return available
errvar := ""
rettype := ""
for _, param := range out {
p := parseParam(param)
if p.Type == "error" {
errvar = p.Name
} else {
rettype = p.Type
}
}
sysname = regexp.MustCompile(`([a-z])([A-Z])`).ReplaceAllString(sysname, `${1}_$2`)
sysname = strings.ToLower(sysname) // All libc functions are lowercase.
// GCCGO Prototype return type
cRettype := ""
if rettype == "unsafe.Pointer" {
cRettype = "uintptr_t"
} else if rettype == "uintptr" {
cRettype = "uintptr_t"
} else if regexp.MustCompile(`^_`).FindStringSubmatch(rettype) != nil {
cRettype = "uintptr_t"
} else if rettype == "int" {
cRettype = "int"
} else if rettype == "int32" {
cRettype = "int"
} else if rettype == "int64" {
cRettype = "long long"
} else if rettype == "uint32" {
cRettype = "unsigned int"
} else if rettype == "uint64" {
cRettype = "unsigned long long"
} else {
cRettype = "int"
}
if sysname == "exit" {
cRettype = "void"
}
// GCCGO Prototype arguments type
var cIn []string
for i, param := range in {
p := parseParam(param)
if regexp.MustCompile(`^\*`).FindStringSubmatch(p.Type) != nil {
cIn = append(cIn, "uintptr_t")
} else if p.Type == "string" {
cIn = append(cIn, "uintptr_t")
} else if regexp.MustCompile(`^\[\](.*)`).FindStringSubmatch(p.Type) != nil {
cIn = append(cIn, "uintptr_t", "size_t")
} else if p.Type == "unsafe.Pointer" {
cIn = append(cIn, "uintptr_t")
} else if p.Type == "uintptr" {
cIn = append(cIn, "uintptr_t")
} else if regexp.MustCompile(`^_`).FindStringSubmatch(p.Type) != nil {
cIn = append(cIn, "uintptr_t")
} else if p.Type == "int" {
if (i == 0 || i == 2) && funct == "fcntl" {
// These fcntl arguments needs to be uintptr to be able to call FcntlInt and FcntlFlock
cIn = append(cIn, "uintptr_t")
} else {
cIn = append(cIn, "int")
}
} else if p.Type == "int32" {
cIn = append(cIn, "int")
} else if p.Type == "int64" {
cIn = append(cIn, "long long")
} else if p.Type == "uint32" {
cIn = append(cIn, "unsigned int")
} else if p.Type == "uint64" {
cIn = append(cIn, "unsigned long long")
} else {
cIn = append(cIn, "int")
}
}
if !onlyCommon {
// GCCGO Prototype Generation
// Imports of system calls from libc
if sysname == "select" {
// select is a keyword of Go. Its name is
// changed to c_select.
cExtern += "#define c_select select\n"
}
cExtern += fmt.Sprintf("%s %s", cRettype, sysname)
cIn := strings.Join(cIn, ", ")
cExtern += fmt.Sprintf("(%s);\n", cIn)
}
// GC Library name
if modname == "" {
modname = "libc.a/shr_64.o"
} else {
fmt.Fprintf(os.Stderr, "%s: only syscall using libc are available\n", funct)
os.Exit(1)
}
sysvarname := fmt.Sprintf("libc_%s", sysname)
if !onlyCommon {
// GC Runtime import of function to allow cross-platform builds.
dynimports += fmt.Sprintf("//go:cgo_import_dynamic %s %s \"%s\"\n", sysvarname, sysname, modname)
// GC Link symbol to proc address variable.
linknames += fmt.Sprintf("//go:linkname %s %s\n", sysvarname, sysvarname)
// GC Library proc address variable.
vars = append(vars, sysvarname)
}
strconvfunc := "BytePtrFromString"
strconvtype := "*byte"
// Go function header.
if outps != "" {
outps = fmt.Sprintf(" (%s)", outps)
}
if textcommon != "" {
textcommon += "\n"
}
textcommon += fmt.Sprintf("func %s(%s)%s {\n", funct, strings.Join(in, ", "), outps)
// Prepare arguments tocall.
var argscommon []string // Arguments in the common part
var argscall []string // Arguments for call prototype
var argsgc []string // Arguments for gc call (with syscall6)
var argsgccgo []string // Arguments for gccgo call (with C.name_of_syscall)
n := 0
argN := 0
for _, param := range in {
p := parseParam(param)
if regexp.MustCompile(`^\*`).FindStringSubmatch(p.Type) != nil {
argscommon = append(argscommon, fmt.Sprintf("uintptr(unsafe.Pointer(%s))", p.Name))
argscall = append(argscall, fmt.Sprintf("%s uintptr", p.Name))
argsgc = append(argsgc, p.Name)
argsgccgo = append(argsgccgo, fmt.Sprintf("C.uintptr_t(%s)", p.Name))
} else if p.Type == "string" && errvar != "" {
textcommon += fmt.Sprintf("\tvar _p%d %s\n", n, strconvtype)
textcommon += fmt.Sprintf("\t_p%d, %s = %s(%s)\n", n, errvar, strconvfunc, p.Name)
textcommon += fmt.Sprintf("\tif %s != nil {\n\t\treturn\n\t}\n", errvar)
argscommon = append(argscommon, fmt.Sprintf("uintptr(unsafe.Pointer(_p%d))", n))
argscall = append(argscall, fmt.Sprintf("_p%d uintptr ", n))
argsgc = append(argsgc, fmt.Sprintf("_p%d", n))
argsgccgo = append(argsgccgo, fmt.Sprintf("C.uintptr_t(_p%d)", n))
n++
} else if p.Type == "string" {
fmt.Fprintf(os.Stderr, path+":"+funct+" uses string arguments, but has no error return\n")
textcommon += fmt.Sprintf("\tvar _p%d %s\n", n, strconvtype)
textcommon += fmt.Sprintf("\t_p%d, %s = %s(%s)\n", n, errvar, strconvfunc, p.Name)
textcommon += fmt.Sprintf("\tif %s != nil {\n\t\treturn\n\t}\n", errvar)
argscommon = append(argscommon, fmt.Sprintf("uintptr(unsafe.Pointer(_p%d))", n))
argscall = append(argscall, fmt.Sprintf("_p%d uintptr", n))
argsgc = append(argsgc, fmt.Sprintf("_p%d", n))
argsgccgo = append(argsgccgo, fmt.Sprintf("C.uintptr_t(_p%d)", n))
n++
} else if m := regexp.MustCompile(`^\[\](.*)`).FindStringSubmatch(p.Type); m != nil {
// Convert slice into pointer, length.
// Have to be careful not to take address of &a[0] if len == 0:
// pass nil in that case.
textcommon += fmt.Sprintf("\tvar _p%d *%s\n", n, m[1])
textcommon += fmt.Sprintf("\tif len(%s) > 0 {\n\t\t_p%d = &%s[0]\n\t}\n", p.Name, n, p.Name)
argscommon = append(argscommon, fmt.Sprintf("uintptr(unsafe.Pointer(_p%d))", n), fmt.Sprintf("len(%s)", p.Name))
argscall = append(argscall, fmt.Sprintf("_p%d uintptr", n), fmt.Sprintf("_lenp%d int", n))
argsgc = append(argsgc, fmt.Sprintf("_p%d", n), fmt.Sprintf("uintptr(_lenp%d)", n))
argsgccgo = append(argsgccgo, fmt.Sprintf("C.uintptr_t(_p%d)", n), fmt.Sprintf("C.size_t(_lenp%d)", n))
n++
} else if p.Type == "int64" && endianness != "" {
fmt.Fprintf(os.Stderr, path+":"+funct+" uses int64 with 32 bits mode. Case not yet implemented\n")
} else if p.Type == "bool" {
fmt.Fprintf(os.Stderr, path+":"+funct+" uses bool. Case not yet implemented\n")
} else if regexp.MustCompile(`^_`).FindStringSubmatch(p.Type) != nil || p.Type == "unsafe.Pointer" {
argscommon = append(argscommon, fmt.Sprintf("uintptr(%s)", p.Name))
argscall = append(argscall, fmt.Sprintf("%s uintptr", p.Name))
argsgc = append(argsgc, p.Name)
argsgccgo = append(argsgccgo, fmt.Sprintf("C.uintptr_t(%s)", p.Name))
} else if p.Type == "int" {
if (argN == 0 || argN == 2) && ((funct == "fcntl") || (funct == "FcntlInt") || (funct == "FcntlFlock")) {
// These fcntl arguments need to be uintptr to be able to call FcntlInt and FcntlFlock
argscommon = append(argscommon, fmt.Sprintf("uintptr(%s)", p.Name))
argscall = append(argscall, fmt.Sprintf("%s uintptr", p.Name))
argsgc = append(argsgc, p.Name)
argsgccgo = append(argsgccgo, fmt.Sprintf("C.uintptr_t(%s)", p.Name))
} else {
argscommon = append(argscommon, p.Name)
argscall = append(argscall, fmt.Sprintf("%s int", p.Name))
argsgc = append(argsgc, fmt.Sprintf("uintptr(%s)", p.Name))
argsgccgo = append(argsgccgo, fmt.Sprintf("C.int(%s)", p.Name))
}
} else if p.Type == "int32" {
argscommon = append(argscommon, p.Name)
argscall = append(argscall, fmt.Sprintf("%s int32", p.Name))
argsgc = append(argsgc, fmt.Sprintf("uintptr(%s)", p.Name))
argsgccgo = append(argsgccgo, fmt.Sprintf("C.int(%s)", p.Name))
} else if p.Type == "int64" {
argscommon = append(argscommon, p.Name)
argscall = append(argscall, fmt.Sprintf("%s int64", p.Name))
argsgc = append(argsgc, fmt.Sprintf("uintptr(%s)", p.Name))
argsgccgo = append(argsgccgo, fmt.Sprintf("C.longlong(%s)", p.Name))
} else if p.Type == "uint32" {
argscommon = append(argscommon, p.Name)
argscall = append(argscall, fmt.Sprintf("%s uint32", p.Name))
argsgc = append(argsgc, fmt.Sprintf("uintptr(%s)", p.Name))
argsgccgo = append(argsgccgo, fmt.Sprintf("C.uint(%s)", p.Name))
} else if p.Type == "uint64" {
argscommon = append(argscommon, p.Name)
argscall = append(argscall, fmt.Sprintf("%s uint64", p.Name))
argsgc = append(argsgc, fmt.Sprintf("uintptr(%s)", p.Name))
argsgccgo = append(argsgccgo, fmt.Sprintf("C.ulonglong(%s)", p.Name))
} else if p.Type == "uintptr" {
argscommon = append(argscommon, p.Name)
argscall = append(argscall, fmt.Sprintf("%s uintptr", p.Name))
argsgc = append(argsgc, p.Name)
argsgccgo = append(argsgccgo, fmt.Sprintf("C.uintptr_t(%s)", p.Name))
} else {
argscommon = append(argscommon, fmt.Sprintf("int(%s)", p.Name))
argscall = append(argscall, fmt.Sprintf("%s int", p.Name))
argsgc = append(argsgc, fmt.Sprintf("uintptr(%s)", p.Name))
argsgccgo = append(argsgccgo, fmt.Sprintf("C.int(%s)", p.Name))
}
argN++
}
nargs := len(argsgc)
// COMMON function generation
argscommonlist := strings.Join(argscommon, ", ")
callcommon := fmt.Sprintf("call%s(%s)", sysname, argscommonlist)
ret := []string{"_", "_"}
body := ""
doErrno := false
for i := 0; i < len(out); i++ {
p := parseParam(out[i])
reg := ""
if p.Name == "err" {
reg = "e1"
ret[1] = reg
doErrno = true
} else {
reg = "r0"
ret[0] = reg
}
if p.Type == "bool" {
reg = fmt.Sprintf("%s != 0", reg)
}
if reg != "e1" {
body += fmt.Sprintf("\t%s = %s(%s)\n", p.Name, p.Type, reg)
}
}
if ret[0] == "_" && ret[1] == "_" {
textcommon += fmt.Sprintf("\t%s\n", callcommon)
} else {
textcommon += fmt.Sprintf("\t%s, %s := %s\n", ret[0], ret[1], callcommon)
}
textcommon += body
if doErrno {
textcommon += "\tif e1 != 0 {\n"
textcommon += "\t\terr = errnoErr(e1)\n"
textcommon += "\t}\n"
}
textcommon += "\treturn\n"
textcommon += "}\n"
if onlyCommon {
continue
}
// CALL Prototype
callProto := fmt.Sprintf("func call%s(%s) (r1 uintptr, e1 Errno) {\n", sysname, strings.Join(argscall, ", "))
// GC function generation
asm := "syscall6"
if nonblock != nil {
asm = "rawSyscall6"
}
if len(argsgc) <= 6 {
for len(argsgc) < 6 {
argsgc = append(argsgc, "0")
}
} else {
fmt.Fprintf(os.Stderr, "%s: too many arguments to system call", funct)
os.Exit(1)
}
argsgclist := strings.Join(argsgc, ", ")
callgc := fmt.Sprintf("%s(uintptr(unsafe.Pointer(&%s)), %d, %s)", asm, sysvarname, nargs, argsgclist)
textgc += callProto
textgc += fmt.Sprintf("\tr1, _, e1 = %s\n", callgc)
textgc += "\treturn\n}\n"
// GCCGO function generation
argsgccgolist := strings.Join(argsgccgo, ", ")
var callgccgo string
if sysname == "select" {
// select is a keyword of Go. Its name is
// changed to c_select.
callgccgo = fmt.Sprintf("C.c_%s(%s)", sysname, argsgccgolist)
} else {
callgccgo = fmt.Sprintf("C.%s(%s)", sysname, argsgccgolist)
}
textgccgo += callProto
textgccgo += fmt.Sprintf("\tr1 = uintptr(%s)\n", callgccgo)
textgccgo += "\te1 = syscall.GetErrno()\n"
textgccgo += "\treturn\n}\n"
}
if err := s.Err(); err != nil {
fmt.Fprintf(os.Stderr, err.Error())
os.Exit(1)
}
file.Close()
}
imp := ""
if pack != "unix" {
imp = "import \"golang.org/x/sys/unix\"\n"
}
// Print zsyscall_aix_ppc64.go
err := ioutil.WriteFile("zsyscall_aix_ppc64.go",
[]byte(fmt.Sprintf(srcTemplate1, cmdLine(), buildTags(), pack, imp, textcommon)),
0644)
if err != nil {
fmt.Fprintf(os.Stderr, err.Error())
os.Exit(1)
}
// Print zsyscall_aix_ppc64_gc.go
vardecls := "\t" + strings.Join(vars, ",\n\t")
vardecls += " syscallFunc"
err = ioutil.WriteFile("zsyscall_aix_ppc64_gc.go",
[]byte(fmt.Sprintf(srcTemplate2, cmdLine(), buildTags(), pack, imp, dynimports, linknames, vardecls, textgc)),
0644)
if err != nil {
fmt.Fprintf(os.Stderr, err.Error())
os.Exit(1)
}
// Print zsyscall_aix_ppc64_gccgo.go
err = ioutil.WriteFile("zsyscall_aix_ppc64_gccgo.go",
[]byte(fmt.Sprintf(srcTemplate3, cmdLine(), buildTags(), pack, cExtern, imp, textgccgo)),
0644)
if err != nil {
fmt.Fprintf(os.Stderr, err.Error())
os.Exit(1)
}
}
// srcTemplate1 is the fmt format string used to render
// zsyscall_aix_ppc64.go. Its %s verbs are filled, in order, with: the
// generating command line, the build tags, the package name, an optional
// import line, and the generated text of the common wrapper functions.
const srcTemplate1 = `// %s
// Code generated by the command above; see README.md. DO NOT EDIT.
// +build %s
package %s
import (
"unsafe"
)
%s
%s
`
// srcTemplate2 is the fmt format string used to render
// zsyscall_aix_ppc64_gc.go (built only when not using gccgo). Its %s verbs
// are filled, in order, with: the generating command line, the build tags,
// the package name, an optional import line, the dynamic-import directives,
// the linkname directives, the variable declarations placed inside the
// var ( ... ) group, and the generated text of the call functions.
const srcTemplate2 = `// %s
// Code generated by the command above; see README.md. DO NOT EDIT.
// +build %s
// +build !gccgo
package %s
import (
"unsafe"
)
%s
%s
%s
type syscallFunc uintptr
var (
%s
)
// Implemented in runtime/syscall_aix.go.
func rawSyscall6(trap, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err Errno)
func syscall6(trap, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err Errno)
%s
`
// srcTemplate3 is the fmt format string used to render
// zsyscall_aix_ppc64_gccgo.go (built only with gccgo). Its %s verbs are
// filled, in order, with: the generating command line, the build tags, the
// package name, the C extern declarations, an optional import line, and the
// generated text of the call functions.
// NOTE(review): the literal "*/" below closes a C comment that the template
// itself never opens; presumably the extern-declaration text starts with
// "/*" - confirm against the code that builds it.
const srcTemplate3 = `// %s
// Code generated by the command above; see README.md. DO NOT EDIT.
// +build %s
// +build gccgo
package %s
%s
*/
import "C"
import (
"syscall"
)
%s
%s
`

View file

@ -1,335 +0,0 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
/*
This program reads a file containing function prototypes
(like syscall_solaris.go) and generates system call bodies.
The prototypes are marked by lines beginning with "//sys"
and read like func declarations if //sys is replaced by func, but:
* The parameter lists must give a name for each argument.
This includes return parameters.
* The parameter lists must give a type for each argument:
the (x, y, z int) shorthand is not allowed.
* If the return parameter is an error number, it must be named err.
* If the Go func name needs to be different from its libc name,
  or the function is not in libc, the name can be specified
  at the end, after the "=" sign, like
//sys getsockopt(s int, level int, name int, val uintptr, vallen *_Socklen) (err error) = libsocket.getsockopt
*/
package main
import (
"bufio"
"flag"
"fmt"
"os"
"regexp"
"strings"
)
// Command-line flags.

// b32 selects code generation for a 32-bit big-endian target.
var b32 = flag.Bool("b32", false, "32bit big-endian")

// l32 selects code generation for a 32-bit little-endian target.
var l32 = flag.Bool("l32", false, "32bit little-endian")

// tags holds the build tags written into the generated file.
var tags = flag.String("tags", "", "build tags")
// cmdLine reconstructs the command used to run this generator: the fixed
// "go run mksyscall_solaris.go" prefix followed by the program's arguments
// joined with single spaces.
func cmdLine() string {
	args := strings.Join(os.Args[1:], " ")
	return "go run mksyscall_solaris.go " + args
}
// buildTags reports the build tags given with the -tags flag.
func buildTags() string {
	value := *tags
	return value
}
// Param describes one function parameter as a name/type pair.
type Param struct {
	Name, Type string
}
// usage writes the usage line to standard error and terminates the program
// with exit status 1.
func usage() {
	fmt.Fprint(os.Stderr, "usage: go run mksyscall_solaris.go [-b32 | -l32] [-tags x,y] [file ...]\n")
	os.Exit(1)
}
// paramSepRE matches the comma, with any surrounding white space, that
// separates two parameters. It is compiled once rather than on every call.
var paramSepRE = regexp.MustCompile(`\s*,\s*`)

// parseParamList splits a comma-separated parameter list into its
// individual "name type" entries. Leading and trailing white space of the
// list is ignored; an empty list yields an empty, non-nil slice.
func parseParamList(list string) []string {
	list = strings.TrimSpace(list)
	if list == "" {
		return []string{}
	}
	return paramSepRE.Split(list, -1)
}
// paramRE matches a single "name type" parameter: two runs of non-space
// characters separated by exactly one space. It is compiled once rather
// than on every call.
var paramRE = regexp.MustCompile(`^(\S*) (\S*)$`)

// parseParam splits a "name type" parameter into its name and type.
// If p does not have that form, a diagnostic is written to standard error
// and the program exits with status 1.
func parseParam(p string) Param {
	ps := paramRE.FindStringSubmatch(p)
	if ps == nil {
		fmt.Fprintf(os.Stderr, "malformed parameter: %s\n", p)
		os.Exit(1)
	}
	return Param{ps[1], ps[2]}
}
// main reads the //sys and //sysnb prototypes from the files named on the
// command line and prints the generated Solaris system call wrappers,
// rendered through srcTemplate, to standard output. Any error is reported
// on standard error and ends the program with exit status 1.
func main() {
	flag.Usage = usage
	flag.Parse()
	if len(flag.Args()) <= 0 {
		fmt.Fprintf(os.Stderr, "no files to parse provided\n")
		usage()
	}

	endianness := ""
	if *b32 {
		endianness = "big-endian"
	} else if *l32 {
		endianness = "little-endian"
	}

	// Compile every pattern once instead of once per input line/parameter.
	var (
		spacesRE  = regexp.MustCompile(`\s+`)
		packageRE = regexp.MustCompile(`^package (\S+)$`)
		sysnbRE   = regexp.MustCompile(`^\/\/sysnb `)
		sysRE     = regexp.MustCompile(`^\/\/sys `)
		declRE    = regexp.MustCompile(`^\/\/sys(nb)? (\w+)\(([^()]*)\)\s*(?:\(([^()]+)\))?\s*(?:=\s*(?:(\w*)\.)?(\w*))?$`)
		pointerRE = regexp.MustCompile(`^\*`)
		sliceRE   = regexp.MustCompile(`^\[\](.*)`)
	)

	pack := ""
	text := ""
	dynimports := ""
	linknames := ""
	var vars []string
	for _, path := range flag.Args() {
		file, err := os.Open(path)
		if err != nil {
			// Print the error as data, never as a format string.
			fmt.Fprintln(os.Stderr, err)
			os.Exit(1)
		}
		s := bufio.NewScanner(file)
		for s.Scan() {
			t := s.Text()
			t = strings.TrimSpace(t)
			t = spacesRE.ReplaceAllString(t, ` `)
			// The first package clause seen names the output package.
			if p := packageRE.FindStringSubmatch(t); p != nil && pack == "" {
				pack = p[1]
			}
			nonblock := sysnbRE.FindStringSubmatch(t)
			if sysRE.FindStringSubmatch(t) == nil && nonblock == nil {
				continue
			}

			// Line must be of the form
			//	func Open(path string, mode int, perm int) (fd int, err error)
			// Split into name, in params, out params.
			f := declRE.FindStringSubmatch(t)
			if f == nil {
				fmt.Fprintf(os.Stderr, "%s:%s\nmalformed //sys declaration\n", path, t)
				os.Exit(1)
			}
			funct, inps, outps, modname, sysname := f[2], f[3], f[4], f[5], f[6]

			// Split argument lists on comma.
			in := parseParamList(inps)
			out := parseParamList(outps)

			// Try in vain to keep people from editing this file.
			// The theory is that they jump into the middle of the file
			// without reading the header.
			text += "// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT\n\n"

			// So file name.
			if modname == "" {
				modname = "libc"
			}

			// System call name.
			if sysname == "" {
				sysname = funct
			}

			// System call pointer variable name.
			sysvarname := fmt.Sprintf("proc%s", sysname)

			strconvfunc := "BytePtrFromString"
			strconvtype := "*byte"

			sysname = strings.ToLower(sysname) // All libc functions are lowercase.

			// Runtime import of function to allow cross-platform builds.
			dynimports += fmt.Sprintf("//go:cgo_import_dynamic libc_%s %s \"%s.so\"\n", sysname, sysname, modname)
			// Link symbol to proc address variable.
			linknames += fmt.Sprintf("//go:linkname %s libc_%s\n", sysvarname, sysname)
			// Library proc address variable.
			vars = append(vars, sysvarname)

			// Go function header.
			outlist := strings.Join(out, ", ")
			if outlist != "" {
				outlist = fmt.Sprintf(" (%s)", outlist)
			}
			if text != "" {
				text += "\n"
			}
			text += fmt.Sprintf("func %s(%s)%s {\n", funct, strings.Join(in, ", "), outlist)

			// Check if err return available
			errvar := ""
			for _, param := range out {
				p := parseParam(param)
				if p.Type == "error" {
					errvar = p.Name
				}
			}

			// Prepare arguments to Syscall.
			var args []string
			n := 0
			for _, param := range in {
				p := parseParam(param)
				if pointerRE.FindStringSubmatch(p.Type) != nil {
					args = append(args, "uintptr(unsafe.Pointer("+p.Name+"))")
				} else if p.Type == "string" && errvar != "" {
					text += fmt.Sprintf("\tvar _p%d %s\n", n, strconvtype)
					text += fmt.Sprintf("\t_p%d, %s = %s(%s)\n", n, errvar, strconvfunc, p.Name)
					text += fmt.Sprintf("\tif %s != nil {\n\t\treturn\n\t}\n", errvar)
					args = append(args, fmt.Sprintf("uintptr(unsafe.Pointer(_p%d))", n))
					n++
				} else if p.Type == "string" {
					fmt.Fprintf(os.Stderr, "%s:%s uses string arguments, but has no error return\n", path, funct)
					text += fmt.Sprintf("\tvar _p%d %s\n", n, strconvtype)
					text += fmt.Sprintf("\t_p%d, _ = %s(%s)\n", n, strconvfunc, p.Name)
					args = append(args, fmt.Sprintf("uintptr(unsafe.Pointer(_p%d))", n))
					n++
				} else if elem := sliceRE.FindStringSubmatch(p.Type); elem != nil {
					// Convert slice into pointer, length.
					// Have to be careful not to take address of &a[0] if len == 0:
					// pass nil in that case.
					text += fmt.Sprintf("\tvar _p%d *%s\n", n, elem[1])
					text += fmt.Sprintf("\tif len(%s) > 0 {\n\t\t_p%d = &%s[0]\n\t}\n", p.Name, n, p.Name)
					args = append(args, fmt.Sprintf("uintptr(unsafe.Pointer(_p%d))", n), fmt.Sprintf("uintptr(len(%s))", p.Name))
					n++
				} else if p.Type == "int64" && endianness != "" {
					// On 32-bit targets an int64 is passed as two words
					// whose order depends on the endianness.
					if endianness == "big-endian" {
						args = append(args, fmt.Sprintf("uintptr(%s>>32)", p.Name), fmt.Sprintf("uintptr(%s)", p.Name))
					} else {
						args = append(args, fmt.Sprintf("uintptr(%s)", p.Name), fmt.Sprintf("uintptr(%s>>32)", p.Name))
					}
				} else if p.Type == "bool" {
					text += fmt.Sprintf("\tvar _p%d uint32\n", n)
					text += fmt.Sprintf("\tif %s {\n\t\t_p%d = 1\n\t} else {\n\t\t_p%d = 0\n\t}\n", p.Name, n, n)
					args = append(args, fmt.Sprintf("uintptr(_p%d)", n))
					n++
				} else {
					args = append(args, fmt.Sprintf("uintptr(%s)", p.Name))
				}
			}
			nargs := len(args)

			// Determine which form to use; pad args with zeros.
			asm := "sysvicall6"
			if nonblock != nil {
				asm = "rawSysvicall6"
			}
			if len(args) <= 6 {
				for len(args) < 6 {
					args = append(args, "0")
				}
			} else {
				fmt.Fprintf(os.Stderr, "%s: too many arguments to system call\n", path)
				os.Exit(1)
			}

			// Actual call.
			arglist := strings.Join(args, ", ")
			call := fmt.Sprintf("%s(uintptr(unsafe.Pointer(&%s)), %d, %s)", asm, sysvarname, nargs, arglist)

			// Assign return values.
			body := ""
			ret := []string{"_", "_", "_"}
			doErrno := false
			for i := 0; i < len(out); i++ {
				p := parseParam(out[i])
				reg := ""
				if p.Name == "err" {
					reg = "e1"
					ret[2] = reg
					doErrno = true
				} else {
					reg = fmt.Sprintf("r%d", i)
					ret[i] = reg
				}
				if p.Type == "bool" {
					// reg is a string (a register name), so it must be
					// formatted with %s to produce e.g. "r0 != 0".
					reg = fmt.Sprintf("%s != 0", reg)
				}
				if p.Type == "int64" && endianness != "" {
					// 64-bit number in r1:r0 or r0:r1.
					if i+2 > len(out) {
						fmt.Fprintf(os.Stderr, "%s: not enough registers for int64 return\n", path)
						os.Exit(1)
					}
					if endianness == "big-endian" {
						reg = fmt.Sprintf("int64(r%d)<<32 | int64(r%d)", i, i+1)
					} else {
						reg = fmt.Sprintf("int64(r%d)<<32 | int64(r%d)", i+1, i)
					}
					ret[i] = fmt.Sprintf("r%d", i)
					ret[i+1] = fmt.Sprintf("r%d", i+1)
				}
				if reg != "e1" {
					body += fmt.Sprintf("\t%s = %s(%s)\n", p.Name, p.Type, reg)
				}
			}
			if ret[0] == "_" && ret[1] == "_" && ret[2] == "_" {
				text += fmt.Sprintf("\t%s\n", call)
			} else {
				text += fmt.Sprintf("\t%s, %s, %s := %s\n", ret[0], ret[1], ret[2], call)
			}
			text += body

			if doErrno {
				text += "\tif e1 != 0 {\n"
				text += "\t\terr = e1\n"
				text += "\t}\n"
			}
			text += "\treturn\n"
			text += "}\n"
		}
		if err := s.Err(); err != nil {
			fmt.Fprintln(os.Stderr, err)
			os.Exit(1)
		}
		file.Close()
	}
	imp := ""
	if pack != "unix" {
		imp = "import \"golang.org/x/sys/unix\"\n"
	}
	vardecls := "\t" + strings.Join(vars, ",\n\t")
	vardecls += " syscallFunc"
	fmt.Printf(srcTemplate, cmdLine(), buildTags(), pack, imp, dynimports, linknames, vardecls, text)
}
// srcTemplate is the fmt format string used to render the generated file.
// Its %s verbs are filled, in order, with: the generating command line, the
// build tags, the package name, an optional import line, the dynamic-import
// directives, the linkname directives, the variable declarations placed
// inside the var ( ... ) group, and the generated function text.
const srcTemplate = `// %s
// Code generated by the command above; see README.md. DO NOT EDIT.
// +build %s
package %s
import (
"syscall"
"unsafe"
)
%s
%s
%s
var (
%s
)
%s
`

View file

@ -1,190 +0,0 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Generate system call table for DragonFly, NetBSD,
// FreeBSD, OpenBSD or Darwin from master list
// (for example, /usr/src/sys/kern/syscalls.master or
// sys/syscall.h).
package main
import (
"bufio"
"fmt"
"io"
"io/ioutil"
"net/http"
"os"
"regexp"
"strings"
)
// goos and goarch hold the target operating system and architecture.
var goos, goarch string
// cmdLine reconstructs the command used to run this generator: the fixed
// "go run mksysnum.go" prefix followed by the program's arguments joined
// with single spaces.
func cmdLine() string {
	args := strings.Join(os.Args[1:], " ")
	return "go run mksysnum.go " + args
}
// buildTags reports the build tags for the generated file, in the form
// "<goarch>,<goos>".
func buildTags() string {
	tagLine := fmt.Sprintf("%s,%s", goarch, goos)
	return tagLine
}
// checkErr does nothing when err is nil. Otherwise it writes err to
// standard error and terminates the program with exit status 1.
func checkErr(err error) {
	if err == nil {
		return
	}
	fmt.Fprintf(os.Stderr, "%v\n", err)
	os.Exit(1)
}
// re pairs a source string with the submatches of the most recent Match.
type re struct {
	str string   // text the expressions are matched against
	sub []string // submatches recorded by the last Match call; nil if it failed
}

// Match compiles exp, matches it against r.str and records the submatches
// in r.sub. It reports whether the expression matched.
func (r *re) Match(exp string) bool {
	r.sub = regexp.MustCompile(exp).FindStringSubmatch(r.str)
	return r.sub != nil
}
// fetchFile downloads the text file at URL and returns a reader over its
// complete contents. A transport error, a response status other than
// 200 OK, or a read error is reported through checkErr, which terminates
// the program.
func fetchFile(URL string) io.Reader {
	resp, err := http.Get(URL)
	checkErr(err)
	defer resp.Body.Close()
	// http.Get does not treat non-2xx responses as errors; without this
	// check an error page would be parsed as if it were the syscall list.
	if resp.StatusCode != http.StatusOK {
		checkErr(fmt.Errorf("fetching %s: unexpected HTTP status %s", URL, resp.Status))
	}
	body, err := ioutil.ReadAll(resp.Body)
	checkErr(err)
	return strings.NewReader(string(body))
}
// readFile opens the text file at path and returns it as a reader.
// A failure to open the file is reported through checkErr, which
// terminates the program.
func readFile(path string) io.Reader {
	// Open the path that was passed in, not os.Args[1]: the caller may have
	// normalised the argument before handing it over.
	file, err := os.Open(path)
	checkErr(err)
	return file
}
// format renders one constant line of the generated syscall table from an
// upper-cased name, its number and its prototype (kept as a trailing
// comment).
func format(name, num, proto string) string {
	ident := strings.ToUpper(name)
	matcher := re{str: ident}
	switch {
	case matcher.Match(`^SYS_E?NOSYS$`):
		// There are multiple entries for enosys and nosys, so comment them out.
		ident = fmt.Sprintf("// %s", ident)
	case ident == `SYS_SYS_EXIT`:
		ident = `SYS_EXIT`
	}
	return fmt.Sprintf(" %s = %s; // %s\n", ident, num, proto)
}
// main reads a syscall master list, given as a file path or an http(s)
// URL in the first argument, and prints the generated table of syscall
// number constants for the target OS to standard output. The target is
// taken from GOOS_TARGET/GOARCH_TARGET, falling back to GOOS/GOARCH.
func main() {
	// Get the OS (using GOOS_TARGET if it exists)
	goos = os.Getenv("GOOS_TARGET")
	if goos == "" {
		goos = os.Getenv("GOOS")
	}
	// Get the architecture (using GOARCH_TARGET if it exists)
	goarch = os.Getenv("GOARCH_TARGET")
	if goarch == "" {
		goarch = os.Getenv("GOARCH")
	}
	// Check if GOOS and GOARCH environment variables are defined
	if goarch == "" || goos == "" {
		fmt.Fprintf(os.Stderr, "GOARCH or GOOS not defined in environment\n")
		os.Exit(1)
	}

	// A source file or URL is required; without this check a missing
	// argument would panic with an index-out-of-range error.
	if len(os.Args) < 2 {
		fmt.Fprintf(os.Stderr, "usage: go run mksysnum.go <file or URL>\n")
		os.Exit(1)
	}
	file := strings.TrimSpace(os.Args[1])
	var syscalls io.Reader
	if strings.HasPrefix(file, "https://") || strings.HasPrefix(file, "http://") {
		// Download syscalls.master file
		syscalls = fetchFile(file)
	} else {
		syscalls = readFile(file)
	}

	var text, line string
	s := bufio.NewScanner(syscalls)
	for s.Scan() {
		t := re{str: line}
		if t.Match(`^(.*)\\$`) {
			// Handle continuation
			line = t.sub[1]
			line += strings.TrimLeft(s.Text(), " \t")
		} else {
			// New line
			line = s.Text()
		}
		// A trailing backslash means the logical line is not complete yet.
		t = re{str: line}
		if t.Match(`\\$`) {
			continue
		}
		t = re{str: line}

		switch goos {
		case "dragonfly":
			if t.Match(`^([0-9]+)\s+STD\s+({ \S+\s+(\w+).*)$`) {
				num, proto := t.sub[1], t.sub[2]
				name := fmt.Sprintf("SYS_%s", t.sub[3])
				text += format(name, num, proto)
			}
		case "freebsd":
			if t.Match(`^([0-9]+)\s+\S+\s+(?:NO)?STD\s+({ \S+\s+(\w+).*)$`) {
				num, proto := t.sub[1], t.sub[2]
				name := fmt.Sprintf("SYS_%s", t.sub[3])
				text += format(name, num, proto)
			}
		case "openbsd":
			if t.Match(`^([0-9]+)\s+STD\s+(NOLOCK\s+)?({ \S+\s+\*?(\w+).*)$`) {
				num, proto, name := t.sub[1], t.sub[3], t.sub[4]
				text += format(name, num, proto)
			}
		case "netbsd":
			if t.Match(`^([0-9]+)\s+((STD)|(NOERR))\s+(RUMP\s+)?({\s+\S+\s*\*?\s*\|(\S+)\|(\S*)\|(\w+).*\s+})(\s+(\S+))?$`) {
				num, proto, compat := t.sub[1], t.sub[6], t.sub[8]
				name := t.sub[7] + "_" + t.sub[9]
				// An explicit trailing name overrides the one in the prototype.
				if t.sub[11] != "" {
					name = t.sub[7] + "_" + t.sub[11]
				}
				name = strings.ToUpper(name)
				if compat == "" || compat == "13" || compat == "30" || compat == "50" {
					text += fmt.Sprintf(" %s = %s; // %s\n", name, num, proto)
				}
			}
		case "darwin":
			if t.Match(`^#define\s+SYS_(\w+)\s+([0-9]+)`) {
				name, num := t.sub[1], t.sub[2]
				name = strings.ToUpper(name)
				text += fmt.Sprintf(" SYS_%s = %s;\n", name, num)
			}
		default:
			fmt.Fprintf(os.Stderr, "unrecognized GOOS=%s\n", goos)
			os.Exit(1)
		}
	}
	err := s.Err()
	checkErr(err)

	fmt.Printf(template, cmdLine(), buildTags(), text)
}
// template is the fmt format string used to render the generated file.
// Its %s verbs are filled, in order, with: the generating command line,
// the build tags, and the generated constant lines placed inside the
// const( ... ) group.
const template = `// %s
// Code generated by the command above; see README.md. DO NOT EDIT.
// +build %s
package unix
const(
%s)`

View file

@ -1,236 +0,0 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// +build aix
/*
Input to cgo -godefs. See also mkerrors.sh and mkall.sh
*/
// +godefs map struct_in_addr [4]byte /* in_addr */
// +godefs map struct_in6_addr [16]byte /* in6_addr */
package unix
/*
#include <sys/types.h>
#include <sys/time.h>
#include <sys/limits.h>
#include <sys/un.h>
#include <utime.h>
#include <sys/utsname.h>
#include <sys/poll.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/statfs.h>
#include <sys/termio.h>
#include <sys/ioctl.h>
#include <termios.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <dirent.h>
#include <fcntl.h>
enum {
sizeofPtr = sizeof(void*),
};
union sockaddr_all {
struct sockaddr s1; // this one gets used for fields
struct sockaddr_in s2; // these pad it out
struct sockaddr_in6 s3;
struct sockaddr_un s4;
struct sockaddr_dl s5;
};
struct sockaddr_any {
struct sockaddr addr;
char pad[sizeof(union sockaddr_all) - sizeof(struct sockaddr)];
};
*/
import "C"
// Machine characteristics
// Sizes of a pointer and of the basic C integer types, plus the C PATH_MAX
// limit, all taken from the C side through cgo.
const (
SizeofPtr = C.sizeofPtr
SizeofShort = C.sizeof_short
SizeofInt = C.sizeof_int
SizeofLong = C.sizeof_long
SizeofLongLong = C.sizeof_longlong
PathMax = C.PATH_MAX
)
// Basic types
// Unexported Go names for the fundamental C integer types.
type (
_C_short C.short
_C_int C.int
_C_long C.long
_C_long_long C.longlong
)
// File offset types (unexported) and the file mode type.
type off64 C.off64_t
type off C.off_t
type Mode_t C.mode_t
// Time
// Go names for the C time-related structures and time_t.
type Timespec C.struct_timespec
type StTimespec C.struct_st_timespec
type Timeval C.struct_timeval
type Timeval32 C.struct_timeval32
type Timex C.struct_timex
type Time_t C.time_t
type Tms C.struct_tms
type Utimbuf C.struct_utimbuf
type Timezone C.struct_timezone
// Processes
// Note that Rlimit is declared from the C struct rlimit64.
type Rusage C.struct_rusage
type Rlimit C.struct_rlimit64
type Pid_t C.pid_t
type _Gid_t C.gid_t
type dev_t C.dev_t
// Files
// Go names for the C stat, statx and directory entry structures.
type Stat_t C.struct_stat
type StatxTimestamp C.struct_statx_timestamp
type Statx_t C.struct_statx
type Dirent C.struct_dirent
// Sockets
// Go names for the C socket address, message and option structures.
// RawSockaddrAny is declared from struct sockaddr_any, which the C preamble
// of this file defines as a sockaddr padded to the size of the largest
// address type in union sockaddr_all.
type RawSockaddrInet4 C.struct_sockaddr_in
type RawSockaddrInet6 C.struct_sockaddr_in6
type RawSockaddrUnix C.struct_sockaddr_un
type RawSockaddr C.struct_sockaddr
type RawSockaddrAny C.struct_sockaddr_any
type _Socklen C.socklen_t
type Cmsghdr C.struct_cmsghdr
type ICMPv6Filter C.struct_icmp6_filter
type Iovec C.struct_iovec
type IPMreq C.struct_ip_mreq
type IPv6Mreq C.struct_ipv6_mreq
type IPv6MTUInfo C.struct_ip6_mtuinfo
type Linger C.struct_linger
type Msghdr C.struct_msghdr
// Sizes of the socket structures above, as reported by cgo.
const (
SizeofSockaddrInet4 = C.sizeof_struct_sockaddr_in
SizeofSockaddrInet6 = C.sizeof_struct_sockaddr_in6
SizeofSockaddrAny = C.sizeof_struct_sockaddr_any
SizeofSockaddrUnix = C.sizeof_struct_sockaddr_un
SizeofLinger = C.sizeof_struct_linger
SizeofIPMreq = C.sizeof_struct_ip_mreq
SizeofIPv6Mreq = C.sizeof_struct_ipv6_mreq
SizeofIPv6MTUInfo = C.sizeof_struct_ip6_mtuinfo
SizeofMsghdr = C.sizeof_struct_msghdr
SizeofCmsghdr = C.sizeof_struct_cmsghdr
SizeofICMPv6Filter = C.sizeof_struct_icmp6_filter
)
// Routing and interface messages
const (
SizeofIfMsghdr = C.sizeof_struct_if_msghdr
)
// IfMsgHdr is the Go name for the C struct if_msghdr.
type IfMsgHdr C.struct_if_msghdr
// Misc
type FdSet C.fd_set
type Utsname C.struct_utsname
type Ustat_t C.struct_ustat
type Sigset_t C.sigset_t
// AT_* constants, taken from the C headers.
const (
AT_FDCWD = C.AT_FDCWD
AT_REMOVEDIR = C.AT_REMOVEDIR
AT_SYMLINK_NOFOLLOW = C.AT_SYMLINK_NOFOLLOW
)
// Terminal handling
type Termios C.struct_termios
type Termio C.struct_termio
type Winsize C.struct_winsize
// poll
// PollFd is written out by hand here rather than declared from a C struct.
type PollFd struct {
Fd int32
Events uint16
Revents uint16
}
// Event flag values for poll, taken from the C headers.
const (
POLLERR = C.POLLERR
POLLHUP = C.POLLHUP
POLLIN = C.POLLIN
POLLNVAL = C.POLLNVAL
POLLOUT = C.POLLOUT
POLLPRI = C.POLLPRI
POLLRDBAND = C.POLLRDBAND
POLLRDNORM = C.POLLRDNORM
POLLWRBAND = C.POLLWRBAND
POLLWRNORM = C.POLLWRNORM
)
// flock_t
// Flock_t is declared from the C struct flock64.
type Flock_t C.struct_flock64
// Statfs
type Fsid_t C.struct_fsid_t
type Fsid64_t C.struct_fsid64_t
type Statfs_t C.struct_statfs
// RNDGETENTCNT is given as a literal value rather than taken from a C header.
const RNDGETENTCNT = 0x80045200

View file

@ -1,283 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
/*
Input to cgo -godefs. See README.md
*/
// +godefs map struct_in_addr [4]byte /* in_addr */
// +godefs map struct_in6_addr [16]byte /* in6_addr */
package unix
/*
#define __DARWIN_UNIX03 0
#define KERNEL
#define _DARWIN_USE_64_BIT_INODE
#include <dirent.h>
#include <fcntl.h>
#include <poll.h>
#include <signal.h>
#include <termios.h>
#include <unistd.h>
#include <mach/mach.h>
#include <mach/message.h>
#include <sys/event.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/param.h>
#include <sys/ptrace.h>
#include <sys/resource.h>
#include <sys/select.h>
#include <sys/signal.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_var.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <netinet/tcp.h>
enum {
sizeofPtr = sizeof(void*),
};
union sockaddr_all {
struct sockaddr s1; // this one gets used for fields
struct sockaddr_in s2; // these pad it out
struct sockaddr_in6 s3;
struct sockaddr_un s4;
struct sockaddr_dl s5;
};
struct sockaddr_any {
struct sockaddr addr;
char pad[sizeof(union sockaddr_all) - sizeof(struct sockaddr)];
};
*/
import "C"
// Machine characteristics
// Sizes of a pointer and of the basic C integer types, taken from the C
// side through cgo.
const (
SizeofPtr = C.sizeofPtr
SizeofShort = C.sizeof_short
SizeofInt = C.sizeof_int
SizeofLong = C.sizeof_long
SizeofLongLong = C.sizeof_longlong
)
// Basic types
// Unexported Go names for the fundamental C integer types.
type (
_C_short C.short
_C_int C.int
_C_long C.long
_C_long_long C.longlong
)
// Time
type Timespec C.struct_timespec
type Timeval C.struct_timeval
type Timeval32 C.struct_timeval32
// Processes
type Rusage C.struct_rusage
type Rlimit C.struct_rlimit
type _Gid_t C.gid_t
// Files
// Note that Stat_t and Statfs_t are declared from the C structs stat64 and
// statfs64 respectively.
type Stat_t C.struct_stat64
type Statfs_t C.struct_statfs64
type Flock_t C.struct_flock
type Fstore_t C.struct_fstore
type Radvisory_t C.struct_radvisory
type Fbootstraptransfer_t C.struct_fbootstraptransfer
type Log2phys_t C.struct_log2phys
type Fsid C.struct_fsid
type Dirent C.struct_dirent
// Sockets
// Go names for the C socket address, message and option structures.
// RawSockaddrAny is declared from struct sockaddr_any, which the C preamble
// of this file defines as a sockaddr padded to the size of the largest
// address type in union sockaddr_all.
type RawSockaddrInet4 C.struct_sockaddr_in
type RawSockaddrInet6 C.struct_sockaddr_in6
type RawSockaddrUnix C.struct_sockaddr_un
type RawSockaddrDatalink C.struct_sockaddr_dl
type RawSockaddr C.struct_sockaddr
type RawSockaddrAny C.struct_sockaddr_any
type _Socklen C.socklen_t
type Linger C.struct_linger
type Iovec C.struct_iovec
type IPMreq C.struct_ip_mreq
type IPv6Mreq C.struct_ipv6_mreq
type Msghdr C.struct_msghdr
type Cmsghdr C.struct_cmsghdr
type Inet4Pktinfo C.struct_in_pktinfo
type Inet6Pktinfo C.struct_in6_pktinfo
type IPv6MTUInfo C.struct_ip6_mtuinfo
type ICMPv6Filter C.struct_icmp6_filter
// Sizes of the socket structures above, as reported by cgo.
const (
SizeofSockaddrInet4 = C.sizeof_struct_sockaddr_in
SizeofSockaddrInet6 = C.sizeof_struct_sockaddr_in6
SizeofSockaddrAny = C.sizeof_struct_sockaddr_any
SizeofSockaddrUnix = C.sizeof_struct_sockaddr_un
SizeofSockaddrDatalink = C.sizeof_struct_sockaddr_dl
SizeofLinger = C.sizeof_struct_linger
SizeofIPMreq = C.sizeof_struct_ip_mreq
SizeofIPv6Mreq = C.sizeof_struct_ipv6_mreq
SizeofMsghdr = C.sizeof_struct_msghdr
SizeofCmsghdr = C.sizeof_struct_cmsghdr
SizeofInet4Pktinfo = C.sizeof_struct_in_pktinfo
SizeofInet6Pktinfo = C.sizeof_struct_in6_pktinfo
SizeofIPv6MTUInfo = C.sizeof_struct_ip6_mtuinfo
SizeofICMPv6Filter = C.sizeof_struct_icmp6_filter
)
// Ptrace requests
// The PTRACE_* names are mapped onto the C PT_* request constants.
const (
PTRACE_TRACEME = C.PT_TRACE_ME
PTRACE_CONT = C.PT_CONTINUE
PTRACE_KILL = C.PT_KILL
)
// Events (kqueue, kevent)
type Kevent_t C.struct_kevent
// Select
type FdSet C.fd_set
// Routing and interface messages
// Sizes of the routing and interface message structures declared below.
const (
SizeofIfMsghdr = C.sizeof_struct_if_msghdr
SizeofIfData = C.sizeof_struct_if_data
SizeofIfaMsghdr = C.sizeof_struct_ifa_msghdr
SizeofIfmaMsghdr = C.sizeof_struct_ifma_msghdr
SizeofIfmaMsghdr2 = C.sizeof_struct_ifma_msghdr2
SizeofRtMsghdr = C.sizeof_struct_rt_msghdr
SizeofRtMetrics = C.sizeof_struct_rt_metrics
)
type IfMsghdr C.struct_if_msghdr
type IfData C.struct_if_data
type IfaMsghdr C.struct_ifa_msghdr
type IfmaMsghdr C.struct_ifma_msghdr
type IfmaMsghdr2 C.struct_ifma_msghdr2
type RtMsghdr C.struct_rt_msghdr
type RtMetrics C.struct_rt_metrics
// Berkeley packet filter
// Sizes of the BPF structures declared below.
const (
SizeofBpfVersion = C.sizeof_struct_bpf_version
SizeofBpfStat = C.sizeof_struct_bpf_stat
SizeofBpfProgram = C.sizeof_struct_bpf_program
SizeofBpfInsn = C.sizeof_struct_bpf_insn
SizeofBpfHdr = C.sizeof_struct_bpf_hdr
)
type BpfVersion C.struct_bpf_version
type BpfStat C.struct_bpf_stat
type BpfProgram C.struct_bpf_program
type BpfInsn C.struct_bpf_insn
type BpfHdr C.struct_bpf_hdr
// Terminal handling
type Termios C.struct_termios
type Winsize C.struct_winsize
// fchmodat-like syscalls.
// AT_* constants, taken from the C headers.
const (
AT_FDCWD = C.AT_FDCWD
AT_REMOVEDIR = C.AT_REMOVEDIR
AT_SYMLINK_FOLLOW = C.AT_SYMLINK_FOLLOW
AT_SYMLINK_NOFOLLOW = C.AT_SYMLINK_NOFOLLOW
)
// poll
// PollFd is declared from the C struct pollfd; the POLL* constants are the
// event flag values from the C headers.
type PollFd C.struct_pollfd
const (
POLLERR = C.POLLERR
POLLHUP = C.POLLHUP
POLLIN = C.POLLIN
POLLNVAL = C.POLLNVAL
POLLOUT = C.POLLOUT
POLLPRI = C.POLLPRI
POLLRDBAND = C.POLLRDBAND
POLLRDNORM = C.POLLRDNORM
POLLWRBAND = C.POLLWRBAND
POLLWRNORM = C.POLLWRNORM
)
// uname
type Utsname C.struct_utsname
// Clockinfo
// SizeofClockinfo is the size of the C struct clockinfo declared below.
const SizeofClockinfo = C.sizeof_struct_clockinfo
type Clockinfo C.struct_clockinfo

View file

@ -1,263 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
/*
Input to cgo -godefs. See README.md
*/
// +godefs map struct_in_addr [4]byte /* in_addr */
// +godefs map struct_in6_addr [16]byte /* in6_addr */
package unix
/*
#define KERNEL
#include <dirent.h>
#include <fcntl.h>
#include <poll.h>
#include <signal.h>
#include <termios.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/event.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/param.h>
#include <sys/ptrace.h>
#include <sys/resource.h>
#include <sys/select.h>
#include <sys/signal.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/un.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <netinet/tcp.h>
enum {
sizeofPtr = sizeof(void*),
};
union sockaddr_all {
struct sockaddr s1; // this one gets used for fields
struct sockaddr_in s2; // these pad it out
struct sockaddr_in6 s3;
struct sockaddr_un s4;
struct sockaddr_dl s5;
};
struct sockaddr_any {
struct sockaddr addr;
char pad[sizeof(union sockaddr_all) - sizeof(struct sockaddr)];
};
*/
import "C"
// Machine characteristics
// Sizes of a pointer and of the basic C integer types, taken from the C
// side through cgo.
const (
SizeofPtr = C.sizeofPtr
SizeofShort = C.sizeof_short
SizeofInt = C.sizeof_int
SizeofLong = C.sizeof_long
SizeofLongLong = C.sizeof_longlong
)
// Basic types
// Unexported Go names for the fundamental C integer types.
type (
_C_short C.short
_C_int C.int
_C_long C.long
_C_long_long C.longlong
)
// Time
type Timespec C.struct_timespec
type Timeval C.struct_timeval
// Processes
type Rusage C.struct_rusage
type Rlimit C.struct_rlimit
type _Gid_t C.gid_t
// Files
type Stat_t C.struct_stat
type Statfs_t C.struct_statfs
type Flock_t C.struct_flock
type Dirent C.struct_dirent
type Fsid C.struct_fsid
// File system limits
// PathMax is the C PATH_MAX limit.
const (
PathMax = C.PATH_MAX
)
// Sockets
// Go names for the C socket address, message and option structures.
// RawSockaddrAny is declared from struct sockaddr_any, which the C preamble
// of this file defines as a sockaddr padded to the size of the largest
// address type in union sockaddr_all.
type RawSockaddrInet4 C.struct_sockaddr_in
type RawSockaddrInet6 C.struct_sockaddr_in6
type RawSockaddrUnix C.struct_sockaddr_un
type RawSockaddrDatalink C.struct_sockaddr_dl
type RawSockaddr C.struct_sockaddr
type RawSockaddrAny C.struct_sockaddr_any
type _Socklen C.socklen_t
type Linger C.struct_linger
type Iovec C.struct_iovec
type IPMreq C.struct_ip_mreq
type IPv6Mreq C.struct_ipv6_mreq
type Msghdr C.struct_msghdr
type Cmsghdr C.struct_cmsghdr
type Inet6Pktinfo C.struct_in6_pktinfo
type IPv6MTUInfo C.struct_ip6_mtuinfo
type ICMPv6Filter C.struct_icmp6_filter
// Sizes of the socket structures above, as reported by cgo.
const (
SizeofSockaddrInet4 = C.sizeof_struct_sockaddr_in
SizeofSockaddrInet6 = C.sizeof_struct_sockaddr_in6
SizeofSockaddrAny = C.sizeof_struct_sockaddr_any
SizeofSockaddrUnix = C.sizeof_struct_sockaddr_un
SizeofSockaddrDatalink = C.sizeof_struct_sockaddr_dl
SizeofLinger = C.sizeof_struct_linger
SizeofIPMreq = C.sizeof_struct_ip_mreq
SizeofIPv6Mreq = C.sizeof_struct_ipv6_mreq
SizeofMsghdr = C.sizeof_struct_msghdr
SizeofCmsghdr = C.sizeof_struct_cmsghdr
SizeofInet6Pktinfo = C.sizeof_struct_in6_pktinfo
SizeofIPv6MTUInfo = C.sizeof_struct_ip6_mtuinfo
SizeofICMPv6Filter = C.sizeof_struct_icmp6_filter
)
// Ptrace requests
// The PTRACE_* names are mapped onto the C PT_* request constants.
const (
PTRACE_TRACEME = C.PT_TRACE_ME
PTRACE_CONT = C.PT_CONTINUE
PTRACE_KILL = C.PT_KILL
)
// Events (kqueue, kevent)
type Kevent_t C.struct_kevent
// Select
type FdSet C.fd_set
// Routing and interface messages
// Sizes of the routing and interface message structures declared below.
const (
SizeofIfMsghdr = C.sizeof_struct_if_msghdr
SizeofIfData = C.sizeof_struct_if_data
SizeofIfaMsghdr = C.sizeof_struct_ifa_msghdr
SizeofIfmaMsghdr = C.sizeof_struct_ifma_msghdr
SizeofIfAnnounceMsghdr = C.sizeof_struct_if_announcemsghdr
SizeofRtMsghdr = C.sizeof_struct_rt_msghdr
SizeofRtMetrics = C.sizeof_struct_rt_metrics
)
type IfMsghdr C.struct_if_msghdr
type IfData C.struct_if_data
type IfaMsghdr C.struct_ifa_msghdr
type IfmaMsghdr C.struct_ifma_msghdr
type IfAnnounceMsghdr C.struct_if_announcemsghdr
type RtMsghdr C.struct_rt_msghdr
type RtMetrics C.struct_rt_metrics
// Berkeley packet filter
// Sizes of the BPF structures declared below.
const (
SizeofBpfVersion = C.sizeof_struct_bpf_version
SizeofBpfStat = C.sizeof_struct_bpf_stat
SizeofBpfProgram = C.sizeof_struct_bpf_program
SizeofBpfInsn = C.sizeof_struct_bpf_insn
SizeofBpfHdr = C.sizeof_struct_bpf_hdr
)
type BpfVersion C.struct_bpf_version
type BpfStat C.struct_bpf_stat
type BpfProgram C.struct_bpf_program
type BpfInsn C.struct_bpf_insn
type BpfHdr C.struct_bpf_hdr
// Terminal handling
type Termios C.struct_termios
type Winsize C.struct_winsize
// fchmodat-like syscalls.
// AT_* constants, taken from the C headers.
const (
AT_FDCWD = C.AT_FDCWD
AT_SYMLINK_NOFOLLOW = C.AT_SYMLINK_NOFOLLOW
)
// poll
// PollFd is declared from the C struct pollfd; the POLL* constants are the
// event flag values from the C headers.
type PollFd C.struct_pollfd
const (
POLLERR = C.POLLERR
POLLHUP = C.POLLHUP
POLLIN = C.POLLIN
POLLNVAL = C.POLLNVAL
POLLOUT = C.POLLOUT
POLLPRI = C.POLLPRI
POLLRDBAND = C.POLLRDBAND
POLLRDNORM = C.POLLRDNORM
POLLWRBAND = C.POLLWRBAND
POLLWRNORM = C.POLLWRNORM
)
// Uname
type Utsname C.struct_utsname

View file

@ -1,356 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
/*
Input to cgo -godefs. See README.md
*/
// +godefs map struct_in_addr [4]byte /* in_addr */
// +godefs map struct_in6_addr [16]byte /* in6_addr */
package unix
/*
#define _WANT_FREEBSD11_STAT 1
#define _WANT_FREEBSD11_STATFS 1
#define _WANT_FREEBSD11_DIRENT 1
#define _WANT_FREEBSD11_KEVENT 1
#include <dirent.h>
#include <fcntl.h>
#include <poll.h>
#include <signal.h>
#include <termios.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/capsicum.h>
#include <sys/event.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/param.h>
#include <sys/ptrace.h>
#include <sys/resource.h>
#include <sys/select.h>
#include <sys/signal.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/un.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <netinet/tcp.h>
enum {
sizeofPtr = sizeof(void*),
};
union sockaddr_all {
struct sockaddr s1; // this one gets used for fields
struct sockaddr_in s2; // these pad it out
struct sockaddr_in6 s3;
struct sockaddr_un s4;
struct sockaddr_dl s5;
};
struct sockaddr_any {
struct sockaddr addr;
char pad[sizeof(union sockaddr_all) - sizeof(struct sockaddr)];
};
// This structure is a duplicate of if_data on FreeBSD 8-STABLE.
// See /usr/include/net/if.h.
struct if_data8 {
u_char ifi_type;
u_char ifi_physical;
u_char ifi_addrlen;
u_char ifi_hdrlen;
u_char ifi_link_state;
u_char ifi_spare_char1;
u_char ifi_spare_char2;
u_char ifi_datalen;
u_long ifi_mtu;
u_long ifi_metric;
u_long ifi_baudrate;
u_long ifi_ipackets;
u_long ifi_ierrors;
u_long ifi_opackets;
u_long ifi_oerrors;
u_long ifi_collisions;
u_long ifi_ibytes;
u_long ifi_obytes;
u_long ifi_imcasts;
u_long ifi_omcasts;
u_long ifi_iqdrops;
u_long ifi_noproto;
u_long ifi_hwassist;
// FIXME: these are now unions, so maybe need to change definitions?
#undef ifi_epoch
time_t ifi_epoch;
#undef ifi_lastchange
struct timeval ifi_lastchange;
};
// This structure is a duplicate of if_msghdr on FreeBSD 8-STABLE.
// See /usr/include/net/if.h.
struct if_msghdr8 {
u_short ifm_msglen;
u_char ifm_version;
u_char ifm_type;
int ifm_addrs;
int ifm_flags;
u_short ifm_index;
struct if_data8 ifm_data;
};
*/
import "C"
// Machine characteristics
const (
SizeofPtr = C.sizeofPtr
SizeofShort = C.sizeof_short
SizeofInt = C.sizeof_int
SizeofLong = C.sizeof_long
SizeofLongLong = C.sizeof_longlong
)
// Basic types
type (
_C_short C.short
_C_int C.int
_C_long C.long
_C_long_long C.longlong
)
// Time
type Timespec C.struct_timespec
type Timeval C.struct_timeval
// Processes
type Rusage C.struct_rusage
type Rlimit C.struct_rlimit
type _Gid_t C.gid_t
// Files
const (
_statfsVersion = C.STATFS_VERSION
_dirblksiz = C.DIRBLKSIZ
)
type Stat_t C.struct_stat
type stat_freebsd11_t C.struct_freebsd11_stat
type Statfs_t C.struct_statfs
type statfs_freebsd11_t C.struct_freebsd11_statfs
type Flock_t C.struct_flock
type Dirent C.struct_dirent
type dirent_freebsd11 C.struct_freebsd11_dirent
type Fsid C.struct_fsid
// File system limits
const (
PathMax = C.PATH_MAX
)
// Advice to Fadvise
const (
FADV_NORMAL = C.POSIX_FADV_NORMAL
FADV_RANDOM = C.POSIX_FADV_RANDOM
FADV_SEQUENTIAL = C.POSIX_FADV_SEQUENTIAL
FADV_WILLNEED = C.POSIX_FADV_WILLNEED
FADV_DONTNEED = C.POSIX_FADV_DONTNEED
FADV_NOREUSE = C.POSIX_FADV_NOREUSE
)
// Sockets
type RawSockaddrInet4 C.struct_sockaddr_in
type RawSockaddrInet6 C.struct_sockaddr_in6
type RawSockaddrUnix C.struct_sockaddr_un
type RawSockaddrDatalink C.struct_sockaddr_dl
type RawSockaddr C.struct_sockaddr
type RawSockaddrAny C.struct_sockaddr_any
type _Socklen C.socklen_t
type Linger C.struct_linger
type Iovec C.struct_iovec
type IPMreq C.struct_ip_mreq
type IPMreqn C.struct_ip_mreqn
type IPv6Mreq C.struct_ipv6_mreq
type Msghdr C.struct_msghdr
type Cmsghdr C.struct_cmsghdr
type Inet6Pktinfo C.struct_in6_pktinfo
type IPv6MTUInfo C.struct_ip6_mtuinfo
type ICMPv6Filter C.struct_icmp6_filter
const (
SizeofSockaddrInet4 = C.sizeof_struct_sockaddr_in
SizeofSockaddrInet6 = C.sizeof_struct_sockaddr_in6
SizeofSockaddrAny = C.sizeof_struct_sockaddr_any
SizeofSockaddrUnix = C.sizeof_struct_sockaddr_un
SizeofSockaddrDatalink = C.sizeof_struct_sockaddr_dl
SizeofLinger = C.sizeof_struct_linger
SizeofIPMreq = C.sizeof_struct_ip_mreq
SizeofIPMreqn = C.sizeof_struct_ip_mreqn
SizeofIPv6Mreq = C.sizeof_struct_ipv6_mreq
SizeofMsghdr = C.sizeof_struct_msghdr
SizeofCmsghdr = C.sizeof_struct_cmsghdr
SizeofInet6Pktinfo = C.sizeof_struct_in6_pktinfo
SizeofIPv6MTUInfo = C.sizeof_struct_ip6_mtuinfo
SizeofICMPv6Filter = C.sizeof_struct_icmp6_filter
)
// Ptrace requests
const (
PTRACE_TRACEME = C.PT_TRACE_ME
PTRACE_CONT = C.PT_CONTINUE
PTRACE_KILL = C.PT_KILL
)
// Events (kqueue, kevent)
type Kevent_t C.struct_kevent_freebsd11
// Select
type FdSet C.fd_set
// Routing and interface messages
//
// Two layouts of the interface message header and interface data are exposed
// in this section. The unexported names (sizeofIfMsghdr, sizeofIfData,
// ifMsghdr, ifData) are derived from struct if_msghdr / struct if_data as
// found in the included system headers. The exported names (SizeofIfMsghdr,
// SizeofIfData, IfMsghdr, IfData) are derived from struct if_msghdr8 /
// struct if_data8, which the cgo preamble of this file declares and describes
// as duplicates of the FreeBSD 8-STABLE definitions.
const (
sizeofIfMsghdr = C.sizeof_struct_if_msghdr
SizeofIfMsghdr = C.sizeof_struct_if_msghdr8
sizeofIfData = C.sizeof_struct_if_data
SizeofIfData = C.sizeof_struct_if_data8
SizeofIfaMsghdr = C.sizeof_struct_ifa_msghdr
SizeofIfmaMsghdr = C.sizeof_struct_ifma_msghdr
SizeofIfAnnounceMsghdr = C.sizeof_struct_if_announcemsghdr
SizeofRtMsghdr = C.sizeof_struct_rt_msghdr
SizeofRtMetrics = C.sizeof_struct_rt_metrics
)
// ifMsghdr mirrors the system headers' struct if_msghdr.
type ifMsghdr C.struct_if_msghdr
// IfMsghdr mirrors struct if_msghdr8 from the cgo preamble.
type IfMsghdr C.struct_if_msghdr8
// ifData mirrors the system headers' struct if_data.
type ifData C.struct_if_data
// IfData mirrors struct if_data8 from the cgo preamble.
type IfData C.struct_if_data8
type IfaMsghdr C.struct_ifa_msghdr
type IfmaMsghdr C.struct_ifma_msghdr
type IfAnnounceMsghdr C.struct_if_announcemsghdr
type RtMsghdr C.struct_rt_msghdr
type RtMetrics C.struct_rt_metrics
// Berkeley packet filter
const (
SizeofBpfVersion = C.sizeof_struct_bpf_version
SizeofBpfStat = C.sizeof_struct_bpf_stat
SizeofBpfZbuf = C.sizeof_struct_bpf_zbuf
SizeofBpfProgram = C.sizeof_struct_bpf_program
SizeofBpfInsn = C.sizeof_struct_bpf_insn
SizeofBpfHdr = C.sizeof_struct_bpf_hdr
SizeofBpfZbufHeader = C.sizeof_struct_bpf_zbuf_header
)
type BpfVersion C.struct_bpf_version
type BpfStat C.struct_bpf_stat
type BpfZbuf C.struct_bpf_zbuf
type BpfProgram C.struct_bpf_program
type BpfInsn C.struct_bpf_insn
type BpfHdr C.struct_bpf_hdr
type BpfZbufHeader C.struct_bpf_zbuf_header
// Terminal handling
type Termios C.struct_termios
type Winsize C.struct_winsize
// fchmodat-like syscalls.
const (
AT_FDCWD = C.AT_FDCWD
AT_REMOVEDIR = C.AT_REMOVEDIR
AT_SYMLINK_FOLLOW = C.AT_SYMLINK_FOLLOW
AT_SYMLINK_NOFOLLOW = C.AT_SYMLINK_NOFOLLOW
)
// poll
type PollFd C.struct_pollfd
const (
POLLERR = C.POLLERR
POLLHUP = C.POLLHUP
POLLIN = C.POLLIN
POLLINIGNEOF = C.POLLINIGNEOF
POLLNVAL = C.POLLNVAL
POLLOUT = C.POLLOUT
POLLPRI = C.POLLPRI
POLLRDBAND = C.POLLRDBAND
POLLRDNORM = C.POLLRDNORM
POLLWRBAND = C.POLLWRBAND
POLLWRNORM = C.POLLWRNORM
)
// Capabilities
type CapRights C.struct_cap_rights
// Uname
type Utsname C.struct_utsname

View file

@ -1,289 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
/*
Input to cgo -godefs. See README.md
*/
// +godefs map struct_in_addr [4]byte /* in_addr */
// +godefs map struct_in6_addr [16]byte /* in6_addr */
package unix
/*
#define KERNEL
#include <dirent.h>
#include <fcntl.h>
#include <poll.h>
#include <signal.h>
#include <termios.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/event.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/ptrace.h>
#include <sys/resource.h>
#include <sys/select.h>
#include <sys/signal.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <netinet/tcp.h>
enum {
sizeofPtr = sizeof(void*),
};
union sockaddr_all {
struct sockaddr s1; // this one gets used for fields
struct sockaddr_in s2; // these pad it out
struct sockaddr_in6 s3;
struct sockaddr_un s4;
struct sockaddr_dl s5;
};
struct sockaddr_any {
struct sockaddr addr;
char pad[sizeof(union sockaddr_all) - sizeof(struct sockaddr)];
};
*/
import "C"
// Machine characteristics
const (
SizeofPtr = C.sizeofPtr
SizeofShort = C.sizeof_short
SizeofInt = C.sizeof_int
SizeofLong = C.sizeof_long
SizeofLongLong = C.sizeof_longlong
)
// Basic types
type (
_C_short C.short
_C_int C.int
_C_long C.long
_C_long_long C.longlong
)
// Time
type Timespec C.struct_timespec
type Timeval C.struct_timeval
// Processes
type Rusage C.struct_rusage
type Rlimit C.struct_rlimit
type _Gid_t C.gid_t
// Files
type Stat_t C.struct_stat
type Statfs_t C.struct_statfs
type Flock_t C.struct_flock
type Dirent C.struct_dirent
type Fsid C.fsid_t
// File system limits
const (
PathMax = C.PATH_MAX
)
// Advice to Fadvise
const (
FADV_NORMAL = C.POSIX_FADV_NORMAL
FADV_RANDOM = C.POSIX_FADV_RANDOM
FADV_SEQUENTIAL = C.POSIX_FADV_SEQUENTIAL
FADV_WILLNEED = C.POSIX_FADV_WILLNEED
FADV_DONTNEED = C.POSIX_FADV_DONTNEED
FADV_NOREUSE = C.POSIX_FADV_NOREUSE
)
// Sockets
type RawSockaddrInet4 C.struct_sockaddr_in
type RawSockaddrInet6 C.struct_sockaddr_in6
type RawSockaddrUnix C.struct_sockaddr_un
type RawSockaddrDatalink C.struct_sockaddr_dl
type RawSockaddr C.struct_sockaddr
type RawSockaddrAny C.struct_sockaddr_any
type _Socklen C.socklen_t
type Linger C.struct_linger
type Iovec C.struct_iovec
type IPMreq C.struct_ip_mreq
type IPv6Mreq C.struct_ipv6_mreq
type Msghdr C.struct_msghdr
type Cmsghdr C.struct_cmsghdr
type Inet6Pktinfo C.struct_in6_pktinfo
type IPv6MTUInfo C.struct_ip6_mtuinfo
type ICMPv6Filter C.struct_icmp6_filter
const (
SizeofSockaddrInet4 = C.sizeof_struct_sockaddr_in
SizeofSockaddrInet6 = C.sizeof_struct_sockaddr_in6
SizeofSockaddrAny = C.sizeof_struct_sockaddr_any
SizeofSockaddrUnix = C.sizeof_struct_sockaddr_un
SizeofSockaddrDatalink = C.sizeof_struct_sockaddr_dl
SizeofLinger = C.sizeof_struct_linger
SizeofIPMreq = C.sizeof_struct_ip_mreq
SizeofIPv6Mreq = C.sizeof_struct_ipv6_mreq
SizeofMsghdr = C.sizeof_struct_msghdr
SizeofCmsghdr = C.sizeof_struct_cmsghdr
SizeofInet6Pktinfo = C.sizeof_struct_in6_pktinfo
SizeofIPv6MTUInfo = C.sizeof_struct_ip6_mtuinfo
SizeofICMPv6Filter = C.sizeof_struct_icmp6_filter
)
// Ptrace requests
const (
PTRACE_TRACEME = C.PT_TRACE_ME
PTRACE_CONT = C.PT_CONTINUE
PTRACE_KILL = C.PT_KILL
)
// Events (kqueue, kevent)
type Kevent_t C.struct_kevent
// Select
type FdSet C.fd_set
// Routing and interface messages
const (
SizeofIfMsghdr = C.sizeof_struct_if_msghdr
SizeofIfData = C.sizeof_struct_if_data
SizeofIfaMsghdr = C.sizeof_struct_ifa_msghdr
SizeofIfAnnounceMsghdr = C.sizeof_struct_if_announcemsghdr
SizeofRtMsghdr = C.sizeof_struct_rt_msghdr
SizeofRtMetrics = C.sizeof_struct_rt_metrics
)
type IfMsghdr C.struct_if_msghdr
type IfData C.struct_if_data
type IfaMsghdr C.struct_ifa_msghdr
type IfAnnounceMsghdr C.struct_if_announcemsghdr
type RtMsghdr C.struct_rt_msghdr
type RtMetrics C.struct_rt_metrics
type Mclpool C.struct_mclpool
// Berkeley packet filter
const (
SizeofBpfVersion = C.sizeof_struct_bpf_version
SizeofBpfStat = C.sizeof_struct_bpf_stat
SizeofBpfProgram = C.sizeof_struct_bpf_program
SizeofBpfInsn = C.sizeof_struct_bpf_insn
SizeofBpfHdr = C.sizeof_struct_bpf_hdr
)
type BpfVersion C.struct_bpf_version
type BpfStat C.struct_bpf_stat
type BpfProgram C.struct_bpf_program
type BpfInsn C.struct_bpf_insn
type BpfHdr C.struct_bpf_hdr
type BpfTimeval C.struct_bpf_timeval
// Terminal handling
type Termios C.struct_termios
type Winsize C.struct_winsize
type Ptmget C.struct_ptmget
// fchmodat-like syscalls.
const (
AT_FDCWD = C.AT_FDCWD
AT_SYMLINK_NOFOLLOW = C.AT_SYMLINK_NOFOLLOW
)
// poll
type PollFd C.struct_pollfd
const (
POLLERR = C.POLLERR
POLLHUP = C.POLLHUP
POLLIN = C.POLLIN
POLLNVAL = C.POLLNVAL
POLLOUT = C.POLLOUT
POLLPRI = C.POLLPRI
POLLRDBAND = C.POLLRDBAND
POLLRDNORM = C.POLLRDNORM
POLLWRBAND = C.POLLWRBAND
POLLWRNORM = C.POLLWRNORM
)
// Sysctl
type Sysctlnode C.struct_sysctlnode
// Uname
type Utsname C.struct_utsname
// Clockinfo
const SizeofClockinfo = C.sizeof_struct_clockinfo
type Clockinfo C.struct_clockinfo

View file

@ -1,282 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
/*
Input to cgo -godefs. See README.md
*/
// +godefs map struct_in_addr [4]byte /* in_addr */
// +godefs map struct_in6_addr [16]byte /* in6_addr */
package unix
/*
#define KERNEL
#include <dirent.h>
#include <fcntl.h>
#include <poll.h>
#include <signal.h>
#include <termios.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/event.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/ptrace.h>
#include <sys/resource.h>
#include <sys/select.h>
#include <sys/signal.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <uvm/uvmexp.h>
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <netinet/tcp.h>
enum {
sizeofPtr = sizeof(void*),
};
union sockaddr_all {
struct sockaddr s1; // this one gets used for fields
struct sockaddr_in s2; // these pad it out
struct sockaddr_in6 s3;
struct sockaddr_un s4;
struct sockaddr_dl s5;
};
struct sockaddr_any {
struct sockaddr addr;
char pad[sizeof(union sockaddr_all) - sizeof(struct sockaddr)];
};
*/
import "C"
// Machine characteristics
const (
SizeofPtr = C.sizeofPtr
SizeofShort = C.sizeof_short
SizeofInt = C.sizeof_int
SizeofLong = C.sizeof_long
SizeofLongLong = C.sizeof_longlong
)
// Basic types
type (
_C_short C.short
_C_int C.int
_C_long C.long
_C_long_long C.longlong
)
// Time
type Timespec C.struct_timespec
type Timeval C.struct_timeval
// Processes
type Rusage C.struct_rusage
type Rlimit C.struct_rlimit
type _Gid_t C.gid_t
// Files
type Stat_t C.struct_stat
type Statfs_t C.struct_statfs
type Flock_t C.struct_flock
type Dirent C.struct_dirent
type Fsid C.fsid_t
// File system limits
const (
PathMax = C.PATH_MAX
)
// Sockets
type RawSockaddrInet4 C.struct_sockaddr_in
type RawSockaddrInet6 C.struct_sockaddr_in6
type RawSockaddrUnix C.struct_sockaddr_un
type RawSockaddrDatalink C.struct_sockaddr_dl
type RawSockaddr C.struct_sockaddr
type RawSockaddrAny C.struct_sockaddr_any
type _Socklen C.socklen_t
type Linger C.struct_linger
type Iovec C.struct_iovec
type IPMreq C.struct_ip_mreq
type IPv6Mreq C.struct_ipv6_mreq
type Msghdr C.struct_msghdr
type Cmsghdr C.struct_cmsghdr
type Inet6Pktinfo C.struct_in6_pktinfo
type IPv6MTUInfo C.struct_ip6_mtuinfo
type ICMPv6Filter C.struct_icmp6_filter
const (
SizeofSockaddrInet4 = C.sizeof_struct_sockaddr_in
SizeofSockaddrInet6 = C.sizeof_struct_sockaddr_in6
SizeofSockaddrAny = C.sizeof_struct_sockaddr_any
SizeofSockaddrUnix = C.sizeof_struct_sockaddr_un
SizeofSockaddrDatalink = C.sizeof_struct_sockaddr_dl
SizeofLinger = C.sizeof_struct_linger
SizeofIPMreq = C.sizeof_struct_ip_mreq
SizeofIPv6Mreq = C.sizeof_struct_ipv6_mreq
SizeofMsghdr = C.sizeof_struct_msghdr
SizeofCmsghdr = C.sizeof_struct_cmsghdr
SizeofInet6Pktinfo = C.sizeof_struct_in6_pktinfo
SizeofIPv6MTUInfo = C.sizeof_struct_ip6_mtuinfo
SizeofICMPv6Filter = C.sizeof_struct_icmp6_filter
)
// Ptrace requests
const (
PTRACE_TRACEME = C.PT_TRACE_ME
PTRACE_CONT = C.PT_CONTINUE
PTRACE_KILL = C.PT_KILL
)
// Events (kqueue, kevent)
type Kevent_t C.struct_kevent
// Select
type FdSet C.fd_set
// Routing and interface messages
const (
SizeofIfMsghdr = C.sizeof_struct_if_msghdr
SizeofIfData = C.sizeof_struct_if_data
SizeofIfaMsghdr = C.sizeof_struct_ifa_msghdr
SizeofIfAnnounceMsghdr = C.sizeof_struct_if_announcemsghdr
SizeofRtMsghdr = C.sizeof_struct_rt_msghdr
SizeofRtMetrics = C.sizeof_struct_rt_metrics
)
type IfMsghdr C.struct_if_msghdr
type IfData C.struct_if_data
type IfaMsghdr C.struct_ifa_msghdr
type IfAnnounceMsghdr C.struct_if_announcemsghdr
type RtMsghdr C.struct_rt_msghdr
type RtMetrics C.struct_rt_metrics
type Mclpool C.struct_mclpool
// Berkeley packet filter
const (
SizeofBpfVersion = C.sizeof_struct_bpf_version
SizeofBpfStat = C.sizeof_struct_bpf_stat
SizeofBpfProgram = C.sizeof_struct_bpf_program
SizeofBpfInsn = C.sizeof_struct_bpf_insn
SizeofBpfHdr = C.sizeof_struct_bpf_hdr
)
type BpfVersion C.struct_bpf_version
type BpfStat C.struct_bpf_stat
type BpfProgram C.struct_bpf_program
type BpfInsn C.struct_bpf_insn
type BpfHdr C.struct_bpf_hdr
type BpfTimeval C.struct_bpf_timeval
// Terminal handling
type Termios C.struct_termios
type Winsize C.struct_winsize
// fchmodat-like syscalls.
const (
AT_FDCWD = C.AT_FDCWD
AT_SYMLINK_NOFOLLOW = C.AT_SYMLINK_NOFOLLOW
)
// poll
type PollFd C.struct_pollfd
const (
POLLERR = C.POLLERR
POLLHUP = C.POLLHUP
POLLIN = C.POLLIN
POLLNVAL = C.POLLNVAL
POLLOUT = C.POLLOUT
POLLPRI = C.POLLPRI
POLLRDBAND = C.POLLRDBAND
POLLRDNORM = C.POLLRDNORM
POLLWRBAND = C.POLLWRBAND
POLLWRNORM = C.POLLWRNORM
)
// Signal Sets
type Sigset_t C.sigset_t
// Uname
type Utsname C.struct_utsname
// Uvmexp
const SizeofUvmexp = C.sizeof_struct_uvmexp
type Uvmexp C.struct_uvmexp
// Clockinfo
const SizeofClockinfo = C.sizeof_struct_clockinfo
type Clockinfo C.struct_clockinfo

View file

@ -1,266 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
/*
Input to cgo -godefs. See README.md
*/
// +godefs map struct_in_addr [4]byte /* in_addr */
// +godefs map struct_in6_addr [16]byte /* in6_addr */
package unix
/*
#define KERNEL
// These defines ensure that builds done on newer versions of Solaris are
// backwards-compatible with older versions of Solaris and
// OpenSolaris-based derivatives.
#define __USE_SUNOS_SOCKETS__ // msghdr
#define __USE_LEGACY_PROTOTYPES__ // iovec
#include <dirent.h>
#include <fcntl.h>
#include <netdb.h>
#include <limits.h>
#include <poll.h>
#include <signal.h>
#include <termios.h>
#include <termio.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/param.h>
#include <sys/resource.h>
#include <sys/select.h>
#include <sys/signal.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/statvfs.h>
#include <sys/time.h>
#include <sys/times.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <netinet/tcp.h>
#include <ustat.h>
#include <utime.h>
enum {
sizeofPtr = sizeof(void*),
};
union sockaddr_all {
struct sockaddr s1; // this one gets used for fields
struct sockaddr_in s2; // these pad it out
struct sockaddr_in6 s3;
struct sockaddr_un s4;
struct sockaddr_dl s5;
};
struct sockaddr_any {
struct sockaddr addr;
char pad[sizeof(union sockaddr_all) - sizeof(struct sockaddr)];
};
*/
import "C"
// Machine characteristics
const (
SizeofPtr = C.sizeofPtr
SizeofShort = C.sizeof_short
SizeofInt = C.sizeof_int
SizeofLong = C.sizeof_long
SizeofLongLong = C.sizeof_longlong
PathMax = C.PATH_MAX
MaxHostNameLen = C.MAXHOSTNAMELEN
)
// Basic types
type (
_C_short C.short
_C_int C.int
_C_long C.long
_C_long_long C.longlong
)
// Time
type Timespec C.struct_timespec
type Timeval C.struct_timeval
type Timeval32 C.struct_timeval32
type Tms C.struct_tms
type Utimbuf C.struct_utimbuf
// Processes
type Rusage C.struct_rusage
type Rlimit C.struct_rlimit
type _Gid_t C.gid_t
// Files
type Stat_t C.struct_stat
type Flock_t C.struct_flock
type Dirent C.struct_dirent
// Filesystems
type _Fsblkcnt_t C.fsblkcnt_t
type Statvfs_t C.struct_statvfs
// Sockets
type RawSockaddrInet4 C.struct_sockaddr_in
type RawSockaddrInet6 C.struct_sockaddr_in6
type RawSockaddrUnix C.struct_sockaddr_un
type RawSockaddrDatalink C.struct_sockaddr_dl
type RawSockaddr C.struct_sockaddr
type RawSockaddrAny C.struct_sockaddr_any
type _Socklen C.socklen_t
type Linger C.struct_linger
type Iovec C.struct_iovec
type IPMreq C.struct_ip_mreq
type IPv6Mreq C.struct_ipv6_mreq
type Msghdr C.struct_msghdr
type Cmsghdr C.struct_cmsghdr
type Inet6Pktinfo C.struct_in6_pktinfo
type IPv6MTUInfo C.struct_ip6_mtuinfo
type ICMPv6Filter C.struct_icmp6_filter
const (
SizeofSockaddrInet4 = C.sizeof_struct_sockaddr_in
SizeofSockaddrInet6 = C.sizeof_struct_sockaddr_in6
SizeofSockaddrAny = C.sizeof_struct_sockaddr_any
SizeofSockaddrUnix = C.sizeof_struct_sockaddr_un
SizeofSockaddrDatalink = C.sizeof_struct_sockaddr_dl
SizeofLinger = C.sizeof_struct_linger
SizeofIPMreq = C.sizeof_struct_ip_mreq
SizeofIPv6Mreq = C.sizeof_struct_ipv6_mreq
SizeofMsghdr = C.sizeof_struct_msghdr
SizeofCmsghdr = C.sizeof_struct_cmsghdr
SizeofInet6Pktinfo = C.sizeof_struct_in6_pktinfo
SizeofIPv6MTUInfo = C.sizeof_struct_ip6_mtuinfo
SizeofICMPv6Filter = C.sizeof_struct_icmp6_filter
)
// Select
type FdSet C.fd_set
// Misc
type Utsname C.struct_utsname
type Ustat_t C.struct_ustat
const (
AT_FDCWD = C.AT_FDCWD
AT_SYMLINK_NOFOLLOW = C.AT_SYMLINK_NOFOLLOW
AT_SYMLINK_FOLLOW = C.AT_SYMLINK_FOLLOW
AT_REMOVEDIR = C.AT_REMOVEDIR
AT_EACCESS = C.AT_EACCESS
)
// Routing and interface messages
const (
SizeofIfMsghdr = C.sizeof_struct_if_msghdr
SizeofIfData = C.sizeof_struct_if_data
SizeofIfaMsghdr = C.sizeof_struct_ifa_msghdr
SizeofRtMsghdr = C.sizeof_struct_rt_msghdr
SizeofRtMetrics = C.sizeof_struct_rt_metrics
)
type IfMsghdr C.struct_if_msghdr
type IfData C.struct_if_data
type IfaMsghdr C.struct_ifa_msghdr
type RtMsghdr C.struct_rt_msghdr
type RtMetrics C.struct_rt_metrics
// Berkeley packet filter
const (
SizeofBpfVersion = C.sizeof_struct_bpf_version
SizeofBpfStat = C.sizeof_struct_bpf_stat
SizeofBpfProgram = C.sizeof_struct_bpf_program
SizeofBpfInsn = C.sizeof_struct_bpf_insn
SizeofBpfHdr = C.sizeof_struct_bpf_hdr
)
type BpfVersion C.struct_bpf_version
type BpfStat C.struct_bpf_stat
type BpfProgram C.struct_bpf_program
type BpfInsn C.struct_bpf_insn
type BpfTimeval C.struct_bpf_timeval
type BpfHdr C.struct_bpf_hdr
// Terminal handling
type Termios C.struct_termios
type Termio C.struct_termio
type Winsize C.struct_winsize
// poll
type PollFd C.struct_pollfd
const (
POLLERR = C.POLLERR
POLLHUP = C.POLLHUP
POLLIN = C.POLLIN
POLLNVAL = C.POLLNVAL
POLLOUT = C.POLLOUT
POLLPRI = C.POLLPRI
POLLRDBAND = C.POLLRDBAND
POLLRDNORM = C.POLLRDNORM
POLLWRBAND = C.POLLWRBAND
POLLWRNORM = C.POLLWRNORM
)

View file

@ -1,556 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"bufio"
"fmt"
"log"
"net/http"
"sort"
"strings"
"unicode/utf8"
"golang.org/x/text/encoding"
"golang.org/x/text/internal/gen"
)
// ascii is the 128-character string holding the code points U+0000 through
// U+007F in ascending order. It is used as the lower half of mappings that
// agree with ASCII, and main treats any mapping that starts with it as an
// ASCII superset.
const ascii = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" +
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
` !"#$%&'()*+,-./0123456789:;<=>?` +
`@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` +
"`abcdefghijklmnopqrstuvwxyz{|}~\u007f"
// encodings describes every charmap that main emits, one entry per encoding.
var encodings = []struct {
// name is the human-readable encoding name; it is written into the
// generated doc comment and into the Charmap's name field.
name string
// mib is the identifier suffix emitted as "identifier.<mib>".
mib string
// comment, when non-empty, is appended to the generated doc comment.
comment string
// varName is a comma-separated list of Go identifiers. The first one is
// the variable that gets declared; all of them are added to listAll.
varName string
// replacement is emitted verbatim as the Charmap's replacement byte.
replacement byte
// mapping is either a URL under one of the two prefixes main recognizes
// (WHATWG index or ICU .ucm), in which case it is downloaded and parsed,
// or the literal mapping string itself.
mapping string
}{
{
"IBM Code Page 037",
"IBM037",
"",
"CodePage037",
0x3f,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM037-2.1.2.ucm",
},
{
"IBM Code Page 437",
"PC8CodePage437",
"",
"CodePage437",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM437-2.1.2.ucm",
},
{
"IBM Code Page 850",
"PC850Multilingual",
"",
"CodePage850",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM850-2.1.2.ucm",
},
{
"IBM Code Page 852",
"PCp852",
"",
"CodePage852",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM852-2.1.2.ucm",
},
{
"IBM Code Page 855",
"IBM855",
"",
"CodePage855",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM855-2.1.2.ucm",
},
{
"Windows Code Page 858", // PC latin1 with Euro
"IBM00858",
"",
"CodePage858",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/windows-858-2000.ucm",
},
{
"IBM Code Page 860",
"IBM860",
"",
"CodePage860",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM860-2.1.2.ucm",
},
{
"IBM Code Page 862",
"PC862LatinHebrew",
"",
"CodePage862",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM862-2.1.2.ucm",
},
{
"IBM Code Page 863",
"IBM863",
"",
"CodePage863",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM863-2.1.2.ucm",
},
{
"IBM Code Page 865",
"IBM865",
"",
"CodePage865",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM865-2.1.2.ucm",
},
{
"IBM Code Page 866",
"IBM866",
"",
"CodePage866",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-ibm866.txt",
},
{
"IBM Code Page 1047",
"IBM1047",
"",
"CodePage1047",
0x3f,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM1047-2.1.2.ucm",
},
{
"IBM Code Page 1140",
"IBM01140",
"",
"CodePage1140",
0x3f,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/ibm-1140_P100-1997.ucm",
},
{
"ISO 8859-1",
"ISOLatin1",
"",
"ISO8859_1",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_1-1998.ucm",
},
{
"ISO 8859-2",
"ISOLatin2",
"",
"ISO8859_2",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-2.txt",
},
{
"ISO 8859-3",
"ISOLatin3",
"",
"ISO8859_3",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-3.txt",
},
{
"ISO 8859-4",
"ISOLatin4",
"",
"ISO8859_4",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-4.txt",
},
{
"ISO 8859-5",
"ISOLatinCyrillic",
"",
"ISO8859_5",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-5.txt",
},
{
"ISO 8859-6",
"ISOLatinArabic",
"",
"ISO8859_6,ISO8859_6E,ISO8859_6I",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-6.txt",
},
{
"ISO 8859-7",
"ISOLatinGreek",
"",
"ISO8859_7",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-7.txt",
},
{
"ISO 8859-8",
"ISOLatinHebrew",
"",
"ISO8859_8,ISO8859_8E,ISO8859_8I",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-8.txt",
},
{
"ISO 8859-9",
"ISOLatin5",
"",
"ISO8859_9",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_9-1999.ucm",
},
{
"ISO 8859-10",
"ISOLatin6",
"",
"ISO8859_10",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-10.txt",
},
{
"ISO 8859-13",
"ISO885913",
"",
"ISO8859_13",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-13.txt",
},
{
"ISO 8859-14",
"ISO885914",
"",
"ISO8859_14",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-14.txt",
},
{
"ISO 8859-15",
"ISO885915",
"",
"ISO8859_15",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-15.txt",
},
{
"ISO 8859-16",
"ISO885916",
"",
"ISO8859_16",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-16.txt",
},
{
"KOI8-R",
"KOI8R",
"",
"KOI8R",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-koi8-r.txt",
},
{
"KOI8-U",
"KOI8U",
"",
"KOI8U",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-koi8-u.txt",
},
{
"Macintosh",
"Macintosh",
"",
"Macintosh",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-macintosh.txt",
},
{
"Macintosh Cyrillic",
"MacintoshCyrillic",
"",
"MacintoshCyrillic",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-x-mac-cyrillic.txt",
},
{
"Windows 874",
"Windows874",
"",
"Windows874",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-874.txt",
},
{
"Windows 1250",
"Windows1250",
"",
"Windows1250",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1250.txt",
},
{
"Windows 1251",
"Windows1251",
"",
"Windows1251",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1251.txt",
},
{
"Windows 1252",
"Windows1252",
"",
"Windows1252",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1252.txt",
},
{
"Windows 1253",
"Windows1253",
"",
"Windows1253",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1253.txt",
},
{
"Windows 1254",
"Windows1254",
"",
"Windows1254",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1254.txt",
},
{
"Windows 1255",
"Windows1255",
"",
"Windows1255",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1255.txt",
},
{
"Windows 1256",
"Windows1256",
"",
"Windows1256",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1256.txt",
},
{
"Windows 1257",
"Windows1257",
"",
"Windows1257",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1257.txt",
},
{
"Windows 1258",
"Windows1258",
"",
"Windows1258",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1258.txt",
},
{
"X-User-Defined",
"XUserDefined",
"It is defined at http://encoding.spec.whatwg.org/#x-user-defined",
"XUserDefined",
encoding.ASCIISub,
ascii +
"\uf780\uf781\uf782\uf783\uf784\uf785\uf786\uf787" +
"\uf788\uf789\uf78a\uf78b\uf78c\uf78d\uf78e\uf78f" +
"\uf790\uf791\uf792\uf793\uf794\uf795\uf796\uf797" +
"\uf798\uf799\uf79a\uf79b\uf79c\uf79d\uf79e\uf79f" +
"\uf7a0\uf7a1\uf7a2\uf7a3\uf7a4\uf7a5\uf7a6\uf7a7" +
"\uf7a8\uf7a9\uf7aa\uf7ab\uf7ac\uf7ad\uf7ae\uf7af" +
"\uf7b0\uf7b1\uf7b2\uf7b3\uf7b4\uf7b5\uf7b6\uf7b7" +
"\uf7b8\uf7b9\uf7ba\uf7bb\uf7bc\uf7bd\uf7be\uf7bf" +
"\uf7c0\uf7c1\uf7c2\uf7c3\uf7c4\uf7c5\uf7c6\uf7c7" +
"\uf7c8\uf7c9\uf7ca\uf7cb\uf7cc\uf7cd\uf7ce\uf7cf" +
"\uf7d0\uf7d1\uf7d2\uf7d3\uf7d4\uf7d5\uf7d6\uf7d7" +
"\uf7d8\uf7d9\uf7da\uf7db\uf7dc\uf7dd\uf7de\uf7df" +
"\uf7e0\uf7e1\uf7e2\uf7e3\uf7e4\uf7e5\uf7e6\uf7e7" +
"\uf7e8\uf7e9\uf7ea\uf7eb\uf7ec\uf7ed\uf7ee\uf7ef" +
"\uf7f0\uf7f1\uf7f2\uf7f3\uf7f4\uf7f5\uf7f6\uf7f7" +
"\uf7f8\uf7f9\uf7fa\uf7fb\uf7fc\uf7fd\uf7fe\uf7ff",
},
}
// getWHATWG downloads the WHATWG single-byte index at url and returns the
// complete mapping string: the ascii table followed by the 128 runes the
// index assigns to the upper half.
//
// Every line that is neither blank nor a '#' comment must have the form
// "<index>\t0x<code point>" with index in [0, 128). Indexes that the file does
// not mention, and code points in [0x80, 0xa0), are left as U+FFFD.
//
// Any failure (request error, non-200 status, unparsable line, out-of-range
// index, read error) terminates the program through log.Fatalf.
func getWHATWG(url string) string {
	res, err := http.Get(url)
	if err != nil {
		log.Fatalf("%q: Get: %v", url, err)
	}
	defer res.Body.Close()
	// http.Get reports only transport errors; without this check an error
	// page would be handed to the parser and fail with a misleading message.
	if res.StatusCode != http.StatusOK {
		log.Fatalf("%q: Get: unexpected status %s", url, res.Status)
	}

	mapping := make([]rune, 128)
	for i := range mapping {
		mapping[i] = '\ufffd'
	}

	scanner := bufio.NewScanner(res.Body)
	for scanner.Scan() {
		s := strings.TrimSpace(scanner.Text())
		if s == "" || s[0] == '#' {
			continue
		}
		x, y := 0, 0
		if _, err := fmt.Sscanf(s, "%d\t0x%x", &x, &y); err != nil {
			log.Fatalf("could not parse %q", s)
		}
		if x < 0 || 128 <= x {
			log.Fatalf("code %d is out of range", x)
		}
		if 0x80 <= y && y < 0xa0 {
			// We diverge from the WHATWG spec by mapping control characters
			// in the range [0x80, 0xa0) to U+FFFD.
			continue
		}
		mapping[x] = rune(y)
	}
	// Scan returning false may mean a read error rather than EOF; a silently
	// truncated download would otherwise leave part of the table as U+FFFD.
	if err := scanner.Err(); err != nil {
		log.Fatalf("%q: reading response: %v", url, err)
	}
	return ascii + string(mapping)
}
// getUCM downloads the ICU .ucm charset description at url and returns a
// 256-rune string whose i'th rune is the code point listed for byte i.
//
// Only lines matching "<Uxxxx> \xhh |0" contribute to the result; every other
// line is ignored. Bytes for which no such line exists map to U+FFFD.
//
// The program is terminated through log.Fatalf when the request fails, the
// server answers with a non-200 status, the body cannot be read completely,
// or fewer than 200 mapping lines are found.
func getUCM(url string) string {
	res, err := http.Get(url)
	if err != nil {
		log.Fatalf("%q: Get: %v", url, err)
	}
	defer res.Body.Close()
	// http.Get reports only transport errors; reject error pages explicitly
	// so the failure names the real cause.
	if res.StatusCode != http.StatusOK {
		log.Fatalf("%q: Get: unexpected status %s", url, res.Status)
	}

	mapping := make([]rune, 256)
	for i := range mapping {
		mapping[i] = '\ufffd'
	}

	charsFound := 0
	scanner := bufio.NewScanner(res.Body)
	for scanner.Scan() {
		s := strings.TrimSpace(scanner.Text())
		if s == "" || s[0] == '#' {
			continue
		}
		var c byte
		var r rune
		// Lines that do not match the mapping pattern (headers, other
		// precision markers) are skipped rather than treated as errors.
		if _, err := fmt.Sscanf(s, `<U%x> \x%x |0`, &r, &c); err != nil {
			continue
		}
		mapping[c] = r
		charsFound++
	}
	// A read error after 200 or more lines would pass the sanity check below
	// and silently yield a truncated table, so it has to be checked first.
	if err := scanner.Err(); err != nil {
		log.Fatalf("%q: reading response: %v", url, err)
	}

	if charsFound < 200 {
		log.Fatalf("%q: only %d characters found (wrong page format?)", url, charsFound)
	}

	return string(mapping)
}
// main generates tables.go for package charmap.
//
// For every entry in encodings it resolves the mapping (downloading it when
// the entry holds a WHATWG or ICU URL), then prints a Charmap variable holding
// the decode table (one UTF-8 encoded rune per byte value) and the encode
// table (byte<<24|rune words sorted by rune). Finally it prints listAll, the
// list of all generated variable names.
func main() {
	// mibs records the MIB identifiers already emitted so that a duplicate
	// entry in encodings is caught.
	mibs := map[string]bool{}
	all := []string{}

	w := gen.NewCodeWriter()
	defer w.WriteGoFile("tables.go", "charmap")

	printf := func(s string, a ...interface{}) { fmt.Fprintf(w, s, a...) }

	printf("import (\n")
	printf("\t\"golang.org/x/text/encoding\"\n")
	printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n")
	printf(")\n\n")
	for _, e := range encodings {
		// varName may list several identifiers; only the first is declared
		// here, but all of them end up in listAll.
		varNames := strings.Split(e.varName, ",")
		all = append(all, varNames...)
		varName := varNames[0]
		switch {
		case strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/"):
			e.mapping = getWHATWG(e.mapping)
		case strings.HasPrefix(e.mapping, "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/"):
			e.mapping = getUCM(e.mapping)
		}

		asciiSuperset, low := strings.HasPrefix(e.mapping, ascii), 0x00
		if asciiSuperset {
			low = 0x80
		}
		// The unexported backing variable lower-cases the leading letter, or
		// the whole three-letter "ISO"/"KOI" prefix.
		lvn := 1
		if strings.HasPrefix(varName, "ISO") || strings.HasPrefix(varName, "KOI") {
			lvn = 3
		}
		lowerVarName := strings.ToLower(varName[:lvn]) + varName[lvn:]
		printf("// %s is the %s encoding.\n", varName, e.name)
		if e.comment != "" {
			printf("//\n// %s\n", e.comment)
		}
		printf("var %s *Charmap = &%s\n\nvar %s = Charmap{\nname: %q,\n",
			varName, lowerVarName, lowerVarName, e.name)
		if mibs[e.mib] {
			log.Fatalf("MIB type %q declared multiple times.", e.mib)
		}
		// Remember the identifier; without this the check above never fires.
		mibs[e.mib] = true
		printf("mib: identifier.%s,\n", e.mib)
		printf("asciiSuperset: %t,\n", asciiSuperset)
		printf("low: 0x%02x,\n", low)
		printf("replacement: 0x%02x,\n", e.replacement)

		printf("decode: [256]utf8Enc{\n")
		// i counts runes (not bytes) of the mapping, i.e. the byte value
		// being described. backMapping keeps the first byte seen per rune.
		i, backMapping := 0, map[rune]byte{}
		for _, c := range e.mapping {
			if _, ok := backMapping[c]; !ok && c != utf8.RuneError {
				backMapping[c] = byte(i)
			}
			var buf [8]byte
			n := utf8.EncodeRune(buf[:], c)
			if n > 3 {
				panic(fmt.Sprintf("rune %q (%U) is too long", c, c))
			}
			printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2])
			if i%2 == 1 {
				printf("\n")
			}
			i++
		}
		printf("},\n")

		printf("encode: [256]uint32{\n")
		encode := make([]uint32, 0, 256)
		for c, i := range backMapping {
			encode = append(encode, uint32(i)<<24|uint32(c))
		}
		sort.Sort(byRune(encode))
		// Pad to 256 entries by repeating the last (largest-rune) element.
		for len(encode) < cap(encode) {
			encode = append(encode, encode[len(encode)-1])
		}
		for i, enc := range encode {
			printf("0x%08x,", enc)
			if i%8 == 7 {
				printf("\n")
			}
		}
		printf("},\n}\n")

		// Add an estimate of the size of a single Charmap{} struct value, which
		// includes two 256 elem arrays of 4 bytes and some extra fields, which
		// align to 3 uint64s on 64-bit architectures.
		w.Size += 2*4*256 + 3*8
	}
	// TODO: add proper line breaking.
	printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n"))
}
// byRune implements sort.Interface for a slice of packed uint32 entries,
// ordering them by their low 24 bits only; the top 8 bits are ignored when
// comparing.
type byRune []uint32

// Len returns the number of entries.
func (b byRune) Len() int {
	return len(b)
}

// Less reports whether the low 24 bits of entry i are smaller than those of
// entry j.
func (b byRune) Less(i, j int) bool {
	const lowMask = 0xffffff
	return b[i]&lowMask < b[j]&lowMask
}

// Swap exchanges entries i and j.
func (b byRune) Swap(i, j int) {
	b[j], b[i] = b[i], b[j]
}

View file

@ -124,7 +124,7 @@ func (e *Encoder) Writer(w io.Writer) io.Writer {
}
// ASCIISub is the ASCII substitute character, as recommended by
// http://unicode.org/reports/tr36/#Text_Comparison
// https://unicode.org/reports/tr36/#Text_Comparison
const ASCIISub = '\x1a'
// Nop is the nop encoding. Its transformed bytes are the same as the source

View file

@ -1,173 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"bytes"
"encoding/json"
"fmt"
"log"
"strings"
"golang.org/x/text/internal/gen"
)
// group is the JSON shape main decodes encodings.json into: the file is read
// as a list of groups, each holding a list of encodings.
type group struct {
Encodings []struct {
// Labels are the names emitted as keys of the generated nameMap.
Labels []string
// Name is the encoding name; it is lower-cased before being used as a
// key into consts and as an entry of the generated canonical array.
Name string
}
}
// main reads the WHATWG encodings.json index and writes tables.go for package
// htmlindex. The generated file contains the htmlEncoding constants, the
// canonical name array, the label-to-encoding nameMap, the localeMap and the
// space-separated locales string.
func main() {
	gen.Init()

	src := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json")
	var groups []group
	if err := json.NewDecoder(src).Decode(&groups); err != nil {
		log.Fatalf("Error reading encodings.json: %v", err)
	}

	out := &bytes.Buffer{}

	// Constant block: one htmlEncoding constant per encoding. Every encoding
	// of the first group carries the explicit "= iota" form.
	fmt.Fprintln(out, "type htmlEncoding byte")
	fmt.Fprintln(out, "const (")
	for gi, g := range groups {
		for _, enc := range g.Encodings {
			canon := strings.ToLower(enc.Name)
			ident := consts[canon]
			if ident == "" {
				log.Fatalf("No const defined for %s.", canon)
			}
			if gi != 0 {
				fmt.Fprintf(out, "%s\n", ident)
				continue
			}
			fmt.Fprintf(out, "%s htmlEncoding = iota\n", ident)
		}
	}
	fmt.Fprintln(out, "numEncodings")
	fmt.Fprint(out, ")\n\n")

	// Canonical (lower-cased) names, in the same order as the constants.
	fmt.Fprintln(out, "var canonical = [numEncodings]string{")
	for _, g := range groups {
		for _, enc := range g.Encodings {
			fmt.Fprintf(out, "%q,\n", strings.ToLower(enc.Name))
		}
	}
	fmt.Fprint(out, "}\n\n")

	// Every label of an encoding maps to that encoding's constant.
	fmt.Fprintln(out, "var nameMap = map[string]htmlEncoding{")
	for _, g := range groups {
		for _, enc := range g.Encodings {
			ident := consts[strings.ToLower(enc.Name)]
			for _, label := range enc.Labels {
				fmt.Fprintf(out, "%q: %s,\n", label, ident)
			}
		}
	}
	fmt.Fprint(out, "}\n\n")

	// Locale table: localeMap[i] is the encoding for the i-th tag of the
	// locales string.
	var tags []string
	fmt.Fprintln(out, "var localeMap = []htmlEncoding{")
	for _, loc := range locales {
		tags = append(tags, loc.tag)
		fmt.Fprintf(out, "%s, // %s \n", consts[loc.name], loc.tag)
	}
	fmt.Fprint(out, "}\n\n")
	fmt.Fprintf(out, "const locales = %q\n", strings.Join(tags, " "))

	gen.WriteGoFile("tables.go", "htmlindex", out.Bytes())
}
// consts maps a canonical (lower-cased) encoding name to the identifier of
// the internal constant generated for it. main aborts when an encoding read
// from encodings.json has no entry here.
var consts = map[string]string{
"utf-8": "utf8",
"ibm866": "ibm866",
"iso-8859-2": "iso8859_2",
"iso-8859-3": "iso8859_3",
"iso-8859-4": "iso8859_4",
"iso-8859-5": "iso8859_5",
"iso-8859-6": "iso8859_6",
"iso-8859-7": "iso8859_7",
"iso-8859-8": "iso8859_8",
"iso-8859-8-i": "iso8859_8I",
"iso-8859-10": "iso8859_10",
"iso-8859-13": "iso8859_13",
"iso-8859-14": "iso8859_14",
"iso-8859-15": "iso8859_15",
"iso-8859-16": "iso8859_16",
"koi8-r": "koi8r",
"koi8-u": "koi8u",
"macintosh": "macintosh",
"windows-874": "windows874",
"windows-1250": "windows1250",
"windows-1251": "windows1251",
"windows-1252": "windows1252",
"windows-1253": "windows1253",
"windows-1254": "windows1254",
"windows-1255": "windows1255",
"windows-1256": "windows1256",
"windows-1257": "windows1257",
"windows-1258": "windows1258",
"x-mac-cyrillic": "macintoshCyrillic",
"gbk": "gbk",
"gb18030": "gb18030",
// "hz-gb-2312": "hzgb2312", // Was removed from WhatWG
"big5": "big5",
"euc-jp": "eucjp",
"iso-2022-jp": "iso2022jp",
"shift_jis": "shiftJIS",
"euc-kr": "euckr",
"replacement": "replacement",
"utf-16be": "utf16be",
"utf-16le": "utf16le",
"x-user-defined": "xUserDefined",
}
// locales is taken from
// https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm.
//
// Each entry pairs a language tag with the canonical name of an encoding.
// main looks the name up in consts to emit localeMap and joins the tags, in
// this order, into the generated locales string.
var locales = []struct{ tag, name string }{
// The default value. Explicitly state latin to benefit from the exact
// script option, while still making 1252 the default encoding for languages
// written in Latin script.
{"und_Latn", "windows-1252"},
{"ar", "windows-1256"},
{"ba", "windows-1251"},
{"be", "windows-1251"},
{"bg", "windows-1251"},
{"cs", "windows-1250"},
{"el", "iso-8859-7"},
{"et", "windows-1257"},
{"fa", "windows-1256"},
{"he", "windows-1255"},
{"hr", "windows-1250"},
{"hu", "iso-8859-2"},
{"ja", "shift_jis"},
{"kk", "windows-1251"},
{"ko", "euc-kr"},
{"ku", "windows-1254"},
{"ky", "windows-1251"},
{"lt", "windows-1257"},
{"lv", "windows-1257"},
{"mk", "windows-1251"},
{"pl", "iso-8859-2"},
{"ru", "windows-1251"},
{"sah", "windows-1251"},
{"sk", "windows-1250"},
{"sl", "iso-8859-2"},
{"sr", "windows-1251"},
{"tg", "windows-1251"},
{"th", "windows-874"},
{"tr", "windows-1254"},
{"tt", "windows-1251"},
{"uk", "windows-1251"},
{"vi", "windows-1258"},
{"zh-hans", "gb18030"},
{"zh-hant", "big5"},
}

View file

@ -306,6 +306,7 @@ var nameMap = map[string]htmlEncoding{
"iso-2022-cn": replacement,
"iso-2022-cn-ext": replacement,
"iso-2022-kr": replacement,
"replacement": replacement,
"utf-16be": utf16be,
"utf-16": utf16le,
"utf-16le": utf16le,

View file

@ -1,137 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"bytes"
"encoding/xml"
"fmt"
"io"
"log"
"strings"
"golang.org/x/text/internal/gen"
)
// registry is the XML shape main decodes character-sets.xml into. Only the
// elements and attributes named in the struct tags below are captured.
type registry struct {
XMLName xml.Name `xml:"registry"`
Updated string `xml:"updated"`
// Registry holds the nested <registry> elements; main only uses the first
// one and requires its ID to be "character-sets-1".
Registry []struct {
ID string `xml:"id,attr"`
// Record holds one entry per <record>; main emits one MIB constant for
// each.
Record []struct {
Name string `xml:"name"`
// Xref entries of type "rfc" and "uri" are emitted by main as
// "Reference:" comment lines.
Xref []struct {
Type string `xml:"type,attr"`
Data string `xml:"data,attr"`
} `xml:"xref"`
// Desc keeps the raw inner XML of <description>; main re-tokenizes
// it to produce the comment text.
Desc struct {
Data string `xml:",innerxml"`
// Any []struct {
// Data string `xml:",chardata"`
// } `xml:",any"`
// Data string `xml:",chardata"`
} `xml:"description,"`
// MIB is the text of <value>; main emits it as the constant's value.
MIB string `xml:"value"`
// Alias values starting with "cs" supply the constant name.
Alias []string `xml:"alias"`
MIME string `xml:"preferred_alias"`
} `xml:"record"`
} `xml:"registry"`
}
// main reads the IANA character-sets.xml registry and writes mib.go for
// package identifier: one documented MIB constant per registry record.
//
// The constant name is taken from the record's "cs" alias (the last matching
// one wins). The comment is built from the record's description, with xref
// elements replaced by their data attribute unless they refer to a person.
func main() {
	r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml")
	reg := &registry{}
	if err := xml.NewDecoder(r).Decode(&reg); err != nil && err != io.EOF {
		log.Fatalf("Error decoding charset registry: %v", err)
	}
	// Check for an empty registry list separately: indexing Registry[0] to
	// build the error message would otherwise panic with an index-out-of-range
	// error instead of reporting the problem.
	if len(reg.Registry) == 0 {
		log.Fatalf("No registry found in character-sets.xml")
	}
	if id := reg.Registry[0].ID; id != "character-sets-1" {
		log.Fatalf("Unexpected ID %s", id)
	}

	w := &bytes.Buffer{}
	fmt.Fprintf(w, "const (\n")
	for _, rec := range reg.Registry[0].Record {
		constName := ""
		for _, a := range rec.Alias {
			if strings.HasPrefix(a, "cs") && strings.IndexByte(a, '-') == -1 {
				// Some of the constant definitions have comments in them. Strip those.
				constName = strings.Title(strings.SplitN(a[2:], "\n", 2)[0])
			}
		}
		if constName == "" {
			switch rec.MIB {
			case "2085":
				constName = "HZGB2312" // Not listed as alias for some reason.
			default:
				log.Fatalf("No cs alias defined for %s.", rec.MIB)
			}
		}
		if rec.MIME != "" {
			rec.MIME = fmt.Sprintf(" (MIME: %s)", rec.MIME)
		}
		fmt.Fprintf(w, "// %s is the MIB identifier with IANA name %s%s.\n//\n", constName, rec.Name, rec.MIME)
		if len(rec.Desc.Data) > 0 {
			fmt.Fprint(w, "// ")
			// Re-tokenize the raw description XML so that text and xref
			// elements can be rendered as comment lines.
			d := xml.NewDecoder(strings.NewReader(rec.Desc.Data))
			inElem := true
			attr := ""
			for {
				t, err := d.Token()
				if err != nil {
					if err != io.EOF {
						log.Fatal(err)
					}
					break
				}
				switch x := t.(type) {
				case xml.CharData:
					attr = "" // Don't need attribute info.
					a := bytes.Split([]byte(x), []byte("\n"))
					for i, b := range a {
						if b = bytes.TrimSpace(b); len(b) != 0 {
							// Start a new comment line for every non-empty
							// line after the first, unless the text directly
							// follows the start of an xref element.
							if !inElem && i > 0 {
								fmt.Fprint(w, "\n// ")
							}
							inElem = false
							fmt.Fprintf(w, "%s ", string(b))
						}
					}
				case xml.StartElement:
					if x.Name.Local == "xref" {
						inElem = true
						use := false
						for _, a := range x.Attr {
							if a.Name.Local == "type" {
								use = use || a.Value != "person"
							}
							if a.Name.Local == "data" && use {
								attr = a.Value + " "
							}
						}
					}
				case xml.EndElement:
					inElem = false
					fmt.Fprint(w, attr)
				}
			}
			fmt.Fprint(w, "\n")
		}
		for _, x := range rec.Xref {
			switch x.Type {
			case "rfc":
				fmt.Fprintf(w, "// Reference: %s\n", strings.ToUpper(x.Data))
			case "uri":
				fmt.Fprintf(w, "// Reference: %s\n", x.Data)
			}
		}
		fmt.Fprintf(w, "%s MIB = %s\n", constName, rec.MIB)
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, ")")

	gen.WriteGoFile("mib.go", "identifier", w.Bytes())
}

View file

@ -34,7 +34,7 @@ package identifier
// - http://www.iana.org/assignments/character-sets/character-sets.xhtml
// - http://www.iana.org/assignments/ianacharset-mib/ianacharset-mib
// - http://www.ietf.org/rfc/rfc2978.txt
// - http://www.unicode.org/reports/tr22/
// - https://www.unicode.org/reports/tr22/
// - http://www.w3.org/TR/encoding/
// - https://encoding.spec.whatwg.org/
// - https://encoding.spec.whatwg.org/encodings.json

View file

@ -884,27 +884,27 @@ const (
// CESU8 is the MIB identifier with IANA name CESU-8.
//
// http://www.unicode.org/unicode/reports/tr26
// https://www.unicode.org/unicode/reports/tr26
CESU8 MIB = 1016
// UTF32 is the MIB identifier with IANA name UTF-32.
//
// http://www.unicode.org/unicode/reports/tr19/
// https://www.unicode.org/unicode/reports/tr19/
UTF32 MIB = 1017
// UTF32BE is the MIB identifier with IANA name UTF-32BE.
//
// http://www.unicode.org/unicode/reports/tr19/
// https://www.unicode.org/unicode/reports/tr19/
UTF32BE MIB = 1018
// UTF32LE is the MIB identifier with IANA name UTF-32LE.
//
// http://www.unicode.org/unicode/reports/tr19/
// https://www.unicode.org/unicode/reports/tr19/
UTF32LE MIB = 1019
// BOCU1 is the MIB identifier with IANA name BOCU-1.
//
// http://www.unicode.org/notes/tn6/
// https://www.unicode.org/notes/tn6/
BOCU1 MIB = 1020
// Windows30Latin1 is the MIB identifier with IANA name ISO-8859-1-Windows-3.0-Latin-1.

View file

@ -1,161 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This program generates tables.go:
// go run maketables.go | gofmt > tables.go
// TODO: Emoji extensions?
// http://www.unicode.org/faq/emoji_dingbats.html
// http://www.unicode.org/Public/UNIDATA/EmojiSources.txt
import (
"bufio"
"fmt"
"log"
"net/http"
"sort"
"strings"
)
// entry is one slot of main's reverse table (indexed by Unicode code point).
// jisCode is the index read from the WHATWG table and table is the position
// of that table in main's tables slice; main uses table == -1 to mark a slot
// as unused.
type entry struct {
jisCode, table int
}
// main prints the source of tables.go for package japanese on standard
// output. It downloads the WHATWG JIS X 0208 and JIS X 0212 indexes, prints a
// decode table for each, and prints encode tables built from the reverse
// mapping, split into intervals separated by long unmapped runs.
func main() {
	fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
	fmt.Printf("// Package japanese provides Japanese encodings such as EUC-JP and Shift JIS.\n")
	fmt.Printf(`package japanese // import "golang.org/x/text/encoding/japanese"` + "\n\n")

	// reverse is indexed by Unicode code point; table == -1 marks code points
	// for which no JIS code has been seen.
	reverse := [65536]entry{}
	for i := range reverse {
		reverse[i].table = -1
	}

	tables := []struct {
		url  string
		name string
	}{
		{"http://encoding.spec.whatwg.org/index-jis0208.txt", "0208"},
		{"http://encoding.spec.whatwg.org/index-jis0212.txt", "0212"},
	}
	for i, table := range tables {
		res, err := http.Get(table.url)
		if err != nil {
			log.Fatalf("%q: Get: %v", table.url, err)
		}

		mapping := [65536]uint16{}
		scanner := bufio.NewScanner(res.Body)
		for scanner.Scan() {
			s := strings.TrimSpace(scanner.Text())
			if s == "" || s[0] == '#' {
				continue
			}
			x, y := 0, uint16(0)
			if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
				log.Fatalf("%q: could not parse %q", table.url, s)
			}
			if x < 0 || 120*94 <= x {
				log.Fatalf("%q: JIS code %d is out of range", table.url, x)
			}
			mapping[x] = y
			// Only the first JIS code seen for a code point is recorded.
			if reverse[y].table == -1 {
				reverse[y] = entry{jisCode: x, table: i}
			}
		}
		scanErr := scanner.Err()
		// Close the body as soon as this table has been read. A defer inside
		// the loop would keep every response open until main returns.
		res.Body.Close()
		if scanErr != nil {
			log.Fatalf("%q: scanner error: %v", table.url, scanErr)
		}

		fmt.Printf("// jis%sDecode is the decoding table from JIS %s code to Unicode.\n// It is defined at %s\n",
			table.name, table.name, table.url)
		fmt.Printf("var jis%sDecode = [...]uint16{\n", table.name)
		for i, m := range mapping {
			if m != 0 {
				fmt.Printf("\t%d: 0x%04X,\n", i, m)
			}
		}
		fmt.Printf("}\n\n")
	}

	// Any run of at least separation continuous zero entries in the reverse map will
	// be a separate encode table.
	const separation = 1024

	intervals := []interval(nil)
	low, high := -1, -1
	for i, v := range reverse {
		if v.table == -1 {
			continue
		}
		if low < 0 {
			low = i
		} else if i-high >= separation {
			if high >= 0 {
				intervals = append(intervals, interval{low, high})
			}
			low = i
		}
		high = i + 1
	}
	if high >= 0 {
		intervals = append(intervals, interval{low, high})
	}
	sort.Sort(byDecreasingLength(intervals))

	fmt.Printf("const (\n")
	fmt.Printf("\tjis0208 = 1\n")
	fmt.Printf("\tjis0212 = 2\n")
	fmt.Printf("\tcodeMask = 0x7f\n")
	fmt.Printf("\tcodeShift = 7\n")
	fmt.Printf("\ttableShift = 14\n")
	fmt.Printf(")\n\n")

	fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
	fmt.Printf("// encodeX are the encoding tables from Unicode to JIS code,\n")
	fmt.Printf("// sorted by decreasing length.\n")
	for i, v := range intervals {
		fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
	}
	fmt.Printf("//\n")
	fmt.Printf("// The high two bits of the value record whether the JIS code comes from the\n")
	fmt.Printf("// JIS0208 table (high bits == 1) or the JIS0212 table (high bits == 2).\n")
	fmt.Printf("// The low 14 bits are two 7-bit unsigned integers j1 and j2 that form the\n")
	fmt.Printf("// JIS code (94*j1 + j2) within that table.\n")
	fmt.Printf("\n")

	for i, v := range intervals {
		fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
		fmt.Printf("var encode%d = [...]uint16{\n", i)
		for j := v.low; j < v.high; j++ {
			x := reverse[j]
			if x.table == -1 {
				continue
			}
			fmt.Printf("\t%d - %d: jis%s<<14 | 0x%02X<<7 | 0x%02X,\n",
				j, v.low, tables[x.table].name, x.jisCode/94, x.jisCode%94)
		}
		fmt.Printf("}\n\n")
	}
}
// interval describes the half-open integer range [low, high).
type interval struct {
	low  int
	high int
}

// len returns high - low, the number of integers in the range.
func (iv interval) len() int {
	return iv.high - iv.low
}
// byDecreasingLength implements sort.Interface for intervals, placing longer
// intervals before shorter ones.
type byDecreasingLength []interval

// Len returns the number of intervals.
func (b byDecreasingLength) Len() int {
	return len(b)
}

// Less reports whether interval i is strictly longer than interval j.
func (b byDecreasingLength) Less(i, j int) bool {
	return b[j].len() < b[i].len()
}

// Swap exchanges intervals i and j.
func (b byDecreasingLength) Swap(i, j int) {
	b[j], b[i] = b[i], b[j]
}

View file

@ -1,143 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This program generates tables.go:
// go run maketables.go | gofmt > tables.go
import (
"bufio"
"fmt"
"log"
"net/http"
"sort"
"strings"
)
// main prints the source of tables.go for package korean on standard output.
// It downloads the WHATWG EUC-KR index, prints the decode table, and prints
// encode tables built from the reverse mapping, split into intervals
// separated by long unmapped runs.
func main() {
	fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
	fmt.Printf("// Package korean provides Korean encodings such as EUC-KR.\n")
	fmt.Printf(`package korean // import "golang.org/x/text/encoding/korean"` + "\n\n")

	res, err := http.Get("http://encoding.spec.whatwg.org/index-euc-kr.txt")
	if err != nil {
		log.Fatalf("Get: %v", err)
	}
	defer res.Body.Close()

	// mapping is indexed by the WHATWG index pointer; reverse is indexed by
	// Unicode code point and holds the two-byte encoding (0 means unmapped).
	mapping := [65536]uint16{}
	reverse := [65536]uint16{}

	scanner := bufio.NewScanner(res.Body)
	for scanner.Scan() {
		s := strings.TrimSpace(scanner.Text())
		if s == "" || s[0] == '#' {
			continue
		}
		x, y := uint16(0), uint16(0)
		if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
			log.Fatalf("could not parse %q", s)
		}
		// x is unsigned, so only the upper bound needs checking.
		if 178*(0xc7-0x81)+(0xfe-0xc7)*94+(0xff-0xa1) <= x {
			log.Fatalf("EUC-KR code %d is out of range", x)
		}
		mapping[x] = y
		// Only the first pointer seen for a code point is recorded.
		if reverse[y] == 0 {
			c0, c1 := uint16(0), uint16(0)
			if x < 178*(0xc7-0x81) {
				c0 = uint16(x/178) + 0x81
				c1 = uint16(x % 178)
				switch {
				case c1 < 1*26:
					c1 += 0x41
				case c1 < 2*26:
					c1 += 0x47
				default:
					c1 += 0x4d
				}
			} else {
				x -= 178 * (0xc7 - 0x81)
				c0 = uint16(x/94) + 0xc7
				c1 = uint16(x%94) + 0xa1
			}
			reverse[y] = c0<<8 | c1
		}
	}
	if err := scanner.Err(); err != nil {
		log.Fatalf("scanner error: %v", err)
	}

	fmt.Printf("// decode is the decoding table from EUC-KR code to Unicode.\n")
	fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-euc-kr.txt\n")
	fmt.Printf("var decode = [...]uint16{\n")
	for i, v := range mapping {
		if v != 0 {
			fmt.Printf("\t%d: 0x%04X,\n", i, v)
		}
	}
	fmt.Printf("}\n\n")

	// Any run of at least separation continuous zero entries in the reverse map will
	// be a separate encode table.
	const separation = 1024

	intervals := []interval(nil)
	low, high := -1, -1
	for i, v := range reverse {
		if v == 0 {
			continue
		}
		if low < 0 {
			low = i
		} else if i-high >= separation {
			if high >= 0 {
				intervals = append(intervals, interval{low, high})
			}
			low = i
		}
		high = i + 1
	}
	if high >= 0 {
		intervals = append(intervals, interval{low, high})
	}
	sort.Sort(byDecreasingLength(intervals))

	fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
	fmt.Printf("// encodeX are the encoding tables from Unicode to EUC-KR code,\n")
	fmt.Printf("// sorted by decreasing length.\n")
	for i, v := range intervals {
		fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
	}
	fmt.Printf("\n")

	for i, v := range intervals {
		fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
		fmt.Printf("var encode%d = [...]uint16{\n", i)
		for j := v.low; j < v.high; j++ {
			x := reverse[j]
			if x == 0 {
				continue
			}
			fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
		}
		fmt.Printf("}\n\n")
	}
}
// interval describes the half-open integer range [low, high).
type interval struct {
	low  int
	high int
}

// len returns high - low, the number of integers in the range.
func (iv interval) len() int {
	return iv.high - iv.low
}
// byDecreasingLength implements sort.Interface for intervals, placing longer
// intervals before shorter ones.
type byDecreasingLength []interval

// Len returns the number of intervals.
func (b byDecreasingLength) Len() int {
	return len(b)
}

// Less reports whether interval i is strictly longer than interval j.
func (b byDecreasingLength) Less(i, j int) bool {
	return b[j].len() < b[i].len()
}

// Swap exchanges intervals i and j.
func (b byDecreasingLength) Swap(i, j int) {
	b[j], b[i] = b[i], b[j]
}

View file

@ -1,161 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This program generates tables.go:
// go run maketables.go | gofmt > tables.go
import (
"bufio"
"fmt"
"log"
"net/http"
"sort"
"strings"
)
// main prints the source of tables.go for package simplifiedchinese on
// standard output: the file header followed by the GB18030 and GBK tables.
func main() {
	const header = "// generated by go run maketables.go; DO NOT EDIT\n\n" +
		"// Package simplifiedchinese provides Simplified Chinese encodings such as GBK.\n" +
		`package simplifiedchinese // import "golang.org/x/text/encoding/simplifiedchinese"` + "\n\n"
	fmt.Print(header)

	printGB18030()
	printGBK()
}
// printGB18030 downloads the WHATWG GB18030 index and prints it as the
// gb18030 table of {pointer, code point} pairs. Pairs in which either value
// does not fit in 16 bits are omitted.
func printGB18030() {
	res, err := http.Get("http://encoding.spec.whatwg.org/index-gb18030.txt")
	if err != nil {
		log.Fatalf("Get: %v", err)
	}
	defer res.Body.Close()

	fmt.Printf("// gb18030 is the table from http://encoding.spec.whatwg.org/index-gb18030.txt\n")
	fmt.Printf("var gb18030 = [...][2]uint16{\n")
	scanner := bufio.NewScanner(res.Body)
	for scanner.Scan() {
		s := strings.TrimSpace(scanner.Text())
		if s == "" || s[0] == '#' {
			continue
		}
		x, y := uint32(0), uint32(0)
		if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
			log.Fatalf("could not parse %q", s)
		}
		if x < 0x10000 && y < 0x10000 {
			fmt.Printf("\t{0x%04x, 0x%04x},\n", x, y)
		}
	}
	// Report read errors, as printGBK does; otherwise a failed download would
	// silently produce a truncated table.
	if err := scanner.Err(); err != nil {
		log.Fatalf("scanner error: %v", err)
	}
	fmt.Printf("}\n\n")
}
// printGBK downloads the WHATWG GBK index and prints the GBK decode table
// followed by the encode tables, which are built from the reverse mapping and
// split into intervals separated by long unmapped runs.
func printGBK() {
	res, err := http.Get("http://encoding.spec.whatwg.org/index-gbk.txt")
	if err != nil {
		log.Fatalf("Get: %v", err)
	}
	defer res.Body.Close()

	// mapping is indexed by the WHATWG index pointer; reverse is indexed by
	// Unicode code point and holds the two-byte encoding (0 means unmapped).
	mapping := [65536]uint16{}
	reverse := [65536]uint16{}

	scanner := bufio.NewScanner(res.Body)
	for scanner.Scan() {
		s := strings.TrimSpace(scanner.Text())
		if s == "" || s[0] == '#' {
			continue
		}
		x, y := uint16(0), uint16(0)
		if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
			log.Fatalf("could not parse %q", s)
		}
		// x is unsigned, so only the upper bound needs checking.
		if 126*190 <= x {
			log.Fatalf("GBK code %d is out of range", x)
		}
		mapping[x] = y
		// Only the first pointer seen for a code point is recorded.
		if reverse[y] == 0 {
			c0, c1 := x/190, x%190
			if c1 >= 0x3f {
				c1++
			}
			reverse[y] = (0x81+c0)<<8 | (0x40 + c1)
		}
	}
	if err := scanner.Err(); err != nil {
		log.Fatalf("scanner error: %v", err)
	}

	fmt.Printf("// decode is the decoding table from GBK code to Unicode.\n")
	fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-gbk.txt\n")
	fmt.Printf("var decode = [...]uint16{\n")
	for i, v := range mapping {
		if v != 0 {
			fmt.Printf("\t%d: 0x%04X,\n", i, v)
		}
	}
	fmt.Printf("}\n\n")

	// Any run of at least separation continuous zero entries in the reverse map will
	// be a separate encode table.
	const separation = 1024

	intervals := []interval(nil)
	low, high := -1, -1
	for i, v := range reverse {
		if v == 0 {
			continue
		}
		if low < 0 {
			low = i
		} else if i-high >= separation {
			if high >= 0 {
				intervals = append(intervals, interval{low, high})
			}
			low = i
		}
		high = i + 1
	}
	if high >= 0 {
		intervals = append(intervals, interval{low, high})
	}
	sort.Sort(byDecreasingLength(intervals))

	fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
	fmt.Printf("// encodeX are the encoding tables from Unicode to GBK code,\n")
	fmt.Printf("// sorted by decreasing length.\n")
	for i, v := range intervals {
		fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
	}
	fmt.Printf("\n")

	for i, v := range intervals {
		fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
		fmt.Printf("var encode%d = [...]uint16{\n", i)
		for j := v.low; j < v.high; j++ {
			x := reverse[j]
			if x == 0 {
				continue
			}
			fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
		}
		fmt.Printf("}\n\n")
	}
}
// interval describes the half-open integer range [low, high).
type interval struct {
	low  int
	high int
}

// len returns high - low, the number of integers in the range.
func (iv interval) len() int {
	return iv.high - iv.low
}
// byDecreasingLength implements sort.Interface for intervals, placing longer
// intervals before shorter ones.
type byDecreasingLength []interval

// Len returns the number of intervals.
func (b byDecreasingLength) Len() int {
	return len(b)
}

// Less reports whether interval i is strictly longer than interval j.
func (b byDecreasingLength) Less(i, j int) bool {
	return b[j].len() < b[i].len()
}

// Swap exchanges intervals i and j.
func (b byDecreasingLength) Swap(i, j int) {
	b[j], b[i] = b[i], b[j]
}

View file

@ -1,140 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This program generates tables.go:
// go run maketables.go | gofmt > tables.go
import (
"bufio"
"fmt"
"log"
"net/http"
"sort"
"strings"
)
// main prints the source of tables.go for package traditionalchinese on
// standard output. It downloads the WHATWG Big5 index, prints the decode
// table, and prints encode tables built from the reverse mapping, split into
// intervals separated by long unmapped runs.
func main() {
	fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
	fmt.Printf("// Package traditionalchinese provides Traditional Chinese encodings such as Big5.\n")
	fmt.Printf(`package traditionalchinese // import "golang.org/x/text/encoding/traditionalchinese"` + "\n\n")

	res, err := http.Get("http://encoding.spec.whatwg.org/index-big5.txt")
	if err != nil {
		log.Fatalf("Get: %v", err)
	}
	defer res.Body.Close()

	// mapping is indexed by the WHATWG index pointer; reverse is indexed by
	// Unicode code point and holds the two-byte encoding (0 means unmapped).
	mapping := [65536]uint32{}
	reverse := [65536 * 4]uint16{}

	scanner := bufio.NewScanner(res.Body)
	for scanner.Scan() {
		s := strings.TrimSpace(scanner.Text())
		if s == "" || s[0] == '#' {
			continue
		}
		x, y := uint16(0), uint32(0)
		if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
			log.Fatalf("could not parse %q", s)
		}
		// x is unsigned, so only the upper bound needs checking.
		if 126*157 <= x {
			log.Fatalf("Big5 code %d is out of range", x)
		}
		mapping[x] = y

		// The WHATWG spec http://encoding.spec.whatwg.org/#indexes says that
		// "The index pointer for code point in index is the first pointer
		// corresponding to code point in index", which would normally mean
		// that the code below should be guarded by "if reverse[y] == 0", but
		// last instead of first seems to match the behavior of
		// "iconv -f UTF-8 -t BIG5". For example, U+8005 者 occurs twice in
		// http://encoding.spec.whatwg.org/index-big5.txt, as index 2148
		// (encoded as "\x8e\xcd") and index 6543 (encoded as "\xaa\xcc")
		// and "echo 者 | iconv -f UTF-8 -t BIG5 | xxd" gives "\xaa\xcc".
		c0, c1 := x/157, x%157
		if c1 < 0x3f {
			c1 += 0x40
		} else {
			c1 += 0x62
		}
		reverse[y] = (0x81+c0)<<8 | c1
	}
	if err := scanner.Err(); err != nil {
		log.Fatalf("scanner error: %v", err)
	}

	fmt.Printf("// decode is the decoding table from Big5 code to Unicode.\n")
	fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-big5.txt\n")
	fmt.Printf("var decode = [...]uint32{\n")
	for i, v := range mapping {
		if v != 0 {
			fmt.Printf("\t%d: 0x%08X,\n", i, v)
		}
	}
	fmt.Printf("}\n\n")

	// Any run of at least separation continuous zero entries in the reverse map will
	// be a separate encode table.
	const separation = 1024

	intervals := []interval(nil)
	low, high := -1, -1
	for i, v := range reverse {
		if v == 0 {
			continue
		}
		if low < 0 {
			low = i
		} else if i-high >= separation {
			if high >= 0 {
				intervals = append(intervals, interval{low, high})
			}
			low = i
		}
		high = i + 1
	}
	if high >= 0 {
		intervals = append(intervals, interval{low, high})
	}
	sort.Sort(byDecreasingLength(intervals))

	fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
	fmt.Printf("// encodeX are the encoding tables from Unicode to Big5 code,\n")
	fmt.Printf("// sorted by decreasing length.\n")
	for i, v := range intervals {
		fmt.Printf("// encode%d: %5d entries for runes in [%6d, %6d).\n", i, v.len(), v.low, v.high)
	}
	fmt.Printf("\n")

	for i, v := range intervals {
		fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
		fmt.Printf("var encode%d = [...]uint16{\n", i)
		for j := v.low; j < v.high; j++ {
			x := reverse[j]
			if x == 0 {
				continue
			}
			fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
		}
		fmt.Printf("}\n\n")
	}
}
// interval describes the half-open integer range [low, high).
type interval struct {
	low  int
	high int
}

// len returns high - low, the number of integers in the range.
func (iv interval) len() int {
	return iv.high - iv.low
}
// byDecreasingLength implements sort.Interface for intervals, placing longer
// intervals before shorter ones.
type byDecreasingLength []interval

// Len returns the number of intervals.
func (b byDecreasingLength) Len() int {
	return len(b)
}

// Less reports whether interval i is strictly longer than interval j.
func (b byDecreasingLength) Less(i, j int) bool {
	return b[j].len() < b[i].len()
}

// Swap exchanges intervals i and j.
func (b byDecreasingLength) Swap(i, j int) {
	b[j], b[i] = b[i], b[j]
}

View file

@ -145,7 +145,7 @@ func (utf8Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err e
// and consumed in a greater context that implies a certain endianness, use
// IgnoreBOM. Otherwise, use ExpectBOM and always produce and consume a BOM.
//
// In the language of http://www.unicode.org/faq/utf_bom.html#bom10, IgnoreBOM
// In the language of https://www.unicode.org/faq/utf_bom.html#bom10, IgnoreBOM
// corresponds to "Where the precise type of the data stream is known... the
// BOM should not be used" and ExpectBOM corresponds to "A particular
// protocol... may require use of the BOM".

View file

@ -4,13 +4,13 @@ package language
// This file contains code common to the maketables.go and the package code.
// langAliasType is the type of an alias in langAliasMap.
type langAliasType int8
// AliasType is the type of an alias in AliasMap.
type AliasType int8
const (
langDeprecated langAliasType = iota
langMacro
langLegacy
Deprecated AliasType = iota
Macro
Legacy
langAliasTypeUnknown langAliasType = -1
AliasTypeUnknown AliasType = -1
)

29
vendor/golang.org/x/text/internal/language/compact.go generated vendored Normal file
View file

@ -0,0 +1,29 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
// CompactCoreInfo is a compact integer with the three core tags encoded.
//
// As packed by GetCompactCore, the language ID is shifted left by 20 bits,
// the script ID by 12 bits, and the region ID occupies the low bits.
type CompactCoreInfo uint32
// GetCompactCore packs the language, script and region IDs of t into a single
// value that is guaranteed to be unique for different language, region, and
// script values. It returns ok == false, with a zero value, when t.LangID is
// greater than langNoIndexOffset.
func GetCompactCore(t Tag) (cci CompactCoreInfo, ok bool) {
	if t.LangID > langNoIndexOffset {
		return 0, false
	}
	lang := CompactCoreInfo(t.LangID) << (8 + 12)
	script := CompactCoreInfo(t.ScriptID) << 12
	region := CompactCoreInfo(t.RegionID)
	return lang | script | region, true
}
// Tag unpacks c into a Tag: the language ID is taken from the bits above bit
// 20, the region ID from the low 10 bits, and the script ID from the 8 bits
// starting at bit 12.
func (c CompactCoreInfo) Tag() Tag {
	var t Tag
	t.LangID = Language(c >> 20)
	t.RegionID = Region(c & 0x3ff)
	t.ScriptID = Script(c>>12) & 0xff
	return t
}

View file

@ -0,0 +1,61 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package compact defines a compact representation of language tags.
//
// Common language tags (at least all for which locale information is defined
// in CLDR) are assigned a unique index. Each Tag is associated with such an
// ID for selecting language-related resources (such as translations) as well
// as one for selecting regional defaults (currency, number formatting, etc.)
//
// This functionality may be exported at some point, but for now it is only
// available for use within x/text.
package compact // import "golang.org/x/text/internal/language/compact"
import (
"sort"
"strings"
"golang.org/x/text/internal/language"
)
// ID is an integer identifying a single tag.
//
// Values below len(coreTags) index coreTags; larger values index specialTags
// after subtracting len(coreTags) (see ID.Tag).
type ID uint16
// getCoreIndex returns the position of t's compact core value within
// coreTags, located by binary search. ok is false when t has no compact core
// value or that value is not present in coreTags.
func getCoreIndex(t language.Tag) (id ID, ok bool) {
	cci, ok := language.GetCompactCore(t)
	if !ok {
		return 0, false
	}
	n := len(coreTags)
	pos := sort.Search(n, func(k int) bool {
		return coreTags[k] >= cci
	})
	if pos < n && coreTags[pos] == cci {
		return ID(pos), true
	}
	return 0, false
}
// Parent looks id up in the parents table and returns the entry found there:
// the ID of id's parent, or the root ID if id is already the root.
func (id ID) Parent() ID {
	parent := parents[id]
	return parent
}
// Tag converts id to an internal language Tag. IDs below len(coreTags) are
// expanded from coreTags; all other IDs are looked up in specialTags.
func (id ID) Tag() language.Tag {
	idx, nCore := int(id), len(coreTags)
	if idx < nCore {
		return coreTags[id].Tag()
	}
	return specialTags[idx-nCore]
}
// specialTags holds the parsed form of the space-separated tags in
// specialTagsStr; it is filled in by init.
var specialTags []language.Tag

// init parses every tag listed in specialTagsStr into specialTags.
func init() {
	fields := strings.Split(specialTagsStr, " ")
	specialTags = make([]language.Tag, 0, len(fields))
	for _, f := range fields {
		specialTags = append(specialTags, language.MustParse(f))
	}
}

View file

@ -0,0 +1,260 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go gen_index.go -output tables.go
//go:generate go run gen_parents.go
package compact
// TODO: Remove above NOTE after:
// - verifying that tables are dropped correctly (most notably matcher tables).
import (
"strings"
"golang.org/x/text/internal/language"
)
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
// specific language or locale. All language tag values are guaranteed to be
// well-formed.
type Tag struct {
// NOTE: exported tags will become part of the public API.
// language is the compact ID Make derives from the tag itself (with any
// "rg" override removed).
language ID
// locale equals language unless Make found an "rg" region override, in
// which case it is the ID of the tag with that region applied.
locale ID
// full is set by Make only when the IDs do not represent the tag exactly;
// nil means the tag is fully described by language and locale.
full fullTag // always a language.Tag for now.
}
// _und is the compact ID that IsRoot compares a tag's language against to
// decide whether the tag is the root ("und").
const _und = 0
// fullTag is the set of methods Tag needs from the value stored in its full
// field; Tag.IsRoot and Tag.Parent delegate to them when full is set.
type fullTag interface {
IsRoot() bool
Parent() language.Tag
}
// Make builds a compact Tag from a fully specified internal language Tag.
//
// If t carries an "rg" type of the form "<2-letter region>zzzz" whose region
// parses, the language ID is derived from t without the "rg" key and the
// locale ID from that tag with the override region applied. Otherwise both
// IDs are derived from t itself. The full tag is retained only when the IDs
// are not an exact match.
func Make(t language.Tag) (tag Tag) {
	if rg := t.TypeForKey("rg"); len(rg) == 6 && rg[2:] == "zzzz" {
		if region, err := language.ParseRegion(rg[:2]); err == nil {
			original := t
			t, _ = t.SetTypeForKey("rg", "")
			// TODO: should we not consider "va" for the language tag?
			var langExact, locExact bool
			tag.language, langExact = FromTag(t)
			t.RegionID = region
			tag.locale, locExact = FromTag(t)
			if !(langExact && locExact) {
				tag.full = original
			}
			return tag
		}
	}
	id, exact := FromTag(t)
	tag.language, tag.locale = id, id
	if !exact {
		tag.full = t
	}
	return tag
}
// Tag returns an internal language Tag version of this tag. When the full tag
// was retained it is returned as is; otherwise the tag is rebuilt from the
// language ID, adding an "rg" override derived from the locale's region when
// language and locale differ.
func (t Tag) Tag() language.Tag {
	if full := t.full; full != nil {
		return full.(language.Tag)
	}
	base := t.language.Tag()
	if t.language == t.locale {
		return base
	}
	region := strings.ToLower(t.locale.Tag().RegionID.String())
	base, _ = base.SetTypeForKey("rg", region+"zzzz")
	return base
}
// IsCompact reports whether this tag is fully defined in terms of ID, that
// is, whether no full tag has been retained.
func (t *Tag) IsCompact() bool {
	hasFull := t.full != nil
	return !hasFull
}
// MayHaveVariants reports whether a tag may have variants. If it returns false
// it is guaranteed the tag does not have variants.
func (t Tag) MayHaveVariants() bool {
	if t.full != nil {
		return true
	}
	return int(t.language) >= len(coreTags)
}
// MayHaveExtensions reports whether a tag may have extensions. If it returns
// false it is guaranteed the tag does not have them.
func (t Tag) MayHaveExtensions() bool {
	switch {
	case t.full != nil,
		int(t.language) >= len(coreTags),
		t.language != t.locale:
		return true
	}
	return false
}
// IsRoot reports whether t is equal to language "und". A stored full tag
// decides the answer; otherwise the language ID is compared with _und.
func (t Tag) IsRoot() bool {
	if full := t.full; full != nil {
		return full.IsRoot()
	}
	return t.language == _und
}
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
// specific language are substituted with fields from the parent language.
// The parent for a language may change for newer versions of CLDR.
func (t Tag) Parent() Tag {
	switch {
	case t.full != nil:
		return Make(t.full.Parent())
	case t.language != t.locale:
		// Simulate stripping -u-rg-xxxxxx
		return Tag{language: t.language, locale: t.language}
	}
	// TODO: use parent lookup table once cycle from internal package is
	// removed. Probably by internalizing the table and declaring this fast
	// enough.
	// lang := compactID(internal.Parent(uint16(t.language)))
	parentID, _ := FromTag(t.language.Tag().Parent())
	return Tag{language: parentID, locale: parentID}
}
// nextToken skips the first byte of s (the separator) and returns the token
// that follows, up to but excluding the next '-', together with the remainder
// of the string starting at that '-'. The remainder is empty when no further
// '-' exists. s must not be empty.
func nextToken(s string) (t, tail string) {
	rest := s[1:]
	if i := strings.IndexByte(rest, '-'); i >= 0 {
		return rest[:i], rest[i:]
	}
	return rest, ""
}
// LanguageID returns an index, where 0 <= index < NumCompactTags, for tags
// for which data exists in the text repository. The index will change over
// time and should not be stored in persistent storage.
//
// The returned id is the language ID stored in t. exact is false when t
// carries a full tag, i.e. when the ID does not describe the tag completely.
func LanguageID(t Tag) (id ID, exact bool) {
	id = t.language
	exact = t.full == nil
	return id, exact
}
// RegionalID returns the ID for the regional variant of this tag. This index is
// used to indicate region-specific overrides, such as default currency, default
// calendar and week data, default time cycle, and default measurement system
// and unit preferences.
//
// For instance, the tag en-GB-u-rg-uszzzz specifies British English with US
// settings for currency, number formatting, etc. The CompactIndex for this tag
// will be that for en-GB, while the RegionalID will be the one corresponding to
// en-US.
//
// The returned id is the locale ID stored in t. exact is false when t carries
// a full tag.
func RegionalID(t Tag) (id ID, exact bool) {
	id = t.locale
	exact = t.full == nil
	return id, exact
}
// LanguageTag returns t stripped of regional variant indicators.
//
// At the moment this means it is stripped of a regional and variant subtag "rg"
// and "va" in the "u" extension.
//
// For a compact tag the result uses the language ID for both the language and
// the locale. For a non-compact tag the full tag is rebuilt without the two
// keys and compacted again with Make.
func (t Tag) LanguageTag() Tag {
	if t.full == nil {
		return Tag{language: t.language, locale: t.language}
	}
	tt := t.Tag()
	// SetTypeForKey has a value receiver and returns the modified tag, so the
	// result must be assigned back; discarding it leaves "rg" and "va" in
	// place. The error is ignored deliberately: on failure SetTypeForKey
	// returns the tag unchanged, which is the best available fallback here.
	tt, _ = tt.SetTypeForKey("rg", "")
	tt, _ = tt.SetTypeForKey("va", "")
	return Make(tt)
}
// RegionalTag returns the regional variant of the tag.
//
// At the moment this means that the region is set from the regional subtag
// "rg" in the "u" extension.
//
// For a compact tag the result uses the locale ID for both the language and
// the locale. For a non-compact tag, t itself is returned.
func (t Tag) RegionalTag() Tag {
	rt := Tag{language: t.locale, locale: t.locale}
	if t.full == nil {
		return rt
	}
	// NOTE(review): the Builder below is populated with the locale tag plus
	// the variants and extensions of t, but b.Make is never called and the
	// function returns t unchanged, so this work has no effect on the result.
	// It looks like the built tag was meant to be returned; confirm the
	// intended behavior before changing it.
	b := language.Builder{}
	tag := t.Tag()
	// tag, _ = tag.SetTypeForKey("rg", "")
	b.SetTag(t.locale.Tag())
	if v := tag.Variants(); v != "" {
		// Variants includes the leading '-', so the first element of the
		// split is empty; AddVariant skips empty strings.
		for _, v := range strings.Split(v, "-") {
			b.AddVariant(v)
		}
	}
	for _, e := range tag.Extensions() {
		b.AddExt(e)
	}
	return t
}
// FromTag reports closest matching ID for an internal language Tag.
//
// exact is true only when the returned ID describes t completely. When no
// direct match exists the parents of t are tried in turn, and ID 0 is
// returned if none of them matches.
func FromTag(t language.Tag) (id ID, exact bool) {
	// TODO: perhaps give more frequent tags a lower index.
	// TODO: we could make the indexes stable. This will exclude some
	// possibilities for optimization, so don't do this quite yet.
	exact = true
	b, s, r := t.Raw()
	if t.HasString() {
		// t has variants and/or extensions (or is a private use tag).
		if t.IsPrivateUse() {
			// We have no entries for user-defined tags.
			return 0, false
		}
		hasExtra := false
		if t.HasVariants() {
			if t.HasExtensions() {
				// Rebuild t from the core subtags and the variants only,
				// dropping the extensions; the match is then inexact.
				// NOTE(review): Variants returns the variant section
				// including its leading '-', and it is passed to AddVariant
				// as a single string; confirm Builder.Make handles this as
				// intended.
				build := language.Builder{}
				build.SetTag(language.Tag{LangID: b, ScriptID: s, RegionID: r})
				build.AddVariant(t.Variants())
				exact = false
				t = build.Make()
			}
			hasExtra = true
		} else if _, ok := t.Extension('u'); ok {
			// TODO: va may mean something else. Consider not considering it.
			// Strip all but the 'va' entry.
			old := t
			variant := t.TypeForKey("va")
			t = language.Tag{LangID: b, ScriptID: s, RegionID: r}
			if variant != "" {
				t, _ = t.SetTypeForKey("va", variant)
				hasExtra = true
			}
			// The match stays exact only if stripping changed nothing.
			exact = old == t
		} else {
			// Extensions other than -u are not represented by any ID.
			exact = false
		}
		if hasExtra {
			// We have some variants.
			// IDs of special tags follow those of the core tags.
			for i, s := range specialTags {
				if s == t {
					return ID(i + len(coreTags)), exact
				}
			}
			exact = false
		}
	}
	if x, ok := getCoreIndex(t); ok {
		return x, exact
	}
	// From here on only approximate matches are possible.
	exact = false
	if r != 0 && s == 0 {
		// Deal with cases where an extra script is inserted for the region.
		// The maximized tag shadows t inside this block only; the parent walk
		// below continues from the original t.
		t, _ := t.Maximize()
		if x, ok := getCoreIndex(t); ok {
			return x, exact
		}
	}
	for t = t.Parent(); t != root; t = t.Parent() {
		// No variants specified: just compare core components.
		// The key has the form lllssrrr, where l, s, and r are nibbles for
		// respectively the langID, scriptID, and regionID.
		if x, ok := getCoreIndex(t); ok {
			return x, exact
		}
	}
	return 0, exact
}
// root is the zero language.Tag; FromTag stops walking up the parent chain
// once it reaches this value.
var root = language.Tag{}

View file

@ -0,0 +1,120 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package compact
// parents maps a compact index of a tag to the compact index of the parent of
// this tag.
var parents = []ID{ // 775 elements
// Entry 0 - 3F
0x0000, 0x0000, 0x0001, 0x0001, 0x0000, 0x0004, 0x0000, 0x0006,
0x0000, 0x0008, 0x0000, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x0000,
0x0000, 0x0028, 0x0000, 0x002a, 0x0000, 0x002c, 0x0000, 0x0000,
0x002f, 0x002e, 0x002e, 0x0000, 0x0033, 0x0000, 0x0035, 0x0000,
0x0037, 0x0000, 0x0039, 0x0000, 0x003b, 0x0000, 0x0000, 0x003e,
// Entry 40 - 7F
0x0000, 0x0040, 0x0040, 0x0000, 0x0043, 0x0043, 0x0000, 0x0046,
0x0000, 0x0048, 0x0000, 0x0000, 0x004b, 0x004a, 0x004a, 0x0000,
0x004f, 0x004f, 0x004f, 0x004f, 0x0000, 0x0054, 0x0054, 0x0000,
0x0057, 0x0000, 0x0059, 0x0000, 0x005b, 0x0000, 0x005d, 0x005d,
0x0000, 0x0060, 0x0000, 0x0062, 0x0000, 0x0064, 0x0000, 0x0066,
0x0066, 0x0000, 0x0069, 0x0000, 0x006b, 0x006b, 0x006b, 0x006b,
0x006b, 0x006b, 0x006b, 0x0000, 0x0073, 0x0000, 0x0075, 0x0000,
0x0077, 0x0000, 0x0000, 0x007a, 0x0000, 0x007c, 0x0000, 0x007e,
// Entry 80 - BF
0x0000, 0x0080, 0x0080, 0x0000, 0x0083, 0x0083, 0x0000, 0x0086,
0x0087, 0x0087, 0x0087, 0x0086, 0x0088, 0x0087, 0x0087, 0x0087,
0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088,
0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087, 0x0088, 0x0087,
0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087,
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0086,
// Entry C0 - FF
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087,
0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0086, 0x0087,
0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0000,
0x00ef, 0x0000, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2,
0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f1, 0x00f1,
// Entry 100 - 13F
0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1,
0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x0000, 0x010e,
0x0000, 0x0110, 0x0000, 0x0112, 0x0000, 0x0114, 0x0114, 0x0000,
0x0117, 0x0117, 0x0117, 0x0117, 0x0000, 0x011c, 0x0000, 0x011e,
0x0000, 0x0120, 0x0120, 0x0000, 0x0123, 0x0123, 0x0123, 0x0123,
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
// Entry 140 - 17F
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
0x0123, 0x0123, 0x0000, 0x0152, 0x0000, 0x0154, 0x0000, 0x0156,
0x0000, 0x0158, 0x0000, 0x015a, 0x0000, 0x015c, 0x015c, 0x015c,
0x0000, 0x0160, 0x0000, 0x0000, 0x0163, 0x0000, 0x0165, 0x0000,
0x0167, 0x0167, 0x0167, 0x0000, 0x016b, 0x0000, 0x016d, 0x0000,
0x016f, 0x0000, 0x0171, 0x0171, 0x0000, 0x0174, 0x0000, 0x0176,
0x0000, 0x0178, 0x0000, 0x017a, 0x0000, 0x017c, 0x0000, 0x017e,
// Entry 180 - 1BF
0x0000, 0x0000, 0x0000, 0x0182, 0x0000, 0x0184, 0x0184, 0x0184,
0x0184, 0x0000, 0x0000, 0x0000, 0x018b, 0x0000, 0x0000, 0x018e,
0x0000, 0x0000, 0x0191, 0x0000, 0x0000, 0x0000, 0x0195, 0x0000,
0x0197, 0x0000, 0x0000, 0x019a, 0x0000, 0x0000, 0x019d, 0x0000,
0x019f, 0x0000, 0x01a1, 0x0000, 0x01a3, 0x0000, 0x01a5, 0x0000,
0x01a7, 0x0000, 0x01a9, 0x0000, 0x01ab, 0x0000, 0x01ad, 0x0000,
0x01af, 0x0000, 0x01b1, 0x01b1, 0x0000, 0x01b4, 0x0000, 0x01b6,
0x0000, 0x01b8, 0x0000, 0x01ba, 0x0000, 0x01bc, 0x0000, 0x0000,
// Entry 1C0 - 1FF
0x01bf, 0x0000, 0x01c1, 0x0000, 0x01c3, 0x0000, 0x01c5, 0x0000,
0x01c7, 0x0000, 0x01c9, 0x0000, 0x01cb, 0x01cb, 0x01cb, 0x01cb,
0x0000, 0x01d0, 0x0000, 0x01d2, 0x01d2, 0x0000, 0x01d5, 0x0000,
0x01d7, 0x0000, 0x01d9, 0x0000, 0x01db, 0x0000, 0x01dd, 0x0000,
0x01df, 0x01df, 0x0000, 0x01e2, 0x0000, 0x01e4, 0x0000, 0x01e6,
0x0000, 0x01e8, 0x0000, 0x01ea, 0x0000, 0x01ec, 0x0000, 0x01ee,
0x0000, 0x01f0, 0x0000, 0x0000, 0x01f3, 0x0000, 0x01f5, 0x01f5,
0x01f5, 0x0000, 0x01f9, 0x0000, 0x01fb, 0x0000, 0x01fd, 0x0000,
// Entry 200 - 23F
0x01ff, 0x0000, 0x0000, 0x0202, 0x0000, 0x0204, 0x0204, 0x0000,
0x0207, 0x0000, 0x0209, 0x0209, 0x0000, 0x020c, 0x020c, 0x0000,
0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x0000,
0x0217, 0x0000, 0x0219, 0x0000, 0x021b, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0221, 0x0000, 0x0000, 0x0224, 0x0000, 0x0226,
0x0226, 0x0000, 0x0229, 0x0000, 0x022b, 0x022b, 0x0000, 0x0000,
0x022f, 0x022e, 0x022e, 0x0000, 0x0000, 0x0234, 0x0000, 0x0236,
0x0000, 0x0238, 0x0000, 0x0244, 0x023a, 0x0244, 0x0244, 0x0244,
// Entry 240 - 27F
0x0244, 0x0244, 0x0244, 0x0244, 0x023a, 0x0244, 0x0244, 0x0000,
0x0247, 0x0247, 0x0247, 0x0000, 0x024b, 0x0000, 0x024d, 0x0000,
0x024f, 0x024f, 0x0000, 0x0252, 0x0000, 0x0254, 0x0254, 0x0254,
0x0254, 0x0254, 0x0254, 0x0000, 0x025b, 0x0000, 0x025d, 0x0000,
0x025f, 0x0000, 0x0261, 0x0000, 0x0263, 0x0000, 0x0265, 0x0000,
0x0000, 0x0268, 0x0268, 0x0268, 0x0000, 0x026c, 0x0000, 0x026e,
0x0000, 0x0270, 0x0000, 0x0000, 0x0000, 0x0274, 0x0273, 0x0273,
0x0000, 0x0278, 0x0000, 0x027a, 0x0000, 0x027c, 0x0000, 0x0000,
// Entry 280 - 2BF
0x0000, 0x0000, 0x0281, 0x0000, 0x0000, 0x0284, 0x0000, 0x0286,
0x0286, 0x0286, 0x0286, 0x0000, 0x028b, 0x028b, 0x028b, 0x0000,
0x028f, 0x028f, 0x028f, 0x028f, 0x028f, 0x0000, 0x0295, 0x0295,
0x0295, 0x0295, 0x0000, 0x0000, 0x0000, 0x0000, 0x029d, 0x029d,
0x029d, 0x0000, 0x02a1, 0x02a1, 0x02a1, 0x02a1, 0x0000, 0x0000,
0x02a7, 0x02a7, 0x02a7, 0x02a7, 0x0000, 0x02ac, 0x0000, 0x02ae,
0x02ae, 0x0000, 0x02b1, 0x0000, 0x02b3, 0x0000, 0x02b5, 0x02b5,
0x0000, 0x0000, 0x02b9, 0x0000, 0x0000, 0x0000, 0x02bd, 0x0000,
// Entry 2C0 - 2FF
0x02bf, 0x02bf, 0x0000, 0x0000, 0x02c3, 0x0000, 0x02c5, 0x0000,
0x02c7, 0x0000, 0x02c9, 0x0000, 0x02cb, 0x0000, 0x02cd, 0x02cd,
0x0000, 0x0000, 0x02d1, 0x0000, 0x02d3, 0x02d0, 0x02d0, 0x0000,
0x0000, 0x02d8, 0x02d7, 0x02d7, 0x0000, 0x0000, 0x02dd, 0x0000,
0x02df, 0x0000, 0x02e1, 0x0000, 0x0000, 0x02e4, 0x0000, 0x02e6,
0x0000, 0x0000, 0x02e9, 0x0000, 0x02eb, 0x0000, 0x02ed, 0x0000,
0x02ef, 0x02ef, 0x0000, 0x0000, 0x02f3, 0x02f2, 0x02f2, 0x0000,
0x02f7, 0x0000, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x0000,
// Entry 300 - 33F
0x02ff, 0x0300, 0x02ff, 0x0000, 0x0303, 0x0051, 0x00e6,
} // Size: 1574 bytes
// Total table size 1574 bytes (1KiB); checksum: 895AAF0B

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,91 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package compact
// Predefined compact tags. Apart from und and Und, which are the zero Tag,
// each value sets language and locale to the same index constant.
var (
	// und is the unexported zero Tag.
	und = Tag{}
	// Und is the exported zero Tag.
	Und Tag = Tag{}
	Afrikaans Tag = Tag{language: afIndex, locale: afIndex}
	Amharic Tag = Tag{language: amIndex, locale: amIndex}
	Arabic Tag = Tag{language: arIndex, locale: arIndex}
	ModernStandardArabic Tag = Tag{language: ar001Index, locale: ar001Index}
	Azerbaijani Tag = Tag{language: azIndex, locale: azIndex}
	Bulgarian Tag = Tag{language: bgIndex, locale: bgIndex}
	Bengali Tag = Tag{language: bnIndex, locale: bnIndex}
	Catalan Tag = Tag{language: caIndex, locale: caIndex}
	Czech Tag = Tag{language: csIndex, locale: csIndex}
	Danish Tag = Tag{language: daIndex, locale: daIndex}
	German Tag = Tag{language: deIndex, locale: deIndex}
	Greek Tag = Tag{language: elIndex, locale: elIndex}
	English Tag = Tag{language: enIndex, locale: enIndex}
	AmericanEnglish Tag = Tag{language: enUSIndex, locale: enUSIndex}
	BritishEnglish Tag = Tag{language: enGBIndex, locale: enGBIndex}
	Spanish Tag = Tag{language: esIndex, locale: esIndex}
	EuropeanSpanish Tag = Tag{language: esESIndex, locale: esESIndex}
	LatinAmericanSpanish Tag = Tag{language: es419Index, locale: es419Index}
	Estonian Tag = Tag{language: etIndex, locale: etIndex}
	Persian Tag = Tag{language: faIndex, locale: faIndex}
	Finnish Tag = Tag{language: fiIndex, locale: fiIndex}
	Filipino Tag = Tag{language: filIndex, locale: filIndex}
	French Tag = Tag{language: frIndex, locale: frIndex}
	CanadianFrench Tag = Tag{language: frCAIndex, locale: frCAIndex}
	Gujarati Tag = Tag{language: guIndex, locale: guIndex}
	Hebrew Tag = Tag{language: heIndex, locale: heIndex}
	Hindi Tag = Tag{language: hiIndex, locale: hiIndex}
	Croatian Tag = Tag{language: hrIndex, locale: hrIndex}
	Hungarian Tag = Tag{language: huIndex, locale: huIndex}
	Armenian Tag = Tag{language: hyIndex, locale: hyIndex}
	Indonesian Tag = Tag{language: idIndex, locale: idIndex}
	Icelandic Tag = Tag{language: isIndex, locale: isIndex}
	Italian Tag = Tag{language: itIndex, locale: itIndex}
	Japanese Tag = Tag{language: jaIndex, locale: jaIndex}
	Georgian Tag = Tag{language: kaIndex, locale: kaIndex}
	Kazakh Tag = Tag{language: kkIndex, locale: kkIndex}
	Khmer Tag = Tag{language: kmIndex, locale: kmIndex}
	Kannada Tag = Tag{language: knIndex, locale: knIndex}
	Korean Tag = Tag{language: koIndex, locale: koIndex}
	Kirghiz Tag = Tag{language: kyIndex, locale: kyIndex}
	Lao Tag = Tag{language: loIndex, locale: loIndex}
	Lithuanian Tag = Tag{language: ltIndex, locale: ltIndex}
	Latvian Tag = Tag{language: lvIndex, locale: lvIndex}
	Macedonian Tag = Tag{language: mkIndex, locale: mkIndex}
	Malayalam Tag = Tag{language: mlIndex, locale: mlIndex}
	Mongolian Tag = Tag{language: mnIndex, locale: mnIndex}
	Marathi Tag = Tag{language: mrIndex, locale: mrIndex}
	Malay Tag = Tag{language: msIndex, locale: msIndex}
	Burmese Tag = Tag{language: myIndex, locale: myIndex}
	Nepali Tag = Tag{language: neIndex, locale: neIndex}
	Dutch Tag = Tag{language: nlIndex, locale: nlIndex}
	Norwegian Tag = Tag{language: noIndex, locale: noIndex}
	Punjabi Tag = Tag{language: paIndex, locale: paIndex}
	Polish Tag = Tag{language: plIndex, locale: plIndex}
	Portuguese Tag = Tag{language: ptIndex, locale: ptIndex}
	BrazilianPortuguese Tag = Tag{language: ptBRIndex, locale: ptBRIndex}
	EuropeanPortuguese Tag = Tag{language: ptPTIndex, locale: ptPTIndex}
	Romanian Tag = Tag{language: roIndex, locale: roIndex}
	Russian Tag = Tag{language: ruIndex, locale: ruIndex}
	Sinhala Tag = Tag{language: siIndex, locale: siIndex}
	Slovak Tag = Tag{language: skIndex, locale: skIndex}
	Slovenian Tag = Tag{language: slIndex, locale: slIndex}
	Albanian Tag = Tag{language: sqIndex, locale: sqIndex}
	Serbian Tag = Tag{language: srIndex, locale: srIndex}
	SerbianLatin Tag = Tag{language: srLatnIndex, locale: srLatnIndex}
	Swedish Tag = Tag{language: svIndex, locale: svIndex}
	Swahili Tag = Tag{language: swIndex, locale: swIndex}
	Tamil Tag = Tag{language: taIndex, locale: taIndex}
	Telugu Tag = Tag{language: teIndex, locale: teIndex}
	Thai Tag = Tag{language: thIndex, locale: thIndex}
	Turkish Tag = Tag{language: trIndex, locale: trIndex}
	Ukrainian Tag = Tag{language: ukIndex, locale: ukIndex}
	Urdu Tag = Tag{language: urIndex, locale: urIndex}
	Uzbek Tag = Tag{language: uzIndex, locale: uzIndex}
	Vietnamese Tag = Tag{language: viIndex, locale: viIndex}
	Chinese Tag = Tag{language: zhIndex, locale: zhIndex}
	SimplifiedChinese Tag = Tag{language: zhHansIndex, locale: zhHansIndex}
	TraditionalChinese Tag = Tag{language: zhHantIndex, locale: zhHantIndex}
	Zulu Tag = Tag{language: zuIndex, locale: zuIndex}
)

167
vendor/golang.org/x/text/internal/language/compose.go generated vendored Normal file
View file

@ -0,0 +1,167 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"sort"
"strings"
)
// A Builder allows constructing a Tag from individual components.
// Its main user is Compose in the top-level language package.
type Builder struct {
	// Tag supplies the language, script and region of the tag being built.
	Tag Tag
	private string // the x extension
	// variants holds the variant subtags without their leading '-'.
	variants []string
	// extensions holds the non-private extensions, one string per extension.
	extensions []string
}
// Make returns a new Tag from the current settings.
//
// Variants and extensions are sorted in place before the tag string is
// assembled. The private use extension, if any, is placed after all other
// extensions. Make may be called repeatedly: the private use extension is
// appended to a copy of the extension list rather than to the builder's own
// list, so it is never emitted more than once.
func (b *Builder) Make() Tag {
	t := b.Tag
	if len(b.extensions) > 0 || len(b.variants) > 0 {
		sort.Sort(sortVariants(b.variants))
		sort.Strings(b.extensions)
		exts := b.extensions
		if b.private != "" {
			// The full slice expression caps the capacity so that append
			// allocates a new array instead of writing into b.extensions.
			exts = append(exts[:len(exts):len(exts)], b.private)
		}
		n := maxCoreSize + tokenLen(b.variants...) + tokenLen(exts...)
		buf := make([]byte, n)
		p := t.genCoreBytes(buf)
		t.pVariant = byte(p)
		p += appendTokens(buf[p:], b.variants...)
		t.pExt = uint16(p)
		p += appendTokens(buf[p:], exts...)
		t.str = string(buf[:p])
		// We may not always need to remake the string, but when or when not
		// to do so is rather tricky.
		scan := makeScanner(buf[:p])
		t, _ = parse(&scan, "")
		return t
	} else if b.private != "" {
		// Only a private use extension was added on top of the core subtags.
		t.str = b.private
		t.RemakeString()
	}
	return t
}
// SetTag resets the builder to the contents of t: language, region, script,
// variants and extensions. Anything set on the builder before is discarded.
func (b *Builder) SetTag(t Tag) {
	b.Tag.LangID, b.Tag.RegionID, b.Tag.ScriptID = t.LangID, t.RegionID, t.ScriptID
	// TODO: optimize
	b.variants = b.variants[:0]
	if vs := t.Variants(); vs != "" {
		// Drop the leading '-' before splitting into individual variants.
		b.variants = append(b.variants, strings.Split(vs[1:], "-")...)
	}
	b.private = ""
	b.extensions = b.extensions[:0]
	for _, ext := range t.Extensions() {
		b.AddExt(ext)
	}
}
// AddExt adds extension e to the tag. e must be a valid extension as returned
// by Tag.Extension. If an extension of the same kind already exists, e is
// discarded, except for a -u extension, where everything after the first byte
// of e is appended to the existing one. A private use (x) extension is only
// recorded if none has been set yet.
func (b *Builder) AddExt(e string) {
	kind := e[0]
	if kind == 'x' {
		if b.private == "" {
			b.private = e
		}
		return
	}
	for i := range b.extensions {
		if b.extensions[i][0] != kind {
			continue
		}
		if kind == 'u' {
			b.extensions[i] += e[1:]
		}
		return
	}
	b.extensions = append(b.extensions, e)
}
// SetExt sets the extension e to the tag. e must be a valid extension as
// returned by Tag.Extension. An existing extension of the same kind is
// overwritten, except for a -u extension, where e is placed in front of the
// existing key-type pairs. A private use (x) extension always replaces the
// previous one.
func (b *Builder) SetExt(e string) {
	kind := e[0]
	if kind == 'x' {
		b.private = e
		return
	}
	for i := range b.extensions {
		existing := b.extensions[i]
		if existing[0] != kind {
			continue
		}
		switch kind {
		case 'u':
			b.extensions[i] = e + existing[1:]
		default:
			b.extensions[i] = e
		}
		return
	}
	b.extensions = append(b.extensions, e)
}
// AddVariant appends the given variants to the builder. Empty strings are
// skipped.
func (b *Builder) AddVariant(v ...string) {
	for i := range v {
		if variant := v[i]; variant != "" {
			b.variants = append(b.variants, variant)
		}
	}
}
// ClearVariants drops every variant held by the builder, whether added
// through AddVariant or copied from a Tag by SetTag. The backing storage is
// kept for reuse.
func (b *Builder) ClearVariants() {
	emptied := b.variants[:0]
	b.variants = emptied
}
// ClearExtensions drops every extension held by the builder, including the
// private use one and those copied from a Tag by SetTag.
func (b *Builder) ClearExtensions() {
	b.extensions = b.extensions[:0]
	b.private = ""
}
// tokenLen returns the number of bytes needed to write all tokens, each
// preceded by a one-byte separator.
func tokenLen(token ...string) (n int) {
	for i := range token {
		n += 1 + len(token[i])
	}
	return n
}
// appendTokens writes each token into b preceded by '-', starting at b[0],
// and returns the number of bytes the tokens and separators occupy. b must be
// large enough to hold them (see tokenLen).
func appendTokens(b []byte, token ...string) int {
	written := 0
	for i := range token {
		tok := token[i]
		b[written] = '-'
		written++
		copy(b[written:], tok)
		written += len(tok)
	}
	return written
}
// sortVariants is a list of variant subtags that can be ordered with
// sort.Sort.
type sortVariants []string

// Len returns the number of variants in s.
func (s sortVariants) Len() int {
	count := len(s)
	return count
}

// Swap exchanges the variants at positions i and j.
func (s sortVariants) Swap(i, j int) {
	s[i], s[j] = s[j], s[i]
}
// Less orders variants by the value variantIndex holds for each of them.
func (s sortVariants) Less(i, j int) bool {
	left, right := variantIndex[s[i]], variantIndex[s[j]]
	return left < right
}

28
vendor/golang.org/x/text/internal/language/coverage.go generated vendored Normal file
View file

@ -0,0 +1,28 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
// BaseLanguages returns the list of all supported base languages. The list is
// assembled from two sources: every ID below langNoIndexOffset except
// nonCanonicalUnd, followed by one ID per set bit in langNoIndex, counting
// upwards from langNoIndexOffset with the least significant bit first.
func BaseLanguages() []Language {
	langs := make([]Language, 0, NumLanguages)
	for id := 0; id < langNoIndexOffset; id++ {
		// We included "und" already for the value 0.
		if id == nonCanonicalUnd {
			continue
		}
		langs = append(langs, Language(id))
	}
	next := langNoIndexOffset
	for _, bits := range langNoIndex {
		for k := 0; k < 8; k++ {
			if bits&1 == 1 {
				langs = append(langs, Language(next))
			}
			bits >>= 1
			next++
		}
	}
	return langs
}

596
vendor/golang.org/x/text/internal/language/language.go generated vendored Normal file
View file

@ -0,0 +1,596 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go gen_common.go -output tables.go
package language // import "golang.org/x/text/internal/language"
// TODO: Remove above NOTE after:
// - verifying that tables are dropped correctly (most notably matcher tables).
import (
"errors"
"fmt"
"strings"
)
// Buffer sizes used when generating tag strings.
const (
	// maxCoreSize is the maximum size of a BCP 47 tag without variants and
	// extensions. Equals max lang (3) + script (4) + max reg (3) + 2 dashes,
	// i.e. 3 + 4 + 3 + 2 = 12.
	maxCoreSize = 12
	// max99thPercentileSize is a somewhat arbitrary buffer size that presumably
	// is large enough to hold at least 99% of the BCP 47 tags.
	max99thPercentileSize = 32
	// maxSimpleUExtensionSize is the maximum size of a -u extension with one
	// key-type pair. Equals len("-u-") + key (2) + dash + max value (8),
	// i.e. 3 + 2 + 1 + 8 = 14.
	maxSimpleUExtensionSize = 14
)
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
// specific language or locale. All language tag values are guaranteed to be
// well-formed. The zero value of Tag is Und.
type Tag struct {
	// TODO: the following fields have the form TagTypeID. This name is chosen
	// to allow refactoring the public package without conflicting with its
	// Base, Script, and Region methods. Once the transition is fully completed
	// the ID can be stripped from the name.
	// LangID is the base language; zero means unset.
	LangID Language
	// RegionID is the region; zero means unset.
	RegionID Region
	// TODO: we will soon run out of positions for ScriptID. Idea: instead of
	// storing lang, region, and ScriptID codes, store only the compact index and
	// have a lookup table from this code to its expansion. This greatly speeds
	// up table lookup, speed up common variant cases.
	// This will also immediately free up 3 extra bytes. Also, the pVariant
	// field can now be moved to the lookup table, as the compact index uniquely
	// determines the offset of a possible variant.
	// ScriptID is the script; zero means unset.
	ScriptID Script
	pVariant byte // offset in str, includes preceding '-'
	pExt uint16 // offset of first extension, includes preceding '-'
	// str is the string representation of the Tag. It will only be used if the
	// tag has variants or extensions.
	str string
}
// Make parses s with Parse and returns the resulting Tag, dropping the error.
// Whatever Tag Parse yields alongside an error is returned as a sensible
// default.
func Make(s string) Tag {
	tag, _ := Parse(s)
	return tag
}
// Raw returns the base language, script and region exactly as stored in t,
// without attempting to infer missing values.
// TODO: consider removing
func (t Tag) Raw() (b Language, s Script, r Region) {
	b, s, r = t.LangID, t.ScriptID, t.RegionID
	return b, s, r
}
// equalTags reports whether t and a have the same language, script and
// region. Variants and extensions are not compared.
func (t Tag) equalTags(a Tag) bool {
	if t.LangID != a.LangID {
		return false
	}
	if t.ScriptID != a.ScriptID {
		return false
	}
	return t.RegionID == a.RegionID
}
// IsRoot reports whether t is equal to language "und". A tag whose string
// extends past the variant offset is never the root.
func (t Tag) IsRoot() bool {
	hasExtra := int(t.pVariant) < len(t.str)
	return !hasExtra && t.equalTags(Und)
}
// IsPrivateUse reports whether the Tag consists solely of a private use tag,
// which is the case when it has a string representation and a variant offset
// of zero.
func (t Tag) IsPrivateUse() bool {
	if t.str == "" {
		return false
	}
	return t.pVariant == 0
}
// RemakeString is used to update t.str in case lang, script or region changed.
// It is assumed that pExt and pVariant still point to the start of the
// respective parts.
//
// Tags without a string representation are left untouched.
func (t *Tag) RemakeString() {
	if t.str == "" {
		return
	}
	// extra is everything after the core subtags, without its leading '-'.
	extra := t.str[t.pVariant:]
	if t.pVariant > 0 {
		extra = extra[1:]
	}
	// An undefined core followed by a private use section is represented by
	// the private use section alone.
	if t.equalTags(Und) && strings.HasPrefix(extra, "x-") {
		t.str = extra
		t.pVariant = 0
		t.pExt = 0
		return
	}
	var buf [max99thPercentileSize]byte // avoid extra memory allocation in most cases.
	b := buf[:t.genCoreBytes(buf[:])]
	if extra != "" {
		// Shift both offsets by the change in length of the core part.
		diff := len(b) - int(t.pVariant)
		b = append(b, '-')
		b = append(b, extra...)
		t.pVariant = uint8(int(t.pVariant) + diff)
		t.pExt = uint16(int(t.pExt) + diff)
	} else {
		// Nothing follows the core: both offsets point at the end.
		t.pVariant = uint8(len(b))
		t.pExt = uint16(len(b))
	}
	t.str = string(b)
}
// genCoreBytes writes the base language, followed by "-script" and "-region"
// for whichever of those is set, into buf and returns the number of bytes
// written. It will never write more than maxCoreSize bytes.
func (t *Tag) genCoreBytes(buf []byte) int {
	pos := t.LangID.StringToBuf(buf[:])
	if script := t.ScriptID; script != 0 {
		pos += copy(buf[pos:], "-")
		pos += copy(buf[pos:], script.String())
	}
	if region := t.RegionID; region != 0 {
		pos += copy(buf[pos:], "-")
		pos += copy(buf[pos:], region.String())
	}
	return pos
}
// String returns the canonical string representation of the language tag:
// the stored string if there is one, the language alone if neither script
// nor region is set, and the generated core subtags otherwise.
func (t Tag) String() string {
	switch {
	case t.str != "":
		return t.str
	case t.ScriptID == 0 && t.RegionID == 0:
		return t.LangID.String()
	}
	var core [maxCoreSize]byte
	n := t.genCoreBytes(core[:])
	return string(core[:n])
}
// MarshalText implements encoding.TextMarshaler. It produces the same text as
// String and always returns a nil error.
func (t Tag) MarshalText() (text []byte, err error) {
	switch {
	case t.str != "":
		text = append(text, t.str...)
	case t.ScriptID == 0 && t.RegionID == 0:
		text = append(text, t.LangID.String()...)
	default:
		var core [maxCoreSize]byte
		text = core[:t.genCoreBytes(core[:])]
	}
	return text, nil
}
// UnmarshalText implements encoding.TextUnmarshaler. *t is overwritten with
// whatever Parse returns, even when Parse also reports an error.
func (t *Tag) UnmarshalText(text []byte) error {
	parsed, err := Parse(string(text))
	*t = parsed
	return err
}
// Variants returns the part of the tag holding all variants, including the
// leading '-', or the empty string if the variant offset is zero.
func (t Tag) Variants() string {
	if t.pVariant != 0 {
		return t.str[t.pVariant:t.pExt]
	}
	return ""
}
// VariantOrPrivateUseTags returns variants or private use tags: the string
// from the variant offset up to the extension offset, or up to the end of the
// string when the extension offset is zero.
func (t Tag) VariantOrPrivateUseTags() string {
	start := t.pVariant
	if t.pExt == 0 {
		return t.str[start:]
	}
	return t.str[start:t.pExt]
}
// HasString reports whether t carries a string representation, i.e. whether
// it defines more than just the raw language, script and region.
func (t Tag) HasString() bool {
	return len(t.str) != 0
}
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
// specific language are substituted with fields from the parent language.
// The parent for a language may change for newer versions of CLDR.
//
// A tag with variants or extensions has, as its parent, the same tag without
// them. A tag with no language set has Und as its parent.
func (t Tag) Parent() Tag {
	if t.str != "" {
		// Strip the variants and extensions.
		b, s, r := t.Raw()
		t = Tag{LangID: b, ScriptID: s, RegionID: r}
		// A language-script pair collapses to the bare language when the
		// script matches the one addTags yields for that language.
		if t.RegionID == 0 && t.ScriptID != 0 && t.LangID != 0 {
			base, _ := addTags(Tag{LangID: t.LangID})
			if base.ScriptID == t.ScriptID {
				return Tag{LangID: t.LangID}
			}
		}
		return t
	}
	if t.LangID != 0 {
		if t.RegionID != 0 {
			// Use the explicit script, or the one addTags supplies for t.
			maxScript := t.ScriptID
			if maxScript == 0 {
				max, _ := addTags(t)
				maxScript = max.ScriptID
			}
			// Look for an explicit parent entry matching language, script
			// and region.
			for i := range parents {
				if Language(parents[i].lang) == t.LangID && Script(parents[i].maxScript) == maxScript {
					for _, r := range parents[i].fromRegion {
						if Region(r) == t.RegionID {
							return Tag{
								LangID: t.LangID,
								ScriptID: Script(parents[i].script),
								RegionID: Region(parents[i].toRegion),
							}
						}
					}
				}
			}
			// Strip the script if it is the default one.
			base, _ := addTags(Tag{LangID: t.LangID})
			if base.ScriptID != maxScript {
				return Tag{LangID: t.LangID, ScriptID: maxScript}
			}
			return Tag{LangID: t.LangID}
		} else if t.ScriptID != 0 {
			// The parent for a base-script pair with a non-default script is
			// "und" instead of the base language.
			base, _ := addTags(Tag{LangID: t.LangID})
			if base.ScriptID != t.ScriptID {
				return Und
			}
			return Tag{LangID: t.LangID}
		}
	}
	return Und
}
// ParseExtension parses s as an extension and returns it, lowercased, on
// success. ErrSyntax is returned when the first token is not exactly one
// character long or when parsing does not consume all of s.
func ParseExtension(s string) (ext string, err error) {
	scan := makeScannerString(s)
	if len(scan.token) != 1 {
		return "", ErrSyntax
	}
	scan.toLower(0, len(scan.b))
	if end := parseExtension(&scan); end != len(s) {
		return "", ErrSyntax
	}
	return string(scan.b), nil
}
// HasVariants reports whether t has variants, i.e. whether the variant offset
// lies before the extension offset.
func (t Tag) HasVariants() bool {
	return t.pExt > uint16(t.pVariant)
}
// HasExtensions reports whether t has extensions, i.e. whether the extension
// offset lies before the end of the string.
func (t Tag) HasExtensions() bool {
	return len(t.str) > int(t.pExt)
}
// Extension returns the extension of type x for tag t. ok is false, and ext
// is empty, when t does not have the requested extension.
func (t Tag) Extension(x byte) (ext string, ok bool) {
	pos := int(t.pExt)
	for pos < len(t.str)-1 {
		var cur string
		pos, cur = getExtension(t.str, pos)
		if cur[0] == x {
			return cur, true
		}
	}
	return "", false
}
// Extensions returns all extensions of t in the order they appear in the tag.
// The result is an empty, non-nil slice when t has none.
func (t Tag) Extensions() []string {
	exts := []string{}
	for pos := int(t.pExt); pos < len(t.str)-1; {
		next, ext := getExtension(t.str, pos)
		exts = append(exts, ext)
		pos = next
	}
	return exts
}
// TypeForKey returns the type associated with the given key, where key and type
// are of the allowed values defined for the Unicode locale extension ('u') in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// The empty string is returned when t has no type for key.
func (t Tag) TypeForKey(key string) string {
	start, end, _ := t.findTypeForKey(key)
	if start == end {
		return ""
	}
	return t.str[start:end]
}
// Errors returned by SetTypeForKey.
var (
	// errPrivateUse is returned when the receiver is a private use tag.
	errPrivateUse = errors.New("cannot set a key on a private use tag")
	// errInvalidArguments is returned when the key is not two bytes long or
	// the value is non-empty and not between 3 and 8 bytes long.
	errInvalidArguments = errors.New("invalid key or type")
)
// SetTypeForKey returns a new Tag with the key set to type, where key and type
// are of the allowed values defined for the Unicode locale extension ('u') in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// An empty value removes an existing pair with the same key.
//
// The receiver is passed by value and is not modified; callers must use the
// returned Tag. On error the returned Tag equals the receiver.
func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
	if t.IsPrivateUse() {
		return t, errPrivateUse
	}
	if len(key) != 2 {
		return t, errInvalidArguments
	}
	// Remove the setting if value is "".
	if value == "" {
		start, end, _ := t.findTypeForKey(key)
		if start != end {
			// Remove key tag and leading '-'.
			start -= 4
			// Remove a possible empty extension.
			if (end == len(t.str) || t.str[end+2] == '-') && t.str[start-2] == '-' {
				start -= 2
			}
			if start == int(t.pVariant) && end == len(t.str) {
				// Nothing but the core subtags is left: drop the string.
				t.str = ""
				t.pVariant, t.pExt = 0, 0
			} else {
				t.str = fmt.Sprintf("%s%s", t.str[:start], t.str[end:])
			}
		}
		return t, nil
	}
	if len(value) < 3 || len(value) > 8 {
		return t, errInvalidArguments
	}
	var (
		buf [maxCoreSize + maxSimpleUExtensionSize]byte
		uStart int // start of the -u extension.
	)
	// Generate the tag string if needed.
	if t.str == "" {
		uStart = t.genCoreBytes(buf[:])
		buf[uStart] = '-'
		uStart++
	}
	// Create new key-type pair and parse it to verify.
	b := buf[uStart:]
	copy(b, "u-")
	copy(b[2:], key)
	b[4] = '-'
	b = b[:5+copy(b[5:], value)]
	scan := makeScanner(b)
	if parseExtensions(&scan); scan.err != nil {
		return t, scan.err
	}
	// Assemble the replacement string.
	if t.str == "" {
		// The new string is the core followed by the freshly built extension;
		// both offsets point at the '-' preceding it.
		t.pVariant, t.pExt = byte(uStart-1), uint16(uStart-1)
		t.str = string(buf[:uStart+len(b)])
	} else {
		s := t.str
		start, end, hasExt := t.findTypeForKey(key)
		if start == end {
			// The key is not present yet: insert the pair at the reported
			// position, leaving out the "u-" prefix when a -u extension
			// already exists.
			if hasExt {
				b = b[2:]
			}
			t.str = fmt.Sprintf("%s-%s%s", s[:start], b, s[end:])
		} else {
			// The key exists: replace its type in place.
			t.str = fmt.Sprintf("%s%s%s", s[:start], value, s[end:])
		}
	}
	return t, nil
}
// findTypeForKey returns the start and end position for the type corresponding
// to key or the point at which to insert the key-value pair if the type
// wasn't found. The hasExt return value reports whether an -u extension was present.
// When the type is not found, start and end are equal.
// Note: the extensions are typically very small and are likely to contain
// only one key-type pair.
func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) {
	p := int(t.pExt)
	// Nothing to search when the key is malformed or t has no extensions.
	if len(key) != 2 || p == len(t.str) || p == 0 {
		return p, p, false
	}
	s := t.str
	// Find the correct extension.
	for p++; s[p] != 'u'; p++ {
		if s[p] > 'u' {
			// Passed the place where -u would be: insert before this
			// extension.
			p--
			return p, p, false
		}
		if p = nextExtension(s, p); p == len(s) {
			return len(s), len(s), false
		}
	}
	// Proceed to the hyphen following the extension name.
	p++
	// curKey is the key currently being processed.
	curKey := ""
	// Iterate over keys until we get the end of a section.
	for {
		// p points to the hyphen preceding the current token.
		if p3 := p + 3; s[p3] == '-' {
			// Found a key.
			// Check whether we just processed the key that was requested.
			if curKey == key {
				return start, p, true
			}
			// Set to the next key and continue scanning type tokens.
			curKey = s[p+1 : p3]
			if curKey > key {
				// Keys beyond this point sort after the requested one, so
				// this is the insertion point.
				return p, p, true
			}
			// Start of the type token sequence.
			start = p + 4
			// A type is at least 3 characters long.
			p += 7 // 4 + 3
		} else {
			// Attribute or type, which is at least 3 characters long.
			p += 4
		}
		// p points past the third character of a type or attribute.
		max := p + 5 // maximum length of token plus hyphen.
		if len(s) < max {
			max = len(s)
		}
		for ; p < max && s[p] != '-'; p++ {
		}
		// Bail if we have exhausted all tokens or if the next token starts
		// a new extension.
		if p == len(s) || s[p+2] == '-' {
			if curKey == key {
				return start, p, true
			}
			return p, p, true
		}
	}
}
// ParseBase converts a 2- or 3-letter ISO 639 code into a Language.
// Input of any other length yields ErrSyntax. A well-formed but unknown
// identifier is reported as a ValueError; any other failure is passed through
// from the lookup unchanged.
func ParseBase(s string) (Language, error) {
	switch len(s) {
	case 2, 3:
		// Work on a private copy: the lookup receives a mutable byte slice.
		var buf [3]byte
		n := copy(buf[:], s)
		return getLangID(buf[:n])
	}
	return 0, ErrSyntax
}
// ParseScript converts a 4-letter ISO 15924 code into a Script.
// Input that is not exactly four bytes long yields ErrSyntax. A well-formed
// but unknown identifier is reported as a ValueError; any other failure is
// passed through from the lookup unchanged.
func ParseScript(s string) (Script, error) {
	const scriptLen = 4
	if len(s) != scriptLen {
		return 0, ErrSyntax
	}
	// Work on a private copy: the lookup receives a mutable byte slice.
	var buf [scriptLen]byte
	n := copy(buf[:], s)
	return getScriptID(script, buf[:n])
}
// EncodeM49 looks up the Region that corresponds to the UN M.49 code r.
// An error is returned when r is not a valid code.
func EncodeM49(r int) (Region, error) {
	region, err := getRegionM49(r)
	return region, err
}
// ParseRegion converts a 2- or 3-letter ISO 3166-1 code or a UN M.49 code
// into a Region. Input of any other length yields ErrSyntax. A well-formed
// but unknown identifier is reported as a ValueError; any other failure is
// passed through from the lookup unchanged.
func ParseRegion(s string) (Region, error) {
	switch len(s) {
	case 2, 3:
		// Work on a private copy: the lookup receives a mutable byte slice.
		var buf [3]byte
		n := copy(buf[:], s)
		return getRegionID(buf[:n])
	}
	return 0, ErrSyntax
}
// IsCountry reports whether r is a country or autonomous area, including
// non-standard definitions from CLDR. The zero region, groups, and
// private-use regions other than XK are not countries.
func (r Region) IsCountry() bool {
	switch {
	case r == 0, r.IsGroup():
		return false
	case r.IsPrivateUse() && r != _XK:
		return false
	}
	return true
}
// IsGroup reports whether r denotes a collection of regions, including
// non-standard definitions from CLDR. The zero region is never a group.
func (r Region) IsGroup() bool {
	return r != 0 && int(regionInclusion[r]) < len(regionContainment)
}
// Contains reports whether Region c lies within Region r. A region always
// contains itself.
func (r Region) Contains(c Region) bool {
	if r == c {
		return true
	}
	outer := regionInclusion[r]
	if outer >= nRegionGroups {
		// r is not a group, so it cannot contain any other region.
		return false
	}
	mask := regionContainment[outer]
	inner := regionInclusion[c]
	bits := regionInclusionBits[inner]
	if inner < nRegionGroups {
		// c is itself a group: it must be a strict subset of r.
		return bits&^mask == 0
	}
	// c is a single region, which may belong to several disjoint groups;
	// overlapping with any of them counts as containment.
	return bits&mask != 0
}
// errNoTLD is returned by Region.TLD for regions without a ccTLD.
var errNoTLD = errors.New("language: region is not a valid ccTLD")

// TLD returns the country code top-level domain (ccTLD) for r. GB is mapped
// to UK; every other region is either returned as is or rejected with an
// error.
//
// This method may return an error for a region for which there exists a
// canonical form with a ccTLD. To get that ccTLD, canonicalize r first. The
// region will already be canonicalized if it was obtained from a Tag that was
// obtained using any of the default methods.
func (r Region) TLD() (Region, error) {
	// See http://en.wikipedia.org/wiki/Country_code_top-level_domain for the
	// difference between ISO 3166-1 and IANA ccTLD.
	tld := r
	if tld == _GB {
		tld = _UK
	}
	if tld.typ()&ccTLD != 0 {
		return tld, nil
	}
	return 0, errNoTLD
}
// Canonicalize returns the replacement for r when r is deprecated, and r
// itself otherwise. Deprecated regions that were split into multiple regions
// have no replacement and are returned unchanged.
func (r Region) Canonicalize() Region {
	cr := normRegion(r)
	if cr == 0 {
		// No replacement is known; keep the region as is.
		return r
	}
	return cr
}
// Variant represents a registered variant of a language as defined by BCP 47.
type Variant struct {
	ID  uint8  // value stored for the variant in variantIndex
	str string // lower-cased variant subtag
}

// ParseVariant looks up s, ignoring case, among the known variants. A
// ValueError carrying the lower-cased input is returned when s is not a
// valid variant.
func ParseVariant(s string) (Variant, error) {
	lower := strings.ToLower(s)
	id, ok := variantIndex[lower]
	if !ok {
		return Variant{}, NewValueError([]byte(lower))
	}
	return Variant{ID: id, str: lower}, nil
}

// String returns the string representation of the variant.
func (v Variant) String() string {
	return v.str
}

View file

@ -17,11 +17,11 @@ import (
// if it could not be found.
func findIndex(idx tag.Index, key []byte, form string) (index int, err error) {
if !tag.FixCase(form, key) {
return 0, errSyntax
return 0, ErrSyntax
}
i := idx.Index(key)
if i == -1 {
return 0, mkErrInvalid(key)
return 0, NewValueError(key)
}
return i, nil
}
@ -32,38 +32,45 @@ func searchUint(imap []uint16, key uint16) int {
})
}
type langID uint16
type Language uint16
// getLangID returns the langID of s if s is a canonical subtag
// or langUnknown if s is not a canonical subtag.
func getLangID(s []byte) (langID, error) {
func getLangID(s []byte) (Language, error) {
if len(s) == 2 {
return getLangISO2(s)
}
return getLangISO3(s)
}
// TODO language normalization as well as the AliasMaps could be moved to the
// higher level package, but it is a bit tricky to separate the generation.
func (id Language) Canonicalize() (Language, AliasType) {
return normLang(id)
}
// mapLang returns the mapped langID of id according to mapping m.
func normLang(id langID) (langID, langAliasType) {
k := sort.Search(len(langAliasMap), func(i int) bool {
return langAliasMap[i].from >= uint16(id)
func normLang(id Language) (Language, AliasType) {
k := sort.Search(len(AliasMap), func(i int) bool {
return AliasMap[i].From >= uint16(id)
})
if k < len(langAliasMap) && langAliasMap[k].from == uint16(id) {
return langID(langAliasMap[k].to), langAliasTypes[k]
if k < len(AliasMap) && AliasMap[k].From == uint16(id) {
return Language(AliasMap[k].To), AliasTypes[k]
}
return id, langAliasTypeUnknown
return id, AliasTypeUnknown
}
// getLangISO2 returns the langID for the given 2-letter ISO language code
// or unknownLang if this does not exist.
func getLangISO2(s []byte) (langID, error) {
func getLangISO2(s []byte) (Language, error) {
if !tag.FixCase("zz", s) {
return 0, errSyntax
return 0, ErrSyntax
}
if i := lang.Index(s); i != -1 && lang.Elem(i)[3] != 0 {
return langID(i), nil
return Language(i), nil
}
return 0, mkErrInvalid(s)
return 0, NewValueError(s)
}
const base = 'z' - 'a' + 1
@ -88,7 +95,7 @@ func intToStr(v uint, s []byte) {
// getLangISO3 returns the langID for the given 3-letter ISO language code
// or unknownLang if this does not exist.
func getLangISO3(s []byte) (langID, error) {
func getLangISO3(s []byte) (Language, error) {
if tag.FixCase("und", s) {
// first try to match canonical 3-letter entries
for i := lang.Index(s[:2]); i != -1; i = lang.Next(s[:2], i) {
@ -96,7 +103,7 @@ func getLangISO3(s []byte) (langID, error) {
// We treat "und" as special and always translate it to "unspecified".
// Note that ZZ and Zzzz are private use and are not treated as
// unspecified by default.
id := langID(i)
id := Language(i)
if id == nonCanonicalUnd {
return 0, nil
}
@ -104,26 +111,26 @@ func getLangISO3(s []byte) (langID, error) {
}
}
if i := altLangISO3.Index(s); i != -1 {
return langID(altLangIndex[altLangISO3.Elem(i)[3]]), nil
return Language(altLangIndex[altLangISO3.Elem(i)[3]]), nil
}
n := strToInt(s)
if langNoIndex[n/8]&(1<<(n%8)) != 0 {
return langID(n) + langNoIndexOffset, nil
return Language(n) + langNoIndexOffset, nil
}
// Check for non-canonical uses of ISO3.
for i := lang.Index(s[:1]); i != -1; i = lang.Next(s[:1], i) {
if e := lang.Elem(i); e[2] == s[1] && e[3] == s[2] {
return langID(i), nil
return Language(i), nil
}
}
return 0, mkErrInvalid(s)
return 0, NewValueError(s)
}
return 0, errSyntax
return 0, ErrSyntax
}
// stringToBuf writes the string to b and returns the number of bytes
// StringToBuf writes the string to b and returns the number of bytes
// written. cap(b) must be >= 3.
func (id langID) stringToBuf(b []byte) int {
func (id Language) StringToBuf(b []byte) int {
if id >= langNoIndexOffset {
intToStr(uint(id)-langNoIndexOffset, b[:3])
return 3
@ -140,7 +147,7 @@ func (id langID) stringToBuf(b []byte) int {
// String returns the BCP 47 representation of the langID.
// Use b as variable name, instead of id, to ensure the variable
// used is consistent with that of Base in which this type is embedded.
func (b langID) String() string {
func (b Language) String() string {
if b == 0 {
return "und"
} else if b >= langNoIndexOffset {
@ -157,7 +164,7 @@ func (b langID) String() string {
}
// ISO3 returns the ISO 639-3 language code.
func (b langID) ISO3() string {
func (b Language) ISO3() string {
if b == 0 || b >= langNoIndexOffset {
return b.String()
}
@ -173,15 +180,24 @@ func (b langID) ISO3() string {
}
// IsPrivateUse reports whether this language code is reserved for private use.
func (b langID) IsPrivateUse() bool {
func (b Language) IsPrivateUse() bool {
return langPrivateStart <= b && b <= langPrivateEnd
}
type regionID uint16
// SuppressScript returns the script marked as SuppressScript in the IANA
// language tag repository, or 0 if there is no such script.
func (b Language) SuppressScript() Script {
if b < langNoIndexOffset {
return Script(suppressScript[b])
}
return 0
}
type Region uint16
// getRegionID returns the region id for s if s is a valid 2-letter region code
// or unknownRegion.
func getRegionID(s []byte) (regionID, error) {
func getRegionID(s []byte) (Region, error) {
if len(s) == 3 {
if isAlpha(s[0]) {
return getRegionISO3(s)
@ -195,34 +211,34 @@ func getRegionID(s []byte) (regionID, error) {
// getRegionISO2 returns the regionID for the given 2-letter ISO country code
// or unknownRegion if this does not exist.
func getRegionISO2(s []byte) (regionID, error) {
func getRegionISO2(s []byte) (Region, error) {
i, err := findIndex(regionISO, s, "ZZ")
if err != nil {
return 0, err
}
return regionID(i) + isoRegionOffset, nil
return Region(i) + isoRegionOffset, nil
}
// getRegionISO3 returns the regionID for the given 3-letter ISO country code
// or unknownRegion if this does not exist.
func getRegionISO3(s []byte) (regionID, error) {
func getRegionISO3(s []byte) (Region, error) {
if tag.FixCase("ZZZ", s) {
for i := regionISO.Index(s[:1]); i != -1; i = regionISO.Next(s[:1], i) {
if e := regionISO.Elem(i); e[2] == s[1] && e[3] == s[2] {
return regionID(i) + isoRegionOffset, nil
return Region(i) + isoRegionOffset, nil
}
}
for i := 0; i < len(altRegionISO3); i += 3 {
if tag.Compare(altRegionISO3[i:i+3], s) == 0 {
return regionID(altRegionIDs[i/3]), nil
return Region(altRegionIDs[i/3]), nil
}
}
return 0, mkErrInvalid(s)
return 0, NewValueError(s)
}
return 0, errSyntax
return 0, ErrSyntax
}
func getRegionM49(n int) (regionID, error) {
func getRegionM49(n int) (Region, error) {
if 0 < n && n <= 999 {
const (
searchBits = 7
@ -236,7 +252,7 @@ func getRegionM49(n int) (regionID, error) {
return buf[i] >= val
})
if r := fromM49[int(m49Index[idx])+i]; r&^regionMask == val {
return regionID(r & regionMask), nil
return Region(r & regionMask), nil
}
}
var e ValueError
@ -247,13 +263,13 @@ func getRegionM49(n int) (regionID, error) {
// normRegion returns a region if r is deprecated or 0 otherwise.
// TODO: consider supporting BYS (-> BLR), CSK (-> 200 or CZ), PHI (-> PHL) and AFI (-> DJ).
// TODO: consider mapping split up regions to new most populous one (like CLDR).
func normRegion(r regionID) regionID {
func normRegion(r Region) Region {
m := regionOldMap
k := sort.Search(len(m), func(i int) bool {
return m[i].from >= uint16(r)
return m[i].From >= uint16(r)
})
if k < len(m) && m[k].from == uint16(r) {
return regionID(m[k].to)
if k < len(m) && m[k].From == uint16(r) {
return Region(m[k].To)
}
return 0
}
@ -264,13 +280,13 @@ const (
bcp47Region
)
func (r regionID) typ() byte {
func (r Region) typ() byte {
return regionTypes[r]
}
// String returns the BCP 47 representation for the region.
// It returns "ZZ" for an unspecified region.
func (r regionID) String() string {
func (r Region) String() string {
if r < isoRegionOffset {
if r == 0 {
return "ZZ"
@ -284,7 +300,7 @@ func (r regionID) String() string {
// ISO3 returns the 3-letter ISO code of r.
// Note that not all regions have a 3-letter ISO code.
// In such cases this method returns "ZZZ".
func (r regionID) ISO3() string {
func (r Region) ISO3() string {
if r < isoRegionOffset {
return "ZZZ"
}
@ -301,29 +317,29 @@ func (r regionID) ISO3() string {
// M49 returns the UN M.49 encoding of r, or 0 if this encoding
// is not defined for r.
func (r regionID) M49() int {
func (r Region) M49() int {
return int(m49[r])
}
// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
// may include private-use tags that are assigned by CLDR and used in this
// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
func (r regionID) IsPrivateUse() bool {
func (r Region) IsPrivateUse() bool {
return r.typ()&iso3166UserAssigned != 0
}
type scriptID uint8
type Script uint8
// getScriptID returns the script id for string s. It assumes that s
// is of the format [A-Z][a-z]{3}.
func getScriptID(idx tag.Index, s []byte) (scriptID, error) {
func getScriptID(idx tag.Index, s []byte) (Script, error) {
i, err := findIndex(idx, s, "Zzzz")
return scriptID(i), err
return Script(i), err
}
// String returns the script code in title case.
// It returns "Zzzz" for an unspecified script.
func (s scriptID) String() string {
func (s Script) String() string {
if s == 0 {
return "Zzzz"
}
@ -331,7 +347,7 @@ func (s scriptID) String() string {
}
// IsPrivateUse reports whether this script code is reserved for private use.
func (s scriptID) IsPrivateUse() bool {
func (s Script) IsPrivateUse() bool {
return _Qaaa <= s && s <= _Qabx
}
@ -389,7 +405,7 @@ func grandfathered(s [maxAltTaglen]byte) (t Tag, ok bool) {
if v < 0 {
return Make(altTags[altTagIndex[-v-1]:altTagIndex[-v]]), true
}
t.lang = langID(v)
t.LangID = Language(v)
return t, true
}
return t, false

226
vendor/golang.org/x/text/internal/language/match.go generated vendored Normal file
View file

@ -0,0 +1,226 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import "errors"
// scriptRegionFlags is a bit set attached to entries of the likely-subtags
// tables; addTags tests these bits through the entries' flags field.
type scriptRegionFlags uint8
const (
// isList marks an entry that refers to a range of entries in a companion
// list table instead of holding a single result (see its use in addTags).
isList = 1 << iota
// scriptInFrom: presumably the script was part of the lookup key of the
// entry — TODO confirm against the table generator.
scriptInFrom
// regionInFrom: presumably the region was part of the lookup key of the
// entry — TODO confirm against the table generator.
regionInFrom
)
// setUndefinedLang assigns id to t.LangID only when no language is set yet.
func (t *Tag) setUndefinedLang(id Language) {
	if t.LangID != 0 {
		return
	}
	t.LangID = id
}
// setUndefinedScript assigns id to t.ScriptID only when no script is set yet.
func (t *Tag) setUndefinedScript(id Script) {
	if t.ScriptID != 0 {
		return
	}
	t.ScriptID = id
}
// setUndefinedRegion assigns id to t.RegionID when no region is set yet, or
// when the current region contains id (so id is at least as specific).
func (t *Tag) setUndefinedRegion(id Region) {
	if t.RegionID != 0 && !t.RegionID.Contains(id) {
		return
	}
	t.RegionID = id
}
// ErrMissingLikelyTagsData indicates no information was available
// to compute likely values of missing tags. addTags returns it when the
// language ID is at or above langNoIndexOffset and no earlier rule in that
// function produced a result.
var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
// addLikelySubtags fills in the most likely value for each subtag of t.
// Usually this only sets fields that were unknown, but in some cases an
// existing value may be altered. When the expansion changes nothing, t is
// returned untouched; otherwise the string form of the result is regenerated.
// It returns an ErrMissingLikelyTagsData error if the given locale cannot be
// expanded.
func (t Tag) addLikelySubtags() (Tag, error) {
	expanded, err := addTags(t)
	switch {
	case err != nil:
		return t, err
	case expanded.equalTags(t):
		return t, nil
	}
	expanded.RemakeString()
	return expanded, nil
}
// specializeRegion tries to replace a group region in t by a specific one.
// It reports whether t.RegionID is a group; the region is only replaced when
// the language and script of the group's likely entry both match t.
func specializeRegion(t *Tag) bool {
	group := regionInclusion[t.RegionID]
	if group >= nRegionGroups {
		return false
	}
	likely := likelyRegionGroup[group]
	if Language(likely.lang) == t.LangID && Script(likely.script) == t.ScriptID {
		t.RegionID = Region(likely.region)
	}
	return true
}
// Maximize returns a copy of t in which missing subtags are filled in. The
// result comes straight from addTags; its string form is not regenerated here.
func (t Tag) Maximize() (Tag, error) {
	full, err := addTags(t)
	return full, err
}
// addTags fills in unspecified language, script and region fields of t using
// the likely-subtags tables (likelyRegion, likelyLang, likelyScript,
// likelyRegionGroup and their list variants). Private-use tags are returned
// unchanged. It returns ErrMissingLikelyTagsData when no rule applied and
// t.LangID is at or above langNoIndexOffset.
func addTags(t Tag) (Tag, error) {
// We leave private use identifiers alone.
if t.IsPrivateUse() {
return t, nil
}
if t.ScriptID != 0 && t.RegionID != 0 {
if t.LangID != 0 {
// already fully specified
specializeRegion(&t)
return t, nil
}
// Search matches for und-script-region. Note that for these cases
// region will never be a group so there is no need to check for this.
list := likelyRegion[t.RegionID : t.RegionID+1]
// An isList entry stores an offset in lang and a count in script that
// together select a range of likelyRegionList.
if x := list[0]; x.flags&isList != 0 {
list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
}
for _, x := range list {
// Deviating from the spec. See match_test.go for details.
if Script(x.script) == t.ScriptID {
t.setUndefinedLang(Language(x.lang))
return t, nil
}
}
}
if t.LangID != 0 {
// Search matches for lang-script and lang-region, where lang != und.
if t.LangID < langNoIndexOffset {
x := likelyLang[t.LangID]
if x.flags&isList != 0 {
// An isList entry stores an offset in region and a count in script
// that together select a range of likelyLangList.
list := likelyLangList[x.region : x.region+uint16(x.script)]
if t.ScriptID != 0 {
for _, x := range list {
if Script(x.script) == t.ScriptID && x.flags&scriptInFrom != 0 {
t.setUndefinedRegion(Region(x.region))
return t, nil
}
}
} else if t.RegionID != 0 {
count := 0
goodScript := true
// tt accumulates a candidate result; it is only returned when
// exactly one list entry matched.
tt := t
for _, x := range list {
// We visit all entries for which the script was not
// defined, including the ones where the region was not
// defined. This allows for proper disambiguation within
// regions.
if x.flags&scriptInFrom == 0 && t.RegionID.Contains(Region(x.region)) {
tt.RegionID = Region(x.region)
tt.setUndefinedScript(Script(x.script))
goodScript = goodScript && tt.ScriptID == Script(x.script)
count++
}
}
if count == 1 {
return tt, nil
}
// Even if we fail to find a unique Region, we might have
// an unambiguous script.
if goodScript {
t.ScriptID = tt.ScriptID
}
}
}
}
} else {
// Search matches for und-script.
if t.ScriptID != 0 {
x := likelyScript[t.ScriptID]
if x.region != 0 {
t.setUndefinedRegion(Region(x.region))
t.setUndefinedLang(Language(x.lang))
return t, nil
}
}
// Search matches for und-region. If und-script-region exists, it would
// have been found earlier.
if t.RegionID != 0 {
if i := regionInclusion[t.RegionID]; i < nRegionGroups {
// The region is a group: use the group's likely entry and replace
// the region by the specific one it names.
x := likelyRegionGroup[i]
if x.region != 0 {
t.setUndefinedLang(Language(x.lang))
t.setUndefinedScript(Script(x.script))
t.RegionID = Region(x.region)
}
} else {
x := likelyRegion[t.RegionID]
if x.flags&isList != 0 {
// Only the first entry of the list is consulted here.
x = likelyRegionList[x.lang]
}
if x.script != 0 && x.flags != scriptInFrom {
t.setUndefinedLang(Language(x.lang))
t.setUndefinedScript(Script(x.script))
return t, nil
}
}
}
}
// Search matches for lang.
if t.LangID < langNoIndexOffset {
x := likelyLang[t.LangID]
if x.flags&isList != 0 {
// Only the first entry of the list is consulted here.
x = likelyLangList[x.region]
}
if x.region != 0 {
t.setUndefinedScript(Script(x.script))
t.setUndefinedRegion(Region(x.region))
}
specializeRegion(&t)
if t.LangID == 0 {
t.LangID = _en // default language
}
return t, nil
}
return t, ErrMissingLikelyTagsData
}
// setTagsFrom copies the language, script and region IDs of id into t and
// leaves every other field of t untouched.
func (t *Tag) setTagsFrom(id Tag) {
	t.LangID, t.ScriptID, t.RegionID = id.LangID, id.ScriptID, id.RegionID
}
// minimize strips the region or script subtags from t such that
// t.addLikelySubtags() == t.minimize().addLikelySubtags(). The string form
// of the result is regenerated on success.
func (t Tag) minimize() (Tag, error) {
	reduced, err := minimizeTags(t)
	if err != nil {
		return reduced, err
	}
	reduced.RemakeString()
	return reduced, nil
}
// minimizeTags mimics the behavior of the ICU 51 C implementation: it picks
// the first of lang, lang-region and lang-script whose maximized form equals
// the maximized form of t, and copies that candidate's IDs into t. A tag equal
// to Und is returned as is.
func minimizeTags(t Tag) (Tag, error) {
	if t.equalTags(Und) {
		return t, nil
	}
	full, err := addTags(t)
	if err != nil {
		return t, err
	}
	// Candidates are tried from least to most specific.
	candidates := [...]Tag{
		{LangID: t.LangID},
		{LangID: t.LangID, RegionID: t.RegionID},
		{LangID: t.LangID, ScriptID: t.ScriptID},
	}
	for i := range candidates {
		expanded, err := addTags(candidates[i])
		if err != nil || !full.equalTags(expanded) {
			continue
		}
		t.setTagsFrom(candidates[i])
		break
	}
	return t, nil
}

594
vendor/golang.org/x/text/internal/language/parse.go generated vendored Normal file
View file

@ -0,0 +1,594 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"bytes"
"errors"
"fmt"
"sort"
"golang.org/x/text/internal/tag"
)
// isAlpha reports whether b is a letter rather than a digit.
// b must be an ASCII letter or digit; the test is simply whether b sorts
// after '9'.
func isAlpha(b byte) bool {
	const lastDigit = '9'
	return lastDigit < b
}
// isAlphaNum reports whether every byte of s is an ASCII letter or digit.
// An empty slice yields true.
func isAlphaNum(s []byte) bool {
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case 'a' <= c && c <= 'z':
		case 'A' <= c && c <= 'Z':
		case '0' <= c && c <= '9':
		default:
			return false
		}
	}
	return true
}
var (
	// ErrSyntax is returned by any of the parsing functions when the
	// input is not well-formed, according to BCP 47.
	// TODO: return the position at which the syntax error occurred?
	ErrSyntax = errors.New("language: tag is not well-formed")

	// ErrDuplicateKey is returned when a tag contains the same key twice with
	// different values in the -u section.
	ErrDuplicateKey = errors.New("language: different values for same key in -u extension")
)
// ValueError is returned by any of the parsing functions when the
// input is well-formed but the respective subtag is not recognized
// as a valid value.
type ValueError struct {
	v [8]byte // offending subtag, zero-padded; longer input is truncated
}

// NewValueError creates a new ValueError holding up to the first 8 bytes of
// tag.
func NewValueError(tag []byte) ValueError {
	e := ValueError{}
	copy(e.v[:], tag)
	return e
}

// tag returns the stored subtag without its zero padding.
func (e ValueError) tag() []byte {
	if n := bytes.IndexByte(e.v[:], 0); n >= 0 {
		return e.v[:n]
	}
	// No padding byte found: all 8 bytes belong to the subtag.
	return e.v[:]
}

// Error implements the error interface.
func (e ValueError) Error() string {
	return fmt.Sprintf("language: subtag %q is well-formed but unknown", e.tag())
}

// Subtag returns the subtag for which the error occurred.
func (e ValueError) Subtag() string {
	return string(e.tag())
}
// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
// Underscores are rewritten to hyphens by init before the first token is read.
type scanner struct {
// b is the input buffer; the scanner routines modify it in place.
b []byte
// bytes is inline storage that makeScannerString uses as the backing array
// of b when the input fits.
bytes [max99thPercentileSize]byte
// token is the current token; scan resets it to nil before looking for
// the next one, so it stays nil once the input is exhausted.
token []byte
start int // start position of the current token
end int // end position of the current token
next int // next point for scan
// err is the error recorded by setError.
err error
// done is set by scan once no further token could be read.
done bool
}
// makeScannerString returns a scanner positioned on the first token of s.
// Input that fits in the scanner's inline array is copied there; longer
// input is copied into a freshly allocated slice.
func makeScannerString(s string) scanner {
	var scan scanner
	if len(s) > len(scan.bytes) {
		scan.b = []byte(s)
	} else {
		n := copy(scan.bytes[:], s)
		scan.b = scan.bytes[:n]
	}
	scan.init()
	return scan
}
// makeScanner returns a scanner positioned on the first token of b.
// b is used directly as the input buffer: it is not copied and may be
// modified by the scanner routines.
func makeScanner(b []byte) scanner {
	var scan scanner
	scan.b = b
	scan.init()
	return scan
}
// init normalizes every '_' separator in the buffer to '-' and then reads
// the first token.
func (s *scanner) init() {
	for i := range s.b {
		if s.b[i] == '_' {
			s.b[i] = '-'
		}
	}
	s.scan()
}
// toLower lower-cases, in place, the ASCII upper-case letters of the buffer
// in the half-open range [start, end).
func (s *scanner) toLower(start, end int) {
	for i := start; i < end; i++ {
		if c := s.b[i]; c >= 'A' && c <= 'Z' {
			s.b[i] = c + ('a' - 'A')
		}
	}
}
// setError records e as the scanner's error. The first error wins, except
// that ErrSyntax takes precedence over any other error recorded earlier.
func (s *scanner) setError(e error) {
	switch {
	case s.err == nil:
		s.err = e
	case e == ErrSyntax && s.err != ErrSyntax:
		s.err = e
	}
}
// resizeRange shrinks or grows the buffer so that a replacement of newSize
// bytes fits exactly in place of the range [oldStart, oldEnd). The bytes
// before oldStart and from oldEnd onwards are preserved; the content of the
// resized range itself is left for the caller to fill in.
//
// On return s.start is oldStart, s.end is oldStart+newSize and s.next is
// moved by the same amount as s.end, so the scan point stays just after the
// resized range.
//
// The buffer is reallocated only when the new length exceeds its capacity;
// otherwise the tail is shifted in place, so slices obtained from the buffer
// earlier may see their content change.
func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
	s.start = oldStart
	end := oldStart + newSize
	if end == oldEnd {
		// Same size: nothing to move.
		return
	}
	diff := end - oldEnd
	var b []byte
	if n := len(s.b) + diff; n > cap(s.b) {
		// Not enough room: allocate and carry over the untouched prefix.
		b = make([]byte, n)
		copy(b, s.b[:oldStart])
	} else {
		// Enough room: reslice and reuse the existing backing array.
		b = s.b[:n]
	}
	// Move the tail to its new position. copy handles overlapping ranges,
	// and s.b still has its original length at this point.
	copy(b[end:], s.b[oldEnd:len(s.b)])
	s.b = b
	s.next = end + (s.next - s.end)
	s.end = end
}
// replace overwrites the current token with repl, resizing the buffer first
// so that repl fits exactly.
func (s *scanner) replace(repl string) {
	from, to := s.start, s.end
	s.resizeRange(from, to, len(repl))
	copy(s.b[s.start:], repl)
}
// gobble removes the current token from the input and records e through
// setError.
// Caller must call scan after calling gobble.
func (s *scanner) gobble(e error) {
s.setError(e)
if s.start == 0 {
// The token is at the very beginning: move everything from s.next
// onwards to the front of the buffer. The unary '+' is a no-op.
s.b = s.b[:+copy(s.b, s.b[s.next:])]
s.end = 0
} else {
// Otherwise drop the token together with the byte preceding it
// (position s.start-1) by moving the rest of the buffer down.
s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])]
s.end = s.start - 1
}
// Resume scanning where the removed token used to start.
s.next = s.start
}
// deleteRange cuts the bytes in [start, end) out of the buffer and shifts
// the scanner positions down by the number of bytes removed. The range must
// lie before the current token.
func (s *scanner) deleteRange(start, end int) {
	n := copy(s.b[start:], s.b[end:])
	s.b = s.b[:start+n]
	removed := end - start
	s.next -= removed
	s.start -= removed
	s.end -= removed
}
// scan parses the next token of a BCP 47 string. Tokens that are larger
// than 8 characters or include non-alphanumeric characters result in an error
// and are gobbled and removed from the output. Empty tokens are treated the
// same way.
// It returns the end position of the last token consumed.
func (s *scanner) scan() (end int) {
// Remember where the previous token ended; that is the return value.
end = s.end
s.token = nil
for s.start = s.next; s.next < len(s.b); {
i := bytes.IndexByte(s.b[s.next:], '-')
if i == -1 {
// No further separator: the token runs to the end of the buffer.
s.end = len(s.b)
s.next = len(s.b)
i = s.end - s.start
} else {
s.end = s.next + i
s.next = s.end + 1
}
token := s.b[s.start:s.end]
// i is the token length here. Invalid tokens are removed with an
// ErrSyntax and scanning continues with the following token.
if i < 1 || i > 8 || !isAlphaNum(token) {
s.gobble(ErrSyntax)
continue
}
s.token = token
return end
}
// No token was found. A trailing '-' left in the buffer is a syntax
// error and is trimmed.
if n := len(s.b); n > 0 && s.b[n-1] == '-' {
s.setError(ErrSyntax)
s.b = s.b[:len(s.b)-1]
}
s.done = true
return end
}
// acceptMinSize advances past the current token and then consumes every
// following token whose length is at least min. It returns the end position
// of the last token consumed, which is the end of the current token when no
// following token qualifies.
func (s *scanner) acceptMinSize(min int) (end int) {
	last := s.end
	s.scan()
	for len(s.token) >= min {
		last = s.end
		s.scan()
	}
	return last
}
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
// failed it returns an error and any part of the tag that could be parsed.
// If parsing succeeded but an unknown value was found, it returns
// ValueError. The Tag returned in this case is just stripped of the unknown
// value. All other values are preserved. It accepts tags in the BCP 47 format
// and extensions to this standard defined in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
//
// Parse never panics: should the scanner hit an unexpected state on malformed
// input, the panic is recovered and reported as (Und, ErrSyntax).
func Parse(s string) (t Tag, err error) {
	// TODO: consider supporting old-style locale key-value pairs.
	if s == "" {
		return Und, ErrSyntax
	}
	// The parser relies on index arithmetic over the raw buffer. Input is
	// typically untrusted, so turn any resulting panic into a syntax error
	// instead of crashing the caller.
	defer func() {
		if recover() != nil {
			t = Und
			err = ErrSyntax
		}
	}()
	if len(s) <= maxAltTaglen {
		// Short inputs may be grandfathered tags; look them up in
		// lower-cased, hyphen-separated form.
		b := [maxAltTaglen]byte{}
		for i, c := range s {
			// Generating invalid UTF-8 is okay as it won't match.
			if 'A' <= c && c <= 'Z' {
				c += 'a' - 'A'
			} else if c == '_' {
				c = '-'
			}
			b[i] = byte(c)
		}
		if t, ok := grandfathered(b); ok {
			return t, nil
		}
	}
	scan := makeScannerString(s)
	return parse(&scan, s)
}
// parse builds a Tag from the tokens delivered by scan, which must already be
// positioned on the first token. s is the original input string; it is reused
// for t.str when it compares equal to the normalized buffer. The error
// returned is the one accumulated in scan.err, except for the two early
// ErrSyntax returns below.
func parse(scan *scanner, s string) (t Tag, err error) {
t = Und
var end int
if n := len(scan.token); n <= 1 {
// A first token of at most one byte is only accepted when it is 'x',
// in which case the whole input is parsed as extensions.
scan.toLower(0, len(scan.b))
if n == 0 || scan.token[0] != 'x' {
return t, ErrSyntax
}
end = parseExtensions(scan)
} else if n >= 4 {
// A first token of four or more bytes is rejected.
return Und, ErrSyntax
} else { // the usual case
t, end = parseTag(scan)
if n := len(scan.token); n == 1 {
// A single-byte token after the core tag starts the extensions.
t.pExt = uint16(end)
end = parseExtensions(scan)
} else if end < len(scan.b) {
// Anything left over that is not an extension is a syntax error
// and is cut off.
scan.setError(ErrSyntax)
scan.b = scan.b[:end]
}
}
if int(t.pVariant) < len(scan.b) {
// The buffer extends beyond pVariant, so a string form is stored.
if end < len(s) {
s = s[:end]
}
// Reuse the input string when it already equals the normalized form;
// presumably this avoids allocating a copy.
if len(s) > 0 && tag.Compare(s, scan.b) == 0 {
t.str = s
} else {
t.str = string(scan.b)
}
} else {
// Nothing beyond the core tag: keep no string and reset the offsets.
t.pVariant, t.pExt = 0, 0
}
return t, scan.err
}
// parseTag parses language, script, region and variants.
// It returns a Tag and the end position in the input that was parsed.
// The scanner buffer is normalized along the way: the language and region
// tokens are replaced by the String form of the IDs found, unrecognized
// script and region tokens are removed, and everything from the first token
// after the region onwards is lower-cased.
func parseTag(scan *scanner) (t Tag, end int) {
var e error
// TODO: set an error if an unknown lang, script or region is encountered.
t.LangID, e = getLangID(scan.token)
scan.setError(e)
scan.replace(t.LangID.String())
langStart := scan.start
end = scan.scan()
// Consume 3-letter alphabetic tokens following the language (extlang).
for len(scan.token) == 3 && isAlpha(scan.token[0]) {
// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
// to a tag of the form <extlang>.
lang, e := getLangID(scan.token)
if lang != 0 {
// Overwrite the language at langStart with the extlang, then treat the
// remainder as the token to remove.
t.LangID = lang
copy(scan.b[langStart:], lang.String())
scan.b[langStart+3] = '-'
scan.start = langStart + 4
}
scan.gobble(e)
end = scan.scan()
}
// A 4-letter alphabetic token is a script.
if len(scan.token) == 4 && isAlpha(scan.token[0]) {
t.ScriptID, e = getScriptID(script, scan.token)
if t.ScriptID == 0 {
scan.gobble(e)
}
end = scan.scan()
}
// A token of 2 or 3 bytes is a region.
if n := len(scan.token); n >= 2 && n <= 3 {
t.RegionID, e = getRegionID(scan.token)
if t.RegionID == 0 {
scan.gobble(e)
} else {
scan.replace(t.RegionID.String())
}
end = scan.scan()
}
scan.toLower(scan.start, len(scan.b))
// pVariant marks the end of the language-script-region part; pExt marks
// the end of the variants.
t.pVariant = byte(end)
end = parseVariants(scan, end, t)
t.pExt = uint16(end)
return t, end
}
// separator is the subtag separator, used with bytes.Join when variants,
// attributes, keys and extensions are reassembled.
var separator = []byte{'-'}
// parseVariants scans tokens as long as each token is a valid variant string.
// Duplicate variants are removed.
// end is the end position of what was parsed so far; the returned value is
// the end position after the variants. Tokens of at least 4 bytes that are
// not found in variantIndex are removed with a ValueError. The parameter t is
// not used by the body.
func parseVariants(scan *scanner, end int, t Tag) int {
start := scan.start
// Fixed-size arrays provide the initial backing storage for the slices
// below.
varIDBuf := [4]uint8{}
variantBuf := [4][]byte{}
varID := varIDBuf[:0]
variant := variantBuf[:0]
// last is the highest variant ID seen while the sequence is still ordered.
last := -1
needSort := false
for ; len(scan.token) >= 4; scan.scan() {
// TODO: measure the impact of needing this conversion and redesign
// the data structure if there is an issue.
v, ok := variantIndex[string(scan.token)]
if !ok {
// unknown variant
// TODO: allow user-defined variants?
scan.gobble(NewValueError(scan.token))
continue
}
varID = append(varID, v)
variant = append(variant, scan.token)
if !needSort {
if last < int(v) {
last = int(v)
} else {
// IDs are not strictly increasing: the variants must be sorted and
// de-duplicated afterwards.
needSort = true
// There is no legal combinations of more than 7 variants
// (and this is by no means a useful sequence).
const maxVariants = 8
if len(varID) > maxVariants {
break
}
}
}
end = scan.end
}
if needSort {
sort.Sort(variantsSort{varID, variant})
// Compact the sorted slices in place; k counts the entries kept and l is
// the ID of the entry kept last.
k, l := 0, -1
for i, v := range varID {
w := int(v)
if l == w {
// Remove duplicates.
continue
}
varID[k] = varID[i]
variant[k] = variant[i]
k++
l = w
}
// Join before resizing: the variant slices point into scan.b.
if str := bytes.Join(variant[:k], separator); len(str) == 0 {
end = start - 1
} else {
scan.resizeRange(start, end, len(str))
copy(scan.b[scan.start:], str)
end = scan.end
}
}
return end
}
// variantsSort orders variant subtags by their IDs. The two slices are
// parallel: v[k] is the subtag that belongs to ID i[k], and both are swapped
// together.
type variantsSort struct {
	i []uint8
	v [][]byte
}

// Len returns the number of variants.
func (s variantsSort) Len() int { return len(s.i) }

// Swap exchanges the entries at positions i and j in both slices.
func (s variantsSort) Swap(i, j int) {
	ids, subtags := s.i, s.v
	ids[i], ids[j] = ids[j], ids[i]
	subtags[i], subtags[j] = subtags[j], subtags[i]
}

// Less reports whether the ID at position i is smaller than the one at j.
func (s variantsSort) Less(i, j int) bool { return s.i[i] < s.i[j] }
// bytesSort orders byte slices by their first n bytes only. Every element
// compared must be at least n bytes long.
type bytesSort struct {
	b [][]byte
	n int // first n bytes to compare
}

// Len returns the number of slices.
func (b bytesSort) Len() int { return len(b.b) }

// Swap exchanges the slices at positions i and j.
func (b bytesSort) Swap(i, j int) {
	items := b.b
	items[i], items[j] = items[j], items[i]
}

// Less reports whether the first n bytes of element i sort before those of
// element j. Elements that agree on all n bytes are considered equal.
func (b bytesSort) Less(i, j int) bool {
	for k := 0; k < b.n; k++ {
		if x, y := b.b[i][k], b.b[j][k]; x != y {
			return x < y
		}
	}
	return false
}
// parseExtensions parses and normalizes the extensions in the buffer.
// It returns the last position of scan.b that is part of any extension.
// It also trims scan.b to remove excess parts accordingly.
// Extensions are reordered by their single-letter name, with a private-use
// ('x') extension always placed last.
func parseExtensions(scan *scanner) int {
start := scan.start
exts := [][]byte{}
private := []byte{}
end := scan.end
// Every single-byte token starts an extension.
for len(scan.token) == 1 {
extStart := scan.start
ext := scan.token[0]
end = parseExtension(scan)
extension := scan.b[extStart:end]
// An extension needs at least one subtag after its name: "x-" plus one
// byte for private use, one more byte for all others.
if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
scan.setError(ErrSyntax)
end = extStart
continue
} else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
// The first extension is also the only one (private use, or nothing
// follows it): no reordering is needed.
scan.b = scan.b[:end]
return end
} else if ext == 'x' {
// Private use ends the list of extensions.
private = extension
break
}
exts = append(exts, extension)
}
// Sort on the first byte only, i.e. on the extension name.
sort.Sort(bytesSort{exts, 1})
if len(private) > 0 {
exts = append(exts, private)
}
// Rebuild the extension part of the buffer from the sorted pieces.
scan.b = scan.b[:start]
if len(exts) > 0 {
scan.b = append(scan.b, bytes.Join(exts, separator)...)
} else if start > 0 {
// Strip trailing '-'.
scan.b = scan.b[:start-1]
}
return end
}
// parseExtension parses a single extension and returns the position of
// the extension end.
// The scanner must be positioned on the single-byte token that names the
// extension. The 'u' and 't' extensions get dedicated handling; 'x' accepts
// subtags of any length and every other extension accepts subtags of at
// least two bytes.
func parseExtension(scan *scanner) int {
start, end := scan.start, scan.end
switch scan.token[0] {
case 'u':
// Attributes come first: tokens longer than two bytes that precede the
// first key.
attrStart := end
scan.scan()
for last := []byte{}; len(scan.token) > 2; scan.scan() {
// NOTE(review): last starts out empty, so bytes.Compare returns 1 for
// the first attribute and this branch is taken as soon as one
// attribute is present; the attributes are then always re-scanned and
// sorted. Confirm whether "!= -1" is the intended condition.
if bytes.Compare(scan.token, last) != -1 {
// Attributes are unsorted. Start over from scratch.
p := attrStart + 1
scan.next = p
attrs := [][]byte{}
for scan.scan(); len(scan.token) > 2; scan.scan() {
attrs = append(attrs, scan.token)
end = scan.end
}
// Attributes are ordered by their first three bytes.
sort.Sort(bytesSort{attrs, 3})
copy(scan.b[p:], bytes.Join(attrs, separator))
break
}
last = scan.token
end = scan.end
}
// Keys follow: two-byte tokens, each followed by type tokens of at least
// three bytes.
var last, key []byte
for attrEnd := end; len(scan.token) == 2; last = key {
key = scan.token
keyEnd := scan.end
end = scan.acceptMinSize(3)
// TODO: check key value validity
if keyEnd == end || bytes.Compare(key, last) != 1 {
// We have an invalid key or the keys are not sorted.
// Start scanning keys from scratch and reorder.
p := attrEnd + 1
scan.next = p
keys := [][]byte{}
for scan.scan(); len(scan.token) == 2; {
keyStart, keyEnd := scan.start, scan.end
end = scan.acceptMinSize(3)
if keyEnd != end {
keys = append(keys, scan.b[keyStart:end])
} else {
// A key without any type token is a syntax error and is dropped.
scan.setError(ErrSyntax)
end = keyStart
}
}
// Stable sort on the two-byte key so that, of several pairs with the
// same key, the one that came first is the one kept below.
sort.Stable(bytesSort{keys, 2})
if n := len(keys); n > 0 {
k := 0
for i := 1; i < n; i++ {
if !bytes.Equal(keys[k][:2], keys[i][:2]) {
k++
keys[k] = keys[i]
} else if !bytes.Equal(keys[k], keys[i]) {
// Same key with a different value.
scan.setError(ErrDuplicateKey)
}
}
keys = keys[:k+1]
}
reordered := bytes.Join(keys, separator)
// Dropping keys makes the result shorter; remove the surplus bytes.
if e := p + len(reordered); e < end {
scan.deleteRange(e, end)
end = e
}
copy(scan.b[p:], reordered)
break
}
}
case 't':
scan.scan()
// An optional language tag comes first; it is parsed with parseTag and
// then lower-cased.
if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
_, end = parseTag(scan)
scan.toLower(start, end)
}
// Then come fields: two-byte keys whose second byte is a digit, each
// followed by tokens of at least three bytes.
for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
end = scan.acceptMinSize(3)
}
case 'x':
end = scan.acceptMinSize(1)
default:
end = scan.acceptMinSize(2)
}
return end
}
// getExtension returns the end position and the text of the extension that
// starts at position p of s. A '-' at position p is skipped first. An
// extension starting with 'x' runs to the end of s; any other extension ends
// at the position reported by nextExtension.
func getExtension(s string, p int) (end int, ext string) {
	if s[p] == '-' {
		p++
	}
	if s[p] != 'x' {
		end = nextExtension(s, p)
		return end, s[p:end]
	}
	return len(s), s[p:]
}
// nextExtension finds the next extension within the string, searching
// for the -<char>- pattern from position p. It returns the index of the
// leading '-' of that pattern, or len(s) if there is none.
// In the vast majority of cases, language tags will have at most
// one extension and extensions tend to be small.
func nextExtension(s string, p int) int {
	limit := len(s) - 3
	for p < limit {
		if s[p] != '-' {
			p++
			continue
		}
		if s[p+2] == '-' {
			return p
		}
		// Not a -<char>- pattern: step over the three bytes just inspected.
		p += 3
	}
	return len(s)
}

3431
vendor/golang.org/x/text/internal/language/tables.go generated vendored Normal file

File diff suppressed because it is too large Load diff

48
vendor/golang.org/x/text/internal/language/tags.go generated vendored Normal file
View file

@ -0,0 +1,48 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
// MustParse calls Parse on s and returns the resulting Tag. If Parse reports
// an error, MustParse panics with that error. It is meant for initializing
// Tag values from strings that are known to be valid BCP 47 tags.
func MustParse(s string) Tag {
	tag, parseErr := Parse(s)
	if parseErr != nil {
		panic(parseErr)
	}
	return tag
}
// MustParseBase calls ParseBase on s and returns the resulting Language. If
// ParseBase reports an error, MustParseBase panics with that error. It is
// meant for initializing base language values from known-valid strings.
func MustParseBase(s string) Language {
	base, parseErr := ParseBase(s)
	if parseErr != nil {
		panic(parseErr)
	}
	return base
}
// MustParseScript calls ParseScript on s and returns the resulting Script. If
// ParseScript reports an error, MustParseScript panics with that error. It is
// meant for initializing Script values from known-valid strings.
func MustParseScript(s string) Script {
	script, parseErr := ParseScript(s)
	if parseErr != nil {
		panic(parseErr)
	}
	return script
}
// MustParseRegion calls ParseRegion on s and returns the resulting Region. If
// ParseRegion reports an error, MustParseRegion panics with that error. It is
// meant for initializing Region values from known-valid strings.
func MustParseRegion(s string) Region {
	region, parseErr := ParseRegion(s)
	if parseErr != nil {
		panic(parseErr)
	}
	return region
}
// Und is the root language.
// It is declared without an initializer, so it holds the zero value of Tag.
var Und Tag

View file

@ -1,16 +0,0 @@
# Copyright 2013 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
CLEANFILES+=maketables
maketables: maketables.go
go build $^
tables: maketables
./maketables > tables.go
gofmt -w -s tables.go
# Build (but do not run) maketables during testing,
# just to make sure it still compiles.
testshort: maketables

View file

@ -7,6 +7,8 @@ package language
import (
"fmt"
"sort"
"golang.org/x/text/internal/language"
)
// The Coverage interface is used to define the level of coverage of an
@ -44,9 +46,9 @@ type allSubtags struct{}
// consecutive range, it simply returns a slice of numbers in increasing order.
// The "undefined" region is not returned.
func (s allSubtags) Regions() []Region {
reg := make([]Region, numRegions)
reg := make([]Region, language.NumRegions)
for i := range reg {
reg[i] = Region{regionID(i + 1)}
reg[i] = Region{language.Region(i + 1)}
}
return reg
}
@ -55,9 +57,9 @@ func (s allSubtags) Regions() []Region {
// consecutive range, it simply returns a slice of numbers in increasing order.
// The "undefined" script is not returned.
func (s allSubtags) Scripts() []Script {
scr := make([]Script, numScripts)
scr := make([]Script, language.NumScripts)
for i := range scr {
scr[i] = Script{scriptID(i + 1)}
scr[i] = Script{language.Script(i + 1)}
}
return scr
}
@ -65,22 +67,10 @@ func (s allSubtags) Scripts() []Script {
// BaseLanguages returns the list of all supported base languages. It generates
// the list by traversing the internal structures.
func (s allSubtags) BaseLanguages() []Base {
base := make([]Base, 0, numLanguages)
for i := 0; i < langNoIndexOffset; i++ {
// We included "und" already for the value 0.
if i != nonCanonicalUnd {
base = append(base, Base{langID(i)})
}
}
i := langNoIndexOffset
for _, v := range langNoIndex {
for k := 0; k < 8; k++ {
if v&1 == 1 {
base = append(base, Base{langID(i)})
}
v >>= 1
i++
}
bs := language.BaseLanguages()
base := make([]Base, len(bs))
for i, b := range bs {
base[i] = Base{b}
}
return base
}
@ -90,7 +80,7 @@ func (s allSubtags) Tags() []Tag {
return nil
}
// coverage is used used by NewCoverage which is used as a convenient way for
// coverage is used by NewCoverage which is used as a convenient way for
// creating Coverage implementations for partially defined data. Very often a
// package will only need to define a subset of slices. coverage provides a
// convenient way to do this. Moreover, packages using NewCoverage, instead of
@ -134,7 +124,7 @@ func (s *coverage) BaseLanguages() []Base {
}
a := make([]Base, len(tags))
for i, t := range tags {
a[i] = Base{langID(t.lang)}
a[i] = Base{language.Language(t.lang())}
}
sort.Sort(bases(a))
k := 0

File diff suppressed because it is too large Load diff

View file

@ -1,20 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This file contains code common to the maketables.go and the package code.
// langAliasType is the type of an alias in langAliasMap.
type langAliasType int8
// The alias kinds are numbered with iota starting at 0; the unknown kind is
// given the explicit value -1 so that it lies outside that range.
const (
langDeprecated langAliasType = iota // 0
langMacro // 1
langLegacy // 2
langAliasTypeUnknown langAliasType = -1
)
)

View file

@ -1,162 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This file generates derivative tables based on the language package itself.
import (
"bytes"
"flag"
"fmt"
"io/ioutil"
"log"
"reflect"
"sort"
"strings"
"golang.org/x/text/internal/gen"
"golang.org/x/text/language"
"golang.org/x/text/unicode/cldr"
)
var (
test = flag.Bool("test", false,
"test existing tables; can be used to compare web data with package data.")
draft = flag.String("draft",
"contributed",
`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
)
// main decodes the CLDR core zip, collects the set of language tags found in
// its locale list and plural rules, and emits the NumCompactTags constant,
// the specialTags variable and the coreTags map. The generated source is
// written to index.go by the deferred function below.
func main() {
gen.Init()
// Read the CLDR zip file.
r := gen.OpenCLDRCoreZip()
defer r.Close()
d := &cldr.Decoder{}
data, err := d.DecodeZip(r)
if err != nil {
log.Fatalf("DecodeZip: %v", err)
}
w := gen.NewCodeWriter()
// Runs when main returns: formats everything written to w as Go source for
// package "language" and writes it to index.go.
defer func() {
buf := &bytes.Buffer{}
if _, err = w.WriteGo(buf, "language", ""); err != nil {
log.Fatalf("Error formatting file index.go: %v", err)
}
// Since we're generating a table for our own package we need to rewrite
// doing the equivalent of go fmt -r 'language.b -> b'. Using
// bytes.Replace will do.
out := bytes.Replace(buf.Bytes(), []byte("language."), nil, -1)
if err := ioutil.WriteFile("index.go", out, 0600); err != nil {
log.Fatalf("Could not create file index.go: %v", err)
}
}()
// m is used as a set: each distinct tag is recorded once.
m := map[language.Tag]bool{}
for _, lang := range data.Locales() {
// We include all locales unconditionally to be consistent with en_US.
// We want en_US, even though it has no data associated with it.
// TODO: put any of the languages for which no data exists at the end
// of the index. This allows all components based on ICU to use that
// as the cutoff point.
// if x := data.RawLDML(lang); false ||
// x.LocaleDisplayNames != nil ||
// x.Characters != nil ||
// x.Delimiters != nil ||
// x.Measurement != nil ||
// x.Dates != nil ||
// x.Numbers != nil ||
// x.Units != nil ||
// x.ListPatterns != nil ||
// x.Collations != nil ||
// x.Segmentations != nil ||
// x.Rbnf != nil ||
// x.Annotations != nil ||
// x.Metadata != nil {
// TODO: support POSIX natively, albeit non-standard.
tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1))
m[tag] = true
// }
}
// Include locales for plural rules, which uses a different structure.
for _, plurals := range data.Supplemental().Plurals {
for _, rules := range plurals.PluralRules {
for _, lang := range strings.Split(rules.Locales, " ") {
m[language.Make(lang)] = true
}
}
}
// Split the tags: core tags have neither variants nor extensions, all other
// tags are special. The only extension tolerated is u-va-posix; anything
// else aborts the run.
var core, special []language.Tag
for t := range m {
if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" {
log.Fatalf("Unexpected extension %v in %v", x, t)
}
if len(t.Variants()) == 0 && len(t.Extensions()) == 0 {
core = append(core, t)
} else {
special = append(special, t)
}
}
w.WriteComment(`
NumCompactTags is the number of common tags. The maximum tag is
NumCompactTags-1.`)
w.WriteConst("NumCompactTags", len(core)+len(special))
// Map iteration order is random, so both lists are sorted by their string
// form before being written.
sort.Sort(byAlpha(special))
w.WriteVar("specialTags", special)
// TODO: order by frequency?
sort.Sort(byAlpha(core))
// Size computations are just an estimate.
w.Size += int(reflect.TypeOf(map[uint32]uint16{}).Size())
w.Size += len(core) * 6 // size of uint32 and uint16
fmt.Fprintln(w)
fmt.Fprintln(w, "var coreTags = map[uint32]uint16{")
fmt.Fprintln(w, "0x0: 0, // und")
i := len(special) + 1 // Und and special tags already written.
for _, t := range core {
// Und was already written as the fixed "0x0: 0" entry above.
if t == language.Und {
continue
}
fmt.Fprint(w.Hash, t, i)
b, s, r := t.Raw()
// The map key is the concatenation of the zero-padded hex digits that
// getIndex extracts for base language, script and region.
fmt.Fprintf(w, "0x%s%s%s: %d, // %s\n",
getIndex(b, 3), // 3 is enough as it is guaranteed to be a compact number
getIndex(s, 2),
getIndex(r, 3),
i, t)
i++
}
fmt.Fprintln(w, "}")
}
// getIndex formats x with %#v, takes the text between the first "0x" and the
// final character of that representation, and left-pads it with '0' to a
// width of n. The value is expected to print in the form Type{typeID: 0x00}.
// It panics if the extracted text is longer than n, because strings.Repeat
// is then called with a negative count.
func getIndex(x interface{}, n int) string {
	repr := fmt.Sprintf("%#v", x)
	from := strings.Index(repr, "0x") + 2
	digits := repr[from : len(repr)-1]
	padding := strings.Repeat("0", n-len(digits))
	return padding + digits
}
// byAlpha implements sort.Interface for a slice of tags, ordering them by the
// text returned from their String method.
type byAlpha []language.Tag

// Len reports the number of tags.
func (a byAlpha) Len() int {
	return len(a)
}

// Swap exchanges the tags at positions i and j.
func (a byAlpha) Swap(i, j int) {
	a[i], a[j] = a[j], a[i]
}

// Less reports whether tag i's string form sorts before tag j's.
func (a byAlpha) Less(i, j int) bool {
	left, right := a[i].String(), a[j].String()
	return left < right
}

View file

@ -1,783 +0,0 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package language
// NumCompactTags is the number of common tags. The maximum tag is
// NumCompactTags-1.
const NumCompactTags = 768
var specialTags = []Tag{ // 2 elements
0: {lang: 0xd7, region: 0x6e, script: 0x0, pVariant: 0x5, pExt: 0xe, str: "ca-ES-valencia"},
1: {lang: 0x139, region: 0x135, script: 0x0, pVariant: 0x5, pExt: 0x5, str: "en-US-u-va-posix"},
} // Size: 72 bytes
var coreTags = map[uint32]uint16{
0x0: 0, // und
0x01600000: 3, // af
0x016000d2: 4, // af-NA
0x01600161: 5, // af-ZA
0x01c00000: 6, // agq
0x01c00052: 7, // agq-CM
0x02100000: 8, // ak
0x02100080: 9, // ak-GH
0x02700000: 10, // am
0x0270006f: 11, // am-ET
0x03a00000: 12, // ar
0x03a00001: 13, // ar-001
0x03a00023: 14, // ar-AE
0x03a00039: 15, // ar-BH
0x03a00062: 16, // ar-DJ
0x03a00067: 17, // ar-DZ
0x03a0006b: 18, // ar-EG
0x03a0006c: 19, // ar-EH
0x03a0006d: 20, // ar-ER
0x03a00097: 21, // ar-IL
0x03a0009b: 22, // ar-IQ
0x03a000a1: 23, // ar-JO
0x03a000a8: 24, // ar-KM
0x03a000ac: 25, // ar-KW
0x03a000b0: 26, // ar-LB
0x03a000b9: 27, // ar-LY
0x03a000ba: 28, // ar-MA
0x03a000c9: 29, // ar-MR
0x03a000e1: 30, // ar-OM
0x03a000ed: 31, // ar-PS
0x03a000f3: 32, // ar-QA
0x03a00108: 33, // ar-SA
0x03a0010b: 34, // ar-SD
0x03a00115: 35, // ar-SO
0x03a00117: 36, // ar-SS
0x03a0011c: 37, // ar-SY
0x03a00120: 38, // ar-TD
0x03a00128: 39, // ar-TN
0x03a0015e: 40, // ar-YE
0x04000000: 41, // ars
0x04300000: 42, // as
0x04300099: 43, // as-IN
0x04400000: 44, // asa
0x0440012f: 45, // asa-TZ
0x04800000: 46, // ast
0x0480006e: 47, // ast-ES
0x05800000: 48, // az
0x0581f000: 49, // az-Cyrl
0x0581f032: 50, // az-Cyrl-AZ
0x05857000: 51, // az-Latn
0x05857032: 52, // az-Latn-AZ
0x05e00000: 53, // bas
0x05e00052: 54, // bas-CM
0x07100000: 55, // be
0x07100047: 56, // be-BY
0x07500000: 57, // bem
0x07500162: 58, // bem-ZM
0x07900000: 59, // bez
0x0790012f: 60, // bez-TZ
0x07e00000: 61, // bg
0x07e00038: 62, // bg-BG
0x08200000: 63, // bh
0x0a000000: 64, // bm
0x0a0000c3: 65, // bm-ML
0x0a500000: 66, // bn
0x0a500035: 67, // bn-BD
0x0a500099: 68, // bn-IN
0x0a900000: 69, // bo
0x0a900053: 70, // bo-CN
0x0a900099: 71, // bo-IN
0x0b200000: 72, // br
0x0b200078: 73, // br-FR
0x0b500000: 74, // brx
0x0b500099: 75, // brx-IN
0x0b700000: 76, // bs
0x0b71f000: 77, // bs-Cyrl
0x0b71f033: 78, // bs-Cyrl-BA
0x0b757000: 79, // bs-Latn
0x0b757033: 80, // bs-Latn-BA
0x0d700000: 81, // ca
0x0d700022: 82, // ca-AD
0x0d70006e: 83, // ca-ES
0x0d700078: 84, // ca-FR
0x0d70009e: 85, // ca-IT
0x0db00000: 86, // ccp
0x0db00035: 87, // ccp-BD
0x0db00099: 88, // ccp-IN
0x0dc00000: 89, // ce
0x0dc00106: 90, // ce-RU
0x0df00000: 91, // cgg
0x0df00131: 92, // cgg-UG
0x0e500000: 93, // chr
0x0e500135: 94, // chr-US
0x0e900000: 95, // ckb
0x0e90009b: 96, // ckb-IQ
0x0e90009c: 97, // ckb-IR
0x0fa00000: 98, // cs
0x0fa0005e: 99, // cs-CZ
0x0fe00000: 100, // cu
0x0fe00106: 101, // cu-RU
0x10000000: 102, // cy
0x1000007b: 103, // cy-GB
0x10100000: 104, // da
0x10100063: 105, // da-DK
0x10100082: 106, // da-GL
0x10800000: 107, // dav
0x108000a4: 108, // dav-KE
0x10d00000: 109, // de
0x10d0002e: 110, // de-AT
0x10d00036: 111, // de-BE
0x10d0004e: 112, // de-CH
0x10d00060: 113, // de-DE
0x10d0009e: 114, // de-IT
0x10d000b2: 115, // de-LI
0x10d000b7: 116, // de-LU
0x11700000: 117, // dje
0x117000d4: 118, // dje-NE
0x11f00000: 119, // dsb
0x11f00060: 120, // dsb-DE
0x12400000: 121, // dua
0x12400052: 122, // dua-CM
0x12800000: 123, // dv
0x12b00000: 124, // dyo
0x12b00114: 125, // dyo-SN
0x12d00000: 126, // dz
0x12d00043: 127, // dz-BT
0x12f00000: 128, // ebu
0x12f000a4: 129, // ebu-KE
0x13000000: 130, // ee
0x13000080: 131, // ee-GH
0x13000122: 132, // ee-TG
0x13600000: 133, // el
0x1360005d: 134, // el-CY
0x13600087: 135, // el-GR
0x13900000: 136, // en
0x13900001: 137, // en-001
0x1390001a: 138, // en-150
0x13900025: 139, // en-AG
0x13900026: 140, // en-AI
0x1390002d: 141, // en-AS
0x1390002e: 142, // en-AT
0x1390002f: 143, // en-AU
0x13900034: 144, // en-BB
0x13900036: 145, // en-BE
0x1390003a: 146, // en-BI
0x1390003d: 147, // en-BM
0x13900042: 148, // en-BS
0x13900046: 149, // en-BW
0x13900048: 150, // en-BZ
0x13900049: 151, // en-CA
0x1390004a: 152, // en-CC
0x1390004e: 153, // en-CH
0x13900050: 154, // en-CK
0x13900052: 155, // en-CM
0x1390005c: 156, // en-CX
0x1390005d: 157, // en-CY
0x13900060: 158, // en-DE
0x13900061: 159, // en-DG
0x13900063: 160, // en-DK
0x13900064: 161, // en-DM
0x1390006d: 162, // en-ER
0x13900072: 163, // en-FI
0x13900073: 164, // en-FJ
0x13900074: 165, // en-FK
0x13900075: 166, // en-FM
0x1390007b: 167, // en-GB
0x1390007c: 168, // en-GD
0x1390007f: 169, // en-GG
0x13900080: 170, // en-GH
0x13900081: 171, // en-GI
0x13900083: 172, // en-GM
0x1390008a: 173, // en-GU
0x1390008c: 174, // en-GY
0x1390008d: 175, // en-HK
0x13900096: 176, // en-IE
0x13900097: 177, // en-IL
0x13900098: 178, // en-IM
0x13900099: 179, // en-IN
0x1390009a: 180, // en-IO
0x1390009f: 181, // en-JE
0x139000a0: 182, // en-JM
0x139000a4: 183, // en-KE
0x139000a7: 184, // en-KI
0x139000a9: 185, // en-KN
0x139000ad: 186, // en-KY
0x139000b1: 187, // en-LC
0x139000b4: 188, // en-LR
0x139000b5: 189, // en-LS
0x139000bf: 190, // en-MG
0x139000c0: 191, // en-MH
0x139000c6: 192, // en-MO
0x139000c7: 193, // en-MP
0x139000ca: 194, // en-MS
0x139000cb: 195, // en-MT
0x139000cc: 196, // en-MU
0x139000ce: 197, // en-MW
0x139000d0: 198, // en-MY
0x139000d2: 199, // en-NA
0x139000d5: 200, // en-NF
0x139000d6: 201, // en-NG
0x139000d9: 202, // en-NL
0x139000dd: 203, // en-NR
0x139000df: 204, // en-NU
0x139000e0: 205, // en-NZ
0x139000e6: 206, // en-PG
0x139000e7: 207, // en-PH
0x139000e8: 208, // en-PK
0x139000eb: 209, // en-PN
0x139000ec: 210, // en-PR
0x139000f0: 211, // en-PW
0x13900107: 212, // en-RW
0x13900109: 213, // en-SB
0x1390010a: 214, // en-SC
0x1390010b: 215, // en-SD
0x1390010c: 216, // en-SE
0x1390010d: 217, // en-SG
0x1390010e: 218, // en-SH
0x1390010f: 219, // en-SI
0x13900112: 220, // en-SL
0x13900117: 221, // en-SS
0x1390011b: 222, // en-SX
0x1390011d: 223, // en-SZ
0x1390011f: 224, // en-TC
0x13900125: 225, // en-TK
0x13900129: 226, // en-TO
0x1390012c: 227, // en-TT
0x1390012d: 228, // en-TV
0x1390012f: 229, // en-TZ
0x13900131: 230, // en-UG
0x13900133: 231, // en-UM
0x13900135: 232, // en-US
0x13900139: 233, // en-VC
0x1390013c: 234, // en-VG
0x1390013d: 235, // en-VI
0x1390013f: 236, // en-VU
0x13900142: 237, // en-WS
0x13900161: 238, // en-ZA
0x13900162: 239, // en-ZM
0x13900164: 240, // en-ZW
0x13c00000: 241, // eo
0x13c00001: 242, // eo-001
0x13e00000: 243, // es
0x13e0001f: 244, // es-419
0x13e0002c: 245, // es-AR
0x13e0003f: 246, // es-BO
0x13e00041: 247, // es-BR
0x13e00048: 248, // es-BZ
0x13e00051: 249, // es-CL
0x13e00054: 250, // es-CO
0x13e00056: 251, // es-CR
0x13e00059: 252, // es-CU
0x13e00065: 253, // es-DO
0x13e00068: 254, // es-EA
0x13e00069: 255, // es-EC
0x13e0006e: 256, // es-ES
0x13e00086: 257, // es-GQ
0x13e00089: 258, // es-GT
0x13e0008f: 259, // es-HN
0x13e00094: 260, // es-IC
0x13e000cf: 261, // es-MX
0x13e000d8: 262, // es-NI
0x13e000e2: 263, // es-PA
0x13e000e4: 264, // es-PE
0x13e000e7: 265, // es-PH
0x13e000ec: 266, // es-PR
0x13e000f1: 267, // es-PY
0x13e0011a: 268, // es-SV
0x13e00135: 269, // es-US
0x13e00136: 270, // es-UY
0x13e0013b: 271, // es-VE
0x14000000: 272, // et
0x1400006a: 273, // et-EE
0x14500000: 274, // eu
0x1450006e: 275, // eu-ES
0x14600000: 276, // ewo
0x14600052: 277, // ewo-CM
0x14800000: 278, // fa
0x14800024: 279, // fa-AF
0x1480009c: 280, // fa-IR
0x14e00000: 281, // ff
0x14e00052: 282, // ff-CM
0x14e00084: 283, // ff-GN
0x14e000c9: 284, // ff-MR
0x14e00114: 285, // ff-SN
0x15100000: 286, // fi
0x15100072: 287, // fi-FI
0x15300000: 288, // fil
0x153000e7: 289, // fil-PH
0x15800000: 290, // fo
0x15800063: 291, // fo-DK
0x15800076: 292, // fo-FO
0x15e00000: 293, // fr
0x15e00036: 294, // fr-BE
0x15e00037: 295, // fr-BF
0x15e0003a: 296, // fr-BI
0x15e0003b: 297, // fr-BJ
0x15e0003c: 298, // fr-BL
0x15e00049: 299, // fr-CA
0x15e0004b: 300, // fr-CD
0x15e0004c: 301, // fr-CF
0x15e0004d: 302, // fr-CG
0x15e0004e: 303, // fr-CH
0x15e0004f: 304, // fr-CI
0x15e00052: 305, // fr-CM
0x15e00062: 306, // fr-DJ
0x15e00067: 307, // fr-DZ
0x15e00078: 308, // fr-FR
0x15e0007a: 309, // fr-GA
0x15e0007e: 310, // fr-GF
0x15e00084: 311, // fr-GN
0x15e00085: 312, // fr-GP
0x15e00086: 313, // fr-GQ
0x15e00091: 314, // fr-HT
0x15e000a8: 315, // fr-KM
0x15e000b7: 316, // fr-LU
0x15e000ba: 317, // fr-MA
0x15e000bb: 318, // fr-MC
0x15e000be: 319, // fr-MF
0x15e000bf: 320, // fr-MG
0x15e000c3: 321, // fr-ML
0x15e000c8: 322, // fr-MQ
0x15e000c9: 323, // fr-MR
0x15e000cc: 324, // fr-MU
0x15e000d3: 325, // fr-NC
0x15e000d4: 326, // fr-NE
0x15e000e5: 327, // fr-PF
0x15e000ea: 328, // fr-PM
0x15e00102: 329, // fr-RE
0x15e00107: 330, // fr-RW
0x15e0010a: 331, // fr-SC
0x15e00114: 332, // fr-SN
0x15e0011c: 333, // fr-SY
0x15e00120: 334, // fr-TD
0x15e00122: 335, // fr-TG
0x15e00128: 336, // fr-TN
0x15e0013f: 337, // fr-VU
0x15e00140: 338, // fr-WF
0x15e0015f: 339, // fr-YT
0x16900000: 340, // fur
0x1690009e: 341, // fur-IT
0x16d00000: 342, // fy
0x16d000d9: 343, // fy-NL
0x16e00000: 344, // ga
0x16e00096: 345, // ga-IE
0x17e00000: 346, // gd
0x17e0007b: 347, // gd-GB
0x19000000: 348, // gl
0x1900006e: 349, // gl-ES
0x1a300000: 350, // gsw
0x1a30004e: 351, // gsw-CH
0x1a300078: 352, // gsw-FR
0x1a3000b2: 353, // gsw-LI
0x1a400000: 354, // gu
0x1a400099: 355, // gu-IN
0x1a900000: 356, // guw
0x1ab00000: 357, // guz
0x1ab000a4: 358, // guz-KE
0x1ac00000: 359, // gv
0x1ac00098: 360, // gv-IM
0x1b400000: 361, // ha
0x1b400080: 362, // ha-GH
0x1b4000d4: 363, // ha-NE
0x1b4000d6: 364, // ha-NG
0x1b800000: 365, // haw
0x1b800135: 366, // haw-US
0x1bc00000: 367, // he
0x1bc00097: 368, // he-IL
0x1be00000: 369, // hi
0x1be00099: 370, // hi-IN
0x1d100000: 371, // hr
0x1d100033: 372, // hr-BA
0x1d100090: 373, // hr-HR
0x1d200000: 374, // hsb
0x1d200060: 375, // hsb-DE
0x1d500000: 376, // hu
0x1d500092: 377, // hu-HU
0x1d700000: 378, // hy
0x1d700028: 379, // hy-AM
0x1e100000: 380, // id
0x1e100095: 381, // id-ID
0x1e700000: 382, // ig
0x1e7000d6: 383, // ig-NG
0x1ea00000: 384, // ii
0x1ea00053: 385, // ii-CN
0x1f500000: 386, // io
0x1f800000: 387, // is
0x1f80009d: 388, // is-IS
0x1f900000: 389, // it
0x1f90004e: 390, // it-CH
0x1f90009e: 391, // it-IT
0x1f900113: 392, // it-SM
0x1f900138: 393, // it-VA
0x1fa00000: 394, // iu
0x20000000: 395, // ja
0x200000a2: 396, // ja-JP
0x20300000: 397, // jbo
0x20700000: 398, // jgo
0x20700052: 399, // jgo-CM
0x20a00000: 400, // jmc
0x20a0012f: 401, // jmc-TZ
0x20e00000: 402, // jv
0x21000000: 403, // ka
0x2100007d: 404, // ka-GE
0x21200000: 405, // kab
0x21200067: 406, // kab-DZ
0x21600000: 407, // kaj
0x21700000: 408, // kam
0x217000a4: 409, // kam-KE
0x21f00000: 410, // kcg
0x22300000: 411, // kde
0x2230012f: 412, // kde-TZ
0x22700000: 413, // kea
0x2270005a: 414, // kea-CV
0x23400000: 415, // khq
0x234000c3: 416, // khq-ML
0x23900000: 417, // ki
0x239000a4: 418, // ki-KE
0x24200000: 419, // kk
0x242000ae: 420, // kk-KZ
0x24400000: 421, // kkj
0x24400052: 422, // kkj-CM
0x24500000: 423, // kl
0x24500082: 424, // kl-GL
0x24600000: 425, // kln
0x246000a4: 426, // kln-KE
0x24a00000: 427, // km
0x24a000a6: 428, // km-KH
0x25100000: 429, // kn
0x25100099: 430, // kn-IN
0x25400000: 431, // ko
0x254000aa: 432, // ko-KP
0x254000ab: 433, // ko-KR
0x25600000: 434, // kok
0x25600099: 435, // kok-IN
0x26a00000: 436, // ks
0x26a00099: 437, // ks-IN
0x26b00000: 438, // ksb
0x26b0012f: 439, // ksb-TZ
0x26d00000: 440, // ksf
0x26d00052: 441, // ksf-CM
0x26e00000: 442, // ksh
0x26e00060: 443, // ksh-DE
0x27400000: 444, // ku
0x28100000: 445, // kw
0x2810007b: 446, // kw-GB
0x28a00000: 447, // ky
0x28a000a5: 448, // ky-KG
0x29100000: 449, // lag
0x2910012f: 450, // lag-TZ
0x29500000: 451, // lb
0x295000b7: 452, // lb-LU
0x2a300000: 453, // lg
0x2a300131: 454, // lg-UG
0x2af00000: 455, // lkt
0x2af00135: 456, // lkt-US
0x2b500000: 457, // ln
0x2b50002a: 458, // ln-AO
0x2b50004b: 459, // ln-CD
0x2b50004c: 460, // ln-CF
0x2b50004d: 461, // ln-CG
0x2b800000: 462, // lo
0x2b8000af: 463, // lo-LA
0x2bf00000: 464, // lrc
0x2bf0009b: 465, // lrc-IQ
0x2bf0009c: 466, // lrc-IR
0x2c000000: 467, // lt
0x2c0000b6: 468, // lt-LT
0x2c200000: 469, // lu
0x2c20004b: 470, // lu-CD
0x2c400000: 471, // luo
0x2c4000a4: 472, // luo-KE
0x2c500000: 473, // luy
0x2c5000a4: 474, // luy-KE
0x2c700000: 475, // lv
0x2c7000b8: 476, // lv-LV
0x2d100000: 477, // mas
0x2d1000a4: 478, // mas-KE
0x2d10012f: 479, // mas-TZ
0x2e900000: 480, // mer
0x2e9000a4: 481, // mer-KE
0x2ed00000: 482, // mfe
0x2ed000cc: 483, // mfe-MU
0x2f100000: 484, // mg
0x2f1000bf: 485, // mg-MG
0x2f200000: 486, // mgh
0x2f2000d1: 487, // mgh-MZ
0x2f400000: 488, // mgo
0x2f400052: 489, // mgo-CM
0x2ff00000: 490, // mk
0x2ff000c2: 491, // mk-MK
0x30400000: 492, // ml
0x30400099: 493, // ml-IN
0x30b00000: 494, // mn
0x30b000c5: 495, // mn-MN
0x31b00000: 496, // mr
0x31b00099: 497, // mr-IN
0x31f00000: 498, // ms
0x31f0003e: 499, // ms-BN
0x31f000d0: 500, // ms-MY
0x31f0010d: 501, // ms-SG
0x32000000: 502, // mt
0x320000cb: 503, // mt-MT
0x32500000: 504, // mua
0x32500052: 505, // mua-CM
0x33100000: 506, // my
0x331000c4: 507, // my-MM
0x33a00000: 508, // mzn
0x33a0009c: 509, // mzn-IR
0x34100000: 510, // nah
0x34500000: 511, // naq
0x345000d2: 512, // naq-NA
0x34700000: 513, // nb
0x347000da: 514, // nb-NO
0x34700110: 515, // nb-SJ
0x34e00000: 516, // nd
0x34e00164: 517, // nd-ZW
0x35000000: 518, // nds
0x35000060: 519, // nds-DE
0x350000d9: 520, // nds-NL
0x35100000: 521, // ne
0x35100099: 522, // ne-IN
0x351000db: 523, // ne-NP
0x36700000: 524, // nl
0x36700030: 525, // nl-AW
0x36700036: 526, // nl-BE
0x36700040: 527, // nl-BQ
0x3670005b: 528, // nl-CW
0x367000d9: 529, // nl-NL
0x36700116: 530, // nl-SR
0x3670011b: 531, // nl-SX
0x36800000: 532, // nmg
0x36800052: 533, // nmg-CM
0x36a00000: 534, // nn
0x36a000da: 535, // nn-NO
0x36c00000: 536, // nnh
0x36c00052: 537, // nnh-CM
0x36f00000: 538, // no
0x37500000: 539, // nqo
0x37600000: 540, // nr
0x37a00000: 541, // nso
0x38000000: 542, // nus
0x38000117: 543, // nus-SS
0x38700000: 544, // ny
0x38900000: 545, // nyn
0x38900131: 546, // nyn-UG
0x39000000: 547, // om
0x3900006f: 548, // om-ET
0x390000a4: 549, // om-KE
0x39500000: 550, // or
0x39500099: 551, // or-IN
0x39800000: 552, // os
0x3980007d: 553, // os-GE
0x39800106: 554, // os-RU
0x39d00000: 555, // pa
0x39d05000: 556, // pa-Arab
0x39d050e8: 557, // pa-Arab-PK
0x39d33000: 558, // pa-Guru
0x39d33099: 559, // pa-Guru-IN
0x3a100000: 560, // pap
0x3b300000: 561, // pl
0x3b3000e9: 562, // pl-PL
0x3bd00000: 563, // prg
0x3bd00001: 564, // prg-001
0x3be00000: 565, // ps
0x3be00024: 566, // ps-AF
0x3c000000: 567, // pt
0x3c00002a: 568, // pt-AO
0x3c000041: 569, // pt-BR
0x3c00004e: 570, // pt-CH
0x3c00005a: 571, // pt-CV
0x3c000086: 572, // pt-GQ
0x3c00008b: 573, // pt-GW
0x3c0000b7: 574, // pt-LU
0x3c0000c6: 575, // pt-MO
0x3c0000d1: 576, // pt-MZ
0x3c0000ee: 577, // pt-PT
0x3c000118: 578, // pt-ST
0x3c000126: 579, // pt-TL
0x3c400000: 580, // qu
0x3c40003f: 581, // qu-BO
0x3c400069: 582, // qu-EC
0x3c4000e4: 583, // qu-PE
0x3d400000: 584, // rm
0x3d40004e: 585, // rm-CH
0x3d900000: 586, // rn
0x3d90003a: 587, // rn-BI
0x3dc00000: 588, // ro
0x3dc000bc: 589, // ro-MD
0x3dc00104: 590, // ro-RO
0x3de00000: 591, // rof
0x3de0012f: 592, // rof-TZ
0x3e200000: 593, // ru
0x3e200047: 594, // ru-BY
0x3e2000a5: 595, // ru-KG
0x3e2000ae: 596, // ru-KZ
0x3e2000bc: 597, // ru-MD
0x3e200106: 598, // ru-RU
0x3e200130: 599, // ru-UA
0x3e500000: 600, // rw
0x3e500107: 601, // rw-RW
0x3e600000: 602, // rwk
0x3e60012f: 603, // rwk-TZ
0x3eb00000: 604, // sah
0x3eb00106: 605, // sah-RU
0x3ec00000: 606, // saq
0x3ec000a4: 607, // saq-KE
0x3f300000: 608, // sbp
0x3f30012f: 609, // sbp-TZ
0x3fa00000: 610, // sd
0x3fa000e8: 611, // sd-PK
0x3fc00000: 612, // sdh
0x3fd00000: 613, // se
0x3fd00072: 614, // se-FI
0x3fd000da: 615, // se-NO
0x3fd0010c: 616, // se-SE
0x3ff00000: 617, // seh
0x3ff000d1: 618, // seh-MZ
0x40100000: 619, // ses
0x401000c3: 620, // ses-ML
0x40200000: 621, // sg
0x4020004c: 622, // sg-CF
0x40800000: 623, // shi
0x40857000: 624, // shi-Latn
0x408570ba: 625, // shi-Latn-MA
0x408dc000: 626, // shi-Tfng
0x408dc0ba: 627, // shi-Tfng-MA
0x40c00000: 628, // si
0x40c000b3: 629, // si-LK
0x41200000: 630, // sk
0x41200111: 631, // sk-SK
0x41600000: 632, // sl
0x4160010f: 633, // sl-SI
0x41c00000: 634, // sma
0x41d00000: 635, // smi
0x41e00000: 636, // smj
0x41f00000: 637, // smn
0x41f00072: 638, // smn-FI
0x42200000: 639, // sms
0x42300000: 640, // sn
0x42300164: 641, // sn-ZW
0x42900000: 642, // so
0x42900062: 643, // so-DJ
0x4290006f: 644, // so-ET
0x429000a4: 645, // so-KE
0x42900115: 646, // so-SO
0x43100000: 647, // sq
0x43100027: 648, // sq-AL
0x431000c2: 649, // sq-MK
0x4310014d: 650, // sq-XK
0x43200000: 651, // sr
0x4321f000: 652, // sr-Cyrl
0x4321f033: 653, // sr-Cyrl-BA
0x4321f0bd: 654, // sr-Cyrl-ME
0x4321f105: 655, // sr-Cyrl-RS
0x4321f14d: 656, // sr-Cyrl-XK
0x43257000: 657, // sr-Latn
0x43257033: 658, // sr-Latn-BA
0x432570bd: 659, // sr-Latn-ME
0x43257105: 660, // sr-Latn-RS
0x4325714d: 661, // sr-Latn-XK
0x43700000: 662, // ss
0x43a00000: 663, // ssy
0x43b00000: 664, // st
0x44400000: 665, // sv
0x44400031: 666, // sv-AX
0x44400072: 667, // sv-FI
0x4440010c: 668, // sv-SE
0x44500000: 669, // sw
0x4450004b: 670, // sw-CD
0x445000a4: 671, // sw-KE
0x4450012f: 672, // sw-TZ
0x44500131: 673, // sw-UG
0x44e00000: 674, // syr
0x45000000: 675, // ta
0x45000099: 676, // ta-IN
0x450000b3: 677, // ta-LK
0x450000d0: 678, // ta-MY
0x4500010d: 679, // ta-SG
0x46100000: 680, // te
0x46100099: 681, // te-IN
0x46400000: 682, // teo
0x464000a4: 683, // teo-KE
0x46400131: 684, // teo-UG
0x46700000: 685, // tg
0x46700124: 686, // tg-TJ
0x46b00000: 687, // th
0x46b00123: 688, // th-TH
0x46f00000: 689, // ti
0x46f0006d: 690, // ti-ER
0x46f0006f: 691, // ti-ET
0x47100000: 692, // tig
0x47600000: 693, // tk
0x47600127: 694, // tk-TM
0x48000000: 695, // tn
0x48200000: 696, // to
0x48200129: 697, // to-TO
0x48a00000: 698, // tr
0x48a0005d: 699, // tr-CY
0x48a0012b: 700, // tr-TR
0x48e00000: 701, // ts
0x49400000: 702, // tt
0x49400106: 703, // tt-RU
0x4a400000: 704, // twq
0x4a4000d4: 705, // twq-NE
0x4a900000: 706, // tzm
0x4a9000ba: 707, // tzm-MA
0x4ac00000: 708, // ug
0x4ac00053: 709, // ug-CN
0x4ae00000: 710, // uk
0x4ae00130: 711, // uk-UA
0x4b400000: 712, // ur
0x4b400099: 713, // ur-IN
0x4b4000e8: 714, // ur-PK
0x4bc00000: 715, // uz
0x4bc05000: 716, // uz-Arab
0x4bc05024: 717, // uz-Arab-AF
0x4bc1f000: 718, // uz-Cyrl
0x4bc1f137: 719, // uz-Cyrl-UZ
0x4bc57000: 720, // uz-Latn
0x4bc57137: 721, // uz-Latn-UZ
0x4be00000: 722, // vai
0x4be57000: 723, // vai-Latn
0x4be570b4: 724, // vai-Latn-LR
0x4bee3000: 725, // vai-Vaii
0x4bee30b4: 726, // vai-Vaii-LR
0x4c000000: 727, // ve
0x4c300000: 728, // vi
0x4c30013e: 729, // vi-VN
0x4c900000: 730, // vo
0x4c900001: 731, // vo-001
0x4cc00000: 732, // vun
0x4cc0012f: 733, // vun-TZ
0x4ce00000: 734, // wa
0x4cf00000: 735, // wae
0x4cf0004e: 736, // wae-CH
0x4e500000: 737, // wo
0x4e500114: 738, // wo-SN
0x4f200000: 739, // xh
0x4fb00000: 740, // xog
0x4fb00131: 741, // xog-UG
0x50900000: 742, // yav
0x50900052: 743, // yav-CM
0x51200000: 744, // yi
0x51200001: 745, // yi-001
0x51800000: 746, // yo
0x5180003b: 747, // yo-BJ
0x518000d6: 748, // yo-NG
0x51f00000: 749, // yue
0x51f38000: 750, // yue-Hans
0x51f38053: 751, // yue-Hans-CN
0x51f39000: 752, // yue-Hant
0x51f3908d: 753, // yue-Hant-HK
0x52800000: 754, // zgh
0x528000ba: 755, // zgh-MA
0x52900000: 756, // zh
0x52938000: 757, // zh-Hans
0x52938053: 758, // zh-Hans-CN
0x5293808d: 759, // zh-Hans-HK
0x529380c6: 760, // zh-Hans-MO
0x5293810d: 761, // zh-Hans-SG
0x52939000: 762, // zh-Hant
0x5293908d: 763, // zh-Hant-HK
0x529390c6: 764, // zh-Hant-MO
0x5293912e: 765, // zh-Hant-TW
0x52f00000: 766, // zu
0x52f00161: 767, // zu-ZA
}
// Total table size 4676 bytes (4KiB); checksum: 17BE3673

View file

@ -2,8 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go gen_common.go -output tables.go
//go:generate go run gen_index.go
//go:generate go run gen.go -output tables.go
package language
@ -11,47 +10,34 @@ package language
// - verifying that tables are dropped correctly (most notably matcher tables).
import (
"errors"
"fmt"
"strings"
)
const (
// maxCoreSize is the maximum size of a BCP 47 tag without variants and
// extensions. Equals max lang (3) + script (4) + max reg (3) + 2 dashes.
maxCoreSize = 12
// max99thPercentileSize is a somewhat arbitrary buffer size that presumably
// is large enough to hold at least 99% of the BCP 47 tags.
max99thPercentileSize = 32
// maxSimpleUExtensionSize is the maximum size of a -u extension with one
// key-type pair. Equals len("-u-") + key (2) + dash + max value (8).
maxSimpleUExtensionSize = 14
"golang.org/x/text/internal/language"
"golang.org/x/text/internal/language/compact"
)
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
// specific language or locale. All language tag values are guaranteed to be
// well-formed.
type Tag struct {
lang langID
region regionID
// TODO: we will soon run out of positions for script. Idea: instead of
// storing lang, region, and script codes, store only the compact index and
// have a lookup table from this code to its expansion. This greatly speeds
// up table lookup, speed up common variant cases.
// This will also immediately free up 3 extra bytes. Also, the pVariant
// field can now be moved to the lookup table, as the compact index uniquely
// determines the offset of a possible variant.
script scriptID
pVariant byte // offset in str, includes preceding '-'
pExt uint16 // offset of first extension, includes preceding '-'
type Tag compact.Tag
// str is the string representation of the Tag. It will only be used if the
// tag has variants or extensions.
str string
// makeTag wraps an internal language.Tag in the public Tag type by first
// building its compact representation.
func makeTag(t language.Tag) (tag Tag) {
	c := compact.Make(t)
	return Tag(c)
}
// tag returns the internal language.Tag produced by the compact tag that
// underlies t.
func (t *Tag) tag() language.Tag {
	ct := (*compact.Tag)(t)
	return ct.Tag()
}
// isCompact reports the result of IsCompact on the compact tag that
// underlies t.
func (t *Tag) isCompact() bool {
	ct := (*compact.Tag)(t)
	return ct.IsCompact()
}
// TODO: improve performance.

// lang returns the LangID field of the internal tag for t.
func (t *Tag) lang() language.Language {
	full := t.tag()
	return full.LangID
}

// region returns the RegionID field of the internal tag for t.
func (t *Tag) region() language.Region {
	full := t.tag()
	return full.RegionID
}

// script returns the ScriptID field of the internal tag for t.
func (t *Tag) script() language.Script {
	full := t.tag()
	return full.ScriptID
}
// Make is a convenience wrapper for Parse that omits the error.
// In case of an error, a sensible default is returned.
func Make(s string) Tag {
@ -68,25 +54,13 @@ func (c CanonType) Make(s string) Tag {
// Raw returns the raw base language, script and region, without making an
// attempt to infer their values.
func (t Tag) Raw() (b Base, s Script, r Region) {
return Base{t.lang}, Script{t.script}, Region{t.region}
}
// equalTags compares language, script and region subtags only.
func (t Tag) equalTags(a Tag) bool {
return t.lang == a.lang && t.script == a.script && t.region == a.region
tt := t.tag()
return Base{tt.LangID}, Script{tt.ScriptID}, Region{tt.RegionID}
}
// IsRoot returns true if t is equal to language "und".
func (t Tag) IsRoot() bool {
if int(t.pVariant) < len(t.str) {
return false
}
return t.equalTags(und)
}
// private reports whether the Tag consists solely of a private use tag.
func (t Tag) private() bool {
return t.str != "" && t.pVariant == 0
return compact.Tag(t).IsRoot()
}
// CanonType can be used to enable or disable various types of canonicalization.
@ -138,73 +112,73 @@ const (
// canonicalize returns the canonicalized equivalent of the tag and
// whether there was any change.
func (t Tag) canonicalize(c CanonType) (Tag, bool) {
func canonicalize(c CanonType, t language.Tag) (language.Tag, bool) {
if c == Raw {
return t, false
}
changed := false
if c&SuppressScript != 0 {
if t.lang < langNoIndexOffset && uint8(t.script) == suppressScript[t.lang] {
t.script = 0
if t.LangID.SuppressScript() == t.ScriptID {
t.ScriptID = 0
changed = true
}
}
if c&canonLang != 0 {
for {
if l, aliasType := normLang(t.lang); l != t.lang {
if l, aliasType := t.LangID.Canonicalize(); l != t.LangID {
switch aliasType {
case langLegacy:
case language.Legacy:
if c&Legacy != 0 {
if t.lang == _sh && t.script == 0 {
t.script = _Latn
if t.LangID == _sh && t.ScriptID == 0 {
t.ScriptID = _Latn
}
t.lang = l
t.LangID = l
changed = true
}
case langMacro:
case language.Macro:
if c&Macro != 0 {
// We deviate here from CLDR. The mapping "nb" -> "no"
// qualifies as a typical Macro language mapping. However,
// for legacy reasons, CLDR maps "no", the macro language
// code for Norwegian, to the dominant variant "nb". This
// change is currently under consideration for CLDR as well.
// See http://unicode.org/cldr/trac/ticket/2698 and also
// http://unicode.org/cldr/trac/ticket/1790 for some of the
// See https://unicode.org/cldr/trac/ticket/2698 and also
// https://unicode.org/cldr/trac/ticket/1790 for some of the
// practical implications. TODO: this check could be removed
// if CLDR adopts this change.
if c&CLDR == 0 || t.lang != _nb {
if c&CLDR == 0 || t.LangID != _nb {
changed = true
t.lang = l
t.LangID = l
}
}
case langDeprecated:
case language.Deprecated:
if c&DeprecatedBase != 0 {
if t.lang == _mo && t.region == 0 {
t.region = _MD
if t.LangID == _mo && t.RegionID == 0 {
t.RegionID = _MD
}
t.lang = l
t.LangID = l
changed = true
// Other canonicalization types may still apply.
continue
}
}
} else if c&Legacy != 0 && t.lang == _no && c&CLDR != 0 {
t.lang = _nb
} else if c&Legacy != 0 && t.LangID == _no && c&CLDR != 0 {
t.LangID = _nb
changed = true
}
break
}
}
if c&DeprecatedScript != 0 {
if t.script == _Qaai {
if t.ScriptID == _Qaai {
changed = true
t.script = _Zinh
t.ScriptID = _Zinh
}
}
if c&DeprecatedRegion != 0 {
if r := normRegion(t.region); r != 0 {
if r := t.RegionID.Canonicalize(); r != t.RegionID {
changed = true
t.region = r
t.RegionID = r
}
}
return t, changed
@ -212,11 +186,20 @@ func (t Tag) canonicalize(c CanonType) (Tag, bool) {
// Canonicalize returns the canonicalized equivalent of the tag.
func (c CanonType) Canonicalize(t Tag) (Tag, error) {
t, changed := t.canonicalize(c)
if changed {
t.remakeString()
// First try fast path.
if t.isCompact() {
if _, changed := canonicalize(c, compact.Tag(t).Tag()); !changed {
return t, nil
}
}
// It is unlikely that one will canonicalize a tag after matching. So do
// a slow but simple approach here.
if tag, changed := canonicalize(c, t.tag()); changed {
tag.RemakeString()
return makeTag(tag), nil
}
return t, nil
}
// Confidence indicates the level of certainty for a given return value.
@ -239,83 +222,21 @@ func (c Confidence) String() string {
return confName[c]
}
// remakeString is used to update t.str in case lang, script or region changed.
// It is assumed that pExt and pVariant still point to the start of the
// respective parts.
func (t *Tag) remakeString() {
if t.str == "" {
return
}
extra := t.str[t.pVariant:]
if t.pVariant > 0 {
extra = extra[1:]
}
if t.equalTags(und) && strings.HasPrefix(extra, "x-") {
t.str = extra
t.pVariant = 0
t.pExt = 0
return
}
var buf [max99thPercentileSize]byte // avoid extra memory allocation in most cases.
b := buf[:t.genCoreBytes(buf[:])]
if extra != "" {
diff := len(b) - int(t.pVariant)
b = append(b, '-')
b = append(b, extra...)
t.pVariant = uint8(int(t.pVariant) + diff)
t.pExt = uint16(int(t.pExt) + diff)
} else {
t.pVariant = uint8(len(b))
t.pExt = uint16(len(b))
}
t.str = string(b)
}
// genCoreBytes writes a string for the base languages, script and region tags
// to the given buffer and returns the number of bytes written. It will never
// write more than maxCoreSize bytes.
func (t *Tag) genCoreBytes(buf []byte) int {
n := t.lang.stringToBuf(buf[:])
if t.script != 0 {
n += copy(buf[n:], "-")
n += copy(buf[n:], t.script.String())
}
if t.region != 0 {
n += copy(buf[n:], "-")
n += copy(buf[n:], t.region.String())
}
return n
}
// String returns the canonical string representation of the language tag.
func (t Tag) String() string {
if t.str != "" {
return t.str
}
if t.script == 0 && t.region == 0 {
return t.lang.String()
}
buf := [maxCoreSize]byte{}
return string(buf[:t.genCoreBytes(buf[:])])
return t.tag().String()
}
// MarshalText implements encoding.TextMarshaler.
func (t Tag) MarshalText() (text []byte, err error) {
if t.str != "" {
text = append(text, t.str...)
} else if t.script == 0 && t.region == 0 {
text = append(text, t.lang.String()...)
} else {
buf := [maxCoreSize]byte{}
text = buf[:t.genCoreBytes(buf[:])]
}
return text, nil
return t.tag().MarshalText()
}
// UnmarshalText implements encoding.TextUnmarshaler.
func (t *Tag) UnmarshalText(text []byte) error {
tag, err := Raw.Parse(string(text))
*t = tag
var tag language.Tag
err := tag.UnmarshalText(text)
*t = makeTag(tag)
return err
}
@ -323,15 +244,16 @@ func (t *Tag) UnmarshalText(text []byte) error {
// unspecified, an attempt will be made to infer it from the context.
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
func (t Tag) Base() (Base, Confidence) {
if t.lang != 0 {
return Base{t.lang}, Exact
if b := t.lang(); b != 0 {
return Base{b}, Exact
}
tt := t.tag()
c := High
if t.script == 0 && !(Region{t.region}).IsCountry() {
if tt.ScriptID == 0 && !tt.RegionID.IsCountry() {
c = Low
}
if tag, err := addTags(t); err == nil && tag.lang != 0 {
return Base{tag.lang}, c
if tag, err := tt.Maximize(); err == nil && tag.LangID != 0 {
return Base{tag.LangID}, c
}
return Base{0}, No
}
@ -344,35 +266,34 @@ func (t Tag) Base() (Base, Confidence) {
// If a script cannot be inferred (Zzzz, No) is returned. We do not use Zyyy (undetermined)
// as one would suspect from the IANA registry for BCP 47. In a Unicode context Zyyy marks
// common characters (like 1, 2, 3, '.', etc.) and is therefore more like multiple scripts.
// See http://www.unicode.org/reports/tr24/#Values for more details. Zzzz is also used for
// See https://www.unicode.org/reports/tr24/#Values for more details. Zzzz is also used for
// unknown value in CLDR. (Zzzz, Exact) is returned if Zzzz was explicitly specified.
// Note that an inferred script is never guaranteed to be the correct one. Latin is
// almost exclusively used for Afrikaans, but Arabic has been used for some texts
// in the past. Also, the script that is commonly used may change over time.
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
func (t Tag) Script() (Script, Confidence) {
if t.script != 0 {
return Script{t.script}, Exact
if scr := t.script(); scr != 0 {
return Script{scr}, Exact
}
sc, c := scriptID(_Zzzz), No
if t.lang < langNoIndexOffset {
if scr := scriptID(suppressScript[t.lang]); scr != 0 {
// Note: it is not always the case that a language with a suppress
// script value is only written in one script (e.g. kk, ms, pa).
if t.region == 0 {
return Script{scriptID(scr)}, High
}
sc, c = scr, High
tt := t.tag()
sc, c := language.Script(_Zzzz), No
if scr := tt.LangID.SuppressScript(); scr != 0 {
// Note: it is not always the case that a language with a suppress
// script value is only written in one script (e.g. kk, ms, pa).
if tt.RegionID == 0 {
return Script{scr}, High
}
sc, c = scr, High
}
if tag, err := addTags(t); err == nil {
if tag.script != sc {
sc, c = tag.script, Low
if tag, err := tt.Maximize(); err == nil {
if tag.ScriptID != sc {
sc, c = tag.ScriptID, Low
}
} else {
t, _ = (Deprecated | Macro).Canonicalize(t)
if tag, err := addTags(t); err == nil && tag.script != sc {
sc, c = tag.script, Low
tt, _ = canonicalize(Deprecated|Macro, tt)
if tag, err := tt.Maximize(); err == nil && tag.ScriptID != sc {
sc, c = tag.ScriptID, Low
}
}
return Script{sc}, c
@ -382,28 +303,31 @@ func (t Tag) Script() (Script, Confidence) {
// infer a most likely candidate from the context.
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
func (t Tag) Region() (Region, Confidence) {
if t.region != 0 {
return Region{t.region}, Exact
if r := t.region(); r != 0 {
return Region{r}, Exact
}
if t, err := addTags(t); err == nil {
return Region{t.region}, Low // TODO: differentiate between high and low.
tt := t.tag()
if tt, err := tt.Maximize(); err == nil {
return Region{tt.RegionID}, Low // TODO: differentiate between high and low.
}
t, _ = (Deprecated | Macro).Canonicalize(t)
if tag, err := addTags(t); err == nil {
return Region{tag.region}, Low
tt, _ = canonicalize(Deprecated|Macro, tt)
if tag, err := tt.Maximize(); err == nil {
return Region{tag.RegionID}, Low
}
return Region{_ZZ}, No // TODO: return world instead of undetermined?
}
// Variant returns the variants specified explicitly for this language tag.
// Variants returns the variants specified explicitly for this language tag,
// or nil if no variant was specified.
func (t Tag) Variants() []Variant {
if !compact.Tag(t).MayHaveVariants() {
return nil
}
v := []Variant{}
if int(t.pVariant) < int(t.pExt) {
for x, str := "", t.str[t.pVariant:t.pExt]; str != ""; {
x, str = nextToken(str)
v = append(v, Variant{x})
}
x, str := "", t.tag().Variants()
for str != "" {
x, str = nextToken(str)
v = append(v, Variant{x})
}
return v
}
@ -411,57 +335,13 @@ func (t Tag) Variants() []Variant {
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
// specific language are substituted with fields from the parent language.
// The parent for a language may change for newer versions of CLDR.
//
// Parent returns a tag for a less specific language that is mutually
// intelligible or Und if there is no such language. This may not be the same as
// simply stripping the last BCP 47 subtag. For instance, the parent of "zh-TW"
// is "zh-Hant", and the parent of "zh-Hant" is "und".
func (t Tag) Parent() Tag {
if t.str != "" {
// Strip the variants and extensions.
t, _ = Raw.Compose(t.Raw())
if t.region == 0 && t.script != 0 && t.lang != 0 {
base, _ := addTags(Tag{lang: t.lang})
if base.script == t.script {
return Tag{lang: t.lang}
}
}
return t
}
if t.lang != 0 {
if t.region != 0 {
maxScript := t.script
if maxScript == 0 {
max, _ := addTags(t)
maxScript = max.script
}
for i := range parents {
if langID(parents[i].lang) == t.lang && scriptID(parents[i].maxScript) == maxScript {
for _, r := range parents[i].fromRegion {
if regionID(r) == t.region {
return Tag{
lang: t.lang,
script: scriptID(parents[i].script),
region: regionID(parents[i].toRegion),
}
}
}
}
}
// Strip the script if it is the default one.
base, _ := addTags(Tag{lang: t.lang})
if base.script != maxScript {
return Tag{lang: t.lang, script: maxScript}
}
return Tag{lang: t.lang}
} else if t.script != 0 {
// The parent for an base-script pair with a non-default script is
// "und" instead of the base language.
base, _ := addTags(Tag{lang: t.lang})
if base.script != t.script {
return und
}
return Tag{lang: t.lang}
}
}
return und
return Tag(compact.Tag(t).Parent())
}
// returns token t and the rest of the string.
@ -487,17 +367,8 @@ func (e Extension) String() string {
// ParseExtension parses s as an extension and returns it on success.
func ParseExtension(s string) (e Extension, err error) {
scan := makeScannerString(s)
var end int
if n := len(scan.token); n != 1 {
return Extension{}, errSyntax
}
scan.toLower(0, len(scan.b))
end = parseExtension(&scan)
if end != len(s) {
return Extension{}, errSyntax
}
return Extension{string(scan.b)}, nil
ext, err := language.ParseExtension(s)
return Extension{ext}, err
}
// Type returns the one-byte extension type of e. It returns 0 for the zero
@ -518,22 +389,20 @@ func (e Extension) Tokens() []string {
// false for ok if t does not have the requested extension. The returned
// extension will be invalid in this case.
func (t Tag) Extension(x byte) (ext Extension, ok bool) {
for i := int(t.pExt); i < len(t.str)-1; {
var ext string
i, ext = getExtension(t.str, i)
if ext[0] == x {
return Extension{ext}, true
}
if !compact.Tag(t).MayHaveExtensions() {
return Extension{}, false
}
return Extension{}, false
e, ok := t.tag().Extension(x)
return Extension{e}, ok
}
// Extensions returns all extensions of t.
func (t Tag) Extensions() []Extension {
if !compact.Tag(t).MayHaveExtensions() {
return nil
}
e := []Extension{}
for i := int(t.pExt); i < len(t.str)-1; {
var ext string
i, ext = getExtension(t.str, i)
for _, ext := range t.tag().Extensions() {
e = append(e, Extension{ext})
}
return e
@ -541,259 +410,105 @@ func (t Tag) Extensions() []Extension {
// TypeForKey returns the type associated with the given key, where key and type
// are of the allowed values defined for the Unicode locale extension ('u') in
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// TypeForKey will traverse the inheritance chain to get the correct value.
func (t Tag) TypeForKey(key string) string {
if start, end, _ := t.findTypeForKey(key); end != start {
return t.str[start:end]
if !compact.Tag(t).MayHaveExtensions() {
if key != "rg" && key != "va" {
return ""
}
}
return ""
return t.tag().TypeForKey(key)
}
var (
errPrivateUse = errors.New("cannot set a key on a private use tag")
errInvalidArguments = errors.New("invalid key or type")
)
// SetTypeForKey returns a new Tag with the key set to type, where key and type
// are of the allowed values defined for the Unicode locale extension ('u') in
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// An empty value removes an existing pair with the same key.
func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
if t.private() {
return t, errPrivateUse
}
if len(key) != 2 {
return t, errInvalidArguments
}
// Remove the setting if value is "".
if value == "" {
start, end, _ := t.findTypeForKey(key)
if start != end {
// Remove key tag and leading '-'.
start -= 4
// Remove a possible empty extension.
if (end == len(t.str) || t.str[end+2] == '-') && t.str[start-2] == '-' {
start -= 2
}
if start == int(t.pVariant) && end == len(t.str) {
t.str = ""
t.pVariant, t.pExt = 0, 0
} else {
t.str = fmt.Sprintf("%s%s", t.str[:start], t.str[end:])
}
}
return t, nil
}
if len(value) < 3 || len(value) > 8 {
return t, errInvalidArguments
}
var (
buf [maxCoreSize + maxSimpleUExtensionSize]byte
uStart int // start of the -u extension.
)
// Generate the tag string if needed.
if t.str == "" {
uStart = t.genCoreBytes(buf[:])
buf[uStart] = '-'
uStart++
}
// Create new key-type pair and parse it to verify.
b := buf[uStart:]
copy(b, "u-")
copy(b[2:], key)
b[4] = '-'
b = b[:5+copy(b[5:], value)]
scan := makeScanner(b)
if parseExtensions(&scan); scan.err != nil {
return t, scan.err
}
// Assemble the replacement string.
if t.str == "" {
t.pVariant, t.pExt = byte(uStart-1), uint16(uStart-1)
t.str = string(buf[:uStart+len(b)])
} else {
s := t.str
start, end, hasExt := t.findTypeForKey(key)
if start == end {
if hasExt {
b = b[2:]
}
t.str = fmt.Sprintf("%s-%s%s", s[:start], b, s[end:])
} else {
t.str = fmt.Sprintf("%s%s%s", s[:start], value, s[end:])
}
}
return t, nil
tt, err := t.tag().SetTypeForKey(key, value)
return makeTag(tt), err
}
// findKeyAndType returns the start and end position for the type corresponding
// to key or the point at which to insert the key-value pair if the type
// wasn't found. The hasExt return value reports whether an -u extension was present.
// Note: the extensions are typically very small and are likely to contain
// only one key-type pair.
func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) {
p := int(t.pExt)
if len(key) != 2 || p == len(t.str) || p == 0 {
return p, p, false
}
s := t.str
// Find the correct extension.
for p++; s[p] != 'u'; p++ {
if s[p] > 'u' {
p--
return p, p, false
}
if p = nextExtension(s, p); p == len(s) {
return len(s), len(s), false
}
}
// Proceed to the hyphen following the extension name.
p++
// curKey is the key currently being processed.
curKey := ""
// Iterate over keys until we get the end of a section.
for {
// p points to the hyphen preceding the current token.
if p3 := p + 3; s[p3] == '-' {
// Found a key.
// Check whether we just processed the key that was requested.
if curKey == key {
return start, p, true
}
// Set to the next key and continue scanning type tokens.
curKey = s[p+1 : p3]
if curKey > key {
return p, p, true
}
// Start of the type token sequence.
start = p + 4
// A type is at least 3 characters long.
p += 7 // 4 + 3
} else {
// Attribute or type, which is at least 3 characters long.
p += 4
}
// p points past the third character of a type or attribute.
max := p + 5 // maximum length of token plus hyphen.
if len(s) < max {
max = len(s)
}
for ; p < max && s[p] != '-'; p++ {
}
// Bail if we have exhausted all tokens or if the next token starts
// a new extension.
if p == len(s) || s[p+2] == '-' {
if curKey == key {
return start, p, true
}
return p, p, true
}
}
}
// NumCompactTags is the number of compact tags. The maximum tag is
// NumCompactTags-1.
const NumCompactTags = compact.NumCompactTags
// CompactIndex returns an index, where 0 <= index < NumCompactTags, for tags
// for which data exists in the text repository. The index will change over time
// and should not be stored in persistent storage. Extensions, except for the
// 'va' type of the 'u' extension, are ignored. It will return 0, false if no
// compact tag exists, where 0 is the index for the root language (Und).
func CompactIndex(t Tag) (index int, ok bool) {
// TODO: perhaps give more frequent tags a lower index.
// TODO: we could make the indexes stable. This will exclude some
// possibilities for optimization, so don't do this quite yet.
b, s, r := t.Raw()
if len(t.str) > 0 {
if strings.HasPrefix(t.str, "x-") {
// We have no entries for user-defined tags.
return 0, false
}
if uint16(t.pVariant) != t.pExt {
// There are no tags with variants and an u-va type.
if t.TypeForKey("va") != "" {
return 0, false
}
t, _ = Raw.Compose(b, s, r, t.Variants())
} else if _, ok := t.Extension('u'); ok {
// Strip all but the 'va' entry.
variant := t.TypeForKey("va")
t, _ = Raw.Compose(b, s, r)
t, _ = t.SetTypeForKey("va", variant)
}
if len(t.str) > 0 {
// We have some variants.
for i, s := range specialTags {
if s == t {
return i + 1, true
}
}
return 0, false
}
}
// No variants specified: just compare core components.
// The key has the form lllssrrr, where l, s, and r are nibbles for
// respectively the langID, scriptID, and regionID.
key := uint32(b.langID) << (8 + 12)
key |= uint32(s.scriptID) << 12
key |= uint32(r.regionID)
x, ok := coreTags[key]
return int(x), ok
// for which data exists in the text repository. The index will change over time
// and should not be stored in persistent storage. If t does not match a compact
// index, exact will be false and the compact index will be returned for the
// first match after repeatedly taking the Parent of t.
func CompactIndex(t Tag) (index int, exact bool) {
id, exact := compact.LanguageID(compact.Tag(t))
return int(id), exact
}
var root = language.Tag{}
// Base is an ISO 639 language code, used for encoding the base language
// of a language tag.
type Base struct {
langID
langID language.Language
}
// ParseBase parses a 2- or 3-letter ISO 639 code.
// It returns a ValueError if s is a well-formed but unknown language identifier
// or another error if another error occurred.
func ParseBase(s string) (Base, error) {
if n := len(s); n < 2 || 3 < n {
return Base{}, errSyntax
}
var buf [3]byte
l, err := getLangID(buf[:copy(buf[:], s)])
l, err := language.ParseBase(s)
return Base{l}, err
}
// String formats the base language as its BCP 47 representation, as
// produced by the wrapped language ID.
func (b Base) String() string {
	repr := b.langID.String()
	return repr
}
// ISO3 reports the ISO 639-3 language code for b, as produced by the
// wrapped language ID.
func (b Base) ISO3() string {
	code := b.langID.ISO3()
	return code
}
// IsPrivateUse tells whether the language code held by b is reserved for
// private use.
func (b Base) IsPrivateUse() bool {
	private := b.langID.IsPrivateUse()
	return private
}
// Script is a 4-letter ISO 15924 code for representing scripts.
// It is idiomatically represented in title case.
type Script struct {
scriptID
scriptID language.Script
}
// ParseScript parses a 4-letter ISO 15924 code.
// It returns a ValueError if s is a well-formed but unknown script identifier
// or another error if another error occurred.
func ParseScript(s string) (Script, error) {
if len(s) != 4 {
return Script{}, errSyntax
}
var buf [4]byte
sc, err := getScriptID(script, buf[:copy(buf[:], s)])
sc, err := language.ParseScript(s)
return Script{sc}, err
}
// String formats the script code in title case.
// An unspecified script is rendered as "Zzzz".
func (s Script) String() string {
	repr := s.scriptID.String()
	return repr
}
// IsPrivateUse tells whether the script code held by s is reserved for
// private use.
func (s Script) IsPrivateUse() bool {
	private := s.scriptID.IsPrivateUse()
	return private
}
// Region is an ISO 3166-1 or UN M.49 code for representing countries and regions.
type Region struct {
regionID
regionID language.Region
}
// EncodeM49 returns the Region for the given UN M.49 code.
// It returns an error if r is not a valid code.
func EncodeM49(r int) (Region, error) {
rid, err := getRegionM49(r)
rid, err := language.EncodeM49(r)
return Region{rid}, err
}
@ -801,62 +516,54 @@ func EncodeM49(r int) (Region, error) {
// It returns a ValueError if s is a well-formed but unknown region identifier
// or another error if another error occurred.
func ParseRegion(s string) (Region, error) {
if n := len(s); n < 2 || 3 < n {
return Region{}, errSyntax
}
var buf [3]byte
r, err := getRegionID(buf[:copy(buf[:], s)])
r, err := language.ParseRegion(s)
return Region{r}, err
}
// String formats the region as its BCP 47 representation.
// An unspecified region is rendered as "ZZ".
func (r Region) String() string {
	repr := r.regionID.String()
	return repr
}
// ISO3 returns the 3-letter ISO code of r.
// Note that not all regions have a 3-letter ISO code.
// In such cases this method returns "ZZZ".
func (r Region) ISO3() string {
	// Delegate to ISO3, not String: String yields the BCP 47 form ("ZZ" for
	// an unspecified region), which contradicts the 3-letter contract
	// documented above and the behavior of Base.ISO3.
	return r.regionID.ISO3()
}
// M49 reports the UN M.49 numeric encoding of r.
// The result is 0 when no such encoding is defined for r.
func (r Region) M49() int {
	code := r.regionID.M49()
	return code
}
// IsPrivateUse tells whether r has the ISO 3166 User-assigned status. Such
// regions may include private-use tags that CLDR assigns and that this
// implementation uses, so IsPrivateUse and IsCountry can both be true for
// the same region.
func (r Region) IsPrivateUse() bool {
	private := r.regionID.IsPrivateUse()
	return private
}
// IsCountry returns whether this region is a country or autonomous area. This
// includes non-standard definitions from CLDR.
func (r Region) IsCountry() bool {
if r.regionID == 0 || r.IsGroup() || r.IsPrivateUse() && r.regionID != _XK {
return false
}
return true
return r.regionID.IsCountry()
}
// IsGroup returns whether this region defines a collection of regions. This
// includes non-standard definitions from CLDR.
func (r Region) IsGroup() bool {
if r.regionID == 0 {
return false
}
return int(regionInclusion[r.regionID]) < len(regionContainment)
return r.regionID.IsGroup()
}
// Contains returns whether Region c is contained by Region r. It returns true
// if c == r.
func (r Region) Contains(c Region) bool {
return r.regionID.contains(c.regionID)
return r.regionID.Contains(c.regionID)
}
func (r regionID) contains(c regionID) bool {
if r == c {
return true
}
g := regionInclusion[r]
if g >= nRegionGroups {
return false
}
m := regionContainment[g]
d := regionInclusion[c]
b := regionInclusionBits[d]
// A contained country may belong to multiple disjoint groups. Matching any
// of these indicates containment. If the contained region is a group, it
// must strictly be a subset.
if d >= nRegionGroups {
return b&m != 0
}
return b&^m == 0
}
var errNoTLD = errors.New("language: region is not a valid ccTLD")
// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
// In all other cases it returns either the region itself or an error.
//
@ -865,25 +572,15 @@ var errNoTLD = errors.New("language: region is not a valid ccTLD")
// region will already be canonicalized if it was obtained from a Tag that was
// obtained using any of the default methods.
func (r Region) TLD() (Region, error) {
// See http://en.wikipedia.org/wiki/Country_code_top-level_domain for the
// difference between ISO 3166-1 and IANA ccTLD.
if r.regionID == _GB {
r = Region{_UK}
}
if (r.typ() & ccTLD) == 0 {
return Region{}, errNoTLD
}
return r, nil
tld, err := r.regionID.TLD()
return Region{tld}, err
}
// Canonicalize returns the region or a possible replacement if the region is
// deprecated. It will not return a replacement for deprecated regions that
// are split into multiple regions.
func (r Region) Canonicalize() Region {
if cr := normRegion(r.regionID); cr != 0 {
return Region{cr}
}
return r
return Region{r.regionID.Canonicalize()}
}
// Variant represents a registered variant of a language as defined by BCP 47.
@ -894,11 +591,8 @@ type Variant struct {
// ParseVariant parses and returns a Variant. An error is returned if s is not
// a valid variant.
func ParseVariant(s string) (Variant, error) {
s = strings.ToLower(s)
if _, ok := variantIndex[s]; ok {
return Variant{s}, nil
}
return Variant{}, mkErrInvalid([]byte(s))
v, err := language.ParseVariant(s)
return Variant{v.String()}, err
}
// String returns the string representation of the variant.

View file

@ -4,7 +4,12 @@
package language
import "errors"
import (
"errors"
"strings"
"golang.org/x/text/internal/language"
)
// A MatchOption configures a Matcher.
type MatchOption func(*matcher)
@ -74,12 +79,13 @@ func NewMatcher(t []Tag, options ...MatchOption) Matcher {
}
func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) {
var tt language.Tag
match, w, c := m.getBest(want...)
if match != nil {
t, index = match.tag, match.index
tt, index = match.tag, match.index
} else {
// TODO: this should be an option
t = m.default_.tag
tt = m.default_.tag
if m.preferSameScript {
outer:
for _, w := range want {
@ -91,7 +97,7 @@ func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) {
}
for i, h := range m.supported {
if script.scriptID == h.maxScript {
t, index = h.tag, i
tt, index = h.tag, i
break outer
}
}
@ -99,238 +105,45 @@ func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) {
}
// TODO: select first language tag based on script.
}
if w.region != 0 && t.region != 0 && t.region.contains(w.region) {
t, _ = Raw.Compose(t, Region{w.region})
if w.RegionID != tt.RegionID && w.RegionID != 0 {
if w.RegionID != 0 && tt.RegionID != 0 && tt.RegionID.Contains(w.RegionID) {
tt.RegionID = w.RegionID
tt.RemakeString()
} else if r := w.RegionID.String(); len(r) == 2 {
// TODO: also filter macro and deprecated.
tt, _ = tt.SetTypeForKey("rg", strings.ToLower(r)+"zzzz")
}
}
// Copy options from the user-provided tag into the result tag. This is hard
// to do after the fact, so we do it here.
// TODO: add in alternative variants to -u-va-.
// TODO: add preferred region to -u-rg-.
if e := w.Extensions(); len(e) > 0 {
t, _ = Raw.Compose(t, e)
}
return t, index, c
}
type scriptRegionFlags uint8
const (
isList = 1 << iota
scriptInFrom
regionInFrom
)
func (t *Tag) setUndefinedLang(id langID) {
if t.lang == 0 {
t.lang = id
}
}
func (t *Tag) setUndefinedScript(id scriptID) {
if t.script == 0 {
t.script = id
}
}
func (t *Tag) setUndefinedRegion(id regionID) {
if t.region == 0 || t.region.contains(id) {
t.region = id
b := language.Builder{}
b.SetTag(tt)
for _, e := range e {
b.AddExt(e)
}
tt = b.Make()
}
return makeTag(tt), index, c
}
// ErrMissingLikelyTagsData indicates no information was available
// to compute likely values of missing tags.
var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
// addLikelySubtags sets subtags to their most likely value, given the locale.
// In most cases this means setting fields for unknown values, but in some
// cases it may alter a value. It returns an ErrMissingLikelyTagsData error
// if the given locale cannot be expanded.
func (t Tag) addLikelySubtags() (Tag, error) {
id, err := addTags(t)
if err != nil {
return t, err
} else if id.equalTags(t) {
return t, nil
}
id.remakeString()
return id, nil
}
// specializeRegion attempts to specialize a group region.
func specializeRegion(t *Tag) bool {
if i := regionInclusion[t.region]; i < nRegionGroups {
x := likelyRegionGroup[i]
if langID(x.lang) == t.lang && scriptID(x.script) == t.script {
t.region = regionID(x.region)
}
return true
}
return false
}
// addTags fills in the undefined language, script and region of t using the
// likely-subtags tables and returns the result. It does not rebuild the
// string form of the tag.
//
// The lookups are attempted in this order, returning at the first match:
//   - private use tags are returned unchanged;
//   - script and region both defined: a fully specified tag only gets its
//     region specialized, otherwise the language is looked up by region
//     and script;
//   - language defined: lang-script and lang-region entries;
//   - language undefined: und-script, then und-region entries;
//   - finally the entry for the language alone, with the language itself
//     defaulting to _en when it is still undefined.
//
// ErrMissingLikelyTagsData is returned when the final language lookup is not
// possible because t.lang is not below langNoIndexOffset.
func addTags(t Tag) (Tag, error) {
// We leave private use identifiers alone.
if t.private() {
return t, nil
}
if t.script != 0 && t.region != 0 {
if t.lang != 0 {
// already fully specified
specializeRegion(&t)
return t, nil
}
// Search matches for und-script-region. Note that for these cases
// region will never be a group so there is no need to check for this.
list := likelyRegion[t.region : t.region+1]
if x := list[0]; x.flags&isList != 0 {
// For list entries, lang is the start offset into likelyRegionList and
// script is the number of entries.
list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
}
for _, x := range list {
// Deviating from the spec. See match_test.go for details.
if scriptID(x.script) == t.script {
t.setUndefinedLang(langID(x.lang))
return t, nil
}
}
}
if t.lang != 0 {
// Search matches for lang-script and lang-region, where lang != und.
if t.lang < langNoIndexOffset {
x := likelyLang[t.lang]
if x.flags&isList != 0 {
// For list entries, region is the start offset into likelyLangList and
// script is the number of entries.
list := likelyLangList[x.region : x.region+uint16(x.script)]
if t.script != 0 {
for _, x := range list {
if scriptID(x.script) == t.script && x.flags&scriptInFrom != 0 {
t.setUndefinedRegion(regionID(x.region))
return t, nil
}
}
} else if t.region != 0 {
count := 0
goodScript := true
tt := t
for _, x := range list {
// We visit all entries for which the script was not
// defined, including the ones where the region was not
// defined. This allows for proper disambiguation within
// regions.
if x.flags&scriptInFrom == 0 && t.region.contains(regionID(x.region)) {
tt.region = regionID(x.region)
tt.setUndefinedScript(scriptID(x.script))
goodScript = goodScript && tt.script == scriptID(x.script)
count++
}
}
if count == 1 {
return tt, nil
}
// Even if we fail to find a unique Region, we might have
// an unambiguous script.
if goodScript {
t.script = tt.script
}
}
}
}
} else {
// Search matches for und-script.
if t.script != 0 {
x := likelyScript[t.script]
if x.region != 0 {
t.setUndefinedRegion(regionID(x.region))
t.setUndefinedLang(langID(x.lang))
return t, nil
}
}
// Search matches for und-region. If und-script-region exists, it would
// have been found earlier.
if t.region != 0 {
if i := regionInclusion[t.region]; i < nRegionGroups {
x := likelyRegionGroup[i]
if x.region != 0 {
t.setUndefinedLang(langID(x.lang))
t.setUndefinedScript(scriptID(x.script))
t.region = regionID(x.region)
}
} else {
x := likelyRegion[t.region]
if x.flags&isList != 0 {
// Only the first entry of the list is considered here.
x = likelyRegionList[x.lang]
}
if x.script != 0 && x.flags != scriptInFrom {
t.setUndefinedLang(langID(x.lang))
t.setUndefinedScript(scriptID(x.script))
return t, nil
}
}
}
}
// Search matches for lang.
if t.lang < langNoIndexOffset {
x := likelyLang[t.lang]
if x.flags&isList != 0 {
// Only the first entry of the list is considered here.
x = likelyLangList[x.region]
}
if x.region != 0 {
t.setUndefinedScript(scriptID(x.script))
t.setUndefinedRegion(regionID(x.region))
}
specializeRegion(&t)
if t.lang == 0 {
t.lang = _en // default language
}
return t, nil
}
return t, ErrMissingLikelyTagsData
}
// setTagsFrom overwrites the language, script and region of t with the
// corresponding fields of id. No other field of t is touched.
func (t *Tag) setTagsFrom(id Tag) {
	t.lang, t.script, t.region = id.lang, id.script, id.region
}
// minimize removes the region or script subtags from t such that
// t.addLikelySubtags() == t.minimize().addLikelySubtags().
//
// The string form of the result is rebuilt on success. On failure the value
// produced by minimizeTags is returned together with its error.
func (t Tag) minimize() (Tag, error) {
	reduced, err := minimizeTags(t)
	if err != nil {
		return reduced, err
	}
	reduced.remakeString()
	return reduced, nil
}
// minimizeTags mimics the behavior of the ICU 51 C implementation.
//
// The undefined tag is returned as is. Otherwise t is maximized with addTags
// and three reduced forms are tried in order: language only, language with
// region, language with script. The first form whose own maximization equals
// that of t replaces the language, script and region of t. If none of them
// qualifies, t is returned unmodified. An error from maximizing t itself is
// returned together with the unmodified t.
func minimizeTags(t Tag) (Tag, error) {
	if t.equalTags(und) {
		return t, nil
	}
	full, err := addTags(t)
	if err != nil {
		return t, err
	}
	candidates := [...]Tag{
		{lang: t.lang},
		{lang: t.lang, region: t.region},
		{lang: t.lang, script: t.script},
	}
	for _, candidate := range candidates {
		expanded, e := addTags(candidate)
		if e != nil || !full.equalTags(expanded) {
			continue
		}
		t.setTagsFrom(candidate)
		break
	}
	return t, nil
}
// func (t *Tag) setTagsFrom(id Tag) {
// t.LangID = id.LangID
// t.ScriptID = id.ScriptID
// t.RegionID = id.RegionID
// }
// Tag Matching
// CLDR defines an algorithm for finding the best match between two sets of language
// tags. The basic algorithm defines how to score a possible match and then find
// the match with the best score
// (see http://www.unicode.org/reports/tr35/#LanguageMatching).
// (see https://www.unicode.org/reports/tr35/#LanguageMatching).
// Using scoring has several disadvantages. The scoring obfuscates the importance of
// the various factors considered, making the algorithm harder to understand. Using
// scoring also requires the full score to be computed for each pair of tags.
@ -441,7 +254,7 @@ func minimizeTags(t Tag) (Tag, error) {
type matcher struct {
default_ *haveTag
supported []*haveTag
index map[langID]*matchHeader
index map[language.Language]*matchHeader
passSettings bool
preferSameScript bool
}
@ -456,7 +269,7 @@ type matchHeader struct {
// haveTag holds a supported Tag and its maximized script and region. The maximized
// or canonicalized language is not stored as it is not needed during matching.
type haveTag struct {
tag Tag
tag language.Tag
// index of this tag in the original list of supported tags.
index int
@ -466,37 +279,37 @@ type haveTag struct {
conf Confidence
// Maximized region and script.
maxRegion regionID
maxScript scriptID
maxRegion language.Region
maxScript language.Script
// altScript may be checked as an alternative match to maxScript. If altScript
// matches, the confidence level for this match is Low. Theoretically there
// could be multiple alternative scripts. This does not occur in practice.
altScript scriptID
altScript language.Script
// nextMax is the index of the next haveTag with the same maximized tags.
nextMax uint16
}
func makeHaveTag(tag Tag, index int) (haveTag, langID) {
func makeHaveTag(tag language.Tag, index int) (haveTag, language.Language) {
max := tag
if tag.lang != 0 || tag.region != 0 || tag.script != 0 {
max, _ = max.canonicalize(All)
max, _ = addTags(max)
max.remakeString()
if tag.LangID != 0 || tag.RegionID != 0 || tag.ScriptID != 0 {
max, _ = canonicalize(All, max)
max, _ = max.Maximize()
max.RemakeString()
}
return haveTag{tag, index, Exact, max.region, max.script, altScript(max.lang, max.script), 0}, max.lang
return haveTag{tag, index, Exact, max.RegionID, max.ScriptID, altScript(max.LangID, max.ScriptID), 0}, max.LangID
}
// altScript returns an alternative script that may match the given script with
// a low confidence. At the moment, the langMatch data allows for at most one
// script to map to another and we rely on this to keep the code simple.
func altScript(l langID, s scriptID) scriptID {
func altScript(l language.Language, s language.Script) language.Script {
for _, alt := range matchScript {
// TODO: also match cases where language is not the same.
if (langID(alt.wantLang) == l || langID(alt.haveLang) == l) &&
scriptID(alt.haveScript) == s {
return scriptID(alt.wantScript)
if (language.Language(alt.wantLang) == l || language.Language(alt.haveLang) == l) &&
language.Script(alt.haveScript) == s {
return language.Script(alt.wantScript)
}
}
return 0
@ -508,7 +321,7 @@ func (h *matchHeader) addIfNew(n haveTag, exact bool) {
h.original = h.original || exact
// Don't add new exact matches.
for _, v := range h.haveTags {
if v.tag.equalsRest(n.tag) {
if equalsRest(v.tag, n.tag) {
return
}
}
@ -517,7 +330,7 @@ func (h *matchHeader) addIfNew(n haveTag, exact bool) {
for i, v := range h.haveTags {
if v.maxScript == n.maxScript &&
v.maxRegion == n.maxRegion &&
v.tag.variantOrPrivateTagStr() == n.tag.variantOrPrivateTagStr() {
v.tag.VariantOrPrivateUseTags() == n.tag.VariantOrPrivateUseTags() {
for h.haveTags[i].nextMax != 0 {
i = int(h.haveTags[i].nextMax)
}
@ -530,7 +343,7 @@ func (h *matchHeader) addIfNew(n haveTag, exact bool) {
// header returns the matchHeader for the given language. It creates one if
// it doesn't already exist.
func (m *matcher) header(l langID) *matchHeader {
func (m *matcher) header(l language.Language) *matchHeader {
if h := m.index[l]; h != nil {
return h
}
@ -554,7 +367,7 @@ func toConf(d uint8) Confidence {
// for a given tag.
func newMatcher(supported []Tag, options []MatchOption) *matcher {
m := &matcher{
index: make(map[langID]*matchHeader),
index: make(map[language.Language]*matchHeader),
preferSameScript: true,
}
for _, o := range options {
@ -567,16 +380,18 @@ func newMatcher(supported []Tag, options []MatchOption) *matcher {
// Add supported languages to the index. Add exact matches first to give
// them precedence.
for i, tag := range supported {
pair, _ := makeHaveTag(tag, i)
m.header(tag.lang).addIfNew(pair, true)
tt := tag.tag()
pair, _ := makeHaveTag(tt, i)
m.header(tt.LangID).addIfNew(pair, true)
m.supported = append(m.supported, &pair)
}
m.default_ = m.header(supported[0].lang).haveTags[0]
m.default_ = m.header(supported[0].lang()).haveTags[0]
// Keep these in two different loops to support the case that two equivalent
// languages are distinguished, such as iw and he.
for i, tag := range supported {
pair, max := makeHaveTag(tag, i)
if max != tag.lang {
tt := tag.tag()
pair, max := makeHaveTag(tt, i)
if max != tt.LangID {
m.header(max).addIfNew(pair, true)
}
}
@ -585,11 +400,11 @@ func newMatcher(supported []Tag, options []MatchOption) *matcher {
// update will only add entries to original indexes, thus not computing any
// transitive relations.
update := func(want, have uint16, conf Confidence) {
if hh := m.index[langID(have)]; hh != nil {
if hh := m.index[language.Language(have)]; hh != nil {
if !hh.original {
return
}
hw := m.header(langID(want))
hw := m.header(language.Language(want))
for _, ht := range hh.haveTags {
v := *ht
if conf < v.conf {
@ -597,7 +412,7 @@ func newMatcher(supported []Tag, options []MatchOption) *matcher {
}
v.nextMax = 0 // this value needs to be recomputed
if v.altScript != 0 {
v.altScript = altScript(langID(want), v.maxScript)
v.altScript = altScript(language.Language(want), v.maxScript)
}
hw.addIfNew(v, conf == Exact && hh.original)
}
@ -618,66 +433,67 @@ func newMatcher(supported []Tag, options []MatchOption) *matcher {
// First we match deprecated equivalents. If they are perfect equivalents
// (their canonicalization simply substitutes a different language code, but
// nothing else), the match confidence is Exact, otherwise it is High.
for i, lm := range langAliasMap {
for i, lm := range language.AliasMap {
// If deprecated codes match and there is no fiddling with the script or
// or region, we consider it an exact match.
conf := Exact
if langAliasTypes[i] != langMacro {
if !isExactEquivalent(langID(lm.from)) {
if language.AliasTypes[i] != language.Macro {
if !isExactEquivalent(language.Language(lm.From)) {
conf = High
}
update(lm.to, lm.from, conf)
update(lm.To, lm.From, conf)
}
update(lm.from, lm.to, conf)
update(lm.From, lm.To, conf)
}
return m
}
// getBest gets the best matching tag in m for any of the given tags, taking into
// account the order of preference of the given tags.
func (m *matcher) getBest(want ...Tag) (got *haveTag, orig Tag, c Confidence) {
func (m *matcher) getBest(want ...Tag) (got *haveTag, orig language.Tag, c Confidence) {
best := bestMatch{}
for i, w := range want {
var max Tag
for i, ww := range want {
w := ww.tag()
var max language.Tag
// Check for exact match first.
h := m.index[w.lang]
if w.lang != 0 {
h := m.index[w.LangID]
if w.LangID != 0 {
if h == nil {
continue
}
// Base language is defined.
max, _ = w.canonicalize(Legacy | Deprecated | Macro)
max, _ = canonicalize(Legacy|Deprecated|Macro, w)
// A region that is added through canonicalization is stronger than
// a maximized region: set it in the original (e.g. mo -> ro-MD).
if w.region != max.region {
w.region = max.region
if w.RegionID != max.RegionID {
w.RegionID = max.RegionID
}
// TODO: should we do the same for scripts?
// See test case: en, sr, nl ; sh ; sr
max, _ = addTags(max)
max, _ = max.Maximize()
} else {
// Base language is not defined.
if h != nil {
for i := range h.haveTags {
have := h.haveTags[i]
if have.tag.equalsRest(w) {
if equalsRest(have.tag, w) {
return have, w, Exact
}
}
}
if w.script == 0 && w.region == 0 {
if w.ScriptID == 0 && w.RegionID == 0 {
// We skip all tags matching und for approximate matching, including
// private tags.
continue
}
max, _ = addTags(w)
if h = m.index[max.lang]; h == nil {
max, _ = w.Maximize()
if h = m.index[max.LangID]; h == nil {
continue
}
}
pin := true
for _, t := range want[i+1:] {
if w.lang == t.lang {
if w.LangID == t.lang() {
pin = false
break
}
@ -685,11 +501,11 @@ func (m *matcher) getBest(want ...Tag) (got *haveTag, orig Tag, c Confidence) {
// Check for match based on maximized tag.
for i := range h.haveTags {
have := h.haveTags[i]
best.update(have, w, max.script, max.region, pin)
best.update(have, w, max.ScriptID, max.RegionID, pin)
if best.conf == Exact {
for have.nextMax != 0 {
have = h.haveTags[have.nextMax]
best.update(have, w, max.script, max.region, pin)
best.update(have, w, max.ScriptID, max.RegionID, pin)
}
return best.have, best.want, best.conf
}
@ -697,9 +513,9 @@ func (m *matcher) getBest(want ...Tag) (got *haveTag, orig Tag, c Confidence) {
}
if best.conf <= No {
if len(want) != 0 {
return nil, want[0], No
return nil, want[0].tag(), No
}
return nil, Tag{}, No
return nil, language.Tag{}, No
}
return best.have, best.want, best.conf
}
@ -707,9 +523,9 @@ func (m *matcher) getBest(want ...Tag) (got *haveTag, orig Tag, c Confidence) {
// bestMatch accumulates the best match so far.
type bestMatch struct {
have *haveTag
want Tag
want language.Tag
conf Confidence
pinnedRegion regionID
pinnedRegion language.Region
pinLanguage bool
sameRegionGroup bool
// Cached results from applying tie-breaking rules.
@ -734,19 +550,19 @@ type bestMatch struct {
// still prefer a second language over a dialect of the preferred language by
// explicitly specifying dialects, e.g. "en, nl, en-GB". In this case pin should
// be false.
func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion regionID, pin bool) {
func (m *bestMatch) update(have *haveTag, tag language.Tag, maxScript language.Script, maxRegion language.Region, pin bool) {
// Bail if the maximum attainable confidence is below that of the current best match.
c := have.conf
if c < m.conf {
return
}
// Don't change the language once we already have found an exact match.
if m.pinLanguage && tag.lang != m.want.lang {
if m.pinLanguage && tag.LangID != m.want.LangID {
return
}
// Pin the region group if we are comparing tags for the same language.
if tag.lang == m.want.lang && m.sameRegionGroup {
_, sameGroup := regionGroupDist(m.pinnedRegion, have.maxRegion, have.maxScript, m.want.lang)
if tag.LangID == m.want.LangID && m.sameRegionGroup {
_, sameGroup := regionGroupDist(m.pinnedRegion, have.maxRegion, have.maxScript, m.want.LangID)
if !sameGroup {
return
}
@ -756,7 +572,7 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
// don't pin anything, otherwise pin the language.
m.pinLanguage = pin
}
if have.tag.equalsRest(tag) {
if equalsRest(have.tag, tag) {
} else if have.maxScript != maxScript {
// There is usually very little comprehension between different scripts.
// In a few cases there may still be Low comprehension. This possibility
@ -786,7 +602,7 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
// Tie-breaker rules:
// We prefer if the pre-maximized language was specified and identical.
origLang := have.tag.lang == tag.lang && tag.lang != 0
origLang := have.tag.LangID == tag.LangID && tag.LangID != 0
if !beaten && m.origLang != origLang {
if m.origLang {
return
@ -795,7 +611,7 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
}
// We prefer if the pre-maximized region was specified and identical.
origReg := have.tag.region == tag.region && tag.region != 0
origReg := have.tag.RegionID == tag.RegionID && tag.RegionID != 0
if !beaten && m.origReg != origReg {
if m.origReg {
return
@ -803,7 +619,7 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
beaten = true
}
regGroupDist, sameGroup := regionGroupDist(have.maxRegion, maxRegion, maxScript, tag.lang)
regGroupDist, sameGroup := regionGroupDist(have.maxRegion, maxRegion, maxScript, tag.LangID)
if !beaten && m.regGroupDist != regGroupDist {
if regGroupDist > m.regGroupDist {
return
@ -811,7 +627,7 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
beaten = true
}
paradigmReg := isParadigmLocale(tag.lang, have.maxRegion)
paradigmReg := isParadigmLocale(tag.LangID, have.maxRegion)
if !beaten && m.paradigmReg != paradigmReg {
if !paradigmReg {
return
@ -820,7 +636,7 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
}
// Next we prefer if the pre-maximized script was specified and identical.
origScript := have.tag.script == tag.script && tag.script != 0
origScript := have.tag.ScriptID == tag.ScriptID && tag.ScriptID != 0
if !beaten && m.origScript != origScript {
if m.origScript {
return
@ -843,9 +659,9 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
}
}
func isParadigmLocale(lang langID, r regionID) bool {
func isParadigmLocale(lang language.Language, r language.Region) bool {
for _, e := range paradigmLocales {
if langID(e[0]) == lang && (r == regionID(e[1]) || r == regionID(e[2])) {
if language.Language(e[0]) == lang && (r == language.Region(e[1]) || r == language.Region(e[2])) {
return true
}
}
@ -854,13 +670,13 @@ func isParadigmLocale(lang langID, r regionID) bool {
// regionGroupDist computes the distance between two regions based on their
// CLDR grouping.
func regionGroupDist(a, b regionID, script scriptID, lang langID) (dist uint8, same bool) {
func regionGroupDist(a, b language.Region, script language.Script, lang language.Language) (dist uint8, same bool) {
const defaultDistance = 4
aGroup := uint(regionToGroups[a]) << 1
bGroup := uint(regionToGroups[b]) << 1
for _, ri := range matchRegion {
if langID(ri.lang) == lang && (ri.script == 0 || scriptID(ri.script) == script) {
if language.Language(ri.lang) == lang && (ri.script == 0 || language.Script(ri.script) == script) {
group := uint(1 << (ri.group &^ 0x80))
if 0x80&ri.group == 0 {
if aGroup&bGroup&group != 0 { // Both regions are in the group.
@ -876,31 +692,16 @@ func regionGroupDist(a, b regionID, script scriptID, lang langID) (dist uint8, s
return defaultDistance, true
}
// variants returns the part of the tag string between the variant offset and
// the extension offset, or the empty string when the variant offset is zero.
func (t Tag) variants() string {
	if t.pVariant != 0 {
		return t.str[t.pVariant:t.pExt]
	}
	return ""
}
// variantOrPrivateTagStr returns variants or private use tags: the tag string
// from the variant offset up to the extension offset when the latter is set,
// and up to the end of the string otherwise.
func (t Tag) variantOrPrivateTagStr() string {
	if t.pExt == 0 {
		return t.str[t.pVariant:]
	}
	return t.str[t.pVariant:t.pExt]
}
// equalsRest compares everything except the language.
func (a Tag) equalsRest(b Tag) bool {
func equalsRest(a, b language.Tag) bool {
// TODO: don't include extensions in this comparison. To do this efficiently,
// though, we should handle private tags separately.
return a.script == b.script && a.region == b.region && a.variantOrPrivateTagStr() == b.variantOrPrivateTagStr()
return a.ScriptID == b.ScriptID && a.RegionID == b.RegionID && a.VariantOrPrivateUseTags() == b.VariantOrPrivateUseTags()
}
// isExactEquivalent returns true if canonicalizing the language will not alter
// the script or region of a tag.
func isExactEquivalent(l langID) bool {
func isExactEquivalent(l language.Language) bool {
for _, o := range notEquivalent {
if o == l {
return false
@ -909,25 +710,26 @@ func isExactEquivalent(l langID) bool {
return true
}
var notEquivalent []langID
var notEquivalent []language.Language
func init() {
// Create a list of all languages for which canonicalization may alter the
// script or region.
for _, lm := range langAliasMap {
tag := Tag{lang: langID(lm.from)}
if tag, _ = tag.canonicalize(All); tag.script != 0 || tag.region != 0 {
notEquivalent = append(notEquivalent, langID(lm.from))
for _, lm := range language.AliasMap {
tag := language.Tag{LangID: language.Language(lm.From)}
if tag, _ = canonicalize(All, tag); tag.ScriptID != 0 || tag.RegionID != 0 {
notEquivalent = append(notEquivalent, language.Language(lm.From))
}
}
// Maximize undefined regions of paradigm locales.
for i, v := range paradigmLocales {
max, _ := addTags(Tag{lang: langID(v[0])})
t := language.Tag{LangID: language.Language(v[0])}
max, _ := t.Maximize()
if v[1] == 0 {
paradigmLocales[i][1] = uint16(max.region)
paradigmLocales[i][1] = uint16(max.RegionID)
}
if v[2] == 0 {
paradigmLocales[i][2] = uint16(max.region)
paradigmLocales[i][2] = uint16(max.RegionID)
}
}
}

View file

@ -5,216 +5,21 @@
package language
import (
"bytes"
"errors"
"fmt"
"sort"
"strconv"
"strings"
"golang.org/x/text/internal/tag"
"golang.org/x/text/internal/language"
)
// isAlpha reports whether b is a letter rather than a digit.
// The caller must guarantee that b is an ASCII letter or digit; under that
// precondition every byte ordered after '9' is a letter.
func isAlpha(b byte) bool {
	return '9' < b
}
// isAlphaNum reports whether s consists solely of ASCII letters and digits.
// An empty or nil slice yields true.
func isAlphaNum(s []byte) bool {
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case 'a' <= c && c <= 'z':
		case 'A' <= c && c <= 'Z':
		case '0' <= c && c <= '9':
		default:
			return false
		}
	}
	return true
}
// errSyntax is returned by any of the parsing functions when the
// input is not well-formed, according to BCP 47.
// TODO: return the position at which the syntax error occurred?
var errSyntax = errors.New("language: tag is not well-formed")
// ValueError is returned by any of the parsing functions when the
// input is well-formed but the respective subtag is not recognized
// as a valid value.
type ValueError struct {
v [8]byte
}
type ValueError interface {
error
// mkErrInvalid returns a ValueError carrying the subtag s. Only as many bytes
// as fit in the error's fixed-size buffer are copied; the rest is dropped.
func mkErrInvalid(s []byte) error {
	e := ValueError{}
	copy(e.v[:], s)
	return e
}
// tag returns the subtag stored in e: the bytes of e.v up to, but not
// including, the first zero byte, or all 8 bytes when there is none.
func (e ValueError) tag() []byte {
	if n := bytes.IndexByte(e.v[:], 0); n != -1 {
		return e.v[:n]
	}
	return e.v[:8]
}
// Error implements the error interface. The message quotes the subtag that
// was well-formed but not recognized.
func (e ValueError) Error() string {
	subtag := e.tag()
	return fmt.Sprintf("language: subtag %q is well-formed but unknown", subtag)
}
// Subtag returns the subtag for which the error occurred, as a string.
func (e ValueError) Subtag() string {
	raw := e.tag()
	return string(raw)
}
// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
// The scanner normalizes its input while scanning: separators are rewritten
// to '-' by init and malformed tokens are removed from the buffer by scan.
type scanner struct {
// b is the buffer being scanned; the scanner routines modify it.
b []byte
// bytes is fixed-size storage that makeScannerString uses as the backing
// array of b when the input is short enough to fit.
bytes [max99thPercentileSize]byte
// token is the current token, or nil once scan has run out of input.
token []byte
start int // start position of the current token
end int // end position of the current token
next int // next point for scan
// err is the error recorded through setError; a syntax error replaces a
// previously recorded error of another kind.
err error
// done is set by scan when it reaches the end of the input.
done bool
}
// makeScannerString returns an initialized scanner for s. Inputs that fit in
// the scanner's built-in array are copied there; longer inputs are converted
// to a freshly allocated byte slice.
func makeScannerString(s string) scanner {
	var scan scanner
	if len(s) > len(scan.bytes) {
		scan.b = []byte(s)
	} else {
		n := copy(scan.bytes[:], s)
		scan.b = scan.bytes[:n]
	}
	scan.init()
	return scan
}
// makeScanner returns an initialized scanner that uses b as its input buffer.
// b is not copied and may be modified by the scanner routines.
func makeScanner(b []byte) scanner {
	var scan scanner
	scan.b = b
	scan.init()
	return scan
}
// init prepares the scanner for use: every '_' in the buffer is rewritten to
// '-' and the first token is scanned.
func (s *scanner) init() {
	for i := range s.b {
		if s.b[i] == '_' {
			s.b[i] = '-'
		}
	}
	s.scan()
}
// toLower converts the ASCII upper case letters of the buffer in the range
// [start, end) to lower case, in place. All other bytes are left as they are.
func (s *scanner) toLower(start, end int) {
	for i := start; i < end; i++ {
		if c := s.b[i]; c >= 'A' && c <= 'Z' {
			s.b[i] = c + ('a' - 'A')
		}
	}
}
// setError records e as the scanner's error when no error has been recorded
// yet, or when e is errSyntax and the recorded error is not. In every other
// case the recorded error is kept.
func (s *scanner) setError(e error) {
	switch {
	case s.err == nil:
		s.err = e
	case e == errSyntax && s.err != errSyntax:
		s.err = e
	}
}
// resizeRange shrinks or grows the array at position oldStart such that
// a new string of size newSize can fit between oldStart and oldEnd.
// Sets the scan point to after the resized range.
//
// The bytes before oldStart and from oldEnd onwards are preserved; the
// content of the resized range itself is unspecified and is expected to be
// overwritten by the caller. The buffer is resized in place when the result
// fits in its capacity and is reallocated otherwise.
//
// The previous implementation had the capacity test inverted: it allocated
// when the result fit, and otherwise appended to s.b[end:], which dropped the
// prefix when end == cap(s.b) and panicked when end > cap(s.b).
func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
	s.start = oldStart
	end := oldStart + newSize
	if end == oldEnd {
		// The range already has the requested size.
		return
	}
	diff := end - oldEnd
	n := len(s.b) + diff
	var b []byte
	if n > cap(s.b) {
		// Not enough room: move the prefix to a new buffer.
		b = make([]byte, n)
		copy(b, s.b[:oldStart])
	} else {
		// Enough room: the prefix is already in place.
		b = s.b[:n]
	}
	// Move the tail to its new position. copy handles overlapping slices, so
	// this is safe for the in-place case as well.
	copy(b[end:], s.b[oldEnd:])
	s.b = b
	// Keep next at the same distance from the end of the current token.
	s.next = end + (s.next - s.end)
	s.end = end
}
// replace substitutes repl for the current token: the token's range is
// resized to len(repl) and repl is then copied into place.
func (s *scanner) replace(repl string) {
	size := len(repl)
	s.resizeRange(s.start, s.end, size)
	copy(s.b[s.start:], repl)
}
// gobble removes the current token from the input and records e through
// setError. Caller must call scan after calling gobble.
//
// A token at the very start of the buffer is removed together with whatever
// precedes the next scan point; any other token is removed together with the
// separator in front of it. Afterwards the next scan point is the old start
// of the removed token.
func (s *scanner) gobble(e error) {
	s.setError(e)
	if s.start == 0 {
		n := copy(s.b, s.b[s.next:])
		s.b = s.b[:n]
		s.end = 0
	} else {
		cut := s.start - 1
		n := copy(s.b[cut:], s.b[s.end:])
		s.b = s.b[:cut+n]
		s.end = cut
	}
	s.next = s.start
}
// deleteRange removes the bytes in [start, end) from s.b. The range must lie
// before the current token, whose positions (start, end, next) are shifted
// back by the number of removed bytes. A syntax error is recorded.
func (s *scanner) deleteRange(start, end int) {
	s.setError(errSyntax)
	kept := copy(s.b[start:], s.b[end:])
	s.b = s.b[:start+kept]
	removed := end - start
	s.start -= removed
	s.end -= removed
	s.next -= removed
}
// scan parses the next token of a BCP 47 string. Tokens that are larger
// than 8 characters or include non-alphanumeric characters result in an error
// and are gobbled and removed from the output.
// It returns the end position of the last token consumed.
//
// On return s.token holds the accepted token. When the input is exhausted
// s.token is nil and s.done is set.
func (s *scanner) scan() (end int) {
// The result is the end of the token that was current on entry.
end = s.end
s.token = nil
for s.start = s.next; s.next < len(s.b); {
i := bytes.IndexByte(s.b[s.next:], '-')
if i == -1 {
// No further separator: the token runs to the end of the buffer.
s.end = len(s.b)
s.next = len(s.b)
i = s.end - s.start
} else {
s.end = s.next + i
s.next = s.end + 1
}
token := s.b[s.start:s.end]
if i < 1 || i > 8 || !isAlphaNum(token) {
// Malformed token: drop it from the buffer and try the next one.
s.gobble(errSyntax)
continue
}
s.token = token
return end
}
// A dangling separator at the end of the input is a syntax error and is
// stripped from the buffer.
if n := len(s.b); n > 0 && s.b[n-1] == '-' {
s.setError(errSyntax)
s.b = s.b[:len(s.b)-1]
}
s.done = true
return end
}
// acceptMinSize parses multiple tokens of the given size or greater.
// It returns the end position of the last token consumed.
func (s *scanner) acceptMinSize(min int) (end int) {
end = s.end
s.scan()
for ; len(s.token) >= min; s.scan() {
end = s.end
}
return end
// Subtag returns the subtag for which the error occurred.
Subtag() string
}
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
@ -223,7 +28,7 @@ func (s *scanner) acceptMinSize(min int) (end int) {
// ValueError. The Tag returned in this case is just stripped of the unknown
// value. All other values are preserved. It accepts tags in the BCP 47 format
// and extensions to this standard defined in
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// The resulting tag is canonicalized using the default canonicalization type.
func Parse(s string) (t Tag, err error) {
return Default.Parse(s)
@ -235,327 +40,18 @@ func Parse(s string) (t Tag, err error) {
// ValueError. The Tag returned in this case is just stripped of the unknown
// value. All other values are preserved. It accepts tags in the BCP 47 format
// and extensions to this standard defined in
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// The resulting tag is canonicalized using the the canonicalization type c.
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// The resulting tag is canonicalized using the canonicalization type c.
func (c CanonType) Parse(s string) (t Tag, err error) {
// TODO: consider supporting old-style locale key-value pairs.
if s == "" {
return und, errSyntax
tt, err := language.Parse(s)
if err != nil {
return makeTag(tt), err
}
if len(s) <= maxAltTaglen {
b := [maxAltTaglen]byte{}
for i, c := range s {
// Generating invalid UTF-8 is okay as it won't match.
if 'A' <= c && c <= 'Z' {
c += 'a' - 'A'
} else if c == '_' {
c = '-'
}
b[i] = byte(c)
}
if t, ok := grandfathered(b); ok {
return t, nil
}
}
scan := makeScannerString(s)
t, err = parse(&scan, s)
t, changed := t.canonicalize(c)
tt, changed := canonicalize(c, tt)
if changed {
t.remakeString()
tt.RemakeString()
}
return t, err
}
// parse builds a Tag from the tokens of scan, which must have been
// initialized on the same input as s. It returns the tag together with the
// error recorded by the scanner, or errSyntax directly when the first token
// cannot start a tag.
//
// The string stored in the tag is the normalized scanner buffer; s is reused
// for it when tag.Compare reports it as equal to that buffer.
func parse(scan *scanner, s string) (t Tag, err error) {
t = und
var end int
if n := len(scan.token); n <= 1 {
// An empty or single-character first token: only a private use tag
// starting with "x" is accepted.
scan.toLower(0, len(scan.b))
if n == 0 || scan.token[0] != 'x' {
return t, errSyntax
}
end = parseExtensions(scan)
} else if n >= 4 {
// A first token of four or more characters is rejected.
return und, errSyntax
} else { // the usual case
t, end = parseTag(scan)
if n := len(scan.token); n == 1 {
// A single-character token introduces the extensions.
t.pExt = uint16(end)
end = parseExtensions(scan)
} else if end < len(scan.b) {
// Unparsed input remains: flag it and cut it off.
scan.setError(errSyntax)
scan.b = scan.b[:end]
}
}
if int(t.pVariant) < len(scan.b) {
if end < len(s) {
s = s[:end]
}
if len(s) > 0 && tag.Compare(s, scan.b) == 0 {
t.str = s
} else {
t.str = string(scan.b)
}
} else {
// Nothing follows the position recorded in pVariant, so no string is
// stored and the offsets are cleared.
t.pVariant, t.pExt = 0, 0
}
return t, scan.err
}
// parseTag parses language, script, region and variants.
// It returns a Tag and the end position in the input that was parsed.
// Errors are recorded on the scanner; subtags whose lookup yields no ID are
// removed from the scanner's buffer.
func parseTag(scan *scanner) (t Tag, end int) {
var e error
// TODO: set an error if an unknown lang, script or region is encountered.
t.lang, e = getLangID(scan.token)
scan.setError(e)
// Replace the token by the string form of the language that was found.
scan.replace(t.lang.String())
langStart := scan.start
end = scan.scan()
for len(scan.token) == 3 && isAlpha(scan.token[0]) {
// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
// to a tag of the form <extlang>.
lang, e := getLangID(scan.token)
if lang != 0 {
// Overwrite the language in the buffer with the extlang.
t.lang = lang
copy(scan.b[langStart:], lang.String())
scan.b[langStart+3] = '-'
scan.start = langStart + 4
}
scan.gobble(e)
end = scan.scan()
}
if len(scan.token) == 4 && isAlpha(scan.token[0]) {
t.script, e = getScriptID(script, scan.token)
if t.script == 0 {
scan.gobble(e)
}
end = scan.scan()
}
if n := len(scan.token); n >= 2 && n <= 3 {
t.region, e = getRegionID(scan.token)
if t.region == 0 {
scan.gobble(e)
} else {
scan.replace(t.region.String())
}
end = scan.scan()
}
// Everything from the current token onwards is lower-cased.
scan.toLower(scan.start, len(scan.b))
t.pVariant = byte(end)
end = parseVariants(scan, end, t)
t.pExt = uint16(end)
return t, end
}
var separator = []byte{'-'}
// parseVariants scans tokens as long as each token is a valid variant string.
// Duplicate variants are removed.
//
// end is the end position reached before the variants; the returned value is
// the end position after them. Tokens that are not found in variantIndex are
// removed from the buffer and recorded as invalid. When the variants are not
// in ascending variantIndex order they are sorted and rewritten in the buffer.
// NOTE(review): the parameter t is not used in this function.
func parseVariants(scan *scanner, end int, t Tag) int {
start := scan.start
// Fixed-size arrays back the slices below for the first four variants.
varIDBuf := [4]uint8{}
variantBuf := [4][]byte{}
varID := varIDBuf[:0]
variant := variantBuf[:0]
last := -1
needSort := false
for ; len(scan.token) >= 4; scan.scan() {
// TODO: measure the impact of needing this conversion and redesign
// the data structure if there is an issue.
v, ok := variantIndex[string(scan.token)]
if !ok {
// unknown variant
// TODO: allow user-defined variants?
scan.gobble(mkErrInvalid(scan.token))
continue
}
varID = append(varID, v)
variant = append(variant, scan.token)
if !needSort {
if last < int(v) {
last = int(v)
} else {
// The index did not increase: the variants are out of order or
// contain a duplicate.
needSort = true
// There are no legal combinations of more than 7 variants
// (and this is by no means a useful sequence).
const maxVariants = 8
if len(varID) > maxVariants {
break
}
}
}
end = scan.end
}
if needSort {
sort.Sort(variantsSort{varID, variant})
// Compact the sorted variants, keeping one of each index.
k, l := 0, -1
for i, v := range varID {
w := int(v)
if l == w {
// Remove duplicates.
continue
}
varID[k] = varID[i]
variant[k] = variant[i]
k++
l = w
}
if str := bytes.Join(variant[:k], separator); len(str) == 0 {
end = start - 1
} else {
// Write the sorted, de-duplicated variants back into the buffer.
scan.resizeRange(start, end, len(str))
copy(scan.b[scan.start:], str)
end = scan.end
}
}
return end
}
// variantsSort sorts the parallel slices i and v by the values in i. Its
// Len, Swap and Less methods are what sort.Sort requires.
type variantsSort struct {
	i []uint8
	v [][]byte
}

// Len returns the number of entries, taken from the index slice.
func (s variantsSort) Len() int {
	return len(s.i)
}

// Swap exchanges entries a and b in both slices so that they stay aligned.
func (s variantsSort) Swap(a, b int) {
	idx := s.i[a]
	s.i[a] = s.i[b]
	s.i[b] = idx

	val := s.v[a]
	s.v[a] = s.v[b]
	s.v[b] = val
}

// Less reports whether the index of entry a is smaller than that of entry b.
func (s variantsSort) Less(a, b int) bool {
	return s.i[b] > s.i[a]
}
// bytesSort orders byte slices lexicographically, as defined by
// bytes.Compare. Its Len, Swap and Less methods are what sort.Sort requires.
type bytesSort [][]byte

// Len returns the number of byte slices.
func (b bytesSort) Len() int {
	return len(b)
}

// Swap exchanges the slices at positions x and y.
func (b bytesSort) Swap(x, y int) {
	tmp := b[x]
	b[x] = b[y]
	b[y] = tmp
}

// Less reports whether the slice at x sorts strictly before the slice at y.
func (b bytesSort) Less(x, y int) bool {
	return bytes.Compare(b[x], b[y]) < 0
}
// parseExtensions parses and normalizes the extensions in the buffer.
// It returns the last position of scan.b that is part of any extension.
// It also trims scan.b to remove excess parts accordingly.
//
// Extensions other than private use are sorted with bytesSort; a private use
// extension ends the parsing and is placed after all others.
func parseExtensions(scan *scanner) int {
start := scan.start
exts := [][]byte{}
private := []byte{}
end := scan.end
for len(scan.token) == 1 {
extStart := scan.start
ext := scan.token[0]
end = parseExtension(scan)
extension := scan.b[extStart:end]
if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
// The extension is too short to be valid: record the error and skip it.
scan.setError(errSyntax)
end = extStart
continue
} else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
// The first extension is also the only one, so there is nothing to
// reorder: cut the buffer after it and return.
scan.b = scan.b[:end]
return end
} else if ext == 'x' {
private = extension
break
}
exts = append(exts, extension)
}
sort.Sort(bytesSort(exts))
if len(private) > 0 {
exts = append(exts, private)
}
// Rebuild the extension part of the buffer from the sorted pieces.
scan.b = scan.b[:start]
if len(exts) > 0 {
scan.b = append(scan.b, bytes.Join(exts, separator)...)
} else if start > 0 {
// Strip trailing '-'.
scan.b = scan.b[:start-1]
}
return end
}
// parseExtension parses a single extension and returns the position of
// the extension end.
func parseExtension(scan *scanner) int {
start, end := scan.start, scan.end
switch scan.token[0] {
case 'u':
attrStart := end
scan.scan()
for last := []byte{}; len(scan.token) > 2; scan.scan() {
if bytes.Compare(scan.token, last) != -1 {
// Attributes are unsorted. Start over from scratch.
p := attrStart + 1
scan.next = p
attrs := [][]byte{}
for scan.scan(); len(scan.token) > 2; scan.scan() {
attrs = append(attrs, scan.token)
end = scan.end
}
sort.Sort(bytesSort(attrs))
copy(scan.b[p:], bytes.Join(attrs, separator))
break
}
last = scan.token
end = scan.end
}
var last, key []byte
for attrEnd := end; len(scan.token) == 2; last = key {
key = scan.token
keyEnd := scan.end
end = scan.acceptMinSize(3)
// TODO: check key value validity
if keyEnd == end || bytes.Compare(key, last) != 1 {
// We have an invalid key or the keys are not sorted.
// Start scanning keys from scratch and reorder.
p := attrEnd + 1
scan.next = p
keys := [][]byte{}
for scan.scan(); len(scan.token) == 2; {
keyStart, keyEnd := scan.start, scan.end
end = scan.acceptMinSize(3)
if keyEnd != end {
keys = append(keys, scan.b[keyStart:end])
} else {
scan.setError(errSyntax)
end = keyStart
}
}
sort.Sort(bytesSort(keys))
reordered := bytes.Join(keys, separator)
if e := p + len(reordered); e < end {
scan.deleteRange(e, end)
end = e
}
copy(scan.b[p:], bytes.Join(keys, separator))
break
}
}
case 't':
scan.scan()
if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
_, end = parseTag(scan)
scan.toLower(start, end)
}
for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
end = scan.acceptMinSize(3)
}
case 'x':
end = scan.acceptMinSize(1)
default:
end = scan.acceptMinSize(2)
}
return end
return makeTag(tt), err
}
// Compose creates a Tag from individual parts, which may be of type Tag, Base,
@ -563,10 +59,11 @@ func parseExtension(scan *scanner) int {
// Base, Script or Region or slice of type Variant or Extension is passed more
// than once, the latter will overwrite the former. Variants and Extensions are
// accumulated, but if two extensions of the same type are passed, the latter
// will replace the former. A Tag overwrites all former values and typically
// only makes sense as the first argument. The resulting tag is returned after
// canonicalizing using the Default CanonType. If one or more errors are
// encountered, one of the errors is returned.
// will replace the former. For -u extensions, though, the key-type pairs are
// added, where later values overwrite older ones. A Tag overwrites all former
// values and typically only makes sense as the first argument. The resulting
// tag is returned after canonicalizing using the Default CanonType. If one or
// more errors are encountered, one of the errors is returned.
func Compose(part ...interface{}) (t Tag, err error) {
return Default.Compose(part...)
}
@ -576,191 +73,63 @@ func Compose(part ...interface{}) (t Tag, err error) {
// Base, Script or Region or slice of type Variant or Extension is passed more
// than once, the latter will overwrite the former. Variants and Extensions are
// accumulated, but if two extensions of the same type are passed, the latter
// will replace the former. A Tag overwrites all former values and typically
// only makes sense as the first argument. The resulting tag is returned after
// canonicalizing using CanonType c. If one or more errors are encountered,
// one of the errors is returned.
// will replace the former. For -u extensions, though, the key-type pairs are
// added, where later values overwrite older ones. A Tag overwrites all former
// values and typically only makes sense as the first argument. The resulting
// tag is returned after canonicalizing using CanonType c. If one or more errors
// are encountered, one of the errors is returned.
func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
var b builder
if err = b.update(part...); err != nil {
var b language.Builder
if err = update(&b, part...); err != nil {
return und, err
}
t, _ = b.tag.canonicalize(c)
if len(b.ext) > 0 || len(b.variant) > 0 {
sort.Sort(sortVariant(b.variant))
sort.Strings(b.ext)
if b.private != "" {
b.ext = append(b.ext, b.private)
}
n := maxCoreSize + tokenLen(b.variant...) + tokenLen(b.ext...)
buf := make([]byte, n)
p := t.genCoreBytes(buf)
t.pVariant = byte(p)
p += appendTokens(buf[p:], b.variant...)
t.pExt = uint16(p)
p += appendTokens(buf[p:], b.ext...)
t.str = string(buf[:p])
} else if b.private != "" {
t.str = b.private
t.remakeString()
}
return
}
type builder struct {
tag Tag
private string // the x extension
ext []string
variant []string
err error
}
func (b *builder) addExt(e string) {
if e == "" {
} else if e[0] == 'x' {
b.private = e
} else {
b.ext = append(b.ext, e)
}
b.Tag, _ = canonicalize(c, b.Tag)
return makeTag(b.Make()), err
}
var errInvalidArgument = errors.New("invalid Extension or Variant")
func (b *builder) update(part ...interface{}) (err error) {
replace := func(l *[]string, s string, eq func(a, b string) bool) bool {
if s == "" {
b.err = errInvalidArgument
return true
}
for i, v := range *l {
if eq(v, s) {
(*l)[i] = s
return true
}
}
return false
}
func update(b *language.Builder, part ...interface{}) (err error) {
for _, x := range part {
switch v := x.(type) {
case Tag:
b.tag.lang = v.lang
b.tag.region = v.region
b.tag.script = v.script
if v.str != "" {
b.variant = nil
for x, s := "", v.str[v.pVariant:v.pExt]; s != ""; {
x, s = nextToken(s)
b.variant = append(b.variant, x)
}
b.ext, b.private = nil, ""
for i, e := int(v.pExt), ""; i < len(v.str); {
i, e = getExtension(v.str, i)
b.addExt(e)
}
}
b.SetTag(v.tag())
case Base:
b.tag.lang = v.langID
b.Tag.LangID = v.langID
case Script:
b.tag.script = v.scriptID
b.Tag.ScriptID = v.scriptID
case Region:
b.tag.region = v.regionID
b.Tag.RegionID = v.regionID
case Variant:
if !replace(&b.variant, v.variant, func(a, b string) bool { return a == b }) {
b.variant = append(b.variant, v.variant)
if v.variant == "" {
err = errInvalidArgument
break
}
b.AddVariant(v.variant)
case Extension:
if !replace(&b.ext, v.s, func(a, b string) bool { return a[0] == b[0] }) {
b.addExt(v.s)
if v.s == "" {
err = errInvalidArgument
break
}
b.SetExt(v.s)
case []Variant:
b.variant = nil
for _, x := range v {
b.update(x)
b.ClearVariants()
for _, v := range v {
b.AddVariant(v.variant)
}
case []Extension:
b.ext, b.private = nil, ""
b.ClearExtensions()
for _, e := range v {
b.update(e)
b.SetExt(e.s)
}
// TODO: support parsing of raw strings based on morphology or just extensions?
case error:
err = v
}
}
return
}
// tokenLen returns the number of bytes needed to write every token preceded
// by a one-byte separator, i.e. the sum of len(t)+1 over all tokens.
func tokenLen(token ...string) (n int) {
	for idx := range token {
		n += 1 + len(token[idx])
	}
	return n
}
// appendTokens writes each token into b preceded by a '-' and returns the
// number of bytes accounted for ('-' plus token length, per token). The
// caller must supply a buffer that is large enough; b is not grown.
func appendTokens(b []byte, token ...string) int {
	pos := 0
	for _, tok := range token {
		b[pos] = '-'
		pos++
		copy(b[pos:], tok)
		pos += len(tok)
	}
	return pos
}
// sortVariant orders variant strings by the value variantIndex holds for
// each of them. It implements sort.Interface.
type sortVariant []string

// Len reports the number of variants.
func (sv sortVariant) Len() int {
	return len(sv)
}

// Swap exchanges the variants at positions a and b.
func (sv sortVariant) Swap(a, b int) {
	sv[a], sv[b] = sv[b], sv[a]
}

// Less compares the variantIndex entries of the variants at a and b.
func (sv sortVariant) Less(a, b int) bool {
	left, right := variantIndex[sv[a]], variantIndex[sv[b]]
	return left < right
}
// findExt returns the index of the first entry in list whose first byte
// equals x, or -1 if no entry matches. Empty entries never match and are
// skipped rather than indexed, so a stray "" in list cannot cause a panic.
func findExt(list []string, x byte) int {
	for i, e := range list {
		if len(e) > 0 && e[0] == x {
			return i
		}
	}
	return -1
}
// getExtension returns the extension that starts at position p of s, along
// with the position at which it ends. A leading '-' at p is skipped. An
// extension starting with 'x' runs to the end of s; any other one ends where
// nextExtension reports the next extension begins.
func getExtension(s string, p int) (end int, ext string) {
	if s[p] == '-' {
		p++
	}
	if s[p] != 'x' {
		end = nextExtension(s, p)
		return end, s[p:end]
	}
	return len(s), s[p:]
}
// nextExtension finds the next extension within the string, searching
// for the -<char>- pattern from position p.
// In the vast majority of cases, language tags will have at most
// one extension and extensions tend to be small.
func nextExtension(s string, p int) int {
for n := len(s) - 3; p < n; {
if s[p] == '-' {
if s[p+2] == '-' {
return p
if v != nil {
err = v
}
p += 3
} else {
p++
}
}
return len(s)
return
}
var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
@ -788,7 +157,7 @@ func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
if !ok {
return nil, nil, err
}
t = Tag{lang: id}
t = makeTag(language.Tag{LangID: id})
}
// Scan the optional weight.
@ -830,9 +199,9 @@ func split(s string, c byte) (head, tail string) {
return strings.TrimSpace(s), ""
}
// Add hack mapping to deal with a small number of cases that that occur
// Add hack mapping to deal with a small number of cases that occur
// in Accept-Language (with reasonable frequency).
var acceptFallback = map[string]langID{
var acceptFallback = map[string]language.Language{
"english": _en,
"deutsch": _de,
"italian": _it,

File diff suppressed because it is too large Load diff

View file

@ -4,6 +4,8 @@
package language
import "golang.org/x/text/internal/language/compact"
// TODO: Various sets of commonly used tags and regions.
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
@ -61,83 +63,83 @@ var (
Und Tag = Tag{}
Afrikaans Tag = Tag{lang: _af} // af
Amharic Tag = Tag{lang: _am} // am
Arabic Tag = Tag{lang: _ar} // ar
ModernStandardArabic Tag = Tag{lang: _ar, region: _001} // ar-001
Azerbaijani Tag = Tag{lang: _az} // az
Bulgarian Tag = Tag{lang: _bg} // bg
Bengali Tag = Tag{lang: _bn} // bn
Catalan Tag = Tag{lang: _ca} // ca
Czech Tag = Tag{lang: _cs} // cs
Danish Tag = Tag{lang: _da} // da
German Tag = Tag{lang: _de} // de
Greek Tag = Tag{lang: _el} // el
English Tag = Tag{lang: _en} // en
AmericanEnglish Tag = Tag{lang: _en, region: _US} // en-US
BritishEnglish Tag = Tag{lang: _en, region: _GB} // en-GB
Spanish Tag = Tag{lang: _es} // es
EuropeanSpanish Tag = Tag{lang: _es, region: _ES} // es-ES
LatinAmericanSpanish Tag = Tag{lang: _es, region: _419} // es-419
Estonian Tag = Tag{lang: _et} // et
Persian Tag = Tag{lang: _fa} // fa
Finnish Tag = Tag{lang: _fi} // fi
Filipino Tag = Tag{lang: _fil} // fil
French Tag = Tag{lang: _fr} // fr
CanadianFrench Tag = Tag{lang: _fr, region: _CA} // fr-CA
Gujarati Tag = Tag{lang: _gu} // gu
Hebrew Tag = Tag{lang: _he} // he
Hindi Tag = Tag{lang: _hi} // hi
Croatian Tag = Tag{lang: _hr} // hr
Hungarian Tag = Tag{lang: _hu} // hu
Armenian Tag = Tag{lang: _hy} // hy
Indonesian Tag = Tag{lang: _id} // id
Icelandic Tag = Tag{lang: _is} // is
Italian Tag = Tag{lang: _it} // it
Japanese Tag = Tag{lang: _ja} // ja
Georgian Tag = Tag{lang: _ka} // ka
Kazakh Tag = Tag{lang: _kk} // kk
Khmer Tag = Tag{lang: _km} // km
Kannada Tag = Tag{lang: _kn} // kn
Korean Tag = Tag{lang: _ko} // ko
Kirghiz Tag = Tag{lang: _ky} // ky
Lao Tag = Tag{lang: _lo} // lo
Lithuanian Tag = Tag{lang: _lt} // lt
Latvian Tag = Tag{lang: _lv} // lv
Macedonian Tag = Tag{lang: _mk} // mk
Malayalam Tag = Tag{lang: _ml} // ml
Mongolian Tag = Tag{lang: _mn} // mn
Marathi Tag = Tag{lang: _mr} // mr
Malay Tag = Tag{lang: _ms} // ms
Burmese Tag = Tag{lang: _my} // my
Nepali Tag = Tag{lang: _ne} // ne
Dutch Tag = Tag{lang: _nl} // nl
Norwegian Tag = Tag{lang: _no} // no
Punjabi Tag = Tag{lang: _pa} // pa
Polish Tag = Tag{lang: _pl} // pl
Portuguese Tag = Tag{lang: _pt} // pt
BrazilianPortuguese Tag = Tag{lang: _pt, region: _BR} // pt-BR
EuropeanPortuguese Tag = Tag{lang: _pt, region: _PT} // pt-PT
Romanian Tag = Tag{lang: _ro} // ro
Russian Tag = Tag{lang: _ru} // ru
Sinhala Tag = Tag{lang: _si} // si
Slovak Tag = Tag{lang: _sk} // sk
Slovenian Tag = Tag{lang: _sl} // sl
Albanian Tag = Tag{lang: _sq} // sq
Serbian Tag = Tag{lang: _sr} // sr
SerbianLatin Tag = Tag{lang: _sr, script: _Latn} // sr-Latn
Swedish Tag = Tag{lang: _sv} // sv
Swahili Tag = Tag{lang: _sw} // sw
Tamil Tag = Tag{lang: _ta} // ta
Telugu Tag = Tag{lang: _te} // te
Thai Tag = Tag{lang: _th} // th
Turkish Tag = Tag{lang: _tr} // tr
Ukrainian Tag = Tag{lang: _uk} // uk
Urdu Tag = Tag{lang: _ur} // ur
Uzbek Tag = Tag{lang: _uz} // uz
Vietnamese Tag = Tag{lang: _vi} // vi
Chinese Tag = Tag{lang: _zh} // zh
SimplifiedChinese Tag = Tag{lang: _zh, script: _Hans} // zh-Hans
TraditionalChinese Tag = Tag{lang: _zh, script: _Hant} // zh-Hant
Zulu Tag = Tag{lang: _zu} // zu
Afrikaans Tag = Tag(compact.Afrikaans)
Amharic Tag = Tag(compact.Amharic)
Arabic Tag = Tag(compact.Arabic)
ModernStandardArabic Tag = Tag(compact.ModernStandardArabic)
Azerbaijani Tag = Tag(compact.Azerbaijani)
Bulgarian Tag = Tag(compact.Bulgarian)
Bengali Tag = Tag(compact.Bengali)
Catalan Tag = Tag(compact.Catalan)
Czech Tag = Tag(compact.Czech)
Danish Tag = Tag(compact.Danish)
German Tag = Tag(compact.German)
Greek Tag = Tag(compact.Greek)
English Tag = Tag(compact.English)
AmericanEnglish Tag = Tag(compact.AmericanEnglish)
BritishEnglish Tag = Tag(compact.BritishEnglish)
Spanish Tag = Tag(compact.Spanish)
EuropeanSpanish Tag = Tag(compact.EuropeanSpanish)
LatinAmericanSpanish Tag = Tag(compact.LatinAmericanSpanish)
Estonian Tag = Tag(compact.Estonian)
Persian Tag = Tag(compact.Persian)
Finnish Tag = Tag(compact.Finnish)
Filipino Tag = Tag(compact.Filipino)
French Tag = Tag(compact.French)
CanadianFrench Tag = Tag(compact.CanadianFrench)
Gujarati Tag = Tag(compact.Gujarati)
Hebrew Tag = Tag(compact.Hebrew)
Hindi Tag = Tag(compact.Hindi)
Croatian Tag = Tag(compact.Croatian)
Hungarian Tag = Tag(compact.Hungarian)
Armenian Tag = Tag(compact.Armenian)
Indonesian Tag = Tag(compact.Indonesian)
Icelandic Tag = Tag(compact.Icelandic)
Italian Tag = Tag(compact.Italian)
Japanese Tag = Tag(compact.Japanese)
Georgian Tag = Tag(compact.Georgian)
Kazakh Tag = Tag(compact.Kazakh)
Khmer Tag = Tag(compact.Khmer)
Kannada Tag = Tag(compact.Kannada)
Korean Tag = Tag(compact.Korean)
Kirghiz Tag = Tag(compact.Kirghiz)
Lao Tag = Tag(compact.Lao)
Lithuanian Tag = Tag(compact.Lithuanian)
Latvian Tag = Tag(compact.Latvian)
Macedonian Tag = Tag(compact.Macedonian)
Malayalam Tag = Tag(compact.Malayalam)
Mongolian Tag = Tag(compact.Mongolian)
Marathi Tag = Tag(compact.Marathi)
Malay Tag = Tag(compact.Malay)
Burmese Tag = Tag(compact.Burmese)
Nepali Tag = Tag(compact.Nepali)
Dutch Tag = Tag(compact.Dutch)
Norwegian Tag = Tag(compact.Norwegian)
Punjabi Tag = Tag(compact.Punjabi)
Polish Tag = Tag(compact.Polish)
Portuguese Tag = Tag(compact.Portuguese)
BrazilianPortuguese Tag = Tag(compact.BrazilianPortuguese)
EuropeanPortuguese Tag = Tag(compact.EuropeanPortuguese)
Romanian Tag = Tag(compact.Romanian)
Russian Tag = Tag(compact.Russian)
Sinhala Tag = Tag(compact.Sinhala)
Slovak Tag = Tag(compact.Slovak)
Slovenian Tag = Tag(compact.Slovenian)
Albanian Tag = Tag(compact.Albanian)
Serbian Tag = Tag(compact.Serbian)
SerbianLatin Tag = Tag(compact.SerbianLatin)
Swedish Tag = Tag(compact.Swedish)
Swahili Tag = Tag(compact.Swahili)
Tamil Tag = Tag(compact.Tamil)
Telugu Tag = Tag(compact.Telugu)
Thai Tag = Tag(compact.Thai)
Turkish Tag = Tag(compact.Turkish)
Ukrainian Tag = Tag(compact.Ukrainian)
Urdu Tag = Tag(compact.Urdu)
Uzbek Tag = Tag(compact.Uzbek)
Vietnamese Tag = Tag(compact.Vietnamese)
Chinese Tag = Tag(compact.Chinese)
SimplifiedChinese Tag = Tag(compact.SimplifiedChinese)
TraditionalChinese Tag = Tag(compact.TraditionalChinese)
Zulu Tag = Tag(compact.Zulu)
)

View file

@ -78,8 +78,8 @@ type SpanningTransformer interface {
// considering the error err.
//
// A nil error means that all input bytes are known to be identical to the
// output produced by the Transformer. A nil error can be be returned
// regardless of whether atEOF is true. If err is nil, then then n must
// output produced by the Transformer. A nil error can be returned
// regardless of whether atEOF is true. If err is nil, then n must
// equal len(src); the converse is not necessarily true.
//
// ErrEndOfSpan means that the Transformer output may differ from the

View file

@ -6,7 +6,7 @@
// Package bidi contains functionality for bidirectional text support.
//
// See http://www.unicode.org/reports/tr9.
// See https://www.unicode.org/reports/tr9.
//
// NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways
// and without notice.

View file

@ -12,7 +12,7 @@ import (
// This file contains a port of the reference implementation of the
// Bidi Parentheses Algorithm:
// http://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/BidiPBAReference.java
// https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/BidiPBAReference.java
//
// The implementation in this file covers definitions BD14-BD16 and rule N0
// of UAX#9.
@ -246,7 +246,7 @@ func (p *bracketPairer) getStrongTypeN0(index int) Class {
// assuming the given embedding direction.
//
// It returns ON if no strong type is found. If a single strong type is found,
// it returns this this type. Otherwise it returns the embedding direction.
// it returns this type. Otherwise it returns the embedding direction.
//
// TODO: use separate type for "strong" directionality.
func (p *bracketPairer) classifyPairContent(loc bracketPair, dirEmbed Class) Class {

View file

@ -7,7 +7,7 @@ package bidi
import "log"
// This implementation is a port based on the reference implementation found at:
// http://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/
// https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/
//
// described in Unicode Bidirectional Algorithm (UAX #9).
//

View file

@ -1,133 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"flag"
"log"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/triegen"
"golang.org/x/text/internal/ucd"
)
var outputFile = flag.String("out", "tables.go", "output file")
// main is the entry point of the bidi table generator. It initializes the gen
// package, repackages two generator sources for package "bidi", and then
// generates the tables.
func main() {
	gen.Init()
	// NOTE(review): Repackage presumably rewrites the first file into the
	// second under the given package name — confirm against internal/gen.
	gen.Repackage("gen_trieval.go", "trieval.go", "bidi")
	gen.Repackage("gen_ranges.go", "ranges_test.go", "bidi")
	genTables()
}
// bidiClass names and codes taken from class "bc" in
// http://www.unicode.org/Public/8.0.0/ucd/PropertyValueAliases.txt
var bidiClass = map[string]Class{
"AL": AL, // ArabicLetter
"AN": AN, // ArabicNumber
"B": B, // ParagraphSeparator
"BN": BN, // BoundaryNeutral
"CS": CS, // CommonSeparator
"EN": EN, // EuropeanNumber
"ES": ES, // EuropeanSeparator
"ET": ET, // EuropeanTerminator
"L": L, // LeftToRight
"NSM": NSM, // NonspacingMark
"ON": ON, // OtherNeutral
"R": R, // RightToLeft
"S": S, // SegmentSeparator
"WS": WS, // WhiteSpace
"FSI": Control,
"PDF": Control,
"PDI": Control,
"LRE": Control,
"LRI": Control,
"LRO": Control,
"RLE": Control,
"RLI": Control,
"RLO": Control,
}
// genTables builds the bidi trie and the xorMasks table and writes them to the
// file named by the -out flag. Each trie value combines a rune's Class with
// bracket information taken from BidiBrackets.txt. It terminates the program
// via log.Fatal/log.Fatalf on unknown input values or on a generation error.
func genTables() {
	// The Class must fit in the low four bits of a trie entry.
	if numClass > 0x0F {
		log.Fatalf("Too many Class constants (%#x > 0x0F).", numClass)
	}
	w := gen.NewCodeWriter()
	defer w.WriteVersionedGoFile(*outputFile, "bidi")

	gen.WriteUnicodeVersion(w)

	t := triegen.NewTrie("bidi")

	// Build data about bracket mapping. These bits need to be or-ed with
	// any other bits.
	orMask := map[rune]uint64{}

	// xorMap maps an XOR difference between the two runes of a bracket entry
	// to its index in xorMasks, so each distinct difference is stored once.
	xorMap := map[rune]int{}
	xorMasks := []rune{0} // First value is no-op.

	ucd.Parse(gen.OpenUCDFile("BidiBrackets.txt"), func(p *ucd.Parser) {
		r1 := p.Rune(0)
		r2 := p.Rune(1)
		xor := r1 ^ r2
		if _, ok := xorMap[xor]; !ok {
			xorMap[xor] = len(xorMasks)
			xorMasks = append(xorMasks, xor)
		}
		// The xorMasks index goes in the bits above xorMaskShift.
		entry := uint64(xorMap[xor]) << xorMaskShift
		switch p.String(2) {
		case "o":
			entry |= openMask
		case "c", "n":
			// Nothing to add: the open bit stays clear.
		default:
			log.Fatalf("Unknown bracket class %q.", p.String(2))
		}
		orMask[r1] = entry
	})

	w.WriteComment(`
xorMasks contains masks to be xor-ed with brackets to get the reverse
version.`)
	w.WriteVar("xorMasks", xorMasks)

	// insert adds r to the trie at most once, so the first class supplied for
	// a rune wins and later calls for the same rune are ignored.
	done := map[rune]bool{}

	insert := func(r rune, c Class) {
		if !done[r] {
			t.Insert(r, orMask[r]|uint64(c))
			done[r] = true
		}
	}

	// Insert the derived BiDi properties.
	ucd.Parse(gen.OpenUCDFile("extracted/DerivedBidiClass.txt"), func(p *ucd.Parser) {
		r := p.Rune(0)
		class, ok := bidiClass[p.String(1)]
		if !ok {
			log.Fatalf("%U: Unknown BiDi class %q", r, p.String(1))
		}
		insert(r, class)
	})
	// Runes not inserted above get whatever visitDefaults supplies for them.
	visitDefaults(insert)

	// TODO: use sparse blocks. This would reduce table size considerably
	// from the looks of it.

	sz, err := t.Gen(w)
	if err != nil {
		log.Fatal(err)
	}
	w.Size += sz
}
// dummy values to make methods in gen_common compile. The real versions
// will be generated by this file to tables.go.
var (
xorMasks []rune
)

View file

@ -1,57 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"unicode"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/ucd"
"golang.org/x/text/unicode/rangetable"
)
// visitDefaults calls fn with a default class for every rune in a set of
// hard-coded ranges (AL, R and ET), for every rune in
// unicode.Noncharacter_Code_Point (BN), and for every code point listed as
// Default_Ignorable_Code_Point in DerivedCoreProperties.txt (BN).
//
// These tables are hand-extracted from:
// http://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedBidiClass.txt
func visitDefaults(fn func(r rune, c Class)) {
	// first write default values for ranges listed above.
	visitRunes(fn, AL, []rune{
		0x0600, 0x07BF, // Arabic
		0x08A0, 0x08FF, // Arabic Extended-A
		0xFB50, 0xFDCF, // Arabic Presentation Forms
		0xFDF0, 0xFDFF,
		0xFE70, 0xFEFF,
		0x0001EE00, 0x0001EEFF, // Arabic Mathematical Alpha Symbols
	})
	visitRunes(fn, R, []rune{
		0x0590, 0x05FF, // Hebrew
		0x07C0, 0x089F, // Nko et al.
		0xFB1D, 0xFB4F,
		0x00010800, 0x00010FFF, // Cypriot Syllabary et al.
		0x0001E800, 0x0001EDFF,
		0x0001EF00, 0x0001EFFF,
	})
	visitRunes(fn, ET, []rune{ // European Terminator
		0x20A0, 0x20Cf, // Currency symbols
	})
	rangetable.Visit(unicode.Noncharacter_Code_Point, func(r rune) {
		fn(r, BN) // Boundary Neutral
	})
	ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) {
		if p.String(1) == "Default_Ignorable_Code_Point" {
			fn(p.Rune(0), BN) // Boundary Neutral
		}
	})
}
// visitRunes calls fn(r, c) for every rune r of each inclusive range in runes.
// runes lists the ranges as consecutive (low, high) pairs, so it must hold an
// even number of elements.
func visitRunes(fn func(r rune, c Class), c Class, runes []rune) {
	for rest := runes; len(rest) > 0; rest = rest[2:] {
		first, last := rest[0], rest[1]
		for r := first; r <= last; r++ {
			fn(r, c)
		}
	}
}

View file

@ -1,64 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// Class is the Unicode BiDi class. Each rune has a single class.
type Class uint
const (
L Class = iota // LeftToRight
R // RightToLeft
EN // EuropeanNumber
ES // EuropeanSeparator
ET // EuropeanTerminator
AN // ArabicNumber
CS // CommonSeparator
B // ParagraphSeparator
S // SegmentSeparator
WS // WhiteSpace
ON // OtherNeutral
BN // BoundaryNeutral
NSM // NonspacingMark
AL // ArabicLetter
Control // Control LRO - PDI
numClass
LRO // LeftToRightOverride
RLO // RightToLeftOverride
LRE // LeftToRightEmbedding
RLE // RightToLeftEmbedding
PDF // PopDirectionalFormat
LRI // LeftToRightIsolate
RLI // RightToLeftIsolate
FSI // FirstStrongIsolate
PDI // PopDirectionalIsolate
unknownClass = ^Class(0)
)
var controlToClass = map[rune]Class{
0x202D: LRO, // LeftToRightOverride,
0x202E: RLO, // RightToLeftOverride,
0x202A: LRE, // LeftToRightEmbedding,
0x202B: RLE, // RightToLeftEmbedding,
0x202C: PDF, // PopDirectionalFormat,
0x2066: LRI, // LeftToRightIsolate,
0x2067: RLI, // RightToLeftIsolate,
0x2068: FSI, // FirstStrongIsolate,
0x2069: PDI, // PopDirectionalIsolate,
}
// A trie entry has the following bits:
// 7..5 XOR mask for brackets
// 4 1: Bracket open, 0: Bracket close
// 3..0 Class type
const (
openMask = 0x10
xorMaskShift = 5
)

View file

@ -407,7 +407,7 @@ func decomposeHangul(buf []byte, r rune) int {
// decomposeHangul algorithmically decomposes a Hangul rune into
// its Jamo components.
// See http://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
// See https://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
func (rb *reorderBuffer) decomposeHangul(r rune) {
r -= hangulBase
x := r % jamoTCount
@ -420,7 +420,7 @@ func (rb *reorderBuffer) decomposeHangul(r rune) {
}
// combineHangul algorithmically combines Jamo character components into Hangul.
// See http://unicode.org/reports/tr15/#Hangul for details on combining Hangul.
// See https://unicode.org/reports/tr15/#Hangul for details on combining Hangul.
func (rb *reorderBuffer) combineHangul(s, i, k int) {
b := rb.rune[:]
bn := rb.nrune
@ -461,6 +461,10 @@ func (rb *reorderBuffer) combineHangul(s, i, k int) {
// It should only be used to recompose a single segment, as it will not
// handle alternations between Hangul and non-Hangul characters correctly.
func (rb *reorderBuffer) compose() {
// Lazily load the map used by the combine func below, but do
// it outside of the loop.
recompMapOnce.Do(buildRecompMap)
// UAX #15, section X5 , including Corrigendum #5
// "In any character sequence beginning with starter S, a character C is
// blocked from S if and only if there is some character B between S

View file

@ -4,6 +4,8 @@
package norm
import "encoding/binary"
// This file contains Form-specific logic and wrappers for data in tables.go.
// Rune info is stored in a separate trie per composing form. A composing form
@ -178,6 +180,17 @@ func (p Properties) TrailCCC() uint8 {
return ccc[p.tccc]
}
// buildRecompMap populates recompMap from recompMapPacked. The packed data is
// a sequence of 8-byte records: a big-endian uint32 key followed by a
// big-endian uint32 holding the rune stored under that key.
func buildRecompMap() {
	const recordSize = 8
	recompMap = make(map[uint32]rune, len(recompMapPacked)/8)
	var record [recordSize]byte
	for off := 0; off < len(recompMapPacked); off += recordSize {
		copy(record[:], recompMapPacked[off:off+recordSize])
		pair := binary.BigEndian.Uint32(record[:4])
		composed := binary.BigEndian.Uint32(record[4:])
		recompMap[pair] = rune(composed)
	}
}
// Recomposition
// We use 32-bit keys instead of 64-bit for the two codepoint keys.
// This clips off the bits of three entries, but we know this will not
@ -186,8 +199,14 @@ func (p Properties) TrailCCC() uint8 {
// Note that the recomposition map for NFC and NFKC are identical.
// combine returns the rune that a and b combine to, or 0 if there is no entry
// for the pair. The lookup key packs the low 16 bits of a above the low 16
// bits of b.
//
// recompMap must already be built: the caller has to run
// recompMapOnce.Do(buildRecompMap) at some point before calling combine,
// otherwise combine panics.
func combine(a, b rune) rune {
	if recompMap == nil {
		panic("caller error") // see func comment
	}
	hi, lo := uint32(uint16(a)), uint32(uint16(b))
	return recompMap[hi<<16+lo]
}

View file

@ -128,8 +128,9 @@ func (i *Iter) Next() []byte {
func nextASCIIBytes(i *Iter) []byte {
p := i.p + 1
if p >= i.rb.nsrc {
p0 := i.p
i.setDone()
return i.rb.src.bytes[i.p:p]
return i.rb.src.bytes[p0:p]
}
if i.rb.src.bytes[p] < utf8.RuneSelf {
p0 := i.p

View file

@ -1,976 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Normalization table generator.
// Data read from the web.
// See forminfo.go for a description of the trie values associated with each rune.
package main
import (
"bytes"
"flag"
"fmt"
"io"
"log"
"sort"
"strconv"
"strings"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/triegen"
"golang.org/x/text/internal/ucd"
)
// main runs the normalization table generator as a fixed sequence of steps:
// load UnicodeData.txt, compact the combining classes, load the composition
// exclusions, and complete the per-rune fields for the canonical and the
// compatibility forms. The remaining steps are defined elsewhere in this
// file; going by their names they verify, print and emit the result —
// NOTE(review): confirm against their definitions.
func main() {
	gen.Init()
	loadUnicodeData()
	compactCCC()
	loadCompositionExclusions()
	completeCharFields(FCanonical)
	completeCharFields(FCompatibility)
	computeNonStarterCounts()
	verifyComputed()
	printChars()
	testDerived()
	printTestdata()
	makeTables()
}
var (
tablelist = flag.String("tables",
"all",
"comma-separated list of which tables to generate; "+
"can be 'decomp', 'recomp', 'info' and 'all'")
test = flag.Bool("test",
false,
"test existing tables against DerivedNormalizationProps and generate test data for regression testing")
verbose = flag.Bool("verbose",
false,
"write data to stdout as it is parsed")
)
const MaxChar = 0x10FFFF // anything above this shouldn't exist
// QCResult is a Quick Check property value. Quick Check properties make it
// possible to decide quickly whether a rune may occur in a normal form: for a
// given form a rune is guaranteed to occur verbatim (QC=Yes), may or may not
// combine with another rune (QC=Maybe), or may not occur (QC=No).
type QCResult int

const (
	QCUnknown QCResult = iota
	QCYes
	QCNo
	QCMaybe
)

// String returns "Yes", "No" or "Maybe" for the corresponding values and
// "***UNKNOWN***" for anything else, QCUnknown included.
func (r QCResult) String() string {
	names := [...]string{QCYes: "Yes", QCNo: "No", QCMaybe: "Maybe"}
	if r > QCUnknown && int(r) < len(names) {
		return names[r]
	}
	return "***UNKNOWN***"
}
const (
FCanonical = iota // NFC or NFD
FCompatibility // NFKC or NFKD
FNumberOfFormTypes
)
const (
MComposed = iota // NFC or NFKC
MDecomposed // NFD or NFKD
MNumberOfModes
)
// This contains only the properties we're interested in.
type Char struct {
name string
codePoint rune // if zero, this index is not a valid code point.
ccc uint8 // canonical combining class
origCCC uint8
excludeInComp bool // from CompositionExclusions.txt
compatDecomp bool // it has a compatibility expansion
nTrailingNonStarters uint8
nLeadingNonStarters uint8 // must be equal to trailing if non-zero
forms [FNumberOfFormTypes]FormInfo // For FCanonical and FCompatibility
state State
}
var chars = make([]Char, MaxChar+1)
var cccMap = make(map[uint8]uint8)
// String renders c as a multi-line description: the code point and name,
// followed by the ccc, exclusion, compatibility and state fields, and then
// the canonical (NFC) and compatibility (NFKC) form information.
func (c Char) String() string {
	var out bytes.Buffer
	fmt.Fprintf(&out, "%U [%s]:\n", c.codePoint, c.name)
	fmt.Fprintf(&out, " ccc: %v\n", c.ccc)
	fmt.Fprintf(&out, " excludeInComp: %v\n", c.excludeInComp)
	fmt.Fprintf(&out, " compatDecomp: %v\n", c.compatDecomp)
	fmt.Fprintf(&out, " state: %v\n", c.state)
	out.WriteString(" NFC:\n")
	fmt.Fprint(&out, c.forms[FCanonical])
	out.WriteString(" NFKC:\n")
	fmt.Fprint(&out, c.forms[FCompatibility])
	return out.String()
}
// In UnicodeData.txt, some ranges are marked like this:
// 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
// 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
// parseCharacter keeps a state variable indicating the weirdness.
type State int
const (
SNormal State = iota // known to be zero for the type
SFirst
SLast
SMissing
)
var lastChar = rune('\u0000')
// isValid reports whether c has a non-zero code point and is not in the
// SMissing state.
func (c Char) isValid() bool {
	if c.codePoint == 0 || c.state == SMissing {
		return false
	}
	return true
}
type FormInfo struct {
quickCheck [MNumberOfModes]QCResult // index: MComposed or MDecomposed
verified [MNumberOfModes]bool // index: MComposed or MDecomposed
combinesForward bool // May combine with rune on the right
combinesBackward bool // May combine with rune on the left
isOneWay bool // Never appears in result
inDecomp bool // Some decompositions result in this char.
decomp Decomposition
expandedDecomp Decomposition
}
// String renders f as one line per field: the quick-check results for the
// composed and decomposed modes, the combining flags, and the direct and
// expanded decompositions printed in upper-case hex.
func (f FormInfo) String() string {
	var out bytes.Buffer
	fmt.Fprintf(&out, " quickCheck[C]: %v\n", f.quickCheck[MComposed])
	fmt.Fprintf(&out, " quickCheck[D]: %v\n", f.quickCheck[MDecomposed])
	fmt.Fprintf(&out, " cmbForward: %v\n", f.combinesForward)
	fmt.Fprintf(&out, " cmbBackward: %v\n", f.combinesBackward)
	fmt.Fprintf(&out, " isOneWay: %v\n", f.isOneWay)
	fmt.Fprintf(&out, " inDecomp: %v\n", f.inDecomp)
	fmt.Fprintf(&out, " decomposition: %X\n", f.decomp)
	fmt.Fprintf(&out, " expandedDecomp: %X\n", f.expandedDecomp)
	return out.String()
}
type Decomposition []rune
// parseDecomposition parses s as a space-separated list of hexadecimal code
// points. With skipfirst set, the first field is dropped before parsing. On a
// parse failure it returns the runes converted so far together with the
// strconv error.
func parseDecomposition(s string, skipfirst bool) (a []rune, err error) {
	fields := strings.Split(s, " ")
	if skipfirst && len(fields) > 0 {
		fields = fields[1:]
	}
	for i := range fields {
		value, perr := strconv.ParseUint(fields[i], 16, 64)
		if perr != nil {
			return a, perr
		}
		a = append(a, rune(value))
	}
	return a, nil
}
// loadUnicodeData reads UnicodeData.txt and records, for every rune listed,
// its name, code point, canonical combining class and decomposition mapping
// in chars. A read error or an unparsable decomposition terminates the
// program via log.Fatal/log.Fatalf.
func loadUnicodeData() {
	f := gen.OpenUCDFile("UnicodeData.txt")
	defer f.Close()
	p := ucd.New(f)
	for p.Next() {
		r := p.Rune(ucd.CodePoint)
		char := &chars[r]

		char.ccc = uint8(p.Uint(ucd.CanonicalCombiningClass))
		decmap := p.String(ucd.DecompMapping)

		// First try to parse the mapping as hex code points only. This fails
		// for an empty mapping (exp stays nil) and for a mapping whose first
		// field is not a hex number.
		exp, err := parseDecomposition(decmap, false)
		isCompat := false
		if err != nil {
			if len(decmap) > 0 {
				// Retry with the first field skipped; a mapping that only
				// parses this way is treated as a compatibility mapping.
				exp, err = parseDecomposition(decmap, true)
				if err != nil {
					log.Fatalf(`%U: bad decomp |%v|: "%s"`, r, decmap, err)
				}
				isCompat = true
			}
		}

		char.name = p.String(ucd.Name)
		char.codePoint = r
		char.forms[FCompatibility].decomp = exp
		if !isCompat {
			char.forms[FCanonical].decomp = exp
		} else {
			char.compatDecomp = true
		}
		// NOTE(review): this repeats the unconditional assignment a few lines
		// up with the same value, so it looks redundant.
		if len(decmap) > 0 {
			char.forms[FCompatibility].decomp = exp
		}
	}
	if err := p.Err(); err != nil {
		log.Fatal(err)
	}
}
// compactCCC converts the sparse set of CCC values to a contiguous one,
// reducing the number of bits needed from 8 to 6. Each char keeps its
// original class in origCCC, and cccMap records the original value for every
// compacted one. The program is terminated if 64 or more distinct values are
// in use.
func compactCCC() {
	// Collect the distinct CCC values in use.
	remap := make(map[uint8]uint8)
	for i := range chars {
		remap[chars[i].ccc] = 0
	}
	ordered := []int{}
	for value := range remap {
		ordered = append(ordered, int(value))
	}
	sort.Ints(ordered)
	// Number the values in ascending order and record both directions.
	for compact, original := range ordered {
		cccMap[uint8(compact)] = uint8(original)
		remap[uint8(original)] = uint8(compact)
	}
	for i := range chars {
		ch := &chars[i]
		ch.origCCC = ch.ccc
		ch.ccc = remap[ch.ccc]
	}
	if len(remap) >= 1<<6 {
		log.Fatalf("too many difference CCC values: %d >= 64", len(remap))
	}
}
// loadCompositionExclusions reads CompositionExclusions.txt and sets
// excludeInComp on every char whose code point is listed. A code point listed
// twice, or a read error, terminates the program.
//
// CompositionExclusions.txt has form:
// 0958 # ...
// See http://unicode.org/reports/tr44/ for full explanation
func loadCompositionExclusions() {
	f := gen.OpenUCDFile("CompositionExclusions.txt")
	defer f.Close()
	p := ucd.New(f)
	for p.Next() {
		// Field 0 of each record is the excluded code point.
		c := &chars[p.Rune(0)]
		if c.excludeInComp {
			log.Fatalf("%U: Duplicate entry in exclusions.", c.codePoint)
		}
		c.excludeInComp = true
	}
	if e := p.Err(); e != nil {
		log.Fatal(e)
	}
}
// hasCompatDecomp reports whether r itself, or any rune reachable by
// recursively following its compatibility-form decomposition, is flagged as
// having a compatibility decomposition.
// In this case, the character may not occur in NFK*.
func hasCompatDecomp(r rune) bool {
	info := &chars[r]
	if info.compatDecomp {
		return true
	}
	parts := info.forms[FCompatibility].decomp
	for i := 0; i < len(parts); i++ {
		if hasCompatDecomp(parts[i]) {
			return true
		}
	}
	return false
}
// Hangul related constants.
//
// The *Base values are the first code point of each range. The *End values
// are used as exclusive upper bounds by the range checks in this file
// (for example HangulBase <= r && r < HangulEnd).
const (
HangulBase = 0xAC00
HangulEnd = 0xD7A4 // HangulBase + Jamo combinations (19 * 21 * 28)
JamoLBase = 0x1100
JamoLEnd = 0x1113
JamoVBase = 0x1161
JamoVEnd = 0x1176
JamoTBase = 0x11A8
JamoTEnd = 0x11C3
// JamoLVTCount is the number of syllables counted from HangulBase.
JamoLVTCount = 19 * 21 * 28
// JamoTCount is the modulus used to detect syllables without a trailing
// consonant: offsets from HangulBase divisible by it have none.
JamoTCount = 28
)
// isHangul reports whether r lies in the half-open range
// [HangulBase, HangulEnd).
func isHangul(r rune) bool {
	if r < HangulBase {
		return false
	}
	return r < HangulEnd
}
// isHangulWithoutJamoT reports whether r is a Hangul syllable whose offset
// from HangulBase is a multiple of JamoTCount, i.e. one without a trailing
// consonant.
func isHangulWithoutJamoT(r rune) bool {
	if isHangul(r) {
		offset := r - HangulBase
		return offset < JamoLVTCount && offset%JamoTCount == 0
	}
	return false
}
// ccc returns the combining class currently stored for r in the chars table.
func ccc(r rune) uint8 {
return chars[r].ccc
}
// insertOrdered appends r to b, keeping runes ordered by Canonical Combining
// Class. A rune with class 0 is always placed at the end; any other rune is
// moved left past trailing runes whose class is strictly greater than its
// own, so runes of equal class keep their relative order.
func insertOrdered(b Decomposition, r rune) Decomposition {
	pos := len(b)
	b = append(b, 0)
	if rcc := ccc(r); rcc > 0 {
		// Shift larger-class runes one slot to the right.
		for pos > 0 && ccc(b[pos-1]) > rcc {
			b[pos] = b[pos-1]
			pos--
		}
	}
	b[pos] = r
	return b
}
// decomposeRecursive appends the full decomposition of r for the given form
// to d and returns the result. Runes without a decomposition are inserted
// with insertOrdered; all others are expanded recursively, part by part.
func decomposeRecursive(form int, r rune, d Decomposition) Decomposition {
	parts := chars[r].forms[form].decomp
	if len(parts) > 0 {
		for i := range parts {
			d = decomposeRecursive(form, parts[i], d)
		}
		return d
	}
	return insertOrdered(d, r)
}
// completeCharFields derives, for the given form, the remaining per-character
// fields from the raw decompositions: the fully expanded decomposition, the
// one-way (not recomposable) flag, the inDecomp marks, the forward/backward
// combining flags and the quick check values. The phases run in order over
// the whole chars table because later phases read flags set by earlier ones.
func completeCharFields(form int) {
// Phase 0: pre-expand decomposition.
for i := range chars {
f := &chars[i].forms[form]
if len(f.decomp) == 0 {
continue
}
exp := make(Decomposition, 0)
for _, c := range f.decomp {
exp = decomposeRecursive(form, c, exp)
}
f.expandedDecomp = exp
}
// Phase 1: composition exclusion, mark decomposition.
for i := range chars {
c := &chars[i]
f := &c.forms[form]
// Marks script-specific exclusions and version restricted.
f.isOneWay = c.excludeInComp
// Singletons
f.isOneWay = f.isOneWay || len(f.decomp) == 1
// Non-starter decompositions
if len(f.decomp) > 1 {
chk := c.ccc != 0 || chars[f.decomp[0]].ccc != 0
f.isOneWay = f.isOneWay || chk
}
// Runes that decompose into more than two runes.
f.isOneWay = f.isOneWay || len(f.decomp) > 2
if form == FCompatibility {
f.isOneWay = f.isOneWay || hasCompatDecomp(c.codePoint)
}
// Record every rune that appears in some decomposition.
for _, r := range f.decomp {
chars[r].forms[form].inDecomp = true
}
}
// Phase 2: forward and backward combining.
for i := range chars {
c := &chars[i]
f := &c.forms[form]
// Only two-rune decompositions that are not one-way mark their parts,
// and a part is marked only if it is not one-way itself.
if !f.isOneWay && len(f.decomp) == 2 {
f0 := &chars[f.decomp[0]].forms[form]
f1 := &chars[f.decomp[1]].forms[form]
if !f0.isOneWay {
f0.combinesForward = true
}
if !f1.isOneWay {
f1.combinesBackward = true
}
}
if isHangulWithoutJamoT(rune(i)) {
f.combinesForward = true
}
}
// Phase 3: quick check values.
for i := range chars {
c := &chars[i]
f := &c.forms[form]
switch {
case len(f.decomp) > 0:
f.quickCheck[MDecomposed] = QCNo
case isHangul(rune(i)):
f.quickCheck[MDecomposed] = QCNo
default:
f.quickCheck[MDecomposed] = QCYes
}
switch {
case f.isOneWay:
f.quickCheck[MComposed] = QCNo
// Matches every i in 0x1100..0x11FF (the low 8 bits are masked off).
case (i & 0xffff00) == JamoLBase:
f.quickCheck[MComposed] = QCYes
if JamoLBase <= i && i < JamoLEnd {
f.combinesForward = true
}
if JamoVBase <= i && i < JamoVEnd {
f.quickCheck[MComposed] = QCMaybe
f.combinesBackward = true
f.combinesForward = true
}
if JamoTBase <= i && i < JamoTEnd {
f.quickCheck[MComposed] = QCMaybe
f.combinesBackward = true
}
case !f.combinesBackward:
f.quickCheck[MComposed] = QCYes
default:
f.quickCheck[MComposed] = QCMaybe
}
}
}
// computeNonStarterCounts fills in nLeadingNonStarters and
// nTrailingNonStarters for every entry in chars. The counts are taken over
// the compatibility-form expanded decomposition of the character, or over the
// character itself when it has none. A rune counts as a non-starter when its
// ccc is non-zero or it combines backward. Hangul syllables get a fixed
// trailing count (1 without a trailing consonant, 2 otherwise). Inconsistent
// or too-large counts are fatal.
func computeNonStarterCounts() {
	// Phase 4: leading and trailing non-starter count
	for i := range chars {
		c := &chars[i]

		runes := []rune{rune(i)}
		// We always use FCompatibility so that the CGJ insertion points do not
		// change for repeated normalizations with different forms.
		if exp := c.forms[FCompatibility].expandedDecomp; len(exp) > 0 {
			runes = exp
		}
		// We consider runes that combine backwards to be non-starters for the
		// purpose of Stream-Safe Text Processing.
		for _, r := range runes {
			if cr := &chars[r]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward {
				break
			}
			c.nLeadingNonStarters++
		}

		// Use a distinct index name so the code point i is not shadowed.
		for j := len(runes) - 1; j >= 0; j-- {
			if cr := &chars[runes[j]]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward {
				break
			}
			c.nTrailingNonStarters++
		}
		if c.nTrailingNonStarters > 3 {
			log.Fatalf("%U: Decomposition with more than 3 (%d) trailing modifiers (%U)", i, c.nTrailingNonStarters, runes)
		}

		if isHangul(rune(i)) {
			c.nTrailingNonStarters = 2
			if isHangulWithoutJamoT(rune(i)) {
				c.nTrailingNonStarters = 1
			}
		}

		if l, t := c.nLeadingNonStarters, c.nTrailingNonStarters; l > 0 && l != t {
			log.Fatalf("%U: number of leading and trailing non-starters should be equal (%d vs %d)", i, l, t)
		}
		if t := c.nTrailingNonStarters; t > 3 {
			// The format has two verbs; pass the code point as well as the count.
			log.Fatalf("%U: number of trailing non-starters is %d > 3", i, t)
		}
	}
}
// printBytes writes b to w as the Go source of a byte array variable called
// name, preceded by a comment stating its size. Values are emitted eight per
// line, and every 64 values are introduced by a comment giving the index
// range that follows.
func printBytes(w io.Writer, b []byte, name string) {
	fmt.Fprintf(w, "// %s: %d bytes\n", name, len(b))
	fmt.Fprintf(w, "var %s = [...]byte {", name)
	for i := range b {
		if i%64 == 0 {
			fmt.Fprintf(w, "\n// Bytes %x - %x\n", i, i+63)
		} else if i%8 == 0 {
			fmt.Fprintln(w)
		}
		fmt.Fprintf(w, "0x%.2X, ", b[i])
	}
	fmt.Fprint(w, "\n}\n\n")
}
// makeEntry packs the properties of form f of character c into a 16-bit
// value: 0x40 when the code point is in the Hangul range, 0x20 when the form
// combines forward, 0x04 when the decomposed quick check is No, 0x10 or 0x18
// when the composed quick check is No or Maybe, OR-ed with the number of
// trailing non-starters. Any other composed quick check value is fatal.
// See forminfo.go for format.
func makeEntry(f *FormInfo, c *Char) uint16 {
	var e uint16
	if r := c.codePoint; r >= HangulBase && r < HangulEnd {
		e |= 0x40
	}
	if f.combinesForward {
		e |= 0x20
	}
	if f.quickCheck[MDecomposed] == QCNo {
		e |= 0x4
	}
	switch qc := f.quickCheck[MComposed]; qc {
	case QCYes:
		// No bits to set.
	case QCNo:
		e |= 0x10
	case QCMaybe:
		e |= 0x18
	default:
		log.Fatalf("Illegal quickcheck value %v.", qc)
	}
	return e | uint16(c.nTrailingNonStarters)
}
// decompSet keeps track of unique decompositions, grouped by whether
// the decomposition is followed by a trailing and/or leading CCC.
// It holds one set of encoded decomposition strings per group; the group
// indices are the constants normalDecomp through firstStarterWithNLead.
type decompSet [7]map[string]bool
// Group indices into a decompSet. lastDecomp is one past the final group and
// is not itself a valid index.
const (
normalDecomp = iota
firstMulti
firstCCC
endMulti
firstLeadingCCC
firstCCCZeroExcept
firstStarterWithNLead
lastDecomp
)
// cname[i] is the name of the constant emitted after group i has been
// written; its value is the buffer length at that point, i.e. the offset at
// which the following group begins.
var cname = []string{"firstMulti", "firstCCC", "endMulti", "firstLeadingCCC", "firstCCCZeroExcept", "firstStarterWithNLead", "lastDecomp"}
// makeDecompSet returns a decompSet in which every group has an initialized,
// empty map.
func makeDecompSet() decompSet {
	var set decompSet
	for i := 0; i < len(set); i++ {
		set[i] = map[string]bool{}
	}
	return set
}
// insert records the encoded decomposition s in the group selected by key.
func (m *decompSet) insert(key int, s string) {
	group := (*m)[key]
	group[s] = true
}
// printCharInfoTables writes to w the constants marking the group boundaries
// inside the decomposition table, the "decomps" byte table itself, and one
// trie per form ("nfc" and "nfkc"). It returns the total size in bytes of the
// decomps table plus the generated tries.
func printCharInfoTables(w io.Writer) int {
// mkstr encodes the expanded decomposition of r for form f and returns the
// decompSet group it belongs to together with the encoded string. The
// encoding is a header byte (low bits: byte length of the decomposition,
// 0x40 if the composed quick check is not Yes, 0x80 if the form combines
// forward), the UTF-8 decomposition, then optionally a byte holding
// tccc<<2|nTrail and optionally a byte holding lccc.
mkstr := func(r rune, f *FormInfo) (int, string) {
d := f.expandedDecomp
s := string([]rune(d))
if max := 1 << 6; len(s) >= max {
const msg = "%U: too many bytes in decomposition: %d >= %d"
log.Fatalf(msg, r, len(s), max)
}
head := uint8(len(s))
if f.quickCheck[MComposed] != QCYes {
head |= 0x40
}
if f.combinesForward {
head |= 0x80
}
s = string([]byte{head}) + s
lccc := ccc(d[0])
tccc := ccc(d[len(d)-1])
cc := ccc(r)
if cc != 0 && lccc == 0 && tccc == 0 {
log.Fatalf("%U: trailing and leading ccc are 0 for non-zero ccc %d", r, cc)
}
if tccc < lccc && lccc != 0 {
const msg = "%U: lccc (%d) must be <= tcc (%d)"
log.Fatalf(msg, r, lccc, tccc)
}
index := normalDecomp
nTrail := chars[r].nTrailingNonStarters
nLead := chars[r].nLeadingNonStarters
if tccc > 0 || lccc > 0 || nTrail > 0 {
tccc <<= 2
tccc |= nTrail
s += string([]byte{tccc})
index = endMulti
// Any ccc-0 rune after the first moves the entry to firstCCC.
for _, r := range d[1:] {
if ccc(r) == 0 {
index = firstCCC
}
}
if lccc > 0 || nLead > 0 {
s += string([]byte{lccc})
if index == firstCCC {
log.Fatalf("%U: multi-segment decomposition not supported for decompositions with leading CCC != 0", r)
}
index = firstLeadingCCC
}
if cc != lccc {
if cc != 0 {
log.Fatalf("%U: for lccc != ccc, expected ccc to be 0; was %d", r, cc)
}
index = firstCCCZeroExcept
}
} else if len(d) > 1 {
index = firstMulti
}
return index, s
}
decompSet := makeDecompSet()
const nLeadStr = "\x00\x01" // 0-byte length and tccc with nTrail.
decompSet.insert(firstStarterWithNLead, nLeadStr)
// Store the uniqued decompositions in a byte buffer,
// preceded by their byte length.
for _, c := range chars {
for _, f := range c.forms {
if len(f.expandedDecomp) == 0 {
continue
}
if f.combinesBackward {
log.Fatalf("%U: combinesBackward and decompose", c.codePoint)
}
index, s := mkstr(c.codePoint, &f)
decompSet.insert(index, s)
}
}
decompositions := bytes.NewBuffer(make([]byte, 0, 10000))
size := 0
positionMap := make(map[string]uint16)
// Write one leading byte so that no encoded string is stored at offset 0.
decompositions.WriteString("\000")
fmt.Fprintln(w, "const (")
for i, m := range decompSet {
// Sort each group so that the generated table is deterministic.
sa := []string{}
for s := range m {
sa = append(sa, s)
}
sort.Strings(sa)
for _, s := range sa {
p := decompositions.Len()
decompositions.WriteString(s)
positionMap[s] = uint16(p)
}
if cname[i] != "" {
fmt.Fprintf(w, "%s = 0x%X\n", cname[i], decompositions.Len())
}
}
fmt.Fprintln(w, "maxDecomp = 0x8000")
fmt.Fprintln(w, ")")
b := decompositions.Bytes()
printBytes(w, b, "decomps")
size += len(b)
varnames := []string{"nfc", "nfkc"}
for i := 0; i < FNumberOfFormTypes; i++ {
trie := triegen.NewTrie(varnames[i])
for r, c := range chars {
f := c.forms[i]
d := f.expandedDecomp
if len(d) != 0 {
// Decomposing runes store the offset of their entry in decomps.
_, key := mkstr(c.codePoint, &f)
trie.Insert(rune(r), uint64(positionMap[key]))
if c.ccc != ccc(d[0]) {
// We assume the lead ccc of a decomposition !=0 in this case.
if ccc(d[0]) == 0 {
log.Fatalf("Expected leading CCC to be non-zero; ccc is %d", c.ccc)
}
}
} else if c.nLeadingNonStarters > 0 && len(f.expandedDecomp) == 0 && c.ccc == 0 && !f.combinesBackward {
// Handle cases where it can't be detected that the nLead should be equal
// to nTrail.
trie.Insert(c.codePoint, uint64(positionMap[nLeadStr]))
} else if v := makeEntry(&f, &c)<<8 | uint16(c.ccc); v != 0 {
// Other runes store their properties inline, flagged with 0x8000.
trie.Insert(c.codePoint, uint64(0x8000|v))
}
}
sz, err := trie.Gen(w, triegen.Compact(&normCompacter{name: varnames[i]}))
if err != nil {
log.Fatal(err)
}
size += sz
}
return size
}
// contains reports whether s is an element of sa.
func contains(sa []string, s string) bool {
	for i := range sa {
		if sa[i] == s {
			return true
		}
	}
	return false
}
// makeTables generates tables.go for package norm. Which tables are included
// is controlled by the tablelist flag: an empty value generates nothing,
// "all" selects both "recomp" and "info", and any other value is treated as a
// comma-separated list. The version constants and the CCC remap table are
// always written. The total table size is appended as a trailing comment.
func makeTables() {
	w := &bytes.Buffer{}

	size := 0
	if *tablelist == "" {
		return
	}
	list := strings.Split(*tablelist, ",")
	if *tablelist == "all" {
		list = []string{"recomp", "info"}
	}

	// Compute maximum decomposition size.
	maxLen := 0
	for _, c := range chars {
		if n := len(string(c.forms[FCompatibility].expandedDecomp)); n > maxLen {
			maxLen = n
		}
	}
	fmt.Fprintln(w, "const (")
	fmt.Fprintln(w, "\t// Version is the Unicode edition from which the tables are derived.")
	fmt.Fprintf(w, "\tVersion = %q\n", gen.UnicodeVersion())
	fmt.Fprintln(w)
	fmt.Fprintln(w, "\t// MaxTransformChunkSize indicates the maximum number of bytes that Transform")
	fmt.Fprintln(w, "\t// may need to write atomically for any Form. Making a destination buffer at")
	fmt.Fprintln(w, "\t// least this size ensures that Transform can always make progress and that")
	fmt.Fprintln(w, "\t// the user does not need to grow the buffer on an ErrShortDst.")
	// Convert through rune: string(int) is flagged by vet even though it
	// yields the same UTF-8 encoding of U+034F.
	fmt.Fprintf(w, "\tMaxTransformChunkSize = %d+maxNonStarters*4\n", len(string(rune(0x034F)))+maxLen)
	fmt.Fprint(w, ")\n\n")

	// Print the CCC remap table.
	size += len(cccMap)
	fmt.Fprintf(w, "var ccc = [%d]uint8{", len(cccMap))
	for i := 0; i < len(cccMap); i++ {
		if i%8 == 0 {
			fmt.Fprintln(w)
		}
		fmt.Fprintf(w, "%3d, ", cccMap[uint8(i)])
	}
	fmt.Fprint(w, "\n}\n\n")

	if contains(list, "info") {
		size += printCharInfoTables(w)
	}

	if contains(list, "recomp") {
		// Note that we use 32 bit keys, instead of 64 bit.
		// This clips the bits of three entries, but we know
		// this won't cause a collision. The compiler will catch
		// any changes made to UnicodeData.txt that introduces
		// a collision.
		// Note that the recomposition map for NFC and NFKC
		// are identical.

		// Recomposition map
		nrentries := 0
		for _, c := range chars {
			f := c.forms[FCanonical]
			if !f.isOneWay && len(f.decomp) > 0 {
				nrentries++
			}
		}
		sz := nrentries * 8
		size += sz
		fmt.Fprintf(w, "// recompMap: %d bytes (entries only)\n", sz)
		fmt.Fprintln(w, "var recompMap = map[uint32]rune{")
		for i, c := range chars {
			f := c.forms[FCanonical]
			d := f.decomp
			if !f.isOneWay && len(d) > 0 {
				// The key packs the low 16 bits of both decomposition runes.
				key := uint32(uint16(d[0]))<<16 + uint32(uint16(d[1]))
				fmt.Fprintf(w, "0x%.8X: 0x%.4X,\n", key, i)
			}
		}
		fmt.Fprintf(w, "}\n\n")
	}

	fmt.Fprintf(w, "// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size)
	gen.WriteVersionedGoFile("tables.go", "norm", w.Bytes())
}
// printChars prints every valid, non-missing entry of chars to standard
// output. It does nothing unless the verbose flag is set.
func printChars() {
	if !*verbose {
		return
	}
	for _, c := range chars {
		if c.isValid() && c.state != SMissing {
			fmt.Println(c)
		}
	}
}
// verifyComputed does various consistency tests on the computed per-form
// fields of every character and aborts on the first violation:
//   - outside the Hangul range, the decomposed quick check is No exactly
//     when the rune has a decomposition;
//   - the composed quick check is Maybe exactly when the form combines
//     backward;
//   - a decomposing rune that combines forward is never Maybe;
//   - for runes without an expanded decomposition, having leading
//     non-starters must agree with ccc > 0 or combinesBackward, apart from
//     the exceptions listed below;
//   - combinesBackward is identical for the canonical and compatibility
//     forms.
func verifyComputed() {
for i, c := range chars {
for _, f := range c.forms {
isNo := (f.quickCheck[MDecomposed] == QCNo)
if (len(f.decomp) > 0) != isNo && !isHangul(rune(i)) {
log.Fatalf("%U: NF*D QC must be No if rune decomposes", i)
}
isMaybe := f.quickCheck[MComposed] == QCMaybe
if f.combinesBackward != isMaybe {
log.Fatalf("%U: NF*C QC must be Maybe if combinesBackward", i)
}
if len(f.decomp) > 0 && f.combinesForward && isMaybe {
log.Fatalf("%U: NF*C QC must be Yes or No if combinesForward and decomposes", i)
}
// The remaining check only applies to runes that do not decompose.
if len(f.expandedDecomp) != 0 {
continue
}
if a, b := c.nLeadingNonStarters > 0, (c.ccc > 0 || f.combinesBackward); a != b {
// We accept these runes to be treated differently (it only affects
// segment breaking in iteration, most likely on improper use), but
// reconsider if more characters are added.
// U+FF9E HALFWIDTH KATAKANA VOICED SOUND MARK;Lm;0;L;<narrow> 3099;;;;N;;;;;
// U+FF9F HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK;Lm;0;L;<narrow> 309A;;;;N;;;;;
// U+3133 HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<compat> 11AA;;;;N;HANGUL LETTER GIYEOG SIOS;;;;
// U+318E HANGUL LETTER ARAEAE;Lo;0;L;<compat> 11A1;;;;N;HANGUL LETTER ALAE AE;;;;
// U+FFA3 HALFWIDTH HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<narrow> 3133;;;;N;HALFWIDTH HANGUL LETTER GIYEOG SIOS;;;;
// U+FFDC HALFWIDTH HANGUL LETTER I;Lo;0;L;<narrow> 3163;;;;N;;;;;
if i != 0xFF9E && i != 0xFF9F && !(0x3133 <= i && i <= 0x318E) && !(0xFFA3 <= i && i <= 0xFFDC) {
log.Fatalf("%U: nLead was %v; want %v", i, a, b)
}
}
}
nfc := c.forms[FCanonical]
nfkc := c.forms[FCompatibility]
if nfc.combinesBackward != nfkc.combinesBackward {
log.Fatalf("%U: Cannot combine combinesBackward\n", c.codePoint)
}
}
}
// testDerived compares the computed quick check values against the ones
// listed in DerivedNormalizationProps.txt and logs every mismatch. Entries
// for properties other than NFC_QC, NFD_QC, NFKC_QC and NFKD_QC are ignored.
// Afterwards every value that the file did not mention is expected to be
// QCYes; deviations are logged as well.
//
// DerivedNormalizationProps.txt has form:
// 00C0..00C5    ; NFD_QC; N # ...
// 0374          ; NFD_QC; N # ...
// See http://unicode.org/reports/tr44/ for full explanation
func testDerived() {
	f := gen.OpenUCDFile("DerivedNormalizationProps.txt")
	defer f.Close()

	p := ucd.New(f)
	for p.Next() {
		r := p.Rune(0)
		c := &chars[r]

		prop := p.String(1)
		var ftype, mode int
		switch prop {
		case "NFC_QC":
			ftype, mode = FCanonical, MComposed
		case "NFD_QC":
			ftype, mode = FCanonical, MDecomposed
		case "NFKC_QC":
			ftype, mode = FCompatibility, MComposed
		case "NFKD_QC":
			ftype, mode = FCompatibility, MDecomposed
		default:
			// Not a quick check property.
			continue
		}

		var want QCResult
		switch val := p.String(2); val {
		case "Y":
			want = QCYes
		case "N":
			want = QCNo
		case "M":
			want = QCMaybe
		default:
			log.Fatalf(`Unexpected quick check value "%s"`, val)
		}

		fd := &c.forms[ftype]
		if got := fd.quickCheck[mode]; got != want {
			log.Printf("%U: FAILED %s (was %v need %v)\n", r, prop, got, want)
		}
		fd.verified[mode] = true
	}
	if err := p.Err(); err != nil {
		log.Fatal(err)
	}

	// Any unspecified value must be QCYes. Verify this.
	for i, c := range chars {
		for j, fd := range c.forms {
			for k, qr := range fd.quickCheck {
				if fd.verified[k] || qr == QCYes {
					continue
				}
				m := "%U: FAIL F:%d M:%d (was %v need Yes) %s\n"
				log.Printf(m, i, j, k, qr, c.name)
			}
		}
	}
}
// testHeader is the Go source written at the top of the generated
// data_test.go: the quick check constants, the formData and runeData types,
// the f and g helper constructors, and the opening of the testData slice
// literal whose entries and closing brace are appended by printTestdata.
var testHeader = `const (
Yes = iota
No
Maybe
)
type formData struct {
qc uint8
combinesForward bool
decomposition string
}
type runeData struct {
r rune
ccc uint8
nLead uint8
nTrail uint8
f [2]formData // 0: canonical; 1: compatibility
}
func f(qc uint8, cf bool, dec string) [2]formData {
return [2]formData{{qc, cf, dec}, {qc, cf, dec}}
}
func g(qc, qck uint8, cf, cfk bool, d, dk string) [2]formData {
return [2]formData{{qc, cf, d}, {qck, cfk, dk}}
}
var testData = []runeData{
`
// printTestdata generates data_test.go for package norm. After the fixed
// header it emits one testData entry per run of consecutive code points that
// share the same combining class, non-starter counts and per-form data: only
// the first code point of each run is written.
func printTestdata() {
	type lastInfo struct {
		ccc    uint8
		nLead  uint8
		nTrail uint8
		f      string
	}

	last := lastInfo{}
	w := &bytes.Buffer{}
	// testHeader is data, not a format string, so write it verbatim.
	fmt.Fprint(w, testHeader)
	for r, c := range chars {
		f := c.forms[FCanonical]
		qc, cf, d := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp)
		f = c.forms[FCompatibility]
		qck, cfk, dk := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp)

		// Use the short constructor when both forms carry identical data.
		var s string
		if d == dk && qc == qck && cf == cfk {
			s = fmt.Sprintf("f(%s, %v, %q)", qc, cf, d)
		} else {
			s = fmt.Sprintf("g(%s, %s, %v, %v, %q, %q)", qc, qck, cf, cfk, d, dk)
		}

		current := lastInfo{c.ccc, c.nLeadingNonStarters, c.nTrailingNonStarters, s}
		if last != current {
			fmt.Fprintf(w, "\t{0x%x, %d, %d, %d, %s},\n", r, c.origCCC, c.nLeadingNonStarters, c.nTrailingNonStarters, s)
			last = current
		}
	}
	fmt.Fprintln(w, "}")
	gen.WriteVersionedGoFile("data_test.go", "norm", w.Bytes())
}

View file

@ -29,8 +29,8 @@ import (
// proceed independently on both sides:
// f(x) == append(f(x[0:n]), f(x[n:])...)
//
// References: http://unicode.org/reports/tr15/ and
// http://unicode.org/notes/tn5/.
// References: https://unicode.org/reports/tr15/ and
// https://unicode.org/notes/tn5/.
type Form int
const (

View file

@ -60,8 +60,8 @@ func (w *normWriter) Close() error {
}
// Writer returns a new writer that implements Write(b)
// by writing f(b) to w. The returned writer may use an
// an internal buffer to maintain state across Write calls.
// by writing f(b) to w. The returned writer may use an
// internal buffer to maintain state across Write calls.
// Calling its Close method writes any buffered data to w.
func (f Form) Writer(w io.Writer) io.WriteCloser {
wr := &normWriter{rb: reorderBuffer{}, w: w}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -18,7 +18,6 @@ func (Form) Reset() {}
// Users should either catch ErrShortDst and allow dst to grow or have dst be at
// least of size MaxTransformChunkSize to be guaranteed of progress.
func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
n := 0
// Cap the maximum number of src bytes to check.
b := src
eof := atEOF
@ -27,13 +26,14 @@ func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
eof = false
b = b[:ns]
}
i, ok := formTable[f].quickSpan(inputBytes(b), n, len(b), eof)
n += copy(dst[n:], b[n:i])
i, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), eof)
n := copy(dst, b[:i])
if !ok {
nDst, nSrc, err = f.transform(dst[n:], src[n:], atEOF)
return nDst + n, nSrc + n, err
}
if n < len(src) && !atEOF {
if err == nil && n < len(src) && !atEOF {
err = transform.ErrShortSrc
}
return n, n, err
@ -79,7 +79,7 @@ func (f Form) transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
nSrc += n
nDst += n
if ok {
if n < rb.nsrc && !atEOF {
if err == nil && n < rb.nsrc && !atEOF {
err = transform.ErrShortSrc
}
return nDst, nSrc, err

View file

@ -1,117 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Trie table generator.
// Used by make*tables tools to generate a go file with trie data structures
// for mapping UTF-8 to a 16-bit value. All but the last byte in a UTF-8 byte
// sequence are used to lookup offsets in the index table to be used for the
// next byte. The last byte is used to index into a table with 16-bit values.
package main
import (
"fmt"
"io"
)
// maxSparseEntries is the largest number of sparse entries a block may need
// for normCompacter.Size to accept it.
const maxSparseEntries = 16
// normCompacter collects trie blocks that are stored in sparse form and
// prints them as a pair of generated tables.
type normCompacter struct {
sparseBlocks [][]uint64 // the stored blocks, in Store order
sparseOffset []uint16 // for each stored block, the value of sparseCount when it was stored
sparseCount int // running total of value entries over all stored blocks
name string // prefix of the generated identifiers
}
// mostFrequentStride returns the most common non-negative difference between
// consecutive values of a, ignoring differences whose left-hand value is 0.
// Ties are resolved in favour of the smaller stride. The result is 0 when no
// difference qualifies.
func mostFrequentStride(a []uint64) int {
	counts := make(map[int]int)
	prev := 0
	for i := range a {
		cur := int(a[i])
		if prev != 0 {
			if stride := cur - prev; stride >= 0 {
				counts[stride]++
			}
		}
		prev = cur
	}

	best, bestCount := 0, 0
	for stride, n := range counts {
		switch {
		case n > bestCount, n == bestCount && stride < best:
			best, bestCount = stride, n
		}
	}
	return best
}
// countSparseEntries returns the number of non-zero values in a whose
// difference from the preceding value (0 for the first element) is not the
// most frequent stride of a.
func countSparseEntries(a []uint64) int {
	stride := mostFrequentStride(a)
	prev, n := 0, 0
	for _, x := range a {
		cur := int(x)
		if x != 0 && cur-prev != stride {
			n++
		}
		prev = cur
	}
	return n
}
// Size reports the size the block v would take in sparse form, and whether
// it can be stored that way at all. Blocks needing more than
// maxSparseEntries entries are rejected with (0, false).
func (c *normCompacter) Size(v []uint64) (sz int, ok bool) {
	n := countSparseEntries(v)
	if n > maxSparseEntries {
		return 0, false
	}
	return (n+1)*4 + 2, true
}
// Store records the block v for later printing and returns its handle, which
// is the index of the block among all stored blocks.
func (c *normCompacter) Store(v []uint64) uint32 {
	handle := uint32(len(c.sparseOffset))
	c.sparseOffset = append(c.sparseOffset, uint16(c.sparseCount))
	c.sparseBlocks = append(c.sparseBlocks, v)
	// One extra entry per block for its leading stride/count record.
	c.sparseCount += 1 + countSparseEntries(v)
	return handle
}
// Handler returns the compacter's name followed by "Sparse.lookup".
func (c *normCompacter) Handler() string {
return c.name + "Sparse.lookup"
}
// Print writes the <name>SparseOffset and <name>SparseValues tables for all
// stored blocks to w as Go source. It returns the first write error
// encountered, if any; later writes are still attempted.
func (c *normCompacter) Print(w io.Writer) (retErr error) {
// p formats to w and remembers only the first error.
p := func(f string, x ...interface{}) {
if _, err := fmt.Fprintf(w, f, x...); retErr == nil && err != nil {
retErr = err
}
}
ls := len(c.sparseBlocks)
p("// %sSparseOffset: %d entries, %d bytes\n", c.name, ls, ls*2)
p("var %sSparseOffset = %#v\n\n", c.name, c.sparseOffset)
ns := c.sparseCount
p("// %sSparseValues: %d entries, %d bytes\n", c.name, ns, ns*4)
p("var %sSparseValues = [%d]valueRange {", c.name, ns)
for i, b := range c.sparseBlocks {
p("\n// Block %#x, offset %#x", i, c.sparseOffset[i])
var v int
stride := mostFrequentStride(b)
n := countSparseEntries(b)
// The first record of a block holds its stride and its number of ranges.
p("\n{value:%#04x,lo:%#02x},", stride, uint8(n))
for i, nv := range b {
// A value that breaks the stride closes the open range (if any) and,
// when non-zero, opens a new one. Positions are offset by 0x80.
if int(nv)-v != stride {
if v != 0 {
p(",hi:%#02x},", 0x80+i-1)
}
if nv != 0 {
p("\n{value:%#04x,lo:%#02x", nv, 0x80+i)
}
}
v = int(nv)
}
// Close a range that is still open at the end of the block.
if v != 0 {
p(",hi:%#02x},", 0x80+len(b)-1)
}
}
p("\n}\n\n")
return
}

115
vendor/golang.org/x/text/width/gen.go generated vendored
View file

@ -1,115 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// This program generates the trie for width operations. The generated table
// includes width category information as well as the normalization mappings.
package main
import (
"bytes"
"fmt"
"io"
"log"
"math"
"unicode/utf8"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/triegen"
)
// main generates the width tables and the rune tests, then repackages the
// shared generator sources gen_trieval.go and gen_common.go as trieval.go and
// common_test.go in package width.
// See gen_common.go for flags.
func main() {
gen.Init()
genTables()
genTests()
gen.Repackage("gen_trieval.go", "trieval.go", "width")
gen.Repackage("gen_common.go", "common_test.go", "width")
}
// genTables builds the width trie and the inverseData mapping table from the
// data reported by getWidthData and writes both to the output file of
// package width.
func genTables() {
t := triegen.NewTrie("width")
// fold and inverse mappings. See mapComment for a description of the format
// of each entry. Add dummy value to make an index of 0 mean no mapping.
inverse := [][4]byte{{}}
mapping := map[[4]byte]int{[4]byte{}: 0}
getWidthData(func(r rune, tag elem, alt rune) {
idx := 0
if alt != 0 {
var buf [4]byte
// buf[0] is the UTF-8 length of alt; its encoding follows in buf[1:].
buf[0] = byte(utf8.EncodeRune(buf[1:], alt))
s := string(r)
// Xor the last byte of alt's encoding with the last byte of r's
// encoding so that the same entry can serve many mappings.
buf[buf[0]] ^= s[len(s)-1]
var ok bool
if idx, ok = mapping[buf]; !ok {
idx = len(mapping)
if idx > math.MaxUint8 {
log.Fatalf("Index %d does not fit in a byte.", idx)
}
mapping[buf] = idx
inverse = append(inverse, buf)
}
}
// The trie value combines the tag with the mapping index.
t.Insert(r, uint64(tag|elem(idx)))
})
w := &bytes.Buffer{}
gen.WriteUnicodeVersion(w)
sz, err := t.Gen(w)
if err != nil {
log.Fatal(err)
}
sz += writeMappings(w, inverse)
fmt.Fprintf(w, "// Total table size %d bytes (%dKiB)\n", sz, sz/1024)
gen.WriteVersionedGoFile(*outputFile, "width", w.Bytes())
}
// inverseDataComment is the comment block that writeMappings emits in front
// of the generated inverseData table; it documents the entry format.
const inverseDataComment = `
// inverseData contains 4-byte entries of the following format:
// <length> <modified UTF-8-encoded rune> <0 padding>
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
// UTF-8 encoding of the original rune. Mappings often have the following
// pattern:
// -> A (U+FF21 -> U+0041)
// -> B (U+FF22 -> U+0042)
// ...
// By xor-ing the last byte the same entry can be shared by many mappings. This
// reduces the total number of distinct entries by about two thirds.
// The resulting entry for the aforementioned mappings is
// { 0x01, 0xE0, 0x00, 0x00 }
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
// E0 ^ A1 = 41.
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
// E0 ^ A2 = 42.
// Note that because of the xor-ing, the byte sequence stored in the entry is
// not valid UTF-8.`
// writeMappings writes the inverseData table for the given entries to w,
// preceded by its explanatory comment, and returns the size of the table in
// bytes (four per entry).
func writeMappings(w io.Writer, data [][4]byte) int {
	fmt.Fprintln(w, inverseDataComment)
	fmt.Fprintf(w, "var inverseData = [%d][4]byte{\n", len(data))
	for i := range data {
		e := &data[i]
		fmt.Fprintf(w, "{ 0x%02x, 0x%02x, 0x%02x, 0x%02x },\n", e[0], e[1], e[2], e[3])
	}
	fmt.Fprintln(w, "}")
	return 4 * len(data)
}
// genTests generates runes_test.go for package width. The file holds a map
// from every rune that has an alternate form to that alternate and the
// rune's tag.
func genTests() {
	var buf bytes.Buffer
	fmt.Fprintf(&buf, "\nvar mapRunes = map[rune]struct{r rune; e elem}{\n")
	getWidthData(func(r rune, tag elem, alt rune) {
		if alt == 0 {
			// Runes without an alternate form are not listed.
			return
		}
		fmt.Fprintf(&buf, "\t0x%X: {0x%X, 0x%X},\n", r, alt, tag)
	})
	fmt.Fprintln(&buf, "}")
	gen.WriteGoFile("runes_test.go", "width", buf.Bytes())
}

View file

@ -1,96 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This code is shared between the main code generator and the test code.
import (
"flag"
"log"
"strconv"
"strings"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/ucd"
)
var (
// outputFile is the value of the -out flag: the file the generated
// tables are written to.
outputFile = flag.String("out", "tables.go", "output file")
)
// typeMap maps the width type strings read from EastAsianWidth.txt to the
// corresponding tag values.
var typeMap = map[string]elem{
"A": tagAmbiguous,
"N": tagNeutral,
"Na": tagNarrow,
"W": tagWide,
"F": tagFullwidth,
"H": tagHalfwidth,
}
// getWidthData calls f for every entry for which it is defined.
//
// f may be called multiple times for the same rune. The last call to f is the
// correct value. f is not called for all runes. The default tag type is
// Neutral.
//
// The arguments passed to f are the rune, its width tag (with tagNeedsFold
// set for fullwidth runes and for halfwidth runes other than the Won sign)
// and the rune it is paired with through a <wide> or <narrow> decomposition
// in UnicodeData.txt, or 0 if there is none.
func getWidthData(f func(r rune, tag elem, alt rune)) {
// Set the default values for Unified Ideographs. In line with Annex 11,
// we encode full ranges instead of the defined runes in Unified_Ideograph.
for _, b := range []struct{ lo, hi rune }{
{0x4E00, 0x9FFF}, // the CJK Unified Ideographs block,
{0x3400, 0x4DBF}, // the CJK Unified Ideographs Extension A block,
{0xF900, 0xFAFF}, // the CJK Compatibility Ideographs block,
{0x20000, 0x2FFFF}, // the Supplementary Ideographic Plane,
{0x30000, 0x3FFFF}, // the Tertiary Ideographic Plane,
} {
for r := b.lo; r <= b.hi; r++ {
f(r, tagWide, 0)
}
}
// inverse records each <wide>/<narrow> pairing in both directions.
inverse := map[rune]rune{}
maps := map[string]bool{
"<wide>": true,
"<narrow>": true,
}
// We cannot reuse package norm's decomposition, as we need an unexpanded
// decomposition. We make use of the opportunity to verify that the
// decomposition type is as expected.
ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
// s[0] is the decomposition tag and s[1] the remainder of the mapping.
s := strings.SplitN(p.String(ucd.DecompMapping), " ", 2)
if !maps[s[0]] {
return
}
x, err := strconv.ParseUint(s[1], 16, 32)
if err != nil {
log.Fatalf("Error parsing rune %q", s[1])
}
// Each rune may take part in at most one pairing.
if inverse[r] != 0 || inverse[rune(x)] != 0 {
log.Fatalf("Circular dependency in mapping between %U and %U", r, x)
}
inverse[r] = rune(x)
inverse[rune(x)] = r
})
// <rune range>;<type>
ucd.Parse(gen.OpenUCDFile("EastAsianWidth.txt"), func(p *ucd.Parser) {
tag, ok := typeMap[p.String(1)]
if !ok {
log.Fatalf("Unknown width type %q", p.String(1))
}
r := p.Rune(0)
alt, ok := inverse[r]
// && binds tighter than ||: fullwidth runes always need folding,
// halfwidth runes do unless the rune is the Won sign.
if tag == tagFullwidth || tag == tagHalfwidth && r != wonSign {
tag |= tagNeedsFold
if !ok {
log.Fatalf("Narrow or wide rune %U has no decomposition", r)
}
}
f(r, tag, alt)
})
}

View file

@ -1,34 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// elem is an entry of the width trie. The high byte is used to encode the type
// of the rune. The low byte is used to store the index to a mapping entry in
// the inverseData array.
//
// With numTypeBits = 3 the type tag occupies the top three bits of the value
// (typeShift = 13), and tagNeedsFold is the bit directly below them.
type elem uint16
// Width type tags, stored shifted left by typeShift.
const (
tagNeutral elem = iota << typeShift
tagAmbiguous
tagWide
tagNarrow
tagFullwidth
tagHalfwidth
)
const (
// numTypeBits is the number of bits reserved for the type tag and
// typeShift the position of its lowest bit within an elem.
numTypeBits = 3
typeShift = 16 - numTypeBits
// tagNeedsFold is true for all fullwidth and halfwidth runes except for
// the Won sign U+20A9.
tagNeedsFold = 0x1000
// The Korean Won sign is halfwidth, but SHOULD NOT be mapped to a wide
// variant.
wonSign rune = 0x20A9
)

View file

@ -2,7 +2,7 @@
package width
import "fmt"
import "strconv"
const _Kind_name = "NeutralEastAsianAmbiguousEastAsianWideEastAsianNarrowEastAsianFullwidthEastAsianHalfwidth"
@ -10,7 +10,7 @@ var _Kind_index = [...]uint8{0, 7, 25, 38, 53, 71, 89}
func (i Kind) String() string {
if i < 0 || i >= Kind(len(_Kind_index)-1) {
return fmt.Sprintf("Kind(%d)", i)
return "Kind(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _Kind_name[_Kind_index[i]:_Kind_index[i+1]]
}

View file

@ -12,7 +12,7 @@
// are kept together in words or runs that are rotated sideways in vertical text
// layout.
//
// For more information, see http://unicode.org/reports/tr11/.
// For more information, see https://unicode.org/reports/tr11/.
package width // import "golang.org/x/text/width"
import (
@ -27,7 +27,7 @@ import (
// (approximation, fixed pitch only).
// 3) Implement display length.
// Kind indicates the type of width property as defined in http://unicode.org/reports/tr11/.
// Kind indicates the type of width property as defined in https://unicode.org/reports/tr11/.
type Kind int
const (
@ -106,7 +106,7 @@ func (e elem) kind() Kind {
}
// Kind returns the Kind of a rune as defined in Unicode TR #11.
// See http://unicode.org/reports/tr11/ for more details.
// See https://unicode.org/reports/tr11/ for more details.
func (p Properties) Kind() Kind {
return p.elem.kind()
}