// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build go1.10

// Package idna implements IDNA2008 using the compatibility processing
// defined by UTS (Unicode Technical Standard) #46, which defines a standard to
// deal with the transition from IDNA2003.
//
// IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC
// 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
// UTS #46 is defined in https://www.unicode.org/reports/tr46.
// See https://unicode.org/cldr/utility/idna.jsp for a visualization of the
// differences between these two standards.
package idna // import "golang.org/x/net/idna"

import (
	"fmt"
	"strings"
	"unicode/utf8"

	"golang.org/x/text/secure/bidirule"
	"golang.org/x/text/unicode/bidi"
	"golang.org/x/text/unicode/norm"
)

// NOTE: Unlike common practice in Go APIs, the functions will return a
// sanitized domain name in case of errors. Browsers sometimes use a partially
// evaluated string as lookup.
// TODO: the current error handling is, in my opinion, the least opinionated.
// Other strategies are also viable, though:
// Option 1) Return an empty string in case of error, but allow the user to
//    specify explicitly which errors to ignore.
// Option 2) Return the partially evaluated string if it is itself a valid
//    string, otherwise return the empty string in case of error.
// Option 3) Option 1 and 2.
// Option 4) Always return an empty string for now and implement Option 1 as
//    needed, and document that the return string may not be empty in case of
//    error in the future.
// I think Option 1 is best, but it is quite opinionated.

// ToASCII is a wrapper for Punycode.ToASCII.
func ToASCII(s string) (string, error) {
	return Punycode.process(s, true)
}

// ToUnicode is a wrapper for Punycode.ToUnicode.
func ToUnicode(s string) (string, error) {
	return Punycode.process(s, false)
}

// An Option configures a Profile at creation time.
type Option func(*options)

// Transitional sets a Profile to use the Transitional mapping as defined in UTS
// #46. This will cause, for example, "ß" to be mapped to "ss". Using the
// transitional mapping provides a compromise between IDNA2003 and IDNA2008
// compatibility. It is used by some browsers when resolving domain names. This
// option is only meaningful if combined with MapForLookup.
func Transitional(transitional bool) Option {
	return func(o *options) { o.transitional = transitional }
}

// VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
// are longer than allowed by the RFC.
//
// This option corresponds to the VerifyDnsLength flag in UTS #46.
func VerifyDNSLength(verify bool) Option {
	return func(o *options) { o.verifyDNSLength = verify }
}

// RemoveLeadingDots removes leading label separators. Leading runes that map to
// dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
func RemoveLeadingDots(remove bool) Option {
	return func(o *options) { o.removeLeadingDots = remove }
}

// ValidateLabels sets whether to check the mandatory label validation criteria
// as defined in Section 5.4 of RFC 5891. This includes testing for correct use
// of hyphens ('-'), normalization, validity of runes, and the context rules.
// In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags
// in UTS #46.
func ValidateLabels(enable bool) Option {
	return func(o *options) {
		// Don't override existing mappings, but set one that at least checks
		// normalization if it is not set.
		if o.mapping == nil && enable {
			o.mapping = normalize
		}
		o.trie = trie
		o.checkJoiners = enable
		o.checkHyphens = enable
		if enable {
			o.fromPuny = validateFromPunycode
		} else {
			o.fromPuny = nil
		}
	}
}

// CheckHyphens sets whether to check for correct use of hyphens ('-') in
// labels. Most web browsers do not have this option set, since labels such as
// "r3---sn-apo3qvuoxuxbt-j5pe" are in common use.
//
// This option corresponds to the CheckHyphens flag in UTS #46.
func CheckHyphens(enable bool) Option {
	return func(o *options) { o.checkHyphens = enable }
}

// CheckJoiners sets whether to check the ContextJ rules as defined in Appendix
// A of RFC 5892, concerning the use of joiner runes.
//
// This option corresponds to the CheckJoiners flag in UTS #46.
func CheckJoiners(enable bool) Option {
	return func(o *options) {
		// validateLabel reads the trie whenever checkJoiners is set, so the
		// two are always configured together.
		o.trie = trie
		o.checkJoiners = enable
	}
}

// StrictDomainName limits the set of permissible ASCII characters to those
// allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
// hyphen). This is set by default for MapForLookup and ValidateForRegistration,
// but is only useful if ValidateLabels is set.
//
// This option is useful, for instance, for browsers that allow characters
// outside this range, for example a '_' (U+005F LOW LINE). See
// http://www.rfc-editor.org/std/std3.txt for more details.
//
// This option corresponds to the UseSTD3ASCIIRules flag in UTS #46.
func StrictDomainName(use bool) Option {
	return func(o *options) { o.useSTD3Rules = use }
}

// NOTE: the following options pull in tables. The tables should not be linked
// in as long as the options are not used.

// BidiRule enables the Bidi rule as defined in RFC 5893. Any application
// that relies on proper validation of labels should include this rule.
//
// This option corresponds to the CheckBidi flag in UTS #46.
func BidiRule() Option {
	return func(o *options) { o.bidirule = bidirule.ValidString }
}

// ValidateForRegistration sets validation options to verify that a given IDN is
// properly formatted for registration as defined by Section 4 of RFC 5891.
func ValidateForRegistration() Option {
	return func(o *options) {
		o.mapping = validateRegistration
		StrictDomainName(true)(o)
		ValidateLabels(true)(o)
		VerifyDNSLength(true)(o)
		BidiRule()(o)
	}
}

// MapForLookup sets validation and mapping options such that a given IDN is
// transformed for domain name lookup according to the requirements set out in
// Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894,
// RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option
// to add this check.
//
// The mappings include normalization and mapping case, width and other
// compatibility mappings.
func MapForLookup() Option {
	return func(o *options) {
		o.mapping = validateAndMap
		StrictDomainName(true)(o)
		ValidateLabels(true)(o)
	}
}

// options holds the settings that the Option functions modify and that a
// Profile embeds.
type options struct {
	transitional      bool
	useSTD3Rules      bool
	checkHyphens      bool
	checkJoiners      bool
	verifyDNSLength   bool
	removeLeadingDots bool

	trie *idnaTrie

	// fromPuny calls validation rules when converting A-labels to U-labels.
	fromPuny func(p *Profile, s string) error

	// mapping implements a validation and mapping step as defined in RFC 5895
	// or UTS 46, tailored to, for example, domain registration or lookup.
	mapping func(p *Profile, s string) (mapped string, isBidi bool, err error)

	// bidirule, if specified, checks whether s conforms to the Bidi Rule
	// defined in RFC 5893.
	bidirule func(s string) bool
}

// A Profile defines the configuration of an IDNA mapper.
type Profile struct {
	options
}

// apply runs every Option in opts against o, in slice order, so later options
// overwrite the effect of earlier ones.
func apply(o *options, opts []Option) {
	for _, f := range opts {
		f(o)
	}
}

// New creates a new Profile.
//
// With no options, the returned Profile is the most permissive and equals the
// Punycode Profile. Options can be passed to further restrict the Profile. The
// MapForLookup and ValidateForRegistration options set a collection of options,
// for lookup and registration purposes respectively, which can be tailored by
// adding more fine-grained options, where later options override earlier
// options.
func New(o ...Option) *Profile {
	p := &Profile{}
	apply(&p.options, o)
	return p
}

// ToASCII converts a domain or domain label to its ASCII form. For example,
// ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
// ToASCII("golang") is "golang". If an error is encountered it will return
// an error and a (partially) processed result.
func (p *Profile) ToASCII(s string) (string, error) {
	return p.process(s, true)
}

// ToUnicode converts a domain or domain label to its Unicode form. For example,
// ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
// ToUnicode("golang") is "golang". If an error is encountered it will return
// an error and a (partially) processed result.
func (p *Profile) ToUnicode(s string) (string, error) {
	// Work on a copy so that clearing the transitional flag does not modify
	// the receiver.
	pp := *p
	pp.transitional = false
	return pp.process(s, false)
}

// String reports a string with a description of the profile for debugging
// purposes. The string format may change with different versions.
func (p *Profile) String() string {
	s := ""
	if p.transitional {
		s = "Transitional"
	} else {
		s = "NonTransitional"
	}
	if p.useSTD3Rules {
		s += ":UseSTD3Rules"
	}
	if p.checkHyphens {
		s += ":CheckHyphens"
	}
	if p.checkJoiners {
		s += ":CheckJoiners"
	}
	if p.verifyDNSLength {
		s += ":VerifyDNSLength"
	}
	return s
}

var (
	// Punycode is a Profile that does raw punycode processing with a minimum
	// of validation.
	Punycode *Profile = punycode

	// Lookup is the recommended profile for looking up domain names, according
	// to Section 5 of RFC 5891. The exact configuration of this profile may
	// change over time.
	Lookup *Profile = lookup

	// Display is the recommended profile for displaying domain names.
	// The configuration of this profile may change over time.
	Display *Profile = display

	// Registration is the recommended profile for checking whether a given
	// IDN is valid for registration, according to Section 4 of RFC 5891.
	Registration *Profile = registration

	punycode = &Profile{}
	lookup   = &Profile{options{
		transitional: transitionalLookup,
		useSTD3Rules: true,
		checkHyphens: true,
		checkJoiners: true,
		trie:         trie,
		fromPuny:     validateFromPunycode,
		mapping:      validateAndMap,
		bidirule:     bidirule.ValidString,
	}}
	display = &Profile{options{
		useSTD3Rules: true,
		checkHyphens: true,
		checkJoiners: true,
		trie:         trie,
		fromPuny:     validateFromPunycode,
		mapping:      validateAndMap,
		bidirule:     bidirule.ValidString,
	}}
	registration = &Profile{options{
		useSTD3Rules:    true,
		verifyDNSLength: true,
		checkHyphens:    true,
		checkJoiners:    true,
		trie:            trie,
		fromPuny:        validateFromPunycode,
		mapping:         validateRegistration,
		bidirule:        bidirule.ValidString,
	}}

	// TODO: profiles
	// Register: recommended for approving domain names: don't do any mappings
	// but rather reject on invalid input. Bundle or block deviation characters.
)

// labelError reports a label or domain that failed a validation rule. code_
// holds the short rule identifier used in this file ("A4", "B", "C", "V1",
// "V2", "V3", "V5", "V6").
type labelError struct{ label, code_ string }

// code returns the rule identifier stored in the error.
func (e labelError) code() string { return e.code_ }

// Error implements the error interface; the message quotes the offending label.
func (e labelError) Error() string {
	return fmt.Sprintf("idna: invalid label %q", e.label)
}

// runeError reports a rune that is not permitted by the active profile.
type runeError rune

// code returns the rule identifier for a disallowed rune, which is always "P1".
func (e runeError) code() string { return "P1" }

// Error implements the error interface; the message shows the rune in U+XXXX
// form.
func (e runeError) Error() string {
	return fmt.Sprintf("idna: disallowed rune %U", e)
}

// process implements the algorithm described in section 4 of UTS #46,
// see https://www.unicode.org/reports/tr46.
//
// s is the domain or label to convert and toASCII selects the output form:
// true encodes non-ASCII labels with the ACE prefix, false leaves decoded
// labels as Unicode. The first error encountered is retained while processing
// continues, so the returned string is a (partially) processed result even
// when the error is non-nil.
func (p *Profile) process(s string, toASCII bool) (string, error) {
	var err error
	var isBidi bool
	if p.mapping != nil {
		s, isBidi, err = p.mapping(p, s)
	}
	// Remove leading empty labels.
	if p.removeLeadingDots {
		for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
		}
	}
	// TODO: allow for a quick check of the tables data.
	// It seems like we should only create this error on ToASCII, but the
	// UTS 46 conformance tests suggests we should always check this.
	if err == nil && p.verifyDNSLength && s == "" {
		err = &labelError{s, "A4"}
	}
	labels := labelIter{orig: s}
	for ; !labels.done(); labels.next() {
		label := labels.label()
		if label == "" {
			// Empty labels are not okay. The label iterator skips the last
			// label if it is empty.
			if err == nil && p.verifyDNSLength {
				err = &labelError{s, "A4"}
			}
			continue
		}
		if strings.HasPrefix(label, acePrefix) {
			u, err2 := decode(label[len(acePrefix):])
			if err2 != nil {
				if err == nil {
					err = err2
				}
				// Spec says keep the old label.
				continue
			}
			isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight
			labels.set(u)
			if err == nil && p.fromPuny != nil {
				err = p.fromPuny(p, u)
			}
			if err == nil {
				// This should be called on NonTransitional, according to the
				// spec, but that currently does not have any effect. Use the
				// original profile to preserve options.
				err = p.validateLabel(u)
			}
		} else if err == nil {
			err = p.validateLabel(label)
		}
	}
	// The Bidi rule is only checked when some label was found to be bidi and
	// no earlier error is pending.
	if isBidi && p.bidirule != nil && err == nil {
		for labels.reset(); !labels.done(); labels.next() {
			if !p.bidirule(labels.label()) {
				err = &labelError{s, "B"}
				break
			}
		}
	}
	if toASCII {
		for labels.reset(); !labels.done(); labels.next() {
			label := labels.label()
			if !ascii(label) {
				a, err2 := encode(acePrefix, label)
				if err == nil {
					err = err2
				}
				label = a
				labels.set(a)
			}
			n := len(label)
			if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
				err = &labelError{label, "A4"}
			}
		}
	}
	s = labels.result()
	if toASCII && p.verifyDNSLength && err == nil {
		// Compute the length of the domain name minus the root label and its dot.
		n := len(s)
		if n > 0 && s[n-1] == '.' {
			n--
		}
		if len(s) < 1 || n > 253 {
			err = &labelError{s, "A4"}
		}
	}
	return s, err
}

// normalize is the mapping step that ValidateLabels installs when no other
// mapping is set. It returns s converted to NFC and reports whether
// bidirule.DirectionString classifies the result as right-to-left. The
// returned error is always nil.
func normalize(p *Profile, s string) (mapped string, isBidi bool, err error) {
	// TODO: consider first doing a quick check to see if any of these checks
	// need to be done. This will make it slower in the general case, but
	// faster in the common case.
	mapped = norm.NFC.String(s)
	isBidi = bidirule.DirectionString(mapped) == bidi.RightToLeft
	return mapped, isBidi, nil
}

// validateRegistration is the mapping step used for registration. It never
// rewrites its input: s is returned unchanged in every case. It fails with a
// "V1" labelError if s is not in NFC, and with a runeError at the first rune
// that cannot be looked up or whose simplified category is anything other
// than valid or deviation. isBidi reports whether any rune seen so far is
// right-to-left.
func validateRegistration(p *Profile, s string) (idem string, isBidi bool, err error) {
	// TODO: filter need for normalization in loop below.
	if !norm.NFC.IsNormalString(s) {
		return s, false, &labelError{s, "V1"}
	}
	for i := 0; i < len(s); {
		v, sz := trie.lookupString(s[i:])
		if sz == 0 {
			return s, isBidi, runeError(utf8.RuneError)
		}
		isBidi = isBidi || info(v).isBidi(s[i:])
		// Copy bytes not copied so far.
		switch p.simplify(info(v).category()) {
		// TODO: handle the NV8 defined in the Unicode idna data set to allow
		// for strict conformance to IDNA2008.
		case valid, deviation:
		case disallowed, mapped, unknown, ignored:
			r, _ := utf8.DecodeRuneInString(s[i:])
			return s, isBidi, runeError(r)
		}
		i += sz
	}
	return s, isBidi, nil
}

// isBidi reports whether the rune at the start of s counts as right-to-left.
// For entries that are not mapped the answer is taken from the entry's
// attribute bits; for mapped entries the bidi class of the rune is looked up
// and R, AL and AN are treated as right-to-left.
func (c info) isBidi(s string) bool {
	if !c.isMapped() {
		return c&attributesMask == rtl
	}
	// TODO: also store bidi info for mapped data. This is possible, but a bit
	// cumbersome and not for the common case.
	p, _ := bidi.LookupString(s)
	switch p.Class() {
	case bidi.R, bidi.AL, bidi.AN:
		return true
	}
	return false
}

// validateAndMap is the mapping step used for lookup and display. It walks s
// rune by rune, applying the table mapping to mapped and deviation runes,
// dropping ignored runes, and substituting U+FFFD for unknown or undecodable
// input. Disallowed runes are left in place and recorded as the error. Only
// the first error is kept. The result is NFC-normalized when needed. isBidi
// reports whether any rune seen is right-to-left.
func validateAndMap(p *Profile, s string) (vm string, isBidi bool, err error) {
	var (
		b []byte // output buffer; stays nil while no rune needed rewriting
		k int    // index into s of the first byte not yet copied to b
	)
	// combinedInfoBits contains the or-ed bits of all runes. We use this
	// to derive the mayNeedNorm bit later. This may trigger normalization
	// overeagerly, but it will not do so in the common case. The end result
	// is another 10% saving on BenchmarkProfile for the common case.
	var combinedInfoBits info
	for i := 0; i < len(s); {
		v, sz := trie.lookupString(s[i:])
		if sz == 0 {
			b = append(b, s[k:i]...)
			b = append(b, "\ufffd"...)
			k = len(s)
			if err == nil {
				err = runeError(utf8.RuneError)
			}
			break
		}
		combinedInfoBits |= info(v)
		isBidi = isBidi || info(v).isBidi(s[i:])
		start := i
		i += sz
		// Copy bytes not copied so far.
		switch p.simplify(info(v).category()) {
		case valid:
			continue
		case disallowed:
			if err == nil {
				r, _ := utf8.DecodeRuneInString(s[start:])
				err = runeError(r)
			}
			continue
		case mapped, deviation:
			b = append(b, s[k:start]...)
			b = info(v).appendMapping(b, s[start:i])
		case ignored:
			b = append(b, s[k:start]...)
			// drop the rune
		case unknown:
			b = append(b, s[k:start]...)
			b = append(b, "\ufffd"...)
		}
		k = i
	}
	if k == 0 {
		// No changes so far.
		if combinedInfoBits&mayNeedNorm != 0 {
			s = norm.NFC.String(s)
		}
	} else {
		b = append(b, s[k:]...)
		if norm.NFC.QuickSpan(b) != len(b) {
			b = norm.NFC.Bytes(b)
		}
		// TODO: the punycode converters require strings as input.
		s = string(b)
	}
	return s, isBidi, err
}

// A labelIter allows iterating over domain name labels.
//
// It starts out scanning orig directly. The first call to set splits orig
// into slice, after which labels are read from and written to slice by index.
type labelIter struct {
	orig     string
	slice    []string
	curStart int
	curEnd   int
	i        int
}

// reset rewinds the iterator to the first label.
func (l *labelIter) reset() {
	l.curStart = 0
	l.curEnd = 0
	l.i = 0
}

// done reports whether iteration has moved past the last label.
func (l *labelIter) done() bool {
	return l.curStart >= len(l.orig)
}

// result returns the domain name: the labels joined with "." if any label was
// replaced through set, and the original string otherwise.
func (l *labelIter) result() string {
	if l.slice != nil {
		return strings.Join(l.slice, ".")
	}
	return l.orig
}

// label returns the current label. When scanning orig directly it also
// records the end of the label in curEnd, which next relies on.
func (l *labelIter) label() string {
	if l.slice != nil {
		return l.slice[l.i]
	}
	p := strings.IndexByte(l.orig[l.curStart:], '.')
	l.curEnd = l.curStart + p
	if p == -1 {
		l.curEnd = len(l.orig)
	}
	return l.orig[l.curStart:l.curEnd]
}

// next sets the value to the next label. It skips the last label if it is empty.
func (l *labelIter) next() {
	l.i++
	if l.slice != nil {
		if l.i >= len(l.slice) || l.i == len(l.slice)-1 && l.slice[l.i] == "" {
			l.curStart = len(l.orig)
		}
	} else {
		l.curStart = l.curEnd + 1
		if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' {
			l.curStart = len(l.orig)
		}
	}
}

// set replaces the current label with s, splitting orig into slice on first
// use.
func (l *labelIter) set(s string) {
	if l.slice == nil {
		l.slice = strings.Split(l.orig, ".")
	}
	l.slice[l.i] = s
}

// acePrefix is the ASCII Compatible Encoding prefix.
const acePrefix = "xn--"

// simplify reduces cat to one of the basic categories according to the
// profile's flags: the STD3 categories become disallowed when useSTD3Rules is
// set and mapped/valid otherwise, deviation becomes valid unless the profile
// is transitional, and validNV8/validXV8 become valid. Any other category is
// returned unchanged.
func (p *Profile) simplify(cat category) category {
	switch cat {
	case disallowedSTD3Mapped:
		if p.useSTD3Rules {
			cat = disallowed
		} else {
			cat = mapped
		}
	case disallowedSTD3Valid:
		if p.useSTD3Rules {
			cat = disallowed
		} else {
			cat = valid
		}
	case deviation:
		if !p.transitional {
			cat = valid
		}
	case validNV8, validXV8:
		// TODO: handle V2008
		cat = valid
	}
	return cat
}

// validateFromPunycode checks a label that was decoded from its ACE form. It
// returns a "V1" labelError if s is not in NFC, a runeError if a rune cannot
// be looked up, and a "V6" labelError if any rune's simplified category is
// neither valid nor deviation.
func validateFromPunycode(p *Profile, s string) error {
	if !norm.NFC.IsNormalString(s) {
		return &labelError{s, "V1"}
	}
	// TODO: detect whether string may have to be normalized in the following
	// loop.
	for i := 0; i < len(s); {
		v, sz := trie.lookupString(s[i:])
		if sz == 0 {
			return runeError(utf8.RuneError)
		}
		if c := p.simplify(info(v).category()); c != valid && c != deviation {
			return &labelError{s, "V6"}
		}
		i += sz
	}
	return nil
}

// The zero-width joiner runes examined by validateLabel.
const (
	zwnj = "\u200c"
	zwj  = "\u200d"
)

// joinState is a state of the joiner-checking state machine driven by
// validateLabel.
type joinState int8

const (
	stateStart joinState = iota
	stateVirama
	stateBefore
	stateBeforeVirama
	stateAfter
	stateFAIL
)

// joinStates is the transition table of the joiner state machine, indexed by
// current state and then by join type. Entries that are not listed default to
// the zero value, stateStart.
var joinStates = [][numJoinTypes]joinState{
	stateStart: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateVirama,
	},
	stateVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
	},
	stateBefore: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joiningT:   stateBefore,
		joinZWNJ:   stateAfter,
		joinZWJ:    stateFAIL,
		joinVirama: stateBeforeVirama,
	},
	stateBeforeVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
		joiningT: stateBefore,
	},
	stateAfter: {
		joiningL:   stateFAIL,
		joiningD:   stateBefore,
		joiningT:   stateAfter,
		joiningR:   stateStart,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateAfter, // no-op as we can't accept joiners here
	},
	stateFAIL: {
		0:          stateFAIL,
		joiningL:   stateFAIL,
		joiningD:   stateFAIL,
		joiningT:   stateFAIL,
		joiningR:   stateFAIL,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateFAIL,
	},
}

// validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
// already implicitly satisfied by the overall implementation.
//
// It returns a labelError with code "A4" for an empty label when
// verifyDNSLength is set, "V2" or "V3" for hyphen violations when checkHyphens
// is set, and, when checkJoiners is set, "V5" for a leading modifier or "C"
// when the joiner state machine ends in stateFAIL or stateAfter.
func (p *Profile) validateLabel(s string) (err error) {
	if s == "" {
		if p.verifyDNSLength {
			return &labelError{s, "A4"}
		}
		return nil
	}
	if p.checkHyphens {
		if len(s) > 4 && s[2] == '-' && s[3] == '-' {
			return &labelError{s, "V2"}
		}
		if s[0] == '-' || s[len(s)-1] == '-' {
			return &labelError{s, "V3"}
		}
	}
	if !p.checkJoiners {
		return nil
	}
	trie := p.trie // p.checkJoiners is only set if trie is set.
	// TODO: merge the use of this in the trie.
	v, sz := trie.lookupString(s)
	x := info(v)
	if x.isModifier() {
		return &labelError{s, "V5"}
	}
	// Quickly return in the absence of zero-width (non) joiners.
	if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 {
		return nil
	}
	st := stateStart
	for i := 0; ; {
		jt := x.joinType()
		if s[i:i+sz] == zwj {
			jt = joinZWJ
		} else if s[i:i+sz] == zwnj {
			jt = joinZWNJ
		}
		st = joinStates[st][jt]
		if x.isViramaModifier() {
			st = joinStates[st][joinVirama]
		}
		if i += sz; i == len(s) {
			break
		}
		v, sz = trie.lookupString(s[i:])
		x = info(v)
	}
	if st == stateFAIL || st == stateAfter {
		return &labelError{s, "C"}
	}
	return nil
}

// ascii reports whether s consists solely of bytes below utf8.RuneSelf.
func ascii(s string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] >= utf8.RuneSelf {
			return false
		}
	}
	return true
}