// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package bidirule implements the Bidi Rule defined by RFC 5893.
//
// This package is under development. The API may change without notice and
// without preserving backward compatibility.
package bidirule import ( ) // This file contains an implementation of RFC 5893: Right-to-Left Scripts for // Internationalized Domain Names for Applications (IDNA) // // A label is an individual component of a domain name. Labels are usually // shown separated by dots; for example, the domain name "www.example.com" is // composed of three labels: "www", "example", and "com". // // An RTL label is a label that contains at least one character of class R, AL, // or AN. An LTR label is any label that is not an RTL label. // // A "Bidi domain name" is a domain name that contains at least one RTL label. // // The following guarantees can be made based on the above: // // o In a domain name consisting of only labels that satisfy the rule, // the requirements of Section 3 are satisfied. Note that even LTR // labels and pure ASCII labels have to be tested. // // o In a domain name consisting of only LDH labels (as defined in the // Definitions document [RFC5890]) and labels that satisfy the rule, // the requirements of Section 3 are satisfied as long as a label // that starts with an ASCII digit does not come after a // right-to-left label. // // No guarantee is given for other combinations. // ErrInvalid indicates a label is invalid according to the Bidi Rule. var ErrInvalid = errors.New("bidirule: failed Bidi Rule") type ruleState uint8 const ( ruleInitial ruleState = iota ruleLTR ruleLTRFinal ruleRTL ruleRTLFinal ruleInvalid ) type ruleTransition struct { next ruleState mask uint16 } var transitions = [...][2]ruleTransition{ // [2.1] The first character must be a character with Bidi property L, R, or // AL. If it has the R or AL property, it is an RTL label; if it has the L // property, it is an LTR label. ruleInitial: { {ruleLTRFinal, 1 << bidi.L}, {ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL}, }, ruleRTL: { // [2.3] In an RTL label, the end of the label must be a character with // Bidi property R, AL, EN, or AN, followed by zero or more characters // with Bidi property NSM. 
{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN}, // [2.2] In an RTL label, only characters with the Bidi properties R, // AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed. // We exclude the entries from [2.3] {ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM}, }, ruleRTLFinal: { // [2.3] In an RTL label, the end of the label must be a character with // Bidi property R, AL, EN, or AN, followed by zero or more characters // with Bidi property NSM. {ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN | 1<<bidi.NSM}, // [2.2] In an RTL label, only characters with the Bidi properties R, // AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed. // We exclude the entries from [2.3] and NSM. {ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN}, }, ruleLTR: { // [2.6] In an LTR label, the end of the label must be a character with // Bidi property L or EN, followed by zero or more characters with Bidi // property NSM. {ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN}, // [2.5] In an LTR label, only characters with the Bidi properties L, // EN, ES, CS, ET, ON, BN, or NSM are allowed. // We exclude the entries from [2.6]. {ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM}, }, ruleLTRFinal: { // [2.6] In an LTR label, the end of the label must be a character with // Bidi property L or EN, followed by zero or more characters with Bidi // property NSM. {ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN | 1<<bidi.NSM}, // [2.5] In an LTR label, only characters with the Bidi properties L, // EN, ES, CS, ET, ON, BN, or NSM are allowed. // We exclude the entries from [2.6]. {ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN}, }, ruleInvalid: { {ruleInvalid, 0}, {ruleInvalid, 0}, }, } // [2.4] In an RTL label, if an EN is present, no AN may be present, and // vice versa. 
const exclusiveRTL = uint16(1<<bidi.EN | 1<<bidi.AN) // From RFC 5893 // An RTL label is a label that contains at least one character of type // R, AL, or AN. // // An LTR label is any label that is not an RTL label. // Direction reports the direction of the given label as defined by RFC 5893. // The Bidi Rule does not have to be applied to labels of the category // LeftToRight. func ( []byte) bidi.Direction { for := 0; < len(); { , := bidi.Lookup([:]) if == 0 { ++ } := .Class() if == bidi.R || == bidi.AL || == bidi.AN { return bidi.RightToLeft } += } return bidi.LeftToRight } // DirectionString reports the direction of the given label as defined by RFC // 5893. The Bidi Rule does not have to be applied to labels of the category // LeftToRight. func ( string) bidi.Direction { for := 0; < len(); { , := bidi.LookupString([:]) if == 0 { ++ continue } := .Class() if == bidi.R || == bidi.AL || == bidi.AN { return bidi.RightToLeft } += } return bidi.LeftToRight } // Valid reports whether b conforms to the BiDi rule. func ( []byte) bool { var Transformer if , := .advance(); ! || < len() { return false } return .isFinal() } // ValidString reports whether s conforms to the BiDi rule. func ( string) bool { var Transformer if , := .advanceString(); ! || < len() { return false } return .isFinal() } // New returns a Transformer that verifies that input adheres to the Bidi Rule. func () *Transformer { return &Transformer{} } // Transformer implements transform.Transform. type Transformer struct { state ruleState hasRTL bool seen uint16 } // A rule can only be violated for "Bidi Domain names", meaning if one of the // following categories has been observed. func ( *Transformer) () bool { const = 1<<bidi.R | 1<<bidi.AL | 1<<bidi.AN return .seen& != 0 } // Reset implements transform.Transformer. func ( *Transformer) () { * = Transformer{} } // Transform implements transform.Transformer. This Transformer has state and // needs to be reset between uses. 
func ( *Transformer) (, []byte, bool) (, int, error) { if len() < len() { = [:len()] = false = transform.ErrShortDst } , := .Span(, ) copy(, [:]) if == nil || != nil && != transform.ErrShortSrc { = } return , , } // Span returns the first n bytes of src that conform to the Bidi rule. func ( *Transformer) ( []byte, bool) ( int, error) { if .state == ruleInvalid && .isRTL() { return 0, ErrInvalid } , := .advance() switch { case !: = ErrInvalid case < len(): if ! { = transform.ErrShortSrc break } = ErrInvalid case !.isFinal(): = ErrInvalid } return , } // Precomputing the ASCII values decreases running time for the ASCII fast path // by about 30%. var asciiTable [128]bidi.Properties func init() { for := range asciiTable { , := bidi.LookupRune(rune()) asciiTable[] = } } func ( *Transformer) ( []byte) ( int, bool) { var bidi.Properties var int for < len() { if [] < utf8.RuneSelf { , = asciiTable[[]], 1 } else { , = bidi.Lookup([:]) if <= 1 { if == 1 { // We always consider invalid UTF-8 to be invalid, even if // the string has not yet been determined to be RTL. // TODO: is this correct? return , false } return , true // incomplete UTF-8 encoding } } // TODO: using CompactClass would result in noticeable speedup. // See unicode/bidi/prop.go:Properties.CompactClass. := uint16(1 << .Class()) .seen |= if .seen&exclusiveRTL == exclusiveRTL { .state = ruleInvalid return , false } switch := transitions[.state]; { case [0].mask& != 0: .state = [0].next case [1].mask& != 0: .state = [1].next default: .state = ruleInvalid if .isRTL() { return , false } } += } return , true } func ( *Transformer) ( string) ( int, bool) { var bidi.Properties var int for < len() { if [] < utf8.RuneSelf { , = asciiTable[[]], 1 } else { , = bidi.LookupString([:]) if <= 1 { if == 1 { return , false // invalid UTF-8 } return , true // incomplete UTF-8 encoding } } // TODO: using CompactClass results in noticeable speedup. // See unicode/bidi/prop.go:Properties.CompactClass. 
:= uint16(1 << .Class()) .seen |= if .seen&exclusiveRTL == exclusiveRTL { .state = ruleInvalid return , false } switch := transitions[.state]; { case [0].mask& != 0: .state = [0].next case [1].mask& != 0: .state = [1].next default: .state = ruleInvalid if .isRTL() { return , false } } += } return , true }