Source File
bidirule.go
Belonging Package
vendor/golang.org/x/text/secure/bidirule
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package bidirule implements the Bidi Rule defined by RFC 5893.
//
// This package is under development. The API may change without notice and
// without preserving backward compatibility.
package bidirule
import (
	"errors"
	"unicode/utf8"

	"golang.org/x/text/transform"
	"golang.org/x/text/unicode/bidi"
)
// This file contains an implementation of RFC 5893: Right-to-Left Scripts for
// Internationalized Domain Names for Applications (IDNA)
//
// A label is an individual component of a domain name. Labels are usually
// shown separated by dots; for example, the domain name "www.example.com" is
// composed of three labels: "www", "example", and "com".
//
// An RTL label is a label that contains at least one character of class R, AL,
// or AN. An LTR label is any label that is not an RTL label.
//
// A "Bidi domain name" is a domain name that contains at least one RTL label.
//
// The following guarantees can be made based on the above:
//
// o In a domain name consisting of only labels that satisfy the rule,
// the requirements of Section 3 are satisfied. Note that even LTR
// labels and pure ASCII labels have to be tested.
//
// o In a domain name consisting of only LDH labels (as defined in the
// Definitions document [RFC5890]) and labels that satisfy the rule,
// the requirements of Section 3 are satisfied as long as a label
// that starts with an ASCII digit does not come after a
// right-to-left label.
//
// No guarantee is given for other combinations.
var (
	// ErrInvalid is the error value used to signal that a label does not
	// satisfy the Bidi Rule.
	ErrInvalid = errors.New("bidirule: failed Bidi Rule")
)
// ruleState identifies a state of the state machine that checks a label
// against the Bidi Rule. The values are also used as indices into the
// transitions table.
type ruleState uint8

const (
	// ruleInitial is the zero value: no character has been consumed yet.
	ruleInitial = ruleState(iota)
	// ruleLTR is an LTR label whose most recent character may not end the
	// label.
	ruleLTR
	// ruleLTRFinal is an LTR label whose most recent characters form a valid
	// ending.
	ruleLTRFinal
	// ruleRTL is an RTL label whose most recent character may not end the
	// label.
	ruleRTL
	// ruleRTLFinal is an RTL label whose most recent characters form a valid
	// ending.
	ruleRTLFinal
	// ruleInvalid is entered when a character matches no transition of the
	// current state.
	ruleInvalid
)

// ruleTransition is a single edge of the state machine: a character whose
// class bit is present in mask moves the machine to next.
type ruleTransition struct {
	next ruleState // state entered when this transition is taken
	mask uint16    // bit set of character classes selecting this transition
}
// transitions is the transition table of the Bidi Rule state machine, indexed
// by the current ruleState. Every state offers two candidate transitions; the
// first one whose mask contains the class bit of the current character is
// taken. Bracketed numbers refer to the conditions of the Bidi Rule.
var transitions = [...][2]ruleTransition{
	// [2.1] The first character must be a character with Bidi property L, R,
	// or AL. If it has the R or AL property, it is an RTL label; if it has the
	// L property, it is an LTR label.
	ruleInitial: {
		{next: ruleLTRFinal, mask: 1 << bidi.L},
		{next: ruleRTLFinal, mask: 1<<bidi.R | 1<<bidi.AL},
	},

	ruleLTR: {
		// [2.6] In an LTR label, the end of the label must be a character
		// with Bidi property L or EN, followed by zero or more characters
		// with Bidi property NSM.
		{next: ruleLTRFinal, mask: 1<<bidi.L | 1<<bidi.EN},
		// [2.5] In an LTR label, only characters with the Bidi properties L,
		// EN, ES, CS, ET, ON, BN, or NSM are allowed.
		// The classes already handled by [2.6] are left out here.
		{next: ruleLTR, mask: 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM},
	},

	ruleLTRFinal: {
		// [2.6] In an LTR label, the end of the label must be a character
		// with Bidi property L or EN, followed by zero or more characters
		// with Bidi property NSM.
		{next: ruleLTRFinal, mask: 1<<bidi.L | 1<<bidi.EN | 1<<bidi.NSM},
		// [2.5] In an LTR label, only characters with the Bidi properties L,
		// EN, ES, CS, ET, ON, BN, or NSM are allowed.
		// The classes already handled by [2.6], and NSM, are left out here.
		{next: ruleLTR, mask: 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN},
	},

	ruleRTL: {
		// [2.3] In an RTL label, the end of the label must be a character
		// with Bidi property R, AL, EN, or AN, followed by zero or more
		// characters with Bidi property NSM.
		{next: ruleRTLFinal, mask: 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN},
		// [2.2] In an RTL label, only characters with the Bidi properties R,
		// AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
		// The classes already handled by [2.3] are left out here.
		{next: ruleRTL, mask: 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM},
	},

	ruleRTLFinal: {
		// [2.3] In an RTL label, the end of the label must be a character
		// with Bidi property R, AL, EN, or AN, followed by zero or more
		// characters with Bidi property NSM.
		{next: ruleRTLFinal, mask: 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN | 1<<bidi.NSM},
		// [2.2] In an RTL label, only characters with the Bidi properties R,
		// AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
		// The classes already handled by [2.3], and NSM, are left out here.
		{next: ruleRTL, mask: 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN},
	},

	// An empty mask never matches, so ruleInvalid has no way out.
	ruleInvalid: {
		{next: ruleInvalid, mask: 0},
		{next: ruleInvalid, mask: 0},
	},
}
// exclusiveRTL holds the class bits of EN and AN.
//
// [2.4] In an RTL label, if an EN is present, no AN may be present, and
// vice versa. A label for which both bits have been seen is therefore invalid.
const exclusiveRTL uint16 = 1<<bidi.EN | 1<<bidi.AN
// From RFC 5893
// An RTL label is a label that contains at least one character of type
// R, AL, or AN.
//
// An LTR label is any label that is not an RTL label.
// Direction reports the direction of the given label as defined by RFC 5893.
// The Bidi Rule does not have to be applied to labels of the category
// LeftToRight.
func ( []byte) bidi.Direction {
for := 0; < len(); {
, := bidi.Lookup([:])
if == 0 {
++
}
:= .Class()
if == bidi.R || == bidi.AL || == bidi.AN {
return bidi.RightToLeft
}
+=
}
return bidi.LeftToRight
}
// DirectionString reports the direction of the given label as defined by RFC
// 5893. The Bidi Rule does not have to be applied to labels of the category
// LeftToRight.
func ( string) bidi.Direction {
for := 0; < len(); {
, := bidi.LookupString([:])
if == 0 {
++
continue
}
:= .Class()
if == bidi.R || == bidi.AL || == bidi.AN {
return bidi.RightToLeft
}
+=
}
return bidi.LeftToRight
}
// Valid reports whether b conforms to the BiDi rule.
func ( []byte) bool {
var Transformer
if , := .advance(); ! || < len() {
return false
}
return .isFinal()
}
// ValidString reports whether s conforms to the BiDi rule.
func ( string) bool {
var Transformer
if , := .advanceString(); ! || < len() {
return false
}
return .isFinal()
}
// New returns a Transformer that verifies that input adheres to the Bidi Rule.
func () *Transformer {
return &Transformer{}
}
// Transformer implements transform.Transformer. It checks its input against
// the Bidi Rule while carrying state between calls; the zero value is ready
// to use.
type Transformer struct {
// state is the current state of the Bidi Rule state machine.
state ruleState
// NOTE(review): hasRTL is not read or written in the visible code; confirm
// whether it is still used elsewhere.
hasRTL bool
// seen accumulates one bit per character class observed so far.
seen uint16
}
// A rule can only be violated for "Bidi Domain names", meaning if one of the
// following categories has been observed.
func ( *Transformer) () bool {
const = 1<<bidi.R | 1<<bidi.AL | 1<<bidi.AN
return .seen& != 0
}
// Reset implements transform.Transformer.
func ( *Transformer) () { * = Transformer{} }
// Transform implements transform.Transformer. This Transformer has state and
// needs to be reset between uses.
func ( *Transformer) (, []byte, bool) (, int, error) {
if len() < len() {
= [:len()]
= false
= transform.ErrShortDst
}
, := .Span(, )
copy(, [:])
if == nil || != nil && != transform.ErrShortSrc {
=
}
return , ,
}
// Span returns the first n bytes of src that conform to the Bidi rule.
func ( *Transformer) ( []byte, bool) ( int, error) {
if .state == ruleInvalid && .isRTL() {
return 0, ErrInvalid
}
, := .advance()
switch {
case !:
= ErrInvalid
case < len():
if ! {
= transform.ErrShortSrc
break
}
= ErrInvalid
case !.isFinal():
= ErrInvalid
}
return ,
}
// Precomputing the ASCII values decreases running time for the ASCII fast path
// by about 30%.
var asciiTable [128]bidi.Properties
func init() {
for := range asciiTable {
, := bidi.LookupRune(rune())
asciiTable[] =
}
}
func ( *Transformer) ( []byte) ( int, bool) {
var bidi.Properties
var int
for < len() {
if [] < utf8.RuneSelf {
, = asciiTable[[]], 1
} else {
, = bidi.Lookup([:])
if <= 1 {
if == 1 {
// We always consider invalid UTF-8 to be invalid, even if
// the string has not yet been determined to be RTL.
// TODO: is this correct?
return , false
}
return , true // incomplete UTF-8 encoding
}
}
// TODO: using CompactClass would result in noticeable speedup.
// See unicode/bidi/prop.go:Properties.CompactClass.
:= uint16(1 << .Class())
.seen |=
if .seen&exclusiveRTL == exclusiveRTL {
.state = ruleInvalid
return , false
}
switch := transitions[.state]; {
case [0].mask& != 0:
.state = [0].next
case [1].mask& != 0:
.state = [1].next
default:
.state = ruleInvalid
if .isRTL() {
return , false
}
}
+=
}
return , true
}
func ( *Transformer) ( string) ( int, bool) {
var bidi.Properties
var int
for < len() {
if [] < utf8.RuneSelf {
, = asciiTable[[]], 1
} else {
, = bidi.LookupString([:])
if <= 1 {
if == 1 {
return , false // invalid UTF-8
}
return , true // incomplete UTF-8 encoding
}
}
// TODO: using CompactClass results in noticeable speedup.
// See unicode/bidi/prop.go:Properties.CompactClass.
:= uint16(1 << .Class())
.seen |=
if .seen&exclusiveRTL == exclusiveRTL {
.state = ruleInvalid
return , false
}
switch := transitions[.state]; {
case [0].mask& != 0:
.state = [0].next
case [1].mask& != 0:
.state = [1].next
default:
.state = ruleInvalid
if .isRTL() {
return , false
}
}
+=
}
return , true
}
The pages are generated with Golds v0.7.0-preview. (GOOS=linux GOARCH=amd64) Golds is a Go 101 project developed by Tapir Liu. PR and bug reports are welcome and can be submitted to the issue list. Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds. |