// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package unicode provides data and functions to test some properties of
// Unicode code points.
package unicodeconst (MaxRune = '\U0010FFFF'// Maximum valid Unicode code point.ReplacementChar = '\uFFFD'// Represents invalid code points.MaxASCII = '\u007F'// maximum ASCII value.MaxLatin1 = '\u00FF'// maximum Latin-1 value.)// RangeTable defines a set of Unicode code points by listing the ranges of// code points within the set. The ranges are listed in two slices// to save space: a slice of 16-bit ranges and a slice of 32-bit ranges.// The two slices must be in sorted order and non-overlapping.// Also, R32 should contain only values >= 0x10000 (1<<16).typeRangeTablestruct { R16 []Range16 R32 []Range32 LatinOffset int// number of entries in R16 with Hi <= MaxLatin1}// Range16 represents of a range of 16-bit Unicode code points. The range runs from Lo to Hi// inclusive and has the specified stride.typeRange16struct { Lo uint16 Hi uint16 Stride uint16}// Range32 represents of a range of Unicode code points and is used when one or// more of the values will not fit in 16 bits. The range runs from Lo to Hi// inclusive and has the specified stride. Lo and Hi must always be >= 1<<16.typeRange32struct { Lo uint32 Hi uint32 Stride uint32}// CaseRange represents a range of Unicode code points for simple (one// code point to one code point) case conversion.// The range runs from Lo to Hi inclusive, with a fixed stride of 1. Deltas// are the number to add to the code point to reach the code point for a// different case for that character. They may be negative. If zero, it// means the character is in the corresponding case. There is a special// case representing sequences of alternating corresponding Upper and Lower// pairs. 
It appears with a fixed Delta of//// {UpperLower, UpperLower, UpperLower}//// The constant UpperLower has an otherwise impossible delta value.typeCaseRangestruct { Lo uint32 Hi uint32 Delta d}// SpecialCase represents language-specific case mappings such as Turkish.// Methods of SpecialCase customize (by overriding) the standard mappings.typeSpecialCase []CaseRange// BUG(r): There is no mechanism for full case folding, that is, for// characters that involve multiple runes in the input or output.// Indices into the Delta arrays inside CaseRanges for case mapping.const (UpperCase = iotaLowerCaseTitleCaseMaxCase)type d [MaxCase]rune// to make the CaseRanges text shorter// If the Delta field of a [CaseRange] is UpperLower, it means// this CaseRange represents a sequence of the form (say)// [Upper] [Lower] [Upper] [Lower].const (UpperLower = MaxRune + 1// (Cannot be a valid delta.))// linearMax is the maximum size table for linear search for non-Latin1 rune.// Derived by running 'go test -calibrate'.const linearMax = 18// is16 reports whether r is in the sorted slice of 16-bit ranges.func is16( []Range16, uint16) bool {iflen() <= linearMax || <= MaxLatin1 {for := range { := &[]if < .Lo {returnfalse }if <= .Hi {return .Stride == 1 || (-.Lo)%.Stride == 0 } }returnfalse }// binary search over ranges := 0 := len()for < { := int(uint(+) >> 1) := &[]if .Lo <= && <= .Hi {return .Stride == 1 || (-.Lo)%.Stride == 0 }if < .Lo { = } else { = + 1 } }returnfalse}// is32 reports whether r is in the sorted slice of 32-bit ranges.func is32( []Range32, uint32) bool {iflen() <= linearMax {for := range { := &[]if < .Lo {returnfalse }if <= .Hi {return .Stride == 1 || (-.Lo)%.Stride == 0 } }returnfalse }// binary search over ranges := 0 := len()for < { := int(uint(+) >> 1) := []if .Lo <= && <= .Hi {return .Stride == 1 || (-.Lo)%.Stride == 0 }if < .Lo { = } else { = + 1 } }returnfalse}// Is reports whether the rune is in the specified table of ranges.func ( *RangeTable, rune) bool { := 
.R16// Compare as uint32 to correctly handle negative runes.iflen() > 0 && uint32() <= uint32([len()-1].Hi) {returnis16(, uint16()) } := .R32iflen() > 0 && >= rune([0].Lo) {returnis32(, uint32()) }returnfalse}func isExcludingLatin( *RangeTable, rune) bool { := .R16// Compare as uint32 to correctly handle negative runes.if := .LatinOffset; len() > && uint32() <= uint32([len()-1].Hi) {returnis16([:], uint16()) } := .R32iflen() > 0 && >= rune([0].Lo) {returnis32(, uint32()) }returnfalse}// IsUpper reports whether the rune is an upper case letter.func ( rune) bool {// See comment in IsGraphic.ifuint32() <= MaxLatin1 {returnproperties[uint8()]&pLmask == pLu }returnisExcludingLatin(Upper, )}// IsLower reports whether the rune is a lower case letter.func ( rune) bool {// See comment in IsGraphic.ifuint32() <= MaxLatin1 {returnproperties[uint8()]&pLmask == pLl }returnisExcludingLatin(Lower, )}// IsTitle reports whether the rune is a title case letter.func ( rune) bool {if <= MaxLatin1 {returnfalse }returnisExcludingLatin(Title, )}// lookupCaseRange returns the CaseRange mapping for rune r or nil if no// mapping exists for r.func lookupCaseRange( rune, []CaseRange) *CaseRange {// binary search over ranges := 0 := len()for < { := int(uint(+) >> 1) := &[]ifrune(.Lo) <= && <= rune(.Hi) {return }if < rune(.Lo) { = } else { = + 1 } }returnnil}// convertCase converts r to _case using CaseRange cr.func convertCase( int, rune, *CaseRange) rune { := .Delta[]if > MaxRune {// In an Upper-Lower sequence, which always starts with // an UpperCase letter, the real deltas always look like: // {0, 1, 0} UpperCase (Lower is next) // {-1, 0, -1} LowerCase (Upper, Title are previous) // The characters at even offsets from the beginning of the // sequence are upper case; the ones at odd offsets are lower. // The correct mapping can be done by clearing or setting the low // bit in the sequence offset. 
// The constants UpperCase and TitleCase are even while LowerCase // is odd so we take the low bit from _case.returnrune(.Lo) + ((-rune(.Lo))&^1 | rune(&1)) }return + }// to maps the rune using the specified case mapping.// It additionally reports whether caseRange contained a mapping for r.func to( int, rune, []CaseRange) ( rune, bool) {if < 0 || MaxCase <= {returnReplacementChar, false// as reasonable an error as any }if := lookupCaseRange(, ); != nil {returnconvertCase(, , ), true }return , false}// To maps the rune to the specified case: [UpperCase], [LowerCase], or [TitleCase].func ( int, rune) rune { , _ = to(, , CaseRanges)return}// ToUpper maps the rune to upper case.func ( rune) rune {if <= MaxASCII {if'a' <= && <= 'z' { -= 'a' - 'A' }return }returnTo(UpperCase, )}// ToLower maps the rune to lower case.func ( rune) rune {if <= MaxASCII {if'A' <= && <= 'Z' { += 'a' - 'A' }return }returnTo(LowerCase, )}// ToTitle maps the rune to title case.func ( rune) rune {if <= MaxASCII {if'a' <= && <= 'z' { // title case is upper case for ASCII -= 'a' - 'A' }return }returnTo(TitleCase, )}// ToUpper maps the rune to upper case giving priority to the special mapping.func ( SpecialCase) ( rune) rune { , := to(UpperCase, , []CaseRange())if == && ! { = ToUpper() }return}// ToTitle maps the rune to title case giving priority to the special mapping.func ( SpecialCase) ( rune) rune { , := to(TitleCase, , []CaseRange())if == && ! { = ToTitle() }return}// ToLower maps the rune to lower case giving priority to the special mapping.func ( SpecialCase) ( rune) rune { , := to(LowerCase, , []CaseRange())if == && ! { = ToLower() }return}// caseOrbit is defined in tables.go as []foldPair. Right now all the// entries fit in uint16, so use uint16. 
If that changes, compilation// will fail (the constants in the composite literal will not fit in uint16)// and the types here can change to uint32.type foldPair struct { From uint16 To uint16}// SimpleFold iterates over Unicode code points equivalent under// the Unicode-defined simple case folding. Among the code points// equivalent to rune (including rune itself), SimpleFold returns the// smallest rune > r if one exists, or else the smallest rune >= 0.// If r is not a valid Unicode code point, SimpleFold(r) returns r.//// For example://// SimpleFold('A') = 'a'// SimpleFold('a') = 'A'//// SimpleFold('K') = 'k'// SimpleFold('k') = '\u212A' (Kelvin symbol, K)// SimpleFold('\u212A') = 'K'//// SimpleFold('1') = '1'//// SimpleFold(-2) = -2func ( rune) rune {if < 0 || > MaxRune {return }ifint() < len(asciiFold) {returnrune(asciiFold[]) }// Consult caseOrbit table for special cases. := 0 := len(caseOrbit)for < { := int(uint(+) >> 1)ifrune(caseOrbit[].From) < { = + 1 } else { = } }if < len(caseOrbit) && rune(caseOrbit[].From) == {returnrune(caseOrbit[].To) }// No folding specified. This is a one- or two-element // equivalence class containing rune and ToLower(rune) // and ToUpper(rune) if they are different from rune.if := lookupCaseRange(, CaseRanges); != nil {if := convertCase(LowerCase, , ); != {return }returnconvertCase(UpperCase, , ) }return}
The pages are generated with Golds v0.7.3. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.