// Copyright 2009 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.//go:generate go run makeisprint.go -output isprint.gopackage strconvimport ()const ( lowerhex = "0123456789abcdef" upperhex = "0123456789ABCDEF")// contains reports whether the string contains the byte c.func contains( string, byte) bool {returnindex(, ) != -1}func quoteWith( string, byte, , bool) string {returnstring(appendQuotedWith(make([]byte, 0, 3*len()/2), , , , ))}func quoteRuneWith( rune, byte, , bool) string {returnstring(appendQuotedRuneWith(nil, , , , ))}func appendQuotedWith( []byte, string, byte, , bool) []byte {// Often called with big strings, so preallocate. If there's quoting, // this is conservative but still helps a lot.ifcap()-len() < len() { := make([]byte, len(), len()+1+len()+1)copy(, ) = } = append(, )for := 0; len() > 0; = [:] { := rune([0]) = 1if >= utf8.RuneSelf { , = utf8.DecodeRuneInString() }if == 1 && == utf8.RuneError { = append(, `\x`...) = append(, lowerhex[[0]>>4]) = append(, lowerhex[[0]&0xF])continue } = appendEscapedRune(, , , , ) } = append(, )return}func appendQuotedRuneWith( []byte, rune, byte, , bool) []byte { = append(, )if !utf8.ValidRune() { = utf8.RuneError } = appendEscapedRune(, , , , ) = append(, )return}func appendEscapedRune( []byte, rune, byte, , bool) []byte {if == rune() || == '\\' { // always backslashed = append(, '\\') = append(, byte())return }if {if < utf8.RuneSelf && IsPrint() { = append(, byte())return } } elseifIsPrint() || && isInGraphicList() {returnutf8.AppendRune(, ) }switch {case'\a': = append(, `\a`...)case'\b': = append(, `\b`...)case'\f': = append(, `\f`...)case'\n': = append(, `\n`...)case'\r': = append(, `\r`...)case'\t': = append(, `\t`...)case'\v': = append(, `\v`...)default:switch {case < ' ' || == 0x7f: = append(, `\x`...) = append(, lowerhex[byte()>>4]) = append(, lowerhex[byte()&0xF])case !utf8.ValidRune(): = 0xFFFDfallthroughcase < 0x10000: = append(, `\u`...)for := 12; >= 0; -= 4 { = append(, lowerhex[>>uint()&0xF]) }default: = append(, `\U`...)for := 28; >= 0; -= 4 { = append(, lowerhex[>>uint()&0xF]) } } }return}// Quote returns a double-quoted Go string literal representing s. The// returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for// control characters and non-printable characters as defined by// [IsPrint].func ( string) string {returnquoteWith(, '"', false, false)}// AppendQuote appends a double-quoted Go string literal representing s,// as generated by [Quote], to dst and returns the extended buffer.func ( []byte, string) []byte {returnappendQuotedWith(, , '"', false, false)}// QuoteToASCII returns a double-quoted Go string literal representing s.// The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for// non-ASCII characters and non-printable characters as defined by [IsPrint].func ( string) string {returnquoteWith(, '"', true, false)}// AppendQuoteToASCII appends a double-quoted Go string literal representing s,// as generated by [QuoteToASCII], to dst and returns the extended buffer.func ( []byte, string) []byte {returnappendQuotedWith(, , '"', true, false)}// QuoteToGraphic returns a double-quoted Go string literal representing s.// The returned string leaves Unicode graphic characters, as defined by// [IsGraphic], unchanged and uses Go escape sequences (\t, \n, \xFF, \u0100)// for non-graphic characters.func ( string) string {returnquoteWith(, '"', false, true)}// AppendQuoteToGraphic appends a double-quoted Go string literal representing s,// as generated by [QuoteToGraphic], to dst and returns the extended buffer.func ( []byte, string) []byte {returnappendQuotedWith(, , '"', false, true)}// QuoteRune returns a single-quoted Go character literal representing the// rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)// for control characters and non-printable characters as defined by [IsPrint].// If r is not a valid Unicode code point, it is interpreted as the Unicode// replacement character U+FFFD.func ( rune) string {returnquoteRuneWith(, '\'', false, false)}// AppendQuoteRune appends a single-quoted Go character literal representing the rune,// as generated by [QuoteRune], to dst and returns the extended buffer.func ( []byte, rune) []byte {returnappendQuotedRuneWith(, , '\'', false, false)}// QuoteRuneToASCII returns a single-quoted Go character literal representing// the rune. The returned string uses Go escape sequences (\t, \n, \xFF,// \u0100) for non-ASCII characters and non-printable characters as defined// by [IsPrint].// If r is not a valid Unicode code point, it is interpreted as the Unicode// replacement character U+FFFD.func ( rune) string {returnquoteRuneWith(, '\'', true, false)}// AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune,// as generated by [QuoteRuneToASCII], to dst and returns the extended buffer.func ( []byte, rune) []byte {returnappendQuotedRuneWith(, , '\'', true, false)}// QuoteRuneToGraphic returns a single-quoted Go character literal representing// the rune. If the rune is not a Unicode graphic character,// as defined by [IsGraphic], the returned string will use a Go escape sequence// (\t, \n, \xFF, \u0100).// If r is not a valid Unicode code point, it is interpreted as the Unicode// replacement character U+FFFD.func ( rune) string {returnquoteRuneWith(, '\'', false, true)}// AppendQuoteRuneToGraphic appends a single-quoted Go character literal representing the rune,// as generated by [QuoteRuneToGraphic], to dst and returns the extended buffer.func ( []byte, rune) []byte {returnappendQuotedRuneWith(, , '\'', false, true)}// CanBackquote reports whether the string s can be represented// unchanged as a single-line backquoted string without control// characters other than tab.func ( string) bool {forlen() > 0 { , := utf8.DecodeRuneInString() = [:]if > 1 {if == '\ufeff' {returnfalse// BOMs are invisible and should not be quoted. }continue// All other multibyte runes are correctly encoded and assumed printable. }if == utf8.RuneError {returnfalse }if ( < ' ' && != '\t') || == '`' || == '\u007F' {returnfalse } }returntrue}func unhex( byte) ( rune, bool) { := rune()switch {case'0' <= && <= '9':return - '0', truecase'a' <= && <= 'f':return - 'a' + 10, truecase'A' <= && <= 'F':return - 'A' + 10, true }return}// UnquoteChar decodes the first character or byte in the escaped string// or character literal represented by the string s.// It returns four values://// 1. value, the decoded Unicode code point or byte value;// 2. multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;// 3. tail, the remainder of the string after the character; and// 4. an error that will be nil if the character is syntactically valid.//// The second argument, quote, specifies the type of literal being parsed// and therefore which escaped quote character is permitted.// If set to a single quote, it permits the sequence \' and disallows unescaped '.// If set to a double quote, it permits \" and disallows unescaped ".// If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.func ( string, byte) ( rune, bool, string, error) {// easy casesiflen() == 0 { = ErrSyntaxreturn }switch := [0]; {case == && ( == '\'' || == '"'): = ErrSyntaxreturncase >= utf8.RuneSelf: , := utf8.DecodeRuneInString()return , true, [:], nilcase != '\\':returnrune([0]), false, [1:], nil }// hard case: c is backslashiflen() <= 1 { = ErrSyntaxreturn } := [1] = [2:]switch {case'a': = '\a'case'b': = '\b'case'f': = '\f'case'n': = '\n'case'r': = '\r'case't': = '\t'case'v': = '\v'case'x', 'u', 'U': := 0switch {case'x': = 2case'u': = 4case'U': = 8 }varruneiflen() < { = ErrSyntaxreturn }for := 0; < ; ++ { , := unhex([])if ! { = ErrSyntaxreturn } = <<4 | } = [:]if == 'x' {// single-byte string, possibly not UTF-8 = break }if !utf8.ValidRune() { = ErrSyntaxreturn } = = truecase'0', '1', '2', '3', '4', '5', '6', '7': := rune() - '0'iflen() < 2 { = ErrSyntaxreturn }for := 0; < 2; ++ { // one digit already; two more := rune([]) - '0'if < 0 || > 7 { = ErrSyntaxreturn } = ( << 3) | } = [2:]if > 255 { = ErrSyntaxreturn } = case'\\': = '\\'case'\'', '"':if != { = ErrSyntaxreturn } = rune()default: = ErrSyntaxreturn } = return}// QuotedPrefix returns the quoted string (as understood by [Unquote]) at the prefix of s.// If s does not start with a valid quoted string, QuotedPrefix returns an error.func ( string) (string, error) { , , := unquote(, false)return , }// Unquote interprets s as a single-quoted, double-quoted,// or backquoted Go string literal, returning the string value// that s quotes. (If s is single-quoted, it would be a Go// character literal; Unquote returns the corresponding// one-character string.)func ( string) (string, error) { , , := unquote(, true)iflen() > 0 {return"", ErrSyntax }return , }// unquote parses a quoted string at the start of the input,// returning the parsed prefix, the remaining suffix, and any parse errors.// If unescape is true, the parsed prefix is unescaped,// otherwise the input prefix is provided verbatim.func unquote( string, bool) (, string, error) {// Determine the quote form and optimistically find the terminating quote.iflen() < 2 {return"", , ErrSyntax } := [0] := index([1:], )if < 0 {return"", , ErrSyntax } += 2// position after terminating quote; may be wrong if escape sequences are presentswitch {case'`':switch {case !: = [:] // include quotescase !contains([:], '\r'): = [len("`") : -len("`")] // exclude quotesdefault:// Carriage return characters ('\r') inside raw string literals // are discarded from the raw string value. := make([]byte, 0, -len("`")-len("\r")-len("`"))for := len("`"); < -len("`"); ++ {if [] != '\r' { = append(, []) } } = string() }// NOTE: Prior implementations did not verify that raw strings consist // of valid UTF-8 characters and we continue to not verify it as such. // The Go specification does not explicitly require valid UTF-8, // but only mention that it is implicitly valid for Go source code // (which must be valid UTF-8).return , [:], nilcase'"', '\'':// Handle quoted strings without any escape sequences.if !contains([:], '\\') && !contains([:], '\n') {varboolswitch {case'"': = utf8.ValidString([len(`"`) : -len(`"`)])case'\'': , := utf8.DecodeRuneInString([len("'") : -len("'")]) = len("'")++len("'") == && ( != utf8.RuneError || != 1) }if { = [:]if { = [1 : -1] // exclude quotes }return , [:], nil } }// Handle quoted strings with escape sequences.var []byte := = [1:] // skip starting quoteif { = make([]byte, 0, 3*/2) // try to avoid more allocations }forlen() > 0 && [0] != {// Process the next character, // rejecting any unescaped newline characters which are invalid. , , , := UnquoteChar(, )if [0] == '\n' || != nil {return"", , ErrSyntax } = // Append the character if unescaping the input.if {if < utf8.RuneSelf || ! { = append(, byte()) } else { = utf8.AppendRune(, ) } }// Single quoted strings must be a single character.if == '\'' {break } }// Verify that the string ends with a terminating quote.if !(len() > 0 && [0] == ) {return"", , ErrSyntax } = [1:] // skip terminating quoteif {returnstring(), , nil }return [:len()-len()], , nildefault:return"", , ErrSyntax }}// bsearch is semantically the same as [slices.BinarySearch] (without NaN checks)// We copied this function because we can not import "slices" here.func bsearch[ ~[], ~uint16 | ~uint32]( , ) (int, bool) { := len() , := 0, for < { := + (-)>>1if [] < { = + 1 } else { = } }return , < && [] == }// TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests// to give the same answer. It allows this package not to depend on unicode,// and therefore not pull in all the Unicode tables. If the linker were better// at tossing unused tables, we could get rid of this implementation.// That would be nice.// IsPrint reports whether the rune is defined as printable by Go, with// the same definition as [unicode.IsPrint]: letters, numbers, punctuation,// symbols and ASCII space.func ( rune) bool {// Fast check for Latin-1if <= 0xFF {if0x20 <= && <= 0x7E {// All the ASCII is printable from space through DEL-1.returntrue }if0xA1 <= && <= 0xFF {// Similarly for ¡ through ÿ...return != 0xAD// ...except for the bizarre soft hyphen. }returnfalse }// Same algorithm, either on uint16 or uint32 value. // First, find first i such that isPrint[i] >= x. // This is the index of either the start or end of a pair that might span x. // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]). // If we find x in a range, make sure x is not in isNotPrint list.if0 <= && < 1<<16 { , , := uint16(), isPrint16, isNotPrint16 , := bsearch(, )if >= len() || < [&^1] || [|1] < {returnfalse } , := bsearch(, )return ! } , , := uint32(), isPrint32, isNotPrint32 , := bsearch(, )if >= len() || < [&^1] || [|1] < {returnfalse }if >= 0x20000 {returntrue } -= 0x10000 , := bsearch(, uint16())return !}// IsGraphic reports whether the rune is defined as a Graphic by Unicode. Such// characters include letters, marks, numbers, punctuation, symbols, and// spaces, from categories L, M, N, P, S, and Zs.func ( rune) bool {ifIsPrint() {returntrue }returnisInGraphicList()}// isInGraphicList reports whether the rune is in the isGraphic list. This separation// from IsGraphic allows quoteWith to avoid two calls to IsPrint.// Should be called only if IsPrint fails.func isInGraphicList( rune) bool {// We know r must fit in 16 bits - see makeisprint.go.if > 0xFFFF {returnfalse } , := bsearch(isGraphic, uint16())return}
The pages are generated with Goldsv0.7.0-preview. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.