// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package html provides functions for escaping and unescaping HTML text.
package html

import (
	"strings"
	"unicode/utf8"
)

// These replacements permit compatibility with old numeric entities that
// assumed Windows-1252 encoding.
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
var replacementTable = [...]rune{
	'\u20AC', // First entry is what 0x80 should be replaced with.
	'\u0081',
	'\u201A',
	'\u0192',
	'\u201E',
	'\u2026',
	'\u2020',
	'\u2021',
	'\u02C6',
	'\u2030',
	'\u0160',
	'\u2039',
	'\u0152',
	'\u008D',
	'\u017D',
	'\u008F',
	'\u0090',
	'\u2018',
	'\u2019',
	'\u201C',
	'\u201D',
	'\u2022',
	'\u2013',
	'\u2014',
	'\u02DC',
	'\u2122',
	'\u0161',
	'\u203A',
	'\u0153',
	'\u009D',
	'\u017E',
	'\u0178', // Last entry is 0x9F.
	// 0x00->'\uFFFD' is handled programmatically.
	// 0x0D->'\u000D' is a no-op.
}

// unescapeEntity reads an entity like "&lt;" from b[src:] and writes the
// corresponding "<" to b[dst:], returning the incremented dst and src cursors.
// Precondition: b[src] == '&' && dst <= src.
//
// entity maps a named reference to a single rune and entity2 maps a named
// reference to a pair of runes; both are supplied by the caller. When no
// reference is recognized, the scanned bytes are copied through unchanged
// and both cursors advance past them.
func unescapeEntity(b []byte, dst, src int, entity map[string]rune, entity2 map[string][2]rune) (dst1, src1 int) {
	// attribute is hard-wired to false, so the attribute-only branch below
	// is never taken in this function.
	const attribute = false

	// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-reference

	// i starts at 1 because we already know that s[0] == '&'.
	i, s := 1, b[src:]

	if len(s) <= 1 {
		// A lone trailing '&': emit it literally.
		b[dst] = b[src]
		return dst + 1, src + 1
	}

	if s[i] == '#' {
		if len(s) <= 3 { // We need to have at least "&#.".
			b[dst] = b[src]
			return dst + 1, src + 1
		}
		i++
		c := s[i]
		hex := false
		if c == 'x' || c == 'X' {
			hex = true
			i++
		}

		// Accumulate the numeric value digit by digit.
		x := '\x00'
		for i < len(s) {
			c = s[i]
			i++
			if hex {
				if '0' <= c && c <= '9' {
					x = 16*x + rune(c) - '0'
					continue
				} else if 'a' <= c && c <= 'f' {
					x = 16*x + rune(c) - 'a' + 10
					continue
				} else if 'A' <= c && c <= 'F' {
					x = 16*x + rune(c) - 'A' + 10
					continue
				}
			} else if '0' <= c && c <= '9' {
				x = 10*x + rune(c) - '0'
				continue
			}
			// A terminating ';' is consumed; any other byte is left for
			// the caller.
			if c != ';' {
				i--
			}
			break
		}

		if i <= 3 { // No characters matched.
			b[dst] = b[src]
			return dst + 1, src + 1
		}

		if 0x80 <= x && x <= 0x9F {
			// Replace characters from Windows-1252 with UTF-8 equivalents.
			x = replacementTable[x-0x80]
		} else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF {
			// Replace invalid characters with the replacement character.
			x = '\uFFFD'
		}

		return dst + utf8.EncodeRune(b[dst:], x), src + i
	}

	// Consume the maximum number of characters possible, with the
	// consumed characters matching one of the named references.

	for i < len(s) {
		c := s[i]
		i++
		// Lower-cased characters are more common in entities, so we check for them first.
		if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
			continue
		}
		if c != ';' {
			i--
		}
		break
	}

	entityName := s[1:i]
	if len(entityName) == 0 {
		// No-op.
	} else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' {
		// No-op.
	} else if x := entity[string(entityName)]; x != 0 {
		return dst + utf8.EncodeRune(b[dst:], x), src + i
	} else if x := entity2[string(entityName)]; x[0] != 0 {
		dst1 := dst + utf8.EncodeRune(b[dst:], x[0])
		return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i
	} else if !attribute {
		// Fall back to the longest known prefix of the name.
		// longestEntityWithoutSemicolon is declared elsewhere in the package.
		maxLen := len(entityName) - 1
		if maxLen > longestEntityWithoutSemicolon {
			maxLen = longestEntityWithoutSemicolon
		}
		for j := maxLen; j > 1; j-- {
			if x := entity[string(entityName[:j])]; x != 0 {
				// entityName starts at s[1], so j name bytes plus the
				// leading '&' have been consumed.
				return dst + utf8.EncodeRune(b[dst:], x), src + j + 1
			}
		}
	}

	// Nothing matched: copy the scanned bytes through verbatim.
	dst1, src1 = dst+i, src+i
	copy(b[dst:dst1], b[src:src1])
	return dst1, src1
}

// htmlEscaper replaces the five characters escaped by EscapeString with
// their entity forms.
var htmlEscaper = strings.NewReplacer(
	`&`, "&amp;",
	`'`, "&#39;", // "&#39;" is shorter than "&apos;" and apos was not in HTML until HTML5.
	`<`, "&lt;",
	`>`, "&gt;",
	`"`, "&#34;", // "&#34;" is shorter than "&quot;".
)

// EscapeString escapes special characters like "<" to become "&lt;". It
// escapes only five such characters: <, >, &, ' and ".
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
// always true.
func EscapeString(s string) string {
	return htmlEscaper.Replace(s)
}

// UnescapeString unescapes entities like "&lt;" to become "<". It unescapes a
// larger range of entities than EscapeString escapes. For example, "&aacute;"
// unescapes to "á", as does "&#225;" and "&#xE1;".
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
// always true.
func UnescapeString(s string) string {
	i := strings.IndexByte(s, '&')

	if i < 0 {
		// No '&' at all: return the input without copying it.
		return s
	}

	// b is a working copy of s that is rewritten in place; dst is the write
	// cursor into b and src is the read cursor into s.
	b := []byte(s)
	// entityMaps is declared elsewhere in the package.
	entity, entity2 := entityMaps()
	dst, src := unescapeEntity(b, i, i, entity, entity2)
	for len(s[src:]) > 0 {
		if s[src] == '&' {
			i = 0
		} else {
			i = strings.IndexByte(s[src:], '&')
		}
		if i < 0 {
			// No further '&': copy the tail and stop.
			dst += copy(b[dst:], s[src:])
			break
		}

		if i > 0 {
			// Copy the literal text that precedes the next '&'.
			copy(b[dst:], s[src:src+i])
		}
		dst, src = unescapeEntity(b, dst+i, src+i, entity, entity2)
	}
	return string(b[:dst])
}
The pages are generated with Golds v0.7.3. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.