// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package parse

import (
	"fmt"
	"strings"
	"unicode"
	"unicode/utf8"
)

// item represents a token or text string returned from the scanner.
type item struct {
	typ  itemType // The type of this item.
	pos  Pos      // The starting position, in bytes, of this item in the input string.
	val  string   // The value of this item.
	line int      // The line number at the start of this item.
}

func (i item) String() string {
	switch {
	case i.typ == itemEOF:
		return "EOF"
	case i.typ == itemError:
		return i.val
	case i.typ > itemKeyword:
		return fmt.Sprintf("<%s>", i.val)
	case len(i.val) > 10:
		return fmt.Sprintf("%.10q...", i.val)
	}
	return fmt.Sprintf("%q", i.val)
}

// itemType identifies the type of lex items.
type itemType int

const (
	itemError        itemType = iota // error occurred; value is text of error
	itemBool                         // boolean constant
	itemChar                         // printable ASCII character; grab bag for comma etc.
	itemCharConstant                 // character constant
	itemComment                      // comment text
	itemComplex                      // complex constant (1+2i); imaginary is just a number
	itemAssign                       // equals ('=') introducing an assignment
	itemDeclare                      // colon-equals (':=') introducing a declaration
	itemEOF
	itemField      // alphanumeric identifier starting with '.'
	itemIdentifier // alphanumeric identifier not starting with '.'
	itemLeftDelim  // left action delimiter
	itemLeftParen  // '(' inside action
	itemNumber     // simple number, including imaginary
	itemPipe       // pipe symbol
	itemRawString  // raw quoted string (includes quotes)
	itemRightDelim // right action delimiter
	itemRightParen // ')' inside action
	itemSpace      // run of spaces separating arguments
	itemString     // quoted string (includes quotes)
	itemText       // plain text
	itemVariable   // variable starting with '$', such as '$' or '$1' or '$hello'
	// Keywords appear after all the rest.
	itemKeyword  // used only to delimit the keywords
	itemBlock    // block keyword
	itemBreak    // break keyword
	itemContinue // continue keyword
	itemDot      // the cursor, spelled '.'
	itemDefine   // define keyword
	itemElse     // else keyword
	itemEnd      // end keyword
	itemIf       // if keyword
	itemNil      // the untyped nil constant, easiest to treat as a keyword
	itemRange    // range keyword
	itemTemplate // template keyword
	itemWith     // with keyword
)

var key = map[string]itemType{
	".":        itemDot,
	"block":    itemBlock,
	"break":    itemBreak,
	"continue": itemContinue,
	"define":   itemDefine,
	"else":     itemElse,
	"end":      itemEnd,
	"if":       itemIf,
	"range":    itemRange,
	"nil":      itemNil,
	"template": itemTemplate,
	"with":     itemWith,
}

const eof = -1

// Trimming spaces.
// If the action begins "{{- " rather than "{{", then all space/tab/newlines
// preceding the action are trimmed; conversely if it ends " -}}" the
// leading spaces are trimmed. This is done entirely in the lexer; the
// parser never sees it happen. We require an ASCII space (' ', \t, \r, \n)
// to be present to avoid ambiguity with things like "{{-3}}". It reads
// better with the space present anyway. For simplicity, only ASCII
// does the job.
const (
	spaceChars    = " \t\r\n"   // These are the space characters defined by Go itself.
	trimMarker    = '-'         // Attached to left/right delimiter, trims trailing spaces from preceding/following text.
	trimMarkerLen = Pos(1 + 1)  // marker plus space before or after
)
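// Illustrative sketch (not part of this file): the effect of the trim
// markers described above, shown through the public text/template API.
// The template text and the "Gopher" data value are invented for the example.
//
//	package main
//
//	import (
//		"os"
//		"text/template"
//	)
//
//	func main() {
//		// "{{- " trims the spaces and newline before the action, and
//		// " -}}" trims those after it, so this should print "beginGopherend".
//		const text = "begin\n  {{- .Name -}}  \nend"
//		t := template.Must(template.New("trim").Parse(text))
//		t.Execute(os.Stdout, struct{ Name string }{"Gopher"})
//	}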
// stateFn represents the state of the scanner as a function that returns the next state.
type stateFn func(*lexer) stateFn

// lexer holds the state of the scanner.
type lexer struct {
	name        string    // the name of the input; used only for error reports
	input       string    // the string being scanned
	leftDelim   string    // start of action
	rightDelim  string    // end of action
	emitComment bool      // emit itemComment tokens.
	pos         Pos       // current position in the input
	start       Pos       // start position of this item
	width       Pos       // width of last rune read from input
	items       chan item // channel of scanned items
	parenDepth  int       // nesting depth of ( ) exprs
	line        int       // 1+number of newlines seen
	startLine   int       // start line of this item
	breakOK     bool      // break keyword allowed
	continueOK  bool      // continue keyword allowed
}

// next returns the next rune in the input.
func (l *lexer) next() rune {
	if int(l.pos) >= len(l.input) {
		l.width = 0
		return eof
	}
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = Pos(w)
	l.pos += l.width
	if r == '\n' {
		l.line++
	}
	return r
}

// peek returns but does not consume the next rune in the input.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// backup steps back one rune. Can only be called once per call of next.
func (l *lexer) backup() {
	l.pos -= l.width
	// Correct newline count.
	if l.width == 1 && l.input[l.pos] == '\n' {
		l.line--
	}
}

// emit passes an item back to the client.
func (l *lexer) emit(t itemType) {
	l.items <- item{t, l.start, l.input[l.start:l.pos], l.startLine}
	l.start = l.pos
	l.startLine = l.line
}

// ignore skips over the pending input before this point.
func (l *lexer) ignore() {
	l.line += strings.Count(l.input[l.start:l.pos], "\n")
	l.start = l.pos
	l.startLine = l.line
}

// accept consumes the next rune if it's from the valid set.
func (l *lexer) accept(valid string) bool {
	if strings.ContainsRune(valid, l.next()) {
		return true
	}
	l.backup()
	return false
}

// acceptRun consumes a run of runes from the valid set.
func (l *lexer) acceptRun(valid string) {
	for strings.ContainsRune(valid, l.next()) {
	}
	l.backup()
}

// errorf returns an error token and terminates the scan by passing
// back a nil pointer that will be the next state, terminating l.nextItem.
func (l *lexer) errorf(format string, args ...any) stateFn {
	l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine}
	return nil
}

// nextItem returns the next item from the input.
// Called by the parser, not in the lexing goroutine.
func (l *lexer) nextItem() item {
	return <-l.items
}

// drain drains the output so the lexing goroutine will exit.
// Called by the parser, not in the lexing goroutine.
func (l *lexer) drain() {
	for range l.items {
	}
}

// lex creates a new scanner for the input string.
func lex(name, input, left, right string, emitComment bool) *lexer {
	if left == "" {
		left = leftDelim
	}
	if right == "" {
		right = rightDelim
	}
	l := &lexer{
		name:        name,
		input:       input,
		leftDelim:   left,
		rightDelim:  right,
		emitComment: emitComment,
		items:       make(chan item),
		line:        1,
		startLine:   1,
	}
	go l.run()
	return l
}

// run runs the state machine for the lexer.
func (l *lexer) run() {
	for state := lexText; state != nil; {
		state = state(l)
	}
	close(l.items)
}
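// Illustrative sketch (not part of this file): a stripped-down version of
// the same design, in which each state is a function that returns the next
// state and a goroutine feeds items to the consumer over a channel. All
// names below (miniLexer, lexWord, lexSpaceRun) are invented for the example.
//
//	package main
//
//	import (
//		"fmt"
//		"strings"
//	)
//
//	type miniLexer struct {
//		input string
//		pos   int
//		items chan string
//	}
//
//	type miniState func(*miniLexer) miniState
//
//	func lexWord(l *miniLexer) miniState {
//		start := l.pos
//		for l.pos < len(l.input) && l.input[l.pos] != ' ' {
//			l.pos++
//		}
//		if l.pos > start {
//			l.items <- l.input[start:l.pos]
//		}
//		if l.pos == len(l.input) {
//			return nil // a nil state ends the run loop, which closes the channel
//		}
//		return lexSpaceRun
//	}
//
//	func lexSpaceRun(l *miniLexer) miniState {
//		trimmed := strings.TrimLeft(l.input[l.pos:], " ")
//		l.pos = len(l.input) - len(trimmed)
//		return lexWord
//	}
//
//	func main() {
//		l := &miniLexer{input: "state functions drive the scan", items: make(chan string)}
//		go func() { // mirrors (*lexer).run above
//			for state := lexWord; state != nil; {
//				state = state(l)
//			}
//			close(l.items)
//		}()
//		for tok := range l.items {
//			fmt.Println(tok)
//		}
//	}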
// state functions

const (
	leftDelim    = "{{"
	rightDelim   = "}}"
	leftComment  = "/*"
	rightComment = "*/"
)

// lexText scans until an opening action delimiter, "{{".
func lexText(l *lexer) stateFn {
	l.width = 0
	if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 {
		ldn := Pos(len(l.leftDelim))
		l.pos += Pos(x)
		trimLength := Pos(0)
		if hasLeftTrimMarker(l.input[l.pos+ldn:]) {
			trimLength = rightTrimLength(l.input[l.start:l.pos])
		}
		l.pos -= trimLength
		if l.pos > l.start {
			l.line += strings.Count(l.input[l.start:l.pos], "\n")
			l.emit(itemText)
		}
		l.pos += trimLength
		l.ignore()
		return lexLeftDelim
	}
	l.pos = Pos(len(l.input))
	// Correctly reached EOF.
	if l.pos > l.start {
		l.line += strings.Count(l.input[l.start:l.pos], "\n")
		l.emit(itemText)
	}
	l.emit(itemEOF)
	return nil
}

// rightTrimLength returns the length of the spaces at the end of the string.
func rightTrimLength(s string) Pos {
	return Pos(len(s) - len(strings.TrimRight(s, spaceChars)))
}

// atRightDelim reports whether the lexer is at a right delimiter, possibly preceded by a trim marker.
func (l *lexer) atRightDelim() (delim, trimSpaces bool) {
	if hasRightTrimMarker(l.input[l.pos:]) && strings.HasPrefix(l.input[l.pos+trimMarkerLen:], l.rightDelim) { // With trim marker.
		return true, true
	}
	if strings.HasPrefix(l.input[l.pos:], l.rightDelim) { // Without trim marker.
		return true, false
	}
	return false, false
}

// leftTrimLength returns the length of the spaces at the beginning of the string.
func leftTrimLength(s string) Pos {
	return Pos(len(s) - len(strings.TrimLeft(s, spaceChars)))
}

// lexLeftDelim scans the left delimiter, which is known to be present, possibly with a trim marker.
func lexLeftDelim(l *lexer) stateFn {
	l.pos += Pos(len(l.leftDelim))
	trimSpace := hasLeftTrimMarker(l.input[l.pos:])
	afterMarker := Pos(0)
	if trimSpace {
		afterMarker = trimMarkerLen
	}
	if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) {
		l.pos += afterMarker
		l.ignore()
		return lexComment
	}
	l.emit(itemLeftDelim)
	l.pos += afterMarker
	l.ignore()
	l.parenDepth = 0
	return lexInsideAction
}

// lexComment scans a comment. The left comment marker is known to be present.
func lexComment(l *lexer) stateFn {
	l.pos += Pos(len(leftComment))
	i := strings.Index(l.input[l.pos:], rightComment)
	if i < 0 {
		return l.errorf("unclosed comment")
	}
	l.pos += Pos(i + len(rightComment))
	delim, trimSpace := l.atRightDelim()
	if !delim {
		return l.errorf("comment ends before closing delimiter")
	}
	if l.emitComment {
		l.emit(itemComment)
	}
	if trimSpace {
		l.pos += trimMarkerLen
	}
	l.pos += Pos(len(l.rightDelim))
	if trimSpace {
		l.pos += leftTrimLength(l.input[l.pos:])
	}
	l.ignore()
	return lexText
}
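// Illustrative sketch (not part of this file): a template comment must abut
// the action delimiters ("{{/*...*/}}", optionally with trim markers) and is
// consumed by the lexer; whether an itemComment token is forwarded to the
// parser is governed by the emitComment flag above. The template text is
// invented for the example.
//
//	package main
//
//	import (
//		"os"
//		"text/template"
//	)
//
//	func main() {
//		// The comment never reaches the output, so this should print "Hello, world".
//		const text = "Hello{{/* dropped by the lexer */}}, world"
//		t := template.Must(template.New("comment").Parse(text))
//		t.Execute(os.Stdout, nil)
//	}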
// lexRightDelim scans the right delimiter, which is known to be present, possibly with a trim marker.
func lexRightDelim(l *lexer) stateFn {
	trimSpace := hasRightTrimMarker(l.input[l.pos:])
	if trimSpace {
		l.pos += trimMarkerLen
		l.ignore()
	}
	l.pos += Pos(len(l.rightDelim))
	l.emit(itemRightDelim)
	if trimSpace {
		l.pos += leftTrimLength(l.input[l.pos:])
		l.ignore()
	}
	return lexText
}

// lexInsideAction scans the elements inside action delimiters.
func lexInsideAction(l *lexer) stateFn {
	// Either number, quoted string, or identifier.
	// Spaces separate arguments; runs of spaces turn into itemSpace.
	// Pipe symbols separate and are emitted.
	delim, _ := l.atRightDelim()
	if delim {
		if l.parenDepth == 0 {
			return lexRightDelim
		}
		return l.errorf("unclosed left paren")
	}
	switch r := l.next(); {
	case r == eof:
		return l.errorf("unclosed action")
	case isSpace(r):
		l.backup() // Put space back in case we have " -}}".
		return lexSpace
	case r == '=':
		l.emit(itemAssign)
	case r == ':':
		if l.next() != '=' {
			return l.errorf("expected :=")
		}
		l.emit(itemDeclare)
	case r == '|':
		l.emit(itemPipe)
	case r == '"':
		return lexQuote
	case r == '`':
		return lexRawQuote
	case r == '$':
		return lexVariable
	case r == '\'':
		return lexChar
	case r == '.':
		// Special look-ahead for ".field" so we don't break l.backup().
		if l.pos < Pos(len(l.input)) {
			r := l.input[l.pos]
			if r < '0' || '9' < r {
				return lexField
			}
		}
		fallthrough // '.' can start a number.
	case r == '+' || r == '-' || ('0' <= r && r <= '9'):
		l.backup()
		return lexNumber
	case isAlphaNumeric(r):
		l.backup()
		return lexIdentifier
	case r == '(':
		l.emit(itemLeftParen)
		l.parenDepth++
	case r == ')':
		l.emit(itemRightParen)
		l.parenDepth--
		if l.parenDepth < 0 {
			return l.errorf("unexpected right paren %#U", r)
		}
	case r <= unicode.MaxASCII && unicode.IsPrint(r):
		l.emit(itemChar)
	default:
		return l.errorf("unrecognized character in action: %#U", r)
	}
	return lexInsideAction
}

// lexSpace scans a run of space characters.
// We have not consumed the first space, which is known to be present.
// Take care if there is a trim-marked right delimiter, which starts with a space.
func lexSpace(l *lexer) stateFn {
	var r rune
	var numSpaces int
	for {
		r = l.peek()
		if !isSpace(r) {
			break
		}
		l.next()
		numSpaces++
	}
	// Be careful about a trim-marked closing delimiter, which has a minus
	// after a space. We know there is a space, so check for the '-' that might follow.
	if hasRightTrimMarker(l.input[l.pos-1:]) && strings.HasPrefix(l.input[l.pos-1+trimMarkerLen:], l.rightDelim) {
		l.backup() // Before the space.
		if numSpaces == 1 {
			return lexRightDelim // On the delim, so go right to that.
		}
	}
	l.emit(itemSpace)
	return lexInsideAction
}

// lexIdentifier scans an alphanumeric.
func lexIdentifier(l *lexer) stateFn {
Loop:
	for {
		switch r := l.next(); {
		case isAlphaNumeric(r):
			// absorb.
		default:
			l.backup()
			word := l.input[l.start:l.pos]
			if !l.atTerminator() {
				return l.errorf("bad character %#U", r)
			}
			switch {
			case key[word] > itemKeyword:
				item := key[word]
				if item == itemBreak && !l.breakOK || item == itemContinue && !l.continueOK {
					l.emit(itemIdentifier)
				} else {
					l.emit(item)
				}
			case word[0] == '.':
				l.emit(itemField)
			case word == "true", word == "false":
				l.emit(itemBool)
			default:
				l.emit(itemIdentifier)
			}
			break Loop
		}
	}
	return lexInsideAction
}

// lexField scans a field: .Alphanumeric.
// The . has been scanned.
func lexField(l *lexer) stateFn {
	return lexFieldOrVariable(l, itemField)
}

// lexVariable scans a Variable: $Alphanumeric.
// The $ has been scanned.
func lexVariable(l *lexer) stateFn {
	if l.atTerminator() { // Nothing interesting follows -> "$".
		l.emit(itemVariable)
		return lexInsideAction
	}
	return lexFieldOrVariable(l, itemVariable)
}

// lexFieldOrVariable scans a field or variable: [.$]Alphanumeric.
// The . or $ has been scanned.
func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
	if l.atTerminator() { // Nothing interesting follows -> "." or "$".
		if typ == itemVariable {
			l.emit(itemVariable)
		} else {
			l.emit(itemDot)
		}
		return lexInsideAction
	}
	var r rune
	for {
		r = l.next()
		if !isAlphaNumeric(r) {
			l.backup()
			break
		}
	}
	if !l.atTerminator() {
		return l.errorf("bad character %#U", r)
	}
	l.emit(typ)
	return lexInsideAction
}

// atTerminator reports whether the input is at a valid termination character to
// appear after an identifier. Breaks .X.Y into two pieces. Also catches cases
// like "$x+2" not being acceptable without a space, in case we decide one
// day to implement arithmetic.
func (l *lexer) atTerminator() bool {
	r := l.peek()
	if isSpace(r) {
		return true
	}
	switch r {
	case eof, '.', ',', '|', ':', ')', '(':
		return true
	}
	// Does r start the delimiter? This can be ambiguous (with delim=="//", $x/2 will
	// succeed but should fail) but only in extremely rare cases caused by willfully
	// bad choice of delimiter.
	if rd, _ := utf8.DecodeRuneInString(l.rightDelim); rd == r {
		return true
	}
	return false
}

// lexChar scans a character constant. The initial quote is already
// scanned. Syntax checking is done by the parser.
func lexChar(l *lexer) stateFn {
Loop:
	for {
		switch l.next() {
		case '\\':
			if r := l.next(); r != eof && r != '\n' {
				break
			}
			fallthrough
		case eof, '\n':
			return l.errorf("unterminated character constant")
		case '\'':
			break Loop
		}
	}
	l.emit(itemCharConstant)
	return lexInsideAction
}
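// Illustrative sketch (not part of this file): inside an action, ".Name" is
// lexed as an itemField, "$greeting" as an itemVariable, the quoted text as
// an itemString, and ":=" as an itemDeclare. The template text and data are
// invented for the example.
//
//	package main
//
//	import (
//		"os"
//		"text/template"
//	)
//
//	func main() {
//		// Should print "hello, Gopher".
//		const text = `{{$greeting := "hello"}}{{$greeting}}, {{.Name}}`
//		t := template.Must(template.New("vars").Parse(text))
//		t.Execute(os.Stdout, map[string]string{"Name": "Gopher"})
//	}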
// lexNumber scans a number: decimal, octal, hex, float, or imaginary. This
// isn't a perfect number scanner - for instance it accepts "." and "0x0.2"
// and "089" - but when it's wrong the input is invalid and the parser (via
// strconv) will notice.
func lexNumber(l *lexer) stateFn {
	if !l.scanNumber() {
		return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
	}
	if sign := l.peek(); sign == '+' || sign == '-' {
		// Complex: 1+2i. No spaces, must end in 'i'.
		if !l.scanNumber() || l.input[l.pos-1] != 'i' {
			return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
		}
		l.emit(itemComplex)
	} else {
		l.emit(itemNumber)
	}
	return lexInsideAction
}

func (l *lexer) scanNumber() bool {
	// Optional leading sign.
	l.accept("+-")
	// Is it hex?
	digits := "0123456789_"
	if l.accept("0") {
		// Note: Leading 0 does not mean octal in floats.
		if l.accept("xX") {
			digits = "0123456789abcdefABCDEF_"
		} else if l.accept("oO") {
			digits = "01234567_"
		} else if l.accept("bB") {
			digits = "01_"
		}
	}
	l.acceptRun(digits)
	if l.accept(".") {
		l.acceptRun(digits)
	}
	if len(digits) == 10+1 && l.accept("eE") {
		l.accept("+-")
		l.acceptRun("0123456789_")
	}
	if len(digits) == 16+6+1 && l.accept("pP") {
		l.accept("+-")
		l.acceptRun("0123456789_")
	}
	// Is it imaginary?
	l.accept("i")
	// Next thing mustn't be alphanumeric.
	if isAlphaNumeric(l.peek()) {
		l.next()
		return false
	}
	return true
}

// lexQuote scans a quoted string.
func lexQuote(l *lexer) stateFn {
Loop:
	for {
		switch l.next() {
		case '\\':
			if r := l.next(); r != eof && r != '\n' {
				break
			}
			fallthrough
		case eof, '\n':
			return l.errorf("unterminated quoted string")
		case '"':
			break Loop
		}
	}
	l.emit(itemString)
	return lexInsideAction
}

// lexRawQuote scans a raw quoted string.
func lexRawQuote(l *lexer) stateFn {
Loop:
	for {
		switch l.next() {
		case eof:
			return l.errorf("unterminated raw quoted string")
		case '`':
			break Loop
		}
	}
	l.emit(itemRawString)
	return lexInsideAction
}

// isSpace reports whether r is a space character.
func isSpace(r rune) bool {
	return r == ' ' || r == '\t' || r == '\r' || r == '\n'
}

// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
func isAlphaNumeric(r rune) bool {
	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
}

func hasLeftTrimMarker(s string) bool {
	return len(s) >= 2 && s[0] == trimMarker && isSpace(rune(s[1]))
}

func hasRightTrimMarker(s string) bool {
	return len(s) >= 2 && isSpace(rune(s[0])) && s[1] == trimMarker
}
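// Illustrative sketch (not part of this file): the kinds of constants the
// functions above scan. "0x2A" is emitted as an itemNumber, "1+2i" as an
// itemComplex, and the back-quoted text as an itemRawString; the exact
// formatting of the printed values is left to the fmt package. The template
// text is invented for the example.
//
//	package main
//
//	import (
//		"os"
//		"text/template"
//	)
//
//	func main() {
//		const text = "{{print 0x2A 1.5 1+2i}} {{`raw`}}\n"
//		t := template.Must(template.New("numbers").Parse(text))
//		t.Execute(os.Stdout, nil)
//	}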