// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Godoc comment extraction and comment -> HTML formatting.

package doc

import (
	"bytes"
	"internal/lazyregexp"
	"io"
	"strings"
	"text/template" // for HTMLEscape
	"unicode"
	"unicode/utf8"
)

const (
	// HTML entities emitted for "nice" quotes in HTML output.
	ldquo = "&ldquo;"
	rdquo = "&rdquo;"
	// Unicode quotes used as the intermediate (and plain-text) representation.
	ulquo = "“"
	urquo = "”"
)

var (
	// htmlQuoteReplacer turns unicode quotes into their HTML entities.
	htmlQuoteReplacer = strings.NewReplacer(ulquo, ldquo, urquo, rdquo)
	// unicodeQuoteReplacer turns `` and '' into unicode left and right quotes.
	unicodeQuoteReplacer = strings.NewReplacer("``", ulquo, "''", urquo)
)

// Escape comment text for HTML. If nice is set,
// also turn `` into “ and '' into ”.
func commentEscape( io.Writer,  string,  bool) {
	if  {
		// In the first pass, we convert `` and '' into their unicode equivalents.
		// This prevents them from being escaped in HTMLEscape.
		 = convertQuotes()
		var  bytes.Buffer
		template.HTMLEscape(&, []byte())
		// Now we convert the unicode quotes to their HTML escaped entities to maintain old behavior.
		// We need to use a temp buffer to read the string back and do the conversion,
		// otherwise HTMLEscape will escape & to &
		htmlQuoteReplacer.WriteString(, .String())
		return
	}
	template.HTMLEscape(, []byte())
}

// convertQuotes returns text with each `` replaced by a unicode left quote
// and each '' replaced by a unicode right quote.
func convertQuotes(text string) string {
	return unicodeQuoteReplacer.Replace(text)
}

// Regular expression source strings used to find URLs and Go identifiers
// in comment text.
const (
	// Regexp for Go identifiers
	identRx = `[\pL_][\pL_0-9]*`

	// Regexp for URLs
	// Match parens, and check later for balance - see #5043, #22285
	// Match .,:;?! within path, but not at end - see #18139, #16565
	// This excludes some rare yet valid urls ending in common punctuation
	// in order to allow sentences ending in URLs.

	// protocol (required) e.g. http
	protoPart = `(https?|ftp|file|gopher|mailto|nntp)`
	// host (required) e.g. www.example.com or [::1]:8080
	hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
	// path+query+fragment (optional) e.g. /path/index.html?q=foo#bar
	pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*`

	// urlRx is the complete URL pattern: protocol, "://", host, optional path.
	urlRx = protoPart + `://` + hostPart + pathPart
)

// matchRx matches either a URL (first alternative, capture group 1)
// or a Go identifier (second alternative).
var matchRx = lazyregexp.New(`(` + urlRx + `)|(` + identRx + `)`)

// HTML fragments written around links, italics, paragraphs,
// preformatted blocks and headings when producing HTML output.
var (
	html_a      = []byte(`<a href="`)
	html_aq     = []byte(`">`)
	html_enda   = []byte("</a>")
	html_i      = []byte("<i>")
	html_endi   = []byte("</i>")
	html_p      = []byte("<p>\n")
	html_endp   = []byte("</p>\n")
	html_pre    = []byte("<pre>")
	html_endpre = []byte("</pre>\n")
	html_h      = []byte(`<h3 id="`)
	html_hq     = []byte(`">`)
	html_endh   = []byte("</h3>\n")
)

// Emphasize and escape a line of text for HTML. URLs are converted into links;
// if the URL also appears in the words map, the link is taken from the map (if
// the corresponding map value is the empty string, the URL is not converted
// into a link). Go identifiers that appear in the words map are italicized; if
// the corresponding map value is not the empty string, it is considered a URL
// and the word is converted into a link. If nice is set, the remaining text's
// appearance is improved where it makes sense (e.g., `` is turned into &ldquo;
// and '' into &rdquo;).
func emphasize( io.Writer,  string,  map[string]string,  bool) {
	for {
		 := matchRx.FindStringSubmatchIndex()
		if  == nil {
			break
		}
		// m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)

		// write text before match
		commentEscape(, [0:[0]], )

		// adjust match for URLs
		 := [[0]:[1]]
		if strings.Contains(, "://") {
			,  := [0], [1]
			for ,  := range []string{"()", "{}", "[]"} {
				,  := [:1], [1:] // E.g., "(" and ")"
				// require opening parentheses before closing parentheses (#22285)
				if  := strings.Index(, );  >= 0 &&  < strings.Index(, ) {
					 =  + 
					 = [:]
				}
				// require balanced pairs of parentheses (#5043)
				for  := 0; strings.Count(, ) != strings.Count(, ) &&  < 10; ++ {
					 = strings.LastIndexAny([:], )
					 = [:]
				}
			}
			if  != [1] {
				// redo matching with shortened line for correct indices
				 = matchRx.FindStringSubmatchIndex([:[0]+len()])
			}
		}

		// analyze match
		 := ""
		 := false
		if  != nil {
			,  = []
		}
		if [2] >= 0 {
			// match against first parenthesized sub-regexp; must be match against urlRx
			if ! {
				// no alternative URL in words list, use match instead
				 = 
			}
			 = false // don't italicize URLs
		}

		// write match
		if len() > 0 {
			.Write(html_a)
			template.HTMLEscape(, []byte())
			.Write(html_aq)
		}
		if  {
			.Write(html_i)
		}
		commentEscape(, , )
		if  {
			.Write(html_endi)
		}
		if len() > 0 {
			.Write(html_enda)
		}

		// advance
		 = [[1]:]
	}
	commentEscape(, , )
}

// indentLen returns the number of leading space and tab bytes in s.
func indentLen(s string) int {
	i := 0
	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
		i++
	}
	return i
}

// isBlank reports whether s is empty or consists of a single newline.
func isBlank(s string) bool {
	return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
}

// commonPrefix returns the longest byte-wise prefix shared by a and b.
func commonPrefix(a, b string) string {
	i := 0
	for i < len(a) && i < len(b) && a[i] == b[i] {
		i++
	}
	return a[0:i]
}

func unindent( []string) {
	if len() == 0 {
		return
	}

	// compute maximum common white prefix
	 := [0][0:indentLen([0])]
	for ,  := range  {
		if !isBlank() {
			 = commonPrefix(, [0:indentLen()])
		}
	}
	 := len()

	// remove
	for ,  := range  {
		if !isBlank() {
			[] = [:]
		}
	}
}

// heading returns the trimmed line if it passes as a section heading;
// otherwise it returns the empty string.
//
// A heading starts with an uppercase letter, ends in a letter or digit,
// contains none of the excluded punctuation characters, uses "'" only
// in a possessive "'s", and uses "." only when followed by a non-space.
func heading(line string) string {
	line = strings.TrimSpace(line)
	if len(line) == 0 {
		return ""
	}

	// a heading must start with an uppercase letter
	r, _ := utf8.DecodeRuneInString(line)
	if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
		return ""
	}

	// it must end in a letter or digit:
	r, _ = utf8.DecodeLastRuneInString(line)
	if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
		return ""
	}

	// exclude lines with illegal characters. we allow "(),"
	if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") {
		return ""
	}

	// allow "'" for possessive "'s" only
	for b := line; ; {
		i := strings.IndexRune(b, '\'')
		if i < 0 {
			break
		}
		if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
			return "" // not followed by "s "
		}
		b = b[i+2:]
	}

	// allow "." when followed by non-space
	for b := line; ; {
		i := strings.IndexRune(b, '.')
		if i < 0 {
			break
		}
		if i+1 >= len(b) || b[i+1] == ' ' {
			return "" // not followed by non-space
		}
		b = b[i+1:]
	}

	return line
}

// op identifies the kind of a comment block.
type op int

const (
	// opPara marks a paragraph of unindented text.
	opPara op = iota
	// opHead marks a section heading.
	opHead
	// opPre marks a preformatted (indented) run of lines.
	opPre
)

// block is a run of comment lines together with the kind of
// formatting (op) to apply to them.
type block struct {
	op    op
	lines []string
}

// nonAlphaNumRx matches any single character that is not an ASCII letter or digit.
var nonAlphaNumRx = lazyregexp.New(`[^a-zA-Z0-9]`)

func anchorID( string) string {
	// Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols.
	return "hdr-" + nonAlphaNumRx.ReplaceAllString(, "_")
}

// ToHTML converts comment text to formatted HTML.
// The comment was prepared by DocReader,
// so it is known not to have leading, trailing blank lines
// nor to have trailing spaces at the end of lines.
// The comment markers have already been removed.
//
// Each span of unindented non-blank lines is converted into
// a single paragraph. There is one exception to the rule: a span that
// consists of a single line, is followed by another paragraph span,
// begins with a capital letter, and contains no punctuation
// other than parentheses and commas is formatted as a heading.
//
// A span of indented lines is converted into a <pre> block,
// with the common indent prefix removed.
//
// URLs in the comment text are converted into links; if the URL also appears
// in the words map, the link is taken from the map (if the corresponding map
// value is the empty string, the URL is not converted into a link).
//
// A pair of (consecutive) backticks (`) is converted to a unicode left quote (“), and a pair of (consecutive)
// single quotes (') is converted to a unicode right quote (”).
//
// Go identifiers that appear in the words map are italicized; if the corresponding
// map value is not the empty string, it is considered a URL and the word is converted
// into a link.
func ( io.Writer,  string,  map[string]string) {
	for ,  := range blocks() {
		switch .op {
		case opPara:
			.Write(html_p)
			for ,  := range .lines {
				emphasize(, , , true)
			}
			.Write(html_endp)
		case opHead:
			.Write(html_h)
			 := ""
			for ,  := range .lines {
				if  == "" {
					 = anchorID()
					.Write([]byte())
					.Write(html_hq)
				}
				commentEscape(, , true)
			}
			if  == "" {
				.Write(html_hq)
			}
			.Write(html_endh)
		case opPre:
			.Write(html_pre)
			for ,  := range .lines {
				emphasize(, , nil, false)
			}
			.Write(html_endpre)
		}
	}
}

func blocks( string) []block {
	var (
		  []block
		 []string

		   = false
		 = false
	)

	 := func() {
		if  != nil {
			 = append(, block{opPara, })
			 = nil
		}
	}

	 := strings.SplitAfter(, "\n")
	unindent()
	for  := 0;  < len(); {
		 := []
		if isBlank() {
			// close paragraph
			()
			++
			 = true
			continue
		}
		if indentLen() > 0 {
			// close paragraph
			()

			// count indented or blank lines
			 :=  + 1
			for  < len() && (isBlank([]) || indentLen([]) > 0) {
				++
			}
			// but not trailing blank lines
			for  >  && isBlank([-1]) {
				--
			}
			 := [:]
			 = 

			unindent()

			// put those lines in a pre block
			 = append(, block{opPre, })
			 = false
			continue
		}

		if  && ! && +2 < len() &&
			isBlank([+1]) && !isBlank([+2]) && indentLen([+2]) == 0 {
			// current line is non-blank, surrounded by blank lines
			// and the next non-blank line is not indented: this
			// might be a heading.
			if  := heading();  != "" {
				()
				 = append(, block{opHead, []string{}})
				 += 2
				 = true
				continue
			}
		}

		// open paragraph
		 = false
		 = false
		 = append(, [])
		++
	}
	()

	return 
}

// ToText prepares comment text for presentation in textual output.
// It wraps paragraphs of text to width or fewer Unicode code points
// and then prefixes each line with the indent. In preformatted sections
// (such as program text), it prefixes each non-blank line with preIndent.
//
// A pair of (consecutive) backticks (`) is converted to a unicode left quote (“), and a pair of (consecutive)
// single quotes (') is converted to a unicode right quote (”).
func ( io.Writer,  string, ,  string,  int) {
	 := lineWrapper{
		out:    ,
		width:  ,
		indent: ,
	}
	for ,  := range blocks() {
		switch .op {
		case opPara:
			// l.write will add leading newline if required
			for ,  := range .lines {
				 = convertQuotes()
				.write()
			}
			.flush()
		case opHead:
			.Write(nl)
			for ,  := range .lines {
				 = convertQuotes()
				.write( + "\n")
			}
			.flush()
		case opPre:
			.Write(nl)
			for ,  := range .lines {
				if isBlank() {
					.Write([]byte("\n"))
				} else {
					.Write([]byte())
					.Write([]byte())
				}
			}
		}
	}
}

// lineWrapper writes whitespace-separated words to out, wrapping lines
// so that they hold at most width Unicode code points (not counting the
// indent), and starting every output line with indent.
type lineWrapper struct {
	out       io.Writer
	printed   bool   // whether write has been called at least once
	width     int    // maximum line width in code points
	indent    string // written at the start of every output line
	n         int    // code points written on the current line (0 = line not started)
	pendSpace int    // number of spaces (0 or 1) to emit before the next word
}

var nl = []byte("\n")
var space = []byte(" ")
var prefix = []byte("// ")

// write appends the words of text to the output, wrapping as needed.
// If the previous paragraph was flushed, a blank line is emitted first.
// When text starts with "//" and has to be wrapped, continuation lines
// are prefixed with "// " unless the next word already starts with "//".
func (l *lineWrapper) write(text string) {
	if l.n == 0 && l.printed {
		l.out.Write(nl) // blank line before new paragraph
	}
	l.printed = true

	needsPrefix := false
	isComment := strings.HasPrefix(text, "//")
	for _, f := range strings.Fields(text) {
		w := utf8.RuneCountInString(f)
		// wrap if line is too long
		if l.n > 0 && l.n+l.pendSpace+w > l.width {
			l.out.Write(nl)
			l.n = 0
			l.pendSpace = 0
			needsPrefix = isComment && !strings.HasPrefix(f, "//")
		}
		if l.n == 0 {
			l.out.Write([]byte(l.indent))
		}
		if needsPrefix {
			l.out.Write(prefix)
			needsPrefix = false
		}
		l.out.Write(space[:l.pendSpace])
		l.out.Write([]byte(f))
		l.n += l.pendSpace + w
		l.pendSpace = 1
	}
}

// flush terminates the current output line, if one has been started.
func (l *lineWrapper) flush() {
	if l.n == 0 {
		return
	}
	l.out.Write(nl)
	l.pendSpace = 0
	l.n = 0
}