bench-forgejo/vendor/github.com/yuin/goldmark/parser/html_block.go

package parser

import (
	"bytes"
	"regexp"
	"strings"

	"github.com/yuin/goldmark/ast"
	"github.com/yuin/goldmark/text"
	"github.com/yuin/goldmark/util"
)

var allowedBlockTags = map[string]bool{
	"address":    true,
	"article":    true,
	"aside":      true,
	"base":       true,
	"basefont":   true,
	"blockquote": true,
	"body":       true,
	"caption":    true,
	"center":     true,
	"col":        true,
	"colgroup":   true,
	"dd":         true,
	"details":    true,
	"dialog":     true,
	"dir":        true,
	"div":        true,
	"dl":         true,
	"dt":         true,
	"fieldset":   true,
	"figcaption": true,
	"figure":     true,
	"footer":     true,
	"form":       true,
	"frame":      true,
	"frameset":   true,
	"h1":         true,
	"h2":         true,
	"h3":         true,
	"h4":         true,
	"h5":         true,
	"h6":         true,
	"head":       true,
	"header":     true,
	"hr":         true,
	"html":       true,
	"iframe":     true,
	"legend":     true,
	"li":         true,
	"link":       true,
	"main":       true,
	"menu":       true,
	"menuitem":   true,
	"meta":       true,
	"nav":        true,
	"noframes":   true,
	"ol":         true,
	"optgroup":   true,
	"option":     true,
	"p":          true,
	"param":      true,
	"section":    true,
	"source":     true,
	"summary":    true,
	"table":      true,
	"tbody":      true,
	"td":         true,
	"tfoot":      true,
	"th":         true,
	"thead":      true,
	"title":      true,
	"tr":         true,
	"track":      true,
	"ul":         true,
}

var htmlBlockType1OpenRegexp = regexp.MustCompile(`(?i)^[ ]{0,3}<(script|pre|style)(?:\s.*|>.*|/>.*|)\n?$`)
var htmlBlockType1CloseRegexp = regexp.MustCompile(`(?i)^.*</(?:script|pre|style)>.*`)

var htmlBlockType2OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<!\-\-`)
var htmlBlockType2Close = []byte{'-', '-', '>'}

var htmlBlockType3OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\?`)
var htmlBlockType3Close = []byte{'?', '>'}

var htmlBlockType4OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<![A-Z]+.*\n?$`)
var htmlBlockType4Close = []byte{'>'}

var htmlBlockType5OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\!\[CDATA\[`)
var htmlBlockType5Close = []byte{']', ']', '>'}

var htmlBlockType6Regexp = regexp.MustCompile(`^[ ]{0,3}</?([a-zA-Z0-9]+)(?:\s.*|>.*|/>.*|)\n?$`)

var htmlBlockType7Regexp = regexp.MustCompile(`^[ ]{0,3}<(/)?([a-zA-Z0-9]+)(` + attributePattern + `*)(:?>|/>)\s*\n?$`)

type htmlBlockParser struct {
}

var defaultHTMLBlockParser = &htmlBlockParser{}

// NewHTMLBlockParser return a new BlockParser that can parse html
// blocks.
func NewHTMLBlockParser() BlockParser {
	return defaultHTMLBlockParser
}

func (b *htmlBlockParser) Trigger() []byte {
	return []byte{'<'}
}

func (b *htmlBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
	var node *ast.HTMLBlock
	line, segment := reader.PeekLine()
	last := pc.LastOpenedBlock().Node
	if pos := pc.BlockOffset(); pos < 0 || line[pos] != '<' {
		return nil, NoChildren
	}

	if m := htmlBlockType1OpenRegexp.FindSubmatchIndex(line); m != nil {
		node = ast.NewHTMLBlock(ast.HTMLBlockType1)
	} else if htmlBlockType2OpenRegexp.Match(line) {
		node = ast.NewHTMLBlock(ast.HTMLBlockType2)
	} else if htmlBlockType3OpenRegexp.Match(line) {
		node = ast.NewHTMLBlock(ast.HTMLBlockType3)
	} else if htmlBlockType4OpenRegexp.Match(line) {
		node = ast.NewHTMLBlock(ast.HTMLBlockType4)
	} else if htmlBlockType5OpenRegexp.Match(line) {
		node = ast.NewHTMLBlock(ast.HTMLBlockType5)
	} else if match := htmlBlockType7Regexp.FindSubmatchIndex(line); match != nil {
		isCloseTag := match[2] > -1 && bytes.Equal(line[match[2]:match[3]], []byte("/"))
		hasAttr := match[6] != match[7]
		tagName := strings.ToLower(string(line[match[4]:match[5]]))
		_, ok := allowedBlockTags[tagName]
		if ok {
			node = ast.NewHTMLBlock(ast.HTMLBlockType6)
		} else if tagName != "script" && tagName != "style" && tagName != "pre" && !ast.IsParagraph(last) && !(isCloseTag && hasAttr) { // type 7 can not interrupt paragraph
			node = ast.NewHTMLBlock(ast.HTMLBlockType7)
		}
	}
	if node == nil {
		if match := htmlBlockType6Regexp.FindSubmatchIndex(line); match != nil {
			tagName := string(line[match[2]:match[3]])
			_, ok := allowedBlockTags[strings.ToLower(tagName)]
			if ok {
				node = ast.NewHTMLBlock(ast.HTMLBlockType6)
			}
		}
	}
	if node != nil {
		reader.Advance(segment.Len() - 1)
		node.Lines().Append(segment)
		return node, NoChildren
	}
	return nil, NoChildren
}

func (b *htmlBlockParser) Continue(node ast.Node, reader text.Reader, pc Context) State {
	htmlBlock := node.(*ast.HTMLBlock)
	lines := htmlBlock.Lines()
	line, segment := reader.PeekLine()
	var closurePattern []byte

	switch htmlBlock.HTMLBlockType {
	case ast.HTMLBlockType1:
		if lines.Len() == 1 {
			firstLine := lines.At(0)
			if htmlBlockType1CloseRegexp.Match(firstLine.Value(reader.Source())) {
				return Close
			}
		}
		if htmlBlockType1CloseRegexp.Match(line) {
			htmlBlock.ClosureLine = segment
			reader.Advance(segment.Len() - 1)
			return Close
		}
	case ast.HTMLBlockType2:
		closurePattern = htmlBlockType2Close
		fallthrough
	case ast.HTMLBlockType3:
		if closurePattern == nil {
			closurePattern = htmlBlockType3Close
		}
		fallthrough
	case ast.HTMLBlockType4:
		if closurePattern == nil {
			closurePattern = htmlBlockType4Close
		}
		fallthrough
	case ast.HTMLBlockType5:
		if closurePattern == nil {
			closurePattern = htmlBlockType5Close
		}

		if lines.Len() == 1 {
			firstLine := lines.At(0)
			if bytes.Contains(firstLine.Value(reader.Source()), closurePattern) {
				return Close
			}
		}
		if bytes.Contains(line, closurePattern) {
			htmlBlock.ClosureLine = segment
			reader.Advance(segment.Len() - 1)
			return Close
		}

	case ast.HTMLBlockType6, ast.HTMLBlockType7:
		if util.IsBlank(line) {
			return Close
		}
	}
	node.Lines().Append(segment)
	reader.Advance(segment.Len() - 1)
	return Continue | NoChildren
}

func (b *htmlBlockParser) Close(node ast.Node, reader text.Reader, pc Context) {
	// nothing to do
}

func (b *htmlBlockParser) CanInterruptParagraph() bool {
	return true
}

func (b *htmlBlockParser) CanAcceptIndentedLine() bool {
	return false
}