27757714d0
* Move to goldmark Markdown rendering moved from blackfriday to the goldmark. Multiple subtle changes required to the goldmark extensions to keep current rendering and defaults. Can go further with goldmark linkify and have this work within markdown rendering making the link processor unnecessary. Need to think about how to go about allowing extensions - at present it seems that these would be hard to do without recompilation. * linter fixes Co-authored-by: Lauris BH <lauris@nix.lv>
228 lines
6 KiB
Go
228 lines
6 KiB
Go
package parser
|
|
|
|
import (
|
|
"bytes"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"github.com/yuin/goldmark/ast"
|
|
"github.com/yuin/goldmark/text"
|
|
"github.com/yuin/goldmark/util"
|
|
)
|
|
|
|
// allowedBlockTags is the set of lower-case HTML tag names that Open accepts
// as the start of a type 6 HTML block. Every listed name maps to true; any
// other name is absent from the map.
var allowedBlockTags = func() map[string]bool {
	names := []string{
		"address", "article", "aside",
		"base", "basefont", "blockquote", "body",
		"caption", "center", "col", "colgroup",
		"dd", "details", "dialog", "dir", "div", "dl", "dt",
		"fieldset", "figcaption", "figure", "footer", "form", "frame", "frameset",
		"h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hr", "html",
		"iframe",
		"legend", "li", "link",
		"main", "menu", "menuitem", "meta",
		"nav", "noframes",
		"ol", "optgroup", "option",
		"p", "param",
		"section", "source", "summary",
		"table", "tbody", "td", "tfoot", "th", "thead", "title", "tr", "track",
		"ul",
	}
	tags := make(map[string]bool, len(names))
	for _, name := range names {
		tags[name] = true
	}
	return tags
}()
|
|
|
|
// Start and end conditions for the HTML block kinds recognised by
// htmlBlockParser. Every opening pattern tolerates up to three leading spaces.
var (
	// Type 1: a <script>, <pre> or <style> opening tag (case-insensitive),
	// closed by a line containing the matching kind of end tag.
	htmlBlockType1OpenRegexp  = regexp.MustCompile(`(?i)^[ ]{0,3}<(script|pre|style)(?:\s.*|>.*|/>.*|)\n?$`)
	htmlBlockType1CloseRegexp = regexp.MustCompile(`(?i)^.*</(?:script|pre|style)>.*`)

	// Type 2: opened by "<!--", closed by a line containing "-->".
	htmlBlockType2OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<!\-\-`)
	htmlBlockType2Close      = []byte("-->")

	// Type 3: opened by "<?", closed by a line containing "?>".
	htmlBlockType3OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\?`)
	htmlBlockType3Close      = []byte("?>")

	// Type 4: opened by "<!" followed by upper-case ASCII letters, closed by
	// a line containing ">".
	htmlBlockType4OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<![A-Z]+.*\n?$`)
	htmlBlockType4Close      = []byte(">")

	// Type 5: opened by "<![CDATA[", closed by a line containing "]]>".
	htmlBlockType5OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\!\[CDATA\[`)
	htmlBlockType5Close      = []byte("]]>")

	// Type 6: an opening or closing tag; submatch 1 is the tag name, which
	// Open looks up in allowedBlockTags.
	htmlBlockType6Regexp = regexp.MustCompile(`^[ ]{0,3}</?([a-zA-Z0-9]+)(?:\s.*|>.*|/>.*|)\n?$`)
)
|
|
|
|
// htmlBlockType7Regexp matches a line consisting of a single complete opening
// or closing tag: up to three leading spaces, the tag, optional trailing
// whitespace and an optional newline.
//
// Open reads three submatches: 1 is the optional "/" of a closing tag, 2 is
// the tag name and 3 is the run of attributes (empty when there are none).
// The tag terminator is a non-capturing group that accepts only ">" or "/>".
// It was previously written "(:?>|/>)", which also accepted a stray ":"
// before ">" (for example "<span:>").
var htmlBlockType7Regexp = regexp.MustCompile(`^[ ]{0,3}<(/)?([a-zA-Z0-9]+)(` + attributePattern + `*)(?:>|/>)\s*\n?$`)
|
|
|
|
// htmlBlockParser parses raw HTML blocks. It has no fields, so a single
// value can be shared.
type htmlBlockParser struct{}

// defaultHTMLBlockParser is the shared instance handed out by
// NewHTMLBlockParser.
var defaultHTMLBlockParser = new(htmlBlockParser)
|
|
|
|
// NewHTMLBlockParser return a new BlockParser that can parse html
|
|
// blocks.
|
|
func NewHTMLBlockParser() BlockParser {
|
|
return defaultHTMLBlockParser
|
|
}
|
|
|
|
func (b *htmlBlockParser) Trigger() []byte {
|
|
return []byte{'<'}
|
|
}
|
|
|
|
func (b *htmlBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
|
|
var node *ast.HTMLBlock
|
|
line, segment := reader.PeekLine()
|
|
last := pc.LastOpenedBlock().Node
|
|
if pos := pc.BlockOffset(); pos < 0 || line[pos] != '<' {
|
|
return nil, NoChildren
|
|
}
|
|
|
|
if m := htmlBlockType1OpenRegexp.FindSubmatchIndex(line); m != nil {
|
|
node = ast.NewHTMLBlock(ast.HTMLBlockType1)
|
|
} else if htmlBlockType2OpenRegexp.Match(line) {
|
|
node = ast.NewHTMLBlock(ast.HTMLBlockType2)
|
|
} else if htmlBlockType3OpenRegexp.Match(line) {
|
|
node = ast.NewHTMLBlock(ast.HTMLBlockType3)
|
|
} else if htmlBlockType4OpenRegexp.Match(line) {
|
|
node = ast.NewHTMLBlock(ast.HTMLBlockType4)
|
|
} else if htmlBlockType5OpenRegexp.Match(line) {
|
|
node = ast.NewHTMLBlock(ast.HTMLBlockType5)
|
|
} else if match := htmlBlockType7Regexp.FindSubmatchIndex(line); match != nil {
|
|
isCloseTag := match[2] > -1 && bytes.Equal(line[match[2]:match[3]], []byte("/"))
|
|
hasAttr := match[6] != match[7]
|
|
tagName := strings.ToLower(string(line[match[4]:match[5]]))
|
|
_, ok := allowedBlockTags[tagName]
|
|
if ok {
|
|
node = ast.NewHTMLBlock(ast.HTMLBlockType6)
|
|
} else if tagName != "script" && tagName != "style" && tagName != "pre" && !ast.IsParagraph(last) && !(isCloseTag && hasAttr) { // type 7 can not interrupt paragraph
|
|
node = ast.NewHTMLBlock(ast.HTMLBlockType7)
|
|
}
|
|
}
|
|
if node == nil {
|
|
if match := htmlBlockType6Regexp.FindSubmatchIndex(line); match != nil {
|
|
tagName := string(line[match[2]:match[3]])
|
|
_, ok := allowedBlockTags[strings.ToLower(tagName)]
|
|
if ok {
|
|
node = ast.NewHTMLBlock(ast.HTMLBlockType6)
|
|
}
|
|
}
|
|
}
|
|
if node != nil {
|
|
reader.Advance(segment.Len() - 1)
|
|
node.Lines().Append(segment)
|
|
return node, NoChildren
|
|
}
|
|
return nil, NoChildren
|
|
}
|
|
|
|
func (b *htmlBlockParser) Continue(node ast.Node, reader text.Reader, pc Context) State {
|
|
htmlBlock := node.(*ast.HTMLBlock)
|
|
lines := htmlBlock.Lines()
|
|
line, segment := reader.PeekLine()
|
|
var closurePattern []byte
|
|
|
|
switch htmlBlock.HTMLBlockType {
|
|
case ast.HTMLBlockType1:
|
|
if lines.Len() == 1 {
|
|
firstLine := lines.At(0)
|
|
if htmlBlockType1CloseRegexp.Match(firstLine.Value(reader.Source())) {
|
|
return Close
|
|
}
|
|
}
|
|
if htmlBlockType1CloseRegexp.Match(line) {
|
|
htmlBlock.ClosureLine = segment
|
|
reader.Advance(segment.Len() - 1)
|
|
return Close
|
|
}
|
|
case ast.HTMLBlockType2:
|
|
closurePattern = htmlBlockType2Close
|
|
fallthrough
|
|
case ast.HTMLBlockType3:
|
|
if closurePattern == nil {
|
|
closurePattern = htmlBlockType3Close
|
|
}
|
|
fallthrough
|
|
case ast.HTMLBlockType4:
|
|
if closurePattern == nil {
|
|
closurePattern = htmlBlockType4Close
|
|
}
|
|
fallthrough
|
|
case ast.HTMLBlockType5:
|
|
if closurePattern == nil {
|
|
closurePattern = htmlBlockType5Close
|
|
}
|
|
|
|
if lines.Len() == 1 {
|
|
firstLine := lines.At(0)
|
|
if bytes.Contains(firstLine.Value(reader.Source()), closurePattern) {
|
|
return Close
|
|
}
|
|
}
|
|
if bytes.Contains(line, closurePattern) {
|
|
htmlBlock.ClosureLine = segment
|
|
reader.Advance(segment.Len() - 1)
|
|
return Close
|
|
}
|
|
|
|
case ast.HTMLBlockType6, ast.HTMLBlockType7:
|
|
if util.IsBlank(line) {
|
|
return Close
|
|
}
|
|
}
|
|
node.Lines().Append(segment)
|
|
reader.Advance(segment.Len() - 1)
|
|
return Continue | NoChildren
|
|
}
|
|
|
|
func (b *htmlBlockParser) Close(node ast.Node, reader text.Reader, pc Context) {
|
|
// nothing to do
|
|
}
|
|
|
|
func (b *htmlBlockParser) CanInterruptParagraph() bool {
|
|
return true
|
|
}
|
|
|
|
func (b *htmlBlockParser) CanAcceptIndentedLine() bool {
|
|
return false
|
|
}
|