goldmark/parser/raw_html.go
2019-05-05 15:08:50 +09:00

126 lines
3.3 KiB
Go

package parser
import (
"bytes"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
"regexp"
)
type rawHTMLParser struct {
HTMLConfig
}
// NewRawHTMLParser return a new InlineParser that can parse
// inline htmls
func NewRawHTMLParser(opts ...HTMLOption) InlineParser {
p := &rawHTMLParser{}
for _, o := range opts {
o.SetHTMLOption(&p.HTMLConfig)
}
return p
}
func (s *rawHTMLParser) Trigger() []byte {
return []byte{'<'}
}
func (s *rawHTMLParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.Node {
line, _ := block.PeekLine()
if len(line) > 1 && util.IsAlphaNumeric(line[1]) {
return s.parseMultiLineRegexp(openTagRegexp, block, pc)
}
if len(line) > 2 && line[1] == '/' && util.IsAlphaNumeric(line[2]) {
return s.parseMultiLineRegexp(closeTagRegexp, block, pc)
}
if bytes.HasPrefix(line, []byte("<!--")) {
return s.parseMultiLineRegexp(commentRegexp, block, pc)
}
if bytes.HasPrefix(line, []byte("<?")) {
return s.parseSingleLineRegexp(processingInstructionRegexp, block, pc)
}
if len(line) > 2 && line[1] == '!' && line[2] >= 'A' && line[2] <= 'Z' {
return s.parseSingleLineRegexp(declRegexp, block, pc)
}
if bytes.HasPrefix(line, []byte("<![CDATA[")) {
return s.parseMultiLineRegexp(cdataRegexp, block, pc)
}
return nil
}
var tagnamePattern = `([A-Za-z][A-Za-z0-9-]*)`
var attributePattern = `(?:\s+[a-zA-Z_:][a-zA-Z0-9:._-]*(?:\s*=\s*(?:[^\"'=<>` + "`" + `\x00-\x20]+|'[^']*'|"[^"]*"))?)`
var openTagRegexp = regexp.MustCompile("^<" + tagnamePattern + attributePattern + `*\s*/?>`)
var closeTagRegexp = regexp.MustCompile("^</" + tagnamePattern + `\s*>`)
var commentRegexp = regexp.MustCompile(`^<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->`)
var processingInstructionRegexp = regexp.MustCompile(`^(?:<\?).*?(?:\?>)`)
var declRegexp = regexp.MustCompile(`^<![A-Z]+\s+[^>]*>`)
var cdataRegexp = regexp.MustCompile(`<!\[CDATA\[[\s\S]*?\]\]>`)
func (s *rawHTMLParser) parseSingleLineRegexp(reg *regexp.Regexp, block text.Reader, pc Context) ast.Node {
line, segment := block.PeekLine()
match := reg.FindSubmatchIndex(line)
if match == nil {
return nil
}
node := ast.NewRawHTML()
node.Segments.Append(segment.WithStop(segment.Start + match[1]))
block.Advance(match[1])
return node
}
var dummyMatch = [][]byte{}
func (s *rawHTMLParser) parseMultiLineRegexp(reg *regexp.Regexp, block text.Reader, pc Context) ast.Node {
sline, ssegment := block.Position()
var m [][]byte
if s.FilterTags != nil {
m = block.FindSubMatch(reg)
} else {
if block.Match(reg) {
m = dummyMatch
}
}
if m != nil {
if s.FilterTags != nil {
tagName := string(m[1])
if _, ok := s.FilterTags[tagName]; ok {
return nil
}
}
node := ast.NewRawHTML()
eline, esegment := block.Position()
block.SetPosition(sline, ssegment)
for {
line, segment := block.PeekLine()
if line == nil {
break
}
l, _ := block.Position()
start := segment.Start
if l == sline {
start = ssegment.Start
}
end := segment.Stop
if l == eline {
end = esegment.Start
}
node.Segments.Append(text.NewSegment(start, end))
if l == eline {
block.Advance(end - start)
break
} else {
block.AdvanceLine()
}
}
return node
}
return nil
}
func (s *rawHTMLParser) CloseBlock(parent ast.Node, pc Context) {
// nothing to do
}