From 20a276ea451cae1f07009486b622302a7b832673 Mon Sep 17 00:00:00 2001 From: yuin Date: Sat, 16 Oct 2021 19:39:51 +0900 Subject: [PATCH] #248 - 4 --- _test/extra.txt | 16 ++++++++++++++++ parser/html_block.go | 4 ++-- parser/raw_html.go | 5 +++-- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/_test/extra.txt b/_test/extra.txt index 9b90cf2..b5246d3 100644 --- a/_test/extra.txt +++ b/_test/extra.txt @@ -510,3 +510,19 @@ _a[b_c_](d) //- - - - - - - - -//

d

//= = = = = = = = = = = = = = = = = = = = = = = =// + +40: Invalid HTML tag names +//- - - - - - - - -// +<1> + +<a:> + +<a\f> + +< p> +//- - - - - - - - -//
+<p>&lt;1&gt;</p>
+<p>&lt;a:&gt;</p>
+<p>&lt;a\f&gt;</p>
+<p>&lt; p&gt;</p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// diff --git a/parser/html_block.go b/parser/html_block.go index db2cf11..531d2fb 100644 --- a/parser/html_block.go +++ b/parser/html_block.go @@ -91,9 +91,9 @@ var htmlBlockType4Close = []byte{'>'} var htmlBlockType5OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\!\[CDATA\[`) var htmlBlockType5Close = []byte{']', ']', '>'} -var htmlBlockType6Regexp = regexp.MustCompile(`^[ ]{0,3}.*|/>.*|)\n?$`) +var htmlBlockType6Regexp = regexp.MustCompile(`^[ ]{0,3}<(?:/[ ]*)?([a-zA-Z]+[a-zA-Z0-9\-]*)(?:[ ].*|>.*|/>.*|)\n?$`) -var htmlBlockType7Regexp = regexp.MustCompile(`^[ ]{0,3}<(/)?\s*([a-zA-Z0-9\-]+)(` + attributePattern + `*)\s*(:?>|/>)\s*\n?$`) +var htmlBlockType7Regexp = regexp.MustCompile(`^[ ]{0,3}<(/[ ]*)?([a-zA-Z]+[a-zA-Z0-9\-]*)(` + attributePattern + `*)[ ]*(?:>|/>)[ ]*\n?$`) type htmlBlockParser struct { } diff --git a/parser/raw_html.go b/parser/raw_html.go index 7fd696c..43bbab7 100644 --- a/parser/raw_html.go +++ b/parser/raw_html.go @@ -48,8 +48,9 @@ func (s *rawHTMLParser) Parse(parent ast.Node, block text.Reader, pc Context) as } var tagnamePattern = `([A-Za-z][A-Za-z0-9-]*)` -var attributePattern = `(?:\s+[a-zA-Z_:][a-zA-Z0-9:._-]*(?:\s*=\s*(?:[^\"'=<>` + "`" + `\x00-\x20]+|'[^']*'|"[^"]*"))?)` -var openTagRegexp = regexp.MustCompile("^<" + tagnamePattern + attributePattern + `*\s*/?>`) + +var attributePattern = `(?:[\n \t]+[a-zA-Z_:][a-zA-Z0-9:._-]*(?:[\n \t]*=[\n \t]*(?:[^\"'=<>` + "`" + `\x00-\x20]+|'[^']*'|"[^"]*"))?)` +var openTagRegexp = regexp.MustCompile("^<" + tagnamePattern + attributePattern + `*[ \t]*/?>`) var closeTagRegexp = regexp.MustCompile("^</" + tagnamePattern + `\s*>`) var commentRegexp = regexp.MustCompile(`^<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->`) var processingInstructionRegexp = regexp.MustCompile(`^(?:<\?).*?(?:\?>)`)