Issue #3 : failed to parse attributes

This commit is contained in:
yuin 2019-05-15 17:43:57 +09:00
parent 2988e183ed
commit 31fd0f6b4c
2 changed files with 26 additions and 20 deletions

View file

@ -119,13 +119,15 @@ func (b *atxHeadingParser) Open(parent ast.Node, reader text.Reader, pc Context)
if i < stop-1 || line[i] == '{' { if i < stop-1 || line[i] == '{' {
as := i + 1 as := i + 1
for as < stop { for as < stop {
ai := util.FindAttributeIndex(line[as:], true) ai, skip := util.FindAttributeIndex(line[as:], true)
if ai[0] < 0 { if ai[0] < 0 {
break break
} }
node.SetAttribute(line[as+ai[0]:as+ai[1]], node.SetAttribute(line[as+ai[0]:as+ai[1]],
line[as+ai[2]:as+ai[3]]) line[as+ai[2]:as+ai[3]])
as += ai[3] as += ai[3] + skip
}
for ; as < stop && util.IsSpace(line[as]); as++ {
} }
if line[as] == '}' && (as > stop-2 || util.IsBlank(line[as:])) { if line[as] == '}' && (as > stop-2 || util.IsBlank(line[as:])) {
parsed = true parsed = true

View file

@ -595,7 +595,7 @@ retry:
return nil return nil
} }
for as < len(b) { for as < len(b) {
ai := FindAttributeIndex(b[as:], canEscapeQuotes) ai, skip := FindAttributeIndex(b[as:], canEscapeQuotes)
if ai[0] < 0 { if ai[0] < 0 {
break break
} }
@ -604,7 +604,7 @@ retry:
result = [][4]int{} result = [][4]int{}
} }
result = append(result, [4]int{as + ai[0], as + ai[1], as + ai[2], as + ai[3]}) result = append(result, [4]int{as + ai[0], as + ai[1], as + ai[2], as + ai[3]})
as += ai[3] as += ai[3] + skip
} }
if b[as] == '}' && (as > len(b)-2 || IsBlank(b[as:])) { if b[as] == '}' && (as > len(b)-2 || IsBlank(b[as:])) {
return result return result
@ -620,15 +620,15 @@ retry:
// FindHTMLAttributeIndex returns an int array that elements are // FindHTMLAttributeIndex returns an int array that elements are
// [name_start, name_stop, value_start, value_stop]. // [name_start, name_stop, value_start, value_stop].
// value_start and value_stop does not include " or '. // value_start and value_stop does not include " or '.
// If no attributes found, it returns [4]int{-1, -1, -1, -1}. // If no attributes found, it returns ([4]int{-1, -1, -1, -1}, 0).
func FindAttributeIndex(b []byte, canEscapeQuotes bool) [4]int { func FindAttributeIndex(b []byte, canEscapeQuotes bool) ([4]int, int) {
result := [4]int{-1, -1, -1, -1} result := [4]int{-1, -1, -1, -1}
i := 0 i := 0
l := len(b) l := len(b)
for ; i < l && IsSpace(b[i]); i++ { for ; i < l && IsSpace(b[i]); i++ {
} }
if i >= l { if i >= l {
return result return result, 0
} }
c := b[i] c := b[i]
if c == '#' || c == '.' { if c == '#' || c == '.' {
@ -639,7 +639,7 @@ func FindAttributeIndex(b []byte, canEscapeQuotes bool) [4]int {
for ; i < l && !IsSpace(b[i]) && (!IsPunct(b[i]) || b[i] == '_' || b[i] == '-'); i++ { for ; i < l && !IsSpace(b[i]) && (!IsPunct(b[i]) || b[i] == '_' || b[i] == '-'); i++ {
} }
result[3] = i result[3] = i
return result return result, 0
} }
return FindHTMLAttributeIndex(b, canEscapeQuotes) return FindHTMLAttributeIndex(b, canEscapeQuotes)
} }
@ -649,19 +649,19 @@ func FindAttributeIndex(b []byte, canEscapeQuotes bool) [4]int {
// [name_start, name_stop, value_start, value_stop]. // [name_start, name_stop, value_start, value_stop].
// value_start and value_stop does not include " or '. // value_start and value_stop does not include " or '.
// If no attributes found, it returns [4]int{-1, -1, -1, -1}. // If no attributes found, it returns [4]int{-1, -1, -1, -1}.
func FindHTMLAttributeIndex(b []byte, canEscapeQuotes bool) [4]int { func FindHTMLAttributeIndex(b []byte, canEscapeQuotes bool) ([4]int, int) {
result := [4]int{-1, -1, -1, -1} result := [4]int{-1, -1, -1, -1}
i := 0 i := 0
l := len(b) l := len(b)
for ; i < l && IsSpace(b[i]); i++ { for ; i < l && IsSpace(b[i]); i++ {
} }
if i >= l { if i >= l {
return result return result, 0
} }
c := b[i] c := b[i]
if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
c == '_' || c == ':') { c == '_' || c == ':') {
return result return result, 0
} }
result[0] = i result[0] = i
for ; i < l; i++ { for ; i < l; i++ {
@ -676,24 +676,25 @@ func FindHTMLAttributeIndex(b []byte, canEscapeQuotes bool) [4]int {
for ; i < l && IsSpace(b[i]); i++ { for ; i < l && IsSpace(b[i]); i++ {
} }
if i >= l { if i >= l {
return [4]int{-1, -1, -1, -1} return [4]int{-1, -1, -1, -1}, 0
} }
if b[i] != '=' { if b[i] != '=' {
return [4]int{-1, -1, -1, -1} return [4]int{-1, -1, -1, -1}, 0
} }
i++ i++
for ; i < l && IsSpace(b[i]); i++ { for ; i < l && IsSpace(b[i]); i++ {
} }
if i >= l { if i >= l {
return [4]int{-1, -1, -1, -1} return [4]int{-1, -1, -1, -1}, 0
} }
skip := 0
if b[i] == '"' { if b[i] == '"' {
i++ i++
result[2] = i result[2] = i
if canEscapeQuotes { if canEscapeQuotes {
pos := FindClosure(b[i:], '"', '"', false, false) pos := FindClosure(b[i:], '"', '"', false, false)
if pos < 0 { if pos < 0 {
return [4]int{-1, -1, -1, -1} return [4]int{-1, -1, -1, -1}, 0
} }
result[3] = pos + i result[3] = pos + i
} else { } else {
@ -701,16 +702,17 @@ func FindHTMLAttributeIndex(b []byte, canEscapeQuotes bool) [4]int {
} }
result[3] = i result[3] = i
if result[2] == result[3] || i == l && b[l-1] != '"' { if result[2] == result[3] || i == l && b[l-1] != '"' {
return [4]int{-1, -1, -1, -1} return [4]int{-1, -1, -1, -1}, 0
} }
} }
skip = 1
} else if b[i] == '\'' { } else if b[i] == '\'' {
i++ i++
result[2] = i result[2] = i
if canEscapeQuotes { if canEscapeQuotes {
pos := FindClosure(b[i:], '\'', '\'', false, false) pos := FindClosure(b[i:], '\'', '\'', false, false)
if pos < 0 { if pos < 0 {
return [4]int{-1, -1, -1, -1} return [4]int{-1, -1, -1, -1}, 0
} }
result[3] = pos + i result[3] = pos + i
} else { } else {
@ -718,25 +720,27 @@ func FindHTMLAttributeIndex(b []byte, canEscapeQuotes bool) [4]int {
} }
result[3] = i result[3] = i
if result[2] == result[3] || i == l && b[l-1] != '\'' { if result[2] == result[3] || i == l && b[l-1] != '\'' {
return [4]int{-1, -1, -1, -1} return [4]int{-1, -1, -1, -1}, 0
} }
} }
skip = 1
} else { } else {
result[2] = i result[2] = i
for ; i < l; i++ { for ; i < l; i++ {
c = b[i] c = b[i]
if c == '\\' || c == '"' || c == '\'' || if c == '\\' || c == '"' || c == '\'' ||
c == '=' || c == '<' || c == '>' || c == '`' || c == '=' || c == '<' || c == '>' || c == '`' ||
c == '{' || c == '}' ||
(c >= 0 && c <= 0x20) { (c >= 0 && c <= 0x20) {
break break
} }
} }
result[3] = i result[3] = i
if result[2] == result[3] { if result[2] == result[3] {
return [4]int{-1, -1, -1, -1} return [4]int{-1, -1, -1, -1}, 0
} }
} }
return result return result, skip
} }
// FindURLIndex returns a stop index value if the given bytes seem an URL. // FindURLIndex returns a stop index value if the given bytes seem an URL.