Fix bug found in fuzzing

2025-03-04 23:04:52 +00:00 · 2019-07-24 20:16:54 +09:00 · 2019-07-24 20:16:54 +09:00 · 19b18e85fc
commit 19b18e85fc
parent 883918a85c
12 changed files with 181 additions and 67 deletions
--- a/4
+++ b/4
@ -7,10 +7,10 @@ cov: test
 	go tool cover -html=profile.out

 fuzz:
-	which go-fuzz 2>&1 > /dev/null || (GO111MODULE=off go get -u github.com/dvyukov/go-fuzz/go-fuzz github.com/dvyukov/go-fuzz/go-fuzz-build; GO111MODULE=off go get -d github.com/dvyukov/go-fuzz-corpus; true)
-	cd ./fuzz && go-fuzz-build
+	which go-fuzz > /dev/null 2>&1 || (GO111MODULE=off go get -u github.com/dvyukov/go-fuzz/go-fuzz github.com/dvyukov/go-fuzz/go-fuzz-build; GO111MODULE=off go get -d github.com/dvyukov/go-fuzz-corpus; true)
 	rm -rf ./fuzz/corpus
 	rm -rf ./fuzz/crashers
 	rm -rf ./fuzz/suppressions
 	rm -f ./fuzz/fuzz-fuzz.zip
+	cd ./fuzz && go-fuzz-build
 	cd ./fuzz && go-fuzz
--- a/ast/block.go
+++ b/ast/block.go
@ -369,7 +369,10 @@ type ListItem struct {

 // Dump implements Node.Dump.
 func (n *ListItem) Dump(source []byte, level int) {
-	DumpHelper(n, source, level, nil, nil)
+	m := map[string]string{
+		"Offset": fmt.Sprintf("%d", n.Offset),
+	}
+	DumpHelper(n, source, level, m, nil)
 }

 // KindListItem is a NodeKind of the ListItem node.
--- a/extension/definition_list.go
+++ b/extension/definition_list.go
@ -28,7 +28,8 @@ func (b *definitionListParser) Open(parent gast.Node, reader text.Reader, pc par
 	}
 	line, _ := reader.PeekLine()
 	pos := pc.BlockOffset()
-	if pos < 0 || line[pos] != ':' {
+	indent := pc.BlockIndent()
+	if pos < 0 || line[pos] != ':' || indent != 0 {
 		return nil, parser.NoChildren
 	}

@ -45,6 +46,7 @@ func (b *definitionListParser) Open(parent gast.Node, reader text.Reader, pc par

 	para, lastIsParagraph := last.(*gast.Paragraph)
 	var list *ast.DefinitionList
+	status := parser.HasChildren
 	var ok bool
 	if lastIsParagraph {
 		list, ok = last.PreviousSibling().(*ast.DefinitionList)
@ -53,6 +55,7 @@ func (b *definitionListParser) Open(parent gast.Node, reader text.Reader, pc par
 			list.TemporaryParagraph = para
 		} else { // is first item
 			list = ast.NewDefinitionList(w, para)
+			status |= parser.RequireParagraph
 		}
 	} else if list, ok = last.(*ast.DefinitionList); ok { // multiple description
 		list.Offset = w
@ -61,7 +64,7 @@ func (b *definitionListParser) Open(parent gast.Node, reader text.Reader, pc par
 		return nil, parser.NoChildren
 	}

-	return list, parser.HasChildren
+	return list, status
 }

 func (b *definitionListParser) Continue(node gast.Node, reader text.Reader, pc parser.Context) parser.State {
@ -105,7 +108,8 @@ func NewDefinitionDescriptionParser() parser.BlockParser {
 func (b *definitionDescriptionParser) Open(parent gast.Node, reader text.Reader, pc parser.Context) (gast.Node, parser.State) {
 	line, _ := reader.PeekLine()
 	pos := pc.BlockOffset()
-	if pos < 0 || line[pos] != ':' {
+	indent := pc.BlockIndent()
+	if pos < 0 || line[pos] != ':' || indent != 0 {
 		return nil, parser.NoChildren
 	}
 	list, _ := parent.(*ast.DefinitionList)
--- a/extension/footnote.go
+++ b/extension/footnote.go
@ -84,11 +84,6 @@ func (b *footnoteBlockParser) Close(node gast.Node, reader text.Reader, pc parse
 	} else {
 		list = ast.NewFootnoteList()
 		pc.Set(footnoteListKey, list)
-		var root gast.Node
-		for n := node; n != nil; n = n.Parent() {
-			root = n
-		}
-		root.AppendChild(root, list)
 	}
 	node.Parent().RemoveChild(node.Parent(), node)
 	n := node.(*ast.Footnote)
@ -176,7 +171,6 @@ func (a *footnoteASTTransformer) Transform(node *gast.Document, reader text.Read
 	var list *ast.FootnoteList
 	if tlist := pc.Get(footnoteListKey); tlist != nil {
 		list = tlist.(*ast.FootnoteList)
-		list.Parent().RemoveChild(list.Parent(), list)
 	} else {
 		return
 	}
@ -254,7 +248,7 @@ func (r *FootnoteHTMLRenderer) renderFootnoteList(w util.BufWriter, source []byt
 		_, _ = w.WriteString("<ol>\n")
 	} else {
 		_, _ = w.WriteString("</ol>\n")
-		_, _ = w.WriteString("<")
+		_, _ = w.WriteString("</")
 		_, _ = w.WriteString(tag)
 		_, _ = w.WriteString(">\n")
 	}
--- a/go.sum
+++ b/go.sum
--- a/parser/code_block.go
+++ b/parser/code_block.go
@ -21,7 +21,7 @@ func NewCodeBlockParser() BlockParser {
 func (b *codeBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
 	line, segment := reader.PeekLine()
 	pos, padding := util.IndentPosition(line, reader.LineOffset(), 4)
-	if pos < 0 {
+	if pos < 0 || util.IsBlank(line) {
 		return nil, NoChildren
 	}
 	node := ast.NewCodeBlock()
--- a/parser/fcode_block.go
+++ b/parser/fcode_block.go
@ -23,6 +23,7 @@ type fenceData struct {
 	char   byte
 	indent int
 	length int
+	node   ast.Node
 }

 var fencedCodeBlockInfoKey = NewContextKey()
@ -48,7 +49,7 @@ func (b *fencedCodeBlockParser) Open(parent ast.Node, reader text.Reader, pc Con
 		left := util.TrimLeftSpaceLength(rest)
 		right := util.TrimRightSpaceLength(rest)
 		if left < len(rest)-right {
-			infoStart, infoStop := segment.Start+i+left, segment.Stop-right
+			infoStart, infoStop := segment.Start-segment.Padding+i+left, segment.Stop-right
 			value := rest[left : len(rest)-right]
 			if fenceChar == '`' && bytes.IndexByte(value, '`') > -1 {
 				return nil, NoChildren
@ -57,8 +58,8 @@ func (b *fencedCodeBlockParser) Open(parent ast.Node, reader text.Reader, pc Con
 			}
 		}
 	}
-	pc.Set(fencedCodeBlockInfoKey, &fenceData{fenceChar, findent, oFenceLength})
 	node := ast.NewFencedCodeBlock(info)
+	pc.Set(fencedCodeBlockInfoKey, &fenceData{fenceChar, findent, oFenceLength, node})
 	return node, NoChildren

 }
@ -79,15 +80,18 @@ func (b *fencedCodeBlockParser) Continue(node ast.Node, reader text.Reader, pc C
 	}

 	pos, padding := util.DedentPosition(line, fdata.indent)
-	seg := text.NewSegmentPadding(segment.Start+pos, segment.Stop, padding)
+	seg := text.NewSegmentPadding(segment.Start+pos-segment.Padding, segment.Stop, padding)
 	node.Lines().Append(seg)
 	reader.AdvanceAndSetPadding(segment.Stop-segment.Start-pos-1, padding)
 	return Continue | NoChildren
 }

 func (b *fencedCodeBlockParser) Close(node ast.Node, reader text.Reader, pc Context) {
+	fdata := pc.Get(fencedCodeBlockInfoKey).(*fenceData)
+	if fdata.node == node {
 		pc.Set(fencedCodeBlockInfoKey, nil)
 	}
+}

 func (b *fencedCodeBlockParser) CanInterruptParagraph() bool {
 	return true
--- a/parser/paragraph.go
+++ b/parser/paragraph.go
@ -40,6 +40,11 @@ func (b *paragraphParser) Continue(node ast.Node, reader text.Reader, pc Context
 }

 func (b *paragraphParser) Close(node ast.Node, reader text.Reader, pc Context) {
+	parent := node.Parent()
+	if parent == nil {
+		// paragraph has been transformed
+		return
+	}
 	lines := node.Lines()
 	if lines.Len() != 0 {
 		// trim trailing spaces
--- a/parser/parser.go
+++ b/parser/parser.go
@ -163,6 +163,15 @@ type Context interface {
 	// This value is valid only for BlockParser.Open.
 	SetBlockOffset(int)

+	// BlockIndent returns an indent width on current line.
+	// This value is valid only for BlockParser.Open.
+	// BlockIndent returns -1 if current line is blank.
+	BlockIndent() int
+
+	// SetBlockIndent sets an indent width on current line.
+	// This value is valid only for BlockParser.Open.
+	SetBlockIndent(int)
+
 	// FirstDelimiter returns a first delimiter of the current delimiter list.
 	FirstDelimiter() *Delimiter

@ -194,6 +203,7 @@ type parseContext struct {
 	ids           IDs
 	refs          map[string]Reference
 	blockOffset   int
+	blockIndent   int
 	delimiters    *Delimiter
 	lastDelimiter *Delimiter
 	openedBlocks  []Block
@ -205,7 +215,8 @@ func NewContext() Context {
 		store:         make([]interface{}, ContextKeyMax+1),
 		refs:          map[string]Reference{},
 		ids:           newIDs(),
-		blockOffset:   0,
+		blockOffset:   -1,
+		blockIndent:   -1,
 		delimiters:    nil,
 		lastDelimiter: nil,
 		openedBlocks:  []Block{},
@ -232,6 +243,14 @@ func (p *parseContext) SetBlockOffset(v int) {
 	p.blockOffset = v
 }

+func (p *parseContext) BlockIndent() int {
+	return p.blockIndent
+}
+
+func (p *parseContext) SetBlockIndent(v int) {
+	p.blockIndent = v
+}
+
 func (p *parseContext) LastDelimiter() *Delimiter {
 	return p.lastDelimiter
 }
@ -355,6 +374,11 @@ const (

 	// NoChildren indicates parser does not have child blocks.
 	NoChildren
+
+	// RequireParagraph indicates parser requires that the last node
+	// must be a paragraph and is not converted to other nodes by
+	// ParagraphTransformers.
+	RequireParagraph
 )

 // A Config struct is a data structure that holds configuration of the Parser.
@ -781,13 +805,14 @@ func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
 	return root
 }

-func (p *parser) transformParagraph(node *ast.Paragraph, reader text.Reader, pc Context) {
+func (p *parser) transformParagraph(node *ast.Paragraph, reader text.Reader, pc Context) bool {
 	for _, pt := range p.paragraphTransformers {
 		pt.Transform(node, reader, pc)
 		if node.Parent() == nil {
-			break
+			return true
 		}
 	}
+	return false
 }

 func (p *parser) closeBlocks(from, to int, reader text.Reader, pc Context) {
@ -836,8 +861,10 @@ retry:
 			w, pos = util.IndentWidth(line, 0)
 			if w >= len(line) {
 				pc.SetBlockOffset(-1)
+				pc.SetBlockIndent(-1)
 			} else {
 				pc.SetBlockOffset(pos)
+				pc.SetBlockIndent(w)
 			}
 			shouldPeek = false
 			if line == nil || line[0] == '\n' {
@ -850,12 +877,41 @@ retry:
 		if w > 3 && !bp.CanAcceptIndentedLine() {
 			continue
 		}
-		last := pc.LastOpenedBlock().Node
+		lastBlock := pc.LastOpenedBlock()
+		last := lastBlock.Node
 		node, state := bp.Open(parent, reader, pc)
 		// if l, _ := reader.Position(); l != currentLineNum {
 		// 	panic("BlockParser.Open must not advance position beyond the current line")
 		// }
 		if node != nil {
+			// Parser requires last node to be a paragraph.
+			// With table extension:
+			//
+			//     0
+			//     -:
+			//     -
+			//
+			// '-' on the 3rd line looks like a Setext heading because the 1st and 2nd
+			// lines are still a paragraph when the Setext heading parser tries to parse
+			// the 3rd line.
+			// But the 1st and 2nd lines are a table, so this paragraph will be transformed
+			// by a paragraph transformer. Thus this text should be converted to a table and
+			// an empty list.
+			if state&RequireParagraph != 0 {
+				if last == parent.LastChild() {
+					// Opened paragraph may be transformed by ParagraphTransformers in
+					// closeBlocks().
+					lastBlock.Parser.Close(last, reader, pc)
+					blocks := pc.OpenedBlocks()
+					pc.SetOpenedBlocks(blocks[0 : len(blocks)-1])
+					if p.transformParagraph(last.(*ast.Paragraph), reader, pc) {
+						// Paragraph has been transformed.
+						// So this parser is considered as failing.
+						continuable = false
+						goto retry
+					}
+				}
+			}
 			shouldPeek = true
 			node.SetBlankPreviousLines(blankLine)
 			if last != nil && last.Parent() == nil {
@ -866,7 +922,7 @@ retry:
 			result = newBlocksOpened
 			be := Block{node, bp}
 			pc.SetOpenedBlocks(append(pc.OpenedBlocks(), be))
-			if state == HasChildren {
+			if state&HasChildren != 0 {
 				parent = node
 				goto retry // try child block
 			}
@ -967,8 +1023,14 @@ func (p *parser) parseBlocks(parent ast.Node, reader text.Reader, pc Context) {
 				if i != 0 {
 					thisParent = openedBlocks[i-1].Node
 				}
+				lastNode := openedBlocks[lastIndex].Node
 				result := p.openBlocks(thisParent, isBlank, reader, pc)
 				if result != paragraphContinuation {
+					// lastNode is a paragraph and was transformed by the paragraph
+					// transformers.
+					if openedBlocks[lastIndex].Node != lastNode {
+						lastIndex--
+					}
 					p.closeBlocks(lastIndex, i, reader, pc)
 				}
 				break
--- a/parser/setext_headings.go
+++ b/parser/setext_headings.go
@ -66,7 +66,7 @@ func (b *setextHeadingParser) Open(parent ast.Node, reader text.Reader, pc Conte
 	node := ast.NewHeading(level)
 	node.Lines().Append(segment)
 	pc.Set(temporaryParagraphKey, last)
-	return node, NoChildren
+	return node, NoChildren | RequireParagraph
 }

 func (b *setextHeadingParser) Continue(node ast.Node, reader text.Reader, pc Context) State {
@ -93,7 +93,10 @@ func (b *setextHeadingParser) Close(node ast.Node, reader text.Reader, pc Contex
 	} else {
 		heading.SetLines(tmp.Lines())
 		heading.SetBlankPreviousLines(tmp.HasBlankPreviousLines())
-		tmp.Parent().RemoveChild(tmp.Parent(), tmp)
+		tp := tmp.Parent()
+		if tp != nil {
+			tp.RemoveChild(tp, tmp)
+		}
 	}

 	if b.Attribute {
--- a/text/reader.go
+++ b/text/reader.go
@ -78,6 +78,7 @@ type reader struct {
 	peekedLine   []byte
 	pos          Segment
 	head         int
+	lineOffset   int
 }

 // NewReader return a new Reader that can read UTF-8 bytes .
@ -93,6 +94,7 @@ func NewReader(source []byte) Reader {
 func (r *reader) ResetPosition() {
 	r.line = -1
 	r.head = 0
+	r.lineOffset = -1
 	r.AdvanceLine()
 }

@ -130,11 +132,18 @@ func (r *reader) ReadRune() (rune, int, error) {
 }

 func (r *reader) LineOffset() int {
-	v := r.pos.Start - r.head
-	if r.pos.Padding > 0 {
-		v += util.TabWidth(v) - r.pos.Padding
+	if r.lineOffset < 0 {
+		v := 0
+		for i := r.head; i < r.pos.Start; i++ {
+			if r.source[i] == '\t' {
+				v += util.TabWidth(v)
+			} else {
+				v += 1
 			}
-	return v
+		}
+		r.lineOffset = v - r.pos.Padding
+	}
+	return r.lineOffset
 }

 func (r *reader) PrecendingCharacter() rune {
@ -155,6 +164,7 @@ func (r *reader) PrecendingCharacter() rune {
 }

 func (r *reader) Advance(n int) {
+	r.lineOffset = -1
 	if n < len(r.peekedLine) && r.pos.Padding == 0 {
 		r.pos.Start += n
 		r.peekedLine = nil
@ -183,6 +193,7 @@ func (r *reader) AdvanceAndSetPadding(n, padding int) {
 }

 func (r *reader) AdvanceLine() {
+	r.lineOffset = -1
 	r.peekedLine = nil
 	r.pos.Start = r.pos.Stop
 	r.head = r.pos.Start
@ -206,6 +217,7 @@ func (r *reader) Position() (int, Segment) {
 }

 func (r *reader) SetPosition(line int, pos Segment) {
+	r.lineOffset = -1
 	r.line = line
 	r.pos = pos
 }
@ -245,6 +257,7 @@ type blockReader struct {
 	pos            Segment
 	head           int
 	last           int
+	lineOffset     int
 }

 // NewBlockReader returns a new BlockReader.
@ -262,6 +275,7 @@ func (r *blockReader) ResetPosition() {
 	r.line = -1
 	r.head = 0
 	r.last = 0
+	r.lineOffset = -1
 	r.pos.Start = -1
 	r.pos.Stop = -1
 	r.pos.Padding = 0
@ -320,13 +334,14 @@ func (r *blockReader) PrecendingCharacter() rune {
 	if r.pos.Start <= 0 {
 		return rune('\n')
 	}
+	l := len(r.source)
 	i := r.pos.Start - 1
-	for ; i >= 0; i-- {
+	for ; i < l && i >= 0; i-- {
 		if utf8.RuneStart(r.source[i]) {
 			break
 		}
 	}
-	if i < 0 {
+	if i < 0 || i >= l {
 		return rune('\n')
 	}
 	rn, _ := utf8.DecodeRune(r.source[i:])
@ -334,11 +349,18 @@ func (r *blockReader) PrecendingCharacter() rune {
 }

 func (r *blockReader) LineOffset() int {
-	v := r.pos.Start - r.head
-	if r.pos.Padding > 0 {
-		v += util.TabWidth(v) - r.pos.Padding
+	if r.lineOffset < 0 {
+		v := 0
+		for i := r.head; i < r.pos.Start; i++ {
+			if r.source[i] == '\t' {
+				v += util.TabWidth(v)
+			} else {
+				v += 1
 			}
-	return v
+		}
+		r.lineOffset = v - r.pos.Padding
+	}
+	return r.lineOffset
 }

 func (r *blockReader) Peek() byte {
@ -359,6 +381,8 @@ func (r *blockReader) PeekLine() ([]byte, Segment) {
 }

 func (r *blockReader) Advance(n int) {
+	r.lineOffset = -1
+
 	if n < r.pos.Stop-r.pos.Start && r.pos.Padding == 0 {
 		r.pos.Start += n
 		return
@ -394,17 +418,25 @@ func (r *blockReader) Position() (int, Segment) {
 }

 func (r *blockReader) SetPosition(line int, pos Segment) {
+	r.lineOffset = -1
 	r.line = line
 	if pos.Start == invalidValue {
 		if r.line < r.segmentsLength {
-			r.pos = r.segments.At(line)
+			s := r.segments.At(line)
+			r.head = s.Start
+			r.pos = s
 		}
 	} else {
 		r.pos = pos
+		if r.line < r.segmentsLength {
+			s := r.segments.At(line)
+			r.head = s.Start
+		}
 	}
 }

 func (r *blockReader) SetPadding(v int) {
+	r.lineOffset = -1
 	r.pos.Padding = v
 }

--- a/util/util.go
+++ b/util/util.go
@ -139,18 +139,23 @@ func TabWidth(currentPos int) int {
 func IndentPosition(bs []byte, currentPos, width int) (pos, padding int) {
 	w := 0
 	l := len(bs)
-	for i := 0; i < l; i++ {
-		b := bs[i]
-		if b == ' ' {
-			w++
-		} else if b == '\t' {
+	i := 0
+	hasTab := false
+	for ; i < l; i++ {
+		if bs[i] == '\t' {
 			w += TabWidth(currentPos + w)
+			hasTab = true
+		} else if bs[i] == ' ' {
+			w++
 		} else {
 			break
 		}
-		if w >= width {
-			return i + 1, w - width
 	}
+	if w >= width {
+		if !hasTab {
+			return width, 0
+		}
+		return i, w - width
 	}
 	return -1, -1
 }
@ -452,6 +457,7 @@ func ResolveNumericReferences(source []byte) []byte {
 			next := i + 1
 			if next < limit && source[next] == '#' {
 				nnext := next + 1
+				if nnext < limit {
 					nc := source[nnext]
 					// code point like #x22;
 					if nnext < limit && nc == 'x' || nc == 'X' {
@ -479,6 +485,7 @@ func ResolveNumericReferences(source []byte) []byte {
 						}
 					}
 				}
+			}
 			i = next - 1
 		}
 	}