From 19b18e85fce6e0ba4441165b985d07fa105b8957 Mon Sep 17 00:00:00 2001 From: yuin Date: Wed, 24 Jul 2019 20:16:54 +0900 Subject: [PATCH] Fix bug found in fuzzing --- Makefile | 4 +- ast/block.go | 5 ++- extension/definition_list.go | 10 +++-- extension/footnote.go | 8 +--- go.sum | 0 parser/code_block.go | 2 +- parser/fcode_block.go | 12 ++++-- parser/paragraph.go | 5 +++ parser/parser.go | 72 +++++++++++++++++++++++++++++++++--- parser/setext_headings.go | 7 +++- text/reader.go | 54 +++++++++++++++++++++------ util/util.go | 69 ++++++++++++++++++---------------- 12 files changed, 181 insertions(+), 67 deletions(-) create mode 100644 go.sum diff --git a/Makefile b/Makefile index cfeb21b..667a19a 100644 --- a/Makefile +++ b/Makefile @@ -7,10 +7,10 @@ cov: test go tool cover -html=profile.out fuzz: - which go-fuzz 2>&1 > /dev/null || (GO111MODULE=off go get -u github.com/dvyukov/go-fuzz/go-fuzz github.com/dvyukov/go-fuzz/go-fuzz-build; GO111MODULE=off go get -d github.com/dvyukov/go-fuzz-corpus; true) - cd ./fuzz && go-fuzz-build + which go-fuzz > /dev/null 2>&1 || (GO111MODULE=off go get -u github.com/dvyukov/go-fuzz/go-fuzz github.com/dvyukov/go-fuzz/go-fuzz-build; GO111MODULE=off go get -d github.com/dvyukov/go-fuzz-corpus; true) rm -rf ./fuzz/corpus rm -rf ./fuzz/crashers rm -rf ./fuzz/suppressions rm -f ./fuzz/fuzz-fuzz.zip + cd ./fuzz && go-fuzz-build cd ./fuzz && go-fuzz diff --git a/ast/block.go b/ast/block.go index 103f4f6..4592dbf 100644 --- a/ast/block.go +++ b/ast/block.go @@ -369,7 +369,10 @@ type ListItem struct { // Dump implements Node.Dump. func (n *ListItem) Dump(source []byte, level int) { - DumpHelper(n, source, level, nil, nil) + m := map[string]string{ + "Offset": fmt.Sprintf("%d", n.Offset), + } + DumpHelper(n, source, level, m, nil) } // KindListItem is a NodeKind of the ListItem node. diff --git a/extension/definition_list.go b/extension/definition_list.go index 8622319..5bfadec 100644 --- a/extension/definition_list.go +++ b/extension/definition_list.go @@ -28,7 +28,8 @@ func (b *definitionListParser) Open(parent gast.Node, reader text.Reader, pc par } line, _ := reader.PeekLine() pos := pc.BlockOffset() - if pos < 0 || line[pos] != ':' { + indent := pc.BlockIndent() + if pos < 0 || line[pos] != ':' || indent != 0 { return nil, parser.NoChildren } @@ -45,6 +46,7 @@ func (b *definitionListParser) Open(parent gast.Node, reader text.Reader, pc par para, lastIsParagraph := last.(*gast.Paragraph) var list *ast.DefinitionList + status := parser.HasChildren var ok bool if lastIsParagraph { list, ok = last.PreviousSibling().(*ast.DefinitionList) @@ -53,6 +55,7 @@ func (b *definitionListParser) Open(parent gast.Node, reader text.Reader, pc par list.TemporaryParagraph = para } else { // is first item list = ast.NewDefinitionList(w, para) + status |= parser.RequireParagraph } } else if list, ok = last.(*ast.DefinitionList); ok { // multiple description list.Offset = w @@ -61,7 +64,7 @@ func (b *definitionListParser) Open(parent gast.Node, reader text.Reader, pc par return nil, parser.NoChildren } - return list, parser.HasChildren + return list, status } func (b *definitionListParser) Continue(node gast.Node, reader text.Reader, pc parser.Context) parser.State { @@ -105,7 +108,8 @@ func NewDefinitionDescriptionParser() parser.BlockParser { func (b *definitionDescriptionParser) Open(parent gast.Node, reader text.Reader, pc parser.Context) (gast.Node, parser.State) { line, _ := reader.PeekLine() pos := pc.BlockOffset() - if pos < 0 || line[pos] != ':' { + indent := pc.BlockIndent() + if pos < 0 || line[pos] != ':' || indent != 0 { return nil, parser.NoChildren } list, _ := parent.(*ast.DefinitionList) diff --git a/extension/footnote.go b/extension/footnote.go index 0e78fe7..efa80ee 100644 --- a/extension/footnote.go +++ b/extension/footnote.go @@ -84,11 +84,6 @@ func (b *footnoteBlockParser) Close(node gast.Node, reader text.Reader, pc parse } else { list = ast.NewFootnoteList() pc.Set(footnoteListKey, list) - var root gast.Node - for n := node; n != nil; n = n.Parent() { - root = n - } - root.AppendChild(root, list) } node.Parent().RemoveChild(node.Parent(), node) n := node.(*ast.Footnote) @@ -176,7 +171,6 @@ func (a *footnoteASTTransformer) Transform(node *gast.Document, reader text.Read var list *ast.FootnoteList if tlist := pc.Get(footnoteListKey); tlist != nil { list = tlist.(*ast.FootnoteList) - list.Parent().RemoveChild(list.Parent(), list) } else { return } @@ -254,7 +248,7 @@ func (r *FootnoteHTMLRenderer) renderFootnoteList(w util.BufWriter, source []byt _, _ = w.WriteString("
    \n") } else { _, _ = w.WriteString("
\n") - _, _ = w.WriteString("<") + _, _ = w.WriteString("\n") } diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..e69de29 diff --git a/parser/code_block.go b/parser/code_block.go index 6b149a7..6d69710 100644 --- a/parser/code_block.go +++ b/parser/code_block.go @@ -21,7 +21,7 @@ func NewCodeBlockParser() BlockParser { func (b *codeBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) { line, segment := reader.PeekLine() pos, padding := util.IndentPosition(line, reader.LineOffset(), 4) - if pos < 0 { + if pos < 0 || util.IsBlank(line) { return nil, NoChildren } node := ast.NewCodeBlock() diff --git a/parser/fcode_block.go b/parser/fcode_block.go index 3710d3f..401a6c2 100644 --- a/parser/fcode_block.go +++ b/parser/fcode_block.go @@ -23,6 +23,7 @@ type fenceData struct { char byte indent int length int + node ast.Node } var fencedCodeBlockInfoKey = NewContextKey() @@ -48,7 +49,7 @@ func (b *fencedCodeBlockParser) Open(parent ast.Node, reader text.Reader, pc Con left := util.TrimLeftSpaceLength(rest) right := util.TrimRightSpaceLength(rest) if left < len(rest)-right { - infoStart, infoStop := segment.Start+i+left, segment.Stop-right + infoStart, infoStop := segment.Start-segment.Padding+i+left, segment.Stop-right value := rest[left : len(rest)-right] if fenceChar == '`' && bytes.IndexByte(value, '`') > -1 { return nil, NoChildren @@ -57,8 +58,8 @@ func (b *fencedCodeBlockParser) Open(parent ast.Node, reader text.Reader, pc Con } } } - pc.Set(fencedCodeBlockInfoKey, &fenceData{fenceChar, findent, oFenceLength}) node := ast.NewFencedCodeBlock(info) + pc.Set(fencedCodeBlockInfoKey, &fenceData{fenceChar, findent, oFenceLength, node}) return node, NoChildren } @@ -79,14 +80,17 @@ func (b *fencedCodeBlockParser) Continue(node ast.Node, reader text.Reader, pc C } pos, padding := util.DedentPosition(line, fdata.indent) - seg := text.NewSegmentPadding(segment.Start+pos, segment.Stop, padding) + seg := text.NewSegmentPadding(segment.Start+pos-segment.Padding, segment.Stop, padding) node.Lines().Append(seg) reader.AdvanceAndSetPadding(segment.Stop-segment.Start-pos-1, padding) return Continue | NoChildren } func (b *fencedCodeBlockParser) Close(node ast.Node, reader text.Reader, pc Context) { - pc.Set(fencedCodeBlockInfoKey, nil) + fdata := pc.Get(fencedCodeBlockInfoKey).(*fenceData) + if fdata.node == node { + pc.Set(fencedCodeBlockInfoKey, nil) + } } func (b *fencedCodeBlockParser) CanInterruptParagraph() bool { diff --git a/parser/paragraph.go b/parser/paragraph.go index 72c849a..d089020 100644 --- a/parser/paragraph.go +++ b/parser/paragraph.go @@ -40,6 +40,11 @@ func (b *paragraphParser) Continue(node ast.Node, reader text.Reader, pc Context } func (b *paragraphParser) Close(node ast.Node, reader text.Reader, pc Context) { + parent := node.Parent() + if parent == nil { + // paragraph has been transformed + return + } lines := node.Lines() if lines.Len() != 0 { // trim trailing spaces diff --git a/parser/parser.go b/parser/parser.go index 21bd035..32bff58 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -163,6 +163,15 @@ type Context interface { // This value is valid only for BlockParser.Open. SetBlockOffset(int) + // BlockIndent returns an indent width on current line. + // This value is valid only for BlockParser.Open. + // BlockIndent returns -1 if current line is blank. + BlockIndent() int + + // BlockIndent sets an indent width on current line. + // This value is valid only for BlockParser.Open. + SetBlockIndent(int) + // FirstDelimiter returns a first delimiter of the current delimiter list. FirstDelimiter() *Delimiter @@ -194,6 +203,7 @@ type parseContext struct { ids IDs refs map[string]Reference blockOffset int + blockIndent int delimiters *Delimiter lastDelimiter *Delimiter openedBlocks []Block @@ -205,7 +215,8 @@ func NewContext() Context { store: make([]interface{}, ContextKeyMax+1), refs: map[string]Reference{}, ids: newIDs(), - blockOffset: 0, + blockOffset: -1, + blockIndent: -1, delimiters: nil, lastDelimiter: nil, openedBlocks: []Block{}, @@ -232,6 +243,14 @@ func (p *parseContext) SetBlockOffset(v int) { p.blockOffset = v } +func (p *parseContext) BlockIndent() int { + return p.blockIndent +} + +func (p *parseContext) SetBlockIndent(v int) { + p.blockIndent = v +} + func (p *parseContext) LastDelimiter() *Delimiter { return p.lastDelimiter } @@ -355,6 +374,11 @@ const ( // NoChildren indicates parser does not have child blocks. NoChildren + + // RequireParagraph indicates parser requires that the last node + // must be a paragraph and is not converted to other nodes by + // ParagraphTransformers. + RequireParagraph ) // A Config struct is a data structure that holds configuration of the Parser. @@ -781,13 +805,14 @@ func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node { return root } -func (p *parser) transformParagraph(node *ast.Paragraph, reader text.Reader, pc Context) { +func (p *parser) transformParagraph(node *ast.Paragraph, reader text.Reader, pc Context) bool { for _, pt := range p.paragraphTransformers { pt.Transform(node, reader, pc) if node.Parent() == nil { - break + return true } } + return false } func (p *parser) closeBlocks(from, to int, reader text.Reader, pc Context) { @@ -836,8 +861,10 @@ retry: w, pos = util.IndentWidth(line, 0) if w >= len(line) { pc.SetBlockOffset(-1) + pc.SetBlockIndent(-1) } else { pc.SetBlockOffset(pos) + pc.SetBlockIndent(w) } shouldPeek = false if line == nil || line[0] == '\n' { @@ -850,12 +877,41 @@ retry: if w > 3 && !bp.CanAcceptIndentedLine() { continue } - last := pc.LastOpenedBlock().Node + lastBlock := pc.LastOpenedBlock() + last := lastBlock.Node node, state := bp.Open(parent, reader, pc) // if l, _ := reader.Position(); l != currentLineNum { // panic("BlockParser.Open must not advance position beyond the current line") // } if node != nil { + // Parser requires last node to be a paragraph. + // With table extension: + // + // 0 + // -: + // - + // + // '-' on 3rd line seems a Setext heading because 1st and 2nd lines + // are being paragraph when the Settext heading parser tries to parse the 3rd + // line. + // But 1st line and 2nd line are a table. Thus this paragraph will be transformed + // by a paragraph transformer. So this text should be converted to a table and + // an empty list. + if state&RequireParagraph != 0 { + if last == parent.LastChild() { + // Opened paragraph may be transformed by ParagraphTransformers in + // closeBlocks(). + lastBlock.Parser.Close(last, reader, pc) + blocks := pc.OpenedBlocks() + pc.SetOpenedBlocks(blocks[0 : len(blocks)-1]) + if p.transformParagraph(last.(*ast.Paragraph), reader, pc) { + // Paragraph has been transformed. + // So this parser is considered as failing. + continuable = false + goto retry + } + } + } shouldPeek = true node.SetBlankPreviousLines(blankLine) if last != nil && last.Parent() == nil { @@ -866,7 +922,7 @@ retry: result = newBlocksOpened be := Block{node, bp} pc.SetOpenedBlocks(append(pc.OpenedBlocks(), be)) - if state == HasChildren { + if state&HasChildren != 0 { parent = node goto retry // try child block } @@ -967,8 +1023,14 @@ func (p *parser) parseBlocks(parent ast.Node, reader text.Reader, pc Context) { if i != 0 { thisParent = openedBlocks[i-1].Node } + lastNode := openedBlocks[lastIndex].Node result := p.openBlocks(thisParent, isBlank, reader, pc) if result != paragraphContinuation { + // lastNode is a paragraph and was transformed by the paragraph + // transformers. + if openedBlocks[lastIndex].Node != lastNode { + lastIndex-- + } p.closeBlocks(lastIndex, i, reader, pc) } break diff --git a/parser/setext_headings.go b/parser/setext_headings.go index 1012c5d..1dd7fb9 100644 --- a/parser/setext_headings.go +++ b/parser/setext_headings.go @@ -66,7 +66,7 @@ func (b *setextHeadingParser) Open(parent ast.Node, reader text.Reader, pc Conte node := ast.NewHeading(level) node.Lines().Append(segment) pc.Set(temporaryParagraphKey, last) - return node, NoChildren + return node, NoChildren | RequireParagraph } func (b *setextHeadingParser) Continue(node ast.Node, reader text.Reader, pc Context) State { @@ -93,7 +93,10 @@ func (b *setextHeadingParser) Close(node ast.Node, reader text.Reader, pc Contex } else { heading.SetLines(tmp.Lines()) heading.SetBlankPreviousLines(tmp.HasBlankPreviousLines()) - tmp.Parent().RemoveChild(tmp.Parent(), tmp) + tp := tmp.Parent() + if tp != nil { + tp.RemoveChild(tp, tmp) + } } if b.Attribute { diff --git a/text/reader.go b/text/reader.go index b2a4af1..0e2c3f0 100644 --- a/text/reader.go +++ b/text/reader.go @@ -78,6 +78,7 @@ type reader struct { peekedLine []byte pos Segment head int + lineOffset int } // NewReader return a new Reader that can read UTF-8 bytes . @@ -93,6 +94,7 @@ func NewReader(source []byte) Reader { func (r *reader) ResetPosition() { r.line = -1 r.head = 0 + r.lineOffset = -1 r.AdvanceLine() } @@ -130,11 +132,18 @@ func (r *reader) ReadRune() (rune, int, error) { } func (r *reader) LineOffset() int { - v := r.pos.Start - r.head - if r.pos.Padding > 0 { - v += util.TabWidth(v) - r.pos.Padding + if r.lineOffset < 0 { + v := 0 + for i := r.head; i < r.pos.Start; i++ { + if r.source[i] == '\t' { + v += util.TabWidth(v) + } else { + v += 1 + } + } + r.lineOffset = v - r.pos.Padding } - return v + return r.lineOffset } func (r *reader) PrecendingCharacter() rune { @@ -155,6 +164,7 @@ func (r *reader) PrecendingCharacter() rune { } func (r *reader) Advance(n int) { + r.lineOffset = -1 if n < len(r.peekedLine) && r.pos.Padding == 0 { r.pos.Start += n r.peekedLine = nil @@ -183,6 +193,7 @@ func (r *reader) AdvanceAndSetPadding(n, padding int) { } func (r *reader) AdvanceLine() { + r.lineOffset = -1 r.peekedLine = nil r.pos.Start = r.pos.Stop r.head = r.pos.Start @@ -206,6 +217,7 @@ func (r *reader) Position() (int, Segment) { } func (r *reader) SetPosition(line int, pos Segment) { + r.lineOffset = -1 r.line = line r.pos = pos } @@ -245,6 +257,7 @@ type blockReader struct { pos Segment head int last int + lineOffset int } // NewBlockReader returns a new BlockReader. @@ -262,6 +275,7 @@ func (r *blockReader) ResetPosition() { r.line = -1 r.head = 0 r.last = 0 + r.lineOffset = -1 r.pos.Start = -1 r.pos.Stop = -1 r.pos.Padding = 0 @@ -320,13 +334,14 @@ func (r *blockReader) PrecendingCharacter() rune { if r.pos.Start <= 0 { return rune('\n') } + l := len(r.source) i := r.pos.Start - 1 - for ; i >= 0; i-- { + for ; i < l && i >= 0; i-- { if utf8.RuneStart(r.source[i]) { break } } - if i < 0 { + if i < 0 || i >= l { return rune('\n') } rn, _ := utf8.DecodeRune(r.source[i:]) @@ -334,11 +349,18 @@ func (r *blockReader) PrecendingCharacter() rune { } func (r *blockReader) LineOffset() int { - v := r.pos.Start - r.head - if r.pos.Padding > 0 { - v += util.TabWidth(v) - r.pos.Padding + if r.lineOffset < 0 { + v := 0 + for i := r.head; i < r.pos.Start; i++ { + if r.source[i] == '\t' { + v += util.TabWidth(v) + } else { + v += 1 + } + } + r.lineOffset = v - r.pos.Padding } - return v + return r.lineOffset } func (r *blockReader) Peek() byte { @@ -359,6 +381,8 @@ func (r *blockReader) PeekLine() ([]byte, Segment) { } func (r *blockReader) Advance(n int) { + r.lineOffset = -1 + if n < r.pos.Stop-r.pos.Start && r.pos.Padding == 0 { r.pos.Start += n return @@ -394,17 +418,25 @@ func (r *blockReader) Position() (int, Segment) { } func (r *blockReader) SetPosition(line int, pos Segment) { + r.lineOffset = -1 r.line = line if pos.Start == invalidValue { if r.line < r.segmentsLength { - r.pos = r.segments.At(line) + s := r.segments.At(line) + r.head = s.Start + r.pos = s } } else { r.pos = pos + if r.line < r.segmentsLength { + s := r.segments.At(line) + r.head = s.Start + } } } func (r *blockReader) SetPadding(v int) { + r.lineOffset = -1 r.pos.Padding = v } diff --git a/util/util.go b/util/util.go index a303cbb..46603d3 100644 --- a/util/util.go +++ b/util/util.go @@ -139,18 +139,23 @@ func TabWidth(currentPos int) int { func IndentPosition(bs []byte, currentPos, width int) (pos, padding int) { w := 0 l := len(bs) - for i := 0; i < l; i++ { - b := bs[i] - if b == ' ' { - w++ - } else if b == '\t' { + i := 0 + hasTab := false + for ; i < l; i++ { + if bs[i] == '\t' { w += TabWidth(currentPos + w) + hasTab = true + } else if bs[i] == ' ' { + w++ } else { break } - if w >= width { - return i + 1, w - width + } + if w >= width { + if !hasTab { + return width, 0 } + return i, w - width } return -1, -1 } @@ -452,30 +457,32 @@ func ResolveNumericReferences(source []byte) []byte { next := i + 1 if next < limit && source[next] == '#' { nnext := next + 1 - nc := source[nnext] - // code point like #x22; - if nnext < limit && nc == 'x' || nc == 'X' { - start := nnext + 1 - i, ok = ReadWhile(source, [2]int{start, limit}, IsHexDecimal) - if ok && i < limit && source[i] == ';' { - v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 16, 32) - cob.Write(source[n:pos]) - n = i + 1 - runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v))) - cob.Write(buf[:runeSize]) - continue - } - // code point like #1234; - } else if nc >= '0' && nc <= '9' { - start := nnext - i, ok = ReadWhile(source, [2]int{start, limit}, IsNumeric) - if ok && i < limit && i-start < 8 && source[i] == ';' { - v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 0, 32) - cob.Write(source[n:pos]) - n = i + 1 - runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v))) - cob.Write(buf[:runeSize]) - continue + if nnext < limit { + nc := source[nnext] + // code point like #x22; + if nnext < limit && nc == 'x' || nc == 'X' { + start := nnext + 1 + i, ok = ReadWhile(source, [2]int{start, limit}, IsHexDecimal) + if ok && i < limit && source[i] == ';' { + v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 16, 32) + cob.Write(source[n:pos]) + n = i + 1 + runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v))) + cob.Write(buf[:runeSize]) + continue + } + // code point like #1234; + } else if nc >= '0' && nc <= '9' { + start := nnext + i, ok = ReadWhile(source, [2]int{start, limit}, IsNumeric) + if ok && i < limit && i-start < 8 && source[i] == ';' { + v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 0, 32) + cob.Write(source[n:pos]) + n = i + 1 + runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v))) + cob.Write(buf[:runeSize]) + continue + } } } }