mirror of
https://github.com/yuin/goldmark
synced 2025-03-04 23:04:52 +00:00
Performance improvements, Add BlockParser.Trigger
This commit is contained in:
parent
667a2920f2
commit
187643a437
18 changed files with 386 additions and 51 deletions
40
README.md
40
README.md
|
|
@ -79,6 +79,25 @@ if err := goldmark.Convert(source, &buf); err != nil {
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
With options
|
||||||
|
------------------------------
|
||||||
|
|
||||||
|
```go
|
||||||
|
var buf bytes.Buffer
|
||||||
|
if err := goldmark.Convert(source, &buf, parser.WithWorkers(16)); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
| Functional option | Type | Description |
|
||||||
|
| ----------------- | ---- | ----------- |
|
||||||
|
| `parser.WithContext` | A parser.Context | Context for the parsing phase. |
|
||||||
|
| `parser.WithWorkers` | int | Number of goroutines that execute concurrent inline element parsing. |
|
||||||
|
|
||||||
|
`parser.WithWorkers` may slightly improve performance if the markdown text
|
||||||
|
is relatively large. Otherwise, `parser.WithWorkers` may cause performance degradation due to
|
||||||
|
goroutine overheads.
|
||||||
|
|
||||||
Custom parser and renderer
|
Custom parser and renderer
|
||||||
--------------------------
|
--------------------------
|
||||||
```go
|
```go
|
||||||
|
|
@ -236,10 +255,16 @@ blackfriday v2 can not simply be compared with other Commonmark compliant librar
|
||||||
Though goldmark builds a clean, extensible AST structure and achieves full compliance with
|
Though goldmark builds a clean, extensible AST structure and achieves full compliance with
|
||||||
Commonmark, it is reasonably fast and consumes less memory.
|
Commonmark, it is reasonably fast and consumes less memory.
|
||||||
|
|
||||||
|
This benchmark parses a relatively large markdown text. In such text, concurrent parsing
|
||||||
|
slightly improves performance.
|
||||||
|
|
||||||
```
|
```
|
||||||
BenchmarkGoldMark-4 200 6388385 ns/op 2085552 B/op 13856 allocs/op
|
BenchmarkMarkdown/Blackfriday-v2-4 300 5316935 ns/op 3321072 B/op 20050 allocs/op
|
||||||
BenchmarkGolangCommonMark-4 200 7056577 ns/op 2974119 B/op 18828 allocs/op
|
BenchmarkMarkdown/GoldMark(workers=16)-4 300 5506219 ns/op 2702358 B/op 14494 allocs/op
|
||||||
BenchmarkBlackFriday-4 300 5635122 ns/op 3341668 B/op 20057 allocs/op
|
BenchmarkMarkdown/GoldMark-4 200 5903779 ns/op 2594304 B/op 13861 allocs/op
|
||||||
|
BenchmarkMarkdown/CommonMark-4 200 7147659 ns/op 2752977 B/op 18827 allocs/op
|
||||||
|
BenchmarkMarkdown/Lute-4 200 5930621 ns/op 2839712 B/op 21165 allocs/op
|
||||||
|
BenchmarkMarkdown/GoMarkdown-4 10 120953070 ns/op 2192278 B/op 22174 allocs/op
|
||||||
```
|
```
|
||||||
|
|
||||||
### against cmark(A CommonMark reference implementation written in c)
|
### against cmark(A CommonMark reference implementation written in c)
|
||||||
|
|
@ -248,12 +273,15 @@ BenchmarkBlackFriday-4 300 5635122 ns/op 3341668
|
||||||
----------- cmark -----------
|
----------- cmark -----------
|
||||||
file: _data.md
|
file: _data.md
|
||||||
iteration: 50
|
iteration: 50
|
||||||
average: 0.0050112160 sec
|
average: 0.0047014618 sec
|
||||||
go run ./goldmark_benchmark.go
|
|
||||||
------- goldmark -------
|
------- goldmark -------
|
||||||
file: _data.md
|
file: _data.md
|
||||||
iteration: 50
|
iteration: 50
|
||||||
average: 0.0064833820 sec
|
average: 0.0052624750 sec
|
||||||
|
------- goldmark(workers=16) -------
|
||||||
|
file: _data.md
|
||||||
|
iteration: 50
|
||||||
|
average: 0.0044918780 sec
|
||||||
```
|
```
|
||||||
|
|
||||||
As you can see, goldmark performs pretty much equally to the cmark.
|
As you can see, goldmark performs pretty much equally to the cmark.
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/yuin/goldmark"
|
"github.com/yuin/goldmark"
|
||||||
|
"github.com/yuin/goldmark/parser"
|
||||||
"github.com/yuin/goldmark/renderer/html"
|
"github.com/yuin/goldmark/renderer/html"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -42,4 +43,18 @@ func main() {
|
||||||
fmt.Printf("file: %s\n", file)
|
fmt.Printf("file: %s\n", file)
|
||||||
fmt.Printf("iteration: %d\n", n)
|
fmt.Printf("iteration: %d\n", n)
|
||||||
fmt.Printf("average: %.10f sec\n", float64((int64(sum)/int64(n)))/1000000000.0)
|
fmt.Printf("average: %.10f sec\n", float64((int64(sum)/int64(n)))/1000000000.0)
|
||||||
|
|
||||||
|
sum = time.Duration(0)
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
start := time.Now()
|
||||||
|
out.Reset()
|
||||||
|
if err := markdown.Convert(source, &out, parser.WithWorkers(16)); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
sum += time.Since(start)
|
||||||
|
}
|
||||||
|
fmt.Printf("------- goldmark(workers=16) -------\n")
|
||||||
|
fmt.Printf("file: %s\n", file)
|
||||||
|
fmt.Printf("iteration: %d\n", n)
|
||||||
|
fmt.Printf("average: %.10f sec\n", float64((int64(sum)/int64(n)))/1000000000.0)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,11 +7,15 @@ import (
|
||||||
|
|
||||||
gomarkdown "github.com/gomarkdown/markdown"
|
gomarkdown "github.com/gomarkdown/markdown"
|
||||||
"github.com/yuin/goldmark"
|
"github.com/yuin/goldmark"
|
||||||
|
"github.com/yuin/goldmark/parser"
|
||||||
"github.com/yuin/goldmark/renderer/html"
|
"github.com/yuin/goldmark/renderer/html"
|
||||||
|
"github.com/yuin/goldmark/util"
|
||||||
"gitlab.com/golang-commonmark/markdown"
|
"gitlab.com/golang-commonmark/markdown"
|
||||||
|
|
||||||
bf1 "github.com/russross/blackfriday"
|
bf1 "github.com/russross/blackfriday"
|
||||||
bf2 "gopkg.in/russross/blackfriday.v2"
|
bf2 "gopkg.in/russross/blackfriday.v2"
|
||||||
|
|
||||||
|
"github.com/b3log/lute"
|
||||||
)
|
)
|
||||||
|
|
||||||
func BenchmarkMarkdown(b *testing.B) {
|
func BenchmarkMarkdown(b *testing.B) {
|
||||||
|
|
@ -31,13 +35,25 @@ func BenchmarkMarkdown(b *testing.B) {
|
||||||
doBenchmark(b, r)
|
doBenchmark(b, r)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
b.Run("GoldMark(workers=16)", func(b *testing.B) {
|
||||||
|
markdown := goldmark.New(
|
||||||
|
goldmark.WithRendererOptions(html.WithXHTML(), html.WithUnsafe()),
|
||||||
|
)
|
||||||
|
r := func(src []byte) ([]byte, error) {
|
||||||
|
var out bytes.Buffer
|
||||||
|
err := markdown.Convert(src, &out, parser.WithWorkers(16))
|
||||||
|
return out.Bytes(), err
|
||||||
|
}
|
||||||
|
doBenchmark(b, r)
|
||||||
|
})
|
||||||
|
|
||||||
b.Run("GoldMark", func(b *testing.B) {
|
b.Run("GoldMark", func(b *testing.B) {
|
||||||
markdown := goldmark.New(
|
markdown := goldmark.New(
|
||||||
goldmark.WithRendererOptions(html.WithXHTML(), html.WithUnsafe()),
|
goldmark.WithRendererOptions(html.WithXHTML(), html.WithUnsafe()),
|
||||||
)
|
)
|
||||||
r := func(src []byte) ([]byte, error) {
|
r := func(src []byte) ([]byte, error) {
|
||||||
var out bytes.Buffer
|
var out bytes.Buffer
|
||||||
err := markdown.Convert(src, &out)
|
err := markdown.Convert(src, &out, parser.WithWorkers(0))
|
||||||
return out.Bytes(), err
|
return out.Bytes(), err
|
||||||
}
|
}
|
||||||
doBenchmark(b, r)
|
doBenchmark(b, r)
|
||||||
|
|
@ -53,6 +69,20 @@ func BenchmarkMarkdown(b *testing.B) {
|
||||||
doBenchmark(b, r)
|
doBenchmark(b, r)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
b.Run("Lute", func(b *testing.B) {
|
||||||
|
luteEngine := lute.New(
|
||||||
|
lute.GFM(false),
|
||||||
|
lute.CodeSyntaxHighlight(false),
|
||||||
|
lute.SoftBreak2HardBreak(false),
|
||||||
|
lute.AutoSpace(false),
|
||||||
|
lute.FixTermTypo(false))
|
||||||
|
r := func(src []byte) ([]byte, error) {
|
||||||
|
out, err := luteEngine.FormatStr("Benchmark", util.BytesToReadOnlyString(src))
|
||||||
|
return util.StringToReadOnlyBytes(out), err
|
||||||
|
}
|
||||||
|
doBenchmark(b, r)
|
||||||
|
})
|
||||||
|
|
||||||
b.Run("GoMarkdown", func(b *testing.B) {
|
b.Run("GoMarkdown", func(b *testing.B) {
|
||||||
r := func(src []byte) ([]byte, error) {
|
r := func(src []byte) ([]byte, error) {
|
||||||
out := gomarkdown.ToHTML(src, nil, nil)
|
out := gomarkdown.ToHTML(src, nil, nil)
|
||||||
|
|
@ -60,6 +90,7 @@ func BenchmarkMarkdown(b *testing.B) {
|
||||||
}
|
}
|
||||||
doBenchmark(b, r)
|
doBenchmark(b, r)
|
||||||
})
|
})
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// The different frameworks have different APIs. Create an adapter that
|
// The different frameworks have different APIs. Create an adapter that
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,10 @@ func NewDefinitionListParser() parser.BlockParser {
|
||||||
return defaultDefinitionListParser
|
return defaultDefinitionListParser
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (b *definitionListParser) Trigger() []byte {
|
||||||
|
return []byte{':'}
|
||||||
|
}
|
||||||
|
|
||||||
func (b *definitionListParser) Open(parent gast.Node, reader text.Reader, pc parser.Context) (gast.Node, parser.State) {
|
func (b *definitionListParser) Open(parent gast.Node, reader text.Reader, pc parser.Context) (gast.Node, parser.State) {
|
||||||
if _, ok := parent.(*ast.DefinitionList); ok {
|
if _, ok := parent.(*ast.DefinitionList); ok {
|
||||||
return nil, parser.NoChildren
|
return nil, parser.NoChildren
|
||||||
|
|
@ -105,6 +109,10 @@ func NewDefinitionDescriptionParser() parser.BlockParser {
|
||||||
return defaultDefinitionDescriptionParser
|
return defaultDefinitionDescriptionParser
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (b *definitionDescriptionParser) Trigger() []byte {
|
||||||
|
return []byte{':'}
|
||||||
|
}
|
||||||
|
|
||||||
func (b *definitionDescriptionParser) Open(parent gast.Node, reader text.Reader, pc parser.Context) (gast.Node, parser.State) {
|
func (b *definitionDescriptionParser) Open(parent gast.Node, reader text.Reader, pc parser.Context) (gast.Node, parser.State) {
|
||||||
line, _ := reader.PeekLine()
|
line, _ := reader.PeekLine()
|
||||||
pos := pc.BlockOffset()
|
pos := pc.BlockOffset()
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,10 @@ func NewFootnoteBlockParser() parser.BlockParser {
|
||||||
return defaultFootnoteBlockParser
|
return defaultFootnoteBlockParser
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (b *footnoteBlockParser) Trigger() []byte {
|
||||||
|
return []byte{'['}
|
||||||
|
}
|
||||||
|
|
||||||
func (b *footnoteBlockParser) Open(parent gast.Node, reader text.Reader, pc parser.Context) (gast.Node, parser.State) {
|
func (b *footnoteBlockParser) Open(parent gast.Node, reader text.Reader, pc parser.Context) (gast.Node, parser.State) {
|
||||||
line, segment := reader.PeekLine()
|
line, segment := reader.PeekLine()
|
||||||
pos := pc.BlockOffset()
|
pos := pc.BlockOffset()
|
||||||
|
|
@ -136,7 +140,7 @@ func (s *footnoteParser) Parse(parent gast.Node, block text.Reader, pc parser.Co
|
||||||
block.Advance(closes + 1)
|
block.Advance(closes + 1)
|
||||||
|
|
||||||
var list *ast.FootnoteList
|
var list *ast.FootnoteList
|
||||||
if tlist := pc.Get(footnoteListKey); tlist != nil {
|
if tlist := pc.Root().Get(footnoteListKey); tlist != nil {
|
||||||
list = tlist.(*ast.FootnoteList)
|
list = tlist.(*ast.FootnoteList)
|
||||||
}
|
}
|
||||||
if list == nil {
|
if list == nil {
|
||||||
|
|
|
||||||
|
|
@ -74,6 +74,10 @@ func NewATXHeadingParser(opts ...HeadingOption) BlockParser {
|
||||||
return p
|
return p
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (b *atxHeadingParser) Trigger() []byte {
|
||||||
|
return []byte{'#'}
|
||||||
|
}
|
||||||
|
|
||||||
func (b *atxHeadingParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
|
func (b *atxHeadingParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
|
||||||
line, segment := reader.PeekLine()
|
line, segment := reader.PeekLine()
|
||||||
pos := pc.BlockOffset()
|
pos := pc.BlockOffset()
|
||||||
|
|
|
||||||
|
|
@ -38,6 +38,10 @@ func (b *blockquoteParser) process(reader text.Reader) bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (b *blockquoteParser) Trigger() []byte {
|
||||||
|
return []byte{'>'}
|
||||||
|
}
|
||||||
|
|
||||||
func (b *blockquoteParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
|
func (b *blockquoteParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
|
||||||
if b.process(reader) {
|
if b.process(reader) {
|
||||||
return ast.NewBlockquote(), HasChildren
|
return ast.NewBlockquote(), HasChildren
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,10 @@ func NewCodeBlockParser() BlockParser {
|
||||||
return defaultCodeBlockParser
|
return defaultCodeBlockParser
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (b *codeBlockParser) Trigger() []byte {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (b *codeBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
|
func (b *codeBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
|
||||||
line, segment := reader.PeekLine()
|
line, segment := reader.PeekLine()
|
||||||
pos, padding := util.IndentPosition(line, reader.LineOffset(), 4)
|
pos, padding := util.IndentPosition(line, reader.LineOffset(), 4)
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,10 @@ type fenceData struct {
|
||||||
|
|
||||||
var fencedCodeBlockInfoKey = NewContextKey()
|
var fencedCodeBlockInfoKey = NewContextKey()
|
||||||
|
|
||||||
|
func (b *fencedCodeBlockParser) Trigger() []byte {
|
||||||
|
return []byte{'~', '`'}
|
||||||
|
}
|
||||||
|
|
||||||
func (b *fencedCodeBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
|
func (b *fencedCodeBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
|
||||||
line, segment := reader.PeekLine()
|
line, segment := reader.PeekLine()
|
||||||
pos := pc.BlockOffset()
|
pos := pc.BlockOffset()
|
||||||
|
|
|
||||||
|
|
@ -105,6 +105,10 @@ func NewHTMLBlockParser() BlockParser {
|
||||||
return defaultHtmlBlockParser
|
return defaultHtmlBlockParser
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (b *htmlBlockParser) Trigger() []byte {
|
||||||
|
return []byte{'<'}
|
||||||
|
}
|
||||||
|
|
||||||
func (b *htmlBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
|
func (b *htmlBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
|
||||||
var node *ast.HTMLBlock
|
var node *ast.HTMLBlock
|
||||||
line, segment := reader.PeekLine()
|
line, segment := reader.PeekLine()
|
||||||
|
|
|
||||||
|
|
@ -169,7 +169,7 @@ func (s *linkParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.N
|
||||||
block.SetPosition(l, pos)
|
block.SetPosition(l, pos)
|
||||||
ssegment := text.NewSegment(last.Segment.Stop, segment.Start)
|
ssegment := text.NewSegment(last.Segment.Stop, segment.Start)
|
||||||
maybeReference := block.Value(ssegment)
|
maybeReference := block.Value(ssegment)
|
||||||
ref, ok := pc.Reference(util.ToLinkReference(maybeReference))
|
ref, ok := pc.Root().Reference(util.ToLinkReference(maybeReference))
|
||||||
if !ok {
|
if !ok {
|
||||||
ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment)
|
ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment)
|
||||||
return nil
|
return nil
|
||||||
|
|
@ -243,7 +243,7 @@ func (s *linkParser) parseReferenceLink(parent ast.Node, last *linkLabelState, b
|
||||||
maybeReference = block.Value(ssegment)
|
maybeReference = block.Value(ssegment)
|
||||||
}
|
}
|
||||||
|
|
||||||
ref, ok := pc.Reference(util.ToLinkReference(maybeReference))
|
ref, ok := pc.Root().Reference(util.ToLinkReference(maybeReference))
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil, true
|
return nil, true
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -116,6 +116,10 @@ func NewListParser() BlockParser {
|
||||||
return defaultListParser
|
return defaultListParser
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (b *listParser) Trigger() []byte {
|
||||||
|
return []byte{'-', '+', '*', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}
|
||||||
|
}
|
||||||
|
|
||||||
func (b *listParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
|
func (b *listParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
|
||||||
last := pc.LastOpenedBlock().Node
|
last := pc.LastOpenedBlock().Node
|
||||||
if _, lok := last.(*ast.List); lok || pc.Get(skipListParser) != nil {
|
if _, lok := last.(*ast.List); lok || pc.Get(skipListParser) != nil {
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,10 @@ func NewListItemParser() BlockParser {
|
||||||
var skipListParser = NewContextKey()
|
var skipListParser = NewContextKey()
|
||||||
var skipListParserValue interface{} = true
|
var skipListParserValue interface{} = true
|
||||||
|
|
||||||
|
func (b *listItemParser) Trigger() []byte {
|
||||||
|
return []byte{'-', '+', '*', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}
|
||||||
|
}
|
||||||
|
|
||||||
func (b *listItemParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
|
func (b *listItemParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
|
||||||
list, lok := parent.(*ast.List)
|
list, lok := parent.(*ast.List)
|
||||||
if !lok { // list item must be a child of a list
|
if !lok { // list item must be a child of a list
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,10 @@ func NewParagraphParser() BlockParser {
|
||||||
return defaultParagraphParser
|
return defaultParagraphParser
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (b *paragraphParser) Trigger() []byte {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (b *paragraphParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
|
func (b *paragraphParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
|
||||||
_, segment := reader.PeekLine()
|
_, segment := reader.PeekLine()
|
||||||
segment = segment.TrimLeftSpace(reader.Source())
|
segment = segment.TrimLeftSpace(reader.Source())
|
||||||
|
|
|
||||||
270
parser/parser.go
270
parser/parser.go
|
|
@ -196,6 +196,9 @@ type Context interface {
|
||||||
|
|
||||||
// LastOpenedBlock returns a last node that is currently in parsing.
|
// LastOpenedBlock returns a last node that is currently in parsing.
|
||||||
LastOpenedBlock() Block
|
LastOpenedBlock() Block
|
||||||
|
|
||||||
|
// Root returns a context shared across goroutines.
|
||||||
|
Root() Context
|
||||||
}
|
}
|
||||||
|
|
||||||
type parseContext struct {
|
type parseContext struct {
|
||||||
|
|
@ -207,6 +210,7 @@ type parseContext struct {
|
||||||
delimiters *Delimiter
|
delimiters *Delimiter
|
||||||
lastDelimiter *Delimiter
|
lastDelimiter *Delimiter
|
||||||
openedBlocks []Block
|
openedBlocks []Block
|
||||||
|
root Context
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewContext returns a new Context.
|
// NewContext returns a new Context.
|
||||||
|
|
@ -220,6 +224,7 @@ func NewContext() Context {
|
||||||
delimiters: nil,
|
delimiters: nil,
|
||||||
lastDelimiter: nil,
|
lastDelimiter: nil,
|
||||||
openedBlocks: []Block{},
|
openedBlocks: []Block{},
|
||||||
|
root: nil,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -356,6 +361,140 @@ func (p *parseContext) LastOpenedBlock() Block {
|
||||||
return Block{}
|
return Block{}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p *parseContext) Root() Context {
|
||||||
|
if p.root == nil {
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
return p.root
|
||||||
|
}
|
||||||
|
|
||||||
|
type concurrentParseContext struct {
|
||||||
|
delegate Context
|
||||||
|
m sync.RWMutex
|
||||||
|
root Context
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewConcurrentContext(delegate Context) Context {
|
||||||
|
return &concurrentParseContext{
|
||||||
|
delegate: delegate,
|
||||||
|
root: nil,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *concurrentParseContext) Get(key ContextKey) interface{} {
|
||||||
|
p.m.RLock()
|
||||||
|
defer p.m.RUnlock()
|
||||||
|
ret := p.delegate.Get(key)
|
||||||
|
return ret
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *concurrentParseContext) Set(key ContextKey, value interface{}) {
|
||||||
|
p.m.Lock()
|
||||||
|
defer p.m.Unlock()
|
||||||
|
p.delegate.Set(key, value)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *concurrentParseContext) IDs() IDs {
|
||||||
|
return p.delegate.IDs()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *concurrentParseContext) BlockOffset() int {
|
||||||
|
return p.delegate.BlockOffset()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *concurrentParseContext) SetBlockOffset(v int) {
|
||||||
|
p.m.Lock()
|
||||||
|
defer p.m.Unlock()
|
||||||
|
p.delegate.SetBlockOffset(v)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *concurrentParseContext) BlockIndent() int {
|
||||||
|
return p.delegate.BlockIndent()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *concurrentParseContext) SetBlockIndent(v int) {
|
||||||
|
p.m.Lock()
|
||||||
|
defer p.m.Unlock()
|
||||||
|
p.delegate.SetBlockIndent(v)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *concurrentParseContext) LastDelimiter() *Delimiter {
|
||||||
|
return p.delegate.LastDelimiter()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *concurrentParseContext) FirstDelimiter() *Delimiter {
|
||||||
|
return p.delegate.FirstDelimiter()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *concurrentParseContext) PushDelimiter(d *Delimiter) {
|
||||||
|
p.m.Lock()
|
||||||
|
defer p.m.Unlock()
|
||||||
|
p.delegate.PushDelimiter(d)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *concurrentParseContext) RemoveDelimiter(d *Delimiter) {
|
||||||
|
p.m.Lock()
|
||||||
|
defer p.m.Unlock()
|
||||||
|
p.delegate.RemoveDelimiter(d)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *concurrentParseContext) ClearDelimiters(bottom ast.Node) {
|
||||||
|
p.m.Lock()
|
||||||
|
defer p.m.Unlock()
|
||||||
|
p.delegate.ClearDelimiters(bottom)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *concurrentParseContext) AddReference(ref Reference) {
|
||||||
|
p.m.Lock()
|
||||||
|
defer p.m.Unlock()
|
||||||
|
p.delegate.AddReference(ref)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *concurrentParseContext) Reference(label string) (Reference, bool) {
|
||||||
|
p.m.RLock()
|
||||||
|
defer p.m.RUnlock()
|
||||||
|
v, ok := p.delegate.Reference(label)
|
||||||
|
return v, ok
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *concurrentParseContext) References() []Reference {
|
||||||
|
p.m.RLock()
|
||||||
|
defer p.m.RUnlock()
|
||||||
|
ret := p.delegate.References()
|
||||||
|
return ret
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *concurrentParseContext) String() string {
|
||||||
|
p.m.RLock()
|
||||||
|
defer p.m.RUnlock()
|
||||||
|
ret := p.delegate.String()
|
||||||
|
return ret
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *concurrentParseContext) OpenedBlocks() []Block {
|
||||||
|
return p.delegate.OpenedBlocks()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *concurrentParseContext) SetOpenedBlocks(v []Block) {
|
||||||
|
p.m.Lock()
|
||||||
|
defer p.m.Unlock()
|
||||||
|
p.delegate.SetOpenedBlocks(v)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *concurrentParseContext) LastOpenedBlock() Block {
|
||||||
|
p.m.RLock()
|
||||||
|
defer p.m.RUnlock()
|
||||||
|
ret := p.delegate.LastOpenedBlock()
|
||||||
|
return ret
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *concurrentParseContext) Root() Context {
|
||||||
|
if p.root == nil {
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
return p.root
|
||||||
|
}
|
||||||
|
|
||||||
// State represents parser's state.
|
// State represents parser's state.
|
||||||
// State is designed to use as a bit flag.
|
// State is designed to use as a bit flag.
|
||||||
type State int
|
type State int
|
||||||
|
|
@ -444,6 +583,11 @@ type SetOptioner interface {
|
||||||
// A BlockParser interface parses a block level element like Paragraph, List,
|
// A BlockParser interface parses a block level element like Paragraph, List,
|
||||||
// Blockquote etc.
|
// Blockquote etc.
|
||||||
type BlockParser interface {
|
type BlockParser interface {
|
||||||
|
// Trigger returns a list of characters that trigger the Open method of
|
||||||
|
// this parser.
|
||||||
|
// If Trigger returns a nil, Open will be called with any lines.
|
||||||
|
Trigger() []byte
|
||||||
|
|
||||||
// Open parses the current line and returns a result of parsing.
|
// Open parses the current line and returns a result of parsing.
|
||||||
//
|
//
|
||||||
// Open must not parse beyond the current line.
|
// Open must not parse beyond the current line.
|
||||||
|
|
@ -582,7 +726,8 @@ type Block struct {
|
||||||
|
|
||||||
type parser struct {
|
type parser struct {
|
||||||
options map[OptionName]interface{}
|
options map[OptionName]interface{}
|
||||||
blockParsers []BlockParser
|
blockParsers [256][]BlockParser
|
||||||
|
freeBlockParsers []BlockParser
|
||||||
inlineParsers [256][]InlineParser
|
inlineParsers [256][]InlineParser
|
||||||
closeBlockers []CloseBlocker
|
closeBlockers []CloseBlocker
|
||||||
paragraphTransformers []ParagraphTransformer
|
paragraphTransformers []ParagraphTransformer
|
||||||
|
|
@ -688,13 +833,23 @@ func (p *parser) addBlockParser(v util.PrioritizedValue, options map[OptionName]
|
||||||
if !ok {
|
if !ok {
|
||||||
panic(fmt.Sprintf("%v is not a BlockParser", v.Value))
|
panic(fmt.Sprintf("%v is not a BlockParser", v.Value))
|
||||||
}
|
}
|
||||||
|
tcs := bp.Trigger()
|
||||||
so, ok := v.Value.(SetOptioner)
|
so, ok := v.Value.(SetOptioner)
|
||||||
if ok {
|
if ok {
|
||||||
for oname, ovalue := range options {
|
for oname, ovalue := range options {
|
||||||
so.SetOption(oname, ovalue)
|
so.SetOption(oname, ovalue)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
p.blockParsers = append(p.blockParsers, bp)
|
if tcs == nil {
|
||||||
|
p.freeBlockParsers = append(p.freeBlockParsers, bp)
|
||||||
|
} else {
|
||||||
|
for _, tc := range tcs {
|
||||||
|
if p.blockParsers[tc] == nil {
|
||||||
|
p.blockParsers[tc] = []BlockParser{}
|
||||||
|
}
|
||||||
|
p.blockParsers[tc] = append(p.blockParsers[tc], bp)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *parser) addInlineParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
|
func (p *parser) addInlineParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
|
||||||
|
|
@ -751,6 +906,7 @@ func (p *parser) addASTTransformer(v util.PrioritizedValue, options map[OptionNa
|
||||||
// A ParseConfig struct is a data structure that holds configuration of the Parser.Parse.
|
// A ParseConfig struct is a data structure that holds configuration of the Parser.Parse.
|
||||||
type ParseConfig struct {
|
type ParseConfig struct {
|
||||||
Context Context
|
Context Context
|
||||||
|
Workers int
|
||||||
}
|
}
|
||||||
|
|
||||||
// A ParseOption is a functional option type for the Parser.Parse.
|
// A ParseOption is a functional option type for the Parser.Parse.
|
||||||
|
|
@ -764,12 +920,27 @@ func WithContext(context Context) ParseOption {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WithWorkers is a functional option that allows you to set
|
||||||
|
// the number of inline parsing workers (goroutines).
|
||||||
|
// If num is less than 2, inline parsing is never performed concurrently.
|
||||||
|
func WithWorkers(num int) ParseOption {
|
||||||
|
return func(c *ParseConfig) {
|
||||||
|
c.Workers = num
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
|
func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
|
||||||
p.initSync.Do(func() {
|
p.initSync.Do(func() {
|
||||||
p.config.BlockParsers.Sort()
|
p.config.BlockParsers.Sort()
|
||||||
for _, v := range p.config.BlockParsers {
|
for _, v := range p.config.BlockParsers {
|
||||||
p.addBlockParser(v, p.config.Options)
|
p.addBlockParser(v, p.config.Options)
|
||||||
}
|
}
|
||||||
|
for i := range p.blockParsers {
|
||||||
|
if p.blockParsers[i] != nil {
|
||||||
|
p.blockParsers[i] = append(p.blockParsers[i], p.freeBlockParsers...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
p.config.InlineParsers.Sort()
|
p.config.InlineParsers.Sort()
|
||||||
for _, v := range p.config.InlineParsers {
|
for _, v := range p.config.InlineParsers {
|
||||||
p.addInlineParser(v, p.config.Options)
|
p.addInlineParser(v, p.config.Options)
|
||||||
|
|
@ -794,10 +965,46 @@ func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
|
||||||
pc := c.Context
|
pc := c.Context
|
||||||
root := ast.NewDocument()
|
root := ast.NewDocument()
|
||||||
p.parseBlocks(root, reader, pc)
|
p.parseBlocks(root, reader, pc)
|
||||||
blockReader := text.NewBlockReader(reader.Source(), nil)
|
|
||||||
p.walkBlock(root, func(node ast.Node) {
|
if c.Workers < 2 {
|
||||||
p.parseBlock(blockReader, node, pc)
|
blockReader := text.NewBlockReader(reader.Source(), nil)
|
||||||
})
|
p.walkBlock(root, func(node ast.Node) {
|
||||||
|
p.parseBlock(blockReader, node, pc)
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
nodes := make([]ast.Node, 0, 100)
|
||||||
|
p.walkBlock(root, func(node ast.Node) {
|
||||||
|
nodes = append(nodes, node)
|
||||||
|
})
|
||||||
|
max := (len(nodes) / c.Workers) - 1
|
||||||
|
if max < 0 {
|
||||||
|
blockReader := text.NewBlockReader(reader.Source(), nil)
|
||||||
|
p.walkBlock(root, func(node ast.Node) {
|
||||||
|
p.parseBlock(blockReader, node, pc)
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
rootContext := NewConcurrentContext(pc)
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
for i := 0; i <= max; i++ {
|
||||||
|
from := i * c.Workers
|
||||||
|
to := from + c.Workers
|
||||||
|
if i == max {
|
||||||
|
to = len(nodes)
|
||||||
|
}
|
||||||
|
wg.Add(1)
|
||||||
|
go func(wg *sync.WaitGroup) {
|
||||||
|
blockReader := text.NewBlockReader(reader.Source(), nil)
|
||||||
|
pc := NewContext()
|
||||||
|
pc.(*parseContext).root = rootContext
|
||||||
|
for _, n := range nodes[from:to] {
|
||||||
|
p.parseBlock(blockReader, n, pc)
|
||||||
|
}
|
||||||
|
wg.Done()
|
||||||
|
}(&wg)
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
}
|
||||||
|
}
|
||||||
for _, at := range p.astTransformers {
|
for _, at := range p.astTransformers {
|
||||||
at.Transform(root, reader, pc)
|
at.Transform(root, reader, pc)
|
||||||
}
|
}
|
||||||
|
|
@ -849,28 +1056,31 @@ func (p *parser) openBlocks(parent ast.Node, blankLine bool, reader text.Reader,
|
||||||
continuable = ast.IsParagraph(lastBlock.Node)
|
continuable = ast.IsParagraph(lastBlock.Node)
|
||||||
}
|
}
|
||||||
retry:
|
retry:
|
||||||
shouldPeek := true
|
var bps []BlockParser
|
||||||
//var currentLineNum int
|
line, _ := reader.PeekLine()
|
||||||
var w int
|
w, pos := util.IndentWidth(line, 0)
|
||||||
var pos int
|
if w >= len(line) {
|
||||||
var line []byte
|
pc.SetBlockOffset(-1)
|
||||||
for _, bp := range p.blockParsers {
|
pc.SetBlockIndent(-1)
|
||||||
if shouldPeek {
|
} else {
|
||||||
//currentLineNum, _ = reader.Position()
|
pc.SetBlockOffset(pos)
|
||||||
line, _ = reader.PeekLine()
|
pc.SetBlockIndent(w)
|
||||||
w, pos = util.IndentWidth(line, 0)
|
}
|
||||||
if w >= len(line) {
|
if line == nil || line[0] == '\n' {
|
||||||
pc.SetBlockOffset(-1)
|
goto continuable
|
||||||
pc.SetBlockIndent(-1)
|
}
|
||||||
} else {
|
bps = p.freeBlockParsers
|
||||||
pc.SetBlockOffset(pos)
|
if pos < len(line) {
|
||||||
pc.SetBlockIndent(w)
|
bps = p.blockParsers[line[pos]]
|
||||||
}
|
if bps == nil {
|
||||||
shouldPeek = false
|
bps = p.freeBlockParsers
|
||||||
if line == nil || line[0] == '\n' {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
if bps == nil {
|
||||||
|
goto continuable
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, bp := range bps {
|
||||||
if continuable && result == noBlocksOpened && !bp.CanInterruptParagraph() {
|
if continuable && result == noBlocksOpened && !bp.CanInterruptParagraph() {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
@ -880,9 +1090,6 @@ retry:
|
||||||
lastBlock := pc.LastOpenedBlock()
|
lastBlock := pc.LastOpenedBlock()
|
||||||
last := lastBlock.Node
|
last := lastBlock.Node
|
||||||
node, state := bp.Open(parent, reader, pc)
|
node, state := bp.Open(parent, reader, pc)
|
||||||
// if l, _ := reader.Position(); l != currentLineNum {
|
|
||||||
// panic("BlockParser.Open must not advance position beyond the current line")
|
|
||||||
// }
|
|
||||||
if node != nil {
|
if node != nil {
|
||||||
// Parser requires last node to be a paragraph.
|
// Parser requires last node to be a paragraph.
|
||||||
// With table extension:
|
// With table extension:
|
||||||
|
|
@ -912,7 +1119,6 @@ retry:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
shouldPeek = true
|
|
||||||
node.SetBlankPreviousLines(blankLine)
|
node.SetBlankPreviousLines(blankLine)
|
||||||
if last != nil && last.Parent() == nil {
|
if last != nil && last.Parent() == nil {
|
||||||
lastPos := len(pc.OpenedBlocks()) - 1
|
lastPos := len(pc.OpenedBlocks()) - 1
|
||||||
|
|
@ -929,6 +1135,8 @@ retry:
|
||||||
break // no children, can not open more blocks on this line
|
break // no children, can not open more blocks on this line
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
continuable:
|
||||||
if result == noBlocksOpened && continuable {
|
if result == noBlocksOpened && continuable {
|
||||||
state := lastBlock.Parser.Continue(lastBlock.Node, reader, pc)
|
state := lastBlock.Parser.Continue(lastBlock.Node, reader, pc)
|
||||||
if state&Continue != 0 {
|
if state&Continue != 0 {
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,10 @@ func NewSetextHeadingParser(opts ...HeadingOption) BlockParser {
|
||||||
return p
|
return p
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (b *setextHeadingParser) Trigger() []byte {
|
||||||
|
return []byte{'-', '='}
|
||||||
|
}
|
||||||
|
|
||||||
func (b *setextHeadingParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
|
func (b *setextHeadingParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
|
||||||
last := pc.LastOpenedBlock().Node
|
last := pc.LastOpenedBlock().Node
|
||||||
if last == nil {
|
if last == nil {
|
||||||
|
|
|
||||||
|
|
@ -6,15 +6,15 @@ import (
|
||||||
"github.com/yuin/goldmark/util"
|
"github.com/yuin/goldmark/util"
|
||||||
)
|
)
|
||||||
|
|
||||||
type ThematicBreakParser struct {
|
type thematicBreakPraser struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
var defaultThematicBreakParser = &ThematicBreakParser{}
|
var defaultThematicBreakPraser = &thematicBreakPraser{}
|
||||||
|
|
||||||
// NewThematicBreakParser returns a new BlockParser that
|
// NewThematicBreakParser returns a new BlockParser that
|
||||||
// parses thematic breaks.
|
// parses thematic breaks.
|
||||||
func NewThematicBreakParser() BlockParser {
|
func NewThematicBreakParser() BlockParser {
|
||||||
return defaultThematicBreakParser
|
return defaultThematicBreakPraser
|
||||||
}
|
}
|
||||||
|
|
||||||
func isThematicBreak(line []byte) bool {
|
func isThematicBreak(line []byte) bool {
|
||||||
|
|
@ -45,7 +45,11 @@ func isThematicBreak(line []byte) bool {
|
||||||
return count > 2
|
return count > 2
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *ThematicBreakParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
|
func (b *thematicBreakPraser) Trigger() []byte {
|
||||||
|
return []byte{'-', '*', '_'}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *thematicBreakPraser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
|
||||||
line, segment := reader.PeekLine()
|
line, segment := reader.PeekLine()
|
||||||
if isThematicBreak(line) {
|
if isThematicBreak(line) {
|
||||||
reader.Advance(segment.Len() - 1)
|
reader.Advance(segment.Len() - 1)
|
||||||
|
|
@ -54,18 +58,18 @@ func (b *ThematicBreakParser) Open(parent ast.Node, reader text.Reader, pc Conte
|
||||||
return nil, NoChildren
|
return nil, NoChildren
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *ThematicBreakParser) Continue(node ast.Node, reader text.Reader, pc Context) State {
|
func (b *thematicBreakPraser) Continue(node ast.Node, reader text.Reader, pc Context) State {
|
||||||
return Close
|
return Close
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *ThematicBreakParser) Close(node ast.Node, reader text.Reader, pc Context) {
|
func (b *thematicBreakPraser) Close(node ast.Node, reader text.Reader, pc Context) {
|
||||||
// nothing to do
|
// nothing to do
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *ThematicBreakParser) CanInterruptParagraph() bool {
|
func (b *thematicBreakPraser) CanInterruptParagraph() bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *ThematicBreakParser) CanAcceptIndentedLine() bool {
|
func (b *thematicBreakPraser) CanAcceptIndentedLine() bool {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/yuin/goldmark"
|
"github.com/yuin/goldmark"
|
||||||
|
"github.com/yuin/goldmark/parser"
|
||||||
"github.com/yuin/goldmark/util"
|
"github.com/yuin/goldmark/util"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -130,7 +131,7 @@ Actual
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
if err := m.Convert([]byte(testCase.Markdown), &out); err != nil {
|
if err := m.Convert([]byte(testCase.Markdown), &out, parser.WithWorkers(16)); err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
ok = bytes.Equal(bytes.TrimSpace(out.Bytes()), bytes.TrimSpace([]byte(testCase.Expected)))
|
ok = bytes.Equal(bytes.TrimSpace(out.Bytes()), bytes.TrimSpace([]byte(testCase.Expected)))
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue