Remove the WithWorkers option

Situations in which concurrent inline parsing is effective are very limited
due to goroutine overhead and the mutex that guards the shared parse context.
This commit is contained in:
yuin 2019-10-31 17:34:14 +09:00
parent 2184586bb2
commit 16b69522a4
7 changed files with 28 additions and 259 deletions

View file

@ -84,7 +84,7 @@ With options
```go ```go
var buf bytes.Buffer var buf bytes.Buffer
if err := goldmark.Convert(source, &buf, parser.WithWorkers(16)); err != nil { if err := goldmark.Convert(source, &buf, parser.WithContext(ctx)); err != nil {
panic(err) panic(err)
} }
``` ```
@ -92,11 +92,6 @@ if err := goldmark.Convert(source, &buf, parser.WithWorkers(16)); err != nil {
| Functional option | Type | Description | | Functional option | Type | Description |
| ----------------- | ---- | ----------- | | ----------------- | ---- | ----------- |
| `parser.WithContext` | A parser.Context | Context for the parsing phase. | | `parser.WithContext` | A parser.Context | Context for the parsing phase. |
| parser.WithWorkers | int | Number of goroutines that execute concurrent inline element parsing. |
`parser.WithWorkers` may improve performance slightly if the markdown text
is relatively large. Otherwise, `parser.WithWorkers` may degrade performance due to
goroutine overhead.
Custom parser and renderer Custom parser and renderer
-------------------------- --------------------------
@ -255,19 +250,15 @@ blackfriday v2 can not simply be compared with other Commonmark compliant librar
Though goldmark builds a clean, extensible AST structure and achieves full compliance with Though goldmark builds a clean, extensible AST structure and achieves full compliance with
Commonmark, it is reasonably fast and has low memory consumption. Commonmark, it is reasonably fast and has low memory consumption.
This benchmark parses a relatively large markdown text. For such text, concurrent parsing
improves performance slightly.
``` ```
goos: windows goos: darwin
goarch: amd64 goarch: amd64
pkg: github.com/yuin/goldmark/_benchmark/go pkg: github.com/yuin/goldmark/_benchmark/go
BenchmarkMarkdown/Blackfriday-v2-4 200 6199986 ns/op 3320027 B/op 20050 allocs/op BenchmarkMarkdown/Blackfriday-v2-12 337 3407336 ns/op 3261042 B/op 19862 allocs/op
BenchmarkMarkdown/GoldMark(workers=16)-4 300 5655736 ns/op 2700250 B/op 14494 allocs/op BenchmarkMarkdown/GoldMark-12 302 3947527 ns/op 2574830 B/op 13853 allocs/op
BenchmarkMarkdown/GoldMark-4 200 6501805 ns/op 2594488 B/op 13861 allocs/op BenchmarkMarkdown/CommonMark-12 249 4784221 ns/op 2739317 B/op 18824 allocs/op
BenchmarkMarkdown/CommonMark-4 200 7803784 ns/op 2752553 B/op 18826 allocs/op BenchmarkMarkdown/Lute-12 285 4178276 ns/op 4639751 B/op 26665 allocs/op
BenchmarkMarkdown/Lute-4 200 6920985 ns/op 2984762 B/op 21270 allocs/op BenchmarkMarkdown/GoMarkdown-12 9 114246204 ns/op 2175131 B/op 22172 allocs/op
BenchmarkMarkdown/GoMarkdown-4 10 171046030 ns/op 2195980 B/op 22174 allocs/op
``` ```
### against cmark(A CommonMark reference implementation written in c) ### against cmark(A CommonMark reference implementation written in c)
@ -276,15 +267,12 @@ BenchmarkMarkdown/GoMarkdown-4 10 171046030 ns/op
----------- cmark ----------- ----------- cmark -----------
file: _data.md file: _data.md
iteration: 50 iteration: 50
average: 0.0047014618 sec average: 0.0037760639 sec
go run ./goldmark_benchmark.go
------- goldmark ------- ------- goldmark -------
file: _data.md file: _data.md
iteration: 50 iteration: 50
average: 0.0052624750 sec average: 0.0040964230 sec
------- goldmark(workers=16) -------
file: _data.md
iteration: 50
average: 0.0044918780 sec
``` ```
As you can see, goldmark performs about as well as cmark. As you can see, goldmark performs about as well as cmark.

View file

@ -9,7 +9,6 @@ import (
"time" "time"
"github.com/yuin/goldmark" "github.com/yuin/goldmark"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer/html" "github.com/yuin/goldmark/renderer/html"
) )
@ -43,18 +42,4 @@ func main() {
fmt.Printf("file: %s\n", file) fmt.Printf("file: %s\n", file)
fmt.Printf("iteration: %d\n", n) fmt.Printf("iteration: %d\n", n)
fmt.Printf("average: %.10f sec\n", float64((int64(sum)/int64(n)))/1000000000.0) fmt.Printf("average: %.10f sec\n", float64((int64(sum)/int64(n)))/1000000000.0)
sum = time.Duration(0)
for i := 0; i < n; i++ {
start := time.Now()
out.Reset()
if err := markdown.Convert(source, &out, parser.WithWorkers(16)); err != nil {
panic(err)
}
sum += time.Since(start)
}
fmt.Printf("------- goldmark(workers=16) -------\n")
fmt.Printf("file: %s\n", file)
fmt.Printf("iteration: %d\n", n)
fmt.Printf("average: %.10f sec\n", float64((int64(sum)/int64(n)))/1000000000.0)
} }

View file

@ -7,26 +7,16 @@ import (
gomarkdown "github.com/gomarkdown/markdown" gomarkdown "github.com/gomarkdown/markdown"
"github.com/yuin/goldmark" "github.com/yuin/goldmark"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer/html" "github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/util" "github.com/yuin/goldmark/util"
"gitlab.com/golang-commonmark/markdown" "gitlab.com/golang-commonmark/markdown"
bf1 "github.com/russross/blackfriday"
bf2 "gopkg.in/russross/blackfriday.v2" bf2 "gopkg.in/russross/blackfriday.v2"
"github.com/b3log/lute" "github.com/b3log/lute"
) )
func BenchmarkMarkdown(b *testing.B) { func BenchmarkMarkdown(b *testing.B) {
b.Run("Blackfriday-v1", func(b *testing.B) {
r := func(src []byte) ([]byte, error) {
out := bf1.MarkdownBasic(src)
return out, nil
}
doBenchmark(b, r)
})
b.Run("Blackfriday-v2", func(b *testing.B) { b.Run("Blackfriday-v2", func(b *testing.B) {
r := func(src []byte) ([]byte, error) { r := func(src []byte) ([]byte, error) {
out := bf2.Run(src) out := bf2.Run(src)
@ -35,25 +25,13 @@ func BenchmarkMarkdown(b *testing.B) {
doBenchmark(b, r) doBenchmark(b, r)
}) })
b.Run("GoldMark(workers=16)", func(b *testing.B) {
markdown := goldmark.New(
goldmark.WithRendererOptions(html.WithXHTML(), html.WithUnsafe()),
)
r := func(src []byte) ([]byte, error) {
var out bytes.Buffer
err := markdown.Convert(src, &out, parser.WithWorkers(16))
return out.Bytes(), err
}
doBenchmark(b, r)
})
b.Run("GoldMark", func(b *testing.B) { b.Run("GoldMark", func(b *testing.B) {
markdown := goldmark.New( markdown := goldmark.New(
goldmark.WithRendererOptions(html.WithXHTML(), html.WithUnsafe()), goldmark.WithRendererOptions(html.WithXHTML(), html.WithUnsafe()),
) )
r := func(src []byte) ([]byte, error) { r := func(src []byte) ([]byte, error) {
var out bytes.Buffer var out bytes.Buffer
err := markdown.Convert(src, &out, parser.WithWorkers(0)) err := markdown.Convert(src, &out)
return out.Bytes(), err return out.Bytes(), err
} }
doBenchmark(b, r) doBenchmark(b, r)
@ -70,12 +48,15 @@ func BenchmarkMarkdown(b *testing.B) {
}) })
b.Run("Lute", func(b *testing.B) { b.Run("Lute", func(b *testing.B) {
luteEngine := lute.New( luteEngine := lute.New()
lute.GFM(false), luteEngine.SetGFMAutoLink(false)
lute.CodeSyntaxHighlight(false), luteEngine.SetGFMStrikethrough(false)
lute.SoftBreak2HardBreak(false), luteEngine.SetGFMTable(false)
lute.AutoSpace(false), luteEngine.SetGFMTaskListItem(false)
lute.FixTermTypo(false)) luteEngine.SetCodeSyntaxHighlight(false)
luteEngine.SetSoftBreak2HardBreak(false)
luteEngine.SetAutoSpace(false)
luteEngine.SetFixTermTypo(false)
r := func(src []byte) ([]byte, error) { r := func(src []byte) ([]byte, error) {
out, err := luteEngine.MarkdownStr("Benchmark", util.BytesToReadOnlyString(src)) out, err := luteEngine.MarkdownStr("Benchmark", util.BytesToReadOnlyString(src))
return util.StringToReadOnlyBytes(out), err return util.StringToReadOnlyBytes(out), err

View file

@ -140,7 +140,7 @@ func (s *footnoteParser) Parse(parent gast.Node, block text.Reader, pc parser.Co
block.Advance(closes + 1) block.Advance(closes + 1)
var list *ast.FootnoteList var list *ast.FootnoteList
if tlist := pc.Root().Get(footnoteListKey); tlist != nil { if tlist := pc.Get(footnoteListKey); tlist != nil {
list = tlist.(*ast.FootnoteList) list = tlist.(*ast.FootnoteList)
} }
if list == nil { if list == nil {

View file

@ -169,7 +169,7 @@ func (s *linkParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.N
block.SetPosition(l, pos) block.SetPosition(l, pos)
ssegment := text.NewSegment(last.Segment.Stop, segment.Start) ssegment := text.NewSegment(last.Segment.Stop, segment.Start)
maybeReference := block.Value(ssegment) maybeReference := block.Value(ssegment)
ref, ok := pc.Root().Reference(util.ToLinkReference(maybeReference)) ref, ok := pc.Reference(util.ToLinkReference(maybeReference))
if !ok { if !ok {
ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment) ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment)
return nil return nil
@ -243,7 +243,7 @@ func (s *linkParser) parseReferenceLink(parent ast.Node, last *linkLabelState, b
maybeReference = block.Value(ssegment) maybeReference = block.Value(ssegment)
} }
ref, ok := pc.Root().Reference(util.ToLinkReference(maybeReference)) ref, ok := pc.Reference(util.ToLinkReference(maybeReference))
if !ok { if !ok {
return nil, true return nil, true
} }

View file

@ -196,9 +196,6 @@ type Context interface {
// LastOpenedBlock returns a last node that is currently in parsing. // LastOpenedBlock returns a last node that is currently in parsing.
LastOpenedBlock() Block LastOpenedBlock() Block
// Root returns a context shared across goroutines.
Root() Context
} }
type parseContext struct { type parseContext struct {
@ -210,7 +207,6 @@ type parseContext struct {
delimiters *Delimiter delimiters *Delimiter
lastDelimiter *Delimiter lastDelimiter *Delimiter
openedBlocks []Block openedBlocks []Block
root Context
} }
// NewContext returns a new Context. // NewContext returns a new Context.
@ -224,7 +220,6 @@ func NewContext() Context {
delimiters: nil, delimiters: nil,
lastDelimiter: nil, lastDelimiter: nil,
openedBlocks: []Block{}, openedBlocks: []Block{},
root: nil,
} }
} }
@ -361,140 +356,6 @@ func (p *parseContext) LastOpenedBlock() Block {
return Block{} return Block{}
} }
// Root returns the root context attached to p, or p itself when no root
// context has been set.
func (p *parseContext) Root() Context {
	if p.root != nil {
		return p.root
	}
	return p
}
// concurrentParseContext is a Context that forwards every call to a delegate
// Context while holding a read-write mutex, so that the delegate can be used
// from several goroutines.
type concurrentParseContext struct {
	delegate Context
	m        sync.RWMutex
	root     Context
}

// NewConcurrentContext returns a Context that wraps delegate and serializes
// access to it with a read-write mutex.
func NewConcurrentContext(delegate Context) Context {
	return &concurrentParseContext{
		delegate: delegate,
		root:     nil,
	}
}

// Get returns the delegate's value for key under the read lock.
func (p *concurrentParseContext) Get(key ContextKey) interface{} {
	p.m.RLock()
	defer p.m.RUnlock()
	return p.delegate.Get(key)
}

// Set stores value for key in the delegate under the write lock.
func (p *concurrentParseContext) Set(key ContextKey, value interface{}) {
	p.m.Lock()
	defer p.m.Unlock()
	p.delegate.Set(key, value)
}

// IDs returns the delegate's IDs. No lock is taken: this wrapper exposes no
// method that replaces the value.
func (p *concurrentParseContext) IDs() IDs {
	return p.delegate.IDs()
}

// BlockOffset returns the delegate's block offset under the read lock, so the
// read is ordered against SetBlockOffset.
func (p *concurrentParseContext) BlockOffset() int {
	p.m.RLock()
	defer p.m.RUnlock()
	return p.delegate.BlockOffset()
}

// SetBlockOffset sets the delegate's block offset under the write lock.
func (p *concurrentParseContext) SetBlockOffset(v int) {
	p.m.Lock()
	defer p.m.Unlock()
	p.delegate.SetBlockOffset(v)
}

// BlockIndent returns the delegate's block indent under the read lock, so the
// read is ordered against SetBlockIndent.
func (p *concurrentParseContext) BlockIndent() int {
	p.m.RLock()
	defer p.m.RUnlock()
	return p.delegate.BlockIndent()
}

// SetBlockIndent sets the delegate's block indent under the write lock.
func (p *concurrentParseContext) SetBlockIndent(v int) {
	p.m.Lock()
	defer p.m.Unlock()
	p.delegate.SetBlockIndent(v)
}

// LastDelimiter returns the delegate's last delimiter under the read lock, so
// the read is ordered against PushDelimiter, RemoveDelimiter and
// ClearDelimiters.
func (p *concurrentParseContext) LastDelimiter() *Delimiter {
	p.m.RLock()
	defer p.m.RUnlock()
	return p.delegate.LastDelimiter()
}

// FirstDelimiter returns the delegate's first delimiter under the read lock,
// so the read is ordered against PushDelimiter, RemoveDelimiter and
// ClearDelimiters.
func (p *concurrentParseContext) FirstDelimiter() *Delimiter {
	p.m.RLock()
	defer p.m.RUnlock()
	return p.delegate.FirstDelimiter()
}

// PushDelimiter forwards d to the delegate's PushDelimiter under the write lock.
func (p *concurrentParseContext) PushDelimiter(d *Delimiter) {
	p.m.Lock()
	defer p.m.Unlock()
	p.delegate.PushDelimiter(d)
}

// RemoveDelimiter forwards d to the delegate's RemoveDelimiter under the write
// lock.
func (p *concurrentParseContext) RemoveDelimiter(d *Delimiter) {
	p.m.Lock()
	defer p.m.Unlock()
	p.delegate.RemoveDelimiter(d)
}

// ClearDelimiters forwards bottom to the delegate's ClearDelimiters under the
// write lock.
func (p *concurrentParseContext) ClearDelimiters(bottom ast.Node) {
	p.m.Lock()
	defer p.m.Unlock()
	p.delegate.ClearDelimiters(bottom)
}

// AddReference forwards ref to the delegate's AddReference under the write lock.
func (p *concurrentParseContext) AddReference(ref Reference) {
	p.m.Lock()
	defer p.m.Unlock()
	p.delegate.AddReference(ref)
}

// Reference returns the result of the delegate's Reference(label) under the
// read lock.
func (p *concurrentParseContext) Reference(label string) (Reference, bool) {
	p.m.RLock()
	defer p.m.RUnlock()
	return p.delegate.Reference(label)
}

// References returns the delegate's references under the read lock.
func (p *concurrentParseContext) References() []Reference {
	p.m.RLock()
	defer p.m.RUnlock()
	return p.delegate.References()
}

// String returns the delegate's string representation under the read lock.
func (p *concurrentParseContext) String() string {
	p.m.RLock()
	defer p.m.RUnlock()
	return p.delegate.String()
}

// OpenedBlocks returns the delegate's opened blocks under the read lock, so
// the read is ordered against SetOpenedBlocks.
func (p *concurrentParseContext) OpenedBlocks() []Block {
	p.m.RLock()
	defer p.m.RUnlock()
	return p.delegate.OpenedBlocks()
}

// SetOpenedBlocks replaces the delegate's opened blocks under the write lock.
func (p *concurrentParseContext) SetOpenedBlocks(v []Block) {
	p.m.Lock()
	defer p.m.Unlock()
	p.delegate.SetOpenedBlocks(v)
}

// LastOpenedBlock returns the delegate's last opened block under the read lock.
func (p *concurrentParseContext) LastOpenedBlock() Block {
	p.m.RLock()
	defer p.m.RUnlock()
	return p.delegate.LastOpenedBlock()
}

// Root returns the root context attached to p, or p itself when no root
// context has been set.
func (p *concurrentParseContext) Root() Context {
	if p.root == nil {
		return p
	}
	return p.root
}
// State represents parser's state. // State represents parser's state.
// State is designed to use as a bit flag. // State is designed to use as a bit flag.
type State int type State int
@ -906,7 +767,6 @@ func (p *parser) addASTTransformer(v util.PrioritizedValue, options map[OptionNa
// A ParseConfig struct is a data structure that holds configuration of the Parser.Parse. // A ParseConfig struct is a data structure that holds configuration of the Parser.Parse.
type ParseConfig struct { type ParseConfig struct {
Context Context Context Context
Workers int
} }
// A ParseOption is a functional option type for the Parser.Parse. // A ParseOption is a functional option type for the Parser.Parse.
@ -920,15 +780,6 @@ func WithContext(context Context) ParseOption {
} }
} }
// WithWorkers is a functional option that allows you to set the
// number of inline parsing workers (goroutines).
// If num is less than 2, inline parsing is not run concurrently.
func WithWorkers(num int) ParseOption {
	return func(cfg *ParseConfig) {
		cfg.Workers = num
	}
}
func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node { func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
p.initSync.Do(func() { p.initSync.Do(func() {
p.config.BlockParsers.Sort() p.config.BlockParsers.Sort()
@ -966,45 +817,10 @@ func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
root := ast.NewDocument() root := ast.NewDocument()
p.parseBlocks(root, reader, pc) p.parseBlocks(root, reader, pc)
if c.Workers < 2 { blockReader := text.NewBlockReader(reader.Source(), nil)
blockReader := text.NewBlockReader(reader.Source(), nil) p.walkBlock(root, func(node ast.Node) {
p.walkBlock(root, func(node ast.Node) { p.parseBlock(blockReader, node, pc)
p.parseBlock(blockReader, node, pc) })
})
} else {
nodes := make([]ast.Node, 0, 100)
p.walkBlock(root, func(node ast.Node) {
nodes = append(nodes, node)
})
max := (len(nodes) / c.Workers) - 1
if max < 0 {
blockReader := text.NewBlockReader(reader.Source(), nil)
p.walkBlock(root, func(node ast.Node) {
p.parseBlock(blockReader, node, pc)
})
} else {
rootContext := NewConcurrentContext(pc)
var wg sync.WaitGroup
for i := 0; i <= max; i++ {
from := i * c.Workers
to := from + c.Workers
if i == max {
to = len(nodes)
}
wg.Add(1)
go func(wg *sync.WaitGroup) {
blockReader := text.NewBlockReader(reader.Source(), nil)
pc := NewContext()
pc.(*parseContext).root = rootContext
for _, n := range nodes[from:to] {
p.parseBlock(blockReader, n, pc)
}
wg.Done()
}(&wg)
}
wg.Wait()
}
}
for _, at := range p.astTransformers { for _, at := range p.astTransformers {
at.Transform(root, reader, pc) at.Transform(root, reader, pc)
} }

View file

@ -10,7 +10,6 @@ import (
"strings" "strings"
"github.com/yuin/goldmark" "github.com/yuin/goldmark"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/util" "github.com/yuin/goldmark/util"
) )
@ -131,7 +130,7 @@ Actual
} }
}() }()
if err := m.Convert([]byte(testCase.Markdown), &out, parser.WithWorkers(16)); err != nil { if err := m.Convert([]byte(testCase.Markdown), &out); err != nil {
panic(err) panic(err)
} }
ok = bytes.Equal(bytes.TrimSpace(out.Bytes()), bytes.TrimSpace([]byte(testCase.Expected))) ok = bytes.Equal(bytes.TrimSpace(out.Bytes()), bytes.TrimSpace([]byte(testCase.Expected)))