Remove the WithWorkers option

Situations that concurrent inline parsing is effective are very limited
due to goroutine overheads and a parse context sharing mutex.
This commit is contained in:
yuin 2019-10-31 17:34:14 +09:00
parent 2184586bb2
commit 16b69522a4
7 changed files with 28 additions and 259 deletions

View file

@ -84,7 +84,7 @@ With options
```go
var buf bytes.Buffer
if err := goldmark.Convert(source, &buf, parser.WithWorkers(16)); err != nil {
if err := goldmark.Convert(source, &buf, parser.WithContext(ctx)); err != nil {
panic(err)
}
```
@ -92,11 +92,6 @@ if err := goldmark.Convert(source, &buf, parser.WithWorkers(16)); err != nil {
| Functional option | Type | Description |
| ----------------- | ---- | ----------- |
| `parser.WithContext` | A parser.Context | Context for the parsing phase. |
| parser.WithWorkers | int | Number of goroutines that execute concurrent inline element parsing. |
`parser.WithWorkers` may make performance better a little if markdown text
is relatively large. Otherwise, `parser.Workers` may cause performance degradation due to
goroutine overheads.
Custom parser and renderer
--------------------------
@ -255,19 +250,15 @@ blackfriday v2 can not simply be compared with other Commonmark compliant librar
Though goldmark builds clean extensible AST structure and get full compliance with
Commonmark, it is resonably fast and less memory consumption.
This benchmark parses a relatively large markdown text. In such text, concurrent parsing
makes performance better a little.
```
goos: windows
goos: darwin
goarch: amd64
pkg: github.com/yuin/goldmark/_benchmark/go
BenchmarkMarkdown/Blackfriday-v2-4 200 6199986 ns/op 3320027 B/op 20050 allocs/op
BenchmarkMarkdown/GoldMark(workers=16)-4 300 5655736 ns/op 2700250 B/op 14494 allocs/op
BenchmarkMarkdown/GoldMark-4 200 6501805 ns/op 2594488 B/op 13861 allocs/op
BenchmarkMarkdown/CommonMark-4 200 7803784 ns/op 2752553 B/op 18826 allocs/op
BenchmarkMarkdown/Lute-4 200 6920985 ns/op 2984762 B/op 21270 allocs/op
BenchmarkMarkdown/GoMarkdown-4 10 171046030 ns/op 2195980 B/op 22174 allocs/op
BenchmarkMarkdown/Blackfriday-v2-12 337 3407336 ns/op 3261042 B/op 19862 allocs/op
BenchmarkMarkdown/GoldMark-12 302 3947527 ns/op 2574830 B/op 13853 allocs/op
BenchmarkMarkdown/CommonMark-12 249 4784221 ns/op 2739317 B/op 18824 allocs/op
BenchmarkMarkdown/Lute-12 285 4178276 ns/op 4639751 B/op 26665 allocs/op
BenchmarkMarkdown/GoMarkdown-12 9 114246204 ns/op 2175131 B/op 22172 allocs/op
```
### against cmark(A CommonMark reference implementation written in c)
@ -276,15 +267,12 @@ BenchmarkMarkdown/GoMarkdown-4 10 171046030 ns/op
----------- cmark -----------
file: _data.md
iteration: 50
average: 0.0047014618 sec
average: 0.0037760639 sec
go run ./goldmark_benchmark.go
------- goldmark -------
file: _data.md
iteration: 50
average: 0.0052624750 sec
------- goldmark(workers=16) -------
file: _data.md
iteration: 50
average: 0.0044918780 sec
average: 0.0040964230 sec
```
As you can see, goldmark performs pretty much equally to the cmark.

View file

@ -9,7 +9,6 @@ import (
"time"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer/html"
)
@ -43,18 +42,4 @@ func main() {
fmt.Printf("file: %s\n", file)
fmt.Printf("iteration: %d\n", n)
fmt.Printf("average: %.10f sec\n", float64((int64(sum)/int64(n)))/1000000000.0)
sum = time.Duration(0)
for i := 0; i < n; i++ {
start := time.Now()
out.Reset()
if err := markdown.Convert(source, &out, parser.WithWorkers(16)); err != nil {
panic(err)
}
sum += time.Since(start)
}
fmt.Printf("------- goldmark(workers=16) -------\n")
fmt.Printf("file: %s\n", file)
fmt.Printf("iteration: %d\n", n)
fmt.Printf("average: %.10f sec\n", float64((int64(sum)/int64(n)))/1000000000.0)
}

View file

@ -7,26 +7,16 @@ import (
gomarkdown "github.com/gomarkdown/markdown"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/util"
"gitlab.com/golang-commonmark/markdown"
bf1 "github.com/russross/blackfriday"
bf2 "gopkg.in/russross/blackfriday.v2"
"github.com/b3log/lute"
)
func BenchmarkMarkdown(b *testing.B) {
b.Run("Blackfriday-v1", func(b *testing.B) {
r := func(src []byte) ([]byte, error) {
out := bf1.MarkdownBasic(src)
return out, nil
}
doBenchmark(b, r)
})
b.Run("Blackfriday-v2", func(b *testing.B) {
r := func(src []byte) ([]byte, error) {
out := bf2.Run(src)
@ -35,25 +25,13 @@ func BenchmarkMarkdown(b *testing.B) {
doBenchmark(b, r)
})
b.Run("GoldMark(workers=16)", func(b *testing.B) {
markdown := goldmark.New(
goldmark.WithRendererOptions(html.WithXHTML(), html.WithUnsafe()),
)
r := func(src []byte) ([]byte, error) {
var out bytes.Buffer
err := markdown.Convert(src, &out, parser.WithWorkers(16))
return out.Bytes(), err
}
doBenchmark(b, r)
})
b.Run("GoldMark", func(b *testing.B) {
markdown := goldmark.New(
goldmark.WithRendererOptions(html.WithXHTML(), html.WithUnsafe()),
)
r := func(src []byte) ([]byte, error) {
var out bytes.Buffer
err := markdown.Convert(src, &out, parser.WithWorkers(0))
err := markdown.Convert(src, &out)
return out.Bytes(), err
}
doBenchmark(b, r)
@ -70,12 +48,15 @@ func BenchmarkMarkdown(b *testing.B) {
})
b.Run("Lute", func(b *testing.B) {
luteEngine := lute.New(
lute.GFM(false),
lute.CodeSyntaxHighlight(false),
lute.SoftBreak2HardBreak(false),
lute.AutoSpace(false),
lute.FixTermTypo(false))
luteEngine := lute.New()
luteEngine.SetGFMAutoLink(false)
luteEngine.SetGFMStrikethrough(false)
luteEngine.SetGFMTable(false)
luteEngine.SetGFMTaskListItem(false)
luteEngine.SetCodeSyntaxHighlight(false)
luteEngine.SetSoftBreak2HardBreak(false)
luteEngine.SetAutoSpace(false)
luteEngine.SetFixTermTypo(false)
r := func(src []byte) ([]byte, error) {
out, err := luteEngine.MarkdownStr("Benchmark", util.BytesToReadOnlyString(src))
return util.StringToReadOnlyBytes(out), err

View file

@ -140,7 +140,7 @@ func (s *footnoteParser) Parse(parent gast.Node, block text.Reader, pc parser.Co
block.Advance(closes + 1)
var list *ast.FootnoteList
if tlist := pc.Root().Get(footnoteListKey); tlist != nil {
if tlist := pc.Get(footnoteListKey); tlist != nil {
list = tlist.(*ast.FootnoteList)
}
if list == nil {

View file

@ -169,7 +169,7 @@ func (s *linkParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.N
block.SetPosition(l, pos)
ssegment := text.NewSegment(last.Segment.Stop, segment.Start)
maybeReference := block.Value(ssegment)
ref, ok := pc.Root().Reference(util.ToLinkReference(maybeReference))
ref, ok := pc.Reference(util.ToLinkReference(maybeReference))
if !ok {
ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment)
return nil
@ -243,7 +243,7 @@ func (s *linkParser) parseReferenceLink(parent ast.Node, last *linkLabelState, b
maybeReference = block.Value(ssegment)
}
ref, ok := pc.Root().Reference(util.ToLinkReference(maybeReference))
ref, ok := pc.Reference(util.ToLinkReference(maybeReference))
if !ok {
return nil, true
}

View file

@ -196,9 +196,6 @@ type Context interface {
// LastOpenedBlock returns a last node that is currently in parsing.
LastOpenedBlock() Block
// Root returns a context shared accross goroutines.
Root() Context
}
type parseContext struct {
@ -210,7 +207,6 @@ type parseContext struct {
delimiters *Delimiter
lastDelimiter *Delimiter
openedBlocks []Block
root Context
}
// NewContext returns a new Context.
@ -224,7 +220,6 @@ func NewContext() Context {
delimiters: nil,
lastDelimiter: nil,
openedBlocks: []Block{},
root: nil,
}
}
@ -361,140 +356,6 @@ func (p *parseContext) LastOpenedBlock() Block {
return Block{}
}
func (p *parseContext) Root() Context {
if p.root == nil {
return p
}
return p.root
}
type concurrentParseContext struct {
delegate Context
m sync.RWMutex
root Context
}
func NewConcurrentContext(delegate Context) Context {
return &concurrentParseContext{
delegate: delegate,
root: nil,
}
}
func (p *concurrentParseContext) Get(key ContextKey) interface{} {
p.m.RLock()
defer p.m.RUnlock()
ret := p.delegate.Get(key)
return ret
}
func (p *concurrentParseContext) Set(key ContextKey, value interface{}) {
p.m.Lock()
defer p.m.Unlock()
p.delegate.Set(key, value)
}
func (p *concurrentParseContext) IDs() IDs {
return p.delegate.IDs()
}
func (p *concurrentParseContext) BlockOffset() int {
return p.delegate.BlockOffset()
}
func (p *concurrentParseContext) SetBlockOffset(v int) {
p.m.Lock()
defer p.m.Unlock()
p.delegate.SetBlockOffset(v)
}
func (p *concurrentParseContext) BlockIndent() int {
return p.delegate.BlockIndent()
}
func (p *concurrentParseContext) SetBlockIndent(v int) {
p.m.Lock()
defer p.m.Unlock()
p.delegate.SetBlockIndent(v)
}
func (p *concurrentParseContext) LastDelimiter() *Delimiter {
return p.delegate.LastDelimiter()
}
func (p *concurrentParseContext) FirstDelimiter() *Delimiter {
return p.delegate.FirstDelimiter()
}
func (p *concurrentParseContext) PushDelimiter(d *Delimiter) {
p.m.Lock()
defer p.m.Unlock()
p.delegate.PushDelimiter(d)
}
func (p *concurrentParseContext) RemoveDelimiter(d *Delimiter) {
p.m.Lock()
defer p.m.Unlock()
p.delegate.RemoveDelimiter(d)
}
func (p *concurrentParseContext) ClearDelimiters(bottom ast.Node) {
p.m.Lock()
defer p.m.Unlock()
p.delegate.ClearDelimiters(bottom)
}
func (p *concurrentParseContext) AddReference(ref Reference) {
p.m.Lock()
defer p.m.Unlock()
p.delegate.AddReference(ref)
}
func (p *concurrentParseContext) Reference(label string) (Reference, bool) {
p.m.RLock()
defer p.m.RUnlock()
v, ok := p.delegate.Reference(label)
return v, ok
}
func (p *concurrentParseContext) References() []Reference {
p.m.RLock()
defer p.m.RUnlock()
ret := p.delegate.References()
return ret
}
func (p *concurrentParseContext) String() string {
p.m.RLock()
defer p.m.RUnlock()
ret := p.delegate.String()
return ret
}
func (p *concurrentParseContext) OpenedBlocks() []Block {
return p.delegate.OpenedBlocks()
}
func (p *concurrentParseContext) SetOpenedBlocks(v []Block) {
p.m.Lock()
defer p.m.Unlock()
p.delegate.SetOpenedBlocks(v)
}
func (p *concurrentParseContext) LastOpenedBlock() Block {
p.m.RLock()
defer p.m.RUnlock()
ret := p.delegate.LastOpenedBlock()
return ret
}
func (p *concurrentParseContext) Root() Context {
if p.root == nil {
return p
}
return p.root
}
// State represents parser's state.
// State is designed to use as a bit flag.
type State int
@ -906,7 +767,6 @@ func (p *parser) addASTTransformer(v util.PrioritizedValue, options map[OptionNa
// A ParseConfig struct is a data structure that holds configuration of the Parser.Parse.
type ParseConfig struct {
Context Context
Workers int
}
// A ParseOption is a functional option type for the Parser.Parse.
@ -920,15 +780,6 @@ func WithContext(context Context) ParseOption {
}
}
// WithWorkers is a functional option that allow you to set
// number of inline parsing workers(goroutines).
// If num is 0, inline parsing will never be multithreaded.
func WithWorkers(num int) ParseOption {
return func(c *ParseConfig) {
c.Workers = num
}
}
func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
p.initSync.Do(func() {
p.config.BlockParsers.Sort()
@ -966,45 +817,10 @@ func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
root := ast.NewDocument()
p.parseBlocks(root, reader, pc)
if c.Workers < 2 {
blockReader := text.NewBlockReader(reader.Source(), nil)
p.walkBlock(root, func(node ast.Node) {
p.parseBlock(blockReader, node, pc)
})
} else {
nodes := make([]ast.Node, 0, 100)
p.walkBlock(root, func(node ast.Node) {
nodes = append(nodes, node)
})
max := (len(nodes) / c.Workers) - 1
if max < 0 {
blockReader := text.NewBlockReader(reader.Source(), nil)
p.walkBlock(root, func(node ast.Node) {
p.parseBlock(blockReader, node, pc)
})
} else {
rootContext := NewConcurrentContext(pc)
var wg sync.WaitGroup
for i := 0; i <= max; i++ {
from := i * c.Workers
to := from + c.Workers
if i == max {
to = len(nodes)
}
wg.Add(1)
go func(wg *sync.WaitGroup) {
blockReader := text.NewBlockReader(reader.Source(), nil)
pc := NewContext()
pc.(*parseContext).root = rootContext
for _, n := range nodes[from:to] {
p.parseBlock(blockReader, n, pc)
}
wg.Done()
}(&wg)
}
wg.Wait()
}
}
blockReader := text.NewBlockReader(reader.Source(), nil)
p.walkBlock(root, func(node ast.Node) {
p.parseBlock(blockReader, node, pc)
})
for _, at := range p.astTransformers {
at.Transform(root, reader, pc)
}

View file

@ -10,7 +10,6 @@ import (
"strings"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/util"
)
@ -131,7 +130,7 @@ Actual
}
}()
if err := m.Convert([]byte(testCase.Markdown), &out, parser.WithWorkers(16)); err != nil {
if err := m.Convert([]byte(testCase.Markdown), &out); err != nil {
panic(err)
}
ok = bytes.Equal(bytes.TrimSpace(out.Bytes()), bytes.TrimSpace([]byte(testCase.Expected)))