From 987f65f8137cc6bf56a96dba6e2e2fdfbc1a5e35 Mon Sep 17 00:00:00 2001 From: yuin Date: Wed, 1 May 2019 20:32:41 +0900 Subject: [PATCH] Performance optimizations --- README.md | 6 +- ast/ast.go | 107 ++++++++-- ast/block.go | 127 ++++++++++-- ast/inline.go | 93 +++++++-- extension/ast/strikethrough.go | 12 +- extension/ast/table.go | 38 +++- extension/ast/tasklist.go | 10 +- extension/strikethrough.go | 14 +- extension/table.go | 36 ++-- extension/tasklist.go | 17 +- parser/atx_heading.go | 3 +- parser/auto_link.go | 4 - parser/code_span.go | 4 - parser/delimiter.go | 7 + parser/emphasis.go | 4 - parser/link.go | 6 + parser/parser.go | 113 +++++----- renderer/html/html.go | 197 ++++++++---------- renderer/renderer.go | 70 ++++--- util/util.go | 366 ++++++++++++++++++++++++++++----- 20 files changed, 868 insertions(+), 366 deletions(-) diff --git a/README.md b/README.md index 6e66cb7..0135744 100644 --- a/README.md +++ b/README.md @@ -121,9 +121,9 @@ Though goldmark builds clean extensible AST structure and get full compliance wi Commonmark, it is resonably fast and less memory consumption. ``` -BenchmarkGoldMark-4 200 7291402 ns/op 2259603 B/op 16867 allocs/op -BenchmarkGolangCommonMark-4 200 7709939 ns/op 3053760 B/op 18682 allocs/op -BenchmarkBlackFriday-4 300 5776369 ns/op 3356386 B/op 17480 allocs/op +BenchmarkGoldMark-4 200 7981524 ns/op 2485650 B/op 15716 allocs/op +BenchmarkGolangCommonMark-4 200 8609737 ns/op 3053758 B/op 18681 allocs/op +BenchmarkBlackFriday-4 200 6311112 ns/op 3356762 B/op 17481 allocs/op ``` Donation diff --git a/ast/ast.go b/ast/ast.go index 73ec675..82ff67f 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -12,17 +12,50 @@ import ( type NodeType int const ( - // BlockNode indicates that a node is kind of block nodes. - BlockNode NodeType = iota + 1 - // InlineNode indicates that a node is kind of inline nodes. - InlineNode + // TypeBlock indicates that a node is kind of block nodes. + TypeBlock NodeType = iota + 1 + // TypeInline indicates that a node is kind of inline nodes. + TypeInline + // TypeDocument indicates that a node is kind of document nodes. + TypeDocument ) +// NodeKind indicates more specific type than NodeType. +type NodeKind int + +func (k NodeKind) String() string { + return kindNames[k] +} + +var kindMax NodeKind +var kindNames = []string{""} + +// NewNodeKind returns a new Kind value. +func NewNodeKind(name string) NodeKind { + kindMax++ + kindNames = append(kindNames, name) + return kindMax +} + +// An Attribute is an attribute of the Node +type Attribute struct { + Name []byte + Value []byte +} + +var attrNameIDS = []byte("#") +var attrNameID = []byte("id") +var attrNameClassS = []byte(".") +var attrNameClass = []byte("class") + // A Node interface defines basic AST node functionalities. type Node interface { // Type returns a type of this node. Type() NodeType + // Kind returns a kind of this node. + Kind() NodeKind + // NextSibling returns a next sibling node of this node. NextSibling() Node @@ -106,6 +139,18 @@ type Node interface { // IsRaw returns true if contents should be rendered as 'raw' contents. IsRaw() bool + + // SetAttribute sets given value to the attributes. + SetAttribute(name, value []byte) + + // Attribute returns a (attribute value, true) if an attribute + // associated with given name is found, otherwise + // (nil, false) + Attribute(name []byte) ([]byte, bool) + + // Attributes returns a list of attributes. + // This may be a nil if there are no attributes. + Attributes() []Attribute } // A BaseNode struct implements the Node interface. @@ -115,6 +160,8 @@ type BaseNode struct { parent Node next Node prev Node + childCount int + attributes []Attribute } func ensureIsolated(v Node) { @@ -153,6 +200,7 @@ func (n *BaseNode) RemoveChild(self, v Node) { if v.Parent() != self { return } + n.childCount-- prev := v.PreviousSibling() next := v.NextSibling() if prev != nil { @@ -179,6 +227,7 @@ func (n *BaseNode) RemoveChildren(self Node) { } n.firstChild = nil n.lastChild = nil + n.childCount = 0 } // FirstChild implements Node.FirstChild . @@ -193,11 +242,7 @@ func (n *BaseNode) LastChild() Node { // ChildCount implements Node.ChildCount . func (n *BaseNode) ChildCount() int { - count := 0 - for c := n.firstChild; c != nil; c = c.NextSibling() { - count++ - } - return count + return n.childCount } // Parent implements Node.Parent . @@ -224,6 +269,7 @@ func (n *BaseNode) AppendChild(self, v Node) { } v.SetParent(self) n.lastChild = v + n.childCount++ } // ReplaceChild implements Node.ReplaceChild . @@ -239,6 +285,7 @@ func (n *BaseNode) InsertAfter(self, v1, insertee Node) { // InsertBefore implements Node.InsertBefore . func (n *BaseNode) InsertBefore(self, v1, insertee Node) { + n.childCount++ if v1 == nil { n.AppendChild(self, insertee) return @@ -269,15 +316,51 @@ func (n *BaseNode) Text(source []byte) []byte { return buf.Bytes() } +// SetAttribute implements Node.SetAttribute. +func (n *BaseNode) SetAttribute(name, value []byte) { + if n.attributes == nil { + n.attributes = make([]Attribute, 0, 10) + n.attributes = append(n.attributes, Attribute{name, value}) + return + } + for i, a := range n.attributes { + if bytes.Equal(a.Name, name) { + n.attributes[i].Name = name + n.attributes[i].Value = value + return + } + } + n.attributes = append(n.attributes, Attribute{name, value}) + return +} + +// Attribute implements Node.Attribute. +func (n *BaseNode) Attribute(name []byte) ([]byte, bool) { + if n.attributes == nil { + return nil, false + } + for i, a := range n.attributes { + if bytes.Equal(a.Name, name) { + return n.attributes[i].Value, true + } + } + return nil, false +} + +// Attributes implements Node.Attributes +func (n *BaseNode) Attributes() []Attribute { + return n.attributes +} + // DumpHelper is a helper function to implement Node.Dump. -// name is a name of the node. // kv is pairs of an attribute name and an attribute value. // cb is a function called after wrote a name and attributes. -func DumpHelper(v Node, source []byte, level int, name string, kv map[string]string, cb func(int)) { +func DumpHelper(v Node, source []byte, level int, kv map[string]string, cb func(int)) { + name := v.Kind().String() indent := strings.Repeat(" ", level) fmt.Printf("%s%s {\n", indent, name) indent2 := strings.Repeat(" ", level+1) - if v.Type() == BlockNode { + if v.Type() == TypeBlock { fmt.Printf("%sRawText: \"", indent2) for i := 0; i < v.Lines().Len(); i++ { line := v.Lines().At(i) diff --git a/ast/block.go b/ast/block.go index 281d146..95c81fe 100644 --- a/ast/block.go +++ b/ast/block.go @@ -15,7 +15,7 @@ type BaseBlock struct { // Type implements Node.Type func (b *BaseBlock) Type() NodeType { - return BlockNode + return TypeBlock } // IsRaw implements Node.IsRaw @@ -51,9 +51,22 @@ type Document struct { BaseBlock } +// KindDocument is a NodeKind of the Document node. +var KindDocument = NewNodeKind("Document") + // Dump impelements Node.Dump . func (n *Document) Dump(source []byte, level int) { - DumpHelper(n, source, level, "Document", nil, nil) + DumpHelper(n, source, level, nil, nil) +} + +// Type implements Node.Type . +func (n *Document) Type() NodeType { + return TypeDocument +} + +// Kind implements Node.Kind. +func (n *Document) Kind() NodeKind { + return KindDocument } // NewDocument returns a new Document node. @@ -71,7 +84,15 @@ type TextBlock struct { // Dump impelements Node.Dump . func (n *TextBlock) Dump(source []byte, level int) { - DumpHelper(n, source, level, "TextBlock", nil, nil) + DumpHelper(n, source, level, nil, nil) +} + +// KindTextBlock is a NodeKind of the TextBlock node. +var KindTextBlock = NewNodeKind("TextBlock") + +// Kind implements Node.Kind. +func (n *TextBlock) Kind() NodeKind { + return KindTextBlock } // NewTextBlock returns a new TextBlock node. @@ -88,7 +109,15 @@ type Paragraph struct { // Dump impelements Node.Dump . func (n *Paragraph) Dump(source []byte, level int) { - DumpHelper(n, source, level, "Paragraph", nil, nil) + DumpHelper(n, source, level, nil, nil) +} + +// KindParagraph is a NodeKind of the Paragraph node. +var KindParagraph = NewNodeKind("Paragraph") + +// Kind implements Node.Kind. +func (n *Paragraph) Kind() NodeKind { + return KindParagraph } // NewParagraph returns a new Paragraph node. @@ -111,9 +140,6 @@ type Heading struct { // Level returns a level of this heading. // This value is between 1 and 6. Level int - - // ID returns an ID of this heading. - ID []byte } // Dump impelements Node.Dump . @@ -121,7 +147,15 @@ func (n *Heading) Dump(source []byte, level int) { m := map[string]string{ "Level": fmt.Sprintf("%d", n.Level), } - DumpHelper(n, source, level, "Heading", m, nil) + DumpHelper(n, source, level, m, nil) +} + +// KindHeading is a NodeKind of the Heading node. +var KindHeading = NewNodeKind("Heading") + +// Kind implements Node.Kind. +func (n *Heading) Kind() NodeKind { + return KindHeading } // NewHeading returns a new Heading node. @@ -139,7 +173,15 @@ type ThemanticBreak struct { // Dump impelements Node.Dump . func (n *ThemanticBreak) Dump(source []byte, level int) { - DumpHelper(n, source, level, "ThemanticBreak", nil, nil) + DumpHelper(n, source, level, nil, nil) +} + +// KindThemanticBreak is a NodeKind of the ThemanticBreak node. +var KindThemanticBreak = NewNodeKind("ThemanticBreak") + +// Kind implements Node.Kind. +func (n *ThemanticBreak) Kind() NodeKind { + return KindThemanticBreak } // NewThemanticBreak returns a new ThemanticBreak node. @@ -161,7 +203,15 @@ func (n *CodeBlock) IsRaw() bool { // Dump impelements Node.Dump . func (n *CodeBlock) Dump(source []byte, level int) { - DumpHelper(n, source, level, "CodeBlock", nil, nil) + DumpHelper(n, source, level, nil, nil) +} + +// KindCodeBlock is a NodeKind of the CodeBlock node. +var KindCodeBlock = NewNodeKind("CodeBlock") + +// Kind implements Node.Kind. +func (n *CodeBlock) Kind() NodeKind { + return KindCodeBlock } // NewCodeBlock returns a new CodeBlock node. @@ -189,7 +239,15 @@ func (n *FencedCodeBlock) Dump(source []byte, level int) { if n.Info != nil { m["Info"] = fmt.Sprintf("\"%s\"", n.Info.Text(source)) } - DumpHelper(n, source, level, "FencedCodeBlock", m, nil) + DumpHelper(n, source, level, m, nil) +} + +// KindFencedCodeBlock is a NodeKind of the FencedCodeBlock node. +var KindFencedCodeBlock = NewNodeKind("FencedCodeBlock") + +// Kind implements Node.Kind. +func (n *FencedCodeBlock) Kind() NodeKind { + return KindFencedCodeBlock } // NewFencedCodeBlock return a new FencedCodeBlock node. @@ -207,7 +265,15 @@ type Blockquote struct { // Dump impelements Node.Dump . func (n *Blockquote) Dump(source []byte, level int) { - DumpHelper(n, source, level, "Blockquote", nil, nil) + DumpHelper(n, source, level, nil, nil) +} + +// KindBlockquote is a NodeKind of the Blockquote node. +var KindBlockquote = NewNodeKind("Blockquote") + +// Kind implements Node.Kind. +func (n *Blockquote) Kind() NodeKind { + return KindBlockquote } // NewBlockquote returns a new Blockquote node. @@ -246,18 +312,23 @@ func (l *List) CanContinue(marker byte, isOrdered bool) bool { // Dump implements Node.Dump. func (l *List) Dump(source []byte, level int) { - name := "List" - if l.IsOrdered() { - name = "OrderedList" - } m := map[string]string{ - "Marker": fmt.Sprintf("%c", l.Marker), - "Tight": fmt.Sprintf("%v", l.IsTight), + "Ordered": fmt.Sprintf("%v", l.IsOrdered()), + "Marker": fmt.Sprintf("%c", l.Marker), + "Tight": fmt.Sprintf("%v", l.IsTight), } if l.IsOrdered() { m["Start"] = fmt.Sprintf("%d", l.Start) } - DumpHelper(l, source, level, name, m, nil) + DumpHelper(l, source, level, m, nil) +} + +// KindList is a NodeKind of the List node. +var KindList = NewNodeKind("List") + +// Kind implements Node.Kind. +func (l *List) Kind() NodeKind { + return KindList } // NewList returns a new List node. @@ -279,7 +350,15 @@ type ListItem struct { // Dump implements Node.Dump. func (n *ListItem) Dump(source []byte, level int) { - DumpHelper(n, source, level, "ListItem", nil, nil) + DumpHelper(n, source, level, nil, nil) +} + +// KindListItem is a NodeKind of the ListItem node. +var KindListItem = NewNodeKind("ListItem") + +// Kind implements Node.Kind. +func (n *ListItem) Kind() NodeKind { + return KindListItem } // NewListItem returns a new ListItem node. @@ -354,6 +433,14 @@ func (n *HTMLBlock) Dump(source []byte, level int) { fmt.Printf("%s}\n", indent) } +// KindHTMLBlock is a NodeKind of the HTMLBlock node. +var KindHTMLBlock = NewNodeKind("HTMLBlock") + +// Kind implements Node.Kind. +func (n *HTMLBlock) Kind() NodeKind { + return KindHTMLBlock +} + // NewHTMLBlock returns a new HTMLBlock node. func NewHTMLBlock(typ HTMLBlockType) *HTMLBlock { return &HTMLBlock{ diff --git a/ast/inline.go b/ast/inline.go index 43ce81f..151034a 100644 --- a/ast/inline.go +++ b/ast/inline.go @@ -15,7 +15,7 @@ type BaseInline struct { // Type implements Node.Type func (b *BaseInline) Type() NodeType { - return InlineNode + return TypeInline } // IsRaw implements Node.IsRaw @@ -133,6 +133,14 @@ func (n *Text) Dump(source []byte, level int) { fmt.Printf("%sText: \"%s\"\n", strings.Repeat(" ", level), strings.TrimRight(string(n.Text(source)), "\n")) } +// KindText is a NodeKind of the Text node. +var KindText = NewNodeKind("Text") + +// Kind implements Node.Kind. +func (n *Text) Kind() NodeKind { + return KindText +} + // NewText returns a new Text node. func NewText() *Text { return &Text{ @@ -166,8 +174,7 @@ func MergeOrAppendTextSegment(parent Node, s textm.Segment) { last := parent.LastChild() t, ok := last.(*Text) if ok && t.Segment.Stop == s.Start && !t.SoftLineBreak() { - ts := t.Segment - t.Segment = ts.WithStop(s.Stop) + t.Segment = t.Segment.WithStop(s.Stop) } else { parent.AppendChild(parent, NewTextSegment(s)) } @@ -207,7 +214,15 @@ func (n *CodeSpan) IsBlank(source []byte) bool { // Dump implements Node.Dump func (n *CodeSpan) Dump(source []byte, level int) { - DumpHelper(n, source, level, "CodeSpan", nil, nil) + DumpHelper(n, source, level, nil, nil) +} + +// KindCodeSpan is a NodeKind of the CodeSpan node. +var KindCodeSpan = NewNodeKind("CodeSpan") + +// Kind implements Node.Kind. +func (n *CodeSpan) Kind() NodeKind { + return KindCodeSpan } // NewCodeSpan returns a new CodeSpan node. @@ -225,13 +240,20 @@ type Emphasis struct { Level int } -// Inline implements Inline.Inline. -func (n *Emphasis) Inline() { -} - // Dump implements Node.Dump. func (n *Emphasis) Dump(source []byte, level int) { - DumpHelper(n, source, level, fmt.Sprintf("Emphasis(%d)", n.Level), nil, nil) + m := map[string]string{ + "Level": fmt.Sprintf("%v", n.Level), + } + DumpHelper(n, source, level, m, nil) +} + +// KindEmphasis is a NodeKind of the Emphasis node. +var KindEmphasis = NewNodeKind("Emphasis") + +// Kind implements Node.Kind. +func (n *Emphasis) Kind() NodeKind { + return KindEmphasis } // NewEmphasis returns a new Emphasis node with given level. @@ -256,18 +278,27 @@ type baseLink struct { func (n *baseLink) Inline() { } -func (n *baseLink) Dump(source []byte, level int) { - m := map[string]string{} - m["Destination"] = string(n.Destination) - m["Title"] = string(n.Title) - DumpHelper(n, source, level, "Link", m, nil) -} - // A Link struct represents a link of the Markdown text. type Link struct { baseLink } +// Dump implements Node.Dump. +func (n *Link) Dump(source []byte, level int) { + m := map[string]string{} + m["Destination"] = string(n.Destination) + m["Title"] = string(n.Title) + DumpHelper(n, source, level, m, nil) +} + +// KindLink is a NodeKind of the Link node. +var KindLink = NewNodeKind("Link") + +// Kind implements Node.Kind. +func (n *Link) Kind() NodeKind { + return KindLink +} + // NewLink returns a new Link node. func NewLink() *Link { c := &Link{ @@ -288,7 +319,15 @@ func (n *Image) Dump(source []byte, level int) { m := map[string]string{} m["Destination"] = string(n.Destination) m["Title"] = string(n.Title) - DumpHelper(n, source, level, "Image", m, nil) + DumpHelper(n, source, level, m, nil) +} + +// KindImage is a NodeKind of the Image node. +var KindImage = NewNodeKind("Image") + +// Kind implements Node.Kind. +func (n *Image) Kind() NodeKind { + return KindImage } // NewImage returns a new Image node. @@ -338,7 +377,15 @@ func (n *AutoLink) Dump(source []byte, level int) { m := map[string]string{ "Value": string(segment.Value(source)), } - DumpHelper(n, source, level, "AutoLink", m, nil) + DumpHelper(n, source, level, m, nil) +} + +// KindAutoLink is a NodeKind of the AutoLink node. +var KindAutoLink = NewNodeKind("AutoLink") + +// Kind implements Node.Kind. +func (n *AutoLink) Kind() NodeKind { + return KindAutoLink } // NewAutoLink returns a new AutoLink node. @@ -360,7 +407,15 @@ func (n *RawHTML) Inline() {} // Dump implements Node.Dump. func (n *RawHTML) Dump(source []byte, level int) { - DumpHelper(n, source, level, "RawHTML", nil, nil) + DumpHelper(n, source, level, nil, nil) +} + +// KindRawHTML is a NodeKind of the RawHTML node. +var KindRawHTML = NewNodeKind("RawHTML") + +// Kind implements Node.Kind. +func (n *RawHTML) Kind() NodeKind { + return KindRawHTML } // NewRawHTML returns a new RawHTML node. diff --git a/extension/ast/strikethrough.go b/extension/ast/strikethrough.go index e6f45a5..a9216b7 100644 --- a/extension/ast/strikethrough.go +++ b/extension/ast/strikethrough.go @@ -10,11 +10,17 @@ type Strikethrough struct { gast.BaseInline } -func (n *Strikethrough) Inline() { +// Dump implements Node.Dump. +func (n *Strikethrough) Dump(source []byte, level int) { + gast.DumpHelper(n, source, level, nil, nil) } -func (n *Strikethrough) Dump(source []byte, level int) { - gast.DumpHelper(n, source, level, "Strikethrough", nil, nil) +// KindStrikethrough is a NodeKind of the Strikethrough node. +var KindStrikethrough = gast.NewNodeKind("Strikethrough") + +// Kind implements Node.Kind. +func (n *Strikethrough) Kind() gast.NodeKind { + return KindStrikethrough } // NewStrikethrough returns a new Strikethrough node. diff --git a/extension/ast/table.go b/extension/ast/table.go index 7d205ef..7396b86 100644 --- a/extension/ast/table.go +++ b/extension/ast/table.go @@ -47,7 +47,7 @@ type Table struct { // Dump implements Node.Dump func (n *Table) Dump(source []byte, level int) { - gast.DumpHelper(n, source, level, "Table", nil, func(level int) { + gast.DumpHelper(n, source, level, nil, func(level int) { indent := strings.Repeat(" ", level) fmt.Printf("%sAlignments {\n", indent) for i, alignment := range n.Alignments { @@ -61,6 +61,14 @@ func (n *Table) Dump(source []byte, level int) { }) } +// KindTable is a NodeKind of the Table node. +var KindTable = gast.NewNodeKind("Table") + +// Kind implements Node.Kind. +func (n *Table) Kind() gast.NodeKind { + return KindTable +} + // NewTable returns a new Table node. func NewTable() *Table { return &Table{ @@ -76,7 +84,15 @@ type TableRow struct { // Dump implements Node.Dump. func (n *TableRow) Dump(source []byte, level int) { - gast.DumpHelper(n, source, level, "TableRow", nil, nil) + gast.DumpHelper(n, source, level, nil, nil) +} + +// KindTableRow is a NodeKind of the TableRow node. +var KindTableRow = gast.NewNodeKind("TableRow") + +// Kind implements Node.Kind. +func (n *TableRow) Kind() gast.NodeKind { + return KindTableRow } // NewTableRow returns a new TableRow node. @@ -89,6 +105,14 @@ type TableHeader struct { *TableRow } +// KindTableHeader is a NodeKind of the TableHeader node. +var KindTableHeader = gast.NewNodeKind("TableHeader") + +// Kind implements Node.Kind. +func (n *TableHeader) Kind() gast.NodeKind { + return KindTableHeader +} + // NewTableHeader returns a new TableHeader node. func NewTableHeader(row *TableRow) *TableHeader { return &TableHeader{row} @@ -102,7 +126,15 @@ type TableCell struct { // Dump implements Node.Dump. func (n *TableCell) Dump(source []byte, level int) { - gast.DumpHelper(n, source, level, "TableCell", nil, nil) + gast.DumpHelper(n, source, level, nil, nil) +} + +// KindTableCell is a NodeKind of the TableCell node. +var KindTableCell = gast.NewNodeKind("TableCell") + +// Kind implements Node.Kind. +func (n *TableCell) Kind() gast.NodeKind { + return KindTableCell } // NewTableCell returns a new TableCell node. diff --git a/extension/ast/tasklist.go b/extension/ast/tasklist.go index 38f7be4..90b451b 100644 --- a/extension/ast/tasklist.go +++ b/extension/ast/tasklist.go @@ -16,7 +16,15 @@ func (n *TaskCheckBox) Dump(source []byte, level int) { m := map[string]string{ "Checked": fmt.Sprintf("%v", n.IsChecked), } - gast.DumpHelper(n, source, level, "TaskCheckBox", m, nil) + gast.DumpHelper(n, source, level, m, nil) +} + +// KindTaskCheckBox is a NodeKind of the TaskCheckBox node. +var KindTaskCheckBox = gast.NewNodeKind("TaskCheckBox") + +// Kind implements Node.Kind. +func (n *TaskCheckBox) Kind() gast.NodeKind { + return KindTaskCheckBox } // NewTaskCheckBox returns a new TaskCheckBox node. diff --git a/extension/strikethrough.go b/extension/strikethrough.go index 9e70fe5..7462030 100644 --- a/extension/strikethrough.go +++ b/extension/strikethrough.go @@ -77,22 +77,18 @@ func NewStrikethroughHTMLRenderer(opts ...html.Option) renderer.NodeRenderer { return r } -// Render implements renderer.NodeRenderer.Render. -func (r *StrikethroughHTMLRenderer) Render(writer util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) { - switch node := n.(type) { - case *ast.Strikethrough: - return r.renderStrikethrough(writer, source, node, entering), nil - } - return gast.WalkContinue, renderer.NotSupported +// RegisterFuncs implements renderer.NodeRenderer.RegisterFuncs. +func (r *StrikethroughHTMLRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { + reg.Register(ast.KindStrikethrough, r.renderStrikethrough) } -func (r *StrikethroughHTMLRenderer) renderStrikethrough(w util.BufWriter, source []byte, n *ast.Strikethrough, entering bool) gast.WalkStatus { +func (r *StrikethroughHTMLRenderer) renderStrikethrough(w util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) { if entering { w.WriteString("") } else { w.WriteString("") } - return gast.WalkContinue + return gast.WalkContinue, nil } type strikethrough struct { diff --git a/extension/table.go b/extension/table.go index 3cc65c6..ed0e51a 100644 --- a/extension/table.go +++ b/extension/table.go @@ -147,31 +147,24 @@ func NewTableHTMLRenderer(opts ...html.Option) renderer.NodeRenderer { return r } -// Render implements renderer.Renderer.Render. -func (r *TableHTMLRenderer) Render(writer util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) { - switch node := n.(type) { - case *ast.Table: - return r.renderTable(writer, source, node, entering), nil - case *ast.TableHeader: - return r.renderTableHeader(writer, source, node, entering), nil - case *ast.TableRow: - return r.renderTableRow(writer, source, node, entering), nil - case *ast.TableCell: - return r.renderTableCell(writer, source, node, entering), nil - } - return gast.WalkContinue, renderer.NotSupported +// RegisterFuncs implements renderer.NodeRenderer.RegisterFuncs. +func (r *TableHTMLRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { + reg.Register(ast.KindTable, r.renderTable) + reg.Register(ast.KindTableHeader, r.renderTableHeader) + reg.Register(ast.KindTableRow, r.renderTableRow) + reg.Register(ast.KindTableCell, r.renderTableCell) } -func (r *TableHTMLRenderer) renderTable(w util.BufWriter, source []byte, n *ast.Table, entering bool) gast.WalkStatus { +func (r *TableHTMLRenderer) renderTable(w util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) { if entering { w.WriteString("\n") } else { w.WriteString("
\n") } - return gast.WalkContinue + return gast.WalkContinue, nil } -func (r *TableHTMLRenderer) renderTableHeader(w util.BufWriter, source []byte, n *ast.TableHeader, entering bool) gast.WalkStatus { +func (r *TableHTMLRenderer) renderTableHeader(w util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) { if entering { w.WriteString("\n") w.WriteString("\n") @@ -185,10 +178,10 @@ func (r *TableHTMLRenderer) renderTableHeader(w util.BufWriter, source []byte, n w.WriteString("\n") } } - return gast.WalkContinue + return gast.WalkContinue, nil } -func (r *TableHTMLRenderer) renderTableRow(w util.BufWriter, source []byte, n *ast.TableRow, entering bool) gast.WalkStatus { +func (r *TableHTMLRenderer) renderTableRow(w util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) { if entering { w.WriteString("\n") } else { @@ -197,10 +190,11 @@ func (r *TableHTMLRenderer) renderTableRow(w util.BufWriter, source []byte, n *a w.WriteString("\n") } } - return gast.WalkContinue + return gast.WalkContinue, nil } -func (r *TableHTMLRenderer) renderTableCell(w util.BufWriter, source []byte, n *ast.TableCell, entering bool) gast.WalkStatus { +func (r *TableHTMLRenderer) renderTableCell(w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) { + n := node.(*ast.TableCell) tag := "td" if n.Parent().Parent().FirstChild() == n.Parent() { tag = "th" @@ -214,7 +208,7 @@ func (r *TableHTMLRenderer) renderTableCell(w util.BufWriter, source []byte, n * } else { fmt.Fprintf(w, "\n", tag) } - return gast.WalkContinue + return gast.WalkContinue, nil } type table struct { diff --git a/extension/tasklist.go b/extension/tasklist.go index 1de59f9..85ade26 100644 --- a/extension/tasklist.go +++ b/extension/tasklist.go @@ -75,19 +75,16 @@ func NewTaskCheckBoxHTMLRenderer(opts ...html.Option) renderer.NodeRenderer { return r } -// Render implements renderer.NodeRenderer.Render. -func (r *TaskCheckBoxHTMLRenderer) Render(writer util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) { - switch node := n.(type) { - case *ast.TaskCheckBox: - return r.renderTaskCheckBox(writer, source, node, entering), nil - } - return gast.WalkContinue, renderer.NotSupported +// RegisterFuncs implements renderer.NodeRenderer.RegisterFuncs. +func (r *TaskCheckBoxHTMLRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { + reg.Register(ast.KindTaskCheckBox, r.renderTaskCheckBox) } -func (r *TaskCheckBoxHTMLRenderer) renderTaskCheckBox(w util.BufWriter, source []byte, n *ast.TaskCheckBox, entering bool) gast.WalkStatus { +func (r *TaskCheckBoxHTMLRenderer) renderTaskCheckBox(w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) { if !entering { - return gast.WalkContinue + return gast.WalkContinue, nil } + n := node.(*ast.TaskCheckBox) if n.IsChecked { w.WriteString(`") } - return gast.WalkContinue + return gast.WalkContinue, nil } type taskList struct { diff --git a/parser/atx_heading.go b/parser/atx_heading.go index 46adbee..371dcfb 100644 --- a/parser/atx_heading.go +++ b/parser/atx_heading.go @@ -120,6 +120,7 @@ func (b *atxHeadingParser) CanAcceptIndentedLine() bool { var headingIDRegexp = regexp.MustCompile(`^(.*[^\\])({#([^}]+)}\s*)\n?$`) var headingIDMap = NewContextKey() +var attrNameID = []byte("id") func parseOrGenerateHeadingID(node *ast.Heading, pc Context) { existsv := pc.Get(headingIDMap) @@ -142,5 +143,5 @@ func parseOrGenerateHeadingID(node *ast.Heading, pc Context) { } else { headingID = util.GenerateLinkID(line, exists) } - node.ID = headingID + node.SetAttribute(attrNameID, headingID) } diff --git a/parser/auto_link.go b/parser/auto_link.go index f7656bc..6972680 100644 --- a/parser/auto_link.go +++ b/parser/auto_link.go @@ -40,7 +40,3 @@ func (s *autoLinkParser) Parse(parent ast.Node, block text.Reader, pc Context) a block.Advance(match[1]) return ast.NewAutoLink(typ, value) } - -func (s *autoLinkParser) CloseBlock(parent ast.Node, pc Context) { - // nothing to do -} diff --git a/parser/code_span.go b/parser/code_span.go index e5452a4..157abaf 100644 --- a/parser/code_span.go +++ b/parser/code_span.go @@ -81,7 +81,3 @@ end: } return node } - -func (s *codeSpanParser) CloseBlock(parent ast.Node, pc Context) { - // nothing to do -} diff --git a/parser/delimiter.go b/parser/delimiter.go index f86c677..888e21a 100644 --- a/parser/delimiter.go +++ b/parser/delimiter.go @@ -65,6 +65,13 @@ func (d *Delimiter) Dump(source []byte, level int) { fmt.Printf("%sDelimiter: \"%s\"\n", strings.Repeat(" ", level), string(d.Text(source))) } +var kindDelimiter = ast.NewNodeKind("Delimiter") + +// Kind implements Node.Kind +func (d *Delimiter) Kind() ast.NodeKind { + return kindDelimiter +} + // Text implements Node.Text func (d *Delimiter) Text(source []byte) []byte { return d.Segment.Value(source) diff --git a/parser/emphasis.go b/parser/emphasis.go index 73b32f2..4886471 100644 --- a/parser/emphasis.go +++ b/parser/emphasis.go @@ -48,7 +48,3 @@ func (s *emphasisParser) Parse(parent ast.Node, block text.Reader, pc Context) a pc.PushDelimiter(node) return node } - -func (s *emphasisParser) CloseBlock(parent ast.Node, pc Context) { - // nothing to do -} diff --git a/parser/link.go b/parser/link.go index e8cec9b..ba2af86 100644 --- a/parser/link.go +++ b/parser/link.go @@ -43,6 +43,12 @@ func (s *linkLabelState) Dump(source []byte, level int) { fmt.Printf("%slinkLabelState: \"%s\"\n", strings.Repeat(" ", level), s.Text(source)) } +var kindLinkLabelState = ast.NewNodeKind("LinkLabelState") + +func (s *linkLabelState) Kind() ast.NodeKind { + return kindLinkLabelState +} + func pushLinkLabelState(pc Context, v *linkLabelState) { tlist := pc.Get(linkLabelStateKey) var list *linkLabelState diff --git a/parser/parser.go b/parser/parser.go index 5d33ab3..a815fa5 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -5,7 +5,6 @@ import ( "fmt" "strings" "sync" - "sync/atomic" "github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/text" @@ -55,19 +54,15 @@ func (r *reference) String() string { } // ContextKey is a key that is used to set arbitary values to the context. -type ContextKey int32 - -// New returns a new ContextKey value. -func (c *ContextKey) New() ContextKey { - return ContextKey(atomic.AddInt32((*int32)(c), 1)) -} +type ContextKey int // ContextKeyMax is a maximum value of the ContextKey. var ContextKeyMax ContextKey // NewContextKey return a new ContextKey value. func NewContextKey() ContextKey { - return ContextKeyMax.New() + ContextKeyMax++ + return ContextKeyMax } // A Context interface holds a information that are necessary to parse @@ -127,33 +122,28 @@ type Context interface { // LastOpenedBlock returns a last node that is currently in parsing. LastOpenedBlock() Block - - // SetLastOpenedBlock sets a last node that is currently in parsing. - SetLastOpenedBlock(Block) } type parseContext struct { - store []interface{} - source []byte - refs map[string]Reference - blockOffset int - delimiters *Delimiter - lastDelimiter *Delimiter - openedBlocks []Block - lastOpenedBlock Block + store []interface{} + source []byte + refs map[string]Reference + blockOffset int + delimiters *Delimiter + lastDelimiter *Delimiter + openedBlocks []Block } // NewContext returns a new Context. func NewContext(source []byte) Context { return &parseContext{ - store: make([]interface{}, ContextKeyMax+1), - source: source, - refs: map[string]Reference{}, - blockOffset: 0, - delimiters: nil, - lastDelimiter: nil, - openedBlocks: []Block{}, - lastOpenedBlock: Block{}, + store: make([]interface{}, ContextKeyMax+1), + source: source, + refs: map[string]Reference{}, + blockOffset: 0, + delimiters: nil, + lastDelimiter: nil, + openedBlocks: []Block{}, } } @@ -276,11 +266,10 @@ func (p *parseContext) SetOpenedBlocks(v []Block) { } func (p *parseContext) LastOpenedBlock() Block { - return p.lastOpenedBlock -} - -func (p *parseContext) SetLastOpenedBlock(v Block) { - p.lastOpenedBlock = v + if l := len(p.openedBlocks); l != 0 { + return p.openedBlocks[l-1] + } + return Block{} } // State represents parser's state. @@ -401,7 +390,11 @@ type InlineParser interface { // If Parse has been able to parse the current line, it must advance a reader // position by consumed byte length. Parse(parent ast.Node, block text.Reader, pc Context) ast.Node +} +// A CloseBlocker interface is a callback function that will be +// called when block is closed in the inline parsing. +type CloseBlocker interface { // CloseBlock will be called when a block is closed. CloseBlock(parent ast.Node, pc Context) } @@ -487,7 +480,7 @@ type parser struct { options map[OptionName]interface{} blockParsers []BlockParser inlineParsers [256][]InlineParser - inlineParsersList []InlineParser + closeBlockers []CloseBlocker paragraphTransformers []ParagraphTransformer astTransformers []ASTTransformer config *Config @@ -610,7 +603,9 @@ func (p *parser) addInlineParser(v util.PrioritizedValue, options map[OptionName so.SetOption(oname, ovalue) } } - p.inlineParsersList = append(p.inlineParsersList, ip) + if cb, ok := ip.(CloseBlocker); ok { + p.closeBlockers = append(p.closeBlockers, cb) + } for _, tc := range tcs { if p.inlineParsers[tc] == nil { p.inlineParsers[tc] = []InlineParser{} @@ -715,15 +710,12 @@ func (p *parser) transformParagraph(node *ast.Paragraph, pc Context) { func (p *parser) closeBlocks(from, to int, pc Context) { blocks := pc.OpenedBlocks() - last := pc.LastOpenedBlock() for i := from; i >= to; i-- { node := blocks[i].Node - if node.Parent() != nil { - blocks[i].Parser.Close(blocks[i].Node, pc) - paragraph, ok := node.(*ast.Paragraph) - if ok && node.Parent() != nil { - p.transformParagraph(paragraph, pc) - } + blocks[i].Parser.Close(blocks[i].Node, pc) + paragraph, ok := node.(*ast.Paragraph) + if ok && node.Parent() != nil { + p.transformParagraph(paragraph, pc) } } if from == len(blocks)-1 { @@ -731,14 +723,7 @@ func (p *parser) closeBlocks(from, to int, pc Context) { } else { blocks = append(blocks[0:to], blocks[from+1:]...) } - l := len(blocks) - if l == 0 { - last.Node = nil - } else { - last = blocks[l-1] - } pc.SetOpenedBlocks(blocks) - pc.SetLastOpenedBlock(last) } type blockOpenResult int @@ -758,13 +743,13 @@ func (p *parser) openBlocks(parent ast.Node, blankLine bool, reader text.Reader, } retry: shouldPeek := true - var currentLineNum int + //var currentLineNum int var w int var pos int var line []byte for _, bp := range p.blockParsers { if shouldPeek { - currentLineNum, _ = reader.Position() + //currentLineNum, _ = reader.Position() line, _ = reader.PeekLine() w, pos = util.IndentWidth(line, 0) pc.SetBlockOffset(pos) @@ -781,9 +766,9 @@ retry: } last := pc.LastOpenedBlock().Node node, state := bp.Open(parent, reader, pc) - if l, _ := reader.Position(); l != currentLineNum { - panic("BlockParser.Open must not advance position beyond the current line") - } + // if l, _ := reader.Position(); l != currentLineNum { + // panic("BlockParser.Open must not advance position beyond the current line") + // } if node != nil { shouldPeek = true node.SetBlankPreviousLines(blankLine) @@ -795,7 +780,6 @@ retry: result = newBlocksOpened be := Block{node, bp} pc.SetOpenedBlocks(append(pc.OpenedBlocks(), be)) - pc.SetLastOpenedBlock(be) if state == HasChildren { parent = node goto retry // try child block @@ -834,7 +818,6 @@ func isBlankLine(lineNum, level int, stats []lineStat) ([]lineStat, bool) { } func (p *parser) parseBlocks(parent ast.Node, reader text.Reader, pc Context) { - pc.SetLastOpenedBlock(Block{}) pc.SetOpenedBlocks([]Block{}) blankLines := make([]lineStat, 0, 64) isBlank := false @@ -848,14 +831,20 @@ func (p *parser) parseBlocks(parent ast.Node, reader text.Reader, pc Context) { return } lineNum, _ := reader.Position() - for i := 0; i < len(pc.OpenedBlocks()); i++ { + l := len(pc.OpenedBlocks()) + for i := 0; i < l; i++ { blankLines = append(blankLines, lineStat{lineNum - 1, i, lines != 0}) } reader.AdvanceLine() - for len(pc.OpenedBlocks()) != 0 { // process opened blocks line by line - lastIndex := len(pc.OpenedBlocks()) - 1 - for i := 0; i < len(pc.OpenedBlocks()); i++ { - be := pc.OpenedBlocks()[i] + for { // process opened blocks line by line + openedBlocks := pc.OpenedBlocks() + l := len(openedBlocks) + if l == 0 { + break + } + lastIndex := l - 1 + for i := 0; i < l; i++ { + be := openedBlocks[i] line, _ := reader.PeekLine() if line == nil { p.closeBlocks(lastIndex, 0, pc) @@ -883,7 +872,7 @@ func (p *parser) parseBlocks(parent ast.Node, reader text.Reader, pc Context) { blankLines, isBlank = isBlankLine(lineNum-1, i, blankLines) thisParent := parent if i != 0 { - thisParent = pc.OpenedBlocks()[i-1].Node + thisParent = openedBlocks[i-1].Node } result := p.openBlocks(thisParent, isBlank, reader, pc) if result != paragraphContinuation { @@ -998,7 +987,7 @@ func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context) } ProcessDelimiters(nil, pc) - for _, ip := range p.inlineParsersList { + for _, ip := range p.closeBlockers { ip.CloseBlock(parent, pc) } diff --git a/renderer/html/html.go b/renderer/html/html.go index 903f591..e0eaaa4 100644 --- a/renderer/html/html.go +++ b/renderer/html/html.go @@ -158,53 +158,33 @@ func NewRenderer(opts ...Option) renderer.NodeRenderer { return r } -// Render implements renderer.NodeRenderer.Render. -func (r *Renderer) Render(writer util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) { - switch node := n.(type) { - +// RegisterFuncs implements NodeRenderer.RegisterFuncs . +func (r *Renderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { // blocks - case *ast.Document: - return r.renderDocument(writer, source, node, entering), nil - case *ast.Heading: - return r.renderHeading(writer, source, node, entering), nil - case *ast.Blockquote: - return r.renderBlockquote(writer, source, node, entering), nil - case *ast.CodeBlock: - return r.renderCodeBlock(writer, source, node, entering), nil - case *ast.FencedCodeBlock: - return r.renderFencedCodeBlock(writer, source, node, entering), nil - case *ast.HTMLBlock: - return r.renderHTMLBlock(writer, source, node, entering), nil - case *ast.List: - return r.renderList(writer, source, node, entering), nil - case *ast.ListItem: - return r.renderListItem(writer, source, node, entering), nil - case *ast.Paragraph: - return r.renderParagraph(writer, source, node, entering), nil - case *ast.TextBlock: - return r.renderTextBlock(writer, source, node, entering), nil - case *ast.ThemanticBreak: - return r.renderThemanticBreak(writer, source, node, entering), nil + reg.Register(ast.KindDocument, r.renderDocument) + reg.Register(ast.KindHeading, r.renderHeading) + reg.Register(ast.KindBlockquote, r.renderBlockquote) + reg.Register(ast.KindCodeBlock, r.renderCodeBlock) + reg.Register(ast.KindFencedCodeBlock, r.renderFencedCodeBlock) + reg.Register(ast.KindHTMLBlock, r.renderHTMLBlock) + reg.Register(ast.KindList, r.renderList) + reg.Register(ast.KindListItem, r.renderListItem) + reg.Register(ast.KindParagraph, r.renderParagraph) + reg.Register(ast.KindTextBlock, r.renderTextBlock) + reg.Register(ast.KindThemanticBreak, r.renderThemanticBreak) + // inlines - case *ast.AutoLink: - return r.renderAutoLink(writer, source, node, entering), nil - case *ast.CodeSpan: - return r.renderCodeSpan(writer, source, node, entering), nil - case *ast.Emphasis: - return r.renderEmphasis(writer, source, node, entering), nil - case *ast.Image: - return r.renderImage(writer, source, node, entering), nil - case *ast.Link: - return r.renderLink(writer, source, node, entering), nil - case *ast.RawHTML: - return r.renderRawHTML(writer, source, node, entering), nil - case *ast.Text: - return r.renderText(writer, source, node, entering), nil - } - return ast.WalkContinue, renderer.NotSupported + reg.Register(ast.KindAutoLink, r.renderAutoLink) + reg.Register(ast.KindCodeSpan, r.renderCodeSpan) + reg.Register(ast.KindEmphasis, r.renderEmphasis) + reg.Register(ast.KindImage, r.renderImage) + reg.Register(ast.KindLink, r.renderLink) + reg.Register(ast.KindRawHTML, r.renderRawHTML) + reg.Register(ast.KindText, r.renderText) } + func (r *Renderer) writeLines(w util.BufWriter, source []byte, n ast.Node) { l := n.Lines().Len() for i := 0; i < l; i++ { @@ -213,19 +193,25 @@ func (r *Renderer) writeLines(w util.BufWriter, source []byte, n ast.Node) { } } -func (r *Renderer) renderDocument(w util.BufWriter, source []byte, n *ast.Document, entering bool) ast.WalkStatus { +func (r *Renderer) renderDocument(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { // nothing to do - return ast.WalkContinue + return ast.WalkContinue, nil } -func (r *Renderer) renderHeading(w util.BufWriter, source []byte, n *ast.Heading, entering bool) ast.WalkStatus { +var attrNameID = []byte("id") + +func (r *Renderer) renderHeading(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + n := node.(*ast.Heading) if entering { w.WriteString("') } else { @@ -233,29 +219,30 @@ func (r *Renderer) renderHeading(w util.BufWriter, source []byte, n *ast.Heading w.WriteByte("0123456"[n.Level]) w.WriteString(">\n") } - return ast.WalkContinue + return ast.WalkContinue, nil } -func (r *Renderer) renderBlockquote(w util.BufWriter, source []byte, n *ast.Blockquote, entering bool) ast.WalkStatus { +func (r *Renderer) renderBlockquote(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) { if entering { w.WriteString("
\n") } else { w.WriteString("
\n") } - return ast.WalkContinue + return ast.WalkContinue, nil } -func (r *Renderer) renderCodeBlock(w util.BufWriter, source []byte, n *ast.CodeBlock, entering bool) ast.WalkStatus { +func (r *Renderer) renderCodeBlock(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) { if entering { w.WriteString("
")
 		r.writeLines(w, source, n)
 	} else {
 		w.WriteString("
\n") } - return ast.WalkContinue + return ast.WalkContinue, nil } -func (r *Renderer) renderFencedCodeBlock(w util.BufWriter, source []byte, n *ast.FencedCodeBlock, entering bool) ast.WalkStatus { +func (r *Renderer) renderFencedCodeBlock(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + n := node.(*ast.FencedCodeBlock) if entering { w.WriteString("
\n") } - return ast.WalkContinue + return ast.WalkContinue, nil } -func (r *Renderer) renderHTMLBlock(w util.BufWriter, source []byte, n *ast.HTMLBlock, entering bool) ast.WalkStatus { +func (r *Renderer) renderHTMLBlock(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + n := node.(*ast.HTMLBlock) if entering { if r.Unsafe { l := n.Lines().Len() @@ -301,10 +289,11 @@ func (r *Renderer) renderHTMLBlock(w util.BufWriter, source []byte, n *ast.HTMLB } } } - return ast.WalkContinue + return ast.WalkContinue, nil } -func (r *Renderer) renderList(w util.BufWriter, source []byte, n *ast.List, entering bool) ast.WalkStatus { +func (r *Renderer) renderList(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + n := node.(*ast.List) tag := "ul" if n.IsOrdered() { tag = "ol" @@ -322,10 +311,10 @@ func (r *Renderer) renderList(w util.BufWriter, source []byte, n *ast.List, ente w.WriteString(tag) w.WriteString(">\n") } - return ast.WalkContinue + return ast.WalkContinue, nil } -func (r *Renderer) renderListItem(w util.BufWriter, source []byte, n *ast.ListItem, entering bool) ast.WalkStatus { +func (r *Renderer) renderListItem(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) { if entering { w.WriteString("
  • ") fc := n.FirstChild() @@ -337,43 +326,43 @@ func (r *Renderer) renderListItem(w util.BufWriter, source []byte, n *ast.ListIt } else { w.WriteString("
  • \n") } - return ast.WalkContinue + return ast.WalkContinue, nil } -func (r *Renderer) renderParagraph(w util.BufWriter, source []byte, n *ast.Paragraph, entering bool) ast.WalkStatus { +func (r *Renderer) renderParagraph(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) { if entering { w.WriteString("

    ") } else { w.WriteString("

    \n") } - return ast.WalkContinue + return ast.WalkContinue, nil } -func (r *Renderer) renderTextBlock(w util.BufWriter, source []byte, n *ast.TextBlock, entering bool) ast.WalkStatus { +func (r *Renderer) renderTextBlock(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) { if !entering { if _, ok := n.NextSibling().(ast.Node); ok && n.FirstChild() != nil { w.WriteByte('\n') } - return ast.WalkContinue } - return ast.WalkContinue + return ast.WalkContinue, nil } -func (r *Renderer) renderThemanticBreak(w util.BufWriter, source []byte, n *ast.ThemanticBreak, entering bool) ast.WalkStatus { +func (r *Renderer) renderThemanticBreak(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) { if !entering { - return ast.WalkContinue + return ast.WalkContinue, nil } if r.XHTML { w.WriteString("
    \n") } else { w.WriteString("
    \n") } - return ast.WalkContinue + return ast.WalkContinue, nil } -func (r *Renderer) renderAutoLink(w util.BufWriter, source []byte, n *ast.AutoLink, entering bool) ast.WalkStatus { +func (r *Renderer) renderAutoLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + n := node.(*ast.AutoLink) if !entering { - return ast.WalkContinue + return ast.WalkContinue, nil } w.WriteString(``) w.Write(util.EscapeHTML(value)) w.WriteString(``) - return ast.WalkContinue + return ast.WalkContinue, nil } -func (r *Renderer) renderCodeSpan(w util.BufWriter, source []byte, n *ast.CodeSpan, entering bool) ast.WalkStatus { +func (r *Renderer) renderCodeSpan(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) { if entering { w.WriteString("") for c := n.FirstChild(); c != nil; c = c.NextSibling() { @@ -403,13 +392,14 @@ func (r *Renderer) renderCodeSpan(w util.BufWriter, source []byte, n *ast.CodeSp r.Writer.RawWrite(w, value) } } - return ast.WalkSkipChildren + return ast.WalkSkipChildren, nil } w.WriteString("") - return ast.WalkContinue + return ast.WalkContinue, nil } -func (r *Renderer) renderEmphasis(w util.BufWriter, source []byte, n *ast.Emphasis, entering bool) ast.WalkStatus { +func (r *Renderer) renderEmphasis(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + n := node.(*ast.Emphasis) tag := "em" if n.Level == 2 { tag = "strong" @@ -423,10 +413,11 @@ func (r *Renderer) renderEmphasis(w util.BufWriter, source []byte, n *ast.Emphas w.WriteString(tag) w.WriteByte('>') } - return ast.WalkContinue + return ast.WalkContinue, nil } -func (r *Renderer) renderLink(w util.BufWriter, source []byte, n *ast.Link, entering bool) ast.WalkStatus { +func (r *Renderer) renderLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + n := node.(*ast.Link) if entering { w.WriteString("") } - return ast.WalkContinue + return ast.WalkContinue, nil } -func (r *Renderer) renderImage(w util.BufWriter, source []byte, n *ast.Image, entering bool) ast.WalkStatus { +func (r *Renderer) renderImage(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { if !entering { - return ast.WalkContinue + return ast.WalkContinue, nil } + n := node.(*ast.Image) w.WriteString("") } - return ast.WalkSkipChildren + return ast.WalkSkipChildren, nil } -func (r *Renderer) renderRawHTML(w util.BufWriter, source []byte, n *ast.RawHTML, entering bool) ast.WalkStatus { +func (r *Renderer) renderRawHTML(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) { if r.Unsafe { - return ast.WalkContinue + return ast.WalkContinue, nil } w.WriteString("") - return ast.WalkSkipChildren + return ast.WalkSkipChildren, nil } -func (r *Renderer) renderText(w util.BufWriter, source []byte, n *ast.Text, entering bool) ast.WalkStatus { +func (r *Renderer) renderText(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { if !entering { - return ast.WalkContinue + return ast.WalkContinue, nil } + n := node.(*ast.Text) segment := n.Segment if n.IsRaw() { w.Write(segment.Value(source)) @@ -495,21 +488,7 @@ func (r *Renderer) renderText(w util.BufWriter, source []byte, n *ast.Text, ente w.WriteByte('\n') } } - return ast.WalkContinue -} - -func readWhile(source []byte, index [2]int, pred func(byte) bool) (int, bool) { - j := index[0] - ok := false - for ; j < index[1]; j++ { - c1 := source[j] - if pred(c1) { - ok = true - continue - } - break - } - return j, ok + return ast.WalkContinue, nil } // A Writer interface wirtes textual contents to a writer. @@ -526,11 +505,9 @@ type Writer interface { type defaultWriter struct { } -var htmlEscaleTable = [256][]byte{nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("""), nil, nil, nil, []byte("&"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("<"), nil, []byte(">"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil} - func escapeRune(writer util.BufWriter, r rune) { if r < 256 { - v := htmlEscaleTable[byte(r)] + v := util.EscapeHTMLByte(byte(r)) if v != nil { writer.Write(v) return @@ -543,7 +520,7 @@ func (d *defaultWriter) RawWrite(writer util.BufWriter, source []byte) { n := 0 l := len(source) for i := 0; i < l; i++ { - v := htmlEscaleTable[source[i]] + v := util.EscapeHTMLByte(source[i]) if v != nil { writer.Write(source[i-n : i]) n = 0 @@ -581,7 +558,7 @@ func (d *defaultWriter) Write(writer util.BufWriter, source []byte) { // code point like #x22; if nnext < limit && nc == 'x' || nc == 'X' { start := nnext + 1 - i, ok = readWhile(source, [2]int{start, limit}, util.IsHexDecimal) + i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsHexDecimal) if ok && i < limit && source[i] == ';' { v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 16, 32) d.RawWrite(writer, source[n:pos]) @@ -592,7 +569,7 @@ func (d *defaultWriter) Write(writer util.BufWriter, source []byte) { // code point like #1234; } else if nc >= '0' && nc <= '9' { start := nnext - i, ok = readWhile(source, [2]int{start, limit}, util.IsNumeric) + i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsNumeric) if ok && i < limit && i-start < 8 && source[i] == ';' { v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 0, 32) d.RawWrite(writer, source[n:pos]) @@ -603,7 +580,7 @@ func (d *defaultWriter) Write(writer util.BufWriter, source []byte) { } } else { start := next - i, ok = readWhile(source, [2]int{start, limit}, util.IsAlphaNumeric) + i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsAlphaNumeric) // entity reference if ok && i < limit && source[i] == ';' { name := util.BytesToReadOnlyString(source[start:i]) diff --git a/renderer/renderer.go b/renderer/renderer.go index 5dff406..2690e06 100644 --- a/renderer/renderer.go +++ b/renderer/renderer.go @@ -25,16 +25,6 @@ func NewConfig() *Config { } } -type notSupported struct { -} - -func (e *notSupported) Error() string { - return "not supported by this parser" -} - -// NotSupported indicates given node can not be rendered by this NodeRenderer. -var NotSupported = ¬Supported{} - // An OptionName is a name of the option. type OptionName string @@ -80,10 +70,19 @@ type SetOptioner interface { SetOption(name OptionName, value interface{}) } -// A NodeRenderer interface renders given AST node to given writer. +// NodeRendererFunc is a function that renders a given node. +type NodeRendererFunc func(writer util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) + +// A NodeRenderer interface offers NodeRendererFuncs. type NodeRenderer interface { - // Render renders given AST node to given writer. - Render(writer util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) + // RendererFuncs registers NodeRendererFuncs to given NodeRendererFuncRegisterer. + RegisterFuncs(NodeRendererFuncRegisterer) +} + +// A NodeRendererFuncRegisterer registers +type NodeRendererFuncRegisterer interface { + // Register registers given NodeRendererFunc to this object. + Register(ast.NodeKind, NodeRendererFunc) } // A Renderer interface renders given AST node to given @@ -96,10 +95,12 @@ type Renderer interface { } type renderer struct { - config *Config - options map[OptionName]interface{} - nodeRenderers []NodeRenderer - initSync sync.Once + config *Config + options map[OptionName]interface{} + nodeRendererFuncsTmp map[ast.NodeKind]NodeRendererFunc + maxKind int + nodeRendererFuncs []NodeRendererFunc + initSync sync.Once } // NewRenderer returns a new Renderer with given options. @@ -110,8 +111,9 @@ func NewRenderer(options ...Option) Renderer { } r := &renderer{ - options: map[OptionName]interface{}{}, - config: config, + options: map[OptionName]interface{}{}, + config: config, + nodeRendererFuncsTmp: map[ast.NodeKind]NodeRendererFunc{}, } return r @@ -121,36 +123,46 @@ func (r *renderer) AddOption(o Option) { o.SetConfig(r.config) } +func (r *renderer) Register(kind ast.NodeKind, v NodeRendererFunc) { + r.nodeRendererFuncsTmp[kind] = v + if int(kind) > r.maxKind { + r.maxKind = int(kind) + } +} + // Render renders given AST node to given writer with given Renderer. func (r *renderer) Render(w io.Writer, source []byte, n ast.Node) error { r.initSync.Do(func() { r.options = r.config.Options r.config.NodeRenderers.Sort() - r.nodeRenderers = make([]NodeRenderer, 0, len(r.config.NodeRenderers)) - for _, v := range r.config.NodeRenderers { + l := len(r.config.NodeRenderers) + for i := l - 1; i >= 0; i-- { + v := r.config.NodeRenderers[i] nr, _ := v.Value.(NodeRenderer) if se, ok := v.Value.(SetOptioner); ok { for oname, ovalue := range r.options { se.SetOption(oname, ovalue) } } - r.nodeRenderers = append(r.nodeRenderers, nr) + nr.RegisterFuncs(r) + } + r.nodeRendererFuncs = make([]NodeRendererFunc, r.maxKind+1) + for kind, nr := range r.nodeRendererFuncsTmp { + r.nodeRendererFuncs[kind] = nr } r.config = nil + r.nodeRendererFuncsTmp = nil }) writer, ok := w.(util.BufWriter) if !ok { writer = bufio.NewWriter(w) } err := ast.Walk(n, func(n ast.Node, entering bool) (ast.WalkStatus, error) { - var s ast.WalkStatus + s := ast.WalkStatus(ast.WalkContinue) var err error - for _, nr := range r.nodeRenderers { - s, err = nr.Render(writer, source, n, entering) - if err == NotSupported { - continue - } - break + f := r.nodeRendererFuncs[n.Kind()] + if f != nil { + s, err = f(writer, source, n, entering) } return s, err }) diff --git a/util/util.go b/util/util.go index 9247d1f..c5998c2 100644 --- a/util/util.go +++ b/util/util.go @@ -6,13 +6,70 @@ import ( "fmt" "io" "net/url" - "regexp" "sort" "strconv" "strings" "unicode/utf8" ) +// A CopyOnWriteBuffer is a byte buffer that copies buffer when +// it need to be changed. +type CopyOnWriteBuffer struct { + buffer []byte + copied bool +} + +// NewCopyOnWriteBuffer returns a new CopyOnWriteBuffer. +func NewCopyOnWriteBuffer(buffer []byte) CopyOnWriteBuffer { + return CopyOnWriteBuffer{ + buffer: buffer, + copied: false, + } +} + +// Write writes given bytes to the buffer. +func (b *CopyOnWriteBuffer) Write(value []byte) { + if !b.copied { + b.buffer = make([]byte, 0, len(b.buffer)+20) + b.copied = true + } + b.buffer = append(b.buffer, value...) +} + +// WriteByte writes given byte to the buffer. +func (b *CopyOnWriteBuffer) WriteByte(c byte) { + if !b.copied { + b.buffer = make([]byte, 0, len(b.buffer)+20) + b.copied = true + } + b.buffer = append(b.buffer, c) +} + +// Bytes returns bytes of this buffer. +func (b *CopyOnWriteBuffer) Bytes() []byte { + return b.buffer +} + +// IsCopied returns true if buffer has been copied, otherwise false. +func (b *CopyOnWriteBuffer) IsCopied() bool { + return b.copied +} + +// ReadWhile read given source while pred is true. +func ReadWhile(source []byte, index [2]int, pred func(byte) bool) (int, bool) { + j := index[0] + ok := false + for ; j < index[1]; j++ { + c1 := source[j] + if pred(c1) { + ok = true + continue + } + break + } + return j, ok +} + // IsBlank returns true if given string is all space characters. func IsBlank(bs []byte) bool { for _, b := range bs { @@ -26,6 +83,9 @@ func IsBlank(bs []byte) bool { // DedentPosition dedents lines by given width. func DedentPosition(bs []byte, width int) (pos, padding int) { + if width == 0 { + return + } i := 0 l := len(bs) w := 0 @@ -307,30 +367,22 @@ func ToLinkReference(v []byte) string { return strings.ToLower(string(ReplaceSpaces(v, ' '))) } -var escapeRegex = regexp.MustCompile(`\\.`) -var hexRefRegex = regexp.MustCompile(`#[xX][\da-fA-F]+;`) -var numRefRegex = regexp.MustCompile(`#\d{1,7};`) -var entityRefRegex = regexp.MustCompile(`&([a-zA-Z\d]+);`) +var htmlEscapeTable = [256][]byte{nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("""), nil, nil, nil, []byte("&"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("<"), nil, []byte(">"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil} -var entityLt = []byte("<") -var entityGt = []byte(">") -var entityAmp = []byte("&") -var entityQuot = []byte(""") +// EscapeHTMLByte returns HTML escaped bytes if given byte should be escaped, +// otherwise nil. +func EscapeHTMLByte(b byte) []byte { + return htmlEscapeTable[b] +} // EscapeHTML escapes characters that should be escaped in HTML text. func EscapeHTML(v []byte) []byte { result := make([]byte, 0, len(v)+10) for _, c := range v { - switch c { - case '<': - result = append(result, entityLt...) - case '>': - result = append(result, entityGt...) - case '&': - result = append(result, entityAmp...) - case '"': - result = append(result, entityQuot...) - default: + escaped := htmlEscapeTable[c] + if escaped != nil { + result = append(result, escaped...) + } else { result = append(result, c) } } @@ -338,41 +390,111 @@ func EscapeHTML(v []byte) []byte { } // UnescapePunctuations unescapes blackslash escaped punctuations. -func UnescapePunctuations(v []byte) []byte { - return escapeRegex.ReplaceAllFunc(v, func(match []byte) []byte { - if IsPunct(match[1]) { - return []byte{match[1]} +func UnescapePunctuations(source []byte) []byte { + cob := NewCopyOnWriteBuffer(source) + limit := len(source) + n := 0 + for i := 0; i < limit; { + c := source[i] + if i < limit-1 && c == '\\' && IsPunct(source[i+1]) { + cob.Write(source[n:i]) + cob.WriteByte(source[i+1]) + i += 2 + n = i + continue } - return match - }) + i++ + } + if cob.IsCopied() { + cob.Write(source[n:len(source)]) + } + return cob.Bytes() } // ResolveNumericReferences resolve numeric references like 'Ӓ" . -func ResolveNumericReferences(v []byte) []byte { +func ResolveNumericReferences(source []byte) []byte { + cob := NewCopyOnWriteBuffer(source) buf := make([]byte, 6, 6) - v = hexRefRegex.ReplaceAllFunc(v, func(match []byte) []byte { - v, _ := strconv.ParseUint(string(match[2:len(match)-1]), 16, 32) - n := utf8.EncodeRune(buf, ToValidRune(rune(v))) - return buf[:n] - }) - return numRefRegex.ReplaceAllFunc(v, func(match []byte) []byte { - v, _ := strconv.ParseUint(string(match[1:len(match)-1]), 0, 32) - n := utf8.EncodeRune(buf, ToValidRune(rune(v))) - return buf[:n] - }) + limit := len(source) + ok := false + n := 0 + for i := 0; i < limit; i++ { + if source[i] == '&' { + pos := i + next := i + 1 + if next < limit && source[next] == '#' { + nnext := next + 1 + nc := source[nnext] + // code point like #x22; + if nnext < limit && nc == 'x' || nc == 'X' { + start := nnext + 1 + i, ok = ReadWhile(source, [2]int{start, limit}, IsHexDecimal) + if ok && i < limit && source[i] == ';' { + v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 16, 32) + cob.Write(source[n:pos]) + n = i + 1 + runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v))) + cob.Write(buf[:runeSize]) + continue + } + // code point like #1234; + } else if nc >= '0' && nc <= '9' { + start := nnext + i, ok = ReadWhile(source, [2]int{start, limit}, IsNumeric) + if ok && i < limit && i-start < 8 && source[i] == ';' { + v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 0, 32) + cob.Write(source[n:pos]) + n = i + 1 + runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v))) + cob.Write(buf[:runeSize]) + continue + } + } + } + i = next - 1 + } + } + if cob.IsCopied() { + cob.Write(source[n:len(source)]) + } + return cob.Bytes() } // ResolveEntityNames resolve entity references like 'ö" . -func ResolveEntityNames(v []byte) []byte { - return entityRefRegex.ReplaceAllFunc(v, func(match []byte) []byte { - entity, ok := LookUpHTML5EntityByName(string(match[1 : len(match)-1])) - if ok { - return entity.Characters +func ResolveEntityNames(source []byte) []byte { + cob := NewCopyOnWriteBuffer(source) + limit := len(source) + ok := false + n := 0 + for i := 0; i < limit; i++ { + if source[i] == '&' { + pos := i + next := i + 1 + if !(next < limit && source[next] == '#') { + start := next + i, ok = ReadWhile(source, [2]int{start, limit}, IsAlphaNumeric) + if ok && i < limit && source[i] == ';' { + name := BytesToReadOnlyString(source[start:i]) + entity, ok := LookUpHTML5EntityByName(name) + if ok { + cob.Write(source[n:pos]) + n = i + 1 + cob.Write(entity.Characters) + continue + } + } + } + i = next - 1 } - return match - }) + } + if cob.IsCopied() { + cob.Write(source[n:len(source)]) + } + return cob.Bytes() } +var htmlSpace = []byte("%20") + // URLEscape escape given URL. // If resolveReference is set true: // 1. unescape punctuations @@ -386,32 +508,174 @@ func URLEscape(v []byte, resolveReference bool) []byte { v = ResolveNumericReferences(v) v = ResolveEntityNames(v) } - result := make([]byte, 0, len(v)+10) - for i := 0; i < len(v); { + ret := v + changed := false + limit := len(v) + n := 0 + add := func(b []byte) { + if !changed { + ret = make([]byte, 0, len(v)+20) + changed = true + } + ret = append(ret, b...) + } + + for i := 0; i < limit; { c := v[i] if urlEscapeTable[c] == 1 { - result = append(result, c) i++ continue } - if c == '%' && i+2 < len(v) && IsHexDecimal(v[i+1]) && IsHexDecimal(v[i+1]) { - result = append(result, c, v[i+1], v[i+2]) + if c == '%' && i+2 < limit && IsHexDecimal(v[i+1]) && IsHexDecimal(v[i+1]) { i += 3 continue } u8len := utf8lenTable[c] if u8len == 99 { // invalid utf8 leading byte, skip it - result = append(result, c) i++ continue } if c == ' ' { - result = append(result, '%', '2', '0') + add(v[n:i]) + add(htmlSpace) i++ + n = i continue } - result = append(result, []byte(url.QueryEscape(string(v[i:i+int(u8len)])))...) + add(v[n:i]) + add([]byte(url.QueryEscape(string(v[i : i+int(u8len)])))) i += int(u8len) + n = i + } + if changed { + add(v[n:len(v)]) + } + return ret +} + +// FindAttributeIndex searchs +// - #id +// - .class +// - attr=value +// in given bytes. +// FindHTMLAttributeIndex returns an int array that elements are +// [name_start, name_stop, value_start, value_stop]. +// value_start and value_stop does not include " or '. +// If no attributes found, it returns [4]int{-1, -1, -1, -1}. +func FindAttributeIndex(b []byte, canEscapeQuotes bool) [4]int { + result := [4]int{-1, -1, -1, -1} + i := 0 + l := len(b) + for ; i < l && IsSpace(b[i]); i++ { + } + if i >= l { + return result + } + c := b[i] + if c == '#' || c == '.' { + result[0] = i + i++ + result[1] = i + result[2] = i + for ; i < l && !IsSpace(b[i]); i++ { + } + result[3] = i + return result + } + return FindHTMLAttributeIndex(b, canEscapeQuotes) +} + +// FindHTMLAttributeIndex searches HTML attributes in given bytes. +// FindHTMLAttributeIndex returns an int array that elements are +// [name_start, name_stop, value_start, value_stop]. +// value_start and value_stop does not include " or '. +// If no attributes found, it returns [4]int{-1, -1, -1, -1}. +func FindHTMLAttributeIndex(b []byte, canEscapeQuotes bool) [4]int { + result := [4]int{-1, -1, -1, -1} + i := 0 + l := len(b) + for ; i < l && IsSpace(b[i]); i++ { + } + if i >= l { + return result + } + c := b[i] + if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || + c == '_' || c == ':') { + return result + } + result[0] = i + for ; i < l; i++ { + c := b[i] + if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + c == '_' || c == ':' || c == '.' || c == '-') { + break + } + } + result[1] = i + for ; i < l && IsSpace(b[i]); i++ { + } + if i >= l { + return result // empty attribute + } + if b[i] != '=' { + return result // empty attribute + } + i++ + for ; i < l && IsSpace(b[i]); i++ { + } + if i >= l { + return [4]int{-1, -1, -1, -1} + } + if b[i] == '"' { + i++ + result[2] = i + if canEscapeQuotes { + pos := FindClosure(b[i:], '"', '"', false, false) + if pos < 0 { + return [4]int{-1, -1, -1, -1} + } + result[3] = pos + i + } else { + for ; i < l && b[i] != '"'; i++ { + } + result[3] = i + if result[2] == result[3] || i == l && b[l-1] != '"' { + return [4]int{-1, -1, -1, -1} + } + } + } else if b[i] == '\'' { + i++ + result[2] = i + if canEscapeQuotes { + pos := FindClosure(b[i:], '\'', '\'', false, false) + if pos < 0 { + return [4]int{-1, -1, -1, -1} + } + result[3] = pos + i + } else { + for ; i < l && b[i] != '\''; i++ { + } + result[3] = i + if result[2] == result[3] || i == l && b[l-1] != '\'' { + return [4]int{-1, -1, -1, -1} + } + } + } else { + result[2] = i + for ; i < l; i++ { + c = b[i] + if c == '\\' || c == '"' || c == '\'' || + c == '=' || c == '<' || c == '>' || c == '`' || + (c >= 0 && c <= 0x20) { + break + } + } + result[3] = i + if result[2] == result[3] { + return [4]int{-1, -1, -1, -1} + } } return result }