This commit is contained in:
yuin 2024-10-15 19:19:41 +09:00
parent bc993b4f59
commit ad1565131a
10 changed files with 387 additions and 82 deletions

View file

@ -379,6 +379,11 @@ func (n *BaseNode) Text(source []byte) []byte {
var buf bytes.Buffer
for c := n.firstChild; c != nil; c = c.NextSibling() {
buf.Write(c.Text(source))
if sb, ok := c.(interface {
SoftLineBreak() bool
}); ok && sb.SoftLineBreak() {
buf.WriteByte('\n')
}
}
return buf.Bytes()
}

View file

@ -1,28 +1,10 @@
package ast
import (
"bytes"
"reflect"
"testing"
"github.com/yuin/goldmark/text"
)
// TestRemoveChildren appends two documents to a parent document, removes all
// children from the parent and logs the previous sibling of the second child.
//
// NOTE(review): nothing is asserted here; the test only fails if one of the
// calls panics. Confirm whether a nil previous sibling is the expected result.
func TestRemoveChildren(t *testing.T) {
	parent := NewDocument()
	first, second := NewDocument(), NewDocument()

	parent.AppendChild(parent, first)
	parent.AppendChild(parent, second)
	parent.RemoveChildren(parent)

	prev := second.PreviousSibling()
	t.Logf("%+v", prev)
}
func TestWalk(t *testing.T) {
tests := []struct {
name string
@ -76,48 +58,3 @@ func node(n Node, children ...Node) Node {
}
return n
}
// TestBaseBlock_Text checks Text on a CodeBlock and on a FencedCodeBlock whose
// line segments are assigned by hand, and checks Language on the fenced block.
//
// NOTE(review): the Start/Stop values below are byte offsets into source, so
// they depend on the exact bytes of the raw string literal (including any
// indentation and blank lines) — verify them against the literal if it changes.
func TestBaseBlock_Text(t *testing.T) {
source := []byte(`# Heading
code block here
and also here
A paragraph
` + "```" + `somelang
fenced code block
` + "```" + `
The end`)
t.Run("fetch text from code block", func(t *testing.T) {
block := NewCodeBlock()
// Build the block's lines directly instead of running the parser.
block.lines = text.NewSegments()
block.lines.Append(text.Segment{Start: 15, Stop: 31})
block.lines.Append(text.Segment{Start: 32, Stop: 46})
expected := []byte("code block here\nand also here\n")
if !bytes.Equal(expected, block.Text(source)) {
t.Errorf("Expected: %q, got: %q", string(expected), string(block.Text(source)))
}
})
t.Run("fetch text from fenced code block", func(t *testing.T) {
// The Text node passed to the constructor carries the info string segment.
block := NewFencedCodeBlock(&Text{
Segment: text.Segment{Start: 63, Stop: 71},
})
block.lines = text.NewSegments()
block.lines.Append(text.Segment{Start: 72, Stop: 90})
expectedLang := []byte("somelang")
if !bytes.Equal(expectedLang, block.Language(source)) {
t.Errorf("Expected: %q, got: %q", string(expectedLang), string(block.Language(source)))
}
expected := []byte("fenced code block\n")
if !bytes.Equal(expected, block.Text(source)) {
t.Errorf("Expected: %q, got: %q", string(expected), string(block.Text(source)))
}
})
}

View file

@ -1,7 +1,6 @@
package ast
import (
"bytes"
"fmt"
"strings"
@ -48,15 +47,6 @@ func (b *BaseBlock) SetLines(v *textm.Segments) {
b.lines = v
}
// Text implements Node.Text. It concatenates the value of every line segment
// of the block, in order, as read from source.
func (b *BaseBlock) Text(source []byte) []byte {
	lines := b.Lines()
	var out bytes.Buffer
	for i := 0; i < lines.Len(); i++ {
		seg := lines.At(i)
		out.Write(seg.Value(source))
	}
	return out.Bytes()
}
// A Document struct is a root node of Markdown text.
type Document struct {
BaseBlock
@ -140,6 +130,11 @@ func (n *TextBlock) Kind() NodeKind {
return KindTextBlock
}
// Text implements Node.Text by returning the value of this block's line
// segments as read from source.
func (n *TextBlock) Text(source []byte) []byte {
	lines := n.Lines()
	return lines.Value(source)
}
// NewTextBlock returns a new TextBlock node.
func NewTextBlock() *TextBlock {
return &TextBlock{
@ -165,6 +160,11 @@ func (n *Paragraph) Kind() NodeKind {
return KindParagraph
}
// Text implements Node.Text by returning the value of this paragraph's line
// segments as read from source.
func (n *Paragraph) Text(source []byte) []byte {
	lines := n.Lines()
	return lines.Value(source)
}
// NewParagraph returns a new Paragraph node.
func NewParagraph() *Paragraph {
return &Paragraph{
@ -259,6 +259,11 @@ func (n *CodeBlock) Kind() NodeKind {
return KindCodeBlock
}
// Text implements Node.Text by returning the value of this code block's line
// segments as read from source.
func (n *CodeBlock) Text(source []byte) []byte {
	lines := n.Lines()
	return lines.Value(source)
}
// NewCodeBlock returns a new CodeBlock node.
func NewCodeBlock() *CodeBlock {
return &CodeBlock{
@ -314,6 +319,11 @@ func (n *FencedCodeBlock) Kind() NodeKind {
return KindFencedCodeBlock
}
// Text implements Node.Text by returning the value of this fenced code
// block's line segments as read from source.
func (n *FencedCodeBlock) Text(source []byte) []byte {
	lines := n.Lines()
	return lines.Value(source)
}
// NewFencedCodeBlock returns a new FencedCodeBlock node.
func NewFencedCodeBlock(info *Text) *FencedCodeBlock {
return &FencedCodeBlock{
@ -508,6 +518,15 @@ func (n *HTMLBlock) Kind() NodeKind {
return KindHTMLBlock
}
// Text implements Node.Text. The result is the value of the block's line
// segments, followed by the value of ClosureLine when the block has a closure.
func (n *HTMLBlock) Text(source []byte) []byte {
	body := n.Lines().Value(source)
	if !n.HasClosure() {
		return body
	}
	return append(body, n.ClosureLine.Value(source)...)
}
// NewHTMLBlock returns a new HTMLBlock node.
func NewHTMLBlock(typ HTMLBlockType) *HTMLBlock {
return &HTMLBlock{

View file

@ -503,6 +503,11 @@ func (n *AutoLink) Label(source []byte) []byte {
return n.value.Text(source)
}
// Text implements Node.Text by returning the text of the link's value node.
func (n *AutoLink) Text(source []byte) []byte {
	val := n.value
	return val.Text(source)
}
// NewAutoLink returns a new AutoLink node.
func NewAutoLink(typ AutoLinkType, value *Text) *AutoLink {
return &AutoLink{
@ -541,6 +546,11 @@ func (n *RawHTML) Kind() NodeKind {
return KindRawHTML
}
// Text implements Node.Text by returning the value of the node's Segments as
// read from source.
func (n *RawHTML) Text(source []byte) []byte {
	segs := n.Segments
	return segs.Value(source)
}
// NewRawHTML returns a new RawHTML node.
func NewRawHTML() *RawHTML {
return &RawHTML{

200
ast_test.go Normal file
View file

@ -0,0 +1,200 @@
package goldmark_test
import (
"bytes"
"testing"
. "github.com/yuin/goldmark"
"github.com/yuin/goldmark/testutil"
"github.com/yuin/goldmark/text"
)
// TestASTBlockNodeText parses each case's Source with a goldmark instance
// created by New() and compares the Text of two block nodes with T1 and T2.
func TestASTBlockNodeText(t *testing.T) {
var cases = []struct {
// Name is used as the subtest name and in failure messages.
Name string
// Source is the Markdown input handed to the parser.
Source string
// T1 is the expected Text of the first selected node.
T1 string
// T2 is the expected Text of the second selected node.
T2 string
// C, when true, compares the first child of each selected node instead
// of the node itself.
C bool
}{
{
Name: "AtxHeading",
Source: `# l1
a
# l2`,
T1: `l1`,
T2: `l2`,
},
{
Name: "SetextHeading",
Source: `l1
l2
===============
a
l3
l4
==============`,
T1: `l1
l2`,
T2: `l3
l4`,
},
{
Name: "CodeBlock",
Source: ` l1
l2
a
l3
l4`,
T1: `l1
l2
`,
T2: `l3
l4
`,
},
{
Name: "FencedCodeBlock",
Source: "```" + `
l1
l2
` + "```" + `
a
` + "```" + `
l3
l4`,
T1: `l1
l2
`,
T2: `l3
l4
`,
},
{
Name: "Blockquote",
Source: `> l1
> l2
a
> l3
> l4`,
T1: `l1
l2`,
T2: `l3
l4`,
},
{
Name: "List",
Source: `- l1
l2
a
- l3
l4`,
T1: `l1
l2`,
T2: `l3
l4`,
C: true,
},
{
Name: "HTMLBlock",
Source: `<div>
l1
l2
</div>
a
<div>
l3
l4`,
T1: `<div>
l1
l2
</div>
`,
T2: `<div>
l3
l4`,
},
}
for _, cs := range cases {
t.Run(cs.Name, func(t *testing.T) {
s := []byte(cs.Source)
md := New()
n := md.Parser().Parse(text.NewReader(s))
// c1 is the first top-level node and c2 is the node two siblings later;
// presumably the node in between is the `a` separator in each Source —
// TODO confirm.
c1 := n.FirstChild()
c2 := c1.NextSibling().NextSibling()
if cs.C {
c1 = c1.FirstChild()
c2 = c2.FirstChild()
}
if !bytes.Equal(c1.Text(s), []byte(cs.T1)) {
t.Errorf("%s unmatch: %s", cs.Name, testutil.DiffPretty(c1.Text(s), []byte(cs.T1)))
}
if !bytes.Equal(c2.Text(s), []byte(cs.T2)) {
t.Errorf("%s(EOF) unmatch: %s", cs.Name, testutil.DiffPretty(c2.Text(s), []byte(cs.T2)))
}
})
}
}
// TestASTInlineNodeText parses each case's Source with a goldmark instance
// created by New() and compares the Text of the first child of the first
// top-level node with T1.
func TestASTInlineNodeText(t *testing.T) {
var cases = []struct {
// Name is used as the subtest name and in failure messages.
Name string
// Source is the Markdown input handed to the parser.
Source string
// T1 is the expected Text of the selected inline node.
T1 string
}{
{
Name: "CodeSpan",
Source: "`c1`",
T1: `c1`,
},
{
Name: "Emphasis",
Source: `*c1 **c2***`,
T1: `c1 c2`,
},
{
Name: "Link",
Source: `[label](url)`,
T1: `label`,
},
{
Name: "AutoLink",
Source: `<http://url>`,
T1: `http://url`,
},
{
Name: "RawHTML",
Source: `<span>c1</span>`,
T1: `<span>`,
},
}
for _, cs := range cases {
t.Run(cs.Name, func(t *testing.T) {
s := []byte(cs.Source)
md := New()
n := md.Parser().Parse(text.NewReader(s))
// Select the first child of the first top-level node.
c1 := n.FirstChild().FirstChild()
if !bytes.Equal(c1.Text(s), []byte(cs.T1)) {
t.Errorf("%s unmatch:\n%s", cs.Name, testutil.DiffPretty(c1.Text(s), []byte(cs.T1)))
}
})
}
}

117
extension/ast_test.go Normal file
View file

@ -0,0 +1,117 @@
package extension
import (
"bytes"
"testing"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/testutil"
"github.com/yuin/goldmark/text"
)
// TestASTBlockNodeText parses each case's Source with a goldmark instance that
// has the DefinitionList and Table extensions enabled, and compares the Text
// of two block nodes with T1 and T2.
func TestASTBlockNodeText(t *testing.T) {
var cases = []struct {
// Name is used as the subtest name and in failure messages.
Name string
// Source is the Markdown input handed to the parser.
Source string
// T1 is the expected Text of the first selected node.
T1 string
// T2 is the expected Text of the second selected node.
T2 string
// C, when true, compares the first child of each selected node instead
// of the node itself.
C bool
}{
{
Name: "DefinitionList",
Source: `c1
: c2
c3
a
c4
: c5
c6`,
T1: `c1c2
c3`,
T2: `c4c5
c6`,
},
{
Name: "Table",
Source: `| h1 | h2 |
| -- | -- |
| c1 | c2 |
a
| h3 | h4 |
| -- | -- |
| c3 | c4 |`,
T1: `h1h2c1c2`,
T2: `h3h4c3c4`,
},
}
for _, cs := range cases {
t.Run(cs.Name, func(t *testing.T) {
s := []byte(cs.Source)
md := goldmark.New(
goldmark.WithRendererOptions(
html.WithUnsafe(),
),
goldmark.WithExtensions(
DefinitionList,
Table,
),
)
n := md.Parser().Parse(text.NewReader(s))
// c1 is the first top-level node and c2 is the node two siblings later;
// presumably the node in between is the `a` separator in each Source —
// TODO confirm.
c1 := n.FirstChild()
c2 := c1.NextSibling().NextSibling()
if cs.C {
c1 = c1.FirstChild()
c2 = c2.FirstChild()
}
if !bytes.Equal(c1.Text(s), []byte(cs.T1)) {
t.Errorf("%s unmatch:\n%s", cs.Name, testutil.DiffPretty(c1.Text(s), []byte(cs.T1)))
}
if !bytes.Equal(c2.Text(s), []byte(cs.T2)) {
t.Errorf("%s(EOF) unmatch: %s", cs.Name, testutil.DiffPretty(c2.Text(s), []byte(cs.T2)))
}
})
}
}
// TestASTInlineNodeText parses each case's Source with a goldmark instance that
// has the Strikethrough extension enabled, and compares the Text of the first
// child of the first top-level node with T1.
func TestASTInlineNodeText(t *testing.T) {
var cases = []struct {
// Name is used as the subtest name and in failure messages.
Name string
// Source is the Markdown input handed to the parser.
Source string
// T1 is the expected Text of the selected inline node.
T1 string
}{
{
Name: "Strikethrough",
Source: `~c1 *c2*~`,
T1: `c1 c2`,
},
}
for _, cs := range cases {
t.Run(cs.Name, func(t *testing.T) {
s := []byte(cs.Source)
md := goldmark.New(
goldmark.WithRendererOptions(
html.WithUnsafe(),
),
goldmark.WithExtensions(
Strikethrough,
),
)
n := md.Parser().Parse(text.NewReader(s))
// Select the first child of the first top-level node.
c1 := n.FirstChild().FirstChild()
if !bytes.Equal(c1.Text(s), []byte(cs.T1)) {
t.Errorf("%s unmatch:\n%s", cs.Name, testutil.DiffPretty(c1.Text(s), []byte(cs.T1)))
}
})
}
}

View file

@ -35,6 +35,7 @@ func (b *codeBlockParser) Open(parent ast.Node, reader text.Reader, pc Context)
if segment.Padding != 0 {
preserveLeadingTabInCodeBlock(&segment, reader, 0)
}
segment.ForceNewline = true
node.Lines().Append(segment)
reader.Advance(segment.Len() - 1)
return node, NoChildren
@ -59,6 +60,7 @@ func (b *codeBlockParser) Continue(node ast.Node, reader text.Reader, pc Context
preserveLeadingTabInCodeBlock(&segment, reader, 0)
}
segment.ForceNewline = true
node.Lines().Append(segment)
reader.Advance(segment.Len() - 1)
return Continue | NoChildren

View file

@ -100,6 +100,7 @@ func (b *fencedCodeBlockParser) Continue(node ast.Node, reader text.Reader, pc C
if padding != 0 {
preserveLeadingTabInCodeBlock(&seg, reader, fdata.indent)
}
seg.ForceNewline = true // EOF as newline
node.Lines().Append(seg)
reader.AdvanceAndSetPadding(segment.Stop-segment.Start-pos-1, padding)
return Continue | NoChildren

View file

@ -878,12 +878,6 @@ func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
blockReader := text.NewBlockReader(reader.Source(), nil)
p.walkBlock(root, func(node ast.Node) {
p.parseBlock(blockReader, node, pc)
lines := node.Lines()
if lines != nil && lines.Len() != 0 {
s := lines.At(lines.Len() - 1)
s.EOB = true
lines.Set(lines.Len()-1, s)
}
})
for _, at := range p.astTransformers {
at.Transform(root, reader, pc)

View file

@ -20,8 +20,19 @@ type Segment struct {
// Padding is a padding length of the segment.
Padding int
// EOB is true if the segment is end of the block.
EOB bool
// ForceNewline is true if the segment should be ended with a newline.
// Some elements (e.g. CodeBlock, FencedCodeBlock) do not trim trailing
// newlines. The spec defines that EOF is treated as a newline, so we need to
// add a newline to the end of the segment if it is not empty.
//
// e.g.:
//
// ```go
// const test = "test"
//
// This code does not close the code block and ends with EOF. In this case,
// we need to add a newline to the end of the last line like `const test = "test"\n`.
ForceNewline bool
}
// NewSegment returns a new Segment.
@ -52,7 +63,7 @@ func (t *Segment) Value(buffer []byte) []byte {
result = append(result, bytes.Repeat(space, t.Padding)...)
result = append(result, buffer[t.Start:t.Stop]...)
}
if t.EOB && len(result) > 0 && result[len(result)-1] != '\n' {
if t.ForceNewline && len(result) > 0 && result[len(result)-1] != '\n' {
result = append(result, '\n')
}
return result
@ -217,3 +228,12 @@ func (s *Segments) Unshift(v Segment) {
s.values = append(s.values[0:1], s.values[0:]...)
s.values[0] = v
}
// Value returns the values of all segments in the collection, read from
// buffer and concatenated in order. The result is nil when the collection
// holds no segments.
func (s *Segments) Value(buffer []byte) []byte {
	var out []byte
	for i := range s.values {
		seg := s.values[i]
		out = append(out, seg.Value(buffer)...)
	}
	return out
}