Compare commits

..

No commits in common. "master" and "v1.5.6" have entirely different histories.

45 changed files with 3900 additions and 5207 deletions

View file

@ -5,7 +5,7 @@ jobs:
strategy:
fail-fast: false
matrix:
go-version: [1.21.x, 1.22.x]
go-version: [1.19.x, 1.20.x]
platform: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.platform }}
steps:
@ -16,18 +16,16 @@ jobs:
- name: Checkout code
uses: actions/checkout@v3
- name: Run lints
uses: golangci/golangci-lint-action@v6
uses: golangci/golangci-lint-action@v3
with:
version: latest
if: "matrix.platform == 'ubuntu-latest'" # gofmt linter fails on Windows for CRLF problems
- name: Run tests
env:
GOLDMARK_TEST_TIMEOUT_MULTIPLIER: 5
run: go test -v ./... -covermode=count -coverprofile=coverage.out -coverpkg=./...
- name: Install goveralls
run: go install github.com/mattn/goveralls@latest
- name: Send coverage
if: "matrix.platform == 'ubuntu-latest'"
env:
COVERALLS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: goveralls -coverprofile=coverage.out -service=github
run: |
GO111MODULE=off go get github.com/mattn/goveralls
$(go env GOPATH)/bin/goveralls -coverprofile=coverage.out -service=github

View file

@ -45,7 +45,7 @@ linters-settings:
disabled: false
- name: dot-imports
severity: warning
disabled: true
disabled: false
- name: error-return
severity: warning
disabled: false

View file

@ -2,15 +2,13 @@ goldmark
==========================================
[![https://pkg.go.dev/github.com/yuin/goldmark](https://pkg.go.dev/badge/github.com/yuin/goldmark.svg)](https://pkg.go.dev/github.com/yuin/goldmark)
[![https://github.com/yuin/goldmark/actions?query=workflow:test](https://github.com/yuin/goldmark/actions/workflows/test.yaml/badge.svg?branch=master&event=push)](https://github.com/yuin/goldmark/actions?query=workflow:test)
[![https://github.com/yuin/goldmark/actions?query=workflow:test](https://github.com/yuin/goldmark/workflows/test/badge.svg?branch=master&event=push)](https://github.com/yuin/goldmark/actions?query=workflow:test)
[![https://coveralls.io/github/yuin/goldmark](https://coveralls.io/repos/github/yuin/goldmark/badge.svg?branch=master)](https://coveralls.io/github/yuin/goldmark)
[![https://goreportcard.com/report/github.com/yuin/goldmark](https://goreportcard.com/badge/github.com/yuin/goldmark)](https://goreportcard.com/report/github.com/yuin/goldmark)
> A Markdown parser written in Go. Easy to extend, standards-compliant, well-structured.
goldmark is compliant with CommonMark 0.31.2.
- [goldmark playground](https://yuin.github.io/goldmark/playground/) : Try goldmark online. This playground is built with WASM(5-10MB).
goldmark is compliant with CommonMark 0.30.
Motivation
----------------------
@ -262,7 +260,7 @@ You can override autolinking patterns via options.
| Functional option | Type | Description |
| ----------------- | ---- | ----------- |
| `extension.WithLinkifyAllowedProtocols` | `[][]byte \| []string` | List of allowed protocols such as `[]string{ "http:" }` |
| `extension.WithLinkifyAllowedProtocols` | `[][]byte` | List of allowed protocols such as `[][]byte{ []byte("http:") }` |
| `extension.WithLinkifyURLRegexp` | `*regexp.Regexp` | Regexp that defines URLs, including protocols |
| `extension.WithLinkifyWWWRegexp` | `*regexp.Regexp` | Regexp that defines URL starting with `www.`. This pattern corresponds to [the extended www autolink](https://github.github.com/gfm/#extended-www-autolink) |
| `extension.WithLinkifyEmailRegexp` | `*regexp.Regexp` | Regexp that defines email addresses` |
@ -279,12 +277,12 @@ markdown := goldmark.New(
),
goldmark.WithExtensions(
extension.NewLinkify(
extension.WithLinkifyAllowedProtocols([]string{
"http:",
"https:",
extension.WithLinkifyAllowedProtocols([][]byte{
[]byte("http:"),
[]byte("https:"),
}),
extension.WithLinkifyURLRegexp(
xurls.Strict(),
xurls.Strict,
),
),
),
@ -299,13 +297,13 @@ This extension has some options:
| Functional option | Type | Description |
| ----------------- | ---- | ----------- |
| `extension.WithFootnoteIDPrefix` | `[]byte \| string` | a prefix for the id attributes.|
| `extension.WithFootnoteIDPrefix` | `[]byte` | a prefix for the id attributes.|
| `extension.WithFootnoteIDPrefixFunction` | `func(gast.Node) []byte` | a function that determines the id attribute for given Node.|
| `extension.WithFootnoteLinkTitle` | `[]byte \| string` | an optional title attribute for footnote links.|
| `extension.WithFootnoteBacklinkTitle` | `[]byte \| string` | an optional title attribute for footnote backlinks. |
| `extension.WithFootnoteLinkClass` | `[]byte \| string` | a class for footnote links. This defaults to `footnote-ref`. |
| `extension.WithFootnoteBacklinkClass` | `[]byte \| string` | a class for footnote backlinks. This defaults to `footnote-backref`. |
| `extension.WithFootnoteBacklinkHTML` | `[]byte \| string` | a class for footnote backlinks. This defaults to `↩︎`. |
| `extension.WithFootnoteLinkTitle` | `[]byte` | an optional title attribute for footnote links.|
| `extension.WithFootnoteBacklinkTitle` | `[]byte` | an optional title attribute for footnote backlinks. |
| `extension.WithFootnoteLinkClass` | `[]byte` | a class for footnote links. This defaults to `footnote-ref`. |
| `extension.WithFootnoteBacklinkClass` | `[]byte` | a class for footnote backlinks. This defaults to `footnote-backref`. |
| `extension.WithFootnoteBacklinkHTML` | `[]byte` | a class for footnote backlinks. This defaults to `↩︎`. |
Some options can have special substitutions. Occurrences of “^^” in the string will be replaced by the corresponding footnote number in the HTML output. Occurrences of “%%” will be replaced by a number for the reference (footnotes can have multiple references).
@ -321,7 +319,7 @@ for _, path := range files {
markdown := goldmark.New(
goldmark.WithExtensions(
NewFootnote(
WithFootnoteIDPrefix(path),
WithFootnoteIDPrefix([]byte(path)),
),
),
)
@ -381,47 +379,9 @@ This extension provides additional options for CJK users.
| Functional option | Type | Description |
| ----------------- | ---- | ----------- |
| `extension.WithEastAsianLineBreaks` | `...extension.EastAsianLineBreaksStyle` | Soft line breaks are rendered as a newline. Some asian users will see it as an unnecessary space. With this option, soft line breaks between east asian wide characters will be ignored. This defaults to `EastAsianLineBreaksStyleSimple`. |
| `extension.WithEastAsianLineBreaks` | `-` | Soft line breaks are rendered as a newline. Some asian users will see it as an unnecessary space. With this option, soft line breaks between east asian wide characters will be ignored. |
| `extension.WithEscapedSpace` | `-` | Without spaces around an emphasis started with east asian punctuations, it is not interpreted as an emphasis(as defined in CommonMark spec). With this option, you can avoid this inconvenient behavior by putting 'not rendered' spaces around an emphasis like `太郎は\ **「こんにちわ」**\ といった`. |
#### Styles of Line Breaking
| Style | Description |
| ----- | ----------- |
| `EastAsianLineBreaksStyleSimple` | Soft line breaks are ignored if both sides of the break are east asian wide character. This behavior is the same as [`east_asian_line_breaks`](https://pandoc.org/MANUAL.html#extension-east_asian_line_breaks) in Pandoc. |
| `EastAsianLineBreaksCSS3Draft` | This option implements CSS text level3 [Segment Break Transformation Rules](https://drafts.csswg.org/css-text-3/#line-break-transform) with [some enhancements](https://github.com/w3c/csswg-drafts/issues/5086). |
#### Example of `EastAsianLineBreaksStyleSimple`
Input Markdown:
```md
私はプログラマーです。
東京の会社に勤めています。
GoでWebアプリケーションを開発しています。
```
Output:
```html
<p>私はプログラマーです。東京の会社に勤めています。\nGoでWebアプリケーションを開発しています。</p>
```
#### Example of `EastAsianLineBreaksCSS3Draft`
Input Markdown:
```md
私はプログラマーです。
東京の会社に勤めています。
GoでWebアプリケーションを開発しています。
```
Output:
```html
<p>私はプログラマーです。東京の会社に勤めています。GoでWebアプリケーションを開発しています。</p>
```
Security
--------------------
@ -469,7 +429,6 @@ As you can see, goldmark's performance is on par with cmark's.
Extensions
--------------------
### List of extensions
- [goldmark-meta](https://github.com/yuin/goldmark-meta): A YAML metadata
extension for the goldmark Markdown parser.
@ -493,14 +452,6 @@ Extensions
- [goldmark-d2](https://github.com/FurqanSoftware/goldmark-d2): Adds support for [D2](https://d2lang.com/) diagrams.
- [goldmark-katex](https://github.com/FurqanSoftware/goldmark-katex): Adds support for [KaTeX](https://katex.org/) math and equations.
- [goldmark-img64](https://github.com/tenkoh/goldmark-img64): Adds support for embedding images into the document as DataURL (base64 encoded).
- [goldmark-enclave](https://github.com/quailyquaily/goldmark-enclave): Adds support for embedding youtube/bilibili video, X's [oembed X](https://publish.x.com/), [tradingview chart](https://www.tradingview.com/widget/)'s chart, [quaily widget](https://quaily.com), [spotify embeds](https://developer.spotify.com/documentation/embeds), [dify embed](https://dify.ai/) and html audio into the document.
- [goldmark-wiki-table](https://github.com/movsb/goldmark-wiki-table): Adds support for embedding Wiki Tables.
- [goldmark-tgmd](https://github.com/Mad-Pixels/goldmark-tgmd): A Telegram markdown renderer that can be passed to `goldmark.WithRenderer()`.
### Loading extensions at runtime
[goldmark-dynamic](https://github.com/yuin/goldmark-dynamic) allows you to write a goldmark extension in Lua and load it at runtime without re-compilation.
Please refer to [goldmark-dynamic](https://github.com/yuin/goldmark-dynamic) for details.
goldmark internal(for extension developers)

View file

@ -6,7 +6,7 @@ require (
github.com/88250/lute v1.7.5
github.com/gomarkdown/markdown v0.0.0-20230322041520-c84983bdbf2a
github.com/russross/blackfriday/v2 v2.1.0
github.com/yuin/goldmark v0.0.0
github.com/yuin/goldmark v1.2.1
gitlab.com/golang-commonmark/markdown v0.0.0-20211110145824-bf3e522c626a
)
@ -22,4 +22,3 @@ require (
)
replace gopkg.in/russross/blackfriday.v2 v2.0.1 => github.com/russross/blackfriday/v2 v2.0.1
replace github.com/yuin/goldmark v0.0.0 => ../../

View file

@ -385,8 +385,7 @@ a* b c d *e*
//- - - - - - - - -//
x
//- - - - - - - - -//
<pre><code> x
</code></pre>
<pre><code> x</code></pre>
//= = = = = = = = = = = = = = = = = = = = = = = =//
26: NUL bytes must be replaced with U+FFFD
@ -772,74 +771,3 @@ a <!-- b -->
<p>&lt;img src=./.assets/logo.svg</p>
<p>/&gt;</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
61: Image alt with a new line
//- - - - - - - - -//
![alt
text](logo.png)
//- - - - - - - - -//
<p><img src="logo.png" alt="alt
text" /></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
62: Image alt with an escaped character
//- - - - - - - - -//
![\`alt](https://example.com/img.png)
//- - - - - - - - -//
<p><img src="https://example.com/img.png" alt="`alt" /></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
63: Emphasis in link label
//- - - - - - - - -//
[*[a]*](b)
//- - - - - - - - -//
<p><a href="b"><em>[a]</em></a></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
64: Nested list under an empty list item
//- - - - - - - - -//
-
- foo
//- - - - - - - - -//
<ul>
<li>
<ul>
<li>foo</li>
</ul>
</li>
</ul>
//= = = = = = = = = = = = = = = = = = = = = = = =//
65: Nested fenced code block with tab
//- - - - - - - - -//
> ```
> 0
> ```
//- - - - - - - - -//
<blockquote>
<pre><code> 0
</code></pre>
</blockquote>
//= = = = = = = = = = = = = = = = = = = = = = = =//
66: EOF should be rendered as a newline with an unclosed block(w/ TAB)
//- - - - - - - - -//
> ```
> 0
//- - - - - - - - -//
<blockquote>
<pre><code> 0
</code></pre>
</blockquote>
//= = = = = = = = = = = = = = = = = = = = = = = =//
67: EOF should be rendered as a newline with an unclosed block
//- - - - - - - - -//
> ```
> 0
//- - - - - - - - -//
<blockquote>
<pre><code> 0
</code></pre>
</blockquote>
//= = = = = = = = = = = = = = = = = = = = = = = =//

File diff suppressed because it is too large Load diff

View file

@ -123,12 +123,6 @@ type Node interface {
Dump(source []byte, level int)
// Text returns text values of this node.
// This method is valid only for some inline nodes.
// If this node is a block node, Text returns a text value as reasonable as possible.
// Notice that there are no 'correct' text values for the block nodes.
// Result for the block nodes may be different from your expectation.
//
// Deprecated: Use other properties of the node to get the text value(i.e. Pragraph.Lines, Text.Value).
Text(source []byte) []byte
// HasBlankPreviousLines returns true if the row before this node is blank,
@ -381,17 +375,10 @@ func (n *BaseNode) OwnerDocument() *Document {
}
// Text implements Node.Text .
//
// Deprecated: Use other properties of the node to get the text value(i.e. Pragraph.Lines, Text.Value).
func (n *BaseNode) Text(source []byte) []byte {
var buf bytes.Buffer
for c := n.firstChild; c != nil; c = c.NextSibling() {
buf.Write(c.Text(source))
if sb, ok := c.(interface {
SoftLineBreak() bool
}); ok && sb.SoftLineBreak() {
buf.WriteByte('\n')
}
}
return buf.Bytes()
}

View file

@ -5,6 +5,21 @@ import (
"testing"
)
func TestRemoveChildren(t *testing.T) {
root := NewDocument()
node1 := NewDocument()
node2 := NewDocument()
root.AppendChild(root, node1)
root.AppendChild(root, node2)
root.RemoveChildren(root)
t.Logf("%+v", node2.PreviousSibling())
}
func TestWalk(t *testing.T) {
tests := []struct {
name string

View file

@ -130,13 +130,6 @@ func (n *TextBlock) Kind() NodeKind {
return KindTextBlock
}
// Text implements Node.Text.
//
// Deprecated: Use other properties of the node to get the text value(i.e. TextBlock.Lines).
func (n *TextBlock) Text(source []byte) []byte {
return n.Lines().Value(source)
}
// NewTextBlock returns a new TextBlock node.
func NewTextBlock() *TextBlock {
return &TextBlock{
@ -162,13 +155,6 @@ func (n *Paragraph) Kind() NodeKind {
return KindParagraph
}
// Text implements Node.Text.
//
// Deprecated: Use other properties of the node to get the text value(i.e. Paragraph.Lines).
func (n *Paragraph) Text(source []byte) []byte {
return n.Lines().Value(source)
}
// NewParagraph returns a new Paragraph node.
func NewParagraph() *Paragraph {
return &Paragraph{
@ -263,13 +249,6 @@ func (n *CodeBlock) Kind() NodeKind {
return KindCodeBlock
}
// Text implements Node.Text.
//
// Deprecated: Use other properties of the node to get the text value(i.e. CodeBlock.Lines).
func (n *CodeBlock) Text(source []byte) []byte {
return n.Lines().Value(source)
}
// NewCodeBlock returns a new CodeBlock node.
func NewCodeBlock() *CodeBlock {
return &CodeBlock{
@ -325,13 +304,6 @@ func (n *FencedCodeBlock) Kind() NodeKind {
return KindFencedCodeBlock
}
// Text implements Node.Text.
//
// Deprecated: Use other properties of the node to get the text value(i.e. FencedCodeBlock.Lines).
func (n *FencedCodeBlock) Text(source []byte) []byte {
return n.Lines().Value(source)
}
// NewFencedCodeBlock return a new FencedCodeBlock node.
func NewFencedCodeBlock(info *Text) *FencedCodeBlock {
return &FencedCodeBlock{
@ -526,17 +498,6 @@ func (n *HTMLBlock) Kind() NodeKind {
return KindHTMLBlock
}
// Text implements Node.Text.
//
// Deprecated: Use other properties of the node to get the text value(i.e. HTMLBlock.Lines).
func (n *HTMLBlock) Text(source []byte) []byte {
ret := n.Lines().Value(source)
if n.HasClosure() {
ret = append(ret, n.ClosureLine.Value(source)...)
}
return ret
}
// NewHTMLBlock returns a new HTMLBlock node.
func NewHTMLBlock(typ HTMLBlockType) *HTMLBlock {
return &HTMLBlock{

View file

@ -143,25 +143,17 @@ func (n *Text) Merge(node Node, source []byte) bool {
}
// Text implements Node.Text.
//
// Deprecated: Use other properties of the node to get the text value(i.e. Text.Value).
func (n *Text) Text(source []byte) []byte {
return n.Segment.Value(source)
}
// Value returns a value of this node.
// SoftLineBreaks are not included in the returned value.
func (n *Text) Value(source []byte) []byte {
return n.Segment.Value(source)
}
// Dump implements Node.Dump.
func (n *Text) Dump(source []byte, level int) {
fs := textFlagsString(n.flags)
if len(fs) != 0 {
fs = "(" + fs + ")"
}
fmt.Printf("%sText%s: \"%s\"\n", strings.Repeat(" ", level), fs, strings.TrimRight(string(n.Value(source)), "\n"))
fmt.Printf("%sText%s: \"%s\"\n", strings.Repeat(" ", level), fs, strings.TrimRight(string(n.Text(source)), "\n"))
}
// KindText is a NodeKind of the Text node.
@ -266,8 +258,6 @@ func (n *String) SetCode(v bool) {
}
// Text implements Node.Text.
//
// Deprecated: Use other properties of the node to get the text value(i.e. String.Value).
func (n *String) Text(source []byte) []byte {
return n.Value
}
@ -502,22 +492,15 @@ func (n *AutoLink) URL(source []byte) []byte {
ret := make([]byte, 0, len(n.Protocol)+s.Len()+3)
ret = append(ret, n.Protocol...)
ret = append(ret, ':', '/', '/')
ret = append(ret, n.value.Value(source)...)
ret = append(ret, n.value.Text(source)...)
return ret
}
return n.value.Value(source)
return n.value.Text(source)
}
// Label returns a label of this node.
func (n *AutoLink) Label(source []byte) []byte {
return n.value.Value(source)
}
// Text implements Node.Text.
//
// Deprecated: Use other properties of the node to get the text value(i.e. AutoLink.Label).
func (n *AutoLink) Text(source []byte) []byte {
return n.value.Value(source)
return n.value.Text(source)
}
// NewAutoLink returns a new AutoLink node.
@ -558,13 +541,6 @@ func (n *RawHTML) Kind() NodeKind {
return KindRawHTML
}
// Text implements Node.Text.
//
// Deprecated: Use other properties of the node to get the text value(i.e. RawHTML.Segments).
func (n *RawHTML) Text(source []byte) []byte {
return n.Segments.Value(source)
}
// NewRawHTML returns a new RawHTML node.
func NewRawHTML() *RawHTML {
return &RawHTML{

View file

@ -1,204 +0,0 @@
package goldmark_test
import (
"bytes"
"testing"
. "github.com/yuin/goldmark"
"github.com/yuin/goldmark/testutil"
"github.com/yuin/goldmark/text"
)
func TestASTBlockNodeText(t *testing.T) {
var cases = []struct {
Name string
Source string
T1 string
T2 string
C bool
}{
{
Name: "AtxHeading",
Source: `# l1
a
# l2`,
T1: `l1`,
T2: `l2`,
},
{
Name: "SetextHeading",
Source: `l1
l2
===============
a
l3
l4
==============`,
T1: `l1
l2`,
T2: `l3
l4`,
},
{
Name: "CodeBlock",
Source: ` l1
l2
a
l3
l4`,
T1: `l1
l2
`,
T2: `l3
l4
`,
},
{
Name: "FencedCodeBlock",
Source: "```" + `
l1
l2
` + "```" + `
a
` + "```" + `
l3
l4`,
T1: `l1
l2
`,
T2: `l3
l4
`,
},
{
Name: "Blockquote",
Source: `> l1
> l2
a
> l3
> l4`,
T1: `l1
l2`,
T2: `l3
l4`,
},
{
Name: "List",
Source: `- l1
l2
a
- l3
l4`,
T1: `l1
l2`,
T2: `l3
l4`,
C: true,
},
{
Name: "HTMLBlock",
Source: `<div>
l1
l2
</div>
a
<div>
l3
l4`,
T1: `<div>
l1
l2
</div>
`,
T2: `<div>
l3
l4`,
},
}
for _, cs := range cases {
t.Run(cs.Name, func(t *testing.T) {
s := []byte(cs.Source)
md := New()
n := md.Parser().Parse(text.NewReader(s))
c1 := n.FirstChild()
c2 := c1.NextSibling().NextSibling()
if cs.C {
c1 = c1.FirstChild()
c2 = c2.FirstChild()
}
if !bytes.Equal(c1.Text(s), []byte(cs.T1)) { // nolint: staticcheck
t.Errorf("%s unmatch: %s", cs.Name, testutil.DiffPretty(c1.Text(s), []byte(cs.T1))) // nolint: staticcheck
}
if !bytes.Equal(c2.Text(s), []byte(cs.T2)) { // nolint: staticcheck
t.Errorf("%s(EOF) unmatch: %s", cs.Name, testutil.DiffPretty(c2.Text(s), []byte(cs.T2))) // nolint: staticcheck
}
})
}
}
func TestASTInlineNodeText(t *testing.T) {
var cases = []struct {
Name string
Source string
T1 string
}{
{
Name: "CodeSpan",
Source: "`c1`",
T1: `c1`,
},
{
Name: "Emphasis",
Source: `*c1 **c2***`,
T1: `c1 c2`,
},
{
Name: "Link",
Source: `[label](url)`,
T1: `label`,
},
{
Name: "AutoLink",
Source: `<http://url>`,
T1: `http://url`,
},
{
Name: "RawHTML",
Source: `<span>c1</span>`,
T1: `<span>`,
},
}
for _, cs := range cases {
t.Run(cs.Name, func(t *testing.T) {
s := []byte(cs.Source)
md := New()
n := md.Parser().Parse(text.NewReader(s))
c1 := n.FirstChild().FirstChild()
if !bytes.Equal(c1.Text(s), []byte(cs.T1)) { // nolint: staticcheck
t.Errorf("%s unmatch:\n%s", cs.Name, testutil.DiffPretty(c1.Text(s), []byte(cs.T1))) // nolint: staticcheck
}
})
}
}

View file

@ -2,7 +2,7 @@ package goldmark_test
import (
"encoding/json"
"os"
"io/ioutil"
"testing"
. "github.com/yuin/goldmark"
@ -20,7 +20,7 @@ type commonmarkSpecTestCase struct {
}
func TestSpec(t *testing.T) {
bs, err := os.ReadFile("_test/spec.json")
bs, err := ioutil.ReadFile("_test/spec.json")
if err != nil {
panic(err)
}

View file

@ -150,8 +150,7 @@ on two lines.</p>
//- - - - - - - - -//
<dl>
<dt>0</dt>
<dd><pre><code> 0
</code></pre>
<dd><pre><code> 0</code></pre>
</dd>
</dl>
//= = = = = = = = = = = = = = = = = = = = = = = =//

View file

@ -5,6 +5,8 @@
<p><del>Hi</del> Hello, world!</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
2
//- - - - - - - - -//
This ~~has a
@ -14,26 +16,3 @@ new paragraph~~.
<p>This ~~has a</p>
<p>new paragraph~~.</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
3
//- - - - - - - - -//
~Hi~ Hello, world!
//- - - - - - - - -//
<p><del>Hi</del> Hello, world!</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
4: Three or more tildes do not create a strikethrough
//- - - - - - - - -//
This will ~~~not~~~ strike.
//- - - - - - - - -//
<p>This will ~~~not~~~ strike.</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
5: Leading three or more tildes do not create a strikethrough, create a code block
//- - - - - - - - -//
~~~Hi~~~ Hello, world!
//- - - - - - - - -//
<pre><code class="language-Hi~~~"></code></pre>
//= = = = = = = = = = = = = = = = = = = = = = = =//

View file

@ -28,24 +28,3 @@
<li><input disabled="" type="checkbox"> bim</li>
</ul>
//= = = = = = = = = = = = = = = = = = = = = = = =//
3
//- - - - - - - - -//
- test[x]=[x]
//- - - - - - - - -//
<ul>
<li>test[x]=[x]</li>
</ul>
//= = = = = = = = = = = = = = = = = = = = = = = =//
4
//- - - - - - - - -//
+ [x] [x]
//- - - - - - - - -//
<ul>
<li><input checked="" disabled="" type="checkbox"> [x]</li>
</ul>
//= = = = = = = = = = = = = = = = = = = = = = = =//

View file

@ -1,123 +0,0 @@
package extension
import (
"bytes"
"testing"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/testutil"
"github.com/yuin/goldmark/text"
)
func TestASTBlockNodeText(t *testing.T) {
var cases = []struct {
Name string
Source string
T1 string
T2 string
C bool
}{
{
Name: "DefinitionList",
Source: `c1
: c2
c3
a
c4
: c5
c6`,
T1: `c1c2
c3`,
T2: `c4c5
c6`,
},
{
Name: "Table",
Source: `| h1 | h2 |
| -- | -- |
| c1 | c2 |
a
| h3 | h4 |
| -- | -- |
| c3 | c4 |`,
T1: `h1h2c1c2`,
T2: `h3h4c3c4`,
},
}
for _, cs := range cases {
t.Run(cs.Name, func(t *testing.T) {
s := []byte(cs.Source)
md := goldmark.New(
goldmark.WithRendererOptions(
html.WithUnsafe(),
),
goldmark.WithExtensions(
DefinitionList,
Table,
),
)
n := md.Parser().Parse(text.NewReader(s))
c1 := n.FirstChild()
c2 := c1.NextSibling().NextSibling()
if cs.C {
c1 = c1.FirstChild()
c2 = c2.FirstChild()
}
if !bytes.Equal(c1.Text(s), []byte(cs.T1)) { // nolint: staticcheck
t.Errorf("%s unmatch:\n%s", cs.Name, testutil.DiffPretty(c1.Text(s), []byte(cs.T1))) // nolint: staticcheck
}
if !bytes.Equal(c2.Text(s), []byte(cs.T2)) { // nolint: staticcheck
t.Errorf("%s(EOF) unmatch: %s", cs.Name, testutil.DiffPretty(c2.Text(s), []byte(cs.T2))) // nolint: staticcheck
}
})
}
}
func TestASTInlineNodeText(t *testing.T) {
var cases = []struct {
Name string
Source string
T1 string
}{
{
Name: "Strikethrough",
Source: `~c1 *c2*~`,
T1: `c1 c2`,
},
}
for _, cs := range cases {
t.Run(cs.Name, func(t *testing.T) {
s := []byte(cs.Source)
md := goldmark.New(
goldmark.WithRendererOptions(
html.WithUnsafe(),
),
goldmark.WithExtensions(
Strikethrough,
),
)
n := md.Parser().Parse(text.NewReader(s))
c1 := n.FirstChild().FirstChild()
if !bytes.Equal(c1.Text(s), []byte(cs.T1)) { // nolint: staticcheck
t.Errorf("%s unmatch:\n%s", cs.Name, testutil.DiffPretty(c1.Text(s), []byte(cs.T1))) // nolint: staticcheck
}
})
}
}

View file

@ -9,30 +9,11 @@ import (
// A CJKOption sets options for CJK support mostly for HTML based renderers.
type CJKOption func(*cjk)
// A EastAsianLineBreaks is a style of east asian line breaks.
type EastAsianLineBreaks int
const (
//EastAsianLineBreaksNone renders line breaks as it is.
EastAsianLineBreaksNone EastAsianLineBreaks = iota
// EastAsianLineBreaksSimple is a style where soft line breaks are ignored
// if both sides of the break are east asian wide characters.
EastAsianLineBreaksSimple
// EastAsianLineBreaksCSS3Draft is a style where soft line breaks are ignored
// even if only one side of the break is an east asian wide character.
EastAsianLineBreaksCSS3Draft
)
// WithEastAsianLineBreaks is a functional option that indicates whether softline breaks
// between east asian wide characters should be ignored.
// style defauts to [EastAsianLineBreaksSimple] .
func WithEastAsianLineBreaks(style ...EastAsianLineBreaks) CJKOption {
func WithEastAsianLineBreaks() CJKOption {
return func(c *cjk) {
if len(style) == 0 {
c.EastAsianLineBreaks = EastAsianLineBreaksSimple
return
}
c.EastAsianLineBreaks = style[0]
c.EastAsianLineBreaks = true
}
}
@ -44,7 +25,7 @@ func WithEscapedSpace() CJKOption {
}
type cjk struct {
EastAsianLineBreaks EastAsianLineBreaks
EastAsianLineBreaks bool
EscapedSpace bool
}
@ -53,9 +34,7 @@ var CJK = NewCJK(WithEastAsianLineBreaks(), WithEscapedSpace())
// NewCJK returns a new extension with given options.
func NewCJK(opts ...CJKOption) goldmark.Extender {
e := &cjk{
EastAsianLineBreaks: EastAsianLineBreaksNone,
}
e := &cjk{}
for _, opt := range opts {
opt(e)
}
@ -63,8 +42,9 @@ func NewCJK(opts ...CJKOption) goldmark.Extender {
}
func (e *cjk) Extend(m goldmark.Markdown) {
m.Renderer().AddOptions(html.WithEastAsianLineBreaks(
html.EastAsianLineBreaks(e.EastAsianLineBreaks)))
if e.EastAsianLineBreaks {
m.Renderer().AddOptions(html.WithEastAsianLineBreaks())
}
if e.EscapedSpace {
m.Renderer().AddOptions(html.WithWriter(html.NewWriter(html.WithEscapedSpace())))
m.Parser().AddOptions(parser.WithEscapedSpace())

View file

@ -177,7 +177,6 @@ func TestEastAsianLineBreaks(t *testing.T) {
t,
)
// Tests with EastAsianLineBreaksStyleSimple
markdown = goldmark.New(goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithUnsafe(),
@ -209,61 +208,4 @@ func TestEastAsianLineBreaks(t *testing.T) {
},
t,
)
no = 9
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "Soft line breaks between an east asian wide character and a western character are ignored",
Markdown: "私はプログラマーです。\n東京の会社に勤めています。\nGoでWebアプリケーションを開発しています。",
Expected: "<p>私はプログラマーです。東京の会社に勤めています。\nGoでWebアプリケーションを開発しています。</p>",
},
t,
)
// Tests with EastAsianLineBreaksCSS3Draft
markdown = goldmark.New(goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithUnsafe(),
),
goldmark.WithExtensions(
NewCJK(WithEastAsianLineBreaks(EastAsianLineBreaksCSS3Draft)),
),
)
no = 10
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "Soft line breaks between a western character and an east asian wide character are ignored",
Markdown: "太郎は\\ **「こんにちわ」**\\ と言ったa\nんです",
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と言ったaんです</p>",
},
t,
)
no = 11
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "Soft line breaks between an east asian wide character and a western character are ignored",
Markdown: "太郎は\\ **「こんにちわ」**\\ と言った\nbんです",
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と言ったbんです</p>",
},
t,
)
no = 12
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "Soft line breaks between an east asian wide character and a western character are ignored",
Markdown: "私はプログラマーです。\n東京の会社に勤めています。\nGoでWebアプリケーションを開発しています。",
Expected: "<p>私はプログラマーです。東京の会社に勤めています。GoでWebアプリケーションを開発しています。</p>",
},
t,
)
}

View file

@ -382,8 +382,8 @@ func (o *withFootnoteIDPrefix) SetFootnoteOption(c *FootnoteConfig) {
}
// WithFootnoteIDPrefix is a functional option that is a prefix for the id attributes generated by footnotes.
func WithFootnoteIDPrefix[T []byte | string](a T) FootnoteOption {
return &withFootnoteIDPrefix{[]byte(a)}
func WithFootnoteIDPrefix(a []byte) FootnoteOption {
return &withFootnoteIDPrefix{a}
}
const optFootnoteIDPrefixFunction renderer.OptionName = "FootnoteIDPrefixFunction"
@ -420,8 +420,8 @@ func (o *withFootnoteLinkTitle) SetFootnoteOption(c *FootnoteConfig) {
}
// WithFootnoteLinkTitle is a functional option that is an optional title attribute for footnote links.
func WithFootnoteLinkTitle[T []byte | string](a T) FootnoteOption {
return &withFootnoteLinkTitle{[]byte(a)}
func WithFootnoteLinkTitle(a []byte) FootnoteOption {
return &withFootnoteLinkTitle{a}
}
const optFootnoteBacklinkTitle renderer.OptionName = "FootnoteBacklinkTitle"
@ -439,8 +439,8 @@ func (o *withFootnoteBacklinkTitle) SetFootnoteOption(c *FootnoteConfig) {
}
// WithFootnoteBacklinkTitle is a functional option that is an optional title attribute for footnote backlinks.
func WithFootnoteBacklinkTitle[T []byte | string](a T) FootnoteOption {
return &withFootnoteBacklinkTitle{[]byte(a)}
func WithFootnoteBacklinkTitle(a []byte) FootnoteOption {
return &withFootnoteBacklinkTitle{a}
}
const optFootnoteLinkClass renderer.OptionName = "FootnoteLinkClass"
@ -458,8 +458,8 @@ func (o *withFootnoteLinkClass) SetFootnoteOption(c *FootnoteConfig) {
}
// WithFootnoteLinkClass is a functional option that is a class for footnote links.
func WithFootnoteLinkClass[T []byte | string](a T) FootnoteOption {
return &withFootnoteLinkClass{[]byte(a)}
func WithFootnoteLinkClass(a []byte) FootnoteOption {
return &withFootnoteLinkClass{a}
}
const optFootnoteBacklinkClass renderer.OptionName = "FootnoteBacklinkClass"
@ -477,8 +477,8 @@ func (o *withFootnoteBacklinkClass) SetFootnoteOption(c *FootnoteConfig) {
}
// WithFootnoteBacklinkClass is a functional option that is a class for footnote backlinks.
func WithFootnoteBacklinkClass[T []byte | string](a T) FootnoteOption {
return &withFootnoteBacklinkClass{[]byte(a)}
func WithFootnoteBacklinkClass(a []byte) FootnoteOption {
return &withFootnoteBacklinkClass{a}
}
const optFootnoteBacklinkHTML renderer.OptionName = "FootnoteBacklinkHTML"
@ -496,8 +496,8 @@ func (o *withFootnoteBacklinkHTML) SetFootnoteOption(c *FootnoteConfig) {
}
// WithFootnoteBacklinkHTML is an HTML content for footnote backlinks.
func WithFootnoteBacklinkHTML[T []byte | string](a T) FootnoteOption {
return &withFootnoteBacklinkHTML{[]byte(a)}
func WithFootnoteBacklinkHTML(a []byte) FootnoteOption {
return &withFootnoteBacklinkHTML{a}
}
// FootnoteHTMLRenderer is a renderer.NodeRenderer implementation that

View file

@ -38,12 +38,12 @@ func TestFootnoteOptions(t *testing.T) {
),
goldmark.WithExtensions(
NewFootnote(
WithFootnoteIDPrefix("article12-"),
WithFootnoteLinkClass("link-class"),
WithFootnoteBacklinkClass("backlink-class"),
WithFootnoteLinkTitle("link-title-%%-^^"),
WithFootnoteBacklinkTitle("backlink-title"),
WithFootnoteBacklinkHTML("^"),
WithFootnoteIDPrefix([]byte("article12-")),
WithFootnoteLinkClass([]byte("link-class")),
WithFootnoteBacklinkClass([]byte("backlink-class")),
WithFootnoteLinkTitle([]byte("link-title-%%-^^")),
WithFootnoteBacklinkTitle([]byte("backlink-title")),
WithFootnoteBacklinkHTML([]byte("^")),
),
),
)

View file

@ -66,12 +66,10 @@ func (o *withLinkifyAllowedProtocols) SetLinkifyOption(p *LinkifyConfig) {
// WithLinkifyAllowedProtocols is a functional option that specify allowed
// protocols in autolinks. Each protocol must end with ':' like
// 'http:' .
func WithLinkifyAllowedProtocols[T []byte | string](value []T) LinkifyOption {
opt := &withLinkifyAllowedProtocols{}
for _, v := range value {
opt.value = append(opt.value, []byte(v))
func WithLinkifyAllowedProtocols(value [][]byte) LinkifyOption {
return &withLinkifyAllowedProtocols{
value: value,
}
return opt
}
type withLinkifyURLRegexp struct {

View file

@ -29,8 +29,8 @@ func TestLinkifyWithAllowedProtocols(t *testing.T) {
),
goldmark.WithExtensions(
NewLinkify(
WithLinkifyAllowedProtocols([]string{
"ssh:",
WithLinkifyAllowedProtocols([][]byte{
[]byte("ssh:"),
}),
WithLinkifyURLRegexp(
regexp.MustCompile(`\w+://[^\s]+`),

View file

@ -46,11 +46,10 @@ func (s *strikethroughParser) Trigger() []byte {
func (s *strikethroughParser) Parse(parent gast.Node, block text.Reader, pc parser.Context) gast.Node {
before := block.PrecendingCharacter()
line, segment := block.PeekLine()
node := parser.ScanDelimiter(line, before, 1, defaultStrikethroughDelimiterProcessor)
if node == nil || node.OriginalLength > 2 || before == '~' {
node := parser.ScanDelimiter(line, before, 2, defaultStrikethroughDelimiterProcessor)
if node == nil {
return nil
}
node.Segment = segment.WithStop(segment.Start + node.OriginalLength)
block.Advance(node.OriginalLength)
pc.PushDelimiter(node)

View file

@ -184,11 +184,11 @@ func (b *tableParagraphTransformer) Transform(node *gast.Paragraph, reader text.
func (b *tableParagraphTransformer) parseRow(segment text.Segment,
alignments []ast.Alignment, isHeader bool, reader text.Reader, pc parser.Context) *ast.TableRow {
source := reader.Source()
segment = segment.TrimLeftSpace(source)
segment = segment.TrimRightSpace(source)
line := segment.Value(source)
pos := 0
pos += util.TrimLeftSpaceLength(line)
limit := len(line)
limit -= util.TrimRightSpaceLength(line)
row := ast.NewTableRow(alignments)
if len(line) > 0 && line[pos] == '|' {
pos++
@ -492,7 +492,7 @@ func (r *TableHTMLRenderer) renderTableCell(
tag = "th"
}
if entering {
_, _ = fmt.Fprintf(w, "<%s", tag)
fmt.Fprintf(w, "<%s", tag)
if n.Alignment != ast.AlignNone {
amethod := r.TableConfig.TableCellAlignMethod
if amethod == TableCellAlignDefault {
@ -505,7 +505,7 @@ func (r *TableHTMLRenderer) renderTableCell(
switch amethod {
case TableCellAlignAttribute:
if _, ok := n.AttributeString("align"); !ok { // Skip align render if overridden
_, _ = fmt.Fprintf(w, ` align="%s"`, n.Alignment.String())
fmt.Fprintf(w, ` align="%s"`, n.Alignment.String())
}
case TableCellAlignStyle:
v, ok := n.AttributeString("style")
@ -528,7 +528,7 @@ func (r *TableHTMLRenderer) renderTableCell(
}
_ = w.WriteByte('>')
} else {
_, _ = fmt.Fprintf(w, "</%s>\n", tag)
fmt.Fprintf(w, "</%s>\n", tag)
}
return gast.WalkContinue, nil
}

View file

@ -355,40 +355,3 @@ bar | baz
t,
)
}
func TestTableFuzzedPanics(t *testing.T) {
markdown := goldmark.New(
goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithUnsafe(),
),
goldmark.WithExtensions(
NewTable(),
),
)
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: 1,
Description: "This should not panic",
Markdown: "* 0\n-|\n\t0",
Expected: `<ul>
<li>
<table>
<thead>
<tr>
<th>0</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
</tr>
</tbody>
</table>
</li>
</ul>`,
},
t,
)
}

View file

@ -41,9 +41,6 @@ func (s *taskCheckBoxParser) Parse(parent gast.Node, block text.Reader, pc parse
return nil
}
if parent.HasChildren() {
return nil
}
if _, ok := parent.Parent().(*gast.ListItem); !ok {
return nil
}

View file

@ -115,10 +115,10 @@ func (o *withTypographicSubstitutions) SetTypographerOption(p *TypographerConfig
// WithTypographicSubstitutions is a functional otpion that specify replacement text
// for punctuations.
func WithTypographicSubstitutions[T []byte | string](values map[TypographicPunctuation]T) TypographerOption {
func WithTypographicSubstitutions(values map[TypographicPunctuation][]byte) TypographerOption {
replacements := newDefaultSubstitutions()
for k, v := range values {
replacements[k] = []byte(v)
replacements[k] = v
}
return &withTypographicSubstitutions{replacements}

View file

@ -3,7 +3,7 @@ package fuzz
import (
"bytes"
"encoding/json"
"os"
"io/ioutil"
"testing"
"github.com/yuin/goldmark"
@ -42,7 +42,7 @@ func fuzz(f *testing.F) {
}
func FuzzDefault(f *testing.F) {
bs, err := os.ReadFile("../_test/spec.json")
bs, err := ioutil.ReadFile("../_test/spec.json")
if err != nil {
panic(err)
}

2
go.mod
View file

@ -1,3 +1,3 @@
module github.com/yuin/goldmark
go 1.19
go 1.18

View file

@ -2,13 +2,12 @@
package goldmark
import (
"io"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer"
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
"io"
)
// DefaultParser returns a new Parser that is configured by default values.

View file

@ -28,13 +28,12 @@ func (b *blockquoteParser) process(reader text.Reader) bool {
reader.Advance(pos)
return true
}
reader.Advance(pos)
if line[pos] == ' ' || line[pos] == '\t' {
padding := 0
if line[pos] == '\t' {
padding = util.TabWidth(reader.LineOffset()) - 1
pos++
}
reader.AdvanceAndSetPadding(1, padding)
reader.Advance(pos)
if line[pos-1] == '\t' {
reader.SetPadding(2)
}
return true
}

View file

@ -35,7 +35,6 @@ func (b *codeBlockParser) Open(parent ast.Node, reader text.Reader, pc Context)
if segment.Padding != 0 {
preserveLeadingTabInCodeBlock(&segment, reader, 0)
}
segment.ForceNewline = true
node.Lines().Append(segment)
reader.Advance(segment.Len() - 1)
return node, NoChildren
@ -60,7 +59,6 @@ func (b *codeBlockParser) Continue(node ast.Node, reader text.Reader, pc Context
preserveLeadingTabInCodeBlock(&segment, reader, 0)
}
segment.ForceNewline = true
node.Lines().Append(segment)
reader.Advance(segment.Len() - 1)
return Continue | NoChildren

View file

@ -100,7 +100,6 @@ func (b *fencedCodeBlockParser) Continue(node ast.Node, reader text.Reader, pc C
if padding != 0 {
preserveLeadingTabInCodeBlock(&seg, reader, fdata.indent)
}
seg.ForceNewline = true // EOF as newline
node.Lines().Append(seg)
reader.AdvanceAndSetPadding(segment.Stop-segment.Start-pos-1, padding)
return Continue | NoChildren

View file

@ -61,8 +61,8 @@ var allowedBlockTags = map[string]bool{
"option": true,
"p": true,
"param": true,
"search": true,
"section": true,
"source": true,
"summary": true,
"table": true,
"tbody": true,

View file

@ -126,13 +126,13 @@ func (s *linkParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.N
if line[0] == '!' {
if len(line) > 1 && line[1] == '[' {
block.Advance(1)
pushLinkBottom(pc)
pc.Set(linkBottom, pc.LastDelimiter())
return processLinkLabelOpen(block, segment.Start+1, true, pc)
}
return nil
}
if line[0] == '[' {
pushLinkBottom(pc)
pc.Set(linkBottom, pc.LastDelimiter())
return processLinkLabelOpen(block, segment.Start, false, pc)
}
@ -143,7 +143,6 @@ func (s *linkParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.N
}
last := tlist.(*linkLabelState).Last
if last == nil {
_ = popLinkBottom(pc)
return nil
}
block.Advance(1)
@ -152,13 +151,11 @@ func (s *linkParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.N
// > A link label can have at most 999 characters inside the square brackets.
if linkLabelStateLength(tlist.(*linkLabelState)) > 998 {
ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment)
_ = popLinkBottom(pc)
return nil
}
if !last.IsImage && s.containsLink(last) { // a link in a link text is not allowed
ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment)
_ = popLinkBottom(pc)
return nil
}
@ -172,7 +169,6 @@ func (s *linkParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.N
link, hasValue = s.parseReferenceLink(parent, last, block, pc)
if link == nil && hasValue {
ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment)
_ = popLinkBottom(pc)
return nil
}
}
@ -186,14 +182,12 @@ func (s *linkParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.N
// > A link label can have at most 999 characters inside the square brackets.
if len(maybeReference) > 999 {
ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment)
_ = popLinkBottom(pc)
return nil
}
ref, ok := pc.Reference(util.ToLinkReference(maybeReference))
if !ok {
ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment)
_ = popLinkBottom(pc)
return nil
}
link = ast.NewLink()
@ -236,7 +230,11 @@ func processLinkLabelOpen(block text.Reader, pos int, isImage bool, pc Context)
}
func (s *linkParser) processLinkLabel(parent ast.Node, link *ast.Link, last *linkLabelState, pc Context) {
bottom := popLinkBottom(pc)
var bottom ast.Node
if v := pc.Get(linkBottom); v != nil {
bottom = v.(ast.Node)
}
pc.Set(linkBottom, nil)
ProcessDelimiters(bottom, pc)
for c := last.NextSibling(); c != nil; {
next := c.NextSibling()
@ -397,43 +395,6 @@ func parseLinkTitle(block text.Reader) ([]byte, bool) {
return nil, false
}
func pushLinkBottom(pc Context) {
bottoms := pc.Get(linkBottom)
b := pc.LastDelimiter()
if bottoms == nil {
pc.Set(linkBottom, b)
return
}
if s, ok := bottoms.([]ast.Node); ok {
pc.Set(linkBottom, append(s, b))
return
}
pc.Set(linkBottom, []ast.Node{bottoms.(ast.Node), b})
}
func popLinkBottom(pc Context) ast.Node {
bottoms := pc.Get(linkBottom)
if bottoms == nil {
return nil
}
if v, ok := bottoms.(ast.Node); ok {
pc.Set(linkBottom, nil)
return v
}
s := bottoms.([]ast.Node)
v := s[len(s)-1]
n := s[0 : len(s)-1]
switch len(n) {
case 0:
pc.Set(linkBottom, nil)
case 1:
pc.Set(linkBottom, n[0])
default:
pc.Set(linkBottom, s[0:len(s)-1])
}
return v
}
func (s *linkParser) CloseBlock(parent ast.Node, block text.Reader, pc Context) {
pc.Set(linkBottom, nil)
tlist := pc.Get(linkLabelStateKey)

View file

@ -58,7 +58,7 @@ func (b *listItemParser) Continue(node ast.Node, reader text.Reader, pc Context)
}
offset := lastOffset(node.Parent())
isEmpty := node.ChildCount() == 0 && pc.Get(emptyListItemWithBlankLines) != nil
isEmpty := node.ChildCount() == 0
indent, _ := util.IndentWidth(line, reader.LineOffset())
if (isEmpty || indent < offset) && indent < 4 {
_, typ := matchesListItem(line, true)

View file

@ -882,7 +882,6 @@ func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
for _, at := range p.astTransformers {
at.Transform(root, reader, pc)
}
// root.Dump(reader.Source(), 0)
return root
}
@ -1257,5 +1256,4 @@ func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context)
for _, ip := range p.closeBlockers {
ip.CloseBlock(parent, block, pc)
}
}

View file

@ -58,38 +58,47 @@ var closeProcessingInstruction = []byte("?>")
var openCDATA = []byte("<![CDATA[")
var closeCDATA = []byte("]]>")
var closeDecl = []byte(">")
var emptyComment1 = []byte("<!-->")
var emptyComment2 = []byte("<!--->")
var emptyComment = []byte("<!---->")
var invalidComment1 = []byte("<!-->")
var invalidComment2 = []byte("<!--->")
var openComment = []byte("<!--")
var closeComment = []byte("-->")
var doubleHyphen = []byte("--")
func (s *rawHTMLParser) parseComment(block text.Reader, pc Context) ast.Node {
savedLine, savedSegment := block.Position()
node := ast.NewRawHTML()
line, segment := block.PeekLine()
if bytes.HasPrefix(line, emptyComment1) {
node.Segments.Append(segment.WithStop(segment.Start + len(emptyComment1)))
block.Advance(len(emptyComment1))
if bytes.HasPrefix(line, emptyComment) {
node.Segments.Append(segment.WithStop(segment.Start + len(emptyComment)))
block.Advance(len(emptyComment))
return node
}
if bytes.HasPrefix(line, emptyComment2) {
node.Segments.Append(segment.WithStop(segment.Start + len(emptyComment2)))
block.Advance(len(emptyComment2))
return node
if bytes.HasPrefix(line, invalidComment1) || bytes.HasPrefix(line, invalidComment2) {
return nil
}
offset := len(openComment)
line = line[offset:]
for {
index := bytes.Index(line, closeComment)
if index > -1 {
node.Segments.Append(segment.WithStop(segment.Start + offset + index + len(closeComment)))
block.Advance(offset + index + len(closeComment))
hindex := bytes.Index(line, doubleHyphen)
if hindex > -1 {
hindex += offset
}
index := bytes.Index(line, closeComment) + offset
if index > -1 && hindex == index {
if index == 0 || len(line) < 2 || line[index-offset-1] != '-' {
node.Segments.Append(segment.WithStop(segment.Start + index + len(closeComment)))
block.Advance(index + len(closeComment))
return node
}
offset = 0
}
if hindex > 0 {
break
}
node.Segments.Append(segment)
block.AdvanceLine()
line, segment = block.PeekLine()
offset = 0
if line == nil {
break
}

View file

@ -5,7 +5,6 @@ import (
"bytes"
"fmt"
"strconv"
"unicode"
"unicode/utf8"
"github.com/yuin/goldmark/ast"
@ -17,7 +16,7 @@ import (
type Config struct {
Writer Writer
HardWraps bool
EastAsianLineBreaks EastAsianLineBreaks
EastAsianLineBreaks bool
XHTML bool
Unsafe bool
}
@ -27,7 +26,7 @@ func NewConfig() Config {
return Config{
Writer: DefaultWriter,
HardWraps: false,
EastAsianLineBreaks: EastAsianLineBreaksNone,
EastAsianLineBreaks: false,
XHTML: false,
Unsafe: false,
}
@ -39,7 +38,7 @@ func (c *Config) SetOption(name renderer.OptionName, value interface{}) {
case optHardWraps:
c.HardWraps = value.(bool)
case optEastAsianLineBreaks:
c.EastAsianLineBreaks = value.(EastAsianLineBreaks)
c.EastAsianLineBreaks = value.(bool)
case optXHTML:
c.XHTML = value.(bool)
case optUnsafe:
@ -104,94 +103,24 @@ func WithHardWraps() interface {
// EastAsianLineBreaks is an option name used in WithEastAsianLineBreaks.
const optEastAsianLineBreaks renderer.OptionName = "EastAsianLineBreaks"
// A EastAsianLineBreaks is a style of east asian line breaks.
type EastAsianLineBreaks int
const (
//EastAsianLineBreaksNone renders line breaks as it is.
EastAsianLineBreaksNone EastAsianLineBreaks = iota
// EastAsianLineBreaksSimple follows east_asian_line_breaks in Pandoc.
EastAsianLineBreaksSimple
// EastAsianLineBreaksCSS3Draft follows CSS text level3 "Segment Break Transformation Rules" with some enhancements.
EastAsianLineBreaksCSS3Draft
)
func (b EastAsianLineBreaks) softLineBreak(thisLastRune rune, siblingFirstRune rune) bool {
switch b {
case EastAsianLineBreaksNone:
return false
case EastAsianLineBreaksSimple:
return !(util.IsEastAsianWideRune(thisLastRune) && util.IsEastAsianWideRune(siblingFirstRune))
case EastAsianLineBreaksCSS3Draft:
return eastAsianLineBreaksCSS3DraftSoftLineBreak(thisLastRune, siblingFirstRune)
}
return false
}
func eastAsianLineBreaksCSS3DraftSoftLineBreak(thisLastRune rune, siblingFirstRune rune) bool {
// Implements CSS text level3 Segment Break Transformation Rules with some enhancements.
// References:
// - https://www.w3.org/TR/2020/WD-css-text-3-20200429/#line-break-transform
// - https://github.com/w3c/csswg-drafts/issues/5086
// Rule1:
// If the character immediately before or immediately after the segment break is
// the zero-width space character (U+200B), then the break is removed, leaving behind the zero-width space.
if thisLastRune == '\u200B' || siblingFirstRune == '\u200B' {
return false
}
// Rule2:
// Otherwise, if the East Asian Width property of both the character before and after the segment break is
// F, W, or H (not A), and neither side is Hangul, then the segment break is removed.
thisLastRuneEastAsianWidth := util.EastAsianWidth(thisLastRune)
siblingFirstRuneEastAsianWidth := util.EastAsianWidth(siblingFirstRune)
if (thisLastRuneEastAsianWidth == "F" ||
thisLastRuneEastAsianWidth == "W" ||
thisLastRuneEastAsianWidth == "H") &&
(siblingFirstRuneEastAsianWidth == "F" ||
siblingFirstRuneEastAsianWidth == "W" ||
siblingFirstRuneEastAsianWidth == "H") {
return unicode.Is(unicode.Hangul, thisLastRune) || unicode.Is(unicode.Hangul, siblingFirstRune)
}
// Rule3:
// Otherwise, if either the character before or after the segment break belongs to
// the space-discarding character set and it is a Unicode Punctuation (P*) or U+3000,
// then the segment break is removed.
if util.IsSpaceDiscardingUnicodeRune(thisLastRune) ||
unicode.IsPunct(thisLastRune) ||
thisLastRune == '\u3000' ||
util.IsSpaceDiscardingUnicodeRune(siblingFirstRune) ||
unicode.IsPunct(siblingFirstRune) ||
siblingFirstRune == '\u3000' {
return false
}
// Rule4:
// Otherwise, the segment break is converted to a space (U+0020).
return true
}
type withEastAsianLineBreaks struct {
eastAsianLineBreaksStyle EastAsianLineBreaks
}
func (o *withEastAsianLineBreaks) SetConfig(c *renderer.Config) {
c.Options[optEastAsianLineBreaks] = o.eastAsianLineBreaksStyle
c.Options[optEastAsianLineBreaks] = true
}
func (o *withEastAsianLineBreaks) SetHTMLOption(c *Config) {
c.EastAsianLineBreaks = o.eastAsianLineBreaksStyle
c.EastAsianLineBreaks = true
}
// WithEastAsianLineBreaks is a functional option that indicates whether softline breaks
// between east asian wide characters should be ignored.
func WithEastAsianLineBreaks(e EastAsianLineBreaks) interface {
func WithEastAsianLineBreaks() interface {
renderer.Option
Option
} {
return &withEastAsianLineBreaks{e}
return &withEastAsianLineBreaks{}
}
// XHTML is an option name used in WithXHTML.
@ -445,7 +374,7 @@ func (r *Renderer) renderList(w util.BufWriter, source []byte, node ast.Node, en
_ = w.WriteByte('<')
_, _ = w.WriteString(tag)
if n.IsOrdered() && n.Start != 1 {
_, _ = fmt.Fprintf(w, " start=\"%d\"", n.Start)
fmt.Fprintf(w, " start=\"%d\"", n.Start)
}
if n.Attributes() != nil {
RenderAttributes(w, n, ListAttributeFilter)
@ -680,7 +609,7 @@ func (r *Renderer) renderImage(w util.BufWriter, source []byte, node ast.Node, e
_, _ = w.Write(util.EscapeHTML(util.URLEscape(n.Destination, true)))
}
_, _ = w.WriteString(`" alt="`)
r.renderTexts(w, source, n)
_, _ = w.Write(nodeToHTMLText(n, source))
_ = w.WriteByte('"')
if n.Title != nil {
_, _ = w.WriteString(` title="`)
@ -734,13 +663,14 @@ func (r *Renderer) renderText(w util.BufWriter, source []byte, node ast.Node, en
_, _ = w.WriteString("<br>\n")
}
} else if n.SoftLineBreak() {
if r.EastAsianLineBreaks != EastAsianLineBreaksNone && len(value) != 0 {
if r.EastAsianLineBreaks && len(value) != 0 {
sibling := node.NextSibling()
if sibling != nil && sibling.Kind() == ast.KindText {
if siblingText := sibling.(*ast.Text).Value(source); len(siblingText) != 0 {
if siblingText := sibling.(*ast.Text).Text(source); len(siblingText) != 0 {
thisLastRune := util.ToRune(value, len(value)-1)
siblingFirstRune, _ := utf8.DecodeRune(siblingText)
if r.EastAsianLineBreaks.softLineBreak(thisLastRune, siblingFirstRune) {
if !(util.IsEastAsianWideRune(thisLastRune) &&
util.IsEastAsianWideRune(siblingFirstRune)) {
_ = w.WriteByte('\n')
}
}
@ -770,18 +700,6 @@ func (r *Renderer) renderString(w util.BufWriter, source []byte, node ast.Node,
return ast.WalkContinue, nil
}
func (r *Renderer) renderTexts(w util.BufWriter, source []byte, n ast.Node) {
for c := n.FirstChild(); c != nil; c = c.NextSibling() {
if s, ok := c.(*ast.String); ok {
_, _ = r.renderString(w, source, s, true)
} else if t, ok := c.(*ast.Text); ok {
_, _ = r.renderText(w, source, t, true)
} else {
r.renderTexts(w, source, c)
}
}
}
var dataPrefix = []byte("data-")
// RenderAttributes renders given node's attributes.
@ -798,14 +716,7 @@ func RenderAttributes(w util.BufWriter, node ast.Node, filter util.BytesFilter)
_, _ = w.Write(attr.Name)
_, _ = w.WriteString(`="`)
// TODO: convert numeric values to strings
var value []byte
switch typed := attr.Value.(type) {
case []byte:
value = typed
case string:
value = util.StringToReadOnlyBytes(typed)
}
_, _ = w.Write(util.EscapeHTML(value))
_, _ = w.Write(util.EscapeHTML(attr.Value.([]byte)))
_ = w.WriteByte('"')
}
}
@ -1019,3 +930,17 @@ func IsDangerousURL(url []byte) bool {
return hasPrefix(url, bJs) || hasPrefix(url, bVb) ||
hasPrefix(url, bFile) || hasPrefix(url, bData)
}
func nodeToHTMLText(n ast.Node, source []byte) []byte {
var buf bytes.Buffer
for c := n.FirstChild(); c != nil; c = c.NextSibling() {
if s, ok := c.(*ast.String); ok && s.IsCode() {
buf.Write(s.Text(source))
} else if !c.HasChildren() {
buf.Write(util.EscapeHTML(c.Text(source)))
} else {
buf.Write(nodeToHTMLText(c, source))
}
}
return buf.Bytes()
}

View file

@ -2,7 +2,6 @@ package text
import (
"bytes"
"github.com/yuin/goldmark/util"
)
@ -19,20 +18,6 @@ type Segment struct {
// Padding is a padding length of the segment.
Padding int
// ForceNewline is true if the segment should be ended with a newline.
// Some elements(i.e. CodeBlock, FencedCodeBlock) does not trim trailing
// newlines. Spec defines that EOF is treated as a newline, so we need to
// add a newline to the end of the segment if it is not empty.
//
// i.e.:
//
// ```go
// const test = "test"
//
// This code does not close the code block and ends with EOF. In this case,
// we need to add a newline to the end of the last line like `const test = "test"\n`.
ForceNewline bool
}
// NewSegment return a new Segment.
@ -55,18 +40,12 @@ func NewSegmentPadding(start, stop, n int) Segment {
// Value returns a value of the segment.
func (t *Segment) Value(buffer []byte) []byte {
var result []byte
if t.Padding == 0 {
result = buffer[t.Start:t.Stop]
} else {
result = make([]byte, 0, t.Padding+t.Stop-t.Start+1)
return buffer[t.Start:t.Stop]
}
result := make([]byte, 0, t.Padding+t.Stop-t.Start+1)
result = append(result, bytes.Repeat(space, t.Padding)...)
result = append(result, buffer[t.Start:t.Stop]...)
}
if t.ForceNewline && len(result) > 0 && result[len(result)-1] != '\n' {
result = append(result, '\n')
}
return result
return append(result, buffer[t.Start:t.Stop]...)
}
// Len returns a length of the segment.
@ -228,12 +207,3 @@ func (s *Segments) Unshift(v Segment) {
s.values = append(s.values[0:1], s.values[0:]...)
s.values[0] = v
}
// Value returns a string value of the collection.
func (s *Segments) Value(buffer []byte) []byte {
var result []byte
for _, v := range s.values {
result = append(result, v.Value(buffer)...)
}
return result
}

View file

@ -1,8 +1,6 @@
//nolint:golint,lll,misspell
package util
import "sync"
// An HTML5Entity struct represents HTML5 entitites.
type HTML5Entity struct {
Name string
@ -13,18 +11,11 @@ type HTML5Entity struct {
// LookUpHTML5EntityByName returns (an HTML5Entity, true) if an entity named
// given name is found, otherwise (nil, false).
func LookUpHTML5EntityByName(name string) (*HTML5Entity, bool) {
v, ok := html5entities()[name]
v, ok := html5entities[name]
return v, ok
}
var html5entitiesOnce sync.Once // TODO: uses sync.OnceValue for future
var _html5entities map[string]*HTML5Entity
func html5entities() map[string]*HTML5Entity {
html5entitiesOnce.Do(func() {
_html5entities =
map[string]*HTML5Entity{
var html5entities = map[string]*HTML5Entity{
"AElig": {Name: "AElig", CodePoints: []int{198}, Characters: []byte{0xc3, 0x86}},
"AMP": {Name: "AMP", CodePoints: []int{38}, Characters: []byte{0x26}},
"Aacute": {Name: "Aacute", CodePoints: []int{193}, Characters: []byte{0xc3, 0x81}},
@ -2149,7 +2140,4 @@ func html5entities() map[string]*HTML5Entity {
"zscr": {Name: "zscr", CodePoints: []int{120015}, Characters: []byte{0xf0, 0x9d, 0x93, 0x8f}},
"zwj": {Name: "zwj", CodePoints: []int{8205}, Characters: []byte{0xe2, 0x80, 0x8d}},
"zwnj": {Name: "zwnj", CodePoints: []int{8204}, Characters: []byte{0xe2, 0x80, 0x8c}},
}
})
return _html5entities
}

View file

@ -166,13 +166,7 @@ func IndentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, pad
w := 0
i := 0
l := len(bs)
p := paddingv
for ; i < l; i++ {
if p > 0 {
p--
w++
continue
}
if bs[i] == '\t' && w < width {
w += TabWidth(currentPos + w)
} else if bs[i] == ' ' && w < width {
@ -814,7 +808,7 @@ func IsPunct(c byte) bool {
// IsPunctRune returns true if the given rune is a punctuation, otherwise false.
func IsPunctRune(r rune) bool {
return unicode.IsSymbol(r) || unicode.IsPunct(r)
return int32(r) <= 256 && IsPunct(byte(r)) || unicode.IsPunct(r)
}
// IsSpace returns true if the given character is a space, otherwise false.
@ -842,6 +836,22 @@ func IsAlphaNumeric(c byte) bool {
return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9'
}
// IsEastAsianWideRune returns trhe if the given rune is an east asian wide character, otherwise false.
func IsEastAsianWideRune(r rune) bool {
// https://en.wikipedia.org/wiki/CJK_Symbols_and_Punctuation
var CJKSymbolsAndPunctuation = &unicode.RangeTable{
R16: []unicode.Range16{
{0x3000, 0x303F, 1},
},
}
return unicode.Is(unicode.Hiragana, r) ||
unicode.Is(unicode.Katakana, r) ||
unicode.Is(unicode.Han, r) ||
unicode.Is(unicode.Lm, r) ||
unicode.Is(unicode.Hangul, r) ||
unicode.Is(CJKSymbolsAndPunctuation, r)
}
// A BufWriter is a subset of the bufio.Writer .
type BufWriter interface {
io.Writer

View file

@ -1,469 +0,0 @@
package util
import "unicode"
var cjkRadicalsSupplement = &unicode.RangeTable{
R16: []unicode.Range16{
{0x2E80, 0x2EFF, 1},
},
}
var kangxiRadicals = &unicode.RangeTable{
R16: []unicode.Range16{
{0x2F00, 0x2FDF, 1},
},
}
var ideographicDescriptionCharacters = &unicode.RangeTable{
R16: []unicode.Range16{
{0x2FF0, 0x2FFF, 1},
},
}
var cjkSymbolsAndPunctuation = &unicode.RangeTable{
R16: []unicode.Range16{
{0x3000, 0x303F, 1},
},
}
var hiragana = &unicode.RangeTable{
R16: []unicode.Range16{
{0x3040, 0x309F, 1},
},
}
var katakana = &unicode.RangeTable{
R16: []unicode.Range16{
{0x30A0, 0x30FF, 1},
},
}
var kanbun = &unicode.RangeTable{
R16: []unicode.Range16{
{0x3130, 0x318F, 1},
{0x3190, 0x319F, 1},
},
}
var cjkStrokes = &unicode.RangeTable{
R16: []unicode.Range16{
{0x31C0, 0x31EF, 1},
},
}
var katakanaPhoneticExtensions = &unicode.RangeTable{
R16: []unicode.Range16{
{0x31F0, 0x31FF, 1},
},
}
var cjkCompatibility = &unicode.RangeTable{
R16: []unicode.Range16{
{0x3300, 0x33FF, 1},
},
}
var cjkUnifiedIdeographsExtensionA = &unicode.RangeTable{
R16: []unicode.Range16{
{0x3400, 0x4DBF, 1},
},
}
var cjkUnifiedIdeographs = &unicode.RangeTable{
R16: []unicode.Range16{
{0x4E00, 0x9FFF, 1},
},
}
var yiSyllables = &unicode.RangeTable{
R16: []unicode.Range16{
{0xA000, 0xA48F, 1},
},
}
var yiRadicals = &unicode.RangeTable{
R16: []unicode.Range16{
{0xA490, 0xA4CF, 1},
},
}
var cjkCompatibilityIdeographs = &unicode.RangeTable{
R16: []unicode.Range16{
{0xF900, 0xFAFF, 1},
},
}
var verticalForms = &unicode.RangeTable{
R16: []unicode.Range16{
{0xFE10, 0xFE1F, 1},
},
}
var cjkCompatibilityForms = &unicode.RangeTable{
R16: []unicode.Range16{
{0xFE30, 0xFE4F, 1},
},
}
var smallFormVariants = &unicode.RangeTable{
R16: []unicode.Range16{
{0xFE50, 0xFE6F, 1},
},
}
var halfwidthAndFullwidthForms = &unicode.RangeTable{
R16: []unicode.Range16{
{0xFF00, 0xFFEF, 1},
},
}
var kanaSupplement = &unicode.RangeTable{
R32: []unicode.Range32{
{0x1B000, 0x1B0FF, 1},
},
}
var kanaExtendedA = &unicode.RangeTable{
R32: []unicode.Range32{
{0x1B100, 0x1B12F, 1},
},
}
var smallKanaExtension = &unicode.RangeTable{
R32: []unicode.Range32{
{0x1B130, 0x1B16F, 1},
},
}
var cjkUnifiedIdeographsExtensionB = &unicode.RangeTable{
R32: []unicode.Range32{
{0x20000, 0x2A6DF, 1},
},
}
var cjkUnifiedIdeographsExtensionC = &unicode.RangeTable{
R32: []unicode.Range32{
{0x2A700, 0x2B73F, 1},
},
}
var cjkUnifiedIdeographsExtensionD = &unicode.RangeTable{
R32: []unicode.Range32{
{0x2B740, 0x2B81F, 1},
},
}
var cjkUnifiedIdeographsExtensionE = &unicode.RangeTable{
R32: []unicode.Range32{
{0x2B820, 0x2CEAF, 1},
},
}
var cjkUnifiedIdeographsExtensionF = &unicode.RangeTable{
R32: []unicode.Range32{
{0x2CEB0, 0x2EBEF, 1},
},
}
var cjkCompatibilityIdeographsSupplement = &unicode.RangeTable{
R32: []unicode.Range32{
{0x2F800, 0x2FA1F, 1},
},
}
var cjkUnifiedIdeographsExtensionG = &unicode.RangeTable{
R32: []unicode.Range32{
{0x30000, 0x3134F, 1},
},
}
// IsEastAsianWideRune returns trhe if the given rune is an east asian wide character, otherwise false.
func IsEastAsianWideRune(r rune) bool {
return unicode.Is(unicode.Hiragana, r) ||
unicode.Is(unicode.Katakana, r) ||
unicode.Is(unicode.Han, r) ||
unicode.Is(unicode.Lm, r) ||
unicode.Is(unicode.Hangul, r) ||
unicode.Is(cjkSymbolsAndPunctuation, r)
}
// IsSpaceDiscardingUnicodeRune returns true if the given rune is space-discarding unicode character, otherwise false.
// See https://www.w3.org/TR/2020/WD-css-text-3-20200429/#space-discard-set
func IsSpaceDiscardingUnicodeRune(r rune) bool {
return unicode.Is(cjkRadicalsSupplement, r) ||
unicode.Is(kangxiRadicals, r) ||
unicode.Is(ideographicDescriptionCharacters, r) ||
unicode.Is(cjkSymbolsAndPunctuation, r) ||
unicode.Is(hiragana, r) ||
unicode.Is(katakana, r) ||
unicode.Is(kanbun, r) ||
unicode.Is(cjkStrokes, r) ||
unicode.Is(katakanaPhoneticExtensions, r) ||
unicode.Is(cjkCompatibility, r) ||
unicode.Is(cjkUnifiedIdeographsExtensionA, r) ||
unicode.Is(cjkUnifiedIdeographs, r) ||
unicode.Is(yiSyllables, r) ||
unicode.Is(yiRadicals, r) ||
unicode.Is(cjkCompatibilityIdeographs, r) ||
unicode.Is(verticalForms, r) ||
unicode.Is(cjkCompatibilityForms, r) ||
unicode.Is(smallFormVariants, r) ||
unicode.Is(halfwidthAndFullwidthForms, r) ||
unicode.Is(kanaSupplement, r) ||
unicode.Is(kanaExtendedA, r) ||
unicode.Is(smallKanaExtension, r) ||
unicode.Is(cjkUnifiedIdeographsExtensionB, r) ||
unicode.Is(cjkUnifiedIdeographsExtensionC, r) ||
unicode.Is(cjkUnifiedIdeographsExtensionD, r) ||
unicode.Is(cjkUnifiedIdeographsExtensionE, r) ||
unicode.Is(cjkUnifiedIdeographsExtensionF, r) ||
unicode.Is(cjkCompatibilityIdeographsSupplement, r) ||
unicode.Is(cjkUnifiedIdeographsExtensionG, r)
}
// EastAsianWidth returns the east asian width of the given rune.
// See https://www.unicode.org/reports/tr11/tr11-36.html
func EastAsianWidth(r rune) string {
switch {
case r == 0x3000,
(0xFF01 <= r && r <= 0xFF60),
(0xFFE0 <= r && r <= 0xFFE6):
return "F"
case r == 0x20A9,
(0xFF61 <= r && r <= 0xFFBE),
(0xFFC2 <= r && r <= 0xFFC7),
(0xFFCA <= r && r <= 0xFFCF),
(0xFFD2 <= r && r <= 0xFFD7),
(0xFFDA <= r && r <= 0xFFDC),
(0xFFE8 <= r && r <= 0xFFEE):
return "H"
case (0x1100 <= r && r <= 0x115F),
(0x11A3 <= r && r <= 0x11A7),
(0x11FA <= r && r <= 0x11FF),
(0x2329 <= r && r <= 0x232A),
(0x2E80 <= r && r <= 0x2E99),
(0x2E9B <= r && r <= 0x2EF3),
(0x2F00 <= r && r <= 0x2FD5),
(0x2FF0 <= r && r <= 0x2FFB),
(0x3001 <= r && r <= 0x303E),
(0x3041 <= r && r <= 0x3096),
(0x3099 <= r && r <= 0x30FF),
(0x3105 <= r && r <= 0x312D),
(0x3131 <= r && r <= 0x318E),
(0x3190 <= r && r <= 0x31BA),
(0x31C0 <= r && r <= 0x31E3),
(0x31F0 <= r && r <= 0x321E),
(0x3220 <= r && r <= 0x3247),
(0x3250 <= r && r <= 0x32FE),
(0x3300 <= r && r <= 0x4DBF),
(0x4E00 <= r && r <= 0xA48C),
(0xA490 <= r && r <= 0xA4C6),
(0xA960 <= r && r <= 0xA97C),
(0xAC00 <= r && r <= 0xD7A3),
(0xD7B0 <= r && r <= 0xD7C6),
(0xD7CB <= r && r <= 0xD7FB),
(0xF900 <= r && r <= 0xFAFF),
(0xFE10 <= r && r <= 0xFE19),
(0xFE30 <= r && r <= 0xFE52),
(0xFE54 <= r && r <= 0xFE66),
(0xFE68 <= r && r <= 0xFE6B),
(0x1B000 <= r && r <= 0x1B001),
(0x1F200 <= r && r <= 0x1F202),
(0x1F210 <= r && r <= 0x1F23A),
(0x1F240 <= r && r <= 0x1F248),
(0x1F250 <= r && r <= 0x1F251),
(0x20000 <= r && r <= 0x2F73F),
(0x2B740 <= r && r <= 0x2FFFD),
(0x30000 <= r && r <= 0x3FFFD):
return "W"
case (0x0020 <= r && r <= 0x007E),
(0x00A2 <= r && r <= 0x00A3),
(0x00A5 <= r && r <= 0x00A6),
r == 0x00AC,
r == 0x00AF,
(0x27E6 <= r && r <= 0x27ED),
(0x2985 <= r && r <= 0x2986):
return "Na"
case (0x00A1 == r),
(0x00A4 == r),
(0x00A7 <= r && r <= 0x00A8),
(0x00AA == r),
(0x00AD <= r && r <= 0x00AE),
(0x00B0 <= r && r <= 0x00B4),
(0x00B6 <= r && r <= 0x00BA),
(0x00BC <= r && r <= 0x00BF),
(0x00C6 == r),
(0x00D0 == r),
(0x00D7 <= r && r <= 0x00D8),
(0x00DE <= r && r <= 0x00E1),
(0x00E6 == r),
(0x00E8 <= r && r <= 0x00EA),
(0x00EC <= r && r <= 0x00ED),
(0x00F0 == r),
(0x00F2 <= r && r <= 0x00F3),
(0x00F7 <= r && r <= 0x00FA),
(0x00FC == r),
(0x00FE == r),
(0x0101 == r),
(0x0111 == r),
(0x0113 == r),
(0x011B == r),
(0x0126 <= r && r <= 0x0127),
(0x012B == r),
(0x0131 <= r && r <= 0x0133),
(0x0138 == r),
(0x013F <= r && r <= 0x0142),
(0x0144 == r),
(0x0148 <= r && r <= 0x014B),
(0x014D == r),
(0x0152 <= r && r <= 0x0153),
(0x0166 <= r && r <= 0x0167),
(0x016B == r),
(0x01CE == r),
(0x01D0 == r),
(0x01D2 == r),
(0x01D4 == r),
(0x01D6 == r),
(0x01D8 == r),
(0x01DA == r),
(0x01DC == r),
(0x0251 == r),
(0x0261 == r),
(0x02C4 == r),
(0x02C7 == r),
(0x02C9 <= r && r <= 0x02CB),
(0x02CD == r),
(0x02D0 == r),
(0x02D8 <= r && r <= 0x02DB),
(0x02DD == r),
(0x02DF == r),
(0x0300 <= r && r <= 0x036F),
(0x0391 <= r && r <= 0x03A1),
(0x03A3 <= r && r <= 0x03A9),
(0x03B1 <= r && r <= 0x03C1),
(0x03C3 <= r && r <= 0x03C9),
(0x0401 == r),
(0x0410 <= r && r <= 0x044F),
(0x0451 == r),
(0x2010 == r),
(0x2013 <= r && r <= 0x2016),
(0x2018 <= r && r <= 0x2019),
(0x201C <= r && r <= 0x201D),
(0x2020 <= r && r <= 0x2022),
(0x2024 <= r && r <= 0x2027),
(0x2030 == r),
(0x2032 <= r && r <= 0x2033),
(0x2035 == r),
(0x203B == r),
(0x203E == r),
(0x2074 == r),
(0x207F == r),
(0x2081 <= r && r <= 0x2084),
(0x20AC == r),
(0x2103 == r),
(0x2105 == r),
(0x2109 == r),
(0x2113 == r),
(0x2116 == r),
(0x2121 <= r && r <= 0x2122),
(0x2126 == r),
(0x212B == r),
(0x2153 <= r && r <= 0x2154),
(0x215B <= r && r <= 0x215E),
(0x2160 <= r && r <= 0x216B),
(0x2170 <= r && r <= 0x2179),
(0x2189 == r),
(0x2190 <= r && r <= 0x2199),
(0x21B8 <= r && r <= 0x21B9),
(0x21D2 == r),
(0x21D4 == r),
(0x21E7 == r),
(0x2200 == r),
(0x2202 <= r && r <= 0x2203),
(0x2207 <= r && r <= 0x2208),
(0x220B == r),
(0x220F == r),
(0x2211 == r),
(0x2215 == r),
(0x221A == r),
(0x221D <= r && r <= 0x2220),
(0x2223 == r),
(0x2225 == r),
(0x2227 <= r && r <= 0x222C),
(0x222E == r),
(0x2234 <= r && r <= 0x2237),
(0x223C <= r && r <= 0x223D),
(0x2248 == r),
(0x224C == r),
(0x2252 == r),
(0x2260 <= r && r <= 0x2261),
(0x2264 <= r && r <= 0x2267),
(0x226A <= r && r <= 0x226B),
(0x226E <= r && r <= 0x226F),
(0x2282 <= r && r <= 0x2283),
(0x2286 <= r && r <= 0x2287),
(0x2295 == r),
(0x2299 == r),
(0x22A5 == r),
(0x22BF == r),
(0x2312 == r),
(0x2460 <= r && r <= 0x24E9),
(0x24EB <= r && r <= 0x254B),
(0x2550 <= r && r <= 0x2573),
(0x2580 <= r && r <= 0x258F),
(0x2592 <= r && r <= 0x2595),
(0x25A0 <= r && r <= 0x25A1),
(0x25A3 <= r && r <= 0x25A9),
(0x25B2 <= r && r <= 0x25B3),
(0x25B6 <= r && r <= 0x25B7),
(0x25BC <= r && r <= 0x25BD),
(0x25C0 <= r && r <= 0x25C1),
(0x25C6 <= r && r <= 0x25C8),
(0x25CB == r),
(0x25CE <= r && r <= 0x25D1),
(0x25E2 <= r && r <= 0x25E5),
(0x25EF == r),
(0x2605 <= r && r <= 0x2606),
(0x2609 == r),
(0x260E <= r && r <= 0x260F),
(0x2614 <= r && r <= 0x2615),
(0x261C == r),
(0x261E == r),
(0x2640 == r),
(0x2642 == r),
(0x2660 <= r && r <= 0x2661),
(0x2663 <= r && r <= 0x2665),
(0x2667 <= r && r <= 0x266A),
(0x266C <= r && r <= 0x266D),
(0x266F == r),
(0x269E <= r && r <= 0x269F),
(0x26BE <= r && r <= 0x26BF),
(0x26C4 <= r && r <= 0x26CD),
(0x26CF <= r && r <= 0x26E1),
(0x26E3 == r),
(0x26E8 <= r && r <= 0x26FF),
(0x273D == r),
(0x2757 == r),
(0x2776 <= r && r <= 0x277F),
(0x2B55 <= r && r <= 0x2B59),
(0x3248 <= r && r <= 0x324F),
(0xE000 <= r && r <= 0xF8FF),
(0xFE00 <= r && r <= 0xFE0F),
(0xFFFD == r),
(0x1F100 <= r && r <= 0x1F10A),
(0x1F110 <= r && r <= 0x1F12D),
(0x1F130 <= r && r <= 0x1F169),
(0x1F170 <= r && r <= 0x1F19A),
(0xE0100 <= r && r <= 0xE01EF),
(0xF0000 <= r && r <= 0xFFFFD),
(0x100000 <= r && r <= 0x10FFFD):
return "A"
default:
return "N"
}
}

View file

@ -1,5 +1,5 @@
//go:build !appengine && !js && !go1.21
// +build !appengine,!js,!go1.21
//go:build !appengine && !js
// +build !appengine,!js
package util

View file

@ -1,18 +0,0 @@
//go:build !appengine && !js && go1.21
// +build !appengine,!js,go1.21
package util
import (
"unsafe"
)
// BytesToReadOnlyString returns a string converted from given bytes.
func BytesToReadOnlyString(b []byte) string {
return unsafe.String(unsafe.SliceData(b), len(b))
}
// StringToReadOnlyBytes returns bytes converted from given string.
func StringToReadOnlyBytes(s string) []byte {
return unsafe.Slice(unsafe.StringData(s), len(s))
}