Compare commits

..

No commits in common. "master" and "v1.3.5" have entirely different histories.

80 changed files with 4913 additions and 8360 deletions

17
.github/stale.yml vendored Normal file
View file

@ -0,0 +1,17 @@
# Stale-issue handling: when an issue is flagged, when it is closed,
# and which issues are left alone.

# Days without activity before an issue is flagged as stale.
daysUntilStale: 30

# Days without activity after flagging before the issue is closed.
daysUntilClose: 7

# Issues carrying any of these labels are never flagged.
exemptLabels:
  - pinned
  - security

# Label applied to an issue once it is flagged.
staleLabel: stale

# Comment posted when an issue is flagged (`false` disables it).
# Folded scalar: the three lines below are joined into one paragraph.
markComment: >
  This issue has been automatically marked as stale because it has not had
  recent activity. It will be closed if no further activity occurs. Thank you
  for your contributions.

# Comment posted when a stale issue is closed; disabled here.
closeComment: false

View file

@ -1,26 +0,0 @@
# Flags and closes inactive issues and pull requests using actions/stale.
name: Close inactive issues

on:
  schedule:
    # Every day at 09:30 (GitHub evaluates workflow schedules in UTC).
    - cron: '30 9 * * *'

jobs:
  close-issues:
    runs-on: ubuntu-latest
    # The action labels, comments on and closes issues and PRs,
    # so it needs write access to both.
    permissions:
      issues: write
      pull-requests: write
    steps:
      - uses: actions/stale@v5
        with:
          # Issues: stale after 30 idle days, closed 14 days later.
          days-before-issue-stale: 30
          days-before-issue-close: 14
          stale-issue-label: 'stale'
          stale-issue-message: 'This issue is stale because it has been open for 30 days with no activity.'
          close-issue-message: 'This issue was closed because it has been inactive for 14 days since being marked as stale.'
          exempt-issue-labels: 'pinned,security'
          # Pull requests: stale after 180 idle days, closed 14 days later.
          days-before-pr-stale: 180
          days-before-pr-close: 14
          stale-pr-label: 'stale'
          stale-pr-message: 'This PR is stale because it has been open for 180 days with no activity.'
          close-pr-message: 'This PR was closed because it has been inactive for 14 days since being marked as stale.'
          exempt-pr-labels: 'pinned,security'
          repo-token: ${{ secrets.GITHUB_TOKEN }}

View file

@ -5,29 +5,22 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
go-version: [1.21.x, 1.22.x] go-version: [1.14.x,1.15.x]
platform: [ubuntu-latest, macos-latest, windows-latest] platform: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.platform }} runs-on: ${{ matrix.platform }}
steps: steps:
- name: Install Go - name: Install Go
uses: actions/setup-go@v4 uses: actions/setup-go@v1
with: with:
go-version: ${{ matrix.go-version }} go-version: ${{ matrix.go-version }}
- name: Checkout code - name: Checkout code
uses: actions/checkout@v3 uses: actions/checkout@v1
- name: Run lints
uses: golangci/golangci-lint-action@v6
with:
version: latest
if: "matrix.platform == 'ubuntu-latest'" # gofmt linter fails on Windows for CRLF problems
- name: Run tests - name: Run tests
env:
GOLDMARK_TEST_TIMEOUT_MULTIPLIER: 5
run: go test -v ./... -covermode=count -coverprofile=coverage.out -coverpkg=./... run: go test -v ./... -covermode=count -coverprofile=coverage.out -coverpkg=./...
- name: Install goveralls
run: go install github.com/mattn/goveralls@latest
- name: Send coverage - name: Send coverage
if: "matrix.platform == 'ubuntu-latest'" if: "matrix.platform == 'ubuntu-latest'"
env: env:
COVERALLS_TOKEN: ${{ secrets.GITHUB_TOKEN }} COVERALLS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: goveralls -coverprofile=coverage.out -service=github run: |
GO111MODULE=off go get github.com/mattn/goveralls
$(go env GOPATH)/bin/goveralls -coverprofile=coverage.out -service=github

View file

@ -1,105 +0,0 @@
# golangci-lint configuration: an explicit allow-list of linters plus
# per-rule settings for revive.

run:
  # NOTE(review): `deadline` appears to be the legacy spelling of
  # `timeout` - confirm the golangci-lint version in use still honours it.
  deadline: 10m

issues:
  # Do not apply golangci-lint's built-in exclusion patterns.
  exclude-use-default: false
  exclude-rules:
    # Test files are exempt from errcheck and the line-length check.
    - path: _test.go
      linters:
        - errcheck
        - lll
  # Suppress any report whose text matches this pattern.
  exclude:
    - "Package util"

linters:
  # Start from nothing and enable only the linters listed below.
  disable-all: true
  enable:
    - errcheck
    - gosimple
    - govet
    - ineffassign
    - staticcheck
    - typecheck
    - unused
    - gofmt
    - godot
    - makezero
    - misspell
    - revive
    - wastedassign
    - lll

linters-settings:
  revive:
    severity: "warning"
    confidence: 0.8
    # Every rule reports at warning severity; three are switched off
    # (dot-imports, empty-block, unused-parameter).
    rules:
      - name: blank-imports
        severity: warning
        disabled: false
      - name: context-as-argument
        severity: warning
        disabled: false
      - name: context-keys-type
        severity: warning
        disabled: false
      # Switched off.
      - name: dot-imports
        severity: warning
        disabled: true
      - name: error-return
        severity: warning
        disabled: false
      - name: error-strings
        severity: warning
        disabled: false
      - name: error-naming
        severity: warning
        disabled: false
      - name: exported
        severity: warning
        disabled: false
      - name: increment-decrement
        severity: warning
        disabled: false
      - name: var-naming
        severity: warning
        disabled: false
      - name: var-declaration
        severity: warning
        disabled: false
      - name: package-comments
        severity: warning
        disabled: false
      - name: range
        severity: warning
        disabled: false
      - name: receiver-naming
        severity: warning
        disabled: false
      - name: time-naming
        severity: warning
        disabled: false
      - name: unexported-return
        severity: warning
        disabled: false
      - name: indent-error-flow
        severity: warning
        disabled: false
      - name: errorf
        severity: warning
        disabled: false
      # Switched off.
      - name: empty-block
        severity: warning
        disabled: true
      - name: superfluous-else
        severity: warning
        disabled: false
      # Switched off.
      - name: unused-parameter
        severity: warning
        disabled: true
      - name: unreachable-code
        severity: warning
        disabled: false
      - name: redefines-builtin-id
        severity: warning
        disabled: false

View file

@ -1,7 +1,4 @@
.PHONY: test fuzz lint .PHONY: test fuzz
lint:
golangci-lint run -c .golangci.yml ./...
test: test:
go test -coverprofile=profile.out -coverpkg=github.com/yuin/goldmark,github.com/yuin/goldmark/ast,github.com/yuin/goldmark/extension,github.com/yuin/goldmark/extension/ast,github.com/yuin/goldmark/parser,github.com/yuin/goldmark/renderer,github.com/yuin/goldmark/renderer/html,github.com/yuin/goldmark/text,github.com/yuin/goldmark/util ./... go test -coverprofile=profile.out -coverpkg=github.com/yuin/goldmark,github.com/yuin/goldmark/ast,github.com/yuin/goldmark/extension,github.com/yuin/goldmark/extension/ast,github.com/yuin/goldmark/parser,github.com/yuin/goldmark/renderer,github.com/yuin/goldmark/renderer/html,github.com/yuin/goldmark/text,github.com/yuin/goldmark/util ./...
@ -10,4 +7,10 @@ cov: test
go tool cover -html=profile.out go tool cover -html=profile.out
fuzz: fuzz:
cd ./fuzz && go test -fuzz=Fuzz which go-fuzz > /dev/null 2>&1 || (GO111MODULE=off go get -u github.com/dvyukov/go-fuzz/go-fuzz github.com/dvyukov/go-fuzz/go-fuzz-build; GO111MODULE=off go get -d github.com/dvyukov/go-fuzz-corpus; true)
rm -rf ./fuzz/corpus
rm -rf ./fuzz/crashers
rm -rf ./fuzz/suppressions
rm -f ./fuzz/fuzz-fuzz.zip
cd ./fuzz && go-fuzz-build
cd ./fuzz && go-fuzz

131
README.md
View file

@ -2,15 +2,13 @@ goldmark
========================================== ==========================================
[![https://pkg.go.dev/github.com/yuin/goldmark](https://pkg.go.dev/badge/github.com/yuin/goldmark.svg)](https://pkg.go.dev/github.com/yuin/goldmark) [![https://pkg.go.dev/github.com/yuin/goldmark](https://pkg.go.dev/badge/github.com/yuin/goldmark.svg)](https://pkg.go.dev/github.com/yuin/goldmark)
[![https://github.com/yuin/goldmark/actions?query=workflow:test](https://github.com/yuin/goldmark/actions/workflows/test.yaml/badge.svg?branch=master&event=push)](https://github.com/yuin/goldmark/actions?query=workflow:test) [![https://github.com/yuin/goldmark/actions?query=workflow:test](https://github.com/yuin/goldmark/workflows/test/badge.svg?branch=master&event=push)](https://github.com/yuin/goldmark/actions?query=workflow:test)
[![https://coveralls.io/github/yuin/goldmark](https://coveralls.io/repos/github/yuin/goldmark/badge.svg?branch=master)](https://coveralls.io/github/yuin/goldmark) [![https://coveralls.io/github/yuin/goldmark](https://coveralls.io/repos/github/yuin/goldmark/badge.svg?branch=master)](https://coveralls.io/github/yuin/goldmark)
[![https://goreportcard.com/report/github.com/yuin/goldmark](https://goreportcard.com/badge/github.com/yuin/goldmark)](https://goreportcard.com/report/github.com/yuin/goldmark) [![https://goreportcard.com/report/github.com/yuin/goldmark](https://goreportcard.com/badge/github.com/yuin/goldmark)](https://goreportcard.com/report/github.com/yuin/goldmark)
> A Markdown parser written in Go. Easy to extend, standards-compliant, well-structured. > A Markdown parser written in Go. Easy to extend, standards-compliant, well-structured.
goldmark is compliant with CommonMark 0.31.2. goldmark is compliant with CommonMark 0.29.
- [goldmark playground](https://yuin.github.io/goldmark/playground/) : Try goldmark online. This playground is built with WASM(5-10MB).
Motivation Motivation
---------------------- ----------------------
@ -48,7 +46,7 @@ Features
renderers. renderers.
- **Performance.** goldmark's performance is on par with that of cmark, - **Performance.** goldmark's performance is on par with that of cmark,
the CommonMark reference implementation written in C. the CommonMark reference implementation written in C.
- **Robust.** goldmark is tested with `go test --fuzz`. - **Robust.** goldmark is tested with [go-fuzz](https://github.com/dvyukov/go-fuzz), a fuzz testing tool.
- **Built-in extensions.** goldmark ships with common extensions like tables, strikethrough, - **Built-in extensions.** goldmark ships with common extensions like tables, strikethrough,
task lists, and definition lists. task lists, and definition lists.
- **Depends only on standard libraries.** - **Depends only on standard libraries.**
@ -182,8 +180,6 @@ Parser and Renderer options
- [PHP Markdown Extra: Footnotes](https://michelf.ca/projects/php-markdown/extra/#footnotes) - [PHP Markdown Extra: Footnotes](https://michelf.ca/projects/php-markdown/extra/#footnotes)
- `extension.Typographer` - `extension.Typographer`
- This extension substitutes punctuations with typographic entities like [smartypants](https://daringfireball.net/projects/smartypants/). - This extension substitutes punctuations with typographic entities like [smartypants](https://daringfireball.net/projects/smartypants/).
- `extension.CJK`
- This extension is a shortcut for CJK related functionalities.
### Attributes ### Attributes
The `parser.WithAttribute` option allows you to define attributes on some elements. The `parser.WithAttribute` option allows you to define attributes on some elements.
@ -262,7 +258,7 @@ You can override autolinking patterns via options.
| Functional option | Type | Description | | Functional option | Type | Description |
| ----------------- | ---- | ----------- | | ----------------- | ---- | ----------- |
| `extension.WithLinkifyAllowedProtocols` | `[][]byte \| []string` | List of allowed protocols such as `[]string{ "http:" }` | | `extension.WithLinkifyAllowedProtocols` | `[][]byte` | List of allowed protocols such as `[][]byte{ []byte("http:") }` |
| `extension.WithLinkifyURLRegexp` | `*regexp.Regexp` | Regexp that defines URLs, including protocols | | `extension.WithLinkifyURLRegexp` | `*regexp.Regexp` | Regexp that defines URLs, including protocols |
| `extension.WithLinkifyWWWRegexp` | `*regexp.Regexp` | Regexp that defines URL starting with `www.`. This pattern corresponds to [the extended www autolink](https://github.github.com/gfm/#extended-www-autolink) | | `extension.WithLinkifyWWWRegexp` | `*regexp.Regexp` | Regexp that defines URL starting with `www.`. This pattern corresponds to [the extended www autolink](https://github.github.com/gfm/#extended-www-autolink) |
| `extension.WithLinkifyEmailRegexp` | `*regexp.Regexp` | Regexp that defines email addresses | | `extension.WithLinkifyEmailRegexp` | `*regexp.Regexp` | Regexp that defines email addresses |
@ -279,12 +275,12 @@ markdown := goldmark.New(
), ),
goldmark.WithExtensions( goldmark.WithExtensions(
extension.NewLinkify( extension.NewLinkify(
extension.WithLinkifyAllowedProtocols([]string{ extension.WithLinkifyAllowedProtocols([][]byte{
"http:", []byte("http:"),
"https:", []byte("https:"),
}), }),
extension.WithLinkifyURLRegexp( extension.WithLinkifyURLRegexp(
xurls.Strict(), xurls.Strict,
), ),
), ),
), ),
@ -299,13 +295,13 @@ This extension has some options:
| Functional option | Type | Description | | Functional option | Type | Description |
| ----------------- | ---- | ----------- | | ----------------- | ---- | ----------- |
| `extension.WithFootnoteIDPrefix` | `[]byte \| string` | a prefix for the id attributes.| | `extension.WithFootnoteIDPrefix` | `[]byte` | a prefix for the id attributes.|
| `extension.WithFootnoteIDPrefixFunction` | `func(gast.Node) []byte` | a function that determines the id attribute for given Node.| | `extension.WithFootnoteIDPrefixFunction` | `func(gast.Node) []byte` | a function that determines the id attribute for given Node.|
| `extension.WithFootnoteLinkTitle` | `[]byte \| string` | an optional title attribute for footnote links.| | `extension.WithFootnoteLinkTitle` | `[]byte` | an optional title attribute for footnote links.|
| `extension.WithFootnoteBacklinkTitle` | `[]byte \| string` | an optional title attribute for footnote backlinks. | | `extension.WithFootnoteBacklinkTitle` | `[]byte` | an optional title attribute for footnote backlinks. |
| `extension.WithFootnoteLinkClass` | `[]byte \| string` | a class for footnote links. This defaults to `footnote-ref`. | | `extension.WithFootnoteLinkClass` | `[]byte` | a class for footnote links. This defaults to `footnote-ref`. |
| `extension.WithFootnoteBacklinkClass` | `[]byte \| string` | a class for footnote backlinks. This defaults to `footnote-backref`. | | `extension.WithFootnoteBacklinkClass` | `[]byte` | a class for footnote backlinks. This defaults to `footnote-backref`. |
| `extension.WithFootnoteBacklinkHTML` | `[]byte \| string` | HTML content for footnote backlinks. This defaults to `↩︎`. | | `extension.WithFootnoteBacklinkHTML` | `[]byte` | HTML content for footnote backlinks. This defaults to `↩︎`. |
Some options can have special substitutions. Occurrences of “^^” in the string will be replaced by the corresponding footnote number in the HTML output. Occurrences of “%%” will be replaced by a number for the reference (footnotes can have multiple references). Some options can have special substitutions. Occurrences of “^^” in the string will be replaced by the corresponding footnote number in the HTML output. Occurrences of “%%” will be replaced by a number for the reference (footnotes can have multiple references).
@ -321,7 +317,7 @@ for _, path := range files {
markdown := goldmark.New( markdown := goldmark.New(
goldmark.WithExtensions( goldmark.WithExtensions(
NewFootnote( NewFootnote(
WithFootnoteIDPrefix(path), WithFootnoteIDPrefix([]byte(path)),
), ),
), ),
) )
@ -373,56 +369,7 @@ footnote-prefix: article1
# My article # My article
``` ```
### CJK extension
CommonMark gives compatibility a high priority, and the original Markdown was designed by Westerners, so CommonMark lacks consideration for languages such as Chinese, Japanese, and Korean (CJK).
This extension provides additional options for CJK users.
| Functional option | Type | Description |
| ----------------- | ---- | ----------- |
| `extension.WithEastAsianLineBreaks` | `...extension.EastAsianLineBreaksStyle` | Soft line breaks are rendered as a newline, which some East Asian readers see as an unnecessary space. With this option, soft line breaks between East Asian wide characters are ignored. This defaults to `EastAsianLineBreaksStyleSimple`. |
| `extension.WithEscapedSpace` | `-` | Emphasis that starts with East Asian punctuation is not interpreted as emphasis unless it is surrounded by spaces (as defined in the CommonMark spec). With this option, you can avoid this inconvenient behavior by putting 'not rendered' spaces around the emphasis, like `太郎は\ **「こんにちわ」**\ といった`. |
#### Styles of Line Breaking
| Style | Description |
| ----- | ----------- |
| `EastAsianLineBreaksStyleSimple` | Soft line breaks are ignored if both sides of the break are east asian wide character. This behavior is the same as [`east_asian_line_breaks`](https://pandoc.org/MANUAL.html#extension-east_asian_line_breaks) in Pandoc. |
| `EastAsianLineBreaksCSS3Draft` | This option implements CSS text level3 [Segment Break Transformation Rules](https://drafts.csswg.org/css-text-3/#line-break-transform) with [some enhancements](https://github.com/w3c/csswg-drafts/issues/5086). |
#### Example of `EastAsianLineBreaksStyleSimple`
Input Markdown:
```md
私はプログラマーです。
東京の会社に勤めています。
GoでWebアプリケーションを開発しています。
```
Output:
```html
<p>私はプログラマーです。東京の会社に勤めています。\nGoでWebアプリケーションを開発しています。</p>
```
#### Example of `EastAsianLineBreaksCSS3Draft`
Input Markdown:
```md
私はプログラマーです。
東京の会社に勤めています。
GoでWebアプリケーションを開発しています。
```
Output:
```html
<p>私はプログラマーです。東京の会社に勤めています。GoでWebアプリケーションを開発しています。</p>
```
Security Security
-------------------- --------------------
By default, goldmark does not render raw HTML or potentially-dangerous URLs. By default, goldmark does not render raw HTML or potentially-dangerous URLs.
@ -440,36 +387,34 @@ blackfriday v2 seems to be the fastest, but as it is not CommonMark compliant, i
goldmark, meanwhile, builds a clean, extensible AST structure, achieves full compliance with goldmark, meanwhile, builds a clean, extensible AST structure, achieves full compliance with
CommonMark, and consumes less memory, all while being reasonably fast. CommonMark, and consumes less memory, all while being reasonably fast.
- MBP 2019 13″(i5, 16GB), Go1.17
``` ```
BenchmarkMarkdown/Blackfriday-v2-8 302 3743747 ns/op 3290445 B/op 20050 allocs/op goos: darwin
BenchmarkMarkdown/GoldMark-8 280 4200974 ns/op 2559738 B/op 13435 allocs/op goarch: amd64
BenchmarkMarkdown/CommonMark-8 226 5283686 ns/op 2702490 B/op 20792 allocs/op BenchmarkMarkdown/Blackfriday-v2-12 326 3465240 ns/op 3298861 B/op 20047 allocs/op
BenchmarkMarkdown/Lute-8 12 92652857 ns/op 10602649 B/op 40555 allocs/op BenchmarkMarkdown/GoldMark-12 303 3927494 ns/op 2574809 B/op 13853 allocs/op
BenchmarkMarkdown/GoMarkdown-8 13 81380167 ns/op 2245002 B/op 22889 allocs/op BenchmarkMarkdown/CommonMark-12 244 4900853 ns/op 2753851 B/op 20527 allocs/op
BenchmarkMarkdown/Lute-12 130 9195245 ns/op 9175030 B/op 123534 allocs/op
BenchmarkMarkdown/GoMarkdown-12 9 113541994 ns/op 2187472 B/op 22173 allocs/op
``` ```
### against cmark (CommonMark reference implementation written in C) ### against cmark (CommonMark reference implementation written in C)
- MBP 2019 13″(i5, 16GB), Go1.17
``` ```
----------- cmark ----------- ----------- cmark -----------
file: _data.md file: _data.md
iteration: 50 iteration: 50
average: 0.0044073057 sec average: 0.0037760639 sec
go run ./goldmark_benchmark.go
------- goldmark ------- ------- goldmark -------
file: _data.md file: _data.md
iteration: 50 iteration: 50
average: 0.0041611990 sec average: 0.0040964230 sec
``` ```
As you can see, goldmark's performance is on par with cmark's. As you can see, goldmark's performance is on par with cmark's.
Extensions Extensions
-------------------- --------------------
### List of extensions
- [goldmark-meta](https://github.com/yuin/goldmark-meta): A YAML metadata - [goldmark-meta](https://github.com/yuin/goldmark-meta): A YAML metadata
extension for the goldmark Markdown parser. extension for the goldmark Markdown parser.
@ -478,30 +423,6 @@ Extensions
- [goldmark-emoji](https://github.com/yuin/goldmark-emoji): An emoji - [goldmark-emoji](https://github.com/yuin/goldmark-emoji): An emoji
extension for the goldmark Markdown parser. extension for the goldmark Markdown parser.
- [goldmark-mathjax](https://github.com/litao91/goldmark-mathjax): Mathjax support for the goldmark markdown parser - [goldmark-mathjax](https://github.com/litao91/goldmark-mathjax): Mathjax support for the goldmark markdown parser
- [goldmark-pdf](https://github.com/stephenafamo/goldmark-pdf): A PDF renderer that can be passed to `goldmark.WithRenderer()`.
- [goldmark-hashtag](https://github.com/abhinav/goldmark-hashtag): Adds support for `#hashtag`-based tagging to goldmark.
- [goldmark-wikilink](https://github.com/abhinav/goldmark-wikilink): Adds support for `[[wiki]]`-style links to goldmark.
- [goldmark-anchor](https://github.com/abhinav/goldmark-anchor): Adds anchors (permalinks) next to all headers in a document.
- [goldmark-figure](https://github.com/mangoumbrella/goldmark-figure): Adds support for rendering paragraphs starting with an image to `<figure>` elements.
- [goldmark-frontmatter](https://github.com/abhinav/goldmark-frontmatter): Adds support for YAML, TOML, and custom front matter to documents.
- [goldmark-toc](https://github.com/abhinav/goldmark-toc): Adds support for generating tables-of-contents for goldmark documents.
- [goldmark-mermaid](https://github.com/abhinav/goldmark-mermaid): Adds support for rendering [Mermaid](https://mermaid-js.github.io/mermaid/) diagrams in goldmark documents.
- [goldmark-pikchr](https://github.com/jchenry/goldmark-pikchr): Adds support for rendering [Pikchr](https://pikchr.org/home/doc/trunk/homepage.md) diagrams in goldmark documents.
- [goldmark-embed](https://github.com/13rac1/goldmark-embed): Adds support for rendering embeds from YouTube links.
- [goldmark-latex](https://github.com/soypat/goldmark-latex): A $\LaTeX$ renderer that can be passed to `goldmark.WithRenderer()`.
- [goldmark-fences](https://github.com/stefanfritsch/goldmark-fences): Support for pandoc-style [fenced divs](https://pandoc.org/MANUAL.html#divs-and-spans) in goldmark.
- [goldmark-d2](https://github.com/FurqanSoftware/goldmark-d2): Adds support for [D2](https://d2lang.com/) diagrams.
- [goldmark-katex](https://github.com/FurqanSoftware/goldmark-katex): Adds support for [KaTeX](https://katex.org/) math and equations.
- [goldmark-img64](https://github.com/tenkoh/goldmark-img64): Adds support for embedding images into the document as DataURL (base64 encoded).
- [goldmark-enclave](https://github.com/quailyquaily/goldmark-enclave): Adds support for embedding youtube/bilibili video, X's [oembed X](https://publish.x.com/), [tradingview chart](https://www.tradingview.com/widget/)'s chart, [quaily widget](https://quaily.com), [spotify embeds](https://developer.spotify.com/documentation/embeds), [dify embed](https://dify.ai/) and html audio into the document.
- [goldmark-wiki-table](https://github.com/movsb/goldmark-wiki-table): Adds support for embedding Wiki Tables.
- [goldmark-tgmd](https://github.com/Mad-Pixels/goldmark-tgmd): A Telegram markdown renderer that can be passed to `goldmark.WithRenderer()`.
### Loading extensions at runtime
[goldmark-dynamic](https://github.com/yuin/goldmark-dynamic) allows you to write a goldmark extension in Lua and load it at runtime without re-compilation.
Please refer to [goldmark-dynamic](https://github.com/yuin/goldmark-dynamic) for details.
goldmark internal(for extension developers) goldmark internal(for extension developers)
---------------------------------------------- ----------------------------------------------

View file

@ -4,39 +4,18 @@ ifeq ($(OS),Windows_NT)
CMARK_BIN=cmark_benchmark.exe CMARK_BIN=cmark_benchmark.exe
CMARK_RUN=bash -c "PATH=./cmark-master/build/src:$${PATH} ./$(CMARK_BIN)" CMARK_RUN=bash -c "PATH=./cmark-master/build/src:$${PATH} ./$(CMARK_BIN)"
endif endif
ifneq ($(WSL_INTEROP),)
CMARK_BIN=cmark_benchmark.exe
CMARK_RUN=cp ./cmark-master/build-mingw/windows/bin/libcmark.dll . && ./$(CMARK_BIN); rm -f libcmark.dll
endif
.PHONY: run .PHONY: run
run: $(CMARK_BIN) run: $(CMARK_BIN)
@ $(CMARK_RUN) $(CMARK_RUN)
@ if [ -z "$${WSL_INTEROP}" ]; then \ go run ./goldmark_benchmark.go
go run ./goldmark_benchmark.go; \
else \
GOOS=windows GOARCH=amd64 go build -o goldmark_benchmark.exe ./goldmark_benchmark.go && ./goldmark_benchmark.exe; \
fi
./cmark-master/Makefile: ./cmark-master/build/src/config.h:
wget -nc -O cmark.zip https://github.com/commonmark/cmark/archive/master.zip wget -nc -O cmark.zip https://github.com/commonmark/cmark/archive/master.zip
unzip cmark.zip unzip cmark.zip
rm -f cmark.zip rm -f cmark.zip
@ if [ -z "$${WSL_INTEROP}" ]; then \ cd cmark-master && make
cd cmark-master && make; \
else \
cd cmark-master && make mingw; \
fi
$(CMARK_BIN): ./cmark-master/Makefile $(CMARK_BIN): ./cmark-master/build/src/config.h
@ if [ -z "$${WSL_INTEROP}" ]; then \ gcc -I./cmark-master/build/src -I./cmark-master/src cmark_benchmark.c -o $(CMARK_BIN) -L./cmark-master/build/src -lcmark
gcc -I./cmark-master/build/src -I./cmark-master/src cmark_benchmark.c -o $(CMARK_BIN) -L./cmark-master/build/src -lcmark; \
else \
i686-w64-mingw32-gcc -I./cmark-master/build-mingw/windows/include cmark_benchmark.c -o $(CMARK_BIN) -L./cmark-master/build-mingw/windows/lib -lcmark.dll; \
fi
.PHONY: clean
clean:
rm -f $(CMARK_BIN)
rm -f goldmark_benchmark.exe

View file

@ -1,4 +1,4 @@
package benchmark package main
import ( import (
"bytes" "bytes"
@ -58,8 +58,8 @@ func BenchmarkMarkdown(b *testing.B) {
luteEngine.SetAutoSpace(false) luteEngine.SetAutoSpace(false)
luteEngine.SetFixTermTypo(false) luteEngine.SetFixTermTypo(false)
r := func(src []byte) ([]byte, error) { r := func(src []byte) ([]byte, error) {
out := luteEngine.MarkdownStr("Benchmark", util.BytesToReadOnlyString(src)) out, err := luteEngine.MarkdownStr("Benchmark", util.BytesToReadOnlyString(src))
return util.StringToReadOnlyBytes(out), nil return util.StringToReadOnlyBytes(out), err
} }
doBenchmark(b, r) doBenchmark(b, r)
}) })

View file

@ -1,25 +0,0 @@
// Go module declaring the Markdown libraries required alongside goldmark.
// NOTE(review): "banchmark" looks like a typo for "benchmark"; renaming
// changes the module path, so confirm nothing depends on it before fixing.
module banchmark
go 1.17
// Direct dependencies.
require (
github.com/88250/lute v1.7.5
github.com/gomarkdown/markdown v0.0.0-20230322041520-c84983bdbf2a
github.com/russross/blackfriday/v2 v2.1.0
github.com/yuin/goldmark v0.0.0
gitlab.com/golang-commonmark/markdown v0.0.0-20211110145824-bf3e522c626a
)
// Indirect dependencies, pulled in by the modules above.
require (
github.com/alecthomas/chroma v0.10.0 // indirect
github.com/dlclark/regexp2 v1.10.0 // indirect
github.com/gopherjs/gopherjs v1.17.2 // indirect
gitlab.com/golang-commonmark/html v0.0.0-20191124015941-a22733972181 // indirect
gitlab.com/golang-commonmark/linkify v0.0.0-20191026162114-a0c2df6c8f82 // indirect
gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84 // indirect
gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f // indirect
golang.org/x/text v0.10.0 // indirect
)
// Redirect the gopkg.in path of blackfriday v2.0.1 to its GitHub module path.
replace gopkg.in/russross/blackfriday.v2 v2.0.1 => github.com/russross/blackfriday/v2 v2.0.1
// Resolve the placeholder goldmark v0.0.0 to the local sources two directories up.
replace github.com/yuin/goldmark v0.0.0 => ../../

View file

@ -1,42 +0,0 @@
github.com/88250/lute v1.7.5 h1:mcPFURh5sK1WH1kFRjqK5DkMWOfVN2BhyrXitN8GmpQ=
github.com/88250/lute v1.7.5/go.mod h1:cEoBGi0zArPqAsp0MdG9SKinvH/xxZZWXU7sRx8vHSA=
github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek=
github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/gomarkdown/markdown v0.0.0-20230322041520-c84983bdbf2a h1:AWZzzFrqyjYlRloN6edwTLTUbKxf5flLXNuTBDm3Ews=
github.com/gomarkdown/markdown v0.0.0-20230322041520-c84983bdbf2a/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA=
github.com/gopherjs/gopherjs v1.17.2 h1:fQnZVsXk8uxXIStYb0N4bGk7jeyTalG/wsZjQ25dO0g=
github.com/gopherjs/gopherjs v1.17.2/go.mod h1:pRRIvn/QzFLrKfvEz3qUuEhtE/zLCWfreZ6J5gM2i+k=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/yuin/goldmark v1.2.1 h1:ruQGxdhGHe7FWOJPT0mKs5+pD2Xs1Bm/kdGlHO04FmM=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
gitlab.com/golang-commonmark/html v0.0.0-20191124015941-a22733972181 h1:K+bMSIx9A7mLES1rtG+qKduLIXq40DAzYHtb0XuCukA=
gitlab.com/golang-commonmark/html v0.0.0-20191124015941-a22733972181/go.mod h1:dzYhVIwWCtzPAa4QP98wfB9+mzt33MSmM8wsKiMi2ow=
gitlab.com/golang-commonmark/linkify v0.0.0-20191026162114-a0c2df6c8f82 h1:oYrL81N608MLZhma3ruL8qTM4xcpYECGut8KSxRY59g=
gitlab.com/golang-commonmark/linkify v0.0.0-20191026162114-a0c2df6c8f82/go.mod h1:Gn+LZmCrhPECMD3SOKlE+BOHwhOYD9j7WT9NUtkCrC8=
gitlab.com/golang-commonmark/markdown v0.0.0-20211110145824-bf3e522c626a h1:O85GKETcmnCNAfv4Aym9tepU8OE0NmcZNqPlXcsBKBs=
gitlab.com/golang-commonmark/markdown v0.0.0-20211110145824-bf3e522c626a/go.mod h1:LaSIs30YPGs1H5jwGgPhLzc8vkNc/k0rDX/fEZqiU/M=
gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84 h1:qqjvoVXdWIcZCLPMlzgA7P9FZWdPGPvP/l3ef8GzV6o=
gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84/go.mod h1:IJZ+fdMvbW2qW6htJx7sLJ04FEs4Ldl/MDsJtMKywfw=
gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f h1:Wku8eEdeJqIOFHtrfkYUByc4bCaTeA6fL0UJgfEiFMI=
gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f/go.mod h1:Tiuhl+njh/JIg0uS/sOJVYi0x2HEa5rc1OAaVsb5tAs=
gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638 h1:uPZaMiz6Sz0PZs3IZJWpU5qHKGNy///1pacZC9txiUI=
gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638/go.mod h1:EGRJaqe2eO9XGmFtQCvV3Lm9NLico3UhFwUpCG/+mVU=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.10.0 h1:UpjohKhiEgNc0CSauXmwYftY1+LlaC75SJwh0SgCX58=
golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View file

@ -323,523 +323,3 @@ foo
</li> </li>
</ul> </ul>
//= = = = = = = = = = = = = = = = = = = = = = = =// //= = = = = = = = = = = = = = = = = = = = = = = =//
21: Fenced code block within list can start with tab
//- - - - - - - - -//
- List
```
A
B
C
```
//- - - - - - - - -//
<ul>
<li>
<p>List</p>
<pre><code>A
B
C
</code></pre>
</li>
</ul>
//= = = = = = = = = = = = = = = = = = = = = = = =//
22: Indented code block within list can start with tab
//- - - - - - - - -//
- List
A
B
C
a
//- - - - - - - - -//
<ul>
<li>
<p>List</p>
<pre><code>A
B
C
</code></pre>
</li>
</ul>
<p>a</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
23: Emphasis corner case(yuin/goldmark#245)
//- - - - - - - - -//
a* b c d *e*
//- - - - - - - - -//
<p>a* b c d <em>e</em></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
24: HTML block tags can contain trailing spaces
//- - - - - - - - -//
<aaa >
//- - - - - - - - -//
<aaa >
//= = = = = = = = = = = = = = = = = = = = = = = =//
25: Indented code blocks can start with tab
//- - - - - - - - -//
x
//- - - - - - - - -//
<pre><code> x
</code></pre>
//= = = = = = = = = = = = = = = = = = = = = = = =//
26: NUL bytes must be replaced with U+FFFD
OPTIONS: {"enableEscape": true}
//- - - - - - - - -//
hello\x00world
<?\x00
//- - - - - - - - -//
<p>hello\ufffdworld</p>
<?\uFFFD
//= = = = = = = = = = = = = = = = = = = = = = = =//
27: Newlines in code spans must be preserved as a space
OPTIONS: {"enableEscape": true}
//- - - - - - - - -//
`\n`
`x\n`
`\nx`
//- - - - - - - - -//
<p><code> </code></p>
<p><code>x </code></p>
<p><code> x</code></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
28: Single # is a heading level 1
//- - - - - - - - -//
#
//- - - - - - - - -//
<h1></h1>
//= = = = = = = = = = = = = = = = = = = = = = = =//
29: An empty list item cannot interrupt a paragraph
//- - - - - - - - -//
x
*
//- - - - - - - - -//
<p>x
*</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
30: A link reference definition followed by a single quote without closer
//- - - - - - - - -//
[x]
[x]: <>
'
//- - - - - - - - -//
<p><a href="">x</a></p>
<p>'</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
31: A link reference definition followed by a double quote without closer
//- - - - - - - - -//
[x]
[x]: <>
"
//- - - - - - - - -//
<p><a href="">x</a></p>
<p>&quot;</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
32: Hex character entities must be limited to 6 characters
//- - - - - - - - -//
&#x0000041;
//- - - - - - - - -//
<p>&amp;#x0000041;</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
33: \x01 should be escaped all the time
OPTIONS: {"enableEscape": true}
//- - - - - - - - -//
[x](\x01)
//- - - - - - - - -//
<p><a href="%01">x</a></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
34: A form feed should not be treated as a space
OPTIONS: {"enableEscape": true}
//- - - - - - - - -//
x \f
//- - - - - - - - -//
<p>x \f</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
35: A link reference definition can contain a new line
//- - - - - - - - -//
This is a [test][foo
bar] 1...2..3...
[foo bar]: /
//- - - - - - - - -//
<p>This is a <a href="/">test</a> 1...2..3...</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
36: Emphasis and links
//- - - - - - - - -//
_a[b_c_](d)
//- - - - - - - - -//
<p>_a<a href="d">b_c_</a></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
37: Tabs and spaces
OPTIONS: {"enableEscape": true}
//- - - - - - - - -//
\t\t x\n
//- - - - - - - - -//
<pre><code>\t x\n</code></pre>
//= = = = = = = = = = = = = = = = = = = = = = = =//
38: Decimal HTML entity literals should allow 7 digits
//- - - - - - - - -//
&#7654321;
//- - - - - - - - -//
<p>\uFFFD</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
39: Decimal HTML entities should not be interpreted as octal when starting with a 0
//- - - - - - - - -//
&#0100;
//- - - - - - - - -//
<p>d</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
40: Invalid HTML tag names
//- - - - - - - - -//
<1>
<a:>
<a\f>
< p>
//- - - - - - - - -//
<p>&lt;1&gt;</p>
<p>&lt;a:&gt;</p>
<p>&lt;a\f&gt;</p>
<p>&lt; p&gt;</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
41: Link references can not contain spaces after link label
//- - - - - - - - -//
[x]
:>
[o] :x
//- - - - - - - - -//
<p>[x]
:&gt;</p>
<p>[o] :x</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
42: Unclosed link reference titles can interrupt link references
//- - - - - - - - -//
[r]:
<>
'
[o]:
x
'
//- - - - - - - - -//
<p>'</p>
<p>'</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
43: A link containing an image containing a link should disable the outer link
//- - - - - - - - -//
[ ![ [b](c) ](x) ](y)
//- - - - - - - - -//
<p>[ <img src="x" alt=" b " /> ](y)</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
44: An empty list item(with trailing spaces) cannot interrupt a paragraph
//- - - - - - - - -//
a
*
//- - - - - - - - -//
<p>a
*</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
45: Multiple empty list items
//- - - - - - - - -//
-
-
//- - - - - - - - -//
<ul>
<li></li>
<li></li>
</ul>
//= = = = = = = = = = = = = = = = = = = = = = = =//
46: Vertical tab should not be treated as spaces
OPTIONS: {"enableEscape": true}
//- - - - - - - - -//
\v
//- - - - - - - - -//
<p>\v</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
47: Escape back slashes should not be treated as hard line breaks
//- - - - - - - - -//
\\\\
a
//- - - - - - - - -//
<p>\
a</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
48: Multiple paragraphs in tight list
//- - - - - - - - -//
- a
>
b
//- - - - - - - - -//
<ul>
<li>a
<blockquote>
</blockquote>
b</li>
</ul>
//= = = = = = = = = = = = = = = = = = = = = = = =//
49: A list item that is indented up to 3 spaces after an empty list item
//- - - - - - - - -//
1.
1. b
-
- b
//- - - - - - - - -//
<ol>
<li></li>
<li>
<p>b</p>
</li>
</ol>
<ul>
<li></li>
<li>
<p>b</p>
</li>
</ul>
//= = = = = = = = = = = = = = = = = = = = = = = =//
50: Spaces before a visible hard linebreak should be preserved
//- - - - - - - - -//
a \
b
//- - - - - - - - -//
<p>a <br />
b</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
51: Empty line in a fenced code block under list items
//- - - - - - - - -//
* This is a list item
```
This is a test
This line will be dropped.
This line will be displayed.
```
//- - - - - - - - -//
<ul>
<li>This is a list item
<pre><code>This is a test
This line will be dropped.
This line will be displayed.
</code></pre>
</li>
</ul>
//= = = = = = = = = = = = = = = = = = = = = = = =//
52: windows-style newline and HTMLs
OPTIONS: {"enableEscape": true}
//- - - - - - - - -//
<a \r\nhref='link'>link</a>
<video autoplay muted loop>\r\n<source src=\"https://example.com/example.mp4\" type=\"video/mp4\">\r\nYour browser does not support the video tag.\r\n</video>
//- - - - - - - - -//
<p><a \r\nhref='link'>link</a></p>
<video autoplay muted loop>\r\n<source src=\"https://example.com/example.mp4\" type=\"video/mp4\">\r\nYour browser does not support the video tag.\r\n</video>
//= = = = = = = = = = = = = = = = = = = = = = = =//
53: HTML comment without trailing new lines
OPTIONS: {"trim": true}
//- - - - - - - - -//
<!--
-->
//- - - - - - - - -//
<!--
-->
//= = = = = = = = = = = = = = = = = = = = = = = =//
54: Escaped characters followed by a null character
OPTIONS: {"enableEscape": true}
//- - - - - - - - -//
\\\x00\"
//- - - - - - - - -//
<p>\\\ufffd&quot;</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
55: inline HTML comment
//- - - - - - - - -//
a <!-- b --> c
a <!-- b -->
//- - - - - - - - -//
<p>a <!-- b --> c</p>
<p>a <!-- b --></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
56: An empty list followed by blockquote
//- - - - - - - - -//
1.
> This is a quote.
//- - - - - - - - -//
<ol>
<li></li>
</ol>
<blockquote>
<p>This is a quote.</p>
</blockquote>
//= = = = = = = = = = = = = = = = = = = = = = = =//
57: Tabbed fenced code block within a list
//- - - - - - - - -//
1.
```
```
//- - - - - - - - -//
<ol>
<li>
<pre><code></code></pre>
</li>
</ol>
//= = = = = = = = = = = = = = = = = = = = = = = =//
58: HTML end tag without trailing new lines
OPTIONS: {"trim": true}
//- - - - - - - - -//
<pre>
</pre>
//- - - - - - - - -//
<pre>
</pre>
//= = = = = = = = = = = = = = = = = = = = = = = =//
59: Raw HTML tag with one new line
//- - - - - - - - -//
<img src=./.assets/logo.svg
/>
//- - - - - - - - -//
<p><img src=./.assets/logo.svg
/></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
60: Raw HTML tag with multiple new lines
//- - - - - - - - -//
<img src=./.assets/logo.svg
/>
//- - - - - - - - -//
<p>&lt;img src=./.assets/logo.svg</p>
<p>/&gt;</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
61: Image alt with a new line
//- - - - - - - - -//
![alt
text](logo.png)
//- - - - - - - - -//
<p><img src="logo.png" alt="alt
text" /></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
62: Image alt with an escaped character
//- - - - - - - - -//
![\`alt](https://example.com/img.png)
//- - - - - - - - -//
<p><img src="https://example.com/img.png" alt="`alt" /></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
63: Emphasis in link label
//- - - - - - - - -//
[*[a]*](b)
//- - - - - - - - -//
<p><a href="b"><em>[a]</em></a></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
64: Nested list under an empty list item
//- - - - - - - - -//
-
- foo
//- - - - - - - - -//
<ul>
<li>
<ul>
<li>foo</li>
</ul>
</li>
</ul>
//= = = = = = = = = = = = = = = = = = = = = = = =//
65: Nested fenced code block with tab
//- - - - - - - - -//
> ```
> 0
> ```
//- - - - - - - - -//
<blockquote>
<pre><code> 0
</code></pre>
</blockquote>
//= = = = = = = = = = = = = = = = = = = = = = = =//
66: EOF should be rendered as a newline with an unclosed block(w/ TAB)
//- - - - - - - - -//
> ```
> 0
//- - - - - - - - -//
<blockquote>
<pre><code> 0
</code></pre>
</blockquote>
//= = = = = = = = = = = = = = = = = = = = = = = =//
67: EOF should be rendered as a newline with an unclosed block
//- - - - - - - - -//
> ```
> 0
//- - - - - - - - -//
<blockquote>
<pre><code> 0
</code></pre>
</blockquote>
//= = = = = = = = = = = = = = = = = = = = = = = =//

View file

@ -60,19 +60,3 @@
//- - - - - - - - -// //- - - - - - - - -//
<h2 id="test--hey-sortclassfineclassshell-doesnt-matter">Test ## {#hey .sort,class=fine,class=shell} Doesn't matter</h2> <h2 id="test--hey-sortclassfineclassshell-doesnt-matter">Test ## {#hey .sort,class=fine,class=shell} Doesn't matter</h2>
//= = = = = = = = = = = = = = = = = = = = = = = =// //= = = = = = = = = = = = = = = = = = = = = = = =//
6: class must be a string
//- - - - - - - - -//
# Test ## {class=0#.}
//- - - - - - - - -//
<h1 id="test--class0">Test ## {class=0#.}</h1>
//= = = = = = = = = = = = = = = = = = = = = = = =//
7: short handed ids can contain hyphens ("-"), underscores ("_"), colons (":"), and periods (".")
//- - - - - - - - -//
# Test ## {#id-foo_bar:baz.qux .foobar}
//- - - - - - - - -//
<h1 id="id-foo_bar:baz.qux" class="foobar">Test</h1>
//= = = = = = = = = = = = = = = = = = = = = = = =//

File diff suppressed because it is too large Load diff

View file

@ -1,61 +0,0 @@
package main
import (
"archive/zip"
"encoding/json"
"io/ioutil"
"log"
"os"
"strconv"
"strings"
)
// TestCase is one entry decoded from the JSON spec corpus that main reads.
type TestCase struct {
// Example is the example number; main appends it to "example-" to name the zip entry.
Example int `json:"example"`
// Markdown is the Markdown source that main writes as the zip entry's contents.
Markdown string `json:"markdown"`
}
// main packs every example of the JSON spec corpus into a zip archive.
//
// Usage: <program> <corpus_output>.zip
//
// It reads _test/spec.json (relative to the working directory), decodes it
// into a slice of TestCase values and stores each example's Markdown as a zip
// entry named "example-<number>". Every failure is logged together with the
// underlying error and terminates the process via log.Fatal*.
func main() {
	// Validate the argument count before indexing: os.Args[1] panics with an
	// index-out-of-range error when the program is started without arguments.
	if len(os.Args) < 2 || !strings.HasSuffix(os.Args[1], ".zip") {
		log.Fatalln("Expected command line:", os.Args[0], "<corpus_output>.zip")
	}
	corpusOut := os.Args[1]

	// Check the error before handing the file to the zip writer.
	zipFile, err := os.Create(corpusOut)
	if err != nil {
		log.Fatalln("Failed creating file:", err)
	}
	zipWriter := zip.NewWriter(zipFile)

	jsonCorpus := "_test/spec.json"
	bs, err := ioutil.ReadFile(jsonCorpus)
	if err != nil {
		log.Fatalln("Could not open file:", jsonCorpus, err)
	}

	var testCases []TestCase
	if err := json.Unmarshal(bs, &testCases); err != nil {
		log.Fatalln("Could not parse file:", jsonCorpus, err)
	}

	for _, c := range testCases {
		fileInZip := "example-" + strconv.Itoa(c.Example)
		f, err := zipWriter.Create(fileInZip)
		if err != nil {
			log.Fatal(err)
		}
		if _, err := f.Write([]byte(c.Markdown)); err != nil {
			log.Fatalf("Failed to write file: %s into zip file: %v", fileInZip, err)
		}
	}

	// Closing the writer flushes the central directory; the archive is not
	// valid until both the writer and the underlying file closed cleanly.
	if err := zipWriter.Close(); err != nil {
		log.Fatalln("Failed to close zip writer:", err)
	}
	if err := zipFile.Close(); err != nil {
		log.Fatalln("Failed to close file:", err)
	}
}

View file

@ -20,7 +20,7 @@ type caseFolding struct {
} }
func main() { func main() {
url := "http://www.unicode.org/Public/14.0.0/ucd/CaseFolding.txt" url := "http://www.unicode.org/Public/12.1.0/ucd/CaseFolding.txt"
resp, err := http.Get(url) resp, err := http.Get(url)
if err != nil { if err != nil {

View file

@ -39,7 +39,7 @@ func NewNodeKind(name string) NodeKind {
return kindMax return kindMax
} }
// An Attribute is an attribute of the Node. // An Attribute is an attribute of the Node
type Attribute struct { type Attribute struct {
Name []byte Name []byte
Value interface{} Value interface{}
@ -123,12 +123,6 @@ type Node interface {
Dump(source []byte, level int) Dump(source []byte, level int)
// Text returns text values of this node. // Text returns text values of this node.
// This method is valid only for some inline nodes.
// If this node is a block node, Text returns a text value as reasonable as possible.
// Notice that there are no 'correct' text values for the block nodes.
// Result for the block nodes may be different from your expectation.
//
// Deprecated: Use other properties of the node to get the text value(i.e. Paragraph.Lines, Text.Value).
Text(source []byte) []byte Text(source []byte) []byte
// HasBlankPreviousLines returns true if the row before this node is blank, // HasBlankPreviousLines returns true if the row before this node is blank,
@ -254,7 +248,7 @@ func (n *BaseNode) RemoveChildren(self Node) {
n.childCount = 0 n.childCount = 0
} }
// SortChildren implements Node.SortChildren. // SortChildren implements Node.SortChildren
func (n *BaseNode) SortChildren(comparator func(n1, n2 Node) int) { func (n *BaseNode) SortChildren(comparator func(n1, n2 Node) int) {
var sorted Node var sorted Node
current := n.firstChild current := n.firstChild
@ -364,7 +358,7 @@ func (n *BaseNode) InsertBefore(self, v1, insertee Node) {
} }
} }
// OwnerDocument implements Node.OwnerDocument. // OwnerDocument implements Node.OwnerDocument
func (n *BaseNode) OwnerDocument() *Document { func (n *BaseNode) OwnerDocument() *Document {
d := n.Parent() d := n.Parent()
for { for {
@ -380,18 +374,11 @@ func (n *BaseNode) OwnerDocument() *Document {
return nil return nil
} }
// Text implements Node.Text . // Text implements Node.Text .
//
// Deprecated: Use other properties of the node to get the text value(i.e. Paragraph.Lines, Text.Value).
func (n *BaseNode) Text(source []byte) []byte { func (n *BaseNode) Text(source []byte) []byte {
var buf bytes.Buffer var buf bytes.Buffer
for c := n.firstChild; c != nil; c = c.NextSibling() { for c := n.firstChild; c != nil; c = c.NextSibling() {
buf.Write(c.Text(source)) buf.Write(c.Text(source))
if sb, ok := c.(interface {
SoftLineBreak() bool
}); ok && sb.SoftLineBreak() {
buf.WriteByte('\n')
}
} }
return buf.Bytes() return buf.Bytes()
} }
@ -412,7 +399,7 @@ func (n *BaseNode) SetAttribute(name []byte, value interface{}) {
n.attributes = append(n.attributes, Attribute{name, value}) n.attributes = append(n.attributes, Attribute{name, value})
} }
// SetAttributeString implements Node.SetAttributeString. // SetAttributeString implements Node.SetAttributeString
func (n *BaseNode) SetAttributeString(name string, value interface{}) { func (n *BaseNode) SetAttributeString(name string, value interface{}) {
n.SetAttribute(util.StringToReadOnlyBytes(name), value) n.SetAttribute(util.StringToReadOnlyBytes(name), value)
} }
@ -435,12 +422,12 @@ func (n *BaseNode) AttributeString(s string) (interface{}, bool) {
return n.Attribute(util.StringToReadOnlyBytes(s)) return n.Attribute(util.StringToReadOnlyBytes(s))
} }
// Attributes implements Node.Attributes. // Attributes implements Node.Attributes
func (n *BaseNode) Attributes() []Attribute { func (n *BaseNode) Attributes() []Attribute {
return n.attributes return n.attributes
} }
// RemoveAttributes implements Node.RemoveAttributes. // RemoveAttributes implements Node.RemoveAttributes
func (n *BaseNode) RemoveAttributes() { func (n *BaseNode) RemoveAttributes() {
n.attributes = nil n.attributes = nil
} }

View file

@ -5,6 +5,21 @@ import (
"testing" "testing"
) )
// TestRemoveChildren attaches two documents to a parent document, removes all
// of the parent's children and logs the previous sibling of the second child.
// It makes no assertions; it passes as long as none of the calls panic.
func TestRemoveChildren(t *testing.T) {
	parent := NewDocument()
	first, second := NewDocument(), NewDocument()

	parent.AppendChild(parent, first)
	parent.AppendChild(parent, second)

	parent.RemoveChildren(parent)

	t.Logf("%+v", second.PreviousSibling())
}
func TestWalk(t *testing.T) { func TestWalk(t *testing.T) {
tests := []struct { tests := []struct {
name string name string

View file

@ -14,12 +14,12 @@ type BaseBlock struct {
lines *textm.Segments lines *textm.Segments
} }
// Type implements Node.Type. // Type implements Node.Type
func (b *BaseBlock) Type() NodeType { func (b *BaseBlock) Type() NodeType {
return TypeBlock return TypeBlock
} }
// IsRaw implements Node.IsRaw. // IsRaw implements Node.IsRaw
func (b *BaseBlock) IsRaw() bool { func (b *BaseBlock) IsRaw() bool {
return false return false
} }
@ -34,7 +34,7 @@ func (b *BaseBlock) SetBlankPreviousLines(v bool) {
b.blankPreviousLines = v b.blankPreviousLines = v
} }
// Lines implements Node.Lines. // Lines implements Node.Lines
func (b *BaseBlock) Lines() *textm.Segments { func (b *BaseBlock) Lines() *textm.Segments {
if b.lines == nil { if b.lines == nil {
b.lines = textm.NewSegments() b.lines = textm.NewSegments()
@ -42,7 +42,7 @@ func (b *BaseBlock) Lines() *textm.Segments {
return b.lines return b.lines
} }
// SetLines implements Node.SetLines. // SetLines implements Node.SetLines
func (b *BaseBlock) SetLines(v *textm.Segments) { func (b *BaseBlock) SetLines(v *textm.Segments) {
b.lines = v b.lines = v
} }
@ -72,7 +72,7 @@ func (n *Document) Kind() NodeKind {
return KindDocument return KindDocument
} }
// OwnerDocument implements Node.OwnerDocument. // OwnerDocument implements Node.OwnerDocument
func (n *Document) OwnerDocument() *Document { func (n *Document) OwnerDocument() *Document {
return n return n
} }
@ -87,20 +87,7 @@ func (n *Document) Meta() map[string]interface{} {
// SetMeta sets given metadata to this document. // SetMeta sets given metadata to this document.
func (n *Document) SetMeta(meta map[string]interface{}) { func (n *Document) SetMeta(meta map[string]interface{}) {
if n.meta == nil { n.meta = meta
n.meta = map[string]interface{}{}
}
for k, v := range meta {
n.meta[k] = v
}
}
// AddMeta adds given metadata to this document.
func (n *Document) AddMeta(key string, value interface{}) {
if n.meta == nil {
n.meta = map[string]interface{}{}
}
n.meta[key] = value
} }
// NewDocument returns a new Document node. // NewDocument returns a new Document node.
@ -130,13 +117,6 @@ func (n *TextBlock) Kind() NodeKind {
return KindTextBlock return KindTextBlock
} }
// Text implements Node.Text.
//
// Deprecated: Use other properties of the node to get the text value(i.e. TextBlock.Lines).
func (n *TextBlock) Text(source []byte) []byte {
return n.Lines().Value(source)
}
// NewTextBlock returns a new TextBlock node. // NewTextBlock returns a new TextBlock node.
func NewTextBlock() *TextBlock { func NewTextBlock() *TextBlock {
return &TextBlock{ return &TextBlock{
@ -162,13 +142,6 @@ func (n *Paragraph) Kind() NodeKind {
return KindParagraph return KindParagraph
} }
// Text implements Node.Text.
//
// Deprecated: Use other properties of the node to get the text value(i.e. Paragraph.Lines).
func (n *Paragraph) Text(source []byte) []byte {
return n.Lines().Value(source)
}
// NewParagraph returns a new Paragraph node. // NewParagraph returns a new Paragraph node.
func NewParagraph() *Paragraph { func NewParagraph() *Paragraph {
return &Paragraph{ return &Paragraph{
@ -263,13 +236,6 @@ func (n *CodeBlock) Kind() NodeKind {
return KindCodeBlock return KindCodeBlock
} }
// Text implements Node.Text.
//
// Deprecated: Use other properties of the node to get the text value(i.e. CodeBlock.Lines).
func (n *CodeBlock) Text(source []byte) []byte {
return n.Lines().Value(source)
}
// NewCodeBlock returns a new CodeBlock node. // NewCodeBlock returns a new CodeBlock node.
func NewCodeBlock() *CodeBlock { func NewCodeBlock() *CodeBlock {
return &CodeBlock{ return &CodeBlock{
@ -325,13 +291,6 @@ func (n *FencedCodeBlock) Kind() NodeKind {
return KindFencedCodeBlock return KindFencedCodeBlock
} }
// Text implements Node.Text.
//
// Deprecated: Use other properties of the node to get the text value(i.e. FencedCodeBlock.Lines).
func (n *FencedCodeBlock) Text(source []byte) []byte {
return n.Lines().Value(source)
}
// NewFencedCodeBlock return a new FencedCodeBlock node. // NewFencedCodeBlock return a new FencedCodeBlock node.
func NewFencedCodeBlock(info *Text) *FencedCodeBlock { func NewFencedCodeBlock(info *Text) *FencedCodeBlock {
return &FencedCodeBlock{ return &FencedCodeBlock{
@ -373,7 +332,7 @@ type List struct {
Marker byte Marker byte
// IsTight is a true if this list is a 'tight' list. // IsTight is a true if this list is a 'tight' list.
// See https://spec.commonmark.org/0.30/#loose for details. // See https://spec.commonmark.org/0.29/#loose for details.
IsTight bool IsTight bool
// Start is an initial number of this ordered list. // Start is an initial number of this ordered list.
@ -455,23 +414,23 @@ func NewListItem(offset int) *ListItem {
} }
// HTMLBlockType represents kinds of an html blocks. // HTMLBlockType represents kinds of an html blocks.
// See https://spec.commonmark.org/0.30/#html-blocks // See https://spec.commonmark.org/0.29/#html-blocks
type HTMLBlockType int type HTMLBlockType int
const ( const (
// HTMLBlockType1 represents type 1 html blocks. // HTMLBlockType1 represents type 1 html blocks
HTMLBlockType1 HTMLBlockType = iota + 1 HTMLBlockType1 HTMLBlockType = iota + 1
// HTMLBlockType2 represents type 2 html blocks. // HTMLBlockType2 represents type 2 html blocks
HTMLBlockType2 HTMLBlockType2
// HTMLBlockType3 represents type 3 html blocks. // HTMLBlockType3 represents type 3 html blocks
HTMLBlockType3 HTMLBlockType3
// HTMLBlockType4 represents type 4 html blocks. // HTMLBlockType4 represents type 4 html blocks
HTMLBlockType4 HTMLBlockType4
// HTMLBlockType5 represents type 5 html blocks. // HTMLBlockType5 represents type 5 html blocks
HTMLBlockType5 HTMLBlockType5
// HTMLBlockType6 represents type 6 html blocks. // HTMLBlockType6 represents type 6 html blocks
HTMLBlockType6 HTMLBlockType6
// HTMLBlockType7 represents type 7 html blocks. // HTMLBlockType7 represents type 7 html blocks
HTMLBlockType7 HTMLBlockType7
) )
@ -526,17 +485,6 @@ func (n *HTMLBlock) Kind() NodeKind {
return KindHTMLBlock return KindHTMLBlock
} }
// Text implements Node.Text.
//
// Deprecated: Use other properties of the node to get the text value(i.e. HTMLBlock.Lines).
func (n *HTMLBlock) Text(source []byte) []byte {
ret := n.Lines().Value(source)
if n.HasClosure() {
ret = append(ret, n.ClosureLine.Value(source)...)
}
return ret
}
// NewHTMLBlock returns a new HTMLBlock node. // NewHTMLBlock returns a new HTMLBlock node.
func NewHTMLBlock(typ HTMLBlockType) *HTMLBlock { func NewHTMLBlock(typ HTMLBlockType) *HTMLBlock {
return &HTMLBlock{ return &HTMLBlock{

View file

@ -13,12 +13,12 @@ type BaseInline struct {
BaseNode BaseNode
} }
// Type implements Node.Type. // Type implements Node.Type
func (b *BaseInline) Type() NodeType { func (b *BaseInline) Type() NodeType {
return TypeInline return TypeInline
} }
// IsRaw implements Node.IsRaw. // IsRaw implements Node.IsRaw
func (b *BaseInline) IsRaw() bool { func (b *BaseInline) IsRaw() bool {
return false return false
} }
@ -33,12 +33,12 @@ func (b *BaseInline) SetBlankPreviousLines(v bool) {
panic("can not call with inline nodes.") panic("can not call with inline nodes.")
} }
// Lines implements Node.Lines. // Lines implements Node.Lines
func (b *BaseInline) Lines() *textm.Segments { func (b *BaseInline) Lines() *textm.Segments {
panic("can not call with inline nodes.") panic("can not call with inline nodes.")
} }
// SetLines implements Node.SetLines. // SetLines implements Node.SetLines
func (b *BaseInline) SetLines(v *textm.Segments) { func (b *BaseInline) SetLines(v *textm.Segments) {
panic("can not call with inline nodes.") panic("can not call with inline nodes.")
} }
@ -91,7 +91,7 @@ func (n *Text) SetSoftLineBreak(v bool) {
if v { if v {
n.flags |= textSoftLineBreak n.flags |= textSoftLineBreak
} else { } else {
n.flags = n.flags &^ textSoftLineBreak n.flags = n.flags &^ textHardLineBreak
} }
} }
@ -111,7 +111,7 @@ func (n *Text) SetRaw(v bool) {
} }
// HardLineBreak returns true if this node ends with a hard line break. // HardLineBreak returns true if this node ends with a hard line break.
// See https://spec.commonmark.org/0.30/#hard-line-breaks for details. // See https://spec.commonmark.org/0.29/#hard-line-breaks for details.
func (n *Text) HardLineBreak() bool { func (n *Text) HardLineBreak() bool {
return n.flags&textHardLineBreak != 0 return n.flags&textHardLineBreak != 0
} }
@ -132,8 +132,7 @@ func (n *Text) Merge(node Node, source []byte) bool {
if !ok { if !ok {
return false return false
} }
if n.Segment.Stop != t.Segment.Start || t.Segment.Padding != 0 || if n.Segment.Stop != t.Segment.Start || t.Segment.Padding != 0 || source[n.Segment.Stop-1] == '\n' || t.IsRaw() != n.IsRaw() {
source[n.Segment.Stop-1] == '\n' || t.IsRaw() != n.IsRaw() {
return false return false
} }
n.Segment.Stop = t.Segment.Stop n.Segment.Stop = t.Segment.Stop
@ -143,25 +142,17 @@ func (n *Text) Merge(node Node, source []byte) bool {
} }
// Text implements Node.Text. // Text implements Node.Text.
//
// Deprecated: Use other properties of the node to get the text value(i.e. Text.Value).
func (n *Text) Text(source []byte) []byte { func (n *Text) Text(source []byte) []byte {
return n.Segment.Value(source) return n.Segment.Value(source)
} }
// Value returns a value of this node.
// SoftLineBreaks are not included in the returned value.
func (n *Text) Value(source []byte) []byte {
return n.Segment.Value(source)
}
// Dump implements Node.Dump. // Dump implements Node.Dump.
func (n *Text) Dump(source []byte, level int) { func (n *Text) Dump(source []byte, level int) {
fs := textFlagsString(n.flags) fs := textFlagsString(n.flags)
if len(fs) != 0 { if len(fs) != 0 {
fs = "(" + fs + ")" fs = "(" + fs + ")"
} }
fmt.Printf("%sText%s: \"%s\"\n", strings.Repeat(" ", level), fs, strings.TrimRight(string(n.Value(source)), "\n")) fmt.Printf("%sText%s: \"%s\"\n", strings.Repeat(" ", level), fs, strings.TrimRight(string(n.Text(source)), "\n"))
} }
// KindText is a NodeKind of the Text node. // KindText is a NodeKind of the Text node.
@ -223,7 +214,7 @@ func MergeOrReplaceTextSegment(parent Node, n Node, s textm.Segment) {
} }
} }
// A String struct is a textual content that has a concrete value. // A String struct is a textual content that has a concrete value
type String struct { type String struct {
BaseInline BaseInline
@ -266,8 +257,6 @@ func (n *String) SetCode(v bool) {
} }
// Text implements Node.Text. // Text implements Node.Text.
//
// Deprecated: Use other properties of the node to get the text value(i.e. String.Value).
func (n *String) Text(source []byte) []byte { func (n *String) Text(source []byte) []byte {
return n.Value return n.Value
} }
@ -316,7 +305,7 @@ func (n *CodeSpan) IsBlank(source []byte) bool {
return true return true
} }
// Dump implements Node.Dump. // Dump implements Node.Dump
func (n *CodeSpan) Dump(source []byte, level int) { func (n *CodeSpan) Dump(source []byte, level int) {
DumpHelper(n, source, level, nil, nil) DumpHelper(n, source, level, nil, nil)
} }
@ -478,7 +467,7 @@ type AutoLink struct {
// Inline implements Inline.Inline. // Inline implements Inline.Inline.
func (n *AutoLink) Inline() {} func (n *AutoLink) Inline() {}
// Dump implements Node.Dump. // Dump implements Node.Dump
func (n *AutoLink) Dump(source []byte, level int) { func (n *AutoLink) Dump(source []byte, level int) {
segment := n.value.Segment segment := n.value.Segment
m := map[string]string{ m := map[string]string{
@ -502,22 +491,15 @@ func (n *AutoLink) URL(source []byte) []byte {
ret := make([]byte, 0, len(n.Protocol)+s.Len()+3) ret := make([]byte, 0, len(n.Protocol)+s.Len()+3)
ret = append(ret, n.Protocol...) ret = append(ret, n.Protocol...)
ret = append(ret, ':', '/', '/') ret = append(ret, ':', '/', '/')
ret = append(ret, n.value.Value(source)...) ret = append(ret, n.value.Text(source)...)
return ret return ret
} }
return n.value.Value(source) return n.value.Text(source)
} }
// Label returns a label of this node. // Label returns a label of this node.
func (n *AutoLink) Label(source []byte) []byte { func (n *AutoLink) Label(source []byte) []byte {
return n.value.Value(source) return n.value.Text(source)
}
// Text implements Node.Text.
//
// Deprecated: Use other properties of the node to get the text value(i.e. AutoLink.Label).
func (n *AutoLink) Text(source []byte) []byte {
return n.value.Value(source)
} }
// NewAutoLink returns a new AutoLink node. // NewAutoLink returns a new AutoLink node.
@ -558,13 +540,6 @@ func (n *RawHTML) Kind() NodeKind {
return KindRawHTML return KindRawHTML
} }
// Text implements Node.Text.
//
// Deprecated: Use other properties of the node to get the text value(i.e. RawHTML.Segments).
func (n *RawHTML) Text(source []byte) []byte {
return n.Segments.Value(source)
}
// NewRawHTML returns a new RawHTML node. // NewRawHTML returns a new RawHTML node.
func NewRawHTML() *RawHTML { func NewRawHTML() *RawHTML {
return &RawHTML{ return &RawHTML{

View file

@ -1,204 +0,0 @@
package goldmark_test
import (
"bytes"
"testing"
. "github.com/yuin/goldmark"
"github.com/yuin/goldmark/testutil"
"github.com/yuin/goldmark/text"
)
// TestASTBlockNodeText parses each fixture and compares the result of Text on
// two block nodes with the expected strings T1 and T2. Every fixture contains
// the block kind under test twice, separated by a line "a"; the second block
// is the last thing in the input, and its failure message is tagged "(EOF)".
func TestASTBlockNodeText(t *testing.T) {
var cases = []struct {
// Name is the subtest name and the prefix of failure messages.
Name string
// Source is the Markdown input handed to the parser.
Source string
// T1 is the expected Text of the first node under test.
T1 string
// T2 is the expected Text of the second node under test.
T2 string
// C makes the test compare the first child of each node instead of the
// node itself (set only by the List case).
C bool
}{
{
Name: "AtxHeading",
Source: `# l1
a
# l2`,
T1: `l1`,
T2: `l2`,
},
{
Name: "SetextHeading",
Source: `l1
l2
===============
a
l3
l4
==============`,
T1: `l1
l2`,
T2: `l3
l4`,
},
{
Name: "CodeBlock",
Source: ` l1
l2
a
l3
l4`,
T1: `l1
l2
`,
T2: `l3
l4
`,
},
{
Name: "FencedCodeBlock",
Source: "```" + `
l1
l2
` + "```" + `
a
` + "```" + `
l3
l4`,
T1: `l1
l2
`,
T2: `l3
l4
`,
},
{
Name: "Blockquote",
Source: `> l1
> l2
a
> l3
> l4`,
T1: `l1
l2`,
T2: `l3
l4`,
},
{
Name: "List",
Source: `- l1
l2
a
- l3
l4`,
T1: `l1
l2`,
T2: `l3
l4`,
C: true,
},
{
Name: "HTMLBlock",
Source: `<div>
l1
l2
</div>
a
<div>
l3
l4`,
T1: `<div>
l1
l2
</div>
`,
T2: `<div>
l3
l4`,
},
}
for _, cs := range cases {
t.Run(cs.Name, func(t *testing.T) {
s := []byte(cs.Source)
md := New()
n := md.Parser().Parse(text.NewReader(s))
// c1 is the first top-level node; c2 is the node two siblings after it,
// which skips the node in between.
c1 := n.FirstChild()
c2 := c1.NextSibling().NextSibling()
if cs.C {
// Compare the first child of each node rather than the node itself.
c1 = c1.FirstChild()
c2 = c2.FirstChild()
}
// Text is deprecated, hence the staticcheck suppressions on the calls below.
if !bytes.Equal(c1.Text(s), []byte(cs.T1)) { // nolint: staticcheck
t.Errorf("%s unmatch: %s", cs.Name, testutil.DiffPretty(c1.Text(s), []byte(cs.T1))) // nolint: staticcheck
}
if !bytes.Equal(c2.Text(s), []byte(cs.T2)) { // nolint: staticcheck
t.Errorf("%s(EOF) unmatch: %s", cs.Name, testutil.DiffPretty(c2.Text(s), []byte(cs.T2))) // nolint: staticcheck
}
})
}
}
// TestASTInlineNodeText parses one-line Markdown snippets and checks that Text,
// called on the first child of the document's first child, returns the
// expected bytes.
func TestASTInlineNodeText(t *testing.T) {
	// inlineCase pairs a Markdown snippet with the text expected from its
	// first inline node.
	type inlineCase struct {
		Name   string
		Source string
		T1     string
	}
	table := []inlineCase{
		{Name: "CodeSpan", Source: "`c1`", T1: `c1`},
		{Name: "Emphasis", Source: `*c1 **c2***`, T1: `c1 c2`},
		{Name: "Link", Source: `[label](url)`, T1: `label`},
		{Name: "AutoLink", Source: `<http://url>`, T1: `http://url`},
		{Name: "RawHTML", Source: `<span>c1</span>`, T1: `<span>`},
	}

	for _, tc := range table {
		t.Run(tc.Name, func(t *testing.T) {
			src := []byte(tc.Source)
			doc := New().Parser().Parse(text.NewReader(src))
			inline := doc.FirstChild().FirstChild()
			want := []byte(tc.T1)

			// Text is deprecated, hence the staticcheck suppressions.
			if bytes.Equal(inline.Text(src), want) { // nolint: staticcheck
				return
			}
			t.Errorf("%s unmatch:\n%s", tc.Name, testutil.DiffPretty(inline.Text(src), want)) // nolint: staticcheck
		})
	}
}

View file

@ -2,7 +2,7 @@ package goldmark_test
import ( import (
"encoding/json" "encoding/json"
"os" "io/ioutil"
"testing" "testing"
. "github.com/yuin/goldmark" . "github.com/yuin/goldmark"
@ -20,7 +20,7 @@ type commonmarkSpecTestCase struct {
} }
func TestSpec(t *testing.T) { func TestSpec(t *testing.T) {
bs, err := os.ReadFile("_test/spec.json") bs, err := ioutil.ReadFile("_test/spec.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
@ -29,25 +29,12 @@ func TestSpec(t *testing.T) {
panic(err) panic(err)
} }
cases := []testutil.MarkdownTestCase{} cases := []testutil.MarkdownTestCase{}
nos := testutil.ParseCliCaseArg()
for _, c := range testCases { for _, c := range testCases {
shouldAdd := len(nos) == 0 cases = append(cases, testutil.MarkdownTestCase{
if !shouldAdd { No: c.Example,
for _, no := range nos { Markdown: c.Markdown,
if c.Example == no { Expected: c.HTML,
shouldAdd = true })
break
}
}
}
if shouldAdd {
cases = append(cases, testutil.MarkdownTestCase{
No: c.Example,
Markdown: c.Markdown,
Expected: c.HTML,
})
}
} }
markdown := New(WithRendererOptions( markdown := New(WithRendererOptions(
html.WithXHTML(), html.WithXHTML(),

View file

@ -150,8 +150,7 @@ on two lines.</p>
//- - - - - - - - -// //- - - - - - - - -//
<dl> <dl>
<dt>0</dt> <dt>0</dt>
<dd><pre><code> 0 <dd><pre><code> 0</code></pre>
</code></pre>
</dd> </dd>
</dl> </dl>
//= = = = = = = = = = = = = = = = = = = = = = = =// //= = = = = = = = = = = = = = = = = = = = = = = =//

View file

@ -7,15 +7,15 @@ That's some text with a footnote.[^1]
That's the second paragraph. That's the second paragraph.
//- - - - - - - - -// //- - - - - - - - -//
<p>That's some text with a footnote.<sup id="fnref:1"><a href="#fn:1" class="footnote-ref" role="doc-noteref">1</a></sup></p> <p>That's some text with a footnote.<sup id="fnref:1"><a href="#fn:1" class="footnote-ref" role="doc-noteref">1</a></sup></p>
<div class="footnotes" role="doc-endnotes"> <section class="footnotes" role="doc-endnotes">
<hr> <hr>
<ol> <ol>
<li id="fn:1"> <li id="fn:1" role="doc-endnote">
<p>And that's the footnote.</p> <p>And that's the footnote.</p>
<p>That's the second paragraph.&#160;<a href="#fnref:1" class="footnote-backref" role="doc-backlink">&#x21a9;&#xfe0e;</a></p> <p>That's the second paragraph.&#160;<a href="#fnref:1" class="footnote-backref" role="doc-backlink">&#x21a9;&#xfe0e;</a></p>
</li> </li>
</ol> </ol>
</div> </section>
//= = = = = = = = = = = = = = = = = = = = = = = =// //= = = = = = = = = = = = = = = = = = = = = = = =//
3 3
@ -33,20 +33,20 @@ This[^3] is[^1] text with footnotes[^2].
[^3]: Footnote three [^3]: Footnote three
//- - - - - - - - -// //- - - - - - - - -//
<p>This<sup id="fnref:1"><a href="#fn:1" class="footnote-ref" role="doc-noteref">1</a></sup> is<sup id="fnref:2"><a href="#fn:2" class="footnote-ref" role="doc-noteref">2</a></sup> text with footnotes<sup id="fnref:3"><a href="#fn:3" class="footnote-ref" role="doc-noteref">3</a></sup>.</p> <p>This<sup id="fnref:1"><a href="#fn:1" class="footnote-ref" role="doc-noteref">1</a></sup> is<sup id="fnref:2"><a href="#fn:2" class="footnote-ref" role="doc-noteref">2</a></sup> text with footnotes<sup id="fnref:3"><a href="#fn:3" class="footnote-ref" role="doc-noteref">3</a></sup>.</p>
<div class="footnotes" role="doc-endnotes"> <section class="footnotes" role="doc-endnotes">
<hr> <hr>
<ol> <ol>
<li id="fn:1"> <li id="fn:1" role="doc-endnote">
<p>Footnote three&#160;<a href="#fnref:1" class="footnote-backref" role="doc-backlink">&#x21a9;&#xfe0e;</a></p> <p>Footnote three&#160;<a href="#fnref:1" class="footnote-backref" role="doc-backlink">&#x21a9;&#xfe0e;</a></p>
</li> </li>
<li id="fn:2"> <li id="fn:2" role="doc-endnote">
<p>Footnote one&#160;<a href="#fnref:2" class="footnote-backref" role="doc-backlink">&#x21a9;&#xfe0e;</a></p> <p>Footnote one&#160;<a href="#fnref:2" class="footnote-backref" role="doc-backlink">&#x21a9;&#xfe0e;</a></p>
</li> </li>
<li id="fn:3"> <li id="fn:3" role="doc-endnote">
<p>Footnote two&#160;<a href="#fnref:3" class="footnote-backref" role="doc-backlink">&#x21a9;&#xfe0e;</a></p> <p>Footnote two&#160;<a href="#fnref:3" class="footnote-backref" role="doc-backlink">&#x21a9;&#xfe0e;</a></p>
</li> </li>
</ol> </ol>
</div> </section>
//= = = = = = = = = = = = = = = = = = = = = = = =// //= = = = = = = = = = = = = = = = = = = = = = = =//
@ -57,35 +57,12 @@ test![^1]
[^1]: footnote [^1]: footnote
//- - - - - - - - -// //- - - - - - - - -//
<p>test!<sup id="fnref:1"><a href="#fn:1" class="footnote-ref" role="doc-noteref">1</a></sup></p> <p>test!<sup id="fnref:1"><a href="#fn:1" class="footnote-ref" role="doc-noteref">1</a></sup></p>
<div class="footnotes" role="doc-endnotes"> <section class="footnotes" role="doc-endnotes">
<hr> <hr>
<ol> <ol>
<li id="fn:1"> <li id="fn:1" role="doc-endnote">
<p>footnote&#160;<a href="#fnref:1" class="footnote-backref" role="doc-backlink">&#x21a9;&#xfe0e;</a></p> <p>footnote&#160;<a href="#fnref:1" class="footnote-backref" role="doc-backlink">&#x21a9;&#xfe0e;</a></p>
</li> </li>
</ol> </ol>
</div> </section>
//= = = = = = = = = = = = = = = = = = = = = = = =//
6: Multiple references to the same footnotes should have different ids
//- - - - - - - - -//
something[^fn:1]
something[^fn:1]
something[^fn:1]
[^fn:1]: footnote text
//- - - - - - - - -//
<p>something<sup id="fnref:1"><a href="#fn:1" class="footnote-ref" role="doc-noteref">1</a></sup></p>
<p>something<sup id="fnref1:1"><a href="#fn:1" class="footnote-ref" role="doc-noteref">1</a></sup></p>
<p>something<sup id="fnref2:1"><a href="#fn:1" class="footnote-ref" role="doc-noteref">1</a></sup></p>
<div class="footnotes" role="doc-endnotes">
<hr>
<ol>
<li id="fn:1">
<p>footnote text&#160;<a href="#fnref:1" class="footnote-backref" role="doc-backlink">&#x21a9;&#xfe0e;</a>&#160;<a href="#fnref1:1" class="footnote-backref" role="doc-backlink">&#x21a9;&#xfe0e;</a>&#160;<a href="#fnref2:1" class="footnote-backref" role="doc-backlink">&#x21a9;&#xfe0e;</a></p>
</li>
</ol>
</div>
//= = = = = = = = = = = = = = = = = = = = = = = =// //= = = = = = = = = = = = = = = = = = = = = = = =//

View file

@ -171,23 +171,9 @@ http://server.intranet.acme.com:1313
//= = = = = = = = = = = = = = = = = = = = = = = =// //= = = = = = = = = = = = = = = = = = = = = = = =//
18 17
//- - - - - - - - -// //- - - - - - - - -//
https://g.page/foo https://g.page/foo
//- - - - - - - - -// //- - - - - - - - -//
<p><a href="https://g.page/foo">https://g.page/foo</a></p> <p><a href="https://g.page/foo">https://g.page/foo</a></p>
//= = = = = = = = = = = = = = = = = = = = = = = =// //= = = = = = = = = = = = = = = = = = = = = = = =//
19: Trailing punctuation (specifically, ?, !, ., ,, :, *, _, and ~) will not be considered part of the autolink
//- - - - - - - - -//
__http://test.com/~/a__
__http://test.com/~/__
__http://test.com/~__
__http://test.com/a/~__
//- - - - - - - - -//
<p><strong><a href="http://test.com/~/a">http://test.com/~/a</a></strong>
<strong><a href="http://test.com/~/">http://test.com/~/</a></strong>
<strong><a href="http://test.com/">http://test.com/</a>~</strong>
<strong><a href="http://test.com/a/">http://test.com/a/</a>~</strong></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//

View file

@ -5,6 +5,8 @@
<p><del>Hi</del> Hello, world!</p> <p><del>Hi</del> Hello, world!</p>
//= = = = = = = = = = = = = = = = = = = = = = = =// //= = = = = = = = = = = = = = = = = = = = = = = =//
2 2
//- - - - - - - - -// //- - - - - - - - -//
This ~~has a This ~~has a
@ -14,26 +16,3 @@ new paragraph~~.
<p>This ~~has a</p> <p>This ~~has a</p>
<p>new paragraph~~.</p> <p>new paragraph~~.</p>
//= = = = = = = = = = = = = = = = = = = = = = = =// //= = = = = = = = = = = = = = = = = = = = = = = =//
3
//- - - - - - - - -//
~Hi~ Hello, world!
//- - - - - - - - -//
<p><del>Hi</del> Hello, world!</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
4: Three or more tildes do not create a strikethrough
//- - - - - - - - -//
This will ~~~not~~~ strike.
//- - - - - - - - -//
<p>This will ~~~not~~~ strike.</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
5: Leading three or more tildes do not create a strikethrough, create a code block
//- - - - - - - - -//
~~~Hi~~~ Hello, world!
//- - - - - - - - -//
<pre><code class="language-Hi~~~"></code></pre>
//= = = = = = = = = = = = = = = = = = = = = = = =//

View file

@ -253,30 +253,3 @@ foo|bar
</thead> </thead>
</table> </table>
//= = = = = = = = = = = = = = = = = = = = = = = =// //= = = = = = = = = = = = = = = = = = = = = = = =//
12: A delimiter can not start with more than 3 spaces
//- - - - - - - - -//
Foo
---
//- - - - - - - - -//
<p>Foo
---</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
13: A delimiter can not start with more than 3 spaces(w/ tabs)
OPTIONS: {"enableEscape": true}
//- - - - - - - - -//
- aaa
Foo
\t\t---
//- - - - - - - - -//
<ul>
<li>
<p>aaa</p>
<p>Foo
---</p>
</li>
</ul>
//= = = = = = = = = = = = = = = = = = = = = = = =//

View file

@ -28,24 +28,3 @@
<li><input disabled="" type="checkbox"> bim</li> <li><input disabled="" type="checkbox"> bim</li>
</ul> </ul>
//= = = = = = = = = = = = = = = = = = = = = = = =// //= = = = = = = = = = = = = = = = = = = = = = = =//
3
//- - - - - - - - -//
- test[x]=[x]
//- - - - - - - - -//
<ul>
<li>test[x]=[x]</li>
</ul>
//= = = = = = = = = = = = = = = = = = = = = = = =//
4
//- - - - - - - - -//
+ [x] [x]
//- - - - - - - - -//
<ul>
<li><input checked="" disabled="" type="checkbox"> [x]</li>
</ul>
//= = = = = = = = = = = = = = = = = = = = = = = =//

View file

@ -64,80 +64,3 @@ reported "issue 1 (IE-only)", "issue 2", 'issue3 (FF-only)', 'issue4'
//- - - - - - - - -// //- - - - - - - - -//
<p>&ldquo;Monitor 21&quot;&rdquo; and &ldquo;Monitor&rdquo;&quot;</p> <p>&ldquo;Monitor 21&quot;&rdquo; and &ldquo;Monitor&rdquo;&quot;</p>
//= = = = = = = = = = = = = = = = = = = = = = = =// //= = = = = = = = = = = = = = = = = = = = = = = =//
9: Closing quotation marks within italics
//- - - - - - - - -//
*"At first, things were not clear."*
//- - - - - - - - -//
<p><em>&ldquo;At first, things were not clear.&rdquo;</em></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
10: Closing quotation marks within boldfacing
//- - - - - - - - -//
**"At first, things were not clear."**
//- - - - - - - - -//
<p><strong>&ldquo;At first, things were not clear.&rdquo;</strong></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
11: Closing quotation marks within boldfacing and italics
//- - - - - - - - -//
***"At first, things were not clear."***
//- - - - - - - - -//
<p><em><strong>&ldquo;At first, things were not clear.&rdquo;</strong></em></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
12: Closing quotation marks within boldfacing and italics
//- - - - - - - - -//
***"At first, things were not clear."***
//- - - - - - - - -//
<p><em><strong>&ldquo;At first, things were not clear.&rdquo;</strong></em></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
13: Plural possessives
//- - - - - - - - -//
John's dog is named Sam. The Smiths' dog is named Rover.
//- - - - - - - - -//
<p>John&rsquo;s dog is named Sam. The Smiths&rsquo; dog is named Rover.</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
14: Links within quotation marks and parenthetical phrases
//- - - - - - - - -//
This is not difficult (see "[Introduction to Hugo Templating](https://gohugo.io/templates/introduction/)").
//- - - - - - - - -//
<p>This is not difficult (see &ldquo;<a href="https://gohugo.io/templates/introduction/">Introduction to Hugo Templating</a>&rdquo;).</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
15: Quotation marks within links
//- - - - - - - - -//
Apple's early Cairo font gave us ["moof" and the "dogcow."](https://www.macworld.com/article/2926184/we-miss-you-clarus-the-dogcow.html)
//- - - - - - - - -//
<p>Apple&rsquo;s early Cairo font gave us <a href="https://www.macworld.com/article/2926184/we-miss-you-clarus-the-dogcow.html">&ldquo;moof&rdquo; and the &ldquo;dogcow.&rdquo;</a></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
16: Single closing quotation marks with slang/informalities
//- - - - - - - - -//
"I'm not doin' that," Bill said with emphasis.
//- - - - - - - - -//
<p>&ldquo;I&rsquo;m not doin&rsquo; that,&rdquo; Bill said with emphasis.</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
17: Closing single quotation marks in quotations-within-quotations
//- - - - - - - - -//
Janet said, "When everything is 'breaking news,' nothing is 'breaking news.'"
//- - - - - - - - -//
<p>Janet said, &ldquo;When everything is &lsquo;breaking news,&rsquo; nothing is &lsquo;breaking news.&rsquo;&rdquo;</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
18: Opening single quotation marks for abbreviations
//- - - - - - - - -//
We're talking about the internet --- 'net for short. Let's rock 'n roll!
//- - - - - - - - -//
<p>We&rsquo;re talking about the internet &mdash; &rsquo;net for short. Let&rsquo;s rock &rsquo;n roll!</p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
19: Quotes in alt text
//- - - - - - - - -//
![Nice & day, **isn't** it?](https://example.com/image.jpg)
//- - - - - - - - -//
<p><img src="https://example.com/image.jpg" alt="Nice &amp; day, isn&rsquo;t it?"></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//

View file

@ -12,7 +12,6 @@ type FootnoteLink struct {
gast.BaseInline gast.BaseInline
Index int Index int
RefCount int RefCount int
RefIndex int
} }
// Dump implements Node.Dump. // Dump implements Node.Dump.
@ -20,7 +19,6 @@ func (n *FootnoteLink) Dump(source []byte, level int) {
m := map[string]string{} m := map[string]string{}
m["Index"] = fmt.Sprintf("%v", n.Index) m["Index"] = fmt.Sprintf("%v", n.Index)
m["RefCount"] = fmt.Sprintf("%v", n.RefCount) m["RefCount"] = fmt.Sprintf("%v", n.RefCount)
m["RefIndex"] = fmt.Sprintf("%v", n.RefIndex)
gast.DumpHelper(n, source, level, m, nil) gast.DumpHelper(n, source, level, m, nil)
} }
@ -37,7 +35,6 @@ func NewFootnoteLink(index int) *FootnoteLink {
return &FootnoteLink{ return &FootnoteLink{
Index: index, Index: index,
RefCount: 0, RefCount: 0,
RefIndex: 0,
} }
} }
@ -47,7 +44,6 @@ type FootnoteBacklink struct {
gast.BaseInline gast.BaseInline
Index int Index int
RefCount int RefCount int
RefIndex int
} }
// Dump implements Node.Dump. // Dump implements Node.Dump.
@ -55,7 +51,6 @@ func (n *FootnoteBacklink) Dump(source []byte, level int) {
m := map[string]string{} m := map[string]string{}
m["Index"] = fmt.Sprintf("%v", n.Index) m["Index"] = fmt.Sprintf("%v", n.Index)
m["RefCount"] = fmt.Sprintf("%v", n.RefCount) m["RefCount"] = fmt.Sprintf("%v", n.RefCount)
m["RefIndex"] = fmt.Sprintf("%v", n.RefIndex)
gast.DumpHelper(n, source, level, m, nil) gast.DumpHelper(n, source, level, m, nil)
} }
@ -72,7 +67,6 @@ func NewFootnoteBacklink(index int) *FootnoteBacklink {
return &FootnoteBacklink{ return &FootnoteBacklink{
Index: index, Index: index,
RefCount: 0, RefCount: 0,
RefIndex: 0,
} }
} }
@ -88,7 +82,7 @@ type Footnote struct {
func (n *Footnote) Dump(source []byte, level int) { func (n *Footnote) Dump(source []byte, level int) {
m := map[string]string{} m := map[string]string{}
m["Index"] = fmt.Sprintf("%v", n.Index) m["Index"] = fmt.Sprintf("%v", n.Index)
m["Ref"] = string(n.Ref) m["Ref"] = fmt.Sprintf("%s", n.Ref)
gast.DumpHelper(n, source, level, m, nil) gast.DumpHelper(n, source, level, m, nil)
} }

View file

@ -2,9 +2,8 @@ package ast
import ( import (
"fmt" "fmt"
"strings"
gast "github.com/yuin/goldmark/ast" gast "github.com/yuin/goldmark/ast"
"strings"
) )
// Alignment is a text alignment of table cells. // Alignment is a text alignment of table cells.
@ -46,7 +45,7 @@ type Table struct {
Alignments []Alignment Alignments []Alignment
} }
// Dump implements Node.Dump. // Dump implements Node.Dump
func (n *Table) Dump(source []byte, level int) { func (n *Table) Dump(source []byte, level int) {
gast.DumpHelper(n, source, level, nil, func(level int) { gast.DumpHelper(n, source, level, nil, func(level int) {
indent := strings.Repeat(" ", level) indent := strings.Repeat(" ", level)
@ -98,7 +97,7 @@ func (n *TableRow) Kind() gast.NodeKind {
// NewTableRow returns a new TableRow node. // NewTableRow returns a new TableRow node.
func NewTableRow(alignments []Alignment) *TableRow { func NewTableRow(alignments []Alignment) *TableRow {
return &TableRow{Alignments: alignments} return &TableRow{}
} }
// A TableHeader struct represents a table header of Markdown(GFM) text. // A TableHeader struct represents a table header of Markdown(GFM) text.

View file

@ -1,123 +0,0 @@
package extension
import (
"bytes"
"testing"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/testutil"
"github.com/yuin/goldmark/text"
)
func TestASTBlockNodeText(t *testing.T) {
var cases = []struct {
Name string
Source string
T1 string
T2 string
C bool
}{
{
Name: "DefinitionList",
Source: `c1
: c2
c3
a
c4
: c5
c6`,
T1: `c1c2
c3`,
T2: `c4c5
c6`,
},
{
Name: "Table",
Source: `| h1 | h2 |
| -- | -- |
| c1 | c2 |
a
| h3 | h4 |
| -- | -- |
| c3 | c4 |`,
T1: `h1h2c1c2`,
T2: `h3h4c3c4`,
},
}
for _, cs := range cases {
t.Run(cs.Name, func(t *testing.T) {
s := []byte(cs.Source)
md := goldmark.New(
goldmark.WithRendererOptions(
html.WithUnsafe(),
),
goldmark.WithExtensions(
DefinitionList,
Table,
),
)
n := md.Parser().Parse(text.NewReader(s))
c1 := n.FirstChild()
c2 := c1.NextSibling().NextSibling()
if cs.C {
c1 = c1.FirstChild()
c2 = c2.FirstChild()
}
if !bytes.Equal(c1.Text(s), []byte(cs.T1)) { // nolint: staticcheck
t.Errorf("%s unmatch:\n%s", cs.Name, testutil.DiffPretty(c1.Text(s), []byte(cs.T1))) // nolint: staticcheck
}
if !bytes.Equal(c2.Text(s), []byte(cs.T2)) { // nolint: staticcheck
t.Errorf("%s(EOF) unmatch: %s", cs.Name, testutil.DiffPretty(c2.Text(s), []byte(cs.T2))) // nolint: staticcheck
}
})
}
}
func TestASTInlineNodeText(t *testing.T) {
var cases = []struct {
Name string
Source string
T1 string
}{
{
Name: "Strikethrough",
Source: `~c1 *c2*~`,
T1: `c1 c2`,
},
}
for _, cs := range cases {
t.Run(cs.Name, func(t *testing.T) {
s := []byte(cs.Source)
md := goldmark.New(
goldmark.WithRendererOptions(
html.WithUnsafe(),
),
goldmark.WithExtensions(
Strikethrough,
),
)
n := md.Parser().Parse(text.NewReader(s))
c1 := n.FirstChild().FirstChild()
if !bytes.Equal(c1.Text(s), []byte(cs.T1)) { // nolint: staticcheck
t.Errorf("%s unmatch:\n%s", cs.Name, testutil.DiffPretty(c1.Text(s), []byte(cs.T1))) // nolint: staticcheck
}
})
}
}

View file

@ -1,72 +0,0 @@
package extension
import (
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer/html"
)
// A CJKOption sets options for CJK support mostly for HTML based renderers.
type CJKOption func(*cjk)
// A EastAsianLineBreaks is a style of east asian line breaks.
type EastAsianLineBreaks int
const (
//EastAsianLineBreaksNone renders line breaks as it is.
EastAsianLineBreaksNone EastAsianLineBreaks = iota
// EastAsianLineBreaksSimple is a style where soft line breaks are ignored
// if both sides of the break are east asian wide characters.
EastAsianLineBreaksSimple
// EastAsianLineBreaksCSS3Draft is a style where soft line breaks are ignored
// even if only one side of the break is an east asian wide character.
EastAsianLineBreaksCSS3Draft
)
// WithEastAsianLineBreaks is a functional option that indicates whether softline breaks
// between east asian wide characters should be ignored.
// style defauts to [EastAsianLineBreaksSimple] .
func WithEastAsianLineBreaks(style ...EastAsianLineBreaks) CJKOption {
return func(c *cjk) {
if len(style) == 0 {
c.EastAsianLineBreaks = EastAsianLineBreaksSimple
return
}
c.EastAsianLineBreaks = style[0]
}
}
// WithEscapedSpace is a functional option that indicates that a '\' escaped half-space(0x20) should not be rendered.
func WithEscapedSpace() CJKOption {
return func(c *cjk) {
c.EscapedSpace = true
}
}
type cjk struct {
EastAsianLineBreaks EastAsianLineBreaks
EscapedSpace bool
}
// CJK is a goldmark extension that provides functionalities for CJK languages.
var CJK = NewCJK(WithEastAsianLineBreaks(), WithEscapedSpace())
// NewCJK returns a new extension with given options.
func NewCJK(opts ...CJKOption) goldmark.Extender {
e := &cjk{
EastAsianLineBreaks: EastAsianLineBreaksNone,
}
for _, opt := range opts {
opt(e)
}
return e
}
func (e *cjk) Extend(m goldmark.Markdown) {
m.Renderer().AddOptions(html.WithEastAsianLineBreaks(
html.EastAsianLineBreaks(e.EastAsianLineBreaks)))
if e.EscapedSpace {
m.Renderer().AddOptions(html.WithWriter(html.NewWriter(html.WithEscapedSpace())))
m.Parser().AddOptions(parser.WithEscapedSpace())
}
}

View file

@ -1,269 +0,0 @@
package extension
import (
"testing"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/testutil"
)
func TestEscapedSpace(t *testing.T) {
markdown := goldmark.New(goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithUnsafe(),
))
no := 1
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "Without spaces around an emphasis started with east asian punctuations, it is not interpreted as an emphasis(as defined in CommonMark spec)",
Markdown: "太郎は**「こんにちわ」**と言った\nんです",
Expected: "<p>太郎は**「こんにちわ」**と言った\nんです</p>",
},
t,
)
no = 2
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "With spaces around an emphasis started with east asian punctuations, it is interpreted as an emphasis(but remains unnecessary spaces)",
Markdown: "太郎は **「こんにちわ」** と言った\nんです",
Expected: "<p>太郎は <strong>「こんにちわ」</strong> と言った\nんです</p>",
},
t,
)
// Enables EscapedSpace
markdown = goldmark.New(goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithUnsafe(),
),
goldmark.WithExtensions(NewCJK(WithEscapedSpace())),
)
no = 3
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "With spaces around an emphasis started with east asian punctuations,it is interpreted as an emphasis",
Markdown: "太郎は\\ **「こんにちわ」**\\ と言った\nんです",
Expected: "<p>太郎は<strong>「こんにちわ」</strong>と言った\nんです</p>",
},
t,
)
// ' ' triggers Linkify extension inline parser.
// Escaped spaces should not trigger the inline parser.
markdown = goldmark.New(goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithUnsafe(),
),
goldmark.WithExtensions(
NewCJK(WithEscapedSpace()),
Linkify,
),
)
no = 4
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "Escaped space and linkfy extension",
Markdown: "太郎は\\ **「こんにちわ」**\\ と言った\nんです",
Expected: "<p>太郎は<strong>「こんにちわ」</strong>と言った\nんです</p>",
},
t,
)
}
func TestEastAsianLineBreaks(t *testing.T) {
markdown := goldmark.New(goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithUnsafe(),
))
no := 1
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "Soft line breaks are rendered as a newline, so some asian users will see it as an unnecessary space",
Markdown: "太郎は\\ **「こんにちわ」**\\ と言った\nんです",
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と言った\nんです</p>",
},
t,
)
// Enables EastAsianLineBreaks
markdown = goldmark.New(goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithUnsafe(),
),
goldmark.WithExtensions(NewCJK(WithEastAsianLineBreaks())),
)
no = 2
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "Soft line breaks between east asian wide characters are ignored",
Markdown: "太郎は\\ **「こんにちわ」**\\ と言った\nんです",
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と言ったんです</p>",
},
t,
)
no = 3
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "Soft line breaks between western characters are rendered as a newline",
Markdown: "太郎は\\ **「こんにちわ」**\\ と言ったa\nbんです",
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と言ったa\nbんです</p>",
},
t,
)
no = 4
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "Soft line breaks between a western character and an east asian wide character are rendered as a newline",
Markdown: "太郎は\\ **「こんにちわ」**\\ と言ったa\nんです",
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と言ったa\nんです</p>",
},
t,
)
no = 5
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "Soft line breaks between an east asian wide character and a western character are rendered as a newline",
Markdown: "太郎は\\ **「こんにちわ」**\\ と言った\nbんです",
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と言った\nbんです</p>",
},
t,
)
// WithHardWraps take precedence over WithEastAsianLineBreaks
markdown = goldmark.New(goldmark.WithRendererOptions(
html.WithHardWraps(),
html.WithXHTML(),
html.WithUnsafe(),
),
goldmark.WithExtensions(NewCJK(WithEastAsianLineBreaks())),
)
no = 6
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "WithHardWraps take precedence over WithEastAsianLineBreaks",
Markdown: "太郎は\\ **「こんにちわ」**\\ と言った\nんです",
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と言った<br />\nんです</p>",
},
t,
)
// Tests with EastAsianLineBreaksStyleSimple
markdown = goldmark.New(goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithUnsafe(),
),
goldmark.WithExtensions(
NewCJK(WithEastAsianLineBreaks()),
Linkify,
),
)
no = 7
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "WithEastAsianLineBreaks and linkfy extension",
Markdown: "太郎は\\ **「こんにちわ」**\\ と言った\r\nんです",
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と言ったんです</p>",
},
t,
)
no = 8
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "Soft line breaks between east asian wide characters or punctuations are ignored",
Markdown: "太郎は\\ **「こんにちわ」**\\ と、\r\n言った\r\nんです",
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と、言ったんです</p>",
},
t,
)
no = 9
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "Soft line breaks between an east asian wide character and a western character are ignored",
Markdown: "私はプログラマーです。\n東京の会社に勤めています。\nGoでWebアプリケーションを開発しています。",
Expected: "<p>私はプログラマーです。東京の会社に勤めています。\nGoでWebアプリケーションを開発しています。</p>",
},
t,
)
// Tests with EastAsianLineBreaksCSS3Draft
markdown = goldmark.New(goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithUnsafe(),
),
goldmark.WithExtensions(
NewCJK(WithEastAsianLineBreaks(EastAsianLineBreaksCSS3Draft)),
),
)
no = 10
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "Soft line breaks between a western character and an east asian wide character are ignored",
Markdown: "太郎は\\ **「こんにちわ」**\\ と言ったa\nんです",
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と言ったaんです</p>",
},
t,
)
no = 11
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "Soft line breaks between an east asian wide character and a western character are ignored",
Markdown: "太郎は\\ **「こんにちわ」**\\ と言った\nbんです",
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と言ったbんです</p>",
},
t,
)
no = 12
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "Soft line breaks between an east asian wide character and a western character are ignored",
Markdown: "私はプログラマーです。\n東京の会社に勤めています。\nGoでWebアプリケーションを開発しています。",
Expected: "<p>私はプログラマーです。東京の会社に勤めています。GoでWebアプリケーションを開発しています。</p>",
},
t,
)
}

View file

@ -113,8 +113,7 @@ func (b *definitionDescriptionParser) Trigger() []byte {
return []byte{':'} return []byte{':'}
} }
func (b *definitionDescriptionParser) Open( func (b *definitionDescriptionParser) Open(parent gast.Node, reader text.Reader, pc parser.Context) (gast.Node, parser.State) {
parent gast.Node, reader text.Reader, pc parser.Context) (gast.Node, parser.State) {
line, _ := reader.PeekLine() line, _ := reader.PeekLine()
pos := pc.BlockOffset() pos := pc.BlockOffset()
indent := pc.BlockIndent() indent := pc.BlockIndent()
@ -200,8 +199,7 @@ func (r *DefinitionListHTMLRenderer) RegisterFuncs(reg renderer.NodeRendererFunc
// DefinitionListAttributeFilter defines attribute names which dl elements can have. // DefinitionListAttributeFilter defines attribute names which dl elements can have.
var DefinitionListAttributeFilter = html.GlobalAttributeFilter var DefinitionListAttributeFilter = html.GlobalAttributeFilter
func (r *DefinitionListHTMLRenderer) renderDefinitionList( func (r *DefinitionListHTMLRenderer) renderDefinitionList(w util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) {
w util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) {
if entering { if entering {
if n.Attributes() != nil { if n.Attributes() != nil {
_, _ = w.WriteString("<dl") _, _ = w.WriteString("<dl")
@ -219,8 +217,7 @@ func (r *DefinitionListHTMLRenderer) renderDefinitionList(
// DefinitionTermAttributeFilter defines attribute names which dd elements can have. // DefinitionTermAttributeFilter defines attribute names which dd elements can have.
var DefinitionTermAttributeFilter = html.GlobalAttributeFilter var DefinitionTermAttributeFilter = html.GlobalAttributeFilter
func (r *DefinitionListHTMLRenderer) renderDefinitionTerm( func (r *DefinitionListHTMLRenderer) renderDefinitionTerm(w util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) {
w util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) {
if entering { if entering {
if n.Attributes() != nil { if n.Attributes() != nil {
_, _ = w.WriteString("<dt") _, _ = w.WriteString("<dt")
@ -238,8 +235,7 @@ func (r *DefinitionListHTMLRenderer) renderDefinitionTerm(
// DefinitionDescriptionAttributeFilter defines attribute names which dd elements can have. // DefinitionDescriptionAttributeFilter defines attribute names which dd elements can have.
var DefinitionDescriptionAttributeFilter = html.GlobalAttributeFilter var DefinitionDescriptionAttributeFilter = html.GlobalAttributeFilter
func (r *DefinitionListHTMLRenderer) renderDefinitionDescription( func (r *DefinitionListHTMLRenderer) renderDefinitionDescription(w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) {
w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) {
if entering { if entering {
n := node.(*ast.DefinitionDescription) n := node.(*ast.DefinitionDescription)
_, _ = w.WriteString("<dd") _, _ = w.WriteString("<dd")

View file

@ -2,7 +2,6 @@ package extension
import ( import (
"bytes" "bytes"
"fmt"
"strconv" "strconv"
"github.com/yuin/goldmark" "github.com/yuin/goldmark"
@ -44,8 +43,8 @@ func (b *footnoteBlockParser) Open(parent gast.Node, reader text.Reader, pc pars
return nil, parser.NoChildren return nil, parser.NoChildren
} }
open := pos + 1 open := pos + 1
var closes int closes := 0
closure := util.FindClosure(line[pos+1:], '[', ']', false, false) //nolint:staticcheck closure := util.FindClosure(line[pos+1:], '[', ']', false, false)
closes = pos + 1 + closure closes = pos + 1 + closure
next := closes + 1 next := closes + 1
if closure > -1 { if closure > -1 {
@ -136,7 +135,7 @@ func (s *footnoteParser) Parse(parent gast.Node, block text.Reader, pc parser.Co
return nil return nil
} }
open := pos open := pos
closure := util.FindClosure(line[pos:], '[', ']', false, false) //nolint:staticcheck closure := util.FindClosure(line[pos:], '[', ']', false, false)
if closure < 0 { if closure < 0 {
return nil return nil
} }
@ -156,7 +155,7 @@ func (s *footnoteParser) Parse(parent gast.Node, block text.Reader, pc parser.Co
d := def.(*ast.Footnote) d := def.(*ast.Footnote)
if bytes.Equal(d.Ref, value) { if bytes.Equal(d.Ref, value) {
if d.Index < 0 { if d.Index < 0 {
list.Count++ list.Count += 1
d.Index = list.Count d.Index = list.Count
} }
index = d.Index index = d.Index
@ -218,14 +217,8 @@ func (a *footnoteASTTransformer) Transform(node *gast.Document, reader text.Read
counter[fnlink.Index]++ counter[fnlink.Index]++
} }
} }
refCounter := map[int]int{}
for _, fnlink := range fnlist { for _, fnlink := range fnlist {
fnlink.RefCount = counter[fnlink.Index] fnlink.RefCount = counter[fnlink.Index]
if _, ok := refCounter[fnlink.Index]; !ok {
refCounter[fnlink.Index] = 0
}
fnlink.RefIndex = refCounter[fnlink.Index]
refCounter[fnlink.Index]++
} }
} }
for footnote := list.FirstChild(); footnote != nil; { for footnote := list.FirstChild(); footnote != nil; {
@ -239,19 +232,9 @@ func (a *footnoteASTTransformer) Transform(node *gast.Document, reader text.Read
if index < 0 { if index < 0 {
list.RemoveChild(list, footnote) list.RemoveChild(list, footnote)
} else { } else {
refCount := counter[index]
backLink := ast.NewFootnoteBacklink(index) backLink := ast.NewFootnoteBacklink(index)
backLink.RefCount = refCount backLink.RefCount = counter[index]
backLink.RefIndex = 0
container.AppendChild(container, backLink) container.AppendChild(container, backLink)
if refCount > 1 {
for i := 1; i < refCount; i++ {
backLink := ast.NewFootnoteBacklink(index)
backLink.RefCount = refCount
backLink.RefIndex = i
container.AppendChild(container, backLink)
}
}
} }
footnote = next footnote = next
} }
@ -272,9 +255,9 @@ func (a *footnoteASTTransformer) Transform(node *gast.Document, reader text.Read
// FootnoteConfig holds configuration values for the footnote extension. // FootnoteConfig holds configuration values for the footnote extension.
// //
// Link* and Backlink* configurations have some variables: // Link* and Backlink* configurations have some variables:
// Occurrences of “^^” in the string will be replaced by the // Occurrances of “^^” in the string will be replaced by the
// corresponding footnote number in the HTML output. // corresponding footnote number in the HTML output.
// Occurrences of “%%” will be replaced by a number for the // Occurrances of “%%” will be replaced by a number for the
// reference (footnotes can have multiple references). // reference (footnotes can have multiple references).
type FootnoteConfig struct { type FootnoteConfig struct {
html.Config html.Config
@ -382,8 +365,8 @@ func (o *withFootnoteIDPrefix) SetFootnoteOption(c *FootnoteConfig) {
} }
// WithFootnoteIDPrefix is a functional option that is a prefix for the id attributes generated by footnotes. // WithFootnoteIDPrefix is a functional option that is a prefix for the id attributes generated by footnotes.
func WithFootnoteIDPrefix[T []byte | string](a T) FootnoteOption { func WithFootnoteIDPrefix(a []byte) FootnoteOption {
return &withFootnoteIDPrefix{[]byte(a)} return &withFootnoteIDPrefix{a}
} }
const optFootnoteIDPrefixFunction renderer.OptionName = "FootnoteIDPrefixFunction" const optFootnoteIDPrefixFunction renderer.OptionName = "FootnoteIDPrefixFunction"
@ -420,8 +403,8 @@ func (o *withFootnoteLinkTitle) SetFootnoteOption(c *FootnoteConfig) {
} }
// WithFootnoteLinkTitle is a functional option that is an optional title attribute for footnote links. // WithFootnoteLinkTitle is a functional option that is an optional title attribute for footnote links.
func WithFootnoteLinkTitle[T []byte | string](a T) FootnoteOption { func WithFootnoteLinkTitle(a []byte) FootnoteOption {
return &withFootnoteLinkTitle{[]byte(a)} return &withFootnoteLinkTitle{a}
} }
const optFootnoteBacklinkTitle renderer.OptionName = "FootnoteBacklinkTitle" const optFootnoteBacklinkTitle renderer.OptionName = "FootnoteBacklinkTitle"
@ -439,8 +422,8 @@ func (o *withFootnoteBacklinkTitle) SetFootnoteOption(c *FootnoteConfig) {
} }
// WithFootnoteBacklinkTitle is a functional option that is an optional title attribute for footnote backlinks. // WithFootnoteBacklinkTitle is a functional option that is an optional title attribute for footnote backlinks.
func WithFootnoteBacklinkTitle[T []byte | string](a T) FootnoteOption { func WithFootnoteBacklinkTitle(a []byte) FootnoteOption {
return &withFootnoteBacklinkTitle{[]byte(a)} return &withFootnoteBacklinkTitle{a}
} }
const optFootnoteLinkClass renderer.OptionName = "FootnoteLinkClass" const optFootnoteLinkClass renderer.OptionName = "FootnoteLinkClass"
@ -458,8 +441,8 @@ func (o *withFootnoteLinkClass) SetFootnoteOption(c *FootnoteConfig) {
} }
// WithFootnoteLinkClass is a functional option that is a class for footnote links. // WithFootnoteLinkClass is a functional option that is a class for footnote links.
func WithFootnoteLinkClass[T []byte | string](a T) FootnoteOption { func WithFootnoteLinkClass(a []byte) FootnoteOption {
return &withFootnoteLinkClass{[]byte(a)} return &withFootnoteLinkClass{a}
} }
const optFootnoteBacklinkClass renderer.OptionName = "FootnoteBacklinkClass" const optFootnoteBacklinkClass renderer.OptionName = "FootnoteBacklinkClass"
@ -477,8 +460,8 @@ func (o *withFootnoteBacklinkClass) SetFootnoteOption(c *FootnoteConfig) {
} }
// WithFootnoteBacklinkClass is a functional option that is a class for footnote backlinks. // WithFootnoteBacklinkClass is a functional option that is a class for footnote backlinks.
func WithFootnoteBacklinkClass[T []byte | string](a T) FootnoteOption { func WithFootnoteBacklinkClass(a []byte) FootnoteOption {
return &withFootnoteBacklinkClass{[]byte(a)} return &withFootnoteBacklinkClass{a}
} }
const optFootnoteBacklinkHTML renderer.OptionName = "FootnoteBacklinkHTML" const optFootnoteBacklinkHTML renderer.OptionName = "FootnoteBacklinkHTML"
@ -496,8 +479,8 @@ func (o *withFootnoteBacklinkHTML) SetFootnoteOption(c *FootnoteConfig) {
} }
// WithFootnoteBacklinkHTML is an HTML content for footnote backlinks. // WithFootnoteBacklinkHTML is an HTML content for footnote backlinks.
func WithFootnoteBacklinkHTML[T []byte | string](a T) FootnoteOption { func WithFootnoteBacklinkHTML(a []byte) FootnoteOption {
return &withFootnoteBacklinkHTML{[]byte(a)} return &withFootnoteBacklinkHTML{a}
} }
// FootnoteHTMLRenderer is a renderer.NodeRenderer implementation that // FootnoteHTMLRenderer is a renderer.NodeRenderer implementation that
@ -525,18 +508,13 @@ func (r *FootnoteHTMLRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegist
reg.Register(ast.KindFootnoteList, r.renderFootnoteList) reg.Register(ast.KindFootnoteList, r.renderFootnoteList)
} }
func (r *FootnoteHTMLRenderer) renderFootnoteLink( func (r *FootnoteHTMLRenderer) renderFootnoteLink(w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) {
w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) {
if entering { if entering {
n := node.(*ast.FootnoteLink) n := node.(*ast.FootnoteLink)
is := strconv.Itoa(n.Index) is := strconv.Itoa(n.Index)
_, _ = w.WriteString(`<sup id="`) _, _ = w.WriteString(`<sup id="`)
_, _ = w.Write(r.idPrefix(node)) _, _ = w.Write(r.idPrefix(node))
_, _ = w.WriteString(`fnref`) _, _ = w.WriteString(`fnref:`)
if n.RefIndex > 0 {
_, _ = w.WriteString(fmt.Sprintf("%v", n.RefIndex))
}
_ = w.WriteByte(':')
_, _ = w.WriteString(is) _, _ = w.WriteString(is)
_, _ = w.WriteString(`"><a href="#`) _, _ = w.WriteString(`"><a href="#`)
_, _ = w.Write(r.idPrefix(node)) _, _ = w.Write(r.idPrefix(node))
@ -557,18 +535,13 @@ func (r *FootnoteHTMLRenderer) renderFootnoteLink(
return gast.WalkContinue, nil return gast.WalkContinue, nil
} }
func (r *FootnoteHTMLRenderer) renderFootnoteBacklink( func (r *FootnoteHTMLRenderer) renderFootnoteBacklink(w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) {
w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) {
if entering { if entering {
n := node.(*ast.FootnoteBacklink) n := node.(*ast.FootnoteBacklink)
is := strconv.Itoa(n.Index) is := strconv.Itoa(n.Index)
_, _ = w.WriteString(`&#160;<a href="#`) _, _ = w.WriteString(`&#160;<a href="#`)
_, _ = w.Write(r.idPrefix(node)) _, _ = w.Write(r.idPrefix(node))
_, _ = w.WriteString(`fnref`) _, _ = w.WriteString(`fnref:`)
if n.RefIndex > 0 {
_, _ = w.WriteString(fmt.Sprintf("%v", n.RefIndex))
}
_ = w.WriteByte(':')
_, _ = w.WriteString(is) _, _ = w.WriteString(is)
_, _ = w.WriteString(`" class="`) _, _ = w.WriteString(`" class="`)
_, _ = w.Write(applyFootnoteTemplate(r.FootnoteConfig.BacklinkClass, n.Index, n.RefCount)) _, _ = w.Write(applyFootnoteTemplate(r.FootnoteConfig.BacklinkClass, n.Index, n.RefCount))
@ -583,8 +556,7 @@ func (r *FootnoteHTMLRenderer) renderFootnoteBacklink(
return gast.WalkContinue, nil return gast.WalkContinue, nil
} }
func (r *FootnoteHTMLRenderer) renderFootnote( func (r *FootnoteHTMLRenderer) renderFootnote(w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) {
w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) {
n := node.(*ast.Footnote) n := node.(*ast.Footnote)
is := strconv.Itoa(n.Index) is := strconv.Itoa(n.Index)
if entering { if entering {
@ -592,7 +564,7 @@ func (r *FootnoteHTMLRenderer) renderFootnote(
_, _ = w.Write(r.idPrefix(node)) _, _ = w.Write(r.idPrefix(node))
_, _ = w.WriteString(`fn:`) _, _ = w.WriteString(`fn:`)
_, _ = w.WriteString(is) _, _ = w.WriteString(is)
_, _ = w.WriteString(`"`) _, _ = w.WriteString(`" role="doc-endnote"`)
if node.Attributes() != nil { if node.Attributes() != nil {
html.RenderAttributes(w, node, html.ListItemAttributeFilter) html.RenderAttributes(w, node, html.ListItemAttributeFilter)
} }
@ -603,10 +575,15 @@ func (r *FootnoteHTMLRenderer) renderFootnote(
return gast.WalkContinue, nil return gast.WalkContinue, nil
} }
func (r *FootnoteHTMLRenderer) renderFootnoteList( func (r *FootnoteHTMLRenderer) renderFootnoteList(w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) {
w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) { tag := "section"
if r.Config.XHTML {
tag = "div"
}
if entering { if entering {
_, _ = w.WriteString(`<div class="footnotes" role="doc-endnotes"`) _, _ = w.WriteString("<")
_, _ = w.WriteString(tag)
_, _ = w.WriteString(` class="footnotes" role="doc-endnotes"`)
if node.Attributes() != nil { if node.Attributes() != nil {
html.RenderAttributes(w, node, html.GlobalAttributeFilter) html.RenderAttributes(w, node, html.GlobalAttributeFilter)
} }
@ -619,7 +596,9 @@ func (r *FootnoteHTMLRenderer) renderFootnoteList(
_, _ = w.WriteString("<ol>\n") _, _ = w.WriteString("<ol>\n")
} else { } else {
_, _ = w.WriteString("</ol>\n") _, _ = w.WriteString("</ol>\n")
_, _ = w.WriteString("</div>\n") _, _ = w.WriteString("</")
_, _ = w.WriteString(tag)
_, _ = w.WriteString(">\n")
} }
return gast.WalkContinue, nil return gast.WalkContinue, nil
} }

View file

@ -38,12 +38,12 @@ func TestFootnoteOptions(t *testing.T) {
), ),
goldmark.WithExtensions( goldmark.WithExtensions(
NewFootnote( NewFootnote(
WithFootnoteIDPrefix("article12-"), WithFootnoteIDPrefix([]byte("article12-")),
WithFootnoteLinkClass("link-class"), WithFootnoteLinkClass([]byte("link-class")),
WithFootnoteBacklinkClass("backlink-class"), WithFootnoteBacklinkClass([]byte("backlink-class")),
WithFootnoteLinkTitle("link-title-%%-^^"), WithFootnoteLinkTitle([]byte("link-title-%%-^^")),
WithFootnoteBacklinkTitle("backlink-title"), WithFootnoteBacklinkTitle([]byte("backlink-title")),
WithFootnoteBacklinkHTML("^"), WithFootnoteBacklinkHTML([]byte("^")),
), ),
), ),
) )
@ -63,19 +63,20 @@ Another one.[^2]
[^2]: Another footnote. [^2]: Another footnote.
`, `,
Expected: `<p>That's some text with a footnote.<sup id="article12-fnref:1"><a href="#article12-fn:1" class="link-class" title="link-title-2-1" role="doc-noteref">1</a></sup></p> Expected: `<p>That's some text with a footnote.<sup id="article12-fnref:1"><a href="#article12-fn:1" class="link-class" title="link-title-2-1" role="doc-noteref">1</a></sup></p>
<p>Same footnote.<sup id="article12-fnref1:1"><a href="#article12-fn:1" class="link-class" title="link-title-2-1" role="doc-noteref">1</a></sup></p> <p>Same footnote.<sup id="article12-fnref:1"><a href="#article12-fn:1" class="link-class" title="link-title-2-1" role="doc-noteref">1</a></sup></p>
<p>Another one.<sup id="article12-fnref:2"><a href="#article12-fn:2" class="link-class" title="link-title-1-2" role="doc-noteref">2</a></sup></p> <p>Another one.<sup id="article12-fnref:2"><a href="#article12-fn:2" class="link-class" title="link-title-1-2" role="doc-noteref">2</a></sup></p>
<div class="footnotes" role="doc-endnotes"> <section class="footnotes" role="doc-endnotes">
<hr> <hr>
<ol> <ol>
<li id="article12-fn:1"> <li id="article12-fn:1" role="doc-endnote">
<p>And that's the footnote.&#160;<a href="#article12-fnref:1" class="backlink-class" title="backlink-title" role="doc-backlink">^</a>&#160;<a href="#article12-fnref1:1" class="backlink-class" title="backlink-title" role="doc-backlink">^</a></p> <p>And that's the footnote.&#160;<a href="#article12-fnref:1" class="backlink-class" title="backlink-title" role="doc-backlink">^</a></p>
</li> </li>
<li id="article12-fn:2"> <li id="article12-fn:2" role="doc-endnote">
<p>Another footnote.&#160;<a href="#article12-fnref:2" class="backlink-class" title="backlink-title" role="doc-backlink">^</a></p> <p>Another footnote.&#160;<a href="#article12-fnref:2" class="backlink-class" title="backlink-title" role="doc-backlink">^</a></p>
</li> </li>
</ol> </ol>
</div>`, </section>
`,
}, },
t, t,
) )
@ -122,19 +123,20 @@ Another one.[^2]
[^2]: Another footnote. [^2]: Another footnote.
`, `,
Expected: `<p>That's some text with a footnote.<sup id="article12-fnref:1"><a href="#article12-fn:1" class="link-class" title="link-title-2-1" role="doc-noteref">1</a></sup></p> Expected: `<p>That's some text with a footnote.<sup id="article12-fnref:1"><a href="#article12-fn:1" class="link-class" title="link-title-2-1" role="doc-noteref">1</a></sup></p>
<p>Same footnote.<sup id="article12-fnref1:1"><a href="#article12-fn:1" class="link-class" title="link-title-2-1" role="doc-noteref">1</a></sup></p> <p>Same footnote.<sup id="article12-fnref:1"><a href="#article12-fn:1" class="link-class" title="link-title-2-1" role="doc-noteref">1</a></sup></p>
<p>Another one.<sup id="article12-fnref:2"><a href="#article12-fn:2" class="link-class" title="link-title-1-2" role="doc-noteref">2</a></sup></p> <p>Another one.<sup id="article12-fnref:2"><a href="#article12-fn:2" class="link-class" title="link-title-1-2" role="doc-noteref">2</a></sup></p>
<div class="footnotes" role="doc-endnotes"> <section class="footnotes" role="doc-endnotes">
<hr> <hr>
<ol> <ol>
<li id="article12-fn:1"> <li id="article12-fn:1" role="doc-endnote">
<p>And that's the footnote.&#160;<a href="#article12-fnref:1" class="backlink-class" title="backlink-title" role="doc-backlink">^</a>&#160;<a href="#article12-fnref1:1" class="backlink-class" title="backlink-title" role="doc-backlink">^</a></p> <p>And that's the footnote.&#160;<a href="#article12-fnref:1" class="backlink-class" title="backlink-title" role="doc-backlink">^</a></p>
</li> </li>
<li id="article12-fn:2"> <li id="article12-fn:2" role="doc-endnote">
<p>Another footnote.&#160;<a href="#article12-fnref:2" class="backlink-class" title="backlink-title" role="doc-backlink">^</a></p> <p>Another footnote.&#160;<a href="#article12-fnref:2" class="backlink-class" title="backlink-title" role="doc-backlink">^</a></p>
</li> </li>
</ol> </ol>
</div>`, </section>
`,
}, },
t, t,
) )

View file

@ -11,9 +11,9 @@ import (
"github.com/yuin/goldmark/util" "github.com/yuin/goldmark/util"
) )
var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-z]+(?:[/#?][-a-zA-Z0-9@:%_\+.~#!?&/=\(\);,'">\^{}\[\]` + "`" + `]*)?`) //nolint:golint,lll var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-z]+(?:[/#?][-a-zA-Z0-9@:%_\+.~#!?&/=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
var urlRegexp = regexp.MustCompile(`^(?:http|https|ftp)://[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-z]+(?::\d+)?(?:[/#?][-a-zA-Z0-9@:%_+.~#$!?&/=\(\);,'">\^{}\[\]` + "`" + `]*)?`) //nolint:golint,lll var urlRegexp = regexp.MustCompile(`^(?:http|https|ftp)://[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-z]+(?::\d+)?(?:[/#?][-a-zA-Z0-9@:%_+.~#$!?&/=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
// An LinkifyConfig struct is a data structure that holds configuration of the // An LinkifyConfig struct is a data structure that holds configuration of the
// Linkify extension. // Linkify extension.
@ -66,12 +66,10 @@ func (o *withLinkifyAllowedProtocols) SetLinkifyOption(p *LinkifyConfig) {
// WithLinkifyAllowedProtocols is a functional option that specify allowed // WithLinkifyAllowedProtocols is a functional option that specify allowed
// protocols in autolinks. Each protocol must end with ':' like // protocols in autolinks. Each protocol must end with ':' like
// 'http:' . // 'http:' .
func WithLinkifyAllowedProtocols[T []byte | string](value []T) LinkifyOption { func WithLinkifyAllowedProtocols(value [][]byte) LinkifyOption {
opt := &withLinkifyAllowedProtocols{} return &withLinkifyAllowedProtocols{
for _, v := range value { value: value,
opt.value = append(opt.value, []byte(v))
} }
return opt
} }
type withLinkifyURLRegexp struct { type withLinkifyURLRegexp struct {
@ -94,6 +92,9 @@ func WithLinkifyURLRegexp(value *regexp.Regexp) LinkifyOption {
} }
} }
// WithLinkifyWWWRegexp is a functional option that specify
// a pattern of the URL without a protocol.
// This pattern must start with 'www.' .
type withLinkifyWWWRegexp struct { type withLinkifyWWWRegexp struct {
value *regexp.Regexp value *regexp.Regexp
} }
@ -106,15 +107,14 @@ func (o *withLinkifyWWWRegexp) SetLinkifyOption(p *LinkifyConfig) {
p.WWWRegexp = o.value p.WWWRegexp = o.value
} }
// WithLinkifyWWWRegexp is a functional option that specify
// a pattern of the URL without a protocol.
// This pattern must start with 'www.' .
func WithLinkifyWWWRegexp(value *regexp.Regexp) LinkifyOption { func WithLinkifyWWWRegexp(value *regexp.Regexp) LinkifyOption {
return &withLinkifyWWWRegexp{ return &withLinkifyWWWRegexp{
value: value, value: value,
} }
} }
// WithLinkifyWWWRegexp is a functional otpion that specify
// a pattern of the email address.
type withLinkifyEmailRegexp struct { type withLinkifyEmailRegexp struct {
value *regexp.Regexp value *regexp.Regexp
} }
@ -127,8 +127,6 @@ func (o *withLinkifyEmailRegexp) SetLinkifyOption(p *LinkifyConfig) {
p.EmailRegexp = o.value p.EmailRegexp = o.value
} }
// WithLinkifyEmailRegexp is a functional otpion that specify
// a pattern of the email address.
func WithLinkifyEmailRegexp(value *regexp.Regexp) LinkifyOption { func WithLinkifyEmailRegexp(value *regexp.Regexp) LinkifyOption {
return &withLinkifyEmailRegexp{ return &withLinkifyEmailRegexp{
value: value, value: value,
@ -275,20 +273,9 @@ func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont
s := segment.WithStop(segment.Start + 1) s := segment.WithStop(segment.Start + 1)
ast.MergeOrAppendTextSegment(parent, s) ast.MergeOrAppendTextSegment(parent, s)
} }
i := m[1] - 1 consumes += m[1]
for ; i > 0; i-- {
c := line[i]
switch c {
case '?', '!', '.', ',', ':', '*', '_', '~':
default:
goto endfor
}
}
endfor:
i++
consumes += i
block.Advance(consumes) block.Advance(consumes)
n := ast.NewTextSegment(text.NewSegment(start, start+i)) n := ast.NewTextSegment(text.NewSegment(start, start+m[1]))
link := ast.NewAutoLink(typ, n) link := ast.NewAutoLink(typ, n)
link.Protocol = protocol link.Protocol = protocol
return link return link
@ -305,8 +292,6 @@ type linkify struct {
// Linkify is an extension that allow you to parse text that seems like a URL. // Linkify is an extension that allow you to parse text that seems like a URL.
var Linkify = &linkify{} var Linkify = &linkify{}
// NewLinkify creates a new [goldmark.Extender] that
// allow you to parse text that seems like a URL.
func NewLinkify(opts ...LinkifyOption) goldmark.Extender { func NewLinkify(opts ...LinkifyOption) goldmark.Extender {
return &linkify{ return &linkify{
options: opts, options: opts,

View file

@ -29,8 +29,8 @@ func TestLinkifyWithAllowedProtocols(t *testing.T) {
), ),
goldmark.WithExtensions( goldmark.WithExtensions(
NewLinkify( NewLinkify(
WithLinkifyAllowedProtocols([]string{ WithLinkifyAllowedProtocols([][]byte{
"ssh:", []byte("ssh:"),
}), }),
WithLinkifyURLRegexp( WithLinkifyURLRegexp(
regexp.MustCompile(`\w+://[^\s]+`), regexp.MustCompile(`\w+://[^\s]+`),

View file

@ -1,2 +0,0 @@
// Package extension is a collection of builtin extensions.
package extension

View file

@ -46,11 +46,10 @@ func (s *strikethroughParser) Trigger() []byte {
func (s *strikethroughParser) Parse(parent gast.Node, block text.Reader, pc parser.Context) gast.Node { func (s *strikethroughParser) Parse(parent gast.Node, block text.Reader, pc parser.Context) gast.Node {
before := block.PrecendingCharacter() before := block.PrecendingCharacter()
line, segment := block.PeekLine() line, segment := block.PeekLine()
node := parser.ScanDelimiter(line, before, 1, defaultStrikethroughDelimiterProcessor) node := parser.ScanDelimiter(line, before, 2, defaultStrikethroughDelimiterProcessor)
if node == nil || node.OriginalLength > 2 || before == '~' { if node == nil {
return nil return nil
} }
node.Segment = segment.WithStop(segment.Start + node.OriginalLength) node.Segment = segment.WithStop(segment.Start + node.OriginalLength)
block.Advance(node.OriginalLength) block.Advance(node.OriginalLength)
pc.PushDelimiter(node) pc.PushDelimiter(node)
@ -86,8 +85,7 @@ func (r *StrikethroughHTMLRenderer) RegisterFuncs(reg renderer.NodeRendererFuncR
// StrikethroughAttributeFilter defines attribute names which dd elements can have. // StrikethroughAttributeFilter defines attribute names which dd elements can have.
var StrikethroughAttributeFilter = html.GlobalAttributeFilter var StrikethroughAttributeFilter = html.GlobalAttributeFilter
func (r *StrikethroughHTMLRenderer) renderStrikethrough( func (r *StrikethroughHTMLRenderer) renderStrikethrough(w util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) {
w util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) {
if entering { if entering {
if n.Attributes() != nil { if n.Attributes() != nil {
_, _ = w.WriteString("<del") _, _ = w.WriteString("<del")

View file

@ -23,7 +23,7 @@ type escapedPipeCell struct {
Transformed bool Transformed bool
} }
// TableCellAlignMethod indicates how are table cells aligned in HTML format. // TableCellAlignMethod indicates how are table cells aligned in HTML format.indicates how are table cells aligned in HTML format.
type TableCellAlignMethod int type TableCellAlignMethod int
const ( const (
@ -122,9 +122,6 @@ func WithTableCellAlignMethod(a TableCellAlignMethod) TableOption {
} }
func isTableDelim(bs []byte) bool { func isTableDelim(bs []byte) bool {
if w, _ := util.IndentWidth(bs, 0); w > 3 {
return false
}
for _, b := range bs { for _, b := range bs {
if !(util.IsSpace(b) || b == '-' || b == '|' || b == ':') { if !(util.IsSpace(b) || b == '-' || b == '|' || b == ':') {
return false return false
@ -181,14 +178,13 @@ func (b *tableParagraphTransformer) Transform(node *gast.Paragraph, reader text.
} }
} }
func (b *tableParagraphTransformer) parseRow(segment text.Segment, func (b *tableParagraphTransformer) parseRow(segment text.Segment, alignments []ast.Alignment, isHeader bool, reader text.Reader, pc parser.Context) *ast.TableRow {
alignments []ast.Alignment, isHeader bool, reader text.Reader, pc parser.Context) *ast.TableRow {
source := reader.Source() source := reader.Source()
segment = segment.TrimLeftSpace(source)
segment = segment.TrimRightSpace(source)
line := segment.Value(source) line := segment.Value(source)
pos := 0 pos := 0
pos += util.TrimLeftSpaceLength(line)
limit := len(line) limit := len(line)
limit -= util.TrimRightSpaceLength(line)
row := ast.NewTableRow(alignments) row := ast.NewTableRow(alignments)
if len(line) > 0 && line[pos] == '|' { if len(line) > 0 && line[pos] == '|' {
pos++ pos++
@ -247,7 +243,6 @@ func (b *tableParagraphTransformer) parseRow(segment text.Segment,
} }
func (b *tableParagraphTransformer) parseDelimiter(segment text.Segment, reader text.Reader) []ast.Alignment { func (b *tableParagraphTransformer) parseDelimiter(segment text.Segment, reader text.Reader) []ast.Alignment {
line := segment.Value(reader.Source()) line := segment.Value(reader.Source())
if !isTableDelim(line) { if !isTableDelim(line) {
return nil return nil
@ -370,8 +365,7 @@ var TableAttributeFilter = html.GlobalAttributeFilter.Extend(
[]byte("width"), // [Deprecated] []byte("width"), // [Deprecated]
) )
func (r *TableHTMLRenderer) renderTable( func (r *TableHTMLRenderer) renderTable(w util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) {
w util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) {
if entering { if entering {
_, _ = w.WriteString("<table") _, _ = w.WriteString("<table")
if n.Attributes() != nil { if n.Attributes() != nil {
@ -393,8 +387,7 @@ var TableHeaderAttributeFilter = html.GlobalAttributeFilter.Extend(
[]byte("valign"), // [Deprecated since HTML4] [Obsolete since HTML5] []byte("valign"), // [Deprecated since HTML4] [Obsolete since HTML5]
) )
func (r *TableHTMLRenderer) renderTableHeader( func (r *TableHTMLRenderer) renderTableHeader(w util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) {
w util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) {
if entering { if entering {
_, _ = w.WriteString("<thead") _, _ = w.WriteString("<thead")
if n.Attributes() != nil { if n.Attributes() != nil {
@ -421,8 +414,7 @@ var TableRowAttributeFilter = html.GlobalAttributeFilter.Extend(
[]byte("valign"), // [Obsolete since HTML5] []byte("valign"), // [Obsolete since HTML5]
) )
func (r *TableHTMLRenderer) renderTableRow( func (r *TableHTMLRenderer) renderTableRow(w util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) {
w util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) {
if entering { if entering {
_, _ = w.WriteString("<tr") _, _ = w.WriteString("<tr")
if n.Attributes() != nil { if n.Attributes() != nil {
@ -449,14 +441,12 @@ var TableThCellAttributeFilter = html.GlobalAttributeFilter.Extend(
[]byte("charoff"), // [Obsolete since HTML5] []byte("charoff"), // [Obsolete since HTML5]
[]byte("colspan"), // [OK] Number of columns that the cell is to span []byte("colspan"), // [OK] Number of columns that the cell is to span
[]byte("headers"), // [OK] This attribute contains a list of space-separated []byte("headers"), // [OK] This attribute contains a list of space-separated strings, each corresponding to the id attribute of the <th> elements that apply to this element
// strings, each corresponding to the id attribute of the <th> elements that apply to this element
[]byte("height"), // [Deprecated since HTML4] [Obsolete since HTML5] []byte("height"), // [Deprecated since HTML4] [Obsolete since HTML5]
[]byte("rowspan"), // [OK] Number of rows that the cell is to span []byte("rowspan"), // [OK] Number of rows that the cell is to span
[]byte("scope"), // [OK] This enumerated attribute defines the cells that []byte("scope"), // [OK] This enumerated attribute defines the cells that the header (defined in the <th>) element relates to [NOT OK in <td>]
// the header (defined in the <th>) element relates to [NOT OK in <td>]
[]byte("valign"), // [Obsolete since HTML5] []byte("valign"), // [Obsolete since HTML5]
[]byte("width"), // [Deprecated since HTML4] [Obsolete since HTML5] []byte("width"), // [Deprecated since HTML4] [Obsolete since HTML5]
@ -472,8 +462,7 @@ var TableTdCellAttributeFilter = html.GlobalAttributeFilter.Extend(
[]byte("charoff"), // [Obsolete since HTML5] []byte("charoff"), // [Obsolete since HTML5]
[]byte("colspan"), // [OK] Number of columns that the cell is to span []byte("colspan"), // [OK] Number of columns that the cell is to span
[]byte("headers"), // [OK] This attribute contains a list of space-separated []byte("headers"), // [OK] This attribute contains a list of space-separated strings, each corresponding to the id attribute of the <th> elements that apply to this element
// strings, each corresponding to the id attribute of the <th> elements that apply to this element
[]byte("height"), // [Deprecated since HTML4] [Obsolete since HTML5] []byte("height"), // [Deprecated since HTML4] [Obsolete since HTML5]
@ -484,15 +473,14 @@ var TableTdCellAttributeFilter = html.GlobalAttributeFilter.Extend(
[]byte("width"), // [Deprecated since HTML4] [Obsolete since HTML5] []byte("width"), // [Deprecated since HTML4] [Obsolete since HTML5]
) )
func (r *TableHTMLRenderer) renderTableCell( func (r *TableHTMLRenderer) renderTableCell(w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) {
w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) {
n := node.(*ast.TableCell) n := node.(*ast.TableCell)
tag := "td" tag := "td"
if n.Parent().Kind() == ast.KindTableHeader { if n.Parent().Kind() == ast.KindTableHeader {
tag = "th" tag = "th"
} }
if entering { if entering {
_, _ = fmt.Fprintf(w, "<%s", tag) fmt.Fprintf(w, "<%s", tag)
if n.Alignment != ast.AlignNone { if n.Alignment != ast.AlignNone {
amethod := r.TableConfig.TableCellAlignMethod amethod := r.TableConfig.TableCellAlignMethod
if amethod == TableCellAlignDefault { if amethod == TableCellAlignDefault {
@ -505,7 +493,7 @@ func (r *TableHTMLRenderer) renderTableCell(
switch amethod { switch amethod {
case TableCellAlignAttribute: case TableCellAlignAttribute:
if _, ok := n.AttributeString("align"); !ok { // Skip align render if overridden if _, ok := n.AttributeString("align"); !ok { // Skip align render if overridden
_, _ = fmt.Fprintf(w, ` align="%s"`, n.Alignment.String()) fmt.Fprintf(w, ` align="%s"`, n.Alignment.String())
} }
case TableCellAlignStyle: case TableCellAlignStyle:
v, ok := n.AttributeString("style") v, ok := n.AttributeString("style")
@ -528,7 +516,7 @@ func (r *TableHTMLRenderer) renderTableCell(
} }
_ = w.WriteByte('>') _ = w.WriteByte('>')
} else { } else {
_, _ = fmt.Fprintf(w, "</%s>\n", tag) fmt.Fprintf(w, "</%s>\n", tag)
} }
return gast.WalkContinue, nil return gast.WalkContinue, nil
} }

View file

@ -355,40 +355,3 @@ bar | baz
t, t,
) )
} }
func TestTableFuzzedPanics(t *testing.T) {
markdown := goldmark.New(
goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithUnsafe(),
),
goldmark.WithExtensions(
NewTable(),
),
)
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: 1,
Description: "This should not panic",
Markdown: "* 0\n-|\n\t0",
Expected: `<ul>
<li>
<table>
<thead>
<tr>
<th>0</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
</tr>
</tbody>
</table>
</li>
</ul>`,
},
t,
)
}

View file

@ -1,8 +1,6 @@
package extension package extension
import ( import (
"regexp"
"github.com/yuin/goldmark" "github.com/yuin/goldmark"
gast "github.com/yuin/goldmark/ast" gast "github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension/ast" "github.com/yuin/goldmark/extension/ast"
@ -11,6 +9,7 @@ import (
"github.com/yuin/goldmark/renderer/html" "github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/text" "github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util" "github.com/yuin/goldmark/util"
"regexp"
) )
var taskListRegexp = regexp.MustCompile(`^\[([\sxX])\]\s*`) var taskListRegexp = regexp.MustCompile(`^\[([\sxX])\]\s*`)
@ -41,9 +40,6 @@ func (s *taskCheckBoxParser) Parse(parent gast.Node, block text.Reader, pc parse
return nil return nil
} }
if parent.HasChildren() {
return nil
}
if _, ok := parent.Parent().(*gast.ListItem); !ok { if _, ok := parent.Parent().(*gast.ListItem); !ok {
return nil return nil
} }
@ -84,22 +80,21 @@ func (r *TaskCheckBoxHTMLRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRe
reg.Register(ast.KindTaskCheckBox, r.renderTaskCheckBox) reg.Register(ast.KindTaskCheckBox, r.renderTaskCheckBox)
} }
func (r *TaskCheckBoxHTMLRenderer) renderTaskCheckBox( func (r *TaskCheckBoxHTMLRenderer) renderTaskCheckBox(w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) {
w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) {
if !entering { if !entering {
return gast.WalkContinue, nil return gast.WalkContinue, nil
} }
n := node.(*ast.TaskCheckBox) n := node.(*ast.TaskCheckBox)
if n.IsChecked { if n.IsChecked {
_, _ = w.WriteString(`<input checked="" disabled="" type="checkbox"`) w.WriteString(`<input checked="" disabled="" type="checkbox"`)
} else { } else {
_, _ = w.WriteString(`<input disabled="" type="checkbox"`) w.WriteString(`<input disabled="" type="checkbox"`)
} }
if r.XHTML { if r.XHTML {
_, _ = w.WriteString(" /> ") w.WriteString(" /> ")
} else { } else {
_, _ = w.WriteString("> ") w.WriteString("> ")
} }
return gast.WalkContinue, nil return gast.WalkContinue, nil
} }

View file

@ -36,25 +36,25 @@ func getUnclosedCounter(pc parser.Context) *unclosedCounter {
type TypographicPunctuation int type TypographicPunctuation int
const ( const (
// LeftSingleQuote is ' . // LeftSingleQuote is '
LeftSingleQuote TypographicPunctuation = iota + 1 LeftSingleQuote TypographicPunctuation = iota + 1
// RightSingleQuote is ' . // RightSingleQuote is '
RightSingleQuote RightSingleQuote
// LeftDoubleQuote is " . // LeftDoubleQuote is "
LeftDoubleQuote LeftDoubleQuote
// RightDoubleQuote is " . // RightDoubleQuote is "
RightDoubleQuote RightDoubleQuote
// EnDash is -- . // EnDash is --
EnDash EnDash
// EmDash is --- . // EmDash is ---
EmDash EmDash
// Ellipsis is ... . // Ellipsis is ...
Ellipsis Ellipsis
// LeftAngleQuote is << . // LeftAngleQuote is <<
LeftAngleQuote LeftAngleQuote
// RightAngleQuote is >> . // RightAngleQuote is >>
RightAngleQuote RightAngleQuote
// Apostrophe is ' . // Apostrophe is '
Apostrophe Apostrophe
typographicPunctuationMax typographicPunctuationMax
@ -115,10 +115,10 @@ func (o *withTypographicSubstitutions) SetTypographerOption(p *TypographerConfig
// WithTypographicSubstitutions is a functional otpion that specify replacement text // WithTypographicSubstitutions is a functional otpion that specify replacement text
// for punctuations. // for punctuations.
func WithTypographicSubstitutions[T []byte | string](values map[TypographicPunctuation]T) TypographerOption { func WithTypographicSubstitutions(values map[TypographicPunctuation][]byte) TypographerOption {
replacements := newDefaultSubstitutions() replacements := newDefaultSubstitutions()
for k, v := range values { for k, v := range values {
replacements[k] = []byte(v) replacements[k] = v
} }
return &withTypographicSubstitutions{replacements} return &withTypographicSubstitutions{replacements}
@ -160,7 +160,7 @@ func NewTypographerParser(opts ...TypographerOption) parser.InlineParser {
} }
func (s *typographerParser) Trigger() []byte { func (s *typographerParser) Trigger() []byte {
return []byte{'\'', '"', '-', '.', ',', '<', '>', '*', '['} return []byte{'\'', '"', '-', '.', '<', '>'}
} }
func (s *typographerParser) Parse(parent gast.Node, block text.Reader, pc parser.Context) gast.Node { func (s *typographerParser) Parse(parent gast.Node, block text.Reader, pc parser.Context) gast.Node {
@ -218,8 +218,7 @@ func (s *typographerParser) Parse(parent gast.Node, block text.Reader, pc parser
if c == '\'' { if c == '\'' {
if s.Substitutions[Apostrophe] != nil { if s.Substitutions[Apostrophe] != nil {
// Handle decade abbrevations such as '90s // Handle decade abbrevations such as '90s
if d.CanOpen && !d.CanClose && len(line) > 3 && if d.CanOpen && !d.CanClose && len(line) > 3 && util.IsNumeric(line[1]) && util.IsNumeric(line[2]) && line[3] == 's' {
util.IsNumeric(line[1]) && util.IsNumeric(line[2]) && line[3] == 's' {
after := rune(' ') after := rune(' ')
if len(line) > 4 { if len(line) > 4 {
after = util.ToRune(line, 4) after = util.ToRune(line, 4)
@ -231,18 +230,9 @@ func (s *typographerParser) Parse(parent gast.Node, block text.Reader, pc parser
return node return node
} }
} }
// special cases: 'twas, 'em, 'net
if len(line) > 1 && (unicode.IsPunct(before) || unicode.IsSpace(before)) &&
(line[1] == 't' || line[1] == 'e' || line[1] == 'n' || line[1] == 'l') {
node := gast.NewString(s.Substitutions[Apostrophe])
node.SetCode(true)
block.Advance(1)
return node
}
// Convert normal apostrophes. This is probably more flexible than necessary but // Convert normal apostrophes. This is probably more flexible than necessary but
// converts any apostrophe in between two alphanumerics. // converts any apostrophe in between two alphanumerics.
if len(line) > 1 && (unicode.IsDigit(before) || unicode.IsLetter(before)) && if len(line) > 1 && (unicode.IsDigit(before) || unicode.IsLetter(before)) && (unicode.IsLetter(util.ToRune(line, 1))) {
(unicode.IsLetter(util.ToRune(line, 1))) {
node := gast.NewString(s.Substitutions[Apostrophe]) node := gast.NewString(s.Substitutions[Apostrophe])
node.SetCode(true) node.SetCode(true)
block.Advance(1) block.Advance(1)
@ -251,15 +241,12 @@ func (s *typographerParser) Parse(parent gast.Node, block text.Reader, pc parser
} }
if s.Substitutions[LeftSingleQuote] != nil && d.CanOpen && !d.CanClose { if s.Substitutions[LeftSingleQuote] != nil && d.CanOpen && !d.CanClose {
nt := LeftSingleQuote nt := LeftSingleQuote
// special cases: Alice's, I'm, Don't, You'd // special cases: Alice's, I'm ,Don't, You'd
if len(line) > 1 && (line[1] == 's' || line[1] == 'm' || line[1] == 't' || line[1] == 'd') && if len(line) > 1 && (line[1] == 's' || line[1] == 'm' || line[1] == 't' || line[1] == 'd') && (len(line) < 3 || util.IsPunct(line[2]) || util.IsSpace(line[2])) {
(len(line) < 3 || util.IsPunct(line[2]) || util.IsSpace(line[2])) {
nt = RightSingleQuote nt = RightSingleQuote
} }
// special cases: I've, I'll, You're // special cases: I've, I'll, You're
if len(line) > 2 && ((line[1] == 'v' && line[2] == 'e') || if len(line) > 2 && ((line[1] == 'v' && line[2] == 'e') || (line[1] == 'l' && line[2] == 'l') || (line[1] == 'r' && line[2] == 'e')) && (len(line) < 4 || util.IsPunct(line[3]) || util.IsSpace(line[3])) {
(line[1] == 'l' && line[2] == 'l') || (line[1] == 'r' && line[2] == 'e')) &&
(len(line) < 4 || util.IsPunct(line[3]) || util.IsSpace(line[3])) {
nt = RightSingleQuote nt = RightSingleQuote
} }
if nt == LeftSingleQuote { if nt == LeftSingleQuote {
@ -271,20 +258,9 @@ func (s *typographerParser) Parse(parent gast.Node, block text.Reader, pc parser
block.Advance(1) block.Advance(1)
return node return node
} }
if s.Substitutions[RightSingleQuote] != nil {
// plural possesive and abbreviations: Smiths', doin'
if len(line) > 1 && unicode.IsSpace(util.ToRune(line, 0)) || unicode.IsPunct(util.ToRune(line, 0)) &&
(len(line) > 2 && !unicode.IsDigit(util.ToRune(line, 1))) {
node := gast.NewString(s.Substitutions[RightSingleQuote])
node.SetCode(true)
block.Advance(1)
return node
}
}
if s.Substitutions[RightSingleQuote] != nil && counter.Single > 0 { if s.Substitutions[RightSingleQuote] != nil && counter.Single > 0 {
isClose := d.CanClose && !d.CanOpen isClose := d.CanClose && !d.CanOpen
maybeClose := d.CanClose && d.CanOpen && len(line) > 1 && unicode.IsPunct(util.ToRune(line, 1)) && maybeClose := d.CanClose && d.CanOpen && len(line) > 1 && (line[1] == ',' || line[1] == '.' || line[1] == '!' || line[1] == '?') && (len(line) == 2 || (len(line) > 2 && util.IsPunct(line[2]) || util.IsSpace(line[2])))
(len(line) == 2 || (len(line) > 2 && util.IsPunct(line[2]) || util.IsSpace(line[2])))
if isClose || maybeClose { if isClose || maybeClose {
node := gast.NewString(s.Substitutions[RightSingleQuote]) node := gast.NewString(s.Substitutions[RightSingleQuote])
node.SetCode(true) node.SetCode(true)
@ -304,8 +280,7 @@ func (s *typographerParser) Parse(parent gast.Node, block text.Reader, pc parser
} }
if s.Substitutions[RightDoubleQuote] != nil && counter.Double > 0 { if s.Substitutions[RightDoubleQuote] != nil && counter.Double > 0 {
isClose := d.CanClose && !d.CanOpen isClose := d.CanClose && !d.CanOpen
maybeClose := d.CanClose && d.CanOpen && len(line) > 1 && (unicode.IsPunct(util.ToRune(line, 1))) && maybeClose := d.CanClose && d.CanOpen && len(line) > 1 && (line[1] == ',' || line[1] == '.' || line[1] == '!' || line[1] == '?') && (len(line) == 2 || (len(line) > 2 && util.IsPunct(line[2]) || util.IsSpace(line[2])))
(len(line) == 2 || (len(line) > 2 && util.IsPunct(line[2]) || util.IsSpace(line[2])))
if isClose || maybeClose { if isClose || maybeClose {
// special case: "Monitor 21"" // special case: "Monitor 21""
if len(line) > 1 && line[1] == '"' && unicode.IsDigit(before) { if len(line) > 1 && line[1] == '"' && unicode.IsDigit(before) {

View file

@ -2,11 +2,7 @@ package goldmark_test
import ( import (
"bytes" "bytes"
"os"
"strconv"
"strings"
"testing" "testing"
"time"
. "github.com/yuin/goldmark" . "github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/ast"
@ -15,15 +11,6 @@ import (
"github.com/yuin/goldmark/testutil" "github.com/yuin/goldmark/testutil"
) )
// testTimeoutMultiplier scales the time budgets used by the performance
// tests in this file. It defaults to 1.0 and may be overridden with the
// GOLDMARK_TEST_TIMEOUT_MULTIPLIER environment variable.
var testTimeoutMultiplier = 1.0

// init reads GOLDMARK_TEST_TIMEOUT_MULTIPLIER once at start-up and keeps the
// default multiplier when the variable is unset or holds an unusable value.
func init() {
	testTimeoutMultiplier = parseTimeoutMultiplier(
		os.Getenv("GOLDMARK_TEST_TIMEOUT_MULTIPLIER"), testTimeoutMultiplier)
}

// parseTimeoutMultiplier converts raw into a multiplier and returns fallback
// when raw is not a finite number greater than zero.
func parseTimeoutMultiplier(raw string, fallback float64) float64 {
	m, err := strconv.ParseFloat(raw, 64)
	// ParseFloat accepts "NaN" and "Inf" without an error. m*0 != 0 is true
	// only for NaN and infinities, and m <= 0 rejects zero and negatives; a
	// budget scaled by any of those would be meaningless.
	if err != nil || m <= 0 || m*0 != 0 {
		return fallback
	}
	return m
}
func TestExtras(t *testing.T) { func TestExtras(t *testing.T) {
markdown := New(WithRendererOptions( markdown := New(WithRendererOptions(
html.WithXHTML(), html.WithXHTML(),
@ -98,124 +85,3 @@ func TestAutogeneratedIDs(t *testing.T) {
t.Errorf("%s\n---------\n%s", source, b.String()) t.Errorf("%s\n---------\n%s", source, b.String())
} }
} }
func nowMillis() int64 {
// TODO: replace UnixNano to UnixMillis(drops Go1.16 support)
return time.Now().UnixNano() / 1000000
}
// TestDeepNestedLabelPerformance converts a long run of '[' followed by an
// equally long run of ']' and fails when building and converting that input
// takes longer than 5000ms scaled by testTimeoutMultiplier.
// It is skipped in short mode.
func TestDeepNestedLabelPerformance(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping performance test in short mode")
	}
	const depth = 50000
	md := New(WithRendererOptions(html.WithXHTML(), html.WithUnsafe()))

	// The measured window includes building the input, as well as converting it.
	begin := nowMillis()
	input := []byte(strings.Repeat("[", depth) + strings.Repeat("]", depth))
	var out bytes.Buffer
	_ = md.Convert(input, &out)
	elapsed := nowMillis() - begin

	budget := int64(5000 * testTimeoutMultiplier)
	if elapsed > budget {
		t.Error("Parsing deep nested labels took too long")
	}
}
// TestManyProcessingInstructionPerformance converts "a " followed by many
// repetitions of "<?" and fails when building and converting that input takes
// longer than 5000ms scaled by testTimeoutMultiplier.
// It is skipped in short mode.
func TestManyProcessingInstructionPerformance(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping performance test in short mode")
	}
	const repeats = 50000
	md := New(WithRendererOptions(html.WithXHTML(), html.WithUnsafe()))

	// The measured window includes building the input, as well as converting it.
	begin := nowMillis()
	input := []byte("a " + strings.Repeat("<?", repeats))
	var out bytes.Buffer
	_ = md.Convert(input, &out)
	elapsed := nowMillis() - begin

	budget := int64(5000 * testTimeoutMultiplier)
	if elapsed > budget {
		t.Error("Parsing processing instructions took too long")
	}
}
// TestManyCDATAPerformance converts many repetitions of "a <![CDATA[" and
// fails when building and converting that input takes longer than 5000ms
// scaled by testTimeoutMultiplier. It is skipped in short mode.
func TestManyCDATAPerformance(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping performance test in short mode")
	}
	markdown := New(WithRendererOptions(
		html.WithXHTML(),
		html.WithUnsafe(),
	))
	started := nowMillis()
	n := 50000
	source := []byte(strings.Repeat("a <![CDATA[", n))
	var b bytes.Buffer
	// The conversion error is deliberately ignored: only elapsed time is checked.
	_ = markdown.Convert(source, &b)
	finished := nowMillis()
	if (finished - started) > int64(5000*testTimeoutMultiplier) {
		// Name the construct this test exercises so a failure is attributable.
		t.Error("Parsing CDATA sections took too long")
	}
}
// TestManyDeclPerformance converts many repetitions of "a <!A " and fails
// when building and converting that input takes longer than 5000ms scaled by
// testTimeoutMultiplier. It is skipped in short mode.
func TestManyDeclPerformance(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping performance test in short mode")
	}
	markdown := New(WithRendererOptions(
		html.WithXHTML(),
		html.WithUnsafe(),
	))
	started := nowMillis()
	n := 50000
	source := []byte(strings.Repeat("a <!A ", n))
	var b bytes.Buffer
	// The conversion error is deliberately ignored: only elapsed time is checked.
	_ = markdown.Convert(source, &b)
	finished := nowMillis()
	if (finished - started) > int64(5000*testTimeoutMultiplier) {
		// Name the construct this test exercises so a failure is attributable.
		t.Error("Parsing declarations took too long")
	}
}
// TestManyCommentPerformance converts many repetitions of "a <!-- " and fails
// when building and converting that input takes longer than 5000ms scaled by
// testTimeoutMultiplier. It is skipped in short mode.
func TestManyCommentPerformance(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping performance test in short mode")
	}
	markdown := New(WithRendererOptions(
		html.WithXHTML(),
		html.WithUnsafe(),
	))
	started := nowMillis()
	n := 50000
	source := []byte(strings.Repeat("a <!-- ", n))
	var b bytes.Buffer
	// The conversion error is deliberately ignored: only elapsed time is checked.
	_ = markdown.Convert(source, &b)
	finished := nowMillis()
	if (finished - started) > int64(5000*testTimeoutMultiplier) {
		// Name the construct this test exercises so a failure is attributable.
		t.Error("Parsing comments took too long")
	}
}
// TestDangerousURLStringCase converts two links whose destinations use the
// javascript: scheme, one lower-case and one mixed-case, and expects both to
// be rendered with an empty href.
func TestDangerousURLStringCase(t *testing.T) {
	input := []byte(`[Basic](javascript:alert('Basic'))
[CaseInsensitive](JaVaScRiPt:alert('CaseInsensitive'))
`)
	want := []byte(`<p><a href="">Basic</a>
<a href="">CaseInsensitive</a></p>
`)

	var out bytes.Buffer
	_ = New().Convert(input, &out)

	if got := out.Bytes(); !bytes.Equal(want, got) {
		t.Error("Dangerous URL should ignore cases:\n" + string(testutil.DiffPretty(want, got)))
	}
}

39
fuzz/fuzz.go Normal file
View file

@ -0,0 +1,39 @@
package fuzz
import (
"bytes"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer/html"
)
// Fuzz feeds data through a goldmark converter configured with auto heading
// IDs, attributes, unsafe XHTML rendering and the extensions listed below.
// It returns 1 when the conversion succeeds and 0 when Convert reports an error.
func Fuzz(data []byte) int {
	parserOpts := goldmark.WithParserOptions(
		parser.WithAutoHeadingID(),
		parser.WithAttribute(),
	)
	rendererOpts := goldmark.WithRendererOptions(
		html.WithUnsafe(),
		html.WithXHTML(),
	)
	extensions := goldmark.WithExtensions(
		extension.DefinitionList,
		extension.Footnote,
		extension.GFM,
		extension.Linkify,
		extension.Table,
		extension.TaskList,
		extension.Typographer,
	)
	md := goldmark.New(parserOpts, rendererOpts, extensions)

	var out bytes.Buffer
	err := md.Convert(data, &out)
	if err == nil {
		return 1
	}
	return 0
}

View file

@ -2,8 +2,8 @@ package fuzz
import ( import (
"bytes" "bytes"
"encoding/json" "fmt"
"os" "io/ioutil"
"testing" "testing"
"github.com/yuin/goldmark" "github.com/yuin/goldmark"
@ -13,45 +13,38 @@ import (
"github.com/yuin/goldmark/util" "github.com/yuin/goldmark/util"
) )
func fuzz(f *testing.F) { var _ = fmt.Printf
f.Fuzz(func(t *testing.T, orig string) {
markdown := goldmark.New(
goldmark.WithParserOptions(
parser.WithAutoHeadingID(),
parser.WithAttribute(),
),
goldmark.WithRendererOptions(
html.WithUnsafe(),
html.WithXHTML(),
),
goldmark.WithExtensions(
extension.DefinitionList,
extension.Footnote,
extension.GFM,
extension.Typographer,
extension.Linkify,
extension.Table,
extension.TaskList,
),
)
var b bytes.Buffer
if err := markdown.Convert(util.StringToReadOnlyBytes(orig), &b); err != nil {
panic(err)
}
})
}
func FuzzDefault(f *testing.F) { func TestFuzz(t *testing.T) {
bs, err := os.ReadFile("../_test/spec.json") crasher := "6dff3d03167cb144d4e2891edac76ee740a77bc7"
data, err := ioutil.ReadFile("crashers/" + crasher)
if err != nil { if err != nil {
return
}
fmt.Printf("%s\n", util.VisualizeSpaces(data))
fmt.Println("||||||||||||||||||||||")
markdown := goldmark.New(
goldmark.WithParserOptions(
parser.WithAutoHeadingID(),
parser.WithAttribute(),
),
goldmark.WithRendererOptions(
html.WithUnsafe(),
html.WithXHTML(),
),
goldmark.WithExtensions(
extension.DefinitionList,
extension.Footnote,
extension.GFM,
extension.Typographer,
extension.Linkify,
extension.Table,
extension.TaskList,
),
)
var b bytes.Buffer
if err := markdown.Convert(data, &b); err != nil {
panic(err) panic(err)
} }
var testCases []map[string]interface{} fmt.Println(b.String())
if err := json.Unmarshal(bs, &testCases); err != nil {
panic(err)
}
for _, c := range testCases {
f.Add(c["markdown"])
}
fuzz(f)
} }

View file

@ -1,9 +0,0 @@
package fuzz
import (
"testing"
)
// FuzzOss is a fuzz target that passes f straight to the package's fuzz
// helper; it adds no seed inputs of its own.
func FuzzOss(f *testing.F) {
fuzz(f)
}

2
go.mod
View file

@ -1,3 +1,3 @@
module github.com/yuin/goldmark module github.com/yuin/goldmark
go 1.19 go 1.15

View file

@ -2,13 +2,12 @@
package goldmark package goldmark
import ( import (
"io"
"github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer" "github.com/yuin/goldmark/renderer"
"github.com/yuin/goldmark/renderer/html" "github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/text" "github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util" "github.com/yuin/goldmark/util"
"io"
) )
// DefaultParser returns a new Parser that is configured by default values. // DefaultParser returns a new Parser that is configured by default values.

View file

@ -12,7 +12,7 @@ import (
var attrNameID = []byte("id") var attrNameID = []byte("id")
var attrNameClass = []byte("class") var attrNameClass = []byte("class")
// An Attribute is an attribute of the markdown elements. // An Attribute is an attribute of the markdown elements
type Attribute struct { type Attribute struct {
Name []byte Name []byte
Value interface{} Value interface{}
@ -89,12 +89,7 @@ func parseAttribute(reader text.Reader) (Attribute, bool) {
reader.Advance(1) reader.Advance(1)
line, _ := reader.PeekLine() line, _ := reader.PeekLine()
i := 0 i := 0
// HTML5 allows any kind of characters as id, but XHTML restricts characters for id. for ; i < len(line) && !util.IsSpace(line[i]) && (!util.IsPunct(line[i]) || line[i] == '_' || line[i] == '-'); i++ {
// CommonMark is basically defined for XHTML(even though it is legacy).
// So we restrict id characters.
for ; i < len(line) && !util.IsSpace(line[i]) &&
(!util.IsPunct(line[i]) || line[i] == '_' ||
line[i] == '-' || line[i] == ':' || line[i] == '.'); i++ {
} }
name := attrNameClass name := attrNameClass
if c == '#' { if c == '#' {
@ -134,11 +129,6 @@ func parseAttribute(reader text.Reader) (Attribute, bool) {
if !ok { if !ok {
return Attribute{}, false return Attribute{}, false
} }
if bytes.Equal(name, attrNameClass) {
if _, ok = value.([]byte); !ok {
return Attribute{}, false
}
}
return Attribute{Name: name, Value: value}, true return Attribute{Name: name, Value: value}, true
} }
@ -146,7 +136,7 @@ func parseAttributeValue(reader text.Reader) (interface{}, bool) {
reader.SkipSpaces() reader.SkipSpaces()
c := reader.Peek() c := reader.Peek()
var value interface{} var value interface{}
var ok bool ok := false
switch c { switch c {
case text.EOF: case text.EOF:
return Attribute{}, false return Attribute{}, false
@ -245,7 +235,7 @@ func scanAttributeDecimal(reader text.Reader, w io.ByteWriter) {
for { for {
c := reader.Peek() c := reader.Peek()
if util.IsNumeric(c) { if util.IsNumeric(c) {
_ = w.WriteByte(c) w.WriteByte(c)
} else { } else {
return return
} }
@ -287,7 +277,7 @@ func parseAttributeNumber(reader text.Reader) (float64, bool) {
} }
scanAttributeDecimal(reader, &buf) scanAttributeDecimal(reader, &buf)
} }
f, err := strconv.ParseFloat(buf.String(), 64) f, err := strconv.ParseFloat(buf.String(), 10)
if err != nil { if err != nil {
return 0, false return 0, false
} }

View file

@ -13,7 +13,7 @@ type HeadingConfig struct {
} }
// SetOption implements SetOptioner. // SetOption implements SetOptioner.
func (b *HeadingConfig) SetOption(name OptionName, _ interface{}) { func (b *HeadingConfig) SetOption(name OptionName, value interface{}) {
switch name { switch name {
case optAutoHeadingID: case optAutoHeadingID:
b.AutoHeadingID = true b.AutoHeadingID = true
@ -91,9 +91,6 @@ func (b *atxHeadingParser) Open(parent ast.Node, reader text.Reader, pc Context)
if i == pos || level > 6 { if i == pos || level > 6 {
return nil, NoChildren return nil, NoChildren
} }
if i == len(line) { // alone '#' (without a new line character)
return ast.NewHeading(level), NoChildren
}
l := util.TrimLeftSpaceLength(line[i:]) l := util.TrimLeftSpaceLength(line[i:])
if l == 0 { if l == 0 {
return nil, NoChildren return nil, NoChildren
@ -135,9 +132,7 @@ func (b *atxHeadingParser) Open(parent ast.Node, reader text.Reader, pc Context)
for _, attr := range attrs { for _, attr := range attrs {
node.SetAttribute(attr.Name, attr.Value) node.SetAttribute(attr.Name, attr.Value)
} }
node.Lines().Append(text.NewSegment( node.Lines().Append(text.NewSegment(segment.Start+start+1-segment.Padding, segment.Start+closureOpen-segment.Padding))
segment.Start+start+1-segment.Padding,
segment.Start+closureOpen-segment.Padding))
} }
} }
} }

View file

@ -28,13 +28,12 @@ func (b *blockquoteParser) process(reader text.Reader) bool {
reader.Advance(pos) reader.Advance(pos)
return true return true
} }
reader.Advance(pos)
if line[pos] == ' ' || line[pos] == '\t' { if line[pos] == ' ' || line[pos] == '\t' {
padding := 0 pos++
if line[pos] == '\t' { }
padding = util.TabWidth(reader.LineOffset()) - 1 reader.Advance(pos)
} if line[pos-1] == '\t' {
reader.AdvanceAndSetPadding(1, padding) reader.SetPadding(2)
} }
return true return true
} }

View file

@ -31,11 +31,6 @@ func (b *codeBlockParser) Open(parent ast.Node, reader text.Reader, pc Context)
node := ast.NewCodeBlock() node := ast.NewCodeBlock()
reader.AdvanceAndSetPadding(pos, padding) reader.AdvanceAndSetPadding(pos, padding)
_, segment = reader.PeekLine() _, segment = reader.PeekLine()
// if code block line starts with a tab, keep a tab as it is.
if segment.Padding != 0 {
preserveLeadingTabInCodeBlock(&segment, reader, 0)
}
segment.ForceNewline = true
node.Lines().Append(segment) node.Lines().Append(segment)
reader.Advance(segment.Len() - 1) reader.Advance(segment.Len() - 1)
return node, NoChildren return node, NoChildren
@ -60,7 +55,6 @@ func (b *codeBlockParser) Continue(node ast.Node, reader text.Reader, pc Context
preserveLeadingTabInCodeBlock(&segment, reader, 0) preserveLeadingTabInCodeBlock(&segment, reader, 0)
} }
segment.ForceNewline = true
node.Lines().Append(segment) node.Lines().Append(segment)
reader.Advance(segment.Len() - 1) reader.Advance(segment.Len() - 1)
return Continue | NoChildren return Continue | NoChildren

View file

@ -3,6 +3,7 @@ package parser
import ( import (
"github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/text" "github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
) )
type codeSpanParser struct { type codeSpanParser struct {
@ -51,7 +52,9 @@ func (s *codeSpanParser) Parse(parent ast.Node, block text.Reader, pc Context) a
} }
} }
} }
node.AppendChild(node, ast.NewRawTextSegment(segment)) if !util.IsBlank(line) {
node.AppendChild(node, ast.NewRawTextSegment(segment))
}
block.AdvanceLine() block.AdvanceLine()
} }
end: end:
@ -59,11 +62,11 @@ end:
// trim first halfspace and last halfspace // trim first halfspace and last halfspace
segment := node.FirstChild().(*ast.Text).Segment segment := node.FirstChild().(*ast.Text).Segment
shouldTrimmed := true shouldTrimmed := true
if !(!segment.IsEmpty() && isSpaceOrNewline(block.Source()[segment.Start])) { if !(!segment.IsEmpty() && block.Source()[segment.Start] == ' ') {
shouldTrimmed = false shouldTrimmed = false
} }
segment = node.LastChild().(*ast.Text).Segment segment = node.LastChild().(*ast.Text).Segment
if !(!segment.IsEmpty() && isSpaceOrNewline(block.Source()[segment.Stop-1])) { if !(!segment.IsEmpty() && block.Source()[segment.Stop-1] == ' ') {
shouldTrimmed = false shouldTrimmed = false
} }
if shouldTrimmed { if shouldTrimmed {
@ -78,7 +81,3 @@ end:
} }
return node return node
} }
// isSpaceOrNewline reports whether c is an ASCII space or a line feed.
func isSpaceOrNewline(c byte) bool {
	switch c {
	case ' ', '\n':
		return true
	default:
		return false
	}
}

View file

@ -30,11 +30,11 @@ type Delimiter struct {
Segment text.Segment Segment text.Segment
// CanOpen is set true if this delimiter can open a span for a new node. // CanOpen is set true if this delimiter can open a span for a new node.
// See https://spec.commonmark.org/0.30/#can-open-emphasis for details. // See https://spec.commonmark.org/0.29/#can-open-emphasis for details.
CanOpen bool CanOpen bool
// CanClose is set true if this delimiter can close a span for a new node. // CanClose is set true if this delimiter can close a span for a new node.
// See https://spec.commonmark.org/0.30/#can-open-emphasis for details. // See https://spec.commonmark.org/0.29/#can-open-emphasis for details.
CanClose bool CanClose bool
// Length is a remaining length of this delimiter. // Length is a remaining length of this delimiter.
@ -66,12 +66,12 @@ func (d *Delimiter) Dump(source []byte, level int) {
var kindDelimiter = ast.NewNodeKind("Delimiter") var kindDelimiter = ast.NewNodeKind("Delimiter")
// Kind implements Node.Kind. // Kind implements Node.Kind
func (d *Delimiter) Kind() ast.NodeKind { func (d *Delimiter) Kind() ast.NodeKind {
return kindDelimiter return kindDelimiter
} }
// Text implements Node.Text. // Text implements Node.Text
func (d *Delimiter) Text(source []byte) []byte { func (d *Delimiter) Text(source []byte) []byte {
return d.Segment.Value(source) return d.Segment.Value(source)
} }
@ -126,7 +126,7 @@ func ScanDelimiter(line []byte, before rune, min int, processor DelimiterProcess
after = util.ToRune(line, j) after = util.ToRune(line, j)
} }
var canOpen, canClose bool canOpen, canClose := false, false
beforeIsPunctuation := util.IsPunctRune(before) beforeIsPunctuation := util.IsPunctRune(before)
beforeIsWhitespace := util.IsSpaceRune(before) beforeIsWhitespace := util.IsSpaceRune(before)
afterIsPunctuation := util.IsPunctRune(after) afterIsPunctuation := util.IsPunctRune(after)
@ -162,11 +162,15 @@ func ProcessDelimiters(bottom ast.Node, pc Context) {
var closer *Delimiter var closer *Delimiter
if bottom != nil { if bottom != nil {
if bottom != lastDelimiter { if bottom != lastDelimiter {
for c := lastDelimiter.PreviousSibling(); c != nil && c != bottom; { for c := lastDelimiter.PreviousSibling(); c != nil; {
if d, ok := c.(*Delimiter); ok { if d, ok := c.(*Delimiter); ok {
closer = d closer = d
} }
c = c.PreviousSibling() prev := c.PreviousSibling()
if prev == bottom {
break
}
c = prev
} }
} }
} else { } else {
@ -185,7 +189,7 @@ func ProcessDelimiters(bottom ast.Node, pc Context) {
found := false found := false
maybeOpener := false maybeOpener := false
var opener *Delimiter var opener *Delimiter
for opener = closer.PreviousDelimiter; opener != nil && opener != bottom; opener = opener.PreviousDelimiter { for opener = closer.PreviousDelimiter; opener != nil; opener = opener.PreviousDelimiter {
if opener.CanOpen && opener.Processor.CanOpenCloser(opener, closer) { if opener.CanOpen && opener.Processor.CanOpenCloser(opener, closer) {
maybeOpener = true maybeOpener = true
consume = opener.CalcComsumption(closer) consume = opener.CalcComsumption(closer)
@ -196,11 +200,10 @@ func ProcessDelimiters(bottom ast.Node, pc Context) {
} }
} }
if !found { if !found {
next := closer.NextDelimiter
if !maybeOpener && !closer.CanOpen { if !maybeOpener && !closer.CanOpen {
pc.RemoveDelimiter(closer) pc.RemoveDelimiter(closer)
} }
closer = next closer = closer.NextDelimiter
continue continue
} }
opener.ConsumeCharacters(consume) opener.ConsumeCharacters(consume)

View file

@ -72,6 +72,10 @@ func (b *fencedCodeBlockParser) Continue(node ast.Node, reader text.Reader, pc C
line, segment := reader.PeekLine() line, segment := reader.PeekLine()
fdata := pc.Get(fencedCodeBlockInfoKey).(*fenceData) fdata := pc.Get(fencedCodeBlockInfoKey).(*fenceData)
// if code block line starts with a tab, keep a tab as it is.
if segment.Padding != 0 {
preserveLeadingTabInCodeBlock(&segment, reader, fdata.indent)
}
w, pos := util.IndentWidth(line, reader.LineOffset()) w, pos := util.IndentWidth(line, reader.LineOffset())
if w < 4 { if w < 4 {
i := pos i := pos
@ -83,24 +87,13 @@ func (b *fencedCodeBlockParser) Continue(node ast.Node, reader text.Reader, pc C
if line[len(line)-1] != '\n' { if line[len(line)-1] != '\n' {
newline = 0 newline = 0
} }
reader.Advance(segment.Stop - segment.Start - newline + segment.Padding) reader.Advance(segment.Stop - segment.Start - newline - segment.Padding)
return Close return Close
} }
} }
pos, padding := util.IndentPositionPadding(line, reader.LineOffset(), segment.Padding, fdata.indent) pos, padding := util.DedentPositionPadding(line, reader.LineOffset(), segment.Padding, fdata.indent)
if pos < 0 {
pos = util.FirstNonSpacePosition(line)
if pos < 0 {
pos = 0
}
padding = 0
}
seg := text.NewSegmentPadding(segment.Start+pos, segment.Stop, padding) seg := text.NewSegmentPadding(segment.Start+pos, segment.Stop, padding)
// if code block line starts with a tab, keep a tab as it is.
if padding != 0 {
preserveLeadingTabInCodeBlock(&seg, reader, fdata.indent)
}
seg.ForceNewline = true // EOF as newline
node.Lines().Append(seg) node.Lines().Append(seg)
reader.AdvanceAndSetPadding(segment.Stop-segment.Start-pos-1, padding) reader.AdvanceAndSetPadding(segment.Stop-segment.Start-pos-1, padding)
return Continue | NoChildren return Continue | NoChildren

View file

@ -61,8 +61,8 @@ var allowedBlockTags = map[string]bool{
"option": true, "option": true,
"p": true, "p": true,
"param": true, "param": true,
"search": true,
"section": true, "section": true,
"source": true,
"summary": true, "summary": true,
"table": true, "table": true,
"tbody": true, "tbody": true,
@ -76,8 +76,8 @@ var allowedBlockTags = map[string]bool{
"ul": true, "ul": true,
} }
var htmlBlockType1OpenRegexp = regexp.MustCompile(`(?i)^[ ]{0,3}<(script|pre|style|textarea)(?:\s.*|>.*|/>.*|)(?:\r\n|\n)?$`) //nolint:golint,lll var htmlBlockType1OpenRegexp = regexp.MustCompile(`(?i)^[ ]{0,3}<(script|pre|style)(?:\s.*|>.*|/>.*|)\n?$`)
var htmlBlockType1CloseRegexp = regexp.MustCompile(`(?i)^.*</(?:script|pre|style|textarea)>.*`) var htmlBlockType1CloseRegexp = regexp.MustCompile(`(?i)^.*</(?:script|pre|style)>.*`)
var htmlBlockType2OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<!\-\-`) var htmlBlockType2OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<!\-\-`)
var htmlBlockType2Close = []byte{'-', '-', '>'} var htmlBlockType2Close = []byte{'-', '-', '>'}
@ -85,15 +85,15 @@ var htmlBlockType2Close = []byte{'-', '-', '>'}
var htmlBlockType3OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\?`) var htmlBlockType3OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\?`)
var htmlBlockType3Close = []byte{'?', '>'} var htmlBlockType3Close = []byte{'?', '>'}
var htmlBlockType4OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<![A-Z]+.*(?:\r\n|\n)?$`) var htmlBlockType4OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<![A-Z]+.*\n?$`)
var htmlBlockType4Close = []byte{'>'} var htmlBlockType4Close = []byte{'>'}
var htmlBlockType5OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\!\[CDATA\[`) var htmlBlockType5OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\!\[CDATA\[`)
var htmlBlockType5Close = []byte{']', ']', '>'} var htmlBlockType5Close = []byte{']', ']', '>'}
var htmlBlockType6Regexp = regexp.MustCompile(`^[ ]{0,3}<(?:/[ ]*)?([a-zA-Z]+[a-zA-Z0-9\-]*)(?:[ ].*|>.*|/>.*|)(?:\r\n|\n)?$`) //nolint:golint,lll var htmlBlockType6Regexp = regexp.MustCompile(`^[ ]{0,3}</?([a-zA-Z0-9]+)(?:\s.*|>.*|/>.*|)\n?$`)
var htmlBlockType7Regexp = regexp.MustCompile(`^[ ]{0,3}<(/[ ]*)?([a-zA-Z]+[a-zA-Z0-9\-]*)(` + attributePattern + `*)[ ]*(?:>|/>)[ ]*(?:\r\n|\n)?$`) //nolint:golint,lll var htmlBlockType7Regexp = regexp.MustCompile(`^[ ]{0,3}<(/)?([a-zA-Z0-9\-]+)(` + attributePattern + `*)(:?>|/>)\s*\n?$`)
type htmlBlockParser struct { type htmlBlockParser struct {
} }
@ -135,8 +135,7 @@ func (b *htmlBlockParser) Open(parent ast.Node, reader text.Reader, pc Context)
_, ok := allowedBlockTags[tagName] _, ok := allowedBlockTags[tagName]
if ok { if ok {
node = ast.NewHTMLBlock(ast.HTMLBlockType6) node = ast.NewHTMLBlock(ast.HTMLBlockType6)
} else if tagName != "script" && tagName != "style" && } else if tagName != "script" && tagName != "style" && tagName != "pre" && !ast.IsParagraph(last) && !(isCloseTag && hasAttr) { // type 7 can not interrupt paragraph
tagName != "pre" && !ast.IsParagraph(last) && !(isCloseTag && hasAttr) { // type 7 can not interrupt paragraph
node = ast.NewHTMLBlock(ast.HTMLBlockType7) node = ast.NewHTMLBlock(ast.HTMLBlockType7)
} }
} }
@ -150,7 +149,7 @@ func (b *htmlBlockParser) Open(parent ast.Node, reader text.Reader, pc Context)
} }
} }
if node != nil { if node != nil {
reader.Advance(segment.Len() - util.TrimRightSpaceLength(line)) reader.Advance(segment.Len() - 1)
node.Lines().Append(segment) node.Lines().Append(segment)
return node, NoChildren return node, NoChildren
} }
@ -173,7 +172,7 @@ func (b *htmlBlockParser) Continue(node ast.Node, reader text.Reader, pc Context
} }
if htmlBlockType1CloseRegexp.Match(line) { if htmlBlockType1CloseRegexp.Match(line) {
htmlBlock.ClosureLine = segment htmlBlock.ClosureLine = segment
reader.Advance(segment.Len() - util.TrimRightSpaceLength(line)) reader.Advance(segment.Len() - 1)
return Close return Close
} }
case ast.HTMLBlockType2: case ast.HTMLBlockType2:
@ -202,7 +201,7 @@ func (b *htmlBlockParser) Continue(node ast.Node, reader text.Reader, pc Context
} }
if bytes.Contains(line, closurePattern) { if bytes.Contains(line, closurePattern) {
htmlBlock.ClosureLine = segment htmlBlock.ClosureLine = segment
reader.Advance(segment.Len()) reader.Advance(segment.Len() - 1)
return Close return Close
} }
@ -212,7 +211,7 @@ func (b *htmlBlockParser) Continue(node ast.Node, reader text.Reader, pc Context
} }
} }
node.Lines().Append(segment) node.Lines().Append(segment)
reader.Advance(segment.Len() - util.TrimRightSpaceLength(line)) reader.Advance(segment.Len() - 1)
return Continue | NoChildren return Continue | NoChildren
} }

View file

@ -48,13 +48,6 @@ func (s *linkLabelState) Kind() ast.NodeKind {
return kindLinkLabelState return kindLinkLabelState
} }
// linkLabelStateLength returns the distance from the start offset of v.First's
// segment to the stop offset of v.Last's segment. It returns 0 when v, v.First
// or v.Last is nil.
func linkLabelStateLength(v *linkLabelState) int {
	if v != nil && v.First != nil && v.Last != nil {
		return v.Last.Segment.Stop - v.First.Segment.Start
	}
	return 0
}
func pushLinkLabelState(pc Context, v *linkLabelState) { func pushLinkLabelState(pc Context, v *linkLabelState) {
tlist := pc.Get(linkLabelStateKey) tlist := pc.Get(linkLabelStateKey)
var list *linkLabelState var list *linkLabelState
@ -126,13 +119,13 @@ func (s *linkParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.N
if line[0] == '!' { if line[0] == '!' {
if len(line) > 1 && line[1] == '[' { if len(line) > 1 && line[1] == '[' {
block.Advance(1) block.Advance(1)
pushLinkBottom(pc) pc.Set(linkBottom, pc.LastDelimiter())
return processLinkLabelOpen(block, segment.Start+1, true, pc) return processLinkLabelOpen(block, segment.Start+1, true, pc)
} }
return nil return nil
} }
if line[0] == '[' { if line[0] == '[' {
pushLinkBottom(pc) pc.Set(linkBottom, pc.LastDelimiter())
return processLinkLabelOpen(block, segment.Start, false, pc) return processLinkLabelOpen(block, segment.Start, false, pc)
} }
@ -143,22 +136,12 @@ func (s *linkParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.N
} }
last := tlist.(*linkLabelState).Last last := tlist.(*linkLabelState).Last
if last == nil { if last == nil {
_ = popLinkBottom(pc)
return nil return nil
} }
block.Advance(1) block.Advance(1)
removeLinkLabelState(pc, last) removeLinkLabelState(pc, last)
// CommonMark spec says: if s.containsLink(last) { // a link in a link text is not allowed
// > A link label can have at most 999 characters inside the square brackets.
if linkLabelStateLength(tlist.(*linkLabelState)) > 998 {
ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment) ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment)
_ = popLinkBottom(pc)
return nil
}
if !last.IsImage && s.containsLink(last) { // a link in a link text is not allowed
ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment)
_ = popLinkBottom(pc)
return nil return nil
} }
@ -172,7 +155,6 @@ func (s *linkParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.N
link, hasValue = s.parseReferenceLink(parent, last, block, pc) link, hasValue = s.parseReferenceLink(parent, last, block, pc)
if link == nil && hasValue { if link == nil && hasValue {
ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment) ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment)
_ = popLinkBottom(pc)
return nil return nil
} }
} }
@ -182,18 +164,9 @@ func (s *linkParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.N
block.SetPosition(l, pos) block.SetPosition(l, pos)
ssegment := text.NewSegment(last.Segment.Stop, segment.Start) ssegment := text.NewSegment(last.Segment.Stop, segment.Start)
maybeReference := block.Value(ssegment) maybeReference := block.Value(ssegment)
// CommonMark spec says:
// > A link label can have at most 999 characters inside the square brackets.
if len(maybeReference) > 999 {
ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment)
_ = popLinkBottom(pc)
return nil
}
ref, ok := pc.Reference(util.ToLinkReference(maybeReference)) ref, ok := pc.Reference(util.ToLinkReference(maybeReference))
if !ok { if !ok {
ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment) ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment)
_ = popLinkBottom(pc)
return nil return nil
} }
link = ast.NewLink() link = ast.NewLink()
@ -209,17 +182,15 @@ func (s *linkParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.N
return link return link
} }
func (s *linkParser) containsLink(n ast.Node) bool { func (s *linkParser) containsLink(last *linkLabelState) bool {
if n == nil { if last.IsImage {
return false return false
} }
for c := n; c != nil; c = c.NextSibling() { var c ast.Node
for c = last; c != nil; c = c.NextSibling() {
if _, ok := c.(*ast.Link); ok { if _, ok := c.(*ast.Link); ok {
return true return true
} }
if s.containsLink(c.FirstChild()) {
return true
}
} }
return false return false
} }
@ -236,7 +207,11 @@ func processLinkLabelOpen(block text.Reader, pos int, isImage bool, pc Context)
} }
func (s *linkParser) processLinkLabel(parent ast.Node, link *ast.Link, last *linkLabelState, pc Context) { func (s *linkParser) processLinkLabel(parent ast.Node, link *ast.Link, last *linkLabelState, pc Context) {
bottom := popLinkBottom(pc) var bottom ast.Node
if v := pc.Get(linkBottom); v != nil {
bottom = v.(ast.Node)
}
pc.Set(linkBottom, nil)
ProcessDelimiters(bottom, pc) ProcessDelimiters(bottom, pc)
for c := last.NextSibling(); c != nil; { for c := last.NextSibling(); c != nil; {
next := c.NextSibling() next := c.NextSibling()
@ -246,39 +221,21 @@ func (s *linkParser) processLinkLabel(parent ast.Node, link *ast.Link, last *lin
} }
} }
var linkFindClosureOptions text.FindClosureOptions = text.FindClosureOptions{ func (s *linkParser) parseReferenceLink(parent ast.Node, last *linkLabelState, block text.Reader, pc Context) (*ast.Link, bool) {
Nesting: false,
Newline: true,
Advance: true,
}
func (s *linkParser) parseReferenceLink(parent ast.Node, last *linkLabelState,
block text.Reader, pc Context) (*ast.Link, bool) {
_, orgpos := block.Position() _, orgpos := block.Position()
block.Advance(1) // skip '[' block.Advance(1) // skip '['
segments, found := block.FindClosure('[', ']', linkFindClosureOptions) line, segment := block.PeekLine()
if !found { endIndex := util.FindClosure(line, '[', ']', false, true)
if endIndex < 0 {
return nil, false return nil, false
} }
var maybeReference []byte block.Advance(endIndex + 1)
if segments.Len() == 1 { // avoid allocate a new byte slice ssegment := segment.WithStop(segment.Start + endIndex)
maybeReference = block.Value(segments.At(0)) maybeReference := block.Value(ssegment)
} else {
maybeReference = []byte{}
for i := 0; i < segments.Len(); i++ {
s := segments.At(i)
maybeReference = append(maybeReference, block.Value(s)...)
}
}
if util.IsBlank(maybeReference) { // collapsed reference link if util.IsBlank(maybeReference) { // collapsed reference link
s := text.NewSegment(last.Segment.Stop, orgpos.Start-1) ssegment = text.NewSegment(last.Segment.Stop, orgpos.Start-1)
maybeReference = block.Value(s) maybeReference = block.Value(ssegment)
}
// CommonMark spec says:
// > A link label can have at most 999 characters inside the square brackets.
if len(maybeReference) > 999 {
return nil, true
} }
ref, ok := pc.Reference(util.ToLinkReference(maybeReference)) ref, ok := pc.Reference(util.ToLinkReference(maybeReference))
@ -381,61 +338,34 @@ func parseLinkTitle(block text.Reader) ([]byte, bool) {
if opener == '(' { if opener == '(' {
closer = ')' closer = ')'
} }
block.Advance(1) savedLine, savedPosition := block.Position()
segments, found := block.FindClosure(opener, closer, linkFindClosureOptions) var title []byte
if found { for i := 0; ; i++ {
if segments.Len() == 1 { line, _ := block.PeekLine()
return block.Value(segments.At(0)), true if line == nil {
block.SetPosition(savedLine, savedPosition)
return nil, false
} }
var title []byte offset := 0
for i := 0; i < segments.Len(); i++ { if i == 0 {
s := segments.At(i) offset = 1
title = append(title, block.Value(s)...)
} }
return title, true pos := util.FindClosure(line[offset:], opener, closer, false, true)
if pos < 0 {
title = append(title, line[offset:]...)
block.AdvanceLine()
continue
}
pos += offset + 1 // 1: closer
block.Advance(pos)
if i == 0 { // avoid allocating new slice
return line[offset : pos-1], true
}
return append(title, line[offset:pos-1]...), true
} }
return nil, false
}
func pushLinkBottom(pc Context) {
bottoms := pc.Get(linkBottom)
b := pc.LastDelimiter()
if bottoms == nil {
pc.Set(linkBottom, b)
return
}
if s, ok := bottoms.([]ast.Node); ok {
pc.Set(linkBottom, append(s, b))
return
}
pc.Set(linkBottom, []ast.Node{bottoms.(ast.Node), b})
}
func popLinkBottom(pc Context) ast.Node {
bottoms := pc.Get(linkBottom)
if bottoms == nil {
return nil
}
if v, ok := bottoms.(ast.Node); ok {
pc.Set(linkBottom, nil)
return v
}
s := bottoms.([]ast.Node)
v := s[len(s)-1]
n := s[0 : len(s)-1]
switch len(n) {
case 0:
pc.Set(linkBottom, nil)
case 1:
pc.Set(linkBottom, n[0])
default:
pc.Set(linkBottom, s[0:len(s)-1])
}
return v
} }
func (s *linkParser) CloseBlock(parent ast.Node, block text.Reader, pc Context) { func (s *linkParser) CloseBlock(parent ast.Node, block text.Reader, pc Context) {
pc.Set(linkBottom, nil)
tlist := pc.Get(linkLabelStateKey) tlist := pc.Get(linkLabelStateKey)
if tlist == nil { if tlist == nil {
return return

View file

@ -52,7 +52,7 @@ func (p *linkReferenceParagraphTransformer) Transform(node *ast.Paragraph, reade
func parseLinkReferenceDefinition(block text.Reader, pc Context) (int, int) { func parseLinkReferenceDefinition(block text.Reader, pc Context) (int, int) {
block.SkipSpaces() block.SkipSpaces()
line, _ := block.PeekLine() line, segment := block.PeekLine()
if line == nil { if line == nil {
return -1, -1 return -1, -1
} }
@ -67,33 +67,39 @@ func parseLinkReferenceDefinition(block text.Reader, pc Context) (int, int) {
if line[pos] != '[' { if line[pos] != '[' {
return -1, -1 return -1, -1
} }
open := segment.Start + pos + 1
closes := -1
block.Advance(pos + 1) block.Advance(pos + 1)
segments, found := block.FindClosure('[', ']', linkFindClosureOptions) for {
if !found { line, segment = block.PeekLine()
if line == nil {
return -1, -1
}
closure := util.FindClosure(line, '[', ']', false, false)
if closure > -1 {
closes = segment.Start + closure
next := closure + 1
if next >= len(line) || line[next] != ':' {
return -1, -1
}
block.Advance(next + 1)
break
}
block.AdvanceLine()
}
if closes < 0 {
return -1, -1 return -1, -1
} }
var label []byte label := block.Value(text.NewSegment(open, closes))
if segments.Len() == 1 {
label = block.Value(segments.At(0))
} else {
for i := 0; i < segments.Len(); i++ {
s := segments.At(i)
label = append(label, block.Value(s)...)
}
}
if util.IsBlank(label) { if util.IsBlank(label) {
return -1, -1 return -1, -1
} }
if block.Peek() != ':' {
return -1, -1
}
block.Advance(1)
block.SkipSpaces() block.SkipSpaces()
destination, ok := parseLinkDestination(block) destination, ok := parseLinkDestination(block)
if !ok { if !ok {
return -1, -1 return -1, -1
} }
line, _ = block.PeekLine() line, segment = block.PeekLine()
isNewLine := line == nil || util.IsBlank(line) isNewLine := line == nil || util.IsBlank(line)
endLine, _ := block.Position() endLine, _ := block.Position()
@ -111,40 +117,45 @@ func parseLinkReferenceDefinition(block text.Reader, pc Context) (int, int) {
return -1, -1 return -1, -1
} }
block.Advance(1) block.Advance(1)
open = -1
closes = -1
closer := opener closer := opener
if opener == '(' { if opener == '(' {
closer = ')' closer = ')'
} }
segments, found = block.FindClosure(opener, closer, linkFindClosureOptions) for {
if !found { line, segment = block.PeekLine()
if !isNewLine { if line == nil {
return -1, -1 return -1, -1
} }
ref := NewReference(label, destination, nil) if open < 0 {
pc.AddReference(ref) open = segment.Start
block.AdvanceLine()
return startLine, endLine + 1
}
var title []byte
if segments.Len() == 1 {
title = block.Value(segments.At(0))
} else {
for i := 0; i < segments.Len(); i++ {
s := segments.At(i)
title = append(title, block.Value(s)...)
} }
closure := util.FindClosure(line, opener, closer, false, true)
if closure > -1 {
closes = segment.Start + closure
block.Advance(closure + 1)
break
}
block.AdvanceLine()
}
if closes < 0 {
return -1, -1
} }
line, _ = block.PeekLine() line, segment = block.PeekLine()
if line != nil && !util.IsBlank(line) { if line != nil && !util.IsBlank(line) {
if !isNewLine { if !isNewLine {
return -1, -1 return -1, -1
} }
title := block.Value(text.NewSegment(open, closes))
ref := NewReference(label, destination, title) ref := NewReference(label, destination, title)
pc.AddReference(ref) pc.AddReference(ref)
return startLine, endLine return startLine, endLine
} }
title := block.Value(text.NewSegment(open, closes))
endLine, _ = block.Position() endLine, _ = block.Position()
ref := NewReference(label, destination, title) ref := NewReference(label, destination, title)
pc.AddReference(ref) pc.AddReference(ref)

View file

@ -1,11 +1,10 @@
package parser package parser
import ( import (
"strconv"
"github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/text" "github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util" "github.com/yuin/goldmark/util"
"strconv"
) )
type listItemType int type listItemType int
@ -16,13 +15,9 @@ const (
orderedList orderedList
) )
var skipListParserKey = NewContextKey()
var emptyListItemWithBlankLines = NewContextKey()
var listItemFlagValue interface{} = true
// Same as // Same as
// `^(([ ]*)([\-\*\+]))(\s+.*)?\n?$`.FindSubmatchIndex or // `^(([ ]*)([\-\*\+]))(\s+.*)?\n?$`.FindSubmatchIndex or
// `^(([ ]*)(\d{1,9}[\.\)]))(\s+.*)?\n?$`.FindSubmatchIndex. // `^(([ ]*)(\d{1,9}[\.\)]))(\s+.*)?\n?$`.FindSubmatchIndex
func parseListItem(line []byte) ([6]int, listItemType) { func parseListItem(line []byte) ([6]int, listItemType) {
i := 0 i := 0
l := len(line) l := len(line)
@ -89,7 +84,7 @@ func matchesListItem(source []byte, strict bool) ([6]int, listItemType) {
} }
func calcListOffset(source []byte, match [6]int) int { func calcListOffset(source []byte, match [6]int) int {
var offset int offset := 0
if match[4] < 0 || util.IsBlank(source[match[4]:]) { // list item starts with a blank line if match[4] < 0 || util.IsBlank(source[match[4]:]) { // list item starts with a blank line
offset = 1 offset = 1
} else { } else {
@ -127,8 +122,8 @@ func (b *listParser) Trigger() []byte {
func (b *listParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) { func (b *listParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
last := pc.LastOpenedBlock().Node last := pc.LastOpenedBlock().Node
if _, lok := last.(*ast.List); lok || pc.Get(skipListParserKey) != nil { if _, lok := last.(*ast.List); lok || pc.Get(skipListParser) != nil {
pc.Set(skipListParserKey, nil) pc.Set(skipListParser, nil)
return nil, NoChildren return nil, NoChildren
} }
line, _ := reader.PeekLine() line, _ := reader.PeekLine()
@ -148,7 +143,7 @@ func (b *listParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.
return nil, NoChildren return nil, NoChildren
} }
//an empty list item cannot interrupt a paragraph: //an empty list item cannot interrupt a paragraph:
if match[4] < 0 || util.IsBlank(line[match[4]:match[5]]) { if match[5]-match[4] == 1 {
return nil, NoChildren return nil, NoChildren
} }
} }
@ -158,7 +153,6 @@ func (b *listParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.
if start > -1 { if start > -1 {
node.Start = start node.Start = start
} }
pc.Set(emptyListItemWithBlankLines, nil)
return node, HasChildren return node, HasChildren
} }
@ -166,9 +160,12 @@ func (b *listParser) Continue(node ast.Node, reader text.Reader, pc Context) Sta
list := node.(*ast.List) list := node.(*ast.List)
line, _ := reader.PeekLine() line, _ := reader.PeekLine()
if util.IsBlank(line) { if util.IsBlank(line) {
if node.LastChild().ChildCount() == 0 { // A list item can begin with at most one blank line
pc.Set(emptyListItemWithBlankLines, listItemFlagValue) if node.ChildCount() == 1 && node.LastChild().ChildCount() == 0 {
return Close
} }
reader.Advance(len(line)-1)
return Continue | HasChildren return Continue | HasChildren
} }
@ -180,23 +177,10 @@ func (b *listParser) Continue(node ast.Node, reader text.Reader, pc Context) Sta
// - a // - a
// - b <--- current line // - b <--- current line
// it maybe a new child of the list. // it maybe a new child of the list.
//
// Empty list items can have multiple blanklines
//
// - <--- 1st item is an empty thus "offset" is unknown
//
//
// - <--- current line
//
// -> 1 list with 2 blank items
//
// So if the last item is an empty, it maybe a new child of the list.
//
offset := lastOffset(node) offset := lastOffset(node)
lastIsEmpty := node.LastChild().ChildCount() == 0
indent, _ := util.IndentWidth(line, reader.LineOffset()) indent, _ := util.IndentWidth(line, reader.LineOffset())
if indent < offset || lastIsEmpty { if indent < offset {
if indent < 4 { if indent < 4 {
match, typ := matchesListItem(line, false) // may have a leading spaces more than 3 match, typ := matchesListItem(line, false) // may have a leading spaces more than 3
if typ != notList && match[1]-offset < 4 { if typ != notList && match[1]-offset < 4 {
@ -218,27 +202,10 @@ func (b *listParser) Continue(node ast.Node, reader text.Reader, pc Context) Sta
return Close return Close
} }
} }
return Continue | HasChildren return Continue | HasChildren
} }
} }
if !lastIsEmpty {
return Close
}
}
if lastIsEmpty && indent < offset {
return Close
}
// Non empty items can not exist next to an empty list item
// with blank lines. So we need to close the current list
//
// -
//
// foo
//
// -> 1 list with 1 blank items and 1 paragraph
if pc.Get(emptyListItemWithBlankLines) != nil {
return Close return Close
} }
return Continue | HasChildren return Continue | HasChildren
@ -250,14 +217,14 @@ func (b *listParser) Close(node ast.Node, reader text.Reader, pc Context) {
for c := node.FirstChild(); c != nil && list.IsTight; c = c.NextSibling() { for c := node.FirstChild(); c != nil && list.IsTight; c = c.NextSibling() {
if c.FirstChild() != nil && c.FirstChild() != c.LastChild() { if c.FirstChild() != nil && c.FirstChild() != c.LastChild() {
for c1 := c.FirstChild().NextSibling(); c1 != nil; c1 = c1.NextSibling() { for c1 := c.FirstChild().NextSibling(); c1 != nil; c1 = c1.NextSibling() {
if c1.HasBlankPreviousLines() { if bl, ok := c1.(ast.Node); ok && bl.HasBlankPreviousLines() {
list.IsTight = false list.IsTight = false
break break
} }
} }
} }
if c != node.FirstChild() { if c != node.FirstChild() {
if c.HasBlankPreviousLines() { if bl, ok := c.(ast.Node); ok && bl.HasBlankPreviousLines() {
list.IsTight = false list.IsTight = false
} }
} }
@ -265,9 +232,8 @@ func (b *listParser) Close(node ast.Node, reader text.Reader, pc Context) {
if list.IsTight { if list.IsTight {
for child := node.FirstChild(); child != nil; child = child.NextSibling() { for child := node.FirstChild(); child != nil; child = child.NextSibling() {
for gc := child.FirstChild(); gc != nil; { for gc := child.FirstChild(); gc != nil; gc = gc.NextSibling() {
paragraph, ok := gc.(*ast.Paragraph) paragraph, ok := gc.(*ast.Paragraph)
gc = gc.NextSibling()
if ok { if ok {
textBlock := ast.NewTextBlock() textBlock := ast.NewTextBlock()
textBlock.SetLines(paragraph.Lines()) textBlock.SetLines(paragraph.Lines())

View file

@ -17,6 +17,9 @@ func NewListItemParser() BlockParser {
return defaultListItemParser return defaultListItemParser
} }
var skipListParser = NewContextKey()
var skipListParserValue interface{} = true
func (b *listItemParser) Trigger() []byte { func (b *listItemParser) Trigger() []byte {
return []byte{'-', '+', '*', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'} return []byte{'-', '+', '*', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}
} }
@ -35,12 +38,9 @@ func (b *listItemParser) Open(parent ast.Node, reader text.Reader, pc Context) (
if match[1]-offset > 3 { if match[1]-offset > 3 {
return nil, NoChildren return nil, NoChildren
} }
pc.Set(emptyListItemWithBlankLines, nil)
itemOffset := calcListOffset(line, match) itemOffset := calcListOffset(line, match)
node := ast.NewListItem(match[3] + itemOffset) node := ast.NewListItem(match[3] + itemOffset)
if match[4] < 0 || util.IsBlank(line[match[4]:match[5]]) { if match[4] < 0 || match[5]-match[4] == 1 {
return node, NoChildren return node, NoChildren
} }
@ -53,23 +53,20 @@ func (b *listItemParser) Open(parent ast.Node, reader text.Reader, pc Context) (
func (b *listItemParser) Continue(node ast.Node, reader text.Reader, pc Context) State { func (b *listItemParser) Continue(node ast.Node, reader text.Reader, pc Context) State {
line, _ := reader.PeekLine() line, _ := reader.PeekLine()
if util.IsBlank(line) { if util.IsBlank(line) {
reader.Advance(len(line) - 1) reader.Advance(len(line)-1)
return Continue | HasChildren return Continue | HasChildren
} }
offset := lastOffset(node.Parent())
isEmpty := node.ChildCount() == 0 && pc.Get(emptyListItemWithBlankLines) != nil
indent, _ := util.IndentWidth(line, reader.LineOffset()) indent, _ := util.IndentWidth(line, reader.LineOffset())
if (isEmpty || indent < offset) && indent < 4 { offset := lastOffset(node.Parent())
if indent < offset && indent < 4 {
_, typ := matchesListItem(line, true) _, typ := matchesListItem(line, true)
// new list item found // new list item found
if typ != notList { if typ != notList {
pc.Set(skipListParserKey, listItemFlagValue) pc.Set(skipListParser, skipListParserValue)
return Close
}
if !isEmpty {
return Close
} }
return Close
} }
pos, padding := util.IndentPosition(line, reader.LineOffset(), offset) pos, padding := util.IndentPosition(line, reader.LineOffset(), offset)
reader.AdvanceAndSetPadding(pos, padding) reader.AdvanceAndSetPadding(pos, padding)

View file

@ -3,7 +3,6 @@ package parser
import ( import (
"github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/text" "github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
) )
type paragraphParser struct { type paragraphParser struct {
@ -34,8 +33,9 @@ func (b *paragraphParser) Open(parent ast.Node, reader text.Reader, pc Context)
} }
func (b *paragraphParser) Continue(node ast.Node, reader text.Reader, pc Context) State { func (b *paragraphParser) Continue(node ast.Node, reader text.Reader, pc Context) State {
line, segment := reader.PeekLine() _, segment := reader.PeekLine()
if util.IsBlank(line) { segment = segment.TrimLeftSpace(reader.Source())
if segment.IsEmpty() {
return Close return Close
} }
node.Lines().Append(segment) node.Lines().Append(segment)
@ -44,14 +44,13 @@ func (b *paragraphParser) Continue(node ast.Node, reader text.Reader, pc Context
} }
func (b *paragraphParser) Close(node ast.Node, reader text.Reader, pc Context) { func (b *paragraphParser) Close(node ast.Node, reader text.Reader, pc Context) {
parent := node.Parent()
if parent == nil {
// paragraph has been transformed
return
}
lines := node.Lines() lines := node.Lines()
if lines.Len() != 0 { if lines.Len() != 0 {
// trim leading spaces
for i := 0; i < lines.Len(); i++ {
l := lines.At(i)
lines.Set(i, l.TrimLeftSpace(reader.Source()))
}
// trim trailing spaces // trim trailing spaces
length := lines.Len() length := lines.Len()
lastLine := node.Lines().At(length - 1) lastLine := node.Lines().At(length - 1)

View file

@ -403,8 +403,7 @@ func (p *parseContext) IsInLinkLabel() bool {
type State int type State int
const ( const (
// None is a default value of the [State]. none State = 1 << iota
None State = 1 << iota
// Continue indicates parser can continue parsing. // Continue indicates parser can continue parsing.
Continue Continue
@ -431,7 +430,6 @@ type Config struct {
InlineParsers util.PrioritizedSlice /*<InlineParser>*/ InlineParsers util.PrioritizedSlice /*<InlineParser>*/
ParagraphTransformers util.PrioritizedSlice /*<ParagraphTransformer>*/ ParagraphTransformers util.PrioritizedSlice /*<ParagraphTransformer>*/
ASTTransformers util.PrioritizedSlice /*<ASTTransformer>*/ ASTTransformers util.PrioritizedSlice /*<ASTTransformer>*/
EscapedSpace bool
} }
// NewConfig returns a new Config. // NewConfig returns a new Config.
@ -568,16 +566,16 @@ type ASTTransformer interface {
// DefaultBlockParsers returns a new list of default BlockParsers. // DefaultBlockParsers returns a new list of default BlockParsers.
// Priorities of default BlockParsers are: // Priorities of default BlockParsers are:
// //
// SetextHeadingParser, 100 // SetextHeadingParser, 100
// ThematicBreakParser, 200 // ThematicBreakParser, 200
// ListParser, 300 // ListParser, 300
// ListItemParser, 400 // ListItemParser, 400
// CodeBlockParser, 500 // CodeBlockParser, 500
// ATXHeadingParser, 600 // ATXHeadingParser, 600
// FencedCodeBlockParser, 700 // FencedCodeBlockParser, 700
// BlockquoteParser, 800 // BlockquoteParser, 800
// HTMLBlockParser, 900 // HTMLBlockParser, 900
// ParagraphParser, 1000 // ParagraphParser, 1000
func DefaultBlockParsers() []util.PrioritizedValue { func DefaultBlockParsers() []util.PrioritizedValue {
return []util.PrioritizedValue{ return []util.PrioritizedValue{
util.Prioritized(NewSetextHeadingParser(), 100), util.Prioritized(NewSetextHeadingParser(), 100),
@ -596,11 +594,11 @@ func DefaultBlockParsers() []util.PrioritizedValue {
// DefaultInlineParsers returns a new list of default InlineParsers. // DefaultInlineParsers returns a new list of default InlineParsers.
// Priorities of default InlineParsers are: // Priorities of default InlineParsers are:
// //
// CodeSpanParser, 100 // CodeSpanParser, 100
// LinkParser, 200 // LinkParser, 200
// AutoLinkParser, 300 // AutoLinkParser, 300
// RawHTMLParser, 400 // RawHTMLParser, 400
// EmphasisParser, 500 // EmphasisParser, 500
func DefaultInlineParsers() []util.PrioritizedValue { func DefaultInlineParsers() []util.PrioritizedValue {
return []util.PrioritizedValue{ return []util.PrioritizedValue{
util.Prioritized(NewCodeSpanParser(), 100), util.Prioritized(NewCodeSpanParser(), 100),
@ -614,7 +612,7 @@ func DefaultInlineParsers() []util.PrioritizedValue {
// DefaultParagraphTransformers returns a new list of default ParagraphTransformers. // DefaultParagraphTransformers returns a new list of default ParagraphTransformers.
// Priorities of default ParagraphTransformers are: // Priorities of default ParagraphTransformers are:
// //
// LinkReferenceParagraphTransformer, 100 // LinkReferenceParagraphTransformer, 100
func DefaultParagraphTransformers() []util.PrioritizedValue { func DefaultParagraphTransformers() []util.PrioritizedValue {
return []util.PrioritizedValue{ return []util.PrioritizedValue{
util.Prioritized(LinkReferenceParagraphTransformer, 100), util.Prioritized(LinkReferenceParagraphTransformer, 100),
@ -637,7 +635,6 @@ type parser struct {
closeBlockers []CloseBlocker closeBlockers []CloseBlocker
paragraphTransformers []ParagraphTransformer paragraphTransformers []ParagraphTransformer
astTransformers []ASTTransformer astTransformers []ASTTransformer
escapedSpace bool
config *Config config *Config
initSync sync.Once initSync sync.Once
} }
@ -698,18 +695,6 @@ func WithASTTransformers(ps ...util.PrioritizedValue) Option {
return &withASTTransformers{ps} return &withASTTransformers{ps}
} }
type withEscapedSpace struct {
}
func (o *withEscapedSpace) SetParserOption(c *Config) {
c.EscapedSpace = true
}
// WithEscapedSpace is a functional option indicates that a '\' escaped half-space(0x20) should not trigger parsers.
func WithEscapedSpace() Option {
return &withEscapedSpace{}
}
type withOption struct { type withOption struct {
name OptionName name OptionName
value interface{} value interface{}
@ -861,7 +846,6 @@ func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
for _, v := range p.config.ASTTransformers { for _, v := range p.config.ASTTransformers {
p.addASTTransformer(v, p.config.Options) p.addASTTransformer(v, p.config.Options)
} }
p.escapedSpace = p.config.EscapedSpace
p.config = nil p.config = nil
}) })
c := &ParseConfig{} c := &ParseConfig{}
@ -882,7 +866,6 @@ func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
for _, at := range p.astTransformers { for _, at := range p.astTransformers {
at.Transform(root, reader, pc) at.Transform(root, reader, pc)
} }
// root.Dump(reader.Source(), 0) // root.Dump(reader.Source(), 0)
return root return root
} }
@ -901,13 +884,11 @@ func (p *parser) closeBlocks(from, to int, reader text.Reader, pc Context) {
blocks := pc.OpenedBlocks() blocks := pc.OpenedBlocks()
for i := from; i >= to; i-- { for i := from; i >= to; i-- {
node := blocks[i].Node node := blocks[i].Node
blocks[i].Parser.Close(blocks[i].Node, reader, pc)
paragraph, ok := node.(*ast.Paragraph) paragraph, ok := node.(*ast.Paragraph)
if ok && node.Parent() != nil { if ok && node.Parent() != nil {
p.transformParagraph(paragraph, reader, pc) p.transformParagraph(paragraph, reader, pc)
} }
if node.Parent() != nil { // closes only if node has not been transformed
blocks[i].Parser.Close(blocks[i].Node, reader, pc)
}
} }
if from == len(blocks)-1 { if from == len(blocks)-1 {
blocks = blocks[0:to] blocks = blocks[0:to]
@ -1051,7 +1032,7 @@ func isBlankLine(lineNum, level int, stats []lineStat) bool {
func (p *parser) parseBlocks(parent ast.Node, reader text.Reader, pc Context) { func (p *parser) parseBlocks(parent ast.Node, reader text.Reader, pc Context) {
pc.SetOpenedBlocks([]Block{}) pc.SetOpenedBlocks([]Block{})
blankLines := make([]lineStat, 0, 128) blankLines := make([]lineStat, 0, 128)
var isBlank bool isBlank := false
for { // process blocks separated by blank lines for { // process blocks separated by blank lines
_, lines, ok := reader.SkipBlankLines() _, lines, ok := reader.SkipBlankLines()
if !ok { if !ok {
@ -1134,12 +1115,6 @@ func (p *parser) walkBlock(block ast.Node, cb func(node ast.Node)) {
cb(block) cb(block)
} }
const (
lineBreakHard uint8 = 1 << iota
lineBreakSoft
lineBreakVisible
)
func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context) { func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context) {
if parent.IsRaw() { if parent.IsRaw() {
return return
@ -1154,30 +1129,21 @@ func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context)
break break
} }
lineLength := len(line) lineLength := len(line)
var lineBreakFlags uint8 hardlineBreak := false
hasNewLine := line[lineLength-1] == '\n' softLinebreak := line[lineLength-1] == '\n'
if ((lineLength >= 3 && line[lineLength-2] == '\\' && if lineLength >= 2 && line[lineLength-2] == '\\' && softLinebreak { // ends with \\n
line[lineLength-3] != '\\') || (lineLength == 2 && line[lineLength-2] == '\\')) && hasNewLine { // ends with \\n
lineLength -= 2 lineLength -= 2
lineBreakFlags |= lineBreakHard | lineBreakVisible hardlineBreak = true
} else if ((lineLength >= 4 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r' &&
line[lineLength-4] != '\\') || (lineLength == 3 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r')) && } else if lineLength >= 3 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r' && softLinebreak { // ends with \\r\n
hasNewLine { // ends with \\r\n
lineLength -= 3 lineLength -= 3
lineBreakFlags |= lineBreakHard | lineBreakVisible hardlineBreak = true
} else if lineLength >= 3 && line[lineLength-3] == ' ' && line[lineLength-2] == ' ' && } else if lineLength >= 3 && line[lineLength-3] == ' ' && line[lineLength-2] == ' ' && softLinebreak { // ends with [space][space]\n
hasNewLine { // ends with [space][space]\n
lineLength -= 3 lineLength -= 3
lineBreakFlags |= lineBreakHard hardlineBreak = true
} else if lineLength >= 4 && line[lineLength-4] == ' ' && line[lineLength-3] == ' ' && } else if lineLength >= 4 && line[lineLength-4] == ' ' && line[lineLength-3] == ' ' && line[lineLength-2] == '\r' && softLinebreak { // ends with [space][space]\r\n
line[lineLength-2] == '\r' && hasNewLine { // ends with [space][space]\r\n
lineLength -= 4 lineLength -= 4
lineBreakFlags |= lineBreakHard hardlineBreak = true
} else if hasNewLine {
// If the line ends with a newline character, but it is not a hardlineBreak, then it is a softLinebreak
// If the line ends with a hardlineBreak, then it cannot end with a softLinebreak
// See https://spec.commonmark.org/0.30/#soft-line-breaks
lineBreakFlags |= lineBreakSoft
} }
l, startPosition := block.Position() l, startPosition := block.Position()
@ -1187,9 +1153,9 @@ func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context)
if c == '\n' { if c == '\n' {
break break
} }
isSpace := util.IsSpace(c) && c != '\r' && c != '\n' isSpace := util.IsSpace(c)
isPunct := util.IsPunct(c) isPunct := util.IsPunct(c)
if (isPunct && !escaped) || isSpace && !(escaped && p.escapedSpace) || i == 0 { if (isPunct && !escaped) || isSpace || i == 0 {
parserChar := c parserChar := c
if isSpace || (i == 0 && !isPunct) { if isSpace || (i == 0 && !isPunct) {
parserChar = ' ' parserChar = ' '
@ -1241,14 +1207,11 @@ func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context)
continue continue
} }
diff := startPosition.Between(currentPosition) diff := startPosition.Between(currentPosition)
var text *ast.Text stop := diff.Stop
if lineBreakFlags&(lineBreakHard|lineBreakVisible) == lineBreakHard|lineBreakVisible { rest := diff.WithStop(stop)
text = ast.NewTextSegment(diff) text := ast.NewTextSegment(rest.TrimRightSpace(source))
} else { text.SetSoftLineBreak(softLinebreak)
text = ast.NewTextSegment(diff.TrimRightSpace(source)) text.SetHardLineBreak(hardlineBreak)
}
text.SetSoftLineBreak(lineBreakFlags&lineBreakSoft != 0)
text.SetHardLineBreak(lineBreakFlags&lineBreakHard != 0)
parent.AppendChild(parent, text) parent.AppendChild(parent, text)
block.AdvanceLine() block.AdvanceLine()
} }
@ -1257,5 +1220,4 @@ func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context)
for _, ip := range p.closeBlockers { for _, ip := range p.closeBlockers {
ip.CloseBlock(parent, block, pc) ip.CloseBlock(parent, block, pc)
} }
} }

View file

@ -15,7 +15,7 @@ type rawHTMLParser struct {
var defaultRawHTMLParser = &rawHTMLParser{} var defaultRawHTMLParser = &rawHTMLParser{}
// NewRawHTMLParser return a new InlineParser that can parse // NewRawHTMLParser return a new InlineParser that can parse
// inline htmls. // inline htmls
func NewRawHTMLParser() InlineParser { func NewRawHTMLParser() InlineParser {
return defaultRawHTMLParser return defaultRawHTMLParser
} }
@ -32,91 +32,40 @@ func (s *rawHTMLParser) Parse(parent ast.Node, block text.Reader, pc Context) as
if len(line) > 2 && line[1] == '/' && util.IsAlphaNumeric(line[2]) { if len(line) > 2 && line[1] == '/' && util.IsAlphaNumeric(line[2]) {
return s.parseMultiLineRegexp(closeTagRegexp, block, pc) return s.parseMultiLineRegexp(closeTagRegexp, block, pc)
} }
if bytes.HasPrefix(line, openComment) { if bytes.HasPrefix(line, []byte("<!--")) {
return s.parseComment(block, pc) return s.parseMultiLineRegexp(commentRegexp, block, pc)
} }
if bytes.HasPrefix(line, openProcessingInstruction) { if bytes.HasPrefix(line, []byte("<?")) {
return s.parseUntil(block, closeProcessingInstruction, pc) return s.parseSingleLineRegexp(processingInstructionRegexp, block, pc)
} }
if len(line) > 2 && line[1] == '!' && line[2] >= 'A' && line[2] <= 'Z' { if len(line) > 2 && line[1] == '!' && line[2] >= 'A' && line[2] <= 'Z' {
return s.parseUntil(block, closeDecl, pc) return s.parseSingleLineRegexp(declRegexp, block, pc)
} }
if bytes.HasPrefix(line, openCDATA) { if bytes.HasPrefix(line, []byte("<![CDATA[")) {
return s.parseUntil(block, closeCDATA, pc) return s.parseMultiLineRegexp(cdataRegexp, block, pc)
} }
return nil return nil
} }
var tagnamePattern = `([A-Za-z][A-Za-z0-9-]*)` var tagnamePattern = `([A-Za-z][A-Za-z0-9-]*)`
var spaceOrOneNewline = `(?:[ \t]|(?:\r\n|\n){0,1})` var attributePattern = `(?:\s+[a-zA-Z_:][a-zA-Z0-9:._-]*(?:\s*=\s*(?:[^\"'=<>` + "`" + `\x00-\x20]+|'[^']*'|"[^"]*"))?)`
var attributePattern = `(?:[\r\n \t]+[a-zA-Z_:][a-zA-Z0-9:._-]*(?:[\r\n \t]*=[\r\n \t]*(?:[^\"'=<>` + "`" + `\x00-\x20]+|'[^']*'|"[^"]*"))?)` //nolint:golint,lll var openTagRegexp = regexp.MustCompile("^<" + tagnamePattern + attributePattern + `*\s*/?>`)
var openTagRegexp = regexp.MustCompile("^<" + tagnamePattern + attributePattern + `*` + spaceOrOneNewline + `*/?>`) var closeTagRegexp = regexp.MustCompile("^</" + tagnamePattern + `\s*>`)
var closeTagRegexp = regexp.MustCompile("^</" + tagnamePattern + spaceOrOneNewline + `*>`) var commentRegexp = regexp.MustCompile(`^<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->`)
var processingInstructionRegexp = regexp.MustCompile(`^(?:<\?).*?(?:\?>)`)
var declRegexp = regexp.MustCompile(`^<![A-Z]+\s+[^>]*>`)
var cdataRegexp = regexp.MustCompile(`<!\[CDATA\[[\s\S]*?\]\]>`)
var openProcessingInstruction = []byte("<?") func (s *rawHTMLParser) parseSingleLineRegexp(reg *regexp.Regexp, block text.Reader, pc Context) ast.Node {
var closeProcessingInstruction = []byte("?>")
var openCDATA = []byte("<![CDATA[")
var closeCDATA = []byte("]]>")
var closeDecl = []byte(">")
var emptyComment1 = []byte("<!-->")
var emptyComment2 = []byte("<!--->")
var openComment = []byte("<!--")
var closeComment = []byte("-->")
func (s *rawHTMLParser) parseComment(block text.Reader, pc Context) ast.Node {
savedLine, savedSegment := block.Position()
node := ast.NewRawHTML()
line, segment := block.PeekLine() line, segment := block.PeekLine()
if bytes.HasPrefix(line, emptyComment1) { match := reg.FindSubmatchIndex(line)
node.Segments.Append(segment.WithStop(segment.Start + len(emptyComment1))) if match == nil {
block.Advance(len(emptyComment1)) return nil
return node
} }
if bytes.HasPrefix(line, emptyComment2) {
node.Segments.Append(segment.WithStop(segment.Start + len(emptyComment2)))
block.Advance(len(emptyComment2))
return node
}
offset := len(openComment)
line = line[offset:]
for {
index := bytes.Index(line, closeComment)
if index > -1 {
node.Segments.Append(segment.WithStop(segment.Start + offset + index + len(closeComment)))
block.Advance(offset + index + len(closeComment))
return node
}
offset = 0
node.Segments.Append(segment)
block.AdvanceLine()
line, segment = block.PeekLine()
if line == nil {
break
}
}
block.SetPosition(savedLine, savedSegment)
return nil
}
func (s *rawHTMLParser) parseUntil(block text.Reader, closer []byte, pc Context) ast.Node {
savedLine, savedSegment := block.Position()
node := ast.NewRawHTML() node := ast.NewRawHTML()
for { node.Segments.Append(segment.WithStop(segment.Start + match[1]))
line, segment := block.PeekLine() block.Advance(match[1])
if line == nil { return node
break
}
index := bytes.Index(line, closer)
if index > -1 {
node.Segments.Append(segment.WithStop(segment.Start + index + len(closer)))
block.Advance(index + len(closer))
return node
}
node.Segments.Append(segment)
block.AdvanceLine()
}
block.SetPosition(savedLine, savedSegment)
return nil
} }
func (s *rawHTMLParser) parseMultiLineRegexp(reg *regexp.Regexp, block text.Reader, pc Context) ast.Node { func (s *rawHTMLParser) parseMultiLineRegexp(reg *regexp.Regexp, block text.Reader, pc Context) ast.Node {
@ -144,8 +93,9 @@ func (s *rawHTMLParser) parseMultiLineRegexp(reg *regexp.Regexp, block text.Read
if l == eline { if l == eline {
block.Advance(end - start) block.Advance(end - start)
break break
} else {
block.AdvanceLine()
} }
block.AdvanceLine()
} }
return node return node
} }

View file

@ -91,7 +91,7 @@ func (b *setextHeadingParser) Close(node ast.Node, reader text.Reader, pc Contex
para.Lines().Append(segment) para.Lines().Append(segment)
heading.Parent().InsertAfter(heading.Parent(), heading, para) heading.Parent().InsertAfter(heading.Parent(), heading, para)
} else { } else {
next.Lines().Unshift(segment) next.(ast.Node).Lines().Unshift(segment)
} }
heading.Parent().RemoveChild(heading.Parent(), heading) heading.Parent().RemoveChild(heading.Parent(), heading)
} else { } else {

View file

@ -1,12 +1,9 @@
// Package html implements renderer that outputs HTMLs.
package html package html
import ( import (
"bytes" "bytes"
"fmt" "fmt"
"strconv" "strconv"
"unicode"
"unicode/utf8"
"github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/renderer" "github.com/yuin/goldmark/renderer"
@ -15,21 +12,19 @@ import (
// A Config struct has configurations for the HTML based renderers. // A Config struct has configurations for the HTML based renderers.
type Config struct { type Config struct {
Writer Writer Writer Writer
HardWraps bool HardWraps bool
EastAsianLineBreaks EastAsianLineBreaks XHTML bool
XHTML bool Unsafe bool
Unsafe bool
} }
// NewConfig returns a new Config with defaults. // NewConfig returns a new Config with defaults.
func NewConfig() Config { func NewConfig() Config {
return Config{ return Config{
Writer: DefaultWriter, Writer: DefaultWriter,
HardWraps: false, HardWraps: false,
EastAsianLineBreaks: EastAsianLineBreaksNone, XHTML: false,
XHTML: false, Unsafe: false,
Unsafe: false,
} }
} }
@ -38,8 +33,6 @@ func (c *Config) SetOption(name renderer.OptionName, value interface{}) {
switch name { switch name {
case optHardWraps: case optHardWraps:
c.HardWraps = value.(bool) c.HardWraps = value.(bool)
case optEastAsianLineBreaks:
c.EastAsianLineBreaks = value.(EastAsianLineBreaks)
case optXHTML: case optXHTML:
c.XHTML = value.(bool) c.XHTML = value.(bool)
case optUnsafe: case optUnsafe:
@ -101,99 +94,6 @@ func WithHardWraps() interface {
return &withHardWraps{} return &withHardWraps{}
} }
// EastAsianLineBreaks is an option name used in WithEastAsianLineBreaks.
const optEastAsianLineBreaks renderer.OptionName = "EastAsianLineBreaks"
// A EastAsianLineBreaks is a style of east asian line breaks.
type EastAsianLineBreaks int
const (
//EastAsianLineBreaksNone renders line breaks as it is.
EastAsianLineBreaksNone EastAsianLineBreaks = iota
// EastAsianLineBreaksSimple follows east_asian_line_breaks in Pandoc.
EastAsianLineBreaksSimple
// EastAsianLineBreaksCSS3Draft follows CSS text level3 "Segment Break Transformation Rules" with some enhancements.
EastAsianLineBreaksCSS3Draft
)
func (b EastAsianLineBreaks) softLineBreak(thisLastRune rune, siblingFirstRune rune) bool {
switch b {
case EastAsianLineBreaksNone:
return false
case EastAsianLineBreaksSimple:
return !(util.IsEastAsianWideRune(thisLastRune) && util.IsEastAsianWideRune(siblingFirstRune))
case EastAsianLineBreaksCSS3Draft:
return eastAsianLineBreaksCSS3DraftSoftLineBreak(thisLastRune, siblingFirstRune)
}
return false
}
func eastAsianLineBreaksCSS3DraftSoftLineBreak(thisLastRune rune, siblingFirstRune rune) bool {
// Implements CSS text level3 Segment Break Transformation Rules with some enhancements.
// References:
// - https://www.w3.org/TR/2020/WD-css-text-3-20200429/#line-break-transform
// - https://github.com/w3c/csswg-drafts/issues/5086
// Rule1:
// If the character immediately before or immediately after the segment break is
// the zero-width space character (U+200B), then the break is removed, leaving behind the zero-width space.
if thisLastRune == '\u200B' || siblingFirstRune == '\u200B' {
return false
}
// Rule2:
// Otherwise, if the East Asian Width property of both the character before and after the segment break is
// F, W, or H (not A), and neither side is Hangul, then the segment break is removed.
thisLastRuneEastAsianWidth := util.EastAsianWidth(thisLastRune)
siblingFirstRuneEastAsianWidth := util.EastAsianWidth(siblingFirstRune)
if (thisLastRuneEastAsianWidth == "F" ||
thisLastRuneEastAsianWidth == "W" ||
thisLastRuneEastAsianWidth == "H") &&
(siblingFirstRuneEastAsianWidth == "F" ||
siblingFirstRuneEastAsianWidth == "W" ||
siblingFirstRuneEastAsianWidth == "H") {
return unicode.Is(unicode.Hangul, thisLastRune) || unicode.Is(unicode.Hangul, siblingFirstRune)
}
// Rule3:
// Otherwise, if either the character before or after the segment break belongs to
// the space-discarding character set and it is a Unicode Punctuation (P*) or U+3000,
// then the segment break is removed.
if util.IsSpaceDiscardingUnicodeRune(thisLastRune) ||
unicode.IsPunct(thisLastRune) ||
thisLastRune == '\u3000' ||
util.IsSpaceDiscardingUnicodeRune(siblingFirstRune) ||
unicode.IsPunct(siblingFirstRune) ||
siblingFirstRune == '\u3000' {
return false
}
// Rule4:
// Otherwise, the segment break is converted to a space (U+0020).
return true
}
type withEastAsianLineBreaks struct {
eastAsianLineBreaksStyle EastAsianLineBreaks
}
func (o *withEastAsianLineBreaks) SetConfig(c *renderer.Config) {
c.Options[optEastAsianLineBreaks] = o.eastAsianLineBreaksStyle
}
func (o *withEastAsianLineBreaks) SetHTMLOption(c *Config) {
c.EastAsianLineBreaks = o.eastAsianLineBreaksStyle
}
// WithEastAsianLineBreaks is a functional option that indicates whether softline breaks
// between east asian wide characters should be ignored.
func WithEastAsianLineBreaks(e EastAsianLineBreaks) interface {
renderer.Option
Option
} {
return &withEastAsianLineBreaks{e}
}
// XHTML is an option name used in WithXHTML. // XHTML is an option name used in WithXHTML.
const optXHTML renderer.OptionName = "XHTML" const optXHTML renderer.OptionName = "XHTML"
@ -298,25 +198,16 @@ func (r *Renderer) writeLines(w util.BufWriter, source []byte, n ast.Node) {
var GlobalAttributeFilter = util.NewBytesFilter( var GlobalAttributeFilter = util.NewBytesFilter(
[]byte("accesskey"), []byte("accesskey"),
[]byte("autocapitalize"), []byte("autocapitalize"),
[]byte("autofocus"),
[]byte("class"), []byte("class"),
[]byte("contenteditable"), []byte("contenteditable"),
[]byte("contextmenu"),
[]byte("dir"), []byte("dir"),
[]byte("draggable"), []byte("draggable"),
[]byte("enterkeyhint"), []byte("dropzone"),
[]byte("hidden"), []byte("hidden"),
[]byte("id"), []byte("id"),
[]byte("inert"),
[]byte("inputmode"),
[]byte("is"),
[]byte("itemid"),
[]byte("itemprop"), []byte("itemprop"),
[]byte("itemref"),
[]byte("itemscope"),
[]byte("itemtype"),
[]byte("lang"), []byte("lang"),
[]byte("part"),
[]byte("role"),
[]byte("slot"), []byte("slot"),
[]byte("spellcheck"), []byte("spellcheck"),
[]byte("style"), []byte("style"),
@ -325,17 +216,15 @@ var GlobalAttributeFilter = util.NewBytesFilter(
[]byte("translate"), []byte("translate"),
) )
func (r *Renderer) renderDocument( func (r *Renderer) renderDocument(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
// nothing to do // nothing to do
return ast.WalkContinue, nil return ast.WalkContinue, nil
} }
// HeadingAttributeFilter defines attribute names which heading elements can have. // HeadingAttributeFilter defines attribute names which heading elements can have
var HeadingAttributeFilter = GlobalAttributeFilter var HeadingAttributeFilter = GlobalAttributeFilter
func (r *Renderer) renderHeading( func (r *Renderer) renderHeading(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
n := node.(*ast.Heading) n := node.(*ast.Heading)
if entering { if entering {
_, _ = w.WriteString("<h") _, _ = w.WriteString("<h")
@ -352,13 +241,12 @@ func (r *Renderer) renderHeading(
return ast.WalkContinue, nil return ast.WalkContinue, nil
} }
// BlockquoteAttributeFilter defines attribute names which blockquote elements can have. // BlockquoteAttributeFilter defines attribute names which blockquote elements can have
var BlockquoteAttributeFilter = GlobalAttributeFilter.Extend( var BlockquoteAttributeFilter = GlobalAttributeFilter.Extend(
[]byte("cite"), []byte("cite"),
) )
func (r *Renderer) renderBlockquote( func (r *Renderer) renderBlockquote(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
if entering { if entering {
if n.Attributes() != nil { if n.Attributes() != nil {
_, _ = w.WriteString("<blockquote") _, _ = w.WriteString("<blockquote")
@ -383,8 +271,7 @@ func (r *Renderer) renderCodeBlock(w util.BufWriter, source []byte, n ast.Node,
return ast.WalkContinue, nil return ast.WalkContinue, nil
} }
func (r *Renderer) renderFencedCodeBlock( func (r *Renderer) renderFencedCodeBlock(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
n := node.(*ast.FencedCodeBlock) n := node.(*ast.FencedCodeBlock)
if entering { if entering {
_, _ = w.WriteString("<pre><code") _, _ = w.WriteString("<pre><code")
@ -402,15 +289,14 @@ func (r *Renderer) renderFencedCodeBlock(
return ast.WalkContinue, nil return ast.WalkContinue, nil
} }
func (r *Renderer) renderHTMLBlock( func (r *Renderer) renderHTMLBlock(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
n := node.(*ast.HTMLBlock) n := node.(*ast.HTMLBlock)
if entering { if entering {
if r.Unsafe { if r.Unsafe {
l := n.Lines().Len() l := n.Lines().Len()
for i := 0; i < l; i++ { for i := 0; i < l; i++ {
line := n.Lines().At(i) line := n.Lines().At(i)
r.Writer.SecureWrite(w, line.Value(source)) _, _ = w.Write(line.Value(source))
} }
} else { } else {
_, _ = w.WriteString("<!-- raw HTML omitted -->\n") _, _ = w.WriteString("<!-- raw HTML omitted -->\n")
@ -419,7 +305,7 @@ func (r *Renderer) renderHTMLBlock(
if n.HasClosure() { if n.HasClosure() {
if r.Unsafe { if r.Unsafe {
closure := n.ClosureLine closure := n.ClosureLine
r.Writer.SecureWrite(w, closure.Value(source)) _, _ = w.Write(closure.Value(source))
} else { } else {
_, _ = w.WriteString("<!-- raw HTML omitted -->\n") _, _ = w.WriteString("<!-- raw HTML omitted -->\n")
} }
@ -445,7 +331,7 @@ func (r *Renderer) renderList(w util.BufWriter, source []byte, node ast.Node, en
_ = w.WriteByte('<') _ = w.WriteByte('<')
_, _ = w.WriteString(tag) _, _ = w.WriteString(tag)
if n.IsOrdered() && n.Start != 1 { if n.IsOrdered() && n.Start != 1 {
_, _ = fmt.Fprintf(w, " start=\"%d\"", n.Start) fmt.Fprintf(w, " start=\"%d\"", n.Start)
} }
if n.Attributes() != nil { if n.Attributes() != nil {
RenderAttributes(w, n, ListAttributeFilter) RenderAttributes(w, n, ListAttributeFilter)
@ -505,7 +391,7 @@ func (r *Renderer) renderParagraph(w util.BufWriter, source []byte, n ast.Node,
func (r *Renderer) renderTextBlock(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) { func (r *Renderer) renderTextBlock(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
if !entering { if !entering {
if n.NextSibling() != nil && n.FirstChild() != nil { if _, ok := n.NextSibling().(ast.Node); ok && n.FirstChild() != nil {
_ = w.WriteByte('\n') _ = w.WriteByte('\n')
} }
} }
@ -521,8 +407,7 @@ var ThematicAttributeFilter = GlobalAttributeFilter.Extend(
[]byte("width"), // [Deprecated] []byte("width"), // [Deprecated]
) )
func (r *Renderer) renderThematicBreak( func (r *Renderer) renderThematicBreak(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
if !entering { if !entering {
return ast.WalkContinue, nil return ast.WalkContinue, nil
} }
@ -551,8 +436,7 @@ var LinkAttributeFilter = GlobalAttributeFilter.Extend(
[]byte("target"), []byte("target"),
) )
func (r *Renderer) renderAutoLink( func (r *Renderer) renderAutoLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
n := node.(*ast.AutoLink) n := node.(*ast.AutoLink)
if !entering { if !entering {
return ast.WalkContinue, nil return ast.WalkContinue, nil
@ -593,7 +477,9 @@ func (r *Renderer) renderCodeSpan(w util.BufWriter, source []byte, n ast.Node, e
value := segment.Value(source) value := segment.Value(source)
if bytes.HasSuffix(value, []byte("\n")) { if bytes.HasSuffix(value, []byte("\n")) {
r.Writer.RawWrite(w, value[:len(value)-1]) r.Writer.RawWrite(w, value[:len(value)-1])
r.Writer.RawWrite(w, []byte(" ")) if c != n.LastChild() {
r.Writer.RawWrite(w, []byte(" "))
}
} else { } else {
r.Writer.RawWrite(w, value) r.Writer.RawWrite(w, value)
} }
@ -607,8 +493,7 @@ func (r *Renderer) renderCodeSpan(w util.BufWriter, source []byte, n ast.Node, e
// EmphasisAttributeFilter defines attribute names which emphasis elements can have. // EmphasisAttributeFilter defines attribute names which emphasis elements can have.
var EmphasisAttributeFilter = GlobalAttributeFilter var EmphasisAttributeFilter = GlobalAttributeFilter
func (r *Renderer) renderEmphasis( func (r *Renderer) renderEmphasis(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
n := node.(*ast.Emphasis) n := node.(*ast.Emphasis)
tag := "em" tag := "em"
if n.Level == 2 { if n.Level == 2 {
@ -680,7 +565,7 @@ func (r *Renderer) renderImage(w util.BufWriter, source []byte, node ast.Node, e
_, _ = w.Write(util.EscapeHTML(util.URLEscape(n.Destination, true))) _, _ = w.Write(util.EscapeHTML(util.URLEscape(n.Destination, true)))
} }
_, _ = w.WriteString(`" alt="`) _, _ = w.WriteString(`" alt="`)
r.renderTexts(w, source, n) _, _ = w.Write(util.EscapeHTML(n.Text(source)))
_ = w.WriteByte('"') _ = w.WriteByte('"')
if n.Title != nil { if n.Title != nil {
_, _ = w.WriteString(` title="`) _, _ = w.WriteString(` title="`)
@ -698,8 +583,7 @@ func (r *Renderer) renderImage(w util.BufWriter, source []byte, node ast.Node, e
return ast.WalkSkipChildren, nil return ast.WalkSkipChildren, nil
} }
func (r *Renderer) renderRawHTML( func (r *Renderer) renderRawHTML(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
if !entering { if !entering {
return ast.WalkSkipChildren, nil return ast.WalkSkipChildren, nil
} }
@ -725,8 +609,7 @@ func (r *Renderer) renderText(w util.BufWriter, source []byte, node ast.Node, en
if n.IsRaw() { if n.IsRaw() {
r.Writer.RawWrite(w, segment.Value(source)) r.Writer.RawWrite(w, segment.Value(source))
} else { } else {
value := segment.Value(source) r.Writer.Write(w, segment.Value(source))
r.Writer.Write(w, value)
if n.HardLineBreak() || (n.SoftLineBreak() && r.HardWraps) { if n.HardLineBreak() || (n.SoftLineBreak() && r.HardWraps) {
if r.XHTML { if r.XHTML {
_, _ = w.WriteString("<br />\n") _, _ = w.WriteString("<br />\n")
@ -734,20 +617,7 @@ func (r *Renderer) renderText(w util.BufWriter, source []byte, node ast.Node, en
_, _ = w.WriteString("<br>\n") _, _ = w.WriteString("<br>\n")
} }
} else if n.SoftLineBreak() { } else if n.SoftLineBreak() {
if r.EastAsianLineBreaks != EastAsianLineBreaksNone && len(value) != 0 { _ = w.WriteByte('\n')
sibling := node.NextSibling()
if sibling != nil && sibling.Kind() == ast.KindText {
if siblingText := sibling.(*ast.Text).Value(source); len(siblingText) != 0 {
thisLastRune := util.ToRune(value, len(value)-1)
siblingFirstRune, _ := utf8.DecodeRune(siblingText)
if r.EastAsianLineBreaks.softLineBreak(thisLastRune, siblingFirstRune) {
_ = w.WriteByte('\n')
}
}
}
} else {
_ = w.WriteByte('\n')
}
} }
} }
return ast.WalkContinue, nil return ast.WalkContinue, nil
@ -770,18 +640,6 @@ func (r *Renderer) renderString(w util.BufWriter, source []byte, node ast.Node,
return ast.WalkContinue, nil return ast.WalkContinue, nil
} }
func (r *Renderer) renderTexts(w util.BufWriter, source []byte, n ast.Node) {
for c := n.FirstChild(); c != nil; c = c.NextSibling() {
if s, ok := c.(*ast.String); ok {
_, _ = r.renderString(w, source, s, true)
} else if t, ok := c.(*ast.Text); ok {
_, _ = r.renderText(w, source, t, true)
} else {
r.renderTexts(w, source, c)
}
}
}
var dataPrefix = []byte("data-") var dataPrefix = []byte("data-")
// RenderAttributes renders given node's attributes. // RenderAttributes renders given node's attributes.
@ -798,14 +656,7 @@ func RenderAttributes(w util.BufWriter, node ast.Node, filter util.BytesFilter)
_, _ = w.Write(attr.Name) _, _ = w.Write(attr.Name)
_, _ = w.WriteString(`="`) _, _ = w.WriteString(`="`)
// TODO: convert numeric values to strings // TODO: convert numeric values to strings
var value []byte _, _ = w.Write(util.EscapeHTML(attr.Value.([]byte)))
switch typed := attr.Value.(type) {
case []byte:
value = typed
case string:
value = util.StringToReadOnlyBytes(typed)
}
_, _ = w.Write(util.EscapeHTML(value))
_ = w.WriteByte('"') _ = w.WriteByte('"')
} }
} }
@ -819,40 +670,9 @@ type Writer interface {
// RawWrite writes the given source to writer without resolving references and // RawWrite writes the given source to writer without resolving references and
// unescaping backslash escaped characters. // unescaping backslash escaped characters.
RawWrite(writer util.BufWriter, source []byte) RawWrite(writer util.BufWriter, source []byte)
// SecureWrite writes the given source to writer with replacing insecure characters.
SecureWrite(writer util.BufWriter, source []byte)
}
var replacementCharacter = []byte("\ufffd")
// A WriterConfig struct has configurations for the HTML based writers.
type WriterConfig struct {
// EscapedSpace is an option that indicates that a '\' escaped half-space(0x20) should not be rendered.
EscapedSpace bool
}
// A WriterOption interface sets options for HTML based writers.
type WriterOption func(*WriterConfig)
// WithEscapedSpace is a WriterOption indicates that a '\' escaped half-space(0x20) should not be rendered.
func WithEscapedSpace() WriterOption {
return func(c *WriterConfig) {
c.EscapedSpace = true
}
} }
type defaultWriter struct { type defaultWriter struct {
WriterConfig
}
// NewWriter returns a new Writer.
func NewWriter(opts ...WriterOption) Writer {
w := &defaultWriter{}
for _, opt := range opts {
opt(&w.WriterConfig)
}
return w
} }
func escapeRune(writer util.BufWriter, r rune) { func escapeRune(writer util.BufWriter, r rune) {
@ -866,23 +686,6 @@ func escapeRune(writer util.BufWriter, r rune) {
_, _ = writer.WriteRune(util.ToValidRune(r)) _, _ = writer.WriteRune(util.ToValidRune(r))
} }
func (d *defaultWriter) SecureWrite(writer util.BufWriter, source []byte) {
n := 0
l := len(source)
for i := 0; i < l; i++ {
if source[i] == '\u0000' {
_, _ = writer.Write(source[i-n : i])
n = 0
_, _ = writer.Write(replacementCharacter)
continue
}
n++
}
if n != 0 {
_, _ = writer.Write(source[l-n:])
}
}
func (d *defaultWriter) RawWrite(writer util.BufWriter, source []byte) { func (d *defaultWriter) RawWrite(writer util.BufWriter, source []byte) {
n := 0 n := 0
l := len(source) l := len(source)
@ -915,19 +718,6 @@ func (d *defaultWriter) Write(writer util.BufWriter, source []byte) {
escaped = false escaped = false
continue continue
} }
if d.EscapedSpace && c == ' ' {
d.RawWrite(writer, source[n:i-1])
n = i + 1
escaped = false
continue
}
}
if c == '\x00' {
d.RawWrite(writer, source[n:i])
d.RawWrite(writer, replacementCharacter)
n = i + 1
escaped = false
continue
} }
if c == '&' { if c == '&' {
pos := i pos := i
@ -940,7 +730,7 @@ func (d *defaultWriter) Write(writer util.BufWriter, source []byte) {
if nnext < limit && nc == 'x' || nc == 'X' { if nnext < limit && nc == 'x' || nc == 'X' {
start := nnext + 1 start := nnext + 1
i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsHexDecimal) i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsHexDecimal)
if ok && i < limit && source[i] == ';' && i-start < 7 { if ok && i < limit && source[i] == ';' {
v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 16, 32) v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 16, 32)
d.RawWrite(writer, source[n:pos]) d.RawWrite(writer, source[n:pos])
n = i + 1 n = i + 1
@ -952,7 +742,7 @@ func (d *defaultWriter) Write(writer util.BufWriter, source []byte) {
start := nnext start := nnext
i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsNumeric) i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsNumeric)
if ok && i < limit && i-start < 8 && source[i] == ';' { if ok && i < limit && i-start < 8 && source[i] == ';' {
v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 10, 32) v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 0, 32)
d.RawWrite(writer, source[n:pos]) d.RawWrite(writer, source[n:pos])
n = i + 1 n = i + 1
escapeRune(writer, rune(v)) escapeRune(writer, rune(v))
@ -986,36 +776,30 @@ func (d *defaultWriter) Write(writer util.BufWriter, source []byte) {
d.RawWrite(writer, source[n:]) d.RawWrite(writer, source[n:])
} }
// DefaultWriter is a default instance of the Writer. // DefaultWriter is a default implementation of the Writer.
var DefaultWriter = NewWriter() var DefaultWriter = &defaultWriter{}
var bDataImage = []byte("data:image/") var bDataImage = []byte("data:image/")
var bPng = []byte("png;") var bPng = []byte("png;")
var bGif = []byte("gif;") var bGif = []byte("gif;")
var bJpeg = []byte("jpeg;") var bJpeg = []byte("jpeg;")
var bWebp = []byte("webp;") var bWebp = []byte("webp;")
var bSvg = []byte("svg+xml;")
var bJs = []byte("javascript:") var bJs = []byte("javascript:")
var bVb = []byte("vbscript:") var bVb = []byte("vbscript:")
var bFile = []byte("file:") var bFile = []byte("file:")
var bData = []byte("data:") var bData = []byte("data:")
func hasPrefix(s, prefix []byte) bool {
return len(s) >= len(prefix) && bytes.Equal(bytes.ToLower(s[0:len(prefix)]), bytes.ToLower(prefix))
}
// IsDangerousURL returns true if the given url seems a potentially dangerous url, // IsDangerousURL returns true if the given url seems a potentially dangerous url,
// otherwise false. // otherwise false.
func IsDangerousURL(url []byte) bool { func IsDangerousURL(url []byte) bool {
if hasPrefix(url, bDataImage) && len(url) >= 11 { if bytes.HasPrefix(url, bDataImage) && len(url) >= 11 {
v := url[11:] v := url[11:]
if hasPrefix(v, bPng) || hasPrefix(v, bGif) || if bytes.HasPrefix(v, bPng) || bytes.HasPrefix(v, bGif) ||
hasPrefix(v, bJpeg) || hasPrefix(v, bWebp) || bytes.HasPrefix(v, bJpeg) || bytes.HasPrefix(v, bWebp) {
hasPrefix(v, bSvg) {
return false return false
} }
return true return true
} }
return hasPrefix(url, bJs) || hasPrefix(url, bVb) || return bytes.HasPrefix(url, bJs) || bytes.HasPrefix(url, bVb) ||
hasPrefix(url, bFile) || hasPrefix(url, bData) bytes.HasPrefix(url, bFile) || bytes.HasPrefix(url, bData)
} }

View file

@ -16,7 +16,7 @@ type Config struct {
NodeRenderers util.PrioritizedSlice NodeRenderers util.PrioritizedSlice
} }
// NewConfig returns a new Config. // NewConfig returns a new Config
func NewConfig() *Config { func NewConfig() *Config {
return &Config{ return &Config{
Options: map[OptionName]interface{}{}, Options: map[OptionName]interface{}{},
@ -78,7 +78,7 @@ type NodeRenderer interface {
RegisterFuncs(NodeRendererFuncRegisterer) RegisterFuncs(NodeRendererFuncRegisterer)
} }
// A NodeRendererFuncRegisterer registers given NodeRendererFunc to this object. // A NodeRendererFuncRegisterer registers
type NodeRendererFuncRegisterer interface { type NodeRendererFuncRegisterer interface {
// Register registers given NodeRendererFunc to this object. // Register registers given NodeRendererFunc to this object.
Register(ast.NodeKind, NodeRendererFunc) Register(ast.NodeKind, NodeRendererFunc)

View file

@ -1,14 +1,10 @@
// Package testutil provides utilities for unit tests.
package testutil package testutil
import ( import (
"bufio" "bufio"
"bytes" "bytes"
"encoding/hex"
"encoding/json"
"fmt" "fmt"
"os" "os"
"regexp"
"runtime/debug" "runtime/debug"
"strconv" "strconv"
"strings" "strings"
@ -30,44 +26,13 @@ type TestingT interface {
type MarkdownTestCase struct { type MarkdownTestCase struct {
No int No int
Description string Description string
Options MarkdownTestCaseOptions
Markdown string Markdown string
Expected string Expected string
} }
func source(t *MarkdownTestCase) string {
ret := t.Markdown
if t.Options.Trim {
ret = strings.TrimSpace(ret)
}
if t.Options.EnableEscape {
return string(applyEscapeSequence([]byte(ret)))
}
return ret
}
func expected(t *MarkdownTestCase) string {
ret := t.Expected
if t.Options.Trim {
ret = strings.TrimSpace(ret)
}
if t.Options.EnableEscape {
return string(applyEscapeSequence([]byte(ret)))
}
return ret
}
// MarkdownTestCaseOptions represents options for each test case.
type MarkdownTestCaseOptions struct {
EnableEscape bool
Trim bool
}
const attributeSeparator = "//- - - - - - - - -//" const attributeSeparator = "//- - - - - - - - -//"
const caseSeparator = "//= = = = = = = = = = = = = = = = = = = = = = = =//" const caseSeparator = "//= = = = = = = = = = = = = = = = = = = = = = = =//"
var optionsRegexp = regexp.MustCompile(`(?i)\s*options:(.*)`)
// ParseCliCaseArg parses -case command line args. // ParseCliCaseArg parses -case command line args.
func ParseCliCaseArg() []int { func ParseCliCaseArg() []int {
ret := []int{} ret := []int{}
@ -91,15 +56,12 @@ func DoTestCaseFile(m goldmark.Markdown, filename string, t TestingT, no ...int)
if err != nil { if err != nil {
panic(err) panic(err)
} }
defer func() { defer fp.Close()
_ = fp.Close()
}()
scanner := bufio.NewScanner(fp) scanner := bufio.NewScanner(fp)
c := MarkdownTestCase{ c := MarkdownTestCase{
No: -1, No: -1,
Description: "", Description: "",
Options: MarkdownTestCaseOptions{},
Markdown: "", Markdown: "",
Expected: "", Expected: "",
} }
@ -126,15 +88,6 @@ func DoTestCaseFile(m goldmark.Markdown, filename string, t TestingT, no ...int)
panic(fmt.Sprintf("%s: invalid case at line %d", filename, line)) panic(fmt.Sprintf("%s: invalid case at line %d", filename, line))
} }
line++ line++
matches := optionsRegexp.FindAllStringSubmatch(scanner.Text(), -1)
if len(matches) != 0 {
err = json.Unmarshal([]byte(matches[0][1]), &c.Options)
if err != nil {
panic(fmt.Sprintf("%s: invalid options at line %d", filename, line))
}
scanner.Scan()
line++
}
if scanner.Text() != attributeSeparator { if scanner.Text() != attributeSeparator {
panic(fmt.Sprintf("%s: invalid separator '%s' at line %d", filename, scanner.Text(), line)) panic(fmt.Sprintf("%s: invalid separator '%s' at line %d", filename, scanner.Text(), line))
} }
@ -158,9 +111,6 @@ func DoTestCaseFile(m goldmark.Markdown, filename string, t TestingT, no ...int)
buf = append(buf, text) buf = append(buf, text)
} }
c.Expected = strings.Join(buf, "\n") c.Expected = strings.Join(buf, "\n")
if len(c.Expected) != 0 {
c.Expected = c.Expected + "\n"
}
shouldAdd := len(no) == 0 shouldAdd := len(no) == 0
if !shouldAdd { if !shouldAdd {
for _, n := range no { for _, n := range no {
@ -208,7 +158,7 @@ Actual
%v %v
%s %s
` `
t.Errorf(format, testCase.No, description, source(&testCase), expected(&testCase), err, debug.Stack()) t.Errorf(format, testCase.No, description, testCase.Markdown, testCase.Expected, err, debug.Stack())
} else if !ok { } else if !ok {
format := `============= case %d%s ================ format := `============= case %d%s ================
Markdown: Markdown:
@ -222,188 +172,13 @@ Expected:
Actual Actual
--------- ---------
%s %s
Diff
---------
%s
` `
t.Errorf(format, testCase.No, description, source(&testCase), expected(&testCase), out.Bytes(), t.Errorf(format, testCase.No, description, testCase.Markdown, testCase.Expected, out.Bytes())
DiffPretty([]byte(expected(&testCase)), out.Bytes()))
} }
}() }()
if err := m.Convert([]byte(source(&testCase)), &out, opts...); err != nil { if err := m.Convert([]byte(testCase.Markdown), &out, opts...); err != nil {
panic(err) panic(err)
} }
ok = bytes.Equal(bytes.TrimSpace(out.Bytes()), bytes.TrimSpace([]byte(expected(&testCase)))) ok = bytes.Equal(bytes.TrimSpace(out.Bytes()), bytes.TrimSpace([]byte(testCase.Expected)))
}
// diffType classifies a run of lines produced by simpleDiffAux.
type diffType int
const (
// diffRemoved marks lines that come from the first input only.
diffRemoved diffType = iota
// diffAdded marks lines that come from the second input only.
diffAdded
// diffNone marks lines that are common to both inputs.
diffNone
)
// diff is one run of consecutive lines that share the same diffType.
type diff struct {
// Type tells whether the lines were removed, added or are unchanged.
Type diffType
// Lines holds the lines of this run, without their trailing newline.
Lines [][]byte
}
// simpleDiff splits both inputs into lines on "\n" and returns the
// line-based diff computed by simpleDiffAux.
func simpleDiff(v1, v2 []byte) []diff {
	newline := []byte("\n")
	before := bytes.Split(v1, newline)
	after := bytes.Split(v2, newline)
	return simpleDiffAux(before, after)
}
// simpleDiffAux computes a line-based diff between v1lines and v2lines.
//
// It finds the longest run of consecutive lines that appears in both inputs,
// emits that run as diffNone, and recurses on the lines before and after it.
// When the inputs share no line at all, the whole of v1lines is reported as
// diffRemoved and the whole of v2lines as diffAdded (empty inputs are omitted).
func simpleDiffAux(v1lines, v2lines [][]byte) []diff {
// v1index maps a line's content to every index at which it occurs in v1lines.
v1index := map[string][]int{}
for i, line := range v1lines {
key := util.BytesToReadOnlyString(line)
if _, ok := v1index[key]; !ok {
v1index[key] = []int{}
}
v1index[key] = append(v1index[key], i)
}
// overlap[p] is the length of the common run that ends at v1lines[p] and at
// the previously visited line of v2lines.
overlap := map[int]int{}
// v1start/v2start/length describe the longest common run found so far.
v1start := 0
v2start := 0
length := 0
for v2pos, line := range v2lines {
newOverlap := map[int]int{}
key := util.BytesToReadOnlyString(line)
// A line missing from v1lines gets an empty entry, so the loop below
// simply does not iterate for it.
if _, ok := v1index[key]; !ok {
v1index[key] = []int{}
}
for _, v1pos := range v1index[key] {
// Extend the run that ended on the previous line of both inputs, if any.
value := 0
if v1pos != 0 {
if v, ok := overlap[v1pos-1]; ok {
value = v
}
}
newOverlap[v1pos] = value + 1
// Only a strictly longer run replaces the current best, so the first
// run of maximal length wins.
if newOverlap[v1pos] > length {
length = newOverlap[v1pos]
v1start = v1pos - length + 1
v2start = v2pos - length + 1
}
}
overlap = newOverlap
}
// No common line: everything left is either removed or added.
if length == 0 {
diffs := []diff{}
if len(v1lines) != 0 {
diffs = append(diffs, diff{diffRemoved, v1lines})
}
if len(v2lines) != 0 {
diffs = append(diffs, diff{diffAdded, v2lines})
}
return diffs
}
// Diff the part before the common run, then the run itself, then the rest.
diffs := simpleDiffAux(v1lines[:v1start], v2lines[:v2start])
diffs = append(diffs, diff{diffNone, v2lines[v2start : v2start+length]})
diffs = append(diffs, simpleDiffAux(v1lines[v1start+length:],
v2lines[v2start+length:])...)
return diffs
}
// DiffPretty returns pretty formatted diff between given bytes.
//
// Every line is written as "<marker> | <text>\n". The marker is "+" for added
// lines, "-" for removed lines and a space for unchanged lines. Added and
// removed lines are passed through util.VisualizeSpaces before being written.
func DiffPretty(v1, v2 []byte) []byte {
	var out bytes.Buffer
	for _, chunk := range simpleDiff(v1, v2) {
		marker := " "
		if chunk.Type == diffAdded {
			marker = "+"
		} else if chunk.Type == diffRemoved {
			marker = "-"
		}
		changed := marker != " "
		for _, line := range chunk.Lines {
			text := line
			if changed {
				text = util.VisualizeSpaces(line)
			}
			fmt.Fprintf(&out, "%s | %s\n", marker, text)
		}
	}
	return out.Bytes()
}
func applyEscapeSequence(b []byte) []byte {
result := make([]byte, 0, len(b))
for i := 0; i < len(b); i++ {
if b[i] == '\\' && i != len(b)-1 {
switch b[i+1] {
case 'a':
result = append(result, '\a')
i++
continue
case 'b':
result = append(result, '\b')
i++
continue
case 'f':
result = append(result, '\f')
i++
continue
case 'n':
result = append(result, '\n')
i++
continue
case 'r':
result = append(result, '\r')
i++
continue
case 't':
result = append(result, '\t')
i++
continue
case 'v':
result = append(result, '\v')
i++
continue
case '\\':
result = append(result, '\\')
i++
continue
case 'x':
if len(b) >= i+3 && util.IsHexDecimal(b[i+2]) && util.IsHexDecimal(b[i+3]) {
v, _ := hex.DecodeString(string(b[i+2 : i+4]))
result = append(result, v[0])
i += 3
continue
}
case 'u', 'U':
if len(b) > i+2 {
num := []byte{}
for j := i + 2; j < len(b); j++ {
if util.IsHexDecimal(b[j]) {
num = append(num, b[j])
continue
}
break
}
if len(num) >= 4 && len(num) < 8 {
v, _ := strconv.ParseInt(string(num[:4]), 16, 32)
result = append(result, []byte(string(rune(v)))...)
i += 5
continue
}
if len(num) >= 8 {
v, _ := strconv.ParseInt(string(num[:8]), 16, 32)
result = append(result, []byte(string(rune(v)))...)
i += 9
continue
}
}
}
}
result = append(result, b[i])
}
return result
} }

View file

@ -1,2 +0,0 @@
// Package text provides functionalities to manipulate texts.
package text

View file

@ -1,7 +1,6 @@
package text package text
import ( import (
"bytes"
"io" "io"
"regexp" "regexp"
"unicode/utf8" "unicode/utf8"
@ -71,28 +70,6 @@ type Reader interface {
// Match performs regular expression searching to current line. // Match performs regular expression searching to current line.
FindSubMatch(reg *regexp.Regexp) [][]byte FindSubMatch(reg *regexp.Regexp) [][]byte
// FindClosure finds corresponding closure.
FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool)
}
// FindClosureOptions is options for Reader.FindClosure.
type FindClosureOptions struct {
// CodeSpan is a flag for the FindClosure. If this is set to true,
// FindClosure ignores closers in codespans.
CodeSpan bool
// Nesting is a flag for the FindClosure. If this is set to true,
// FindClosure allows nesting.
Nesting bool
// Newline is a flag for the FindClosure. If this is set to true,
// FindClosure searches for a closer over multiple lines.
Newline bool
// Advance is a flag for the FindClosure. If this is set to true,
// FindClosure advances pointers when closer is found.
Advance bool
} }
type reader struct { type reader struct {
@ -115,10 +92,6 @@ func NewReader(source []byte) Reader {
return r return r
} }
// FindClosure implements Reader.FindClosure for reader by delegating to
// findClosureReader with the receiver as the Reader to scan.
func (r *reader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
return findClosureReader(r, opener, closer, options)
}
func (r *reader) ResetPosition() { func (r *reader) ResetPosition() {
r.line = -1 r.line = -1
r.head = 0 r.head = 0
@ -154,7 +127,7 @@ func (r *reader) PeekLine() ([]byte, Segment) {
return nil, r.pos return nil, r.pos
} }
// io.RuneReader interface. // io.RuneReader interface
func (r *reader) ReadRune() (rune, int, error) { func (r *reader) ReadRune() (rune, int, error) {
return readRuneReader(r) return readRuneReader(r)
} }
@ -299,10 +272,6 @@ func NewBlockReader(source []byte, segments *Segments) BlockReader {
return r return r
} }
// FindClosure implements Reader.FindClosure for blockReader by delegating to
// findClosureReader with the receiver as the Reader to scan.
func (r *blockReader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
return findClosureReader(r, opener, closer, options)
}
func (r *blockReader) ResetPosition() { func (r *blockReader) ResetPosition() {
r.line = -1 r.line = -1
r.head = 0 r.head = 0
@ -354,7 +323,7 @@ func (r *blockReader) Value(seg Segment) []byte {
return ret return ret
} }
// io.RuneReader interface. // io.RuneReader interface
func (r *blockReader) ReadRune() (rune, int, error) { func (r *blockReader) ReadRune() (rune, int, error) {
return readRuneReader(r) return readRuneReader(r)
} }
@ -538,30 +507,24 @@ func matchReader(r Reader, reg *regexp.Regexp) bool {
} }
func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte { func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte {
oldLine, oldSeg := r.Position() oldline, oldseg := r.Position()
match := reg.FindReaderSubmatchIndex(r) match := reg.FindReaderSubmatchIndex(r)
r.SetPosition(oldLine, oldSeg) r.SetPosition(oldline, oldseg)
if match == nil { if match == nil {
return nil return nil
} }
var bb bytes.Buffer runes := make([]rune, 0, match[1]-match[0])
bb.Grow(match[1] - match[0])
for i := 0; i < match[1]; { for i := 0; i < match[1]; {
r, size, _ := readRuneReader(r) r, size, _ := readRuneReader(r)
i += size i += size
bb.WriteRune(r) runes = append(runes, r)
} }
bs := bb.Bytes() result := [][]byte{}
var result [][]byte
for i := 0; i < len(match); i += 2 { for i := 0; i < len(match); i += 2 {
if match[i] < 0 { result = append(result, []byte(string(runes[match[i]:match[i+1]])))
result = append(result, []byte{})
continue
}
result = append(result, bs[match[i]:match[i+1]])
} }
r.SetPosition(oldLine, oldSeg) r.SetPosition(oldline, oldseg)
r.Advance(match[1] - match[0]) r.Advance(match[1] - match[0])
return result return result
} }
@ -578,83 +541,3 @@ func readRuneReader(r Reader) (rune, int, error) {
r.Advance(size) r.Advance(size)
return rn, size, nil return rn, size, nil
} }
// findClosureReader scans forward from the current position of r for the
// closer byte that balances the nesting depth. The depth starts at 1, i.e.
// one opener is treated as already seen.
//
// On success it returns the collected segments and true: every fully scanned
// line is appended as is, and the line holding the closer is appended with
// its stop set to the closer's offset. If no balancing closer is found it
// returns (nil, false).
//
// Unless opts.Advance is set, the reader position is restored before
// returning. With opts.Advance set the position is left wherever scanning
// stopped, in the failure case as well.
func findClosureReader(r Reader, opener, closer byte, opts FindClosureOptions) (*Segments, bool) {
opened := 1
// codeSpanOpener is the length of the backtick run that opened the current
// code span, or 0 when not inside a code span. It carries over across lines.
codeSpanOpener := 0
closed := false
orgline, orgpos := r.Position()
var ret *Segments
for {
bs, seg := r.PeekLine()
// No more input to scan.
if bs == nil {
goto end
}
i := 0
for i < len(bs) {
c := bs[i]
if opts.CodeSpan && codeSpanOpener != 0 && c == '`' {
// Inside a code span: measure this backtick run. The span is only
// closed by a run of exactly the same length as the opening run.
codeSpanCloser := 0
for ; i < len(bs); i++ {
if bs[i] == '`' {
codeSpanCloser++
} else {
// Step back so the i++ at the end of the outer loop lands on
// the first byte after the run.
i--
break
}
}
if codeSpanCloser == codeSpanOpener {
codeSpanOpener = 0
}
} else if codeSpanOpener == 0 && c == '\\' && i < len(bs)-1 && util.IsPunct(bs[i+1]) {
// Backslash-escaped punctuation outside a code span: skip both bytes
// so an escaped opener or closer is not counted.
i += 2
continue
} else if opts.CodeSpan && codeSpanOpener == 0 && c == '`' {
// Start of a code span: record the length of the opening backtick run.
for ; i < len(bs); i++ {
if bs[i] == '`' {
codeSpanOpener++
} else {
i--
break
}
}
} else if (opts.CodeSpan && codeSpanOpener == 0) || !opts.CodeSpan {
// Outside a code span, or code spans are not tracked at all.
if c == closer {
opened--
if opened == 0 {
if ret == nil {
ret = NewSegments()
}
ret.Append(seg.WithStop(seg.Start + i))
// Move past the closer itself.
r.Advance(i + 1)
closed = true
goto end
}
} else if c == opener {
// Without nesting, meeting another opener aborts the search.
if !opts.Nesting {
goto end
}
opened++
}
}
i++
}
// The closer was not on this line; continue only for multi-line searches.
if !opts.Newline {
goto end
}
r.AdvanceLine()
if ret == nil {
ret = NewSegments()
}
ret.Append(seg)
}
end:
if !opts.Advance {
r.SetPosition(orgline, orgpos)
}
if closed {
return ret, true
}
return nil, false
}

View file

@ -1,16 +0,0 @@
package text
import (
"regexp"
"testing"
)
// TestFindSubMatchReader checks that FindSubMatch returns the captured group
// intact when the matched text consists of multi-byte (CJK) characters.
func TestFindSubMatchReader(t *testing.T) {
	const word = "微笑"
	pattern := regexp.MustCompile(`:(\p{L}+):`)
	reader := NewReader([]byte(":" + word + ":"))
	groups := reader.FindSubMatch(pattern)
	if len(groups) == 2 && string(groups[1]) == word {
		return
	}
	t.Fatal("no match cjk")
}

View file

@ -2,7 +2,6 @@ package text
import ( import (
"bytes" "bytes"
"github.com/yuin/goldmark/util" "github.com/yuin/goldmark/util"
) )
@ -19,20 +18,6 @@ type Segment struct {
// Padding is a padding length of the segment. // Padding is a padding length of the segment.
Padding int Padding int
// ForceNewline is true if the segment should be ended with a newline.
// Some elements (e.g. CodeBlock, FencedCodeBlock) do not trim trailing
// newlines. Spec defines that EOF is treated as a newline, so we need to
// add a newline to the end of the segment if it is not empty.
//
// i.e.:
//
// ```go
// const test = "test"
//
// This code does not close the code block and ends with EOF. In this case,
// we need to add a newline to the end of the last line like `const test = "test"\n`.
ForceNewline bool
} }
// NewSegment return a new Segment. // NewSegment return a new Segment.
@ -55,18 +40,12 @@ func NewSegmentPadding(start, stop, n int) Segment {
// Value returns a value of the segment. // Value returns a value of the segment.
func (t *Segment) Value(buffer []byte) []byte { func (t *Segment) Value(buffer []byte) []byte {
var result []byte
if t.Padding == 0 { if t.Padding == 0 {
result = buffer[t.Start:t.Stop] return buffer[t.Start:t.Stop]
} else {
result = make([]byte, 0, t.Padding+t.Stop-t.Start+1)
result = append(result, bytes.Repeat(space, t.Padding)...)
result = append(result, buffer[t.Start:t.Stop]...)
} }
if t.ForceNewline && len(result) > 0 && result[len(result)-1] != '\n' { result := make([]byte, 0, t.Padding+t.Stop-t.Start+1)
result = append(result, '\n') result = append(result, bytes.Repeat(space, t.Padding)...)
} return append(result, buffer[t.Start:t.Stop]...)
return result
} }
// Len returns a length of the segment. // Len returns a length of the segment.
@ -228,12 +207,3 @@ func (s *Segments) Unshift(v Segment) {
s.values = append(s.values[0:1], s.values[0:]...) s.values = append(s.values[0:1], s.values[0:]...)
s.values[0] = v s.values[0] = v
} }
// Value concatenates the values of all segments in the collection, in order,
// and returns the resulting bytes. A collection without segments yields nil.
func (s *Segments) Value(buffer []byte) []byte {
	var joined []byte
	for i := range s.values {
		joined = append(joined, s.values[i].Value(buffer)...)
	}
	return joined
}

File diff suppressed because it is too large Load diff

View file

@ -1,4 +1,3 @@
//nolint:all
package util package util
var unicodeCaseFoldings = map[rune][]rune{ var unicodeCaseFoldings = map[rune][]rune{
@ -962,7 +961,6 @@ var unicodeCaseFoldings = map[rune][]rune{
0x2c2c: []int32{11356}, 0x2c2c: []int32{11356},
0x2c2d: []int32{11357}, 0x2c2d: []int32{11357},
0x2c2e: []int32{11358}, 0x2c2e: []int32{11358},
0x2c2f: []int32{11359},
0x2c60: []int32{11361}, 0x2c60: []int32{11361},
0x2c62: []int32{619}, 0x2c62: []int32{619},
0x2c63: []int32{7549}, 0x2c63: []int32{7549},
@ -1143,17 +1141,10 @@ var unicodeCaseFoldings = map[rune][]rune{
0xa7ba: []int32{42939}, 0xa7ba: []int32{42939},
0xa7bc: []int32{42941}, 0xa7bc: []int32{42941},
0xa7be: []int32{42943}, 0xa7be: []int32{42943},
0xa7c0: []int32{42945},
0xa7c2: []int32{42947}, 0xa7c2: []int32{42947},
0xa7c4: []int32{42900}, 0xa7c4: []int32{42900},
0xa7c5: []int32{642}, 0xa7c5: []int32{642},
0xa7c6: []int32{7566}, 0xa7c6: []int32{7566},
0xa7c7: []int32{42952},
0xa7c9: []int32{42954},
0xa7d0: []int32{42961},
0xa7d6: []int32{42967},
0xa7d8: []int32{42969},
0xa7f5: []int32{42998},
0xab70: []int32{5024}, 0xab70: []int32{5024},
0xab71: []int32{5025}, 0xab71: []int32{5025},
0xab72: []int32{5026}, 0xab72: []int32{5026},
@ -1348,41 +1339,6 @@ var unicodeCaseFoldings = map[rune][]rune{
0x104d1: []int32{66809}, 0x104d1: []int32{66809},
0x104d2: []int32{66810}, 0x104d2: []int32{66810},
0x104d3: []int32{66811}, 0x104d3: []int32{66811},
0x10570: []int32{66967},
0x10571: []int32{66968},
0x10572: []int32{66969},
0x10573: []int32{66970},
0x10574: []int32{66971},
0x10575: []int32{66972},
0x10576: []int32{66973},
0x10577: []int32{66974},
0x10578: []int32{66975},
0x10579: []int32{66976},
0x1057a: []int32{66977},
0x1057c: []int32{66979},
0x1057d: []int32{66980},
0x1057e: []int32{66981},
0x1057f: []int32{66982},
0x10580: []int32{66983},
0x10581: []int32{66984},
0x10582: []int32{66985},
0x10583: []int32{66986},
0x10584: []int32{66987},
0x10585: []int32{66988},
0x10586: []int32{66989},
0x10587: []int32{66990},
0x10588: []int32{66991},
0x10589: []int32{66992},
0x1058a: []int32{66993},
0x1058c: []int32{66995},
0x1058d: []int32{66996},
0x1058e: []int32{66997},
0x1058f: []int32{66998},
0x10590: []int32{66999},
0x10591: []int32{67000},
0x10592: []int32{67001},
0x10594: []int32{67003},
0x10595: []int32{67004},
0x10c80: []int32{68800}, 0x10c80: []int32{68800},
0x10c81: []int32{68801}, 0x10c81: []int32{68801},
0x10c82: []int32{68802}, 0x10c82: []int32{68802},

View file

@ -63,13 +63,12 @@ func (b *CopyOnWriteBuffer) AppendString(value string) {
// WriteByte writes the given byte to the buffer. // WriteByte writes the given byte to the buffer.
// WriteByte allocate new buffer and clears it at the first time. // WriteByte allocate new buffer and clears it at the first time.
func (b *CopyOnWriteBuffer) WriteByte(c byte) error { func (b *CopyOnWriteBuffer) WriteByte(c byte) {
if !b.copied { if !b.copied {
b.buffer = make([]byte, 0, len(b.buffer)+20) b.buffer = make([]byte, 0, len(b.buffer)+20)
b.copied = true b.copied = true
} }
b.buffer = append(b.buffer, c) b.buffer = append(b.buffer, c)
return nil
} }
// AppendByte appends given bytes to the buffer. // AppendByte appends given bytes to the buffer.
@ -131,9 +130,6 @@ func VisualizeSpaces(bs []byte) []byte {
bs = bytes.Replace(bs, []byte("\t"), []byte("[TAB]"), -1) bs = bytes.Replace(bs, []byte("\t"), []byte("[TAB]"), -1)
bs = bytes.Replace(bs, []byte("\n"), []byte("[NEWLINE]\n"), -1) bs = bytes.Replace(bs, []byte("\n"), []byte("[NEWLINE]\n"), -1)
bs = bytes.Replace(bs, []byte("\r"), []byte("[CR]"), -1) bs = bytes.Replace(bs, []byte("\r"), []byte("[CR]"), -1)
bs = bytes.Replace(bs, []byte("\v"), []byte("[VTAB]"), -1)
bs = bytes.Replace(bs, []byte("\x00"), []byte("[NUL]"), -1)
bs = bytes.Replace(bs, []byte("\ufffd"), []byte("[U+FFFD]"), -1)
return bs return bs
} }
@ -146,14 +142,37 @@ func TabWidth(currentPos int) int {
// If the line contains tab characters, paddings may be not zero. // If the line contains tab characters, paddings may be not zero.
// currentPos==0 and width==2: // currentPos==0 and width==2:
// //
// position: 0 1 // position: 0 1
// [TAB]aaaa // [TAB]aaaa
// width: 1234 5678 // width: 1234 5678
// //
// width=2 is in the tab character. In this case, IndentPosition returns // width=2 is in the tab character. In this case, IndentPosition returns
// (pos=1, padding=2). // (pos=1, padding=2)
func IndentPosition(bs []byte, currentPos, width int) (pos, padding int) { func IndentPosition(bs []byte, currentPos, width int) (pos, padding int) {
return IndentPositionPadding(bs, currentPos, 0, width) if width == 0 {
return 0, 0
}
w := 0
l := len(bs)
i := 0
hasTab := false
for ; i < l; i++ {
if bs[i] == '\t' {
w += TabWidth(currentPos + w)
hasTab = true
} else if bs[i] == ' ' {
w++
} else {
break
}
}
if w >= width {
if !hasTab {
return width, 0
}
return i, w - width
}
return -1, -1
} }
// IndentPositionPadding searches an indent position with the given width for the given line. // IndentPositionPadding searches an indent position with the given width for the given line.
@ -166,16 +185,10 @@ func IndentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, pad
w := 0 w := 0
i := 0 i := 0
l := len(bs) l := len(bs)
p := paddingv
for ; i < l; i++ { for ; i < l; i++ {
if p > 0 { if bs[i] == '\t' {
p--
w++
continue
}
if bs[i] == '\t' && w < width {
w += TabWidth(currentPos + w) w += TabWidth(currentPos + w)
} else if bs[i] == ' ' && w < width { } else if bs[i] == ' ' {
w++ w++
} else { } else {
break break
@ -188,8 +201,6 @@ func IndentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, pad
} }
// DedentPosition dedents lines by the given width. // DedentPosition dedents lines by the given width.
//
// Deprecated: This function has bugs. Use util.IndentPositionPadding and util.FirstNonSpacePosition.
func DedentPosition(bs []byte, currentPos, width int) (pos, padding int) { func DedentPosition(bs []byte, currentPos, width int) (pos, padding int) {
if width == 0 { if width == 0 {
return 0, 0 return 0, 0
@ -215,8 +226,6 @@ func DedentPosition(bs []byte, currentPos, width int) (pos, padding int) {
// DedentPositionPadding dedents lines by the given width. // DedentPositionPadding dedents lines by the given width.
// This function is mostly same as DedentPosition except this function // This function is mostly same as DedentPosition except this function
// takes account into additional paddings. // takes account into additional paddings.
//
// Deprecated: This function has bugs. Use util.IndentPositionPadding and util.FirstNonSpacePosition.
func DedentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) { func DedentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) {
if width == 0 { if width == 0 {
return 0, paddingv return 0, paddingv
@ -279,10 +288,6 @@ func FirstNonSpacePosition(bs []byte) int {
// If codeSpan is set true, it ignores characters in code spans. // If codeSpan is set true, it ignores characters in code spans.
// If allowNesting is set true, closures correspond to nested opener will be // If allowNesting is set true, closures correspond to nested opener will be
// ignored. // ignored.
//
// Deprecated: This function can not handle newlines. Many elements
// can be existed over multiple lines(e.g. link labels).
// Use text.Reader.FindClosure.
func FindClosure(bs []byte, opener, closure byte, codeSpan, allowNesting bool) int { func FindClosure(bs []byte, opener, closure byte, codeSpan, allowNesting bool) int {
i := 0 i := 0
opened := 1 opened := 1
@ -431,7 +436,7 @@ func DoFullUnicodeCaseFolding(v []byte) []byte {
if c >= 0x41 && c <= 0x5a { if c >= 0x41 && c <= 0x5a {
// A-Z to a-z // A-Z to a-z
cob.Write(v[n:i]) cob.Write(v[n:i])
_ = cob.WriteByte(c + 32) cob.WriteByte(c + 32)
n = i + 1 n = i + 1
} }
continue continue
@ -528,7 +533,7 @@ func ToLinkReference(v []byte) string {
return string(ReplaceSpaces(v, ' ')) return string(ReplaceSpaces(v, ' '))
} }
var htmlEscapeTable = [256][]byte{nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("&quot;"), nil, nil, nil, []byte("&amp;"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("&lt;"), nil, []byte("&gt;"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil} //nolint:golint,lll var htmlEscapeTable = [256][]byte{nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("&quot;"), nil, nil, nil, []byte("&amp;"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("&lt;"), nil, []byte("&gt;"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, 
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil}
// EscapeHTMLByte returns HTML escaped bytes if the given byte should be escaped, // EscapeHTMLByte returns HTML escaped bytes if the given byte should be escaped,
// otherwise nil. // otherwise nil.
@ -564,7 +569,7 @@ func UnescapePunctuations(source []byte) []byte {
c := source[i] c := source[i]
if i < limit-1 && c == '\\' && IsPunct(source[i+1]) { if i < limit-1 && c == '\\' && IsPunct(source[i+1]) {
cob.Write(source[n:i]) cob.Write(source[n:i])
_ = cob.WriteByte(source[i+1]) cob.WriteByte(source[i+1])
i += 2 i += 2
n = i n = i
continue continue
@ -580,9 +585,9 @@ func UnescapePunctuations(source []byte) []byte {
// ResolveNumericReferences resolve numeric references like '&#1234;" . // ResolveNumericReferences resolve numeric references like '&#1234;" .
func ResolveNumericReferences(source []byte) []byte { func ResolveNumericReferences(source []byte) []byte {
cob := NewCopyOnWriteBuffer(source) cob := NewCopyOnWriteBuffer(source)
buf := make([]byte, 6) buf := make([]byte, 6, 6)
limit := len(source) limit := len(source)
var ok bool ok := false
n := 0 n := 0
for i := 0; i < limit; i++ { for i := 0; i < limit; i++ {
if source[i] == '&' { if source[i] == '&' {
@ -632,7 +637,7 @@ func ResolveNumericReferences(source []byte) []byte {
func ResolveEntityNames(source []byte) []byte { func ResolveEntityNames(source []byte) []byte {
cob := NewCopyOnWriteBuffer(source) cob := NewCopyOnWriteBuffer(source)
limit := len(source) limit := len(source)
var ok bool ok := false
n := 0 n := 0
for i := 0; i < limit; i++ { for i := 0; i < limit; i++ {
if source[i] == '&' { if source[i] == '&' {
@ -665,9 +670,9 @@ var htmlSpace = []byte("%20")
// URLEscape escape the given URL. // URLEscape escape the given URL.
// If resolveReference is set true: // If resolveReference is set true:
// 1. unescape punctuations // 1. unescape punctuations
// 2. resolve numeric references // 2. resolve numeric references
// 3. resolve entity references // 3. resolve entity references
// //
// URL encoded values (%xx) are kept as is. // URL encoded values (%xx) are kept as is.
func URLEscape(v []byte, resolveReference bool) []byte { func URLEscape(v []byte, resolveReference bool) []byte {
@ -702,7 +707,7 @@ func URLEscape(v []byte, resolveReference bool) []byte {
n = i n = i
continue continue
} }
if int(u8len) > len(v) { if int(u8len) >= len(v) {
u8len = int8(len(v) - 1) u8len = int8(len(v) - 1)
} }
if u8len == 0 { if u8len == 0 {
@ -757,7 +762,7 @@ func FindURLIndex(b []byte) int {
return i return i
} }
var emailDomainRegexp = regexp.MustCompile(`^[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*`) //nolint:golint,lll var emailDomainRegexp = regexp.MustCompile(`^[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*`)
// FindEmailIndex returns a stop index value if the given bytes seem an email address. // FindEmailIndex returns a stop index value if the given bytes seem an email address.
func FindEmailIndex(b []byte) int { func FindEmailIndex(b []byte) int {
@ -788,19 +793,18 @@ func FindEmailIndex(b []byte) int {
var spaces = []byte(" \t\n\x0b\x0c\x0d") var spaces = []byte(" \t\n\x0b\x0c\x0d")
var spaceTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} //nolint:golint,lll var spaceTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
var punctTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} //nolint:golint,lll var punctTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
// a-zA-Z0-9, ;/?:@&=+$,-_.!~*'()# // a-zA-Z0-9, ;/?:@&=+$,-_.!~*'()#
var urlEscapeTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
var urlEscapeTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} //nolint:golint,lll var utf8lenTable = [256]int8{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 99, 99, 99, 99, 99, 99, 99, 99}
var utf8lenTable = [256]int8{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 99, 99, 99, 99, 99, 99, 99, 99} //nolint:golint,lll var urlTable = [256]uint8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 5, 5, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 1, 0, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
var urlTable = [256]uint8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 5, 5, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 1, 0, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} //nolint:golint,lll var emailTable = [256]uint8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
var emailTable = [256]uint8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} //nolint:golint,lll
// UTF8Len returns a byte length of the utf-8 character. // UTF8Len returns a byte length of the utf-8 character.
func UTF8Len(b byte) int8 { func UTF8Len(b byte) int8 {
@ -814,7 +818,7 @@ func IsPunct(c byte) bool {
// IsPunctRune returns true if the given rune is a punctuation, otherwise false. // IsPunctRune returns true if the given rune is a punctuation, otherwise false.
func IsPunctRune(r rune) bool { func IsPunctRune(r rune) bool {
return unicode.IsSymbol(r) || unicode.IsPunct(r) return int32(r) <= 256 && IsPunct(byte(r)) || unicode.IsPunct(r)
} }
// IsSpace returns true if the given character is a space, otherwise false. // IsSpace returns true if the given character is a space, otherwise false.
@ -861,7 +865,7 @@ type PrioritizedValue struct {
Priority int Priority int
} }
// PrioritizedSlice is a slice of the PrioritizedValues. // PrioritizedSlice is a slice of the PrioritizedValues
type PrioritizedSlice []PrioritizedValue type PrioritizedSlice []PrioritizedValue
// Sort sorts the PrioritizedSlice in ascending order. // Sort sorts the PrioritizedSlice in ascending order.
@ -976,7 +980,7 @@ func (s *bytesFilter) Contains(b []byte) bool {
} }
h := bytesHash(b) % uint64(len(s.slots)) h := bytesHash(b) % uint64(len(s.slots))
slot := s.slots[h] slot := s.slots[h]
if len(slot) == 0 { if slot == nil || len(slot) == 0 {
return false return false
} }
for _, element := range slot { for _, element := range slot {

View file

@ -1,469 +0,0 @@
package util
import "unicode"
var cjkRadicalsSupplement = &unicode.RangeTable{
R16: []unicode.Range16{
{0x2E80, 0x2EFF, 1},
},
}
var kangxiRadicals = &unicode.RangeTable{
R16: []unicode.Range16{
{0x2F00, 0x2FDF, 1},
},
}
var ideographicDescriptionCharacters = &unicode.RangeTable{
R16: []unicode.Range16{
{0x2FF0, 0x2FFF, 1},
},
}
var cjkSymbolsAndPunctuation = &unicode.RangeTable{
R16: []unicode.Range16{
{0x3000, 0x303F, 1},
},
}
var hiragana = &unicode.RangeTable{
R16: []unicode.Range16{
{0x3040, 0x309F, 1},
},
}
var katakana = &unicode.RangeTable{
R16: []unicode.Range16{
{0x30A0, 0x30FF, 1},
},
}
var kanbun = &unicode.RangeTable{
R16: []unicode.Range16{
{0x3130, 0x318F, 1},
{0x3190, 0x319F, 1},
},
}
var cjkStrokes = &unicode.RangeTable{
R16: []unicode.Range16{
{0x31C0, 0x31EF, 1},
},
}
var katakanaPhoneticExtensions = &unicode.RangeTable{
R16: []unicode.Range16{
{0x31F0, 0x31FF, 1},
},
}
var cjkCompatibility = &unicode.RangeTable{
R16: []unicode.Range16{
{0x3300, 0x33FF, 1},
},
}
var cjkUnifiedIdeographsExtensionA = &unicode.RangeTable{
R16: []unicode.Range16{
{0x3400, 0x4DBF, 1},
},
}
var cjkUnifiedIdeographs = &unicode.RangeTable{
R16: []unicode.Range16{
{0x4E00, 0x9FFF, 1},
},
}
var yiSyllables = &unicode.RangeTable{
R16: []unicode.Range16{
{0xA000, 0xA48F, 1},
},
}
var yiRadicals = &unicode.RangeTable{
R16: []unicode.Range16{
{0xA490, 0xA4CF, 1},
},
}
var cjkCompatibilityIdeographs = &unicode.RangeTable{
R16: []unicode.Range16{
{0xF900, 0xFAFF, 1},
},
}
var verticalForms = &unicode.RangeTable{
R16: []unicode.Range16{
{0xFE10, 0xFE1F, 1},
},
}
var cjkCompatibilityForms = &unicode.RangeTable{
R16: []unicode.Range16{
{0xFE30, 0xFE4F, 1},
},
}
var smallFormVariants = &unicode.RangeTable{
R16: []unicode.Range16{
{0xFE50, 0xFE6F, 1},
},
}
var halfwidthAndFullwidthForms = &unicode.RangeTable{
R16: []unicode.Range16{
{0xFF00, 0xFFEF, 1},
},
}
var kanaSupplement = &unicode.RangeTable{
R32: []unicode.Range32{
{0x1B000, 0x1B0FF, 1},
},
}
var kanaExtendedA = &unicode.RangeTable{
R32: []unicode.Range32{
{0x1B100, 0x1B12F, 1},
},
}
var smallKanaExtension = &unicode.RangeTable{
R32: []unicode.Range32{
{0x1B130, 0x1B16F, 1},
},
}
var cjkUnifiedIdeographsExtensionB = &unicode.RangeTable{
R32: []unicode.Range32{
{0x20000, 0x2A6DF, 1},
},
}
var cjkUnifiedIdeographsExtensionC = &unicode.RangeTable{
R32: []unicode.Range32{
{0x2A700, 0x2B73F, 1},
},
}
var cjkUnifiedIdeographsExtensionD = &unicode.RangeTable{
R32: []unicode.Range32{
{0x2B740, 0x2B81F, 1},
},
}
var cjkUnifiedIdeographsExtensionE = &unicode.RangeTable{
R32: []unicode.Range32{
{0x2B820, 0x2CEAF, 1},
},
}
var cjkUnifiedIdeographsExtensionF = &unicode.RangeTable{
R32: []unicode.Range32{
{0x2CEB0, 0x2EBEF, 1},
},
}
var cjkCompatibilityIdeographsSupplement = &unicode.RangeTable{
R32: []unicode.Range32{
{0x2F800, 0x2FA1F, 1},
},
}
var cjkUnifiedIdeographsExtensionG = &unicode.RangeTable{
R32: []unicode.Range32{
{0x30000, 0x3134F, 1},
},
}
// IsEastAsianWideRune returns true if the given rune is an east asian wide
// character, otherwise false.
//
// A rune is reported as wide when it belongs to any of unicode.Hiragana,
// unicode.Katakana, unicode.Han, unicode.Lm, unicode.Hangul, or the
// cjkSymbolsAndPunctuation table.
func IsEastAsianWideRune(r rune) bool {
	return unicode.In(r,
		unicode.Hiragana,
		unicode.Katakana,
		unicode.Han,
		unicode.Lm,
		unicode.Hangul,
		cjkSymbolsAndPunctuation,
	)
}
// IsSpaceDiscardingUnicodeRune returns true if the given rune is
// space-discarding unicode character, otherwise false.
// See https://www.w3.org/TR/2020/WD-css-text-3-20200429/#space-discard-set
//
// The rune is tested against each of the package's range tables in turn; the
// first table that contains it ends the search.
func IsSpaceDiscardingUnicodeRune(r rune) bool {
	switch {
	case unicode.Is(cjkRadicalsSupplement, r),
		unicode.Is(kangxiRadicals, r),
		unicode.Is(ideographicDescriptionCharacters, r),
		unicode.Is(cjkSymbolsAndPunctuation, r),
		unicode.Is(hiragana, r),
		unicode.Is(katakana, r),
		unicode.Is(kanbun, r),
		unicode.Is(cjkStrokes, r),
		unicode.Is(katakanaPhoneticExtensions, r),
		unicode.Is(cjkCompatibility, r),
		unicode.Is(cjkUnifiedIdeographsExtensionA, r),
		unicode.Is(cjkUnifiedIdeographs, r),
		unicode.Is(yiSyllables, r),
		unicode.Is(yiRadicals, r),
		unicode.Is(cjkCompatibilityIdeographs, r),
		unicode.Is(verticalForms, r),
		unicode.Is(cjkCompatibilityForms, r),
		unicode.Is(smallFormVariants, r),
		unicode.Is(halfwidthAndFullwidthForms, r),
		unicode.Is(kanaSupplement, r),
		unicode.Is(kanaExtendedA, r),
		unicode.Is(smallKanaExtension, r),
		unicode.Is(cjkUnifiedIdeographsExtensionB, r),
		unicode.Is(cjkUnifiedIdeographsExtensionC, r),
		unicode.Is(cjkUnifiedIdeographsExtensionD, r),
		unicode.Is(cjkUnifiedIdeographsExtensionE, r),
		unicode.Is(cjkUnifiedIdeographsExtensionF, r),
		unicode.Is(cjkCompatibilityIdeographsSupplement, r),
		unicode.Is(cjkUnifiedIdeographsExtensionG, r):
		return true
	}
	return false
}
// EastAsianWidth returns the east asian width of the given rune.
// See https://www.unicode.org/reports/tr11/tr11-36.html
//
// The result is one of "F", "H", "W", "Na", "A" or "N". The classes are
// tested in exactly that order and the first match wins; a rune matching
// none of the listed code points or ranges yields "N".
func EastAsianWidth(r rune) string {
	// span reports whether r lies in the inclusive range [lo, hi].
	span := func(lo, hi rune) bool {
		return lo <= r && r <= hi
	}

	switch {
	case r == 0x3000,
		span(0xFF01, 0xFF60),
		span(0xFFE0, 0xFFE6):
		return "F"

	case r == 0x20A9,
		span(0xFF61, 0xFFBE),
		span(0xFFC2, 0xFFC7),
		span(0xFFCA, 0xFFCF),
		span(0xFFD2, 0xFFD7),
		span(0xFFDA, 0xFFDC),
		span(0xFFE8, 0xFFEE):
		return "H"

	case span(0x1100, 0x115F),
		span(0x11A3, 0x11A7),
		span(0x11FA, 0x11FF),
		span(0x2329, 0x232A),
		span(0x2E80, 0x2E99),
		span(0x2E9B, 0x2EF3),
		span(0x2F00, 0x2FD5),
		span(0x2FF0, 0x2FFB),
		span(0x3001, 0x303E),
		span(0x3041, 0x3096),
		span(0x3099, 0x30FF),
		span(0x3105, 0x312D),
		span(0x3131, 0x318E),
		span(0x3190, 0x31BA),
		span(0x31C0, 0x31E3),
		span(0x31F0, 0x321E),
		span(0x3220, 0x3247),
		span(0x3250, 0x32FE),
		span(0x3300, 0x4DBF),
		span(0x4E00, 0xA48C),
		span(0xA490, 0xA4C6),
		span(0xA960, 0xA97C),
		span(0xAC00, 0xD7A3),
		span(0xD7B0, 0xD7C6),
		span(0xD7CB, 0xD7FB),
		span(0xF900, 0xFAFF),
		span(0xFE10, 0xFE19),
		span(0xFE30, 0xFE52),
		span(0xFE54, 0xFE66),
		span(0xFE68, 0xFE6B),
		span(0x1B000, 0x1B001),
		span(0x1F200, 0x1F202),
		span(0x1F210, 0x1F23A),
		span(0x1F240, 0x1F248),
		span(0x1F250, 0x1F251),
		// The next two ranges overlap; together they cover 0x20000..0x2FFFD.
		// NOTE(review): 0x2F73F may have been meant as 0x2B73F; the result
		// is the same either way because of the overlap.
		span(0x20000, 0x2F73F),
		span(0x2B740, 0x2FFFD),
		span(0x30000, 0x3FFFD):
		return "W"

	case span(0x0020, 0x007E),
		span(0x00A2, 0x00A3),
		span(0x00A5, 0x00A6),
		r == 0x00AC,
		r == 0x00AF,
		span(0x27E6, 0x27ED),
		span(0x2985, 0x2986):
		return "Na"

	case r == 0x00A1,
		r == 0x00A4,
		span(0x00A7, 0x00A8),
		r == 0x00AA,
		span(0x00AD, 0x00AE),
		span(0x00B0, 0x00B4),
		span(0x00B6, 0x00BA),
		span(0x00BC, 0x00BF),
		r == 0x00C6,
		r == 0x00D0,
		span(0x00D7, 0x00D8),
		span(0x00DE, 0x00E1),
		r == 0x00E6,
		span(0x00E8, 0x00EA),
		span(0x00EC, 0x00ED),
		r == 0x00F0,
		span(0x00F2, 0x00F3),
		span(0x00F7, 0x00FA),
		r == 0x00FC,
		r == 0x00FE,
		r == 0x0101,
		r == 0x0111,
		r == 0x0113,
		r == 0x011B,
		span(0x0126, 0x0127),
		r == 0x012B,
		span(0x0131, 0x0133),
		r == 0x0138,
		span(0x013F, 0x0142),
		r == 0x0144,
		span(0x0148, 0x014B),
		r == 0x014D,
		span(0x0152, 0x0153),
		span(0x0166, 0x0167),
		r == 0x016B,
		r == 0x01CE,
		r == 0x01D0,
		r == 0x01D2,
		r == 0x01D4,
		r == 0x01D6,
		r == 0x01D8,
		r == 0x01DA,
		r == 0x01DC,
		r == 0x0251,
		r == 0x0261,
		r == 0x02C4,
		r == 0x02C7,
		span(0x02C9, 0x02CB),
		r == 0x02CD,
		r == 0x02D0,
		span(0x02D8, 0x02DB),
		r == 0x02DD,
		r == 0x02DF,
		span(0x0300, 0x036F),
		span(0x0391, 0x03A1),
		span(0x03A3, 0x03A9),
		span(0x03B1, 0x03C1),
		span(0x03C3, 0x03C9),
		r == 0x0401,
		span(0x0410, 0x044F),
		r == 0x0451,
		r == 0x2010,
		span(0x2013, 0x2016),
		span(0x2018, 0x2019),
		span(0x201C, 0x201D),
		span(0x2020, 0x2022),
		span(0x2024, 0x2027),
		r == 0x2030,
		span(0x2032, 0x2033),
		r == 0x2035,
		r == 0x203B,
		r == 0x203E,
		r == 0x2074,
		r == 0x207F,
		span(0x2081, 0x2084),
		r == 0x20AC,
		r == 0x2103,
		r == 0x2105,
		r == 0x2109,
		r == 0x2113,
		r == 0x2116,
		span(0x2121, 0x2122),
		r == 0x2126,
		r == 0x212B,
		span(0x2153, 0x2154),
		span(0x215B, 0x215E),
		span(0x2160, 0x216B),
		span(0x2170, 0x2179),
		r == 0x2189,
		span(0x2190, 0x2199),
		span(0x21B8, 0x21B9),
		r == 0x21D2,
		r == 0x21D4,
		r == 0x21E7,
		r == 0x2200,
		span(0x2202, 0x2203),
		span(0x2207, 0x2208),
		r == 0x220B,
		r == 0x220F,
		r == 0x2211,
		r == 0x2215,
		r == 0x221A,
		span(0x221D, 0x2220),
		r == 0x2223,
		r == 0x2225,
		span(0x2227, 0x222C),
		r == 0x222E,
		span(0x2234, 0x2237),
		span(0x223C, 0x223D),
		r == 0x2248,
		r == 0x224C,
		r == 0x2252,
		span(0x2260, 0x2261),
		span(0x2264, 0x2267),
		span(0x226A, 0x226B),
		span(0x226E, 0x226F),
		span(0x2282, 0x2283),
		span(0x2286, 0x2287),
		r == 0x2295,
		r == 0x2299,
		r == 0x22A5,
		r == 0x22BF,
		r == 0x2312,
		span(0x2460, 0x24E9),
		span(0x24EB, 0x254B),
		span(0x2550, 0x2573),
		span(0x2580, 0x258F),
		span(0x2592, 0x2595),
		span(0x25A0, 0x25A1),
		span(0x25A3, 0x25A9),
		span(0x25B2, 0x25B3),
		span(0x25B6, 0x25B7),
		span(0x25BC, 0x25BD),
		span(0x25C0, 0x25C1),
		span(0x25C6, 0x25C8),
		r == 0x25CB,
		span(0x25CE, 0x25D1),
		span(0x25E2, 0x25E5),
		r == 0x25EF,
		span(0x2605, 0x2606),
		r == 0x2609,
		span(0x260E, 0x260F),
		span(0x2614, 0x2615),
		r == 0x261C,
		r == 0x261E,
		r == 0x2640,
		r == 0x2642,
		span(0x2660, 0x2661),
		span(0x2663, 0x2665),
		span(0x2667, 0x266A),
		span(0x266C, 0x266D),
		r == 0x266F,
		span(0x269E, 0x269F),
		span(0x26BE, 0x26BF),
		span(0x26C4, 0x26CD),
		span(0x26CF, 0x26E1),
		r == 0x26E3,
		span(0x26E8, 0x26FF),
		r == 0x273D,
		r == 0x2757,
		span(0x2776, 0x277F),
		span(0x2B55, 0x2B59),
		span(0x3248, 0x324F),
		span(0xE000, 0xF8FF),
		span(0xFE00, 0xFE0F),
		r == 0xFFFD,
		span(0x1F100, 0x1F10A),
		span(0x1F110, 0x1F12D),
		span(0x1F130, 0x1F169),
		span(0x1F170, 0x1F19A),
		span(0xE0100, 0xE01EF),
		span(0xF0000, 0xFFFFD),
		span(0x100000, 0x10FFFD):
		return "A"
	}

	return "N"
}

View file

@ -1,4 +1,3 @@
//go:build appengine || js
// +build appengine js // +build appengine js
package util package util

View file

@ -1,5 +1,4 @@
//go:build !appengine && !js && !go1.21 // +build !appengine,!js
// +build !appengine,!js,!go1.21
package util package util

View file

@ -1,18 +0,0 @@
//go:build !appengine && !js && go1.21
// +build !appengine,!js,go1.21
package util
import (
"unsafe"
)
// BytesToReadOnlyString returns a string converted from given bytes.
//
// The string is built with unsafe.String over the slice's underlying data, so
// no copy is made. Per the unsafe package contract, b must not be modified
// while the returned string is in use.
func BytesToReadOnlyString(b []byte) string {
	data := unsafe.SliceData(b)
	return unsafe.String(data, len(b))
}
// StringToReadOnlyBytes returns bytes converted from given string.
//
// The slice is built with unsafe.Slice over the string's underlying data, so
// no copy is made. Per the unsafe package contract, the returned bytes must
// not be modified.
func StringToReadOnlyBytes(s string) []byte {
	data := unsafe.StringData(s)
	return unsafe.Slice(data, len(s))
}