Fixes #83, Adds options for Linkify

This commit is contained in:
yuin 2020-01-02 01:36:46 +09:00
parent f9b134f6be
commit 4b54582dee
4 changed files with 308 additions and 28 deletions

View file

@ -64,8 +64,8 @@ Import packages:
```
import (
"bytes"
"github.com/yuin/goldmark"
"bytes"
"github.com/yuin/goldmark"
)
```
@ -105,11 +105,11 @@ Custom parser and renderer
--------------------------
```go
import (
"bytes"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer/html"
"bytes"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer/html"
)
md := goldmark.New(
@ -215,14 +215,53 @@ You can overwrite the substitutions by `extensions.WithTypographicSubstitutions`
```go
markdown := goldmark.New(
goldmark.WithExtensions(
extension.NewTypographer(
extension.WithTypographicSubstitutions(extension.TypographicSubstitutions{
extension.LeftSingleQuote: []byte("‚"),
extension.RightSingleQuote: nil, // nil disables a substitution
}),
),
),
goldmark.WithExtensions(
extension.NewTypographer(
extension.WithTypographicSubstitutions(extension.TypographicSubstitutions{
extension.LeftSingleQuote: []byte("‚"),
extension.RightSingleQuote: nil, // nil disables a substitution
}),
),
),
)
```
### Linkify extension
Linkify extension implements [Autolinks(extension)](https://github.github.com/gfm/#autolinks-extension-)
defined in [GitHub Flavored Markdown Spec](https://github.github.com/gfm/).
Since spec does not define details about URL, there are many ambiguous cases.
You can overwrite autolinking patterns by options.
| Functional option | Type | Description |
| ----------------- | ---- | ----------- |
| `extension.WithLinkifyAllowedProtocols` | `[][]byte` | List of allowed protocols such as `[][]byte{ []byte("http:") }` |
| `extension.WithLinkifyURLRegexp` | `*regexp.Regexp` | Regexp that defines URL including protocols |
| `extension.WithLinkifyWWWRegexp` | `*regexp.Regexp` | Regexp that defines URL starting with `www.`. This pattern corresponds to [the extended www autolink](https://github.github.com/gfm/#extended-www-autolink) |
| `extension.WithLinkifyEmailRegexp` | `*regexp.Regexp` | Regexp that defines email address` |
Example: using [xurls](https://github.com/mvdan/xurls)
```go
import "mvdan.cc/xurls/v2"
markdown := goldmark.New(
goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithUnsafe(),
),
goldmark.WithExtensions(
extension.NewLinkify(
extension.WithLinkifyAllowedProtocols([][]byte{
[]byte("http:"),
[]byte("https:"),
}),
extension.WithLinkifyURLRegexp(
xurls.Strict(),
),
),
),
)
```
@ -317,6 +356,7 @@ AST nodes do not have concrete text. AST nodes have segment information of the d
`text.Segment` has 3 attributes: `Start`, `End`, `Padding` .
(TBC)
**TODO**

View file

@ -153,3 +153,11 @@ This is a `git@github.com:vim/vim`
//- - - - - - - - -//
<p>This is a <code>git@github.com:vim/vim</code></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
16
//- - - - - - - - -//
https://nic.college
//- - - - - - - - -//
<p><a href="https://nic.college">https://nic.college</a></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//

View file

@ -2,27 +2,153 @@ package extension
import (
"bytes"
"regexp"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
"regexp"
)
var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]+(?:(?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
var urlRegexp = regexp.MustCompile(`^(?:http|https|ftp):\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_+.~#$!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
var urlRegexp = regexp.MustCompile(`^(?:http|https|ftp):\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]+(?:(?:/|[#?])[-a-zA-Z0-9@:%_+.~#$!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
type linkifyParser struct {
// An LinkifyConfig struct is a data structure that holds configuration of the
// Linkify extension.
type LinkifyConfig struct {
AllowedProtocols [][]byte
URLRegexp *regexp.Regexp
WWWRegexp *regexp.Regexp
EmailRegexp *regexp.Regexp
}
var defaultLinkifyParser = &linkifyParser{}
const optLinkifyAllowedProtocols parser.OptionName = "LinkifyAllowedProtocols"
const optLinkifyURLRegexp parser.OptionName = "LinkifyURLRegexp"
const optLinkifyWWWRegexp parser.OptionName = "LinkifyWWWRegexp"
const optLinkifyEmailRegexp parser.OptionName = "LinkifyEmailRegexp"
// SetOption implements SetOptioner.
func (c *LinkifyConfig) SetOption(name parser.OptionName, value interface{}) {
switch name {
case optLinkifyAllowedProtocols:
c.AllowedProtocols = value.([][]byte)
case optLinkifyURLRegexp:
c.URLRegexp = value.(*regexp.Regexp)
case optLinkifyWWWRegexp:
c.WWWRegexp = value.(*regexp.Regexp)
case optLinkifyEmailRegexp:
c.EmailRegexp = value.(*regexp.Regexp)
}
}
// A LinkifyOption interface sets options for the LinkifyOption.
type LinkifyOption interface {
parser.Option
SetLinkifyOption(*LinkifyConfig)
}
type withLinkifyAllowedProtocols struct {
value [][]byte
}
func (o *withLinkifyAllowedProtocols) SetParserOption(c *parser.Config) {
c.Options[optLinkifyAllowedProtocols] = o.value
}
func (o *withLinkifyAllowedProtocols) SetLinkifyOption(p *LinkifyConfig) {
p.AllowedProtocols = o.value
}
// WithLinkifyAllowedProtocols is a functional otpion that specify allowed
// protocols in autolinks. Each protocol must end with ':' like
// 'http:' .
func WithLinkifyAllowedProtocols(value [][]byte) LinkifyOption {
return &withLinkifyAllowedProtocols{
value: value,
}
}
type withLinkifyURLRegexp struct {
value *regexp.Regexp
}
func (o *withLinkifyURLRegexp) SetParserOption(c *parser.Config) {
c.Options[optLinkifyURLRegexp] = o.value
}
func (o *withLinkifyURLRegexp) SetLinkifyOption(p *LinkifyConfig) {
p.URLRegexp = o.value
}
// WithLinkifyURLRegexp is a functional otpion that specify
// a pattern of the URL including a protocol.
func WithLinkifyURLRegexp(value *regexp.Regexp) LinkifyOption {
return &withLinkifyURLRegexp{
value: value,
}
}
// WithLinkifyWWWRegexp is a functional otpion that specify
// a pattern of the URL without a protocol.
// This pattern must start with 'www.' .
type withLinkifyWWWRegexp struct {
value *regexp.Regexp
}
func (o *withLinkifyWWWRegexp) SetParserOption(c *parser.Config) {
c.Options[optLinkifyWWWRegexp] = o.value
}
func (o *withLinkifyWWWRegexp) SetLinkifyOption(p *LinkifyConfig) {
p.WWWRegexp = o.value
}
func WithLinkifyWWWRegexp(value *regexp.Regexp) LinkifyOption {
return &withLinkifyWWWRegexp{
value: value,
}
}
// WithLinkifyWWWRegexp is a functional otpion that specify
// a pattern of the email address.
type withLinkifyEmailRegexp struct {
value *regexp.Regexp
}
func (o *withLinkifyEmailRegexp) SetParserOption(c *parser.Config) {
c.Options[optLinkifyEmailRegexp] = o.value
}
func (o *withLinkifyEmailRegexp) SetLinkifyOption(p *LinkifyConfig) {
p.EmailRegexp = o.value
}
func WithLinkifyEmailRegexp(value *regexp.Regexp) LinkifyOption {
return &withLinkifyEmailRegexp{
value: value,
}
}
type linkifyParser struct {
LinkifyConfig
}
// NewLinkifyParser return a new InlineParser can parse
// text that seems like a URL.
func NewLinkifyParser() parser.InlineParser {
return defaultLinkifyParser
func NewLinkifyParser(opts ...LinkifyOption) parser.InlineParser {
p := &linkifyParser{
LinkifyConfig: LinkifyConfig{
AllowedProtocols: nil,
URLRegexp: urlRegexp,
WWWRegexp: wwwURLRegxp,
},
}
for _, o := range opts {
o.SetLinkifyOption(&p.LinkifyConfig)
}
return p
}
func (s *linkifyParser) Trigger() []byte {
@ -53,14 +179,26 @@ func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont
var m []int
var protocol []byte
var typ ast.AutoLinkType = ast.AutoLinkURL
if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) {
m = urlRegexp.FindSubmatchIndex(line)
if s.LinkifyConfig.AllowedProtocols == nil {
if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) {
m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line)
}
} else {
for _, prefix := range s.LinkifyConfig.AllowedProtocols {
if bytes.HasPrefix(line, prefix) {
m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line)
break
}
}
}
if m == nil && bytes.HasPrefix(line, domainWWW) {
m = wwwURLRegxp.FindSubmatchIndex(line)
m = s.LinkifyConfig.WWWRegexp.FindSubmatchIndex(line)
protocol = []byte("http")
}
if m != nil {
if m != nil && m[0] != 0 {
m = nil
}
if m != nil && m[0] == 0 {
lastChar := line[m[1]-1]
if lastChar == '.' {
m[1]--
@ -96,7 +234,15 @@ func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont
return nil
}
typ = ast.AutoLinkEmail
stop := util.FindEmailIndex(line)
stop := -1
if s.LinkifyConfig.EmailRegexp == nil {
stop = util.FindEmailIndex(line)
} else {
m := s.LinkifyConfig.EmailRegexp.FindSubmatchIndex(line)
if m != nil && m[0] == 0 {
stop = m[1]
}
}
if stop < 0 {
return nil
}
@ -136,15 +282,22 @@ func (s *linkifyParser) CloseBlock(parent ast.Node, pc parser.Context) {
}
type linkify struct {
options []LinkifyOption
}
// Linkify is an extension that allow you to parse text that seems like a URL.
var Linkify = &linkify{}
func NewLinkify(opts ...LinkifyOption) goldmark.Extender {
return &linkify{
options: opts,
}
}
func (e *linkify) Extend(m goldmark.Markdown) {
m.Parser().AddOptions(
parser.WithInlineParsers(
util.Prioritized(NewLinkifyParser(), 999),
util.Prioritized(NewLinkifyParser(e.options...), 999),
),
)
}

View file

@ -1,6 +1,7 @@
package extension
import (
"regexp"
"testing"
"github.com/yuin/goldmark"
@ -19,3 +20,81 @@ func TestLinkify(t *testing.T) {
)
testutil.DoTestCaseFile(markdown, "_test/linkify.txt", t)
}
func TestLinkifyWithAllowedProtocols(t *testing.T) {
markdown := goldmark.New(
goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithUnsafe(),
),
goldmark.WithExtensions(
NewLinkify(
WithLinkifyAllowedProtocols([][]byte{
[]byte("ssh:"),
}),
WithLinkifyURLRegexp(
regexp.MustCompile(`\w+://[^\s]+`),
),
),
),
)
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: 1,
Markdown: `hoge ssh://user@hoge.com. http://example.com/`,
Expected: `<p>hoge <a href="ssh://user@hoge.com">ssh://user@hoge.com</a>. http://example.com/</p>`,
},
t,
)
}
func TestLinkifyWithWWWRegexp(t *testing.T) {
markdown := goldmark.New(
goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithUnsafe(),
),
goldmark.WithExtensions(
NewLinkify(
WithLinkifyWWWRegexp(
regexp.MustCompile(`www\.example\.com`),
),
),
),
)
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: 1,
Markdown: `www.google.com www.example.com`,
Expected: `<p>www.google.com <a href="http://www.example.com">www.example.com</a></p>`,
},
t,
)
}
func TestLinkifyWithEmailRegexp(t *testing.T) {
markdown := goldmark.New(
goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithUnsafe(),
),
goldmark.WithExtensions(
NewLinkify(
WithLinkifyEmailRegexp(
regexp.MustCompile(`user@example\.com`),
),
),
),
)
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: 1,
Markdown: `hoge@example.com user@example.com`,
Expected: `<p>hoge@example.com <a href="mailto:user@example.com">user@example.com</a></p>`,
},
t,
)
}