diff --git a/README.md b/README.md
index 9e61344..0bde9c5 100644
--- a/README.md
+++ b/README.md
@@ -64,8 +64,8 @@ Import packages:
```
import (
- "bytes"
- "github.com/yuin/goldmark"
+ "bytes"
+ "github.com/yuin/goldmark"
)
```
@@ -105,11 +105,11 @@ Custom parser and renderer
--------------------------
```go
import (
- "bytes"
- "github.com/yuin/goldmark"
- "github.com/yuin/goldmark/extension"
- "github.com/yuin/goldmark/parser"
- "github.com/yuin/goldmark/renderer/html"
+ "bytes"
+ "github.com/yuin/goldmark"
+ "github.com/yuin/goldmark/extension"
+ "github.com/yuin/goldmark/parser"
+ "github.com/yuin/goldmark/renderer/html"
)
md := goldmark.New(
@@ -215,14 +215,53 @@ You can overwrite the substitutions by `extensions.WithTypographicSubstitutions`
```go
markdown := goldmark.New(
- goldmark.WithExtensions(
- extension.NewTypographer(
- extension.WithTypographicSubstitutions(extension.TypographicSubstitutions{
- extension.LeftSingleQuote: []byte("‚"),
- extension.RightSingleQuote: nil, // nil disables a substitution
- }),
- ),
- ),
+ goldmark.WithExtensions(
+ extension.NewTypographer(
+ extension.WithTypographicSubstitutions(extension.TypographicSubstitutions{
+ extension.LeftSingleQuote: []byte("‚"),
+ extension.RightSingleQuote: nil, // nil disables a substitution
+ }),
+ ),
+ ),
+)
+```
+
+### Linkify extension
+Linkify extension implements [Autolinks(extension)](https://github.github.com/gfm/#autolinks-extension-)
+defined in [GitHub Flavored Markdown Spec](https://github.github.com/gfm/).
+
+Since the spec does not define the details of URLs, there are many ambiguous cases.
+
+You can override the autolinking patterns with the following options.
+
+| Functional option | Type | Description |
+| ----------------- | ---- | ----------- |
+| `extension.WithLinkifyAllowedProtocols` | `[][]byte` | List of allowed protocols such as `[][]byte{ []byte("http:") }` |
+| `extension.WithLinkifyURLRegexp` | `*regexp.Regexp` | Regexp that defines URL including protocols |
+| `extension.WithLinkifyWWWRegexp` | `*regexp.Regexp` | Regexp that defines URL starting with `www.`. This pattern corresponds to [the extended www autolink](https://github.github.com/gfm/#extended-www-autolink) |
+| `extension.WithLinkifyEmailRegexp` | `*regexp.Regexp` | Regexp that defines an email address |
+
+Example: using [xurls](https://github.com/mvdan/xurls)
+
+```go
+import "mvdan.cc/xurls/v2"
+
+markdown := goldmark.New(
+ goldmark.WithRendererOptions(
+ html.WithXHTML(),
+ html.WithUnsafe(),
+ ),
+ goldmark.WithExtensions(
+ extension.NewLinkify(
+ extension.WithLinkifyAllowedProtocols([][]byte{
+ []byte("http:"),
+ []byte("https:"),
+ }),
+ extension.WithLinkifyURLRegexp(
+ xurls.Strict(),
+ ),
+ ),
+ ),
)
```
@@ -317,6 +356,7 @@ AST nodes do not have concrete text. AST nodes have segment information of the d
`text.Segment` has 3 attributes: `Start`, `End`, `Padding` .
+(TBC)
**TODO**
diff --git a/extension/_test/linkify.txt b/extension/_test/linkify.txt
index 669ab72..1e75aeb 100644
--- a/extension/_test/linkify.txt
+++ b/extension/_test/linkify.txt
@@ -153,3 +153,11 @@ This is a `git@github.com:vim/vim`
//- - - - - - - - -//
This is a git@github.com:vim/vim
//= = = = = = = = = = = = = = = = = = = = = = = =//
+
+
+16
+//- - - - - - - - -//
+https://nic.college
+//- - - - - - - - -//
+https://nic.college
+//= = = = = = = = = = = = = = = = = = = = = = = =//
diff --git a/extension/linkify.go b/extension/linkify.go
index 0a584e8..565c6bd 100644
--- a/extension/linkify.go
+++ b/extension/linkify.go
@@ -2,27 +2,153 @@ package extension
import (
"bytes"
+ "regexp"
+
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
- "regexp"
)
-var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
+var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]+(?:(?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
-var urlRegexp = regexp.MustCompile(`^(?:http|https|ftp):\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_+.~#$!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
+var urlRegexp = regexp.MustCompile(`^(?:http|https|ftp):\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]+(?:(?:/|[#?])[-a-zA-Z0-9@:%_+.~#$!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
-type linkifyParser struct {
+// A LinkifyConfig struct is a data structure that holds configuration of the
+// Linkify extension.
+type LinkifyConfig struct {
+ AllowedProtocols [][]byte
+ URLRegexp *regexp.Regexp
+ WWWRegexp *regexp.Regexp
+ EmailRegexp *regexp.Regexp
}
-var defaultLinkifyParser = &linkifyParser{}
+const optLinkifyAllowedProtocols parser.OptionName = "LinkifyAllowedProtocols"
+const optLinkifyURLRegexp parser.OptionName = "LinkifyURLRegexp"
+const optLinkifyWWWRegexp parser.OptionName = "LinkifyWWWRegexp"
+const optLinkifyEmailRegexp parser.OptionName = "LinkifyEmailRegexp"
+
+// SetOption implements SetOptioner.
+func (c *LinkifyConfig) SetOption(name parser.OptionName, value interface{}) {
+ switch name {
+ case optLinkifyAllowedProtocols:
+ c.AllowedProtocols = value.([][]byte)
+ case optLinkifyURLRegexp:
+ c.URLRegexp = value.(*regexp.Regexp)
+ case optLinkifyWWWRegexp:
+ c.WWWRegexp = value.(*regexp.Regexp)
+ case optLinkifyEmailRegexp:
+ c.EmailRegexp = value.(*regexp.Regexp)
+ }
+}
+
+// A LinkifyOption interface sets options for the Linkify extension.
+type LinkifyOption interface {
+ parser.Option
+ SetLinkifyOption(*LinkifyConfig)
+}
+
+type withLinkifyAllowedProtocols struct {
+ value [][]byte
+}
+
+func (o *withLinkifyAllowedProtocols) SetParserOption(c *parser.Config) {
+ c.Options[optLinkifyAllowedProtocols] = o.value
+}
+
+func (o *withLinkifyAllowedProtocols) SetLinkifyOption(p *LinkifyConfig) {
+ p.AllowedProtocols = o.value
+}
+
+// WithLinkifyAllowedProtocols is a functional option that specifies the allowed
+// protocols in autolinks. Each protocol must end with ':', like
+// 'http:'.
+func WithLinkifyAllowedProtocols(value [][]byte) LinkifyOption {
+ return &withLinkifyAllowedProtocols{
+ value: value,
+ }
+}
+
+type withLinkifyURLRegexp struct {
+ value *regexp.Regexp
+}
+
+func (o *withLinkifyURLRegexp) SetParserOption(c *parser.Config) {
+ c.Options[optLinkifyURLRegexp] = o.value
+}
+
+func (o *withLinkifyURLRegexp) SetLinkifyOption(p *LinkifyConfig) {
+ p.URLRegexp = o.value
+}
+
+// WithLinkifyURLRegexp is a functional option that specifies
+// a pattern of the URL including a protocol.
+func WithLinkifyURLRegexp(value *regexp.Regexp) LinkifyOption {
+ return &withLinkifyURLRegexp{
+ value: value,
+ }
+}
+
+// WithLinkifyWWWRegexp is a functional option that specifies
+// a pattern of the URL without a protocol.
+// This pattern must start with 'www.'.
+type withLinkifyWWWRegexp struct {
+ value *regexp.Regexp
+}
+
+func (o *withLinkifyWWWRegexp) SetParserOption(c *parser.Config) {
+ c.Options[optLinkifyWWWRegexp] = o.value
+}
+
+func (o *withLinkifyWWWRegexp) SetLinkifyOption(p *LinkifyConfig) {
+ p.WWWRegexp = o.value
+}
+
+func WithLinkifyWWWRegexp(value *regexp.Regexp) LinkifyOption {
+ return &withLinkifyWWWRegexp{
+ value: value,
+ }
+}
+
+// WithLinkifyEmailRegexp is a functional option that specifies
+// a pattern of the email address.
+type withLinkifyEmailRegexp struct {
+ value *regexp.Regexp
+}
+
+func (o *withLinkifyEmailRegexp) SetParserOption(c *parser.Config) {
+ c.Options[optLinkifyEmailRegexp] = o.value
+}
+
+func (o *withLinkifyEmailRegexp) SetLinkifyOption(p *LinkifyConfig) {
+ p.EmailRegexp = o.value
+}
+
+func WithLinkifyEmailRegexp(value *regexp.Regexp) LinkifyOption {
+ return &withLinkifyEmailRegexp{
+ value: value,
+ }
+}
+
+type linkifyParser struct {
+ LinkifyConfig
+}
// NewLinkifyParser return a new InlineParser can parse
// text that seems like a URL.
-func NewLinkifyParser() parser.InlineParser {
- return defaultLinkifyParser
+func NewLinkifyParser(opts ...LinkifyOption) parser.InlineParser {
+ p := &linkifyParser{
+ LinkifyConfig: LinkifyConfig{
+ AllowedProtocols: nil,
+ URLRegexp: urlRegexp,
+ WWWRegexp: wwwURLRegxp,
+ },
+ }
+ for _, o := range opts {
+ o.SetLinkifyOption(&p.LinkifyConfig)
+ }
+ return p
}
func (s *linkifyParser) Trigger() []byte {
@@ -53,14 +179,26 @@ func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont
var m []int
var protocol []byte
var typ ast.AutoLinkType = ast.AutoLinkURL
- if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) {
- m = urlRegexp.FindSubmatchIndex(line)
+ if s.LinkifyConfig.AllowedProtocols == nil {
+ if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) {
+ m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line)
+ }
+ } else {
+ for _, prefix := range s.LinkifyConfig.AllowedProtocols {
+ if bytes.HasPrefix(line, prefix) {
+ m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line)
+ break
+ }
+ }
}
if m == nil && bytes.HasPrefix(line, domainWWW) {
- m = wwwURLRegxp.FindSubmatchIndex(line)
+ m = s.LinkifyConfig.WWWRegexp.FindSubmatchIndex(line)
protocol = []byte("http")
}
- if m != nil {
+ if m != nil && m[0] != 0 {
+ m = nil
+ }
+ if m != nil && m[0] == 0 {
lastChar := line[m[1]-1]
if lastChar == '.' {
m[1]--
@@ -96,7 +234,15 @@ func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont
return nil
}
typ = ast.AutoLinkEmail
- stop := util.FindEmailIndex(line)
+ stop := -1
+ if s.LinkifyConfig.EmailRegexp == nil {
+ stop = util.FindEmailIndex(line)
+ } else {
+ m := s.LinkifyConfig.EmailRegexp.FindSubmatchIndex(line)
+ if m != nil && m[0] == 0 {
+ stop = m[1]
+ }
+ }
if stop < 0 {
return nil
}
@@ -136,15 +282,22 @@ func (s *linkifyParser) CloseBlock(parent ast.Node, pc parser.Context) {
}
type linkify struct {
+ options []LinkifyOption
}
// Linkify is an extension that allow you to parse text that seems like a URL.
var Linkify = &linkify{}
+func NewLinkify(opts ...LinkifyOption) goldmark.Extender {
+ return &linkify{
+ options: opts,
+ }
+}
+
func (e *linkify) Extend(m goldmark.Markdown) {
m.Parser().AddOptions(
parser.WithInlineParsers(
- util.Prioritized(NewLinkifyParser(), 999),
+ util.Prioritized(NewLinkifyParser(e.options...), 999),
),
)
}
diff --git a/extension/linkify_test.go b/extension/linkify_test.go
index 1abefad..18d1331 100644
--- a/extension/linkify_test.go
+++ b/extension/linkify_test.go
@@ -1,6 +1,7 @@
package extension
import (
+ "regexp"
"testing"
"github.com/yuin/goldmark"
@@ -19,3 +20,81 @@ func TestLinkify(t *testing.T) {
)
testutil.DoTestCaseFile(markdown, "_test/linkify.txt", t)
}
+
+func TestLinkifyWithAllowedProtocols(t *testing.T) {
+ markdown := goldmark.New(
+ goldmark.WithRendererOptions(
+ html.WithXHTML(),
+ html.WithUnsafe(),
+ ),
+ goldmark.WithExtensions(
+ NewLinkify(
+ WithLinkifyAllowedProtocols([][]byte{
+ []byte("ssh:"),
+ }),
+ WithLinkifyURLRegexp(
+ regexp.MustCompile(`\w+://[^\s]+`),
+ ),
+ ),
+ ),
+ )
+ testutil.DoTestCase(
+ markdown,
+ testutil.MarkdownTestCase{
+ No: 1,
+ Markdown: `hoge ssh://user@hoge.com. http://example.com/`,
+ Expected: `hoge ssh://user@hoge.com. http://example.com/
`,
+ },
+ t,
+ )
+}
+
+func TestLinkifyWithWWWRegexp(t *testing.T) {
+ markdown := goldmark.New(
+ goldmark.WithRendererOptions(
+ html.WithXHTML(),
+ html.WithUnsafe(),
+ ),
+ goldmark.WithExtensions(
+ NewLinkify(
+ WithLinkifyWWWRegexp(
+ regexp.MustCompile(`www\.example\.com`),
+ ),
+ ),
+ ),
+ )
+ testutil.DoTestCase(
+ markdown,
+ testutil.MarkdownTestCase{
+ No: 1,
+ Markdown: `www.google.com www.example.com`,
+ Expected: `www.google.com www.example.com
`,
+ },
+ t,
+ )
+}
+
+func TestLinkifyWithEmailRegexp(t *testing.T) {
+ markdown := goldmark.New(
+ goldmark.WithRendererOptions(
+ html.WithXHTML(),
+ html.WithUnsafe(),
+ ),
+ goldmark.WithExtensions(
+ NewLinkify(
+ WithLinkifyEmailRegexp(
+ regexp.MustCompile(`user@example\.com`),
+ ),
+ ),
+ ),
+ )
+ testutil.DoTestCase(
+ markdown,
+ testutil.MarkdownTestCase{
+ No: 1,
+ Markdown: `hoge@example.com user@example.com`,
+ Expected: `hoge@example.com user@example.com
`,
+ },
+ t,
+ )
+}