From 4b54582deed916c7f716ea01a7cf44112e6945c7 Mon Sep 17 00:00:00 2001 From: yuin Date: Thu, 2 Jan 2020 01:36:46 +0900 Subject: [PATCH] Fixes #83, Adds options for Linkify --- README.md | 70 +++++++++++--- extension/_test/linkify.txt | 8 ++ extension/linkify.go | 179 +++++++++++++++++++++++++++++++++--- extension/linkify_test.go | 79 ++++++++++++++++ 4 files changed, 308 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 9e61344..0bde9c5 100644 --- a/README.md +++ b/README.md @@ -64,8 +64,8 @@ Import packages: ``` import ( - "bytes" - "github.com/yuin/goldmark" + "bytes" + "github.com/yuin/goldmark" ) ``` @@ -105,11 +105,11 @@ Custom parser and renderer -------------------------- ```go import ( - "bytes" - "github.com/yuin/goldmark" - "github.com/yuin/goldmark/extension" - "github.com/yuin/goldmark/parser" - "github.com/yuin/goldmark/renderer/html" + "bytes" + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/extension" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer/html" ) md := goldmark.New( @@ -215,14 +215,53 @@ You can overwrite the substitutions by `extensions.WithTypographicSubstitutions` ```go markdown := goldmark.New( - goldmark.WithExtensions( - extension.NewTypographer( - extension.WithTypographicSubstitutions(extension.TypographicSubstitutions{ - extension.LeftSingleQuote: []byte("‚"), - extension.RightSingleQuote: nil, // nil disables a substitution - }), - ), - ), + goldmark.WithExtensions( + extension.NewTypographer( + extension.WithTypographicSubstitutions(extension.TypographicSubstitutions{ + extension.LeftSingleQuote: []byte("‚"), + extension.RightSingleQuote: nil, // nil disables a substitution + }), + ), + ), +) +``` + +### Linkify extension +Linkify extension implements [Autolinks(extension)](https://github.github.com/gfm/#autolinks-extension-) +defined in [GitHub Flavored Markdown Spec](https://github.github.com/gfm/). 
+ +Since the spec does not define the details of URLs, there are many ambiguous cases. + +You can override the autolinking patterns with options. + +| Functional option | Type | Description | +| ----------------- | ---- | ----------- | +| `extension.WithLinkifyAllowedProtocols` | `[][]byte` | List of allowed protocols such as `[][]byte{ []byte("http:") }` | +| `extension.WithLinkifyURLRegexp` | `*regexp.Regexp` | Regexp that defines URLs including protocols | +| `extension.WithLinkifyWWWRegexp` | `*regexp.Regexp` | Regexp that defines URLs starting with `www.`. This pattern corresponds to [the extended www autolink](https://github.github.com/gfm/#extended-www-autolink) | +| `extension.WithLinkifyEmailRegexp` | `*regexp.Regexp` | Regexp that defines email addresses | + +Example: using [xurls](https://github.com/mvdan/xurls) + +```go +import "mvdan.cc/xurls/v2" + +markdown := goldmark.New( + goldmark.WithRendererOptions( + html.WithXHTML(), + html.WithUnsafe(), + ), + goldmark.WithExtensions( + extension.NewLinkify( + extension.WithLinkifyAllowedProtocols([][]byte{ + []byte("http:"), + []byte("https:"), + }), + extension.WithLinkifyURLRegexp( + xurls.Strict(), + ), + ), + ), ) ``` @@ -317,6 +356,7 @@ AST nodes do not have concrete text. AST nodes have segment information of the d `text.Segment` has 3 attributes: `Start`, `End`, `Padding` . +(TBC) **TODO** diff --git a/extension/_test/linkify.txt b/extension/_test/linkify.txt index 669ab72..1e75aeb 100644 --- a/extension/_test/linkify.txt +++ b/extension/_test/linkify.txt @@ -153,3 +153,11 @@ This is a `git@github.com:vim/vim` //- - - - - - - - -//

This is a git@github.com:vim/vim

//= = = = = = = = = = = = = = = = = = = = = = = =// + + +16 +//- - - - - - - - -// +https://nic.college +//- - - - - - - - -// +

https://nic.college

+//= = = = = = = = = = = = = = = = = = = = = = = =// diff --git a/extension/linkify.go b/extension/linkify.go index 0a584e8..565c6bd 100644 --- a/extension/linkify.go +++ b/extension/linkify.go @@ -2,27 +2,153 @@ package extension import ( "bytes" + "regexp" + "github.com/yuin/goldmark" "github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/text" "github.com/yuin/goldmark/util" - "regexp" ) -var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`) +var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]+(?:(?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`) -var urlRegexp = regexp.MustCompile(`^(?:http|https|ftp):\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_+.~#$!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`) +var urlRegexp = regexp.MustCompile(`^(?:http|https|ftp):\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]+(?:(?:/|[#?])[-a-zA-Z0-9@:%_+.~#$!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`) -type linkifyParser struct { +// An LinkifyConfig struct is a data structure that holds configuration of the +// Linkify extension. +type LinkifyConfig struct { + AllowedProtocols [][]byte + URLRegexp *regexp.Regexp + WWWRegexp *regexp.Regexp + EmailRegexp *regexp.Regexp } -var defaultLinkifyParser = &linkifyParser{} +const optLinkifyAllowedProtocols parser.OptionName = "LinkifyAllowedProtocols" +const optLinkifyURLRegexp parser.OptionName = "LinkifyURLRegexp" +const optLinkifyWWWRegexp parser.OptionName = "LinkifyWWWRegexp" +const optLinkifyEmailRegexp parser.OptionName = "LinkifyEmailRegexp" + +// SetOption implements SetOptioner. 
+func (c *LinkifyConfig) SetOption(name parser.OptionName, value interface{}) { + switch name { + case optLinkifyAllowedProtocols: + c.AllowedProtocols = value.([][]byte) + case optLinkifyURLRegexp: + c.URLRegexp = value.(*regexp.Regexp) + case optLinkifyWWWRegexp: + c.WWWRegexp = value.(*regexp.Regexp) + case optLinkifyEmailRegexp: + c.EmailRegexp = value.(*regexp.Regexp) + } +} + +// A LinkifyOption interface sets options for the Linkify extension. +type LinkifyOption interface { + parser.Option + SetLinkifyOption(*LinkifyConfig) +} + +type withLinkifyAllowedProtocols struct { + value [][]byte +} + +func (o *withLinkifyAllowedProtocols) SetParserOption(c *parser.Config) { + c.Options[optLinkifyAllowedProtocols] = o.value +} + +func (o *withLinkifyAllowedProtocols) SetLinkifyOption(p *LinkifyConfig) { + p.AllowedProtocols = o.value +} + +// WithLinkifyAllowedProtocols is a functional option that specifies allowed +// protocols in autolinks. Each protocol must end with ':' like +// 'http:'. +func WithLinkifyAllowedProtocols(value [][]byte) LinkifyOption { + return &withLinkifyAllowedProtocols{ + value: value, + } +} + +type withLinkifyURLRegexp struct { + value *regexp.Regexp +} + +func (o *withLinkifyURLRegexp) SetParserOption(c *parser.Config) { + c.Options[optLinkifyURLRegexp] = o.value +} + +func (o *withLinkifyURLRegexp) SetLinkifyOption(p *LinkifyConfig) { + p.URLRegexp = o.value +} + +// WithLinkifyURLRegexp is a functional option that specifies +// a pattern of the URL including a protocol. +func WithLinkifyURLRegexp(value *regexp.Regexp) LinkifyOption { + return &withLinkifyURLRegexp{ + value: value, + } +} + +// WithLinkifyWWWRegexp is a functional option that specifies +// a pattern of the URL without a protocol. +// This pattern must start with 'www.'. 
+type withLinkifyWWWRegexp struct { + value *regexp.Regexp +} + +func (o *withLinkifyWWWRegexp) SetParserOption(c *parser.Config) { + c.Options[optLinkifyWWWRegexp] = o.value +} + +func (o *withLinkifyWWWRegexp) SetLinkifyOption(p *LinkifyConfig) { + p.WWWRegexp = o.value +} + +func WithLinkifyWWWRegexp(value *regexp.Regexp) LinkifyOption { + return &withLinkifyWWWRegexp{ + value: value, + } +} + +// WithLinkifyEmailRegexp is a functional option that specifies +// a pattern of the email address. +type withLinkifyEmailRegexp struct { + value *regexp.Regexp +} + +func (o *withLinkifyEmailRegexp) SetParserOption(c *parser.Config) { + c.Options[optLinkifyEmailRegexp] = o.value +} + +func (o *withLinkifyEmailRegexp) SetLinkifyOption(p *LinkifyConfig) { + p.EmailRegexp = o.value +} + +func WithLinkifyEmailRegexp(value *regexp.Regexp) LinkifyOption { + return &withLinkifyEmailRegexp{ + value: value, + } +} + +type linkifyParser struct { + LinkifyConfig +} // NewLinkifyParser return a new InlineParser can parse // text that seems like a URL. 
-func NewLinkifyParser() parser.InlineParser { - return defaultLinkifyParser +func NewLinkifyParser(opts ...LinkifyOption) parser.InlineParser { + p := &linkifyParser{ + LinkifyConfig: LinkifyConfig{ + AllowedProtocols: nil, + URLRegexp: urlRegexp, + WWWRegexp: wwwURLRegxp, + }, + } + for _, o := range opts { + o.SetLinkifyOption(&p.LinkifyConfig) + } + return p } func (s *linkifyParser) Trigger() []byte { @@ -53,14 +179,26 @@ func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont var m []int var protocol []byte var typ ast.AutoLinkType = ast.AutoLinkURL - if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) { - m = urlRegexp.FindSubmatchIndex(line) + if s.LinkifyConfig.AllowedProtocols == nil { + if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) { + m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line) + } + } else { + for _, prefix := range s.LinkifyConfig.AllowedProtocols { + if bytes.HasPrefix(line, prefix) { + m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line) + break + } + } } if m == nil && bytes.HasPrefix(line, domainWWW) { - m = wwwURLRegxp.FindSubmatchIndex(line) + m = s.LinkifyConfig.WWWRegexp.FindSubmatchIndex(line) protocol = []byte("http") } - if m != nil { + if m != nil && m[0] != 0 { + m = nil + } + if m != nil && m[0] == 0 { lastChar := line[m[1]-1] if lastChar == '.' 
{ m[1]-- @@ -96,7 +234,15 @@ func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont return nil } typ = ast.AutoLinkEmail - stop := util.FindEmailIndex(line) + stop := -1 + if s.LinkifyConfig.EmailRegexp == nil { + stop = util.FindEmailIndex(line) + } else { + m := s.LinkifyConfig.EmailRegexp.FindSubmatchIndex(line) + if m != nil && m[0] == 0 { + stop = m[1] + } + } if stop < 0 { return nil } @@ -136,15 +282,22 @@ func (s *linkifyParser) CloseBlock(parent ast.Node, pc parser.Context) { } type linkify struct { + options []LinkifyOption } // Linkify is an extension that allow you to parse text that seems like a URL. var Linkify = &linkify{} +func NewLinkify(opts ...LinkifyOption) goldmark.Extender { + return &linkify{ + options: opts, + } +} + func (e *linkify) Extend(m goldmark.Markdown) { m.Parser().AddOptions( parser.WithInlineParsers( - util.Prioritized(NewLinkifyParser(), 999), + util.Prioritized(NewLinkifyParser(e.options...), 999), ), ) } diff --git a/extension/linkify_test.go b/extension/linkify_test.go index 1abefad..18d1331 100644 --- a/extension/linkify_test.go +++ b/extension/linkify_test.go @@ -1,6 +1,7 @@ package extension import ( + "regexp" "testing" "github.com/yuin/goldmark" @@ -19,3 +20,81 @@ func TestLinkify(t *testing.T) { ) testutil.DoTestCaseFile(markdown, "_test/linkify.txt", t) } + +func TestLinkifyWithAllowedProtocols(t *testing.T) { + markdown := goldmark.New( + goldmark.WithRendererOptions( + html.WithXHTML(), + html.WithUnsafe(), + ), + goldmark.WithExtensions( + NewLinkify( + WithLinkifyAllowedProtocols([][]byte{ + []byte("ssh:"), + }), + WithLinkifyURLRegexp( + regexp.MustCompile(`\w+://[^\s]+`), + ), + ), + ), + ) + testutil.DoTestCase( + markdown, + testutil.MarkdownTestCase{ + No: 1, + Markdown: `hoge ssh://user@hoge.com. http://example.com/`, + Expected: `

hoge ssh://user@hoge.com. http://example.com/

`, + }, + t, + ) +} + +func TestLinkifyWithWWWRegexp(t *testing.T) { + markdown := goldmark.New( + goldmark.WithRendererOptions( + html.WithXHTML(), + html.WithUnsafe(), + ), + goldmark.WithExtensions( + NewLinkify( + WithLinkifyWWWRegexp( + regexp.MustCompile(`www\.example\.com`), + ), + ), + ), + ) + testutil.DoTestCase( + markdown, + testutil.MarkdownTestCase{ + No: 1, + Markdown: `www.google.com www.example.com`, + Expected: `

www.google.com www.example.com

`, + }, + t, + ) +} + +func TestLinkifyWithEmailRegexp(t *testing.T) { + markdown := goldmark.New( + goldmark.WithRendererOptions( + html.WithXHTML(), + html.WithUnsafe(), + ), + goldmark.WithExtensions( + NewLinkify( + WithLinkifyEmailRegexp( + regexp.MustCompile(`user@example\.com`), + ), + ), + ), + ) + testutil.DoTestCase( + markdown, + testutil.MarkdownTestCase{ + No: 1, + Markdown: `hoge@example.com user@example.com`, + Expected: `

hoge@example.com user@example.com

`, + }, + t, + ) +}