diff --git a/README.md b/README.md index f1d0974..032304d 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,8 @@ Parser and Renderer options - [PHP Markdown Extra: Definition lists](https://michelf.ca/projects/php-markdown/extra/#def-list) - `extension.Footnote` - [PHP Markdown Extra: Footnotes](https://michelf.ca/projects/php-markdown/extra/#footnotes) +- `extension.Typographer` + - This extension substitutes punctuation with typographic entities, like [smartypants](https://daringfireball.net/projects/smartypants/). ### Attributes `parser.WithAttribute` option allows you to define attributes on some elements. @@ -137,6 +139,38 @@ heading {#id .className attrName=attrValue} ============ ``` +### Typographer extension + +The Typographer extension translates plain ASCII punctuation characters into typographic punctuation HTML entities. + +Default substitutions are: + +| Punctuation | Default entity | +| ------------ | ---------- | +| `'` | `‘`, `’` | +| `"` | `“`, `”` | +| `--` | `–` | +| `---` | `—` | +| `...` | `…` | +| `<<` | `«` | +| `>>` | `»` | + +You can override the substitutions with `extension.WithTypographicSubstitutions`. + +```go +markdown := goldmark.New( + goldmark.WithExtensions( + extension.NewTypographer( + extension.WithTypographicSubstitutions(extension.TypographerSubstitutions{ + extension.LeftSingleQuote: []byte("‚"), + extension.RightSingleQuote: nil, // nil disables a substitution + }), + ), + ), +) +``` + + Create extensions -------------------- diff --git a/ast/block.go b/ast/block.go index cabc816..103f4f6 100644 --- a/ast/block.go +++ b/ast/block.go @@ -226,6 +226,25 @@ type FencedCodeBlock struct { BaseBlock // Info returns a info text of this fenced code block. Info *Text + + language []byte +} + +// Language returns the language in the info string. +// Language returns nil if this node does not have an info string. 
+func (n *FencedCodeBlock) Language(source []byte) []byte { + if n.language == nil && n.Info != nil { + segment := n.Info.Segment + info := segment.Value(source) + i := 0 + for ; i < len(info); i++ { + if info[i] == ' ' { + break + } + } + n.language = info[:i] + } + return n.language } // IsRaw implements Node.IsRaw. diff --git a/extension/ast/typographer.go b/extension/ast/typographer.go new file mode 100644 index 0000000..4101b4d --- /dev/null +++ b/extension/ast/typographer.go @@ -0,0 +1,32 @@ +package ast + +import ( + gast "github.com/yuin/goldmark/ast" +) + +// A TypographicText struct represents the typographic text +// that replaces certain punctuations. +type TypographicText struct { + gast.BaseInline + Value []byte +} + +// Dump implements Node.Dump. +func (n *TypographicText) Dump(source []byte, level int) { + gast.DumpHelper(n, source, level, nil, nil) +} + +// KindTypographicText is a NodeKind of the TypographicText node. +var KindTypographicText = gast.NewNodeKind("TypographicText") + +// Kind implements Node.Kind. +func (n *TypographicText) Kind() gast.NodeKind { + return KindTypographicText +} + +// NewTypographicText returns a new TypographicText node. +func NewTypographicText(value []byte) *TypographicText { + return &TypographicText{ + Value: value, + } +} diff --git a/extension/typographer.go b/extension/typographer.go new file mode 100644 index 0000000..9978086 --- /dev/null +++ b/extension/typographer.go @@ -0,0 +1,273 @@ +package extension + +import ( + "github.com/yuin/goldmark" + gast "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/extension/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +// TypographicPunctuation is a key identifying a punctuation that can be replaced with +// a typographic entity. 
+type TypographicPunctuation int + +const ( + // LeftSingleQuote is ' + LeftSingleQuote TypographicPunctuation = iota + 1 + // RightSingleQuote is ' + RightSingleQuote + // LeftDoubleQuote is " + LeftDoubleQuote + // RightDoubleQuote is " + RightDoubleQuote + // EnDash is -- + EnDash + // EmDash is --- + EmDash + // Ellipsis is ... + Ellipsis + // LeftAngleQuote is << + LeftAngleQuote + // RightAngleQuote is >> + RightAngleQuote + + typographicPunctuationMax +) + +// A TypographerConfig struct is a data structure that holds configuration of the +// Typographer extension. +type TypographerConfig struct { + Substitutions [][]byte +} + +func newDefaultSubstitutions() [][]byte { + replacements := make([][]byte, typographicPunctuationMax) + replacements[LeftSingleQuote] = []byte("‘") + replacements[RightSingleQuote] = []byte("’") + replacements[LeftDoubleQuote] = []byte("“") + replacements[RightDoubleQuote] = []byte("”") + replacements[EnDash] = []byte("–") + replacements[EmDash] = []byte("—") + replacements[Ellipsis] = []byte("…") + replacements[LeftAngleQuote] = []byte("«") + replacements[RightAngleQuote] = []byte("»") + + return replacements +} + +// SetOption implements SetOptioner. +func (b *TypographerConfig) SetOption(name parser.OptionName, value interface{}) { + switch name { + case TypographicSubstitutions: + b.Substitutions = value.([][]byte) + } +} + +// A TypographerOption interface sets options for the TypographerParser. +type TypographerOption interface { + parser.Option + SetTypographerOption(*TypographerConfig) +} + +// TypographicSubstitutions is an option name that specifies replacement text for +// punctuations. +const TypographicSubstitutions parser.OptionName = "TypographicSubstitutions" + +// TypographerSubstitutions maps punctuations to their substitutions for the Typographer extension. 
+type TypographerSubstitutions map[TypographicPunctuation][]byte + +type withTypographicSubstitutions struct { + value [][]byte +} + +func (o *withTypographicSubstitutions) SetParserOption(c *parser.Config) { + c.Options[TypographicSubstitutions] = o.value +} + +func (o *withTypographicSubstitutions) SetTypographerOption(p *TypographerConfig) { + p.Substitutions = o.value +} + +// WithTypographicSubstitutions is a functional option that specifies replacement text +// for punctuations. Entries override the defaults and a nil value disables that substitution. NOTE(review): keys are used as slice indexes without a bounds check. +func WithTypographicSubstitutions(values map[TypographicPunctuation][]byte) TypographerOption { + replacements := newDefaultSubstitutions() + for k, v := range values { + replacements[k] = v + } + + return &withTypographicSubstitutions{replacements} +} + +type typographerDelimiterProcessor struct { +} + +func (p *typographerDelimiterProcessor) IsDelimiter(b byte) bool { + return b == '\'' || b == '"' +} + +func (p *typographerDelimiterProcessor) CanOpenCloser(opener, closer *parser.Delimiter) bool { + return opener.Char == closer.Char +} + +func (p *typographerDelimiterProcessor) OnMatch(consumes int) gast.Node { + return nil +} + +var defaultTypographerDelimiterProcessor = &typographerDelimiterProcessor{} + +type typographerParser struct { + TypographerConfig +} + +// NewTypographerParser returns a new InlineParser that parses +// typographer expressions. 
+func NewTypographerParser(opts ...TypographerOption) parser.InlineParser { + p := &typographerParser{ + TypographerConfig: TypographerConfig{ + Substitutions: newDefaultSubstitutions(), + }, + } + for _, o := range opts { + o.SetTypographerOption(&p.TypographerConfig) + } + return p +} + +func (s *typographerParser) Trigger() []byte { + return []byte{'\'', '"', '-', '.', '<', '>'} +} + +func (s *typographerParser) Parse(parent gast.Node, block text.Reader, pc parser.Context) gast.Node { + before := block.PrecendingCharacter() + line, _ := block.PeekLine() + c := line[0] + if len(line) > 2 { + if c == '-' { + if s.Substitutions[EmDash] != nil && line[1] == '-' && line[2] == '-' { // --- (em dash) + node := ast.NewTypographicText(s.Substitutions[EmDash]) + block.Advance(3) + return node + } + } else if c == '.' { + if s.Substitutions[Ellipsis] != nil && line[1] == '.' && line[2] == '.' { // ... (ellipsis) + node := ast.NewTypographicText(s.Substitutions[Ellipsis]) + block.Advance(3) + return node + } + return nil + } + } + if len(line) > 1 { + if c == '<' { + if s.Substitutions[LeftAngleQuote] != nil && line[1] == '<' { // << (left angle quote) + node := ast.NewTypographicText(s.Substitutions[LeftAngleQuote]) + block.Advance(2) + return node + } + return nil + } else if c == '>' { + if s.Substitutions[RightAngleQuote] != nil && line[1] == '>' { // >> (right angle quote) + node := ast.NewTypographicText(s.Substitutions[RightAngleQuote]) + block.Advance(2) + return node + } + return nil + } else if s.Substitutions[EnDash] != nil && c == '-' && line[1] == '-' { // -- (en dash) + node := ast.NewTypographicText(s.Substitutions[EnDash]) + block.Advance(2) + return node + } + } + if c == '\'' || c == '"' { + d := parser.ScanDelimiter(line, before, 1, defaultTypographerDelimiterProcessor) + if d == nil { + return nil + } + if c == '\'' { + if s.Substitutions[LeftSingleQuote] != nil && d.CanOpen && !d.CanClose { + node := ast.NewTypographicText(s.Substitutions[LeftSingleQuote]) + block.Advance(1) + return node + } + if 
s.Substitutions[RightSingleQuote] != nil && d.CanClose && !d.CanOpen { + node := ast.NewTypographicText(s.Substitutions[RightSingleQuote]) + block.Advance(1) + return node + } + } + if c == '"' { + if s.Substitutions[LeftDoubleQuote] != nil && d.CanOpen && !d.CanClose { + node := ast.NewTypographicText(s.Substitutions[LeftDoubleQuote]) + block.Advance(1) + return node + } + if s.Substitutions[RightDoubleQuote] != nil && d.CanClose && !d.CanOpen { + node := ast.NewTypographicText(s.Substitutions[RightDoubleQuote]) + block.Advance(1) + return node + } + } + } + return nil +} + +func (s *typographerParser) CloseBlock(parent gast.Node, pc parser.Context) { + // nothing to do +} + +// TypographerHTMLRenderer is a renderer.NodeRenderer implementation that +// renders TypographicText nodes. +type TypographerHTMLRenderer struct { + html.Config +} + +// NewTypographerHTMLRenderer returns a new TypographerHTMLRenderer. +func NewTypographerHTMLRenderer(opts ...html.Option) renderer.NodeRenderer { + r := &TypographerHTMLRenderer{ + Config: html.NewConfig(), + } + for _, opt := range opts { + opt.SetHTMLOption(&r.Config) + } + return r +} + +// RegisterFuncs implements renderer.NodeRenderer.RegisterFuncs. +func (r *TypographerHTMLRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { + reg.Register(ast.KindTypographicText, r.renderTypographicText) +} + +func (r *TypographerHTMLRenderer) renderTypographicText(w util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) { + if entering { + w.Write(n.(*ast.TypographicText).Value) + } + return gast.WalkContinue, nil +} + +type typographer struct { + options []TypographerOption +} + +// Typographer is an extension that replaces punctuations with typographic entities. +var Typographer = &typographer{} + +// NewTypographer returns a new Extender that replaces punctuations with typographic entities. 
+func NewTypographer(opts ...TypographerOption) goldmark.Extender { + return &typographer{ + options: opts, + } +} + +func (e *typographer) Extend(m goldmark.Markdown) { + m.Parser().AddOptions(parser.WithInlineParsers( + util.Prioritized(NewTypographerParser(e.options...), 9999), + )) + m.Renderer().AddOptions(renderer.WithNodeRenderers( + util.Prioritized(NewTypographerHTMLRenderer(), 500), + )) +} diff --git a/renderer/html/html.go b/renderer/html/html.go index e22ca17..25284c0 100644 --- a/renderer/html/html.go +++ b/renderer/html/html.go @@ -240,16 +240,8 @@ func (r *Renderer) renderFencedCodeBlock(w util.BufWriter, source []byte, node a n := node.(*ast.FencedCodeBlock) if entering { w.WriteString("