Add Typographer extension

This commit is contained in:
yuin 2019-05-06 00:53:22 +09:00
parent 08a89f162a
commit 28b28e34bb
5 changed files with 360 additions and 10 deletions

View file

@ -120,6 +120,8 @@ Parser and Renderer options
- [PHP Markdown Extra: Definition lists](https://michelf.ca/projects/php-markdown/extra/#def-list)
- `extension.Footnote`
- [PHP Markdown Extra: Footnotes](https://michelf.ca/projects/php-markdown/extra/#footnotes)
- `extension.Typographer`
- This extension substitutes punctuation with typographic entities, like [smartypants](https://daringfireball.net/projects/smartypants/).
### Attributes
`parser.WithAttribute` option allows you to define attributes on some elements.
@ -137,6 +139,38 @@ heading {#id .className attrName=attrValue}
============
```
### Typographer extension
Typographer extension translates plain ASCII punctuation characters into typographic punctuation HTML entities.
Default substitutions are:
| Punctuation | Default entity |
| ------------ | ---------- |
| `'` | `&lsquo;`, `&rsquo;` |
| `"` | `&ldquo;`, `&rdquo;` |
| `--` | `&ndash;` |
| `---` | `&mdash;` |
| `...` | `&hellip;` |
| `<<` | `&laquo;` |
| `>>` | `&raquo;` |
You can override the default substitutions with `extension.WithTypographicSubstitutions`.
```go
markdown := goldmark.New(
goldmark.WithExtensions(
extension.NewTypographer(
extension.WithTypographicSubstitutions(extension.TypographerSubstitutions{
extension.LeftSingleQuote: []byte("&sbquo;"),
extension.RightSingleQuote: nil, // nil disables a substitution
}),
),
),
)
```
Create extensions
--------------------

View file

@ -226,6 +226,25 @@ type FencedCodeBlock struct {
BaseBlock
// Info returns a info text of this fenced code block.
Info *Text
language []byte
}
// Language returns the language part of this block's info string: the bytes
// of the info text that come before its first space character (or the whole
// info text when it contains no space).
// Language returns nil if this node does not have an info string.
// A non-nil result is stored on the node and returned as is by later calls.
func (n *FencedCodeBlock) Language(source []byte) []byte {
	if n.language != nil || n.Info == nil {
		return n.language
	}
	seg := n.Info.Segment
	info := seg.Value(source)
	end := len(info)
	for pos, ch := range info {
		if ch == ' ' {
			end = pos
			break
		}
	}
	n.language = info[:end]
	return n.language
}
// IsRaw implements Node.IsRaw.

View file

@ -0,0 +1,32 @@
package ast
import (
gast "github.com/yuin/goldmark/ast"
)
// A TypographicText struct represents an inline node that stands in for
// punctuation which has been replaced by its typographic form.
type TypographicText struct {
gast.BaseInline
// Value holds the replacement bytes carried by this node.
Value []byte
}
// Dump implements Node.Dump.
// It delegates to gast.DumpHelper, passing nil for the helper's last two
// arguments.
func (n *TypographicText) Dump(source []byte, level int) {
gast.DumpHelper(n, source, level, nil, nil)
}
// KindTypographicText is the NodeKind of the TypographicText node.
// It is created once with gast.NewNodeKind and returned by TypographicText.Kind.
var KindTypographicText = gast.NewNodeKind("TypographicText")
// Kind implements Node.Kind.
// It always returns KindTypographicText.
func (n *TypographicText) Kind() gast.NodeKind {
return KindTypographicText
}
// NewTypographicText returns a new TypographicText node whose Value is the
// given replacement bytes. The slice is stored as passed, without copying.
func NewTypographicText(value []byte) *TypographicText {
	node := new(TypographicText)
	node.Value = value
	return node
}

273
extension/typographer.go Normal file
View file

@ -0,0 +1,273 @@
package extension
import (
"github.com/yuin/goldmark"
gast "github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer"
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
)
// TypographicPunctuation identifies a punctuation sequence that can be
// replaced with a typographic entity. Its values are used as indices into
// the substitution table held by TypographerConfig.
type TypographicPunctuation int
// The punctuation keys start at 1, so the zero value of
// TypographicPunctuation does not name any punctuation.
const (
// LeftSingleQuote is a ' that can only open (default: &lsquo;).
LeftSingleQuote TypographicPunctuation = iota + 1
// RightSingleQuote is a ' that can only close (default: &rsquo;).
RightSingleQuote
// LeftDoubleQuote is a " that can only open (default: &ldquo;).
LeftDoubleQuote
// RightDoubleQuote is a " that can only close (default: &rdquo;).
RightDoubleQuote
// EnDash is -- (default: &ndash;).
EnDash
// EmDash is --- (default: &mdash;).
EmDash
// Ellipsis is ... (default: &hellip;).
Ellipsis
// LeftAngleQuote is << (default: &laquo;).
LeftAngleQuote
// RightAngleQuote is >> (default: &raquo;).
RightAngleQuote
// typographicPunctuationMax is one past the last punctuation key; it is
// used as the length of the substitution table.
typographicPunctuationMax
)
// A TypographerConfig struct is a data structure that holds configuration of the
// Typographer extension.
type TypographerConfig struct {
// Substitutions is the replacement table, indexed by TypographicPunctuation.
// The parser treats a nil entry as a disabled substitution.
Substitutions [][]byte
}
func newDefaultSubstitutions() [][]byte {
replacements := make([][]byte, typographicPunctuationMax)
replacements[LeftSingleQuote] = []byte("&lsquo;")
replacements[RightSingleQuote] = []byte("&rsquo;")
replacements[LeftDoubleQuote] = []byte("&ldquo;")
replacements[RightDoubleQuote] = []byte("&rdquo;")
replacements[EnDash] = []byte("&ndash;")
replacements[EmDash] = []byte("&mdash;")
replacements[Ellipsis] = []byte("&hellip;")
replacements[LeftAngleQuote] = []byte("&laquo;")
replacements[RightAngleQuote] = []byte("&raquo;")
return replacements
}
// SetOption implements SetOptioner.
// Only the TypographicSubstitutions option is recognized; its value is
// asserted to be a [][]byte (the assertion panics for any other type) and
// becomes the new substitution table. Every other option name is ignored.
func (b *TypographerConfig) SetOption(name parser.OptionName, value interface{}) {
	if name != TypographicSubstitutions {
		return
	}
	b.Substitutions = value.([][]byte)
}
// A TypographerOption interface sets options for the TypographerParser.
// It is a parser.Option that can additionally apply itself to a
// TypographerConfig.
type TypographerOption interface {
parser.Option
// SetTypographerOption applies this option to the given configuration.
SetTypographerOption(*TypographerConfig)
}
// TypographicSubstitutions is an option name that specifies replacement text
// for punctuations.
const TypographicSubstitutions parser.OptionName = "TypographicSubstitutions"
// TypographerSubstitutions is a set of substitutions for the Typographer
// extension: it maps a punctuation key to the bytes that replace it.
type TypographerSubstitutions map[TypographicPunctuation][]byte
// withTypographicSubstitutions is the option value that carries a
// substitution table.
type withTypographicSubstitutions struct {
// value is the substitution table, indexed by TypographicPunctuation.
value [][]byte
}
// SetParserOption stores the substitution table in the parser configuration's
// Options under the TypographicSubstitutions name.
func (o *withTypographicSubstitutions) SetParserOption(c *parser.Config) {
c.Options[TypographicSubstitutions] = o.value
}
// SetTypographerOption replaces the configuration's substitution table with
// the one carried by this option.
func (o *withTypographicSubstitutions) SetTypographerOption(p *TypographerConfig) {
p.Substitutions = o.value
}
// WithTypographicSubstitutions is a functional option that specifies
// replacement text for punctuations.
//
// The option starts from the default substitutions and overrides the entries
// present in values. Mapping a punctuation to nil stores a nil entry, which
// disables that substitution. Keys that do not name a known punctuation
// (below LeftSingleQuote, or at or above typographicPunctuationMax) are
// ignored rather than indexing outside the substitution table.
func WithTypographicSubstitutions(values map[TypographicPunctuation][]byte) TypographerOption {
	replacements := newDefaultSubstitutions()
	for k, v := range values {
		if k < LeftSingleQuote || k >= typographicPunctuationMax {
			// Not a valid table index; writing it would panic.
			continue
		}
		replacements[k] = v
	}
	return &withTypographicSubstitutions{replacements}
}
// typographerDelimiterProcessor is the stateless delimiter processor used
// when scanning quote characters.
type typographerDelimiterProcessor struct{}

// IsDelimiter reports whether b is a single quote (') or a double quote (").
func (p *typographerDelimiterProcessor) IsDelimiter(b byte) bool {
	switch b {
	case '\'', '"':
		return true
	default:
		return false
	}
}
// CanOpenCloser reports whether opener and closer use the same delimiter
// character.
func (p *typographerDelimiterProcessor) CanOpenCloser(opener, closer *parser.Delimiter) bool {
return opener.Char == closer.Char
}
// OnMatch always returns nil; this processor does not build a node for
// matched delimiters.
func (p *typographerDelimiterProcessor) OnMatch(consumes int) gast.Node {
return nil
}
// defaultTypographerDelimiterProcessor is the shared processor instance
// passed to parser.ScanDelimiter when the parser examines a quote character.
var defaultTypographerDelimiterProcessor = &typographerDelimiterProcessor{}
// typographerParser is the inline parser of the Typographer extension.
// It embeds TypographerConfig, which supplies the substitution table.
type typographerParser struct {
TypographerConfig
}
// NewTypographerParser returns a new InlineParser that parses
// typographer expressions.
// The parser starts with the default substitutions; the given options are
// then applied to its configuration in order.
func NewTypographerParser(opts ...TypographerOption) parser.InlineParser {
	p := new(typographerParser)
	p.Substitutions = newDefaultSubstitutions()
	for i := range opts {
		opts[i].SetTypographerOption(&p.TypographerConfig)
	}
	return p
}
// Trigger returns the bytes this parser registers as its trigger characters:
// ', ", -, ., < and >. A new slice is returned on every call.
func (s *typographerParser) Trigger() []byte {
	return []byte("'\"-.<>")
}
// Parse looks at the beginning of the current line and, when an enabled
// punctuation sequence starts there, consumes it and returns a
// TypographicText node carrying the configured replacement. In every other
// case it returns nil without advancing the reader.
//
// Recognized sequences:
//   - "---" (EmDash); otherwise "--" (EnDash)
//   - "..." (Ellipsis)
//   - "<<" (LeftAngleQuote) and ">>" (RightAngleQuote)
//   - ' and " when parser.ScanDelimiter reports that the quote can only open
//     (left quote) or can only close (right quote); a quote that can do both,
//     or neither, is left untouched.
//
// A substitution whose entry in s.Substitutions is nil is disabled.
func (s *typographerParser) Parse(parent gast.Node, block text.Reader, pc parser.Context) gast.Node {
	before := block.PrecendingCharacter()
	line, _ := block.PeekLine()

	// enabled reports whether punctuation p has a replacement configured.
	enabled := func(p TypographicPunctuation) bool {
		return s.Substitutions[p] != nil
	}
	// replace builds the node for punctuation p, then consumes width bytes.
	replace := func(p TypographicPunctuation, width int) gast.Node {
		node := ast.NewTypographicText(s.Substitutions[p])
		block.Advance(width)
		return node
	}

	switch c := line[0]; c {
	case '-':
		// The longer sequence wins; "--" is only tried when "---" did not match.
		if len(line) > 2 && enabled(EmDash) && line[1] == '-' && line[2] == '-' {
			return replace(EmDash, 3)
		}
		if len(line) > 1 && enabled(EnDash) && line[1] == '-' {
			return replace(EnDash, 2)
		}
	case '.':
		if len(line) > 2 && enabled(Ellipsis) && line[1] == '.' && line[2] == '.' {
			return replace(Ellipsis, 3)
		}
	case '<':
		if len(line) > 1 && enabled(LeftAngleQuote) && line[1] == '<' {
			return replace(LeftAngleQuote, 2)
		}
	case '>':
		if len(line) > 1 && enabled(RightAngleQuote) && line[1] == '>' {
			return replace(RightAngleQuote, 2)
		}
	case '\'', '"':
		d := parser.ScanDelimiter(line, before, 1, defaultTypographerDelimiterProcessor)
		if d == nil {
			return nil
		}
		opening, closing := LeftSingleQuote, RightSingleQuote
		if c == '"' {
			opening, closing = LeftDoubleQuote, RightDoubleQuote
		}
		switch {
		case enabled(opening) && d.CanOpen && !d.CanClose:
			return replace(opening, 1)
		case enabled(closing) && d.CanClose && !d.CanOpen:
			return replace(closing, 1)
		}
	}
	return nil
}
// CloseBlock does nothing; this parser has no work to do when a block is
// closed.
func (s *typographerParser) CloseBlock(parent gast.Node, pc parser.Context) {
// nothing to do
}
// TypographerHTMLRenderer is a renderer.NodeRenderer implementation that
// renders Typographer nodes.
// It embeds html.Config, which html.Option values passed to
// NewTypographerHTMLRenderer are applied to.
type TypographerHTMLRenderer struct {
html.Config
}
// NewTypographerHTMLRenderer returns a new TypographerHTMLRenderer.
// Its configuration starts from html.NewConfig(); the given options are then
// applied to it in order.
func NewTypographerHTMLRenderer(opts ...html.Option) renderer.NodeRenderer {
	r := new(TypographerHTMLRenderer)
	r.Config = html.NewConfig()
	for i := range opts {
		opts[i].SetHTMLOption(&r.Config)
	}
	return r
}
// RegisterFuncs implements renderer.NodeRenderer.RegisterFuncs.
// It registers renderTypographicText for nodes of kind KindTypographicText.
func (r *TypographerHTMLRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
reg.Register(ast.KindTypographicText, r.renderTypographicText)
}
// renderTypographicText writes the Value of a TypographicText node to w,
// unescaped, when the node is entered; nothing is written on leaving.
// n must be an *ast.TypographicText, otherwise the type assertion panics.
// It always returns gast.WalkContinue and a nil error; the result of the
// write is not inspected.
func (r *TypographerHTMLRenderer) renderTypographicText(w util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) {
	if !entering {
		return gast.WalkContinue, nil
	}
	node := n.(*ast.TypographicText)
	w.Write(node.Value)
	return gast.WalkContinue, nil
}
// typographer is the Typographer extension. It keeps the options that are
// handed to NewTypographerParser when the extension is installed.
type typographer struct {
options []TypographerOption
}
// Typographer is an extension that replaces punctuations with typographic entities.
// It carries no options, so its parser uses the default substitutions.
var Typographer = &typographer{}
// NewTypographer returns a new Extender that replaces punctuations with
// typographic entities. The given options are kept and passed to the parser
// created when the extension is installed.
func NewTypographer(opts ...TypographerOption) goldmark.Extender {
	ext := new(typographer)
	ext.options = opts
	return ext
}
// Extend installs the extension on m: it adds a typographer inline parser,
// built from the extension's options, with priority 9999, and a
// TypographerHTMLRenderer with priority 500.
func (e *typographer) Extend(m goldmark.Markdown) {
	p := m.Parser()
	inline := util.Prioritized(NewTypographerParser(e.options...), 9999)
	p.AddOptions(parser.WithInlineParsers(inline))

	r := m.Renderer()
	nodeRenderer := util.Prioritized(NewTypographerHTMLRenderer(), 500)
	r.AddOptions(renderer.WithNodeRenderers(nodeRenderer))
}

View file

@ -240,16 +240,8 @@ func (r *Renderer) renderFencedCodeBlock(w util.BufWriter, source []byte, node a
n := node.(*ast.FencedCodeBlock)
if entering {
w.WriteString("<pre><code")
if n.Info != nil {
segment := n.Info.Segment
info := segment.Value(source)
i := 0
for ; i < len(info); i++ {
if info[i] == ' ' {
break
}
}
language := info[:i]
language := n.Language(source)
if language != nil {
w.WriteString(" class=\"language-")
r.Writer.Write(w, language)
w.WriteString("\"")