mirror of
https://github.com/yuin/goldmark
synced 2025-03-04 23:04:52 +00:00
Init blocks
This commit is contained in:
parent
a47a029d55
commit
f331fc07cd
2 changed files with 801 additions and 0 deletions
106
README.md
106
README.md
|
|
@ -10,6 +10,112 @@ goldmark
|
|||
|
||||
goldmark is compliant with CommonMark 0.29.
|
||||
|
||||
Anytype
|
||||
---------
|
||||
|
||||
1. Так как сейчас скелет мидла пока устаканивается, пишу утилитки для copy/paste, но не только для обычного текстового, но и для html. Когда закончу с этой библиотечкой, смартблоки чуть более подустаканятся и можно будет все разом и подключить.
|
||||
2. Написал простенький playground вокруг https://github.com/JohannesKaufmann/html-to-markdown и потестил. Работает хорошо. Однако этот конвертер работает на уровне текста и замен, там нет никакого AST, переделать в html -> blocks затруднительно. Но можно его output отдавать в парсер markdown
|
||||
3. Парсер markdown. Чекал goldmark: он делает AST, потом по нодам ходит и рендерит их в html строку. Пытался придумать где именно врезаться – брать ли голые ноды и с ними что-то придумывать, или модифицировать соответствующие им рендерные функции.
|
||||
|
||||
В goldmark в renderer/html есть 22 рендер-функции, которые принимают ноду и io.writer и пишут туда всякие строки.
|
||||
|
||||
Нужно сделать так, чтобы туда передавалась структура состояния, а не `w util.BufWriter`, и переделать эти 22 функции из записи html строк в изменение состояния.
|
||||
|
||||
Пока что вложенность будем игнорировать, то есть конвертация будет без row/column.
|
||||
```js
|
||||
{
|
||||
textBuffer: "..."
|
||||
marksBuffer: []marks
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
Что делать с renderAttributes? Один к одному не переделаешь, надо выписать список возможных атрибутов и подумать что делать с каждым.
|
||||
|
||||
```go
|
||||
func (r *Renderer) RenderAttributes(w util.BufWriter, node ast.Node) {
|
||||
|
||||
for _, attr := range node.Attributes() {
|
||||
_, _ = w.WriteString(" ")
|
||||
_, _ = w.Write(attr.Name)
|
||||
_, _ = w.WriteString(`="`)
|
||||
_, _ = w.Write(util.EscapeHTML(attr.Value.([]byte)))
|
||||
_ = w.WriteByte('"')
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Что делать со сложно-вложенными структурами? Например:
|
||||
|
||||
```html
|
||||
<p> <!-- state.openedBlock = paragraph -->
|
||||
Text <!-- state.textBuffer += Text -->
|
||||
<code> <!-- state.closeCurrentBlock
|
||||
state.blocks.push(currentBlock),
|
||||
state.openedBlock = code -->
|
||||
fmt.printLn("Hello world")
|
||||
<!-- state.textBuffer += fmt.printLn("Hello world") -->
|
||||
</code> <!-- state.closeCurrentBlock,
|
||||
state.blocks.push(currentBlock)-->
|
||||
</p> <!-- IGNORE -->
|
||||
```
|
||||
|
||||
Типичная функция для рендеринга paragraph, попробуем ее переделать.
|
||||
|
||||
**BEFORE**
|
||||
|
||||
```go
|
||||
func (r *Renderer) renderParagraph(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if entering {
|
||||
_, _ = w.WriteString("<p>")
|
||||
} else {
|
||||
_, _ = w.WriteString("</p>\n")
|
||||
}
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
```
|
||||
|
||||
**AFTER**
|
||||
|
||||
```go
|
||||
func (r *Renderer) renderParagraph(s RenderState, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if entering {
|
||||
if s.isCurrentBlock { // если был какой-то блок открыт, то закрываем его, так как мы не поддерживаем вложенность (да ее особо и не может быть после конвертации в markdown)
|
||||
_, _ = s.closeCurrentBlock();
|
||||
s.pushLastBlockToList();
|
||||
}
|
||||
|
||||
_, _ = s.openNewBlock("Paragraph"); // Открываем блок
|
||||
|
||||
} else {
|
||||
_, _ = s.closeCurrentBlock();
|
||||
}
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
```
|
||||
|
||||
Есть `RenderState`, у которого примерно такие интерфейс и структура:
|
||||
|
||||
```go
|
||||
type rState interface {}
|
||||
|
||||
type renderState struct {
|
||||
isCurrentBlock bool
|
||||
blockBuffer *model.Block
|
||||
textBuffer string
|
||||
marksBuffer *[]model.Block.Content.Text.Mark
|
||||
blocksList *[]model.Block
|
||||
|
||||
closeCurrentBlock func()
|
||||
openNewBlock func(blockType string)
|
||||
pushLastBlockToList func()
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
А маркап может быть вложенным, значит нам нужна очередь
|
||||
|
||||
|
||||
Motivation
|
||||
----------------------
|
||||
I need a Markdown parser for Go that meets the following conditions:
|
||||
|
|
|
|||
695
renderer/blocks/blocks.go
Normal file
695
renderer/blocks/blocks.go
Normal file
|
|
@ -0,0 +1,695 @@
|
|||
package blocks
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"github.com/yuin/goldmark/ast"
|
||||
"github.com/yuin/goldmark/renderer"
|
||||
"github.com/yuin/goldmark/util"
|
||||
"github.com/anytypeio/go-anytype-library/pb/model"
|
||||
)
|
||||
|
||||
// A Config struct has configurations for the HTML based renderers.
|
||||
type Config struct {
|
||||
Writer Writer
|
||||
HardWraps bool
|
||||
XHTML bool
|
||||
Unsafe bool
|
||||
}
|
||||
|
||||
// NewConfig returns a new Config with defaults.
|
||||
func NewConfig() Config {
|
||||
return Config{
|
||||
Writer: DefaultWriter,
|
||||
HardWraps: false,
|
||||
XHTML: false,
|
||||
Unsafe: false,
|
||||
}
|
||||
}
|
||||
|
||||
// SetOption implements renderer.NodeRenderer.SetOption.
|
||||
func (c *Config) SetOption(name renderer.OptionName, value interface{}) {
|
||||
switch name {
|
||||
case optHardWraps:
|
||||
c.HardWraps = value.(bool)
|
||||
case optXHTML:
|
||||
c.XHTML = value.(bool)
|
||||
case optUnsafe:
|
||||
c.Unsafe = value.(bool)
|
||||
case optTextWriter:
|
||||
c.Writer = value.(Writer)
|
||||
}
|
||||
}
|
||||
|
||||
// An Option interface sets options for HTML based renderers.
|
||||
type Option interface {
|
||||
SetHTMLOption(*Config)
|
||||
}
|
||||
|
||||
// TextWriter is an option name used in WithWriter.
|
||||
const optTextWriter renderer.OptionName = "Writer"
|
||||
|
||||
type withWriter struct {
|
||||
value Writer
|
||||
}
|
||||
|
||||
func (o *withWriter) SetConfig(c *renderer.Config) {
|
||||
c.Options[optTextWriter] = o.value
|
||||
}
|
||||
|
||||
func (o *withWriter) SetHTMLOption(c *Config) {
|
||||
c.Writer = o.value
|
||||
}
|
||||
|
||||
// WithWriter is a functional option that allow you to set the given writer to
|
||||
// the renderer.
|
||||
func WithWriter(writer Writer) interface {
|
||||
renderer.Option
|
||||
Option
|
||||
} {
|
||||
return &withWriter{writer}
|
||||
}
|
||||
|
||||
// HardWraps is an option name used in WithHardWraps.
|
||||
const optHardWraps renderer.OptionName = "HardWraps"
|
||||
|
||||
type withHardWraps struct {
|
||||
}
|
||||
|
||||
func (o *withHardWraps) SetConfig(c *renderer.Config) {
|
||||
c.Options[optHardWraps] = true
|
||||
}
|
||||
|
||||
func (o *withHardWraps) SetHTMLOption(c *Config) {
|
||||
c.HardWraps = true
|
||||
}
|
||||
|
||||
// WithHardWraps is a functional option that indicates whether softline breaks
|
||||
// should be rendered as '<br>'.
|
||||
func WithHardWraps() interface {
|
||||
renderer.Option
|
||||
Option
|
||||
} {
|
||||
return &withHardWraps{}
|
||||
}
|
||||
|
||||
// XHTML is an option name used in WithXHTML.
|
||||
const optXHTML renderer.OptionName = "XHTML"
|
||||
|
||||
type withXHTML struct {
|
||||
}
|
||||
|
||||
func (o *withXHTML) SetConfig(c *renderer.Config) {
|
||||
c.Options[optXHTML] = true
|
||||
}
|
||||
|
||||
func (o *withXHTML) SetHTMLOption(c *Config) {
|
||||
c.XHTML = true
|
||||
}
|
||||
|
||||
// WithXHTML is a functional option indicates that nodes should be rendered in
|
||||
// xhtml instead of HTML5.
|
||||
func WithXHTML() interface {
|
||||
Option
|
||||
renderer.Option
|
||||
} {
|
||||
return &withXHTML{}
|
||||
}
|
||||
|
||||
// Unsafe is an option name used in WithUnsafe.
|
||||
const optUnsafe renderer.OptionName = "Unsafe"
|
||||
|
||||
type withUnsafe struct {
|
||||
}
|
||||
|
||||
func (o *withUnsafe) SetConfig(c *renderer.Config) {
|
||||
c.Options[optUnsafe] = true
|
||||
}
|
||||
|
||||
func (o *withUnsafe) SetHTMLOption(c *Config) {
|
||||
c.Unsafe = true
|
||||
}
|
||||
|
||||
// WithUnsafe is a functional option that renders dangerous contents
|
||||
// (raw htmls and potentially dangerous links) as it is.
|
||||
func WithUnsafe() interface {
|
||||
renderer.Option
|
||||
Option
|
||||
} {
|
||||
return &withUnsafe{}
|
||||
}
|
||||
|
||||
// A Renderer struct is an implementation of renderer.NodeRenderer that renders
|
||||
// nodes as (X)HTML.
|
||||
type Renderer struct {
|
||||
Config
|
||||
}
|
||||
|
||||
// NewRenderer returns a new Renderer with given options.
|
||||
func NewRenderer(opts ...Option) renderer.NodeRenderer {
|
||||
r := &Renderer{
|
||||
Config: NewConfig(),
|
||||
}
|
||||
|
||||
for _, opt := range opts {
|
||||
opt.SetHTMLOption(&r.Config)
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// RegisterFuncs implements NodeRenderer.RegisterFuncs .
|
||||
func (r *Renderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
|
||||
// blocks
|
||||
|
||||
reg.Register(ast.KindDocument, r.renderDocument)
|
||||
reg.Register(ast.KindHeading, r.renderHeading)
|
||||
reg.Register(ast.KindBlockquote, r.renderBlockquote)
|
||||
reg.Register(ast.KindCodeBlock, r.renderCodeBlock)
|
||||
reg.Register(ast.KindFencedCodeBlock, r.renderFencedCodeBlock)
|
||||
reg.Register(ast.KindHTMLBlock, r.renderHTMLBlock)
|
||||
reg.Register(ast.KindList, r.renderList)
|
||||
reg.Register(ast.KindListItem, r.renderListItem)
|
||||
reg.Register(ast.KindParagraph, r.renderParagraph)
|
||||
reg.Register(ast.KindTextBlock, r.renderTextBlock)
|
||||
reg.Register(ast.KindThematicBreak, r.renderThematicBreak)
|
||||
|
||||
// inlines
|
||||
|
||||
reg.Register(ast.KindAutoLink, r.renderAutoLink)
|
||||
reg.Register(ast.KindCodeSpan, r.renderCodeSpan)
|
||||
reg.Register(ast.KindEmphasis, r.renderEmphasis)
|
||||
reg.Register(ast.KindImage, r.renderImage)
|
||||
reg.Register(ast.KindLink, r.renderLink)
|
||||
reg.Register(ast.KindRawHTML, r.renderRawHTML)
|
||||
reg.Register(ast.KindText, r.renderText)
|
||||
reg.Register(ast.KindString, r.renderString)
|
||||
}
|
||||
|
||||
type renderState struct {
|
||||
isCurrentBlock bool
|
||||
blockBuffer *model.Block
|
||||
textBuffer string
|
||||
marksBuffer []model.BlockContentTextMark
|
||||
blocksList []model.Block
|
||||
}
|
||||
|
||||
func (rs *renderState) closeCurrentBlock() {
|
||||
rs.isCurrentBlock = false;
|
||||
rs.blocksList = append(rs.blocksList, *rs.blockBuffer);
|
||||
rs.blockBuffer = &model.Block{};
|
||||
}
|
||||
|
||||
func (rs *renderState) openNewBlock(content model.IsBlockContent) {
|
||||
if rs.isCurrentBlock {
|
||||
rs.closeCurrentBlock();
|
||||
}
|
||||
rs.isCurrentBlock = true;
|
||||
rs.blockBuffer = &model.Block{
|
||||
//Id: "3",
|
||||
Content: content,
|
||||
}
|
||||
}
|
||||
|
||||
func (rs *renderState) addTextToBuffer(text string) {
|
||||
rs.textBuffer += text;
|
||||
}
|
||||
|
||||
|
||||
func (rs *renderState) openMark(text string) {
|
||||
rs.textBuffer += text;
|
||||
}
|
||||
|
||||
|
||||
func (r *Renderer) writeLines(w util.BufWriter, source []byte, n ast.Node) {
|
||||
l := n.Lines().Len()
|
||||
for i := 0; i < l; i++ {
|
||||
line := n.Lines().At(i)
|
||||
r.Writer.RawWrite(w, line.Value(source))
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Renderer) renderDocument(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
// nothing to do
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
|
||||
func (r *Renderer) renderHeading(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
n := node.(*ast.Heading)
|
||||
if entering {
|
||||
_, _ = w.WriteString("<h")
|
||||
_ = w.WriteByte("0123456"[n.Level])
|
||||
if n.Attributes() != nil {
|
||||
r.RenderAttributes(w, node)
|
||||
}
|
||||
_ = w.WriteByte('>')
|
||||
} else {
|
||||
_, _ = w.WriteString("</h")
|
||||
_ = w.WriteByte("0123456"[n.Level])
|
||||
_, _ = w.WriteString(">\n")
|
||||
}
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
|
||||
func (r *Renderer) renderBlockquote(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if entering {
|
||||
_, _ = w.WriteString("<blockquote>\n")
|
||||
} else {
|
||||
_, _ = w.WriteString("</blockquote>\n")
|
||||
}
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
|
||||
func (r *Renderer) renderCodeBlock(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if entering {
|
||||
_, _ = w.WriteString("<pre><code>")
|
||||
r.writeLines(w, source, n)
|
||||
} else {
|
||||
_, _ = w.WriteString("</code></pre>\n")
|
||||
}
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
|
||||
func (r *Renderer) renderFencedCodeBlock(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
n := node.(*ast.FencedCodeBlock)
|
||||
if entering {
|
||||
_, _ = w.WriteString("<pre><code")
|
||||
language := n.Language(source)
|
||||
if language != nil {
|
||||
_, _ = w.WriteString(" class=\"language-")
|
||||
r.Writer.Write(w, language)
|
||||
_, _ = w.WriteString("\"")
|
||||
}
|
||||
_ = w.WriteByte('>')
|
||||
r.writeLines(w, source, n)
|
||||
} else {
|
||||
_, _ = w.WriteString("</code></pre>\n")
|
||||
}
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
|
||||
func (r *Renderer) renderHTMLBlock(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
n := node.(*ast.HTMLBlock)
|
||||
if entering {
|
||||
if r.Unsafe {
|
||||
l := n.Lines().Len()
|
||||
for i := 0; i < l; i++ {
|
||||
line := n.Lines().At(i)
|
||||
_, _ = w.Write(line.Value(source))
|
||||
}
|
||||
} else {
|
||||
_, _ = w.WriteString("<!-- raw HTML omitted -->\n")
|
||||
}
|
||||
} else {
|
||||
if n.HasClosure() {
|
||||
if r.Unsafe {
|
||||
closure := n.ClosureLine
|
||||
_, _ = w.Write(closure.Value(source))
|
||||
} else {
|
||||
_, _ = w.WriteString("<!-- raw HTML omitted -->\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
|
||||
func (r *Renderer) renderList(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
n := node.(*ast.List)
|
||||
tag := "ul"
|
||||
if n.IsOrdered() {
|
||||
tag = "ol"
|
||||
}
|
||||
if entering {
|
||||
_ = w.WriteByte('<')
|
||||
_, _ = w.WriteString(tag)
|
||||
if n.IsOrdered() && n.Start != 1 {
|
||||
fmt.Fprintf(w, " start=\"%d\">\n", n.Start)
|
||||
} else {
|
||||
_, _ = w.WriteString(">\n")
|
||||
}
|
||||
} else {
|
||||
_, _ = w.WriteString("</")
|
||||
_, _ = w.WriteString(tag)
|
||||
_, _ = w.WriteString(">\n")
|
||||
}
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
|
||||
func (r *Renderer) renderListItem(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if entering {
|
||||
_, _ = w.WriteString("<li>")
|
||||
fc := n.FirstChild()
|
||||
if fc != nil {
|
||||
if _, ok := fc.(*ast.TextBlock); !ok {
|
||||
_ = w.WriteByte('\n')
|
||||
}
|
||||
}
|
||||
} else {
|
||||
_, _ = w.WriteString("</li>\n")
|
||||
}
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
|
||||
func (r *Renderer) renderParagraph(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if entering {
|
||||
_, _ = w.WriteString("<p>")
|
||||
} else {
|
||||
_, _ = w.WriteString("</p>\n")
|
||||
}
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
|
||||
func (r *Renderer) renderTextBlock(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if !entering {
|
||||
if _, ok := n.NextSibling().(ast.Node); ok && n.FirstChild() != nil {
|
||||
_ = w.WriteByte('\n')
|
||||
}
|
||||
}
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
|
||||
func (r *Renderer) renderThematicBreak(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
if r.XHTML {
|
||||
_, _ = w.WriteString("<hr />\n")
|
||||
} else {
|
||||
_, _ = w.WriteString("<hr>\n")
|
||||
}
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
|
||||
func (r *Renderer) renderAutoLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
n := node.(*ast.AutoLink)
|
||||
if !entering {
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
_, _ = w.WriteString(`<a href="`)
|
||||
url := n.URL(source)
|
||||
label := n.Label(source)
|
||||
if n.AutoLinkType == ast.AutoLinkEmail && !bytes.HasPrefix(bytes.ToLower(url), []byte("mailto:")) {
|
||||
_, _ = w.WriteString("mailto:")
|
||||
}
|
||||
_, _ = w.Write(util.EscapeHTML(util.URLEscape(url, false)))
|
||||
_, _ = w.WriteString(`">`)
|
||||
_, _ = w.Write(util.EscapeHTML(label))
|
||||
_, _ = w.WriteString(`</a>`)
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
|
||||
func (r *Renderer) renderCodeSpan(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if entering {
|
||||
_, _ = w.WriteString("<code>")
|
||||
for c := n.FirstChild(); c != nil; c = c.NextSibling() {
|
||||
segment := c.(*ast.Text).Segment
|
||||
value := segment.Value(source)
|
||||
if bytes.HasSuffix(value, []byte("\n")) {
|
||||
r.Writer.RawWrite(w, value[:len(value)-1])
|
||||
if c != n.LastChild() {
|
||||
r.Writer.RawWrite(w, []byte(" "))
|
||||
}
|
||||
} else {
|
||||
r.Writer.RawWrite(w, value)
|
||||
}
|
||||
}
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
_, _ = w.WriteString("</code>")
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
|
||||
func (r *Renderer) renderEmphasis(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
n := node.(*ast.Emphasis)
|
||||
tag := "em"
|
||||
if n.Level == 2 {
|
||||
tag = "strong"
|
||||
}
|
||||
if entering {
|
||||
_ = w.WriteByte('<')
|
||||
_, _ = w.WriteString(tag)
|
||||
_ = w.WriteByte('>')
|
||||
} else {
|
||||
_, _ = w.WriteString("</")
|
||||
_, _ = w.WriteString(tag)
|
||||
_ = w.WriteByte('>')
|
||||
}
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
|
||||
func (r *Renderer) renderLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
n := node.(*ast.Link)
|
||||
if entering {
|
||||
_, _ = w.WriteString("<a href=\"")
|
||||
if r.Unsafe || !IsDangerousURL(n.Destination) {
|
||||
_, _ = w.Write(util.EscapeHTML(util.URLEscape(n.Destination, true)))
|
||||
}
|
||||
_ = w.WriteByte('"')
|
||||
if n.Title != nil {
|
||||
_, _ = w.WriteString(` title="`)
|
||||
r.Writer.Write(w, n.Title)
|
||||
_ = w.WriteByte('"')
|
||||
}
|
||||
_ = w.WriteByte('>')
|
||||
} else {
|
||||
_, _ = w.WriteString("</a>")
|
||||
}
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
func (r *Renderer) renderImage(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
n := node.(*ast.Image)
|
||||
_, _ = w.WriteString("<img src=\"")
|
||||
if r.Unsafe || !IsDangerousURL(n.Destination) {
|
||||
_, _ = w.Write(util.EscapeHTML(util.URLEscape(n.Destination, true)))
|
||||
}
|
||||
_, _ = w.WriteString(`" alt="`)
|
||||
_, _ = w.Write(n.Text(source))
|
||||
_ = w.WriteByte('"')
|
||||
if n.Title != nil {
|
||||
_, _ = w.WriteString(` title="`)
|
||||
r.Writer.Write(w, n.Title)
|
||||
_ = w.WriteByte('"')
|
||||
}
|
||||
if r.XHTML {
|
||||
_, _ = w.WriteString(" />")
|
||||
} else {
|
||||
_, _ = w.WriteString(">")
|
||||
}
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
|
||||
func (r *Renderer) renderRawHTML(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
if r.Unsafe {
|
||||
n := node.(*ast.RawHTML)
|
||||
l := n.Segments.Len()
|
||||
for i := 0; i < l; i++ {
|
||||
segment := n.Segments.At(i)
|
||||
_, _ = w.Write(segment.Value(source))
|
||||
}
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
_, _ = w.WriteString("<!-- raw HTML omitted -->")
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
|
||||
func (r *Renderer) renderText(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
n := node.(*ast.Text)
|
||||
segment := n.Segment
|
||||
if n.IsRaw() {
|
||||
r.Writer.RawWrite(w, segment.Value(source))
|
||||
} else {
|
||||
r.Writer.Write(w, segment.Value(source))
|
||||
if n.HardLineBreak() || (n.SoftLineBreak() && r.HardWraps) {
|
||||
if r.XHTML {
|
||||
_, _ = w.WriteString("<br />\n")
|
||||
} else {
|
||||
_, _ = w.WriteString("<br>\n")
|
||||
}
|
||||
} else if n.SoftLineBreak() {
|
||||
_ = w.WriteByte('\n')
|
||||
}
|
||||
}
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
|
||||
func (r *Renderer) renderString(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
n := node.(*ast.String)
|
||||
if n.IsCode() {
|
||||
_, _ = w.Write(n.Value)
|
||||
} else {
|
||||
if n.IsRaw() {
|
||||
r.Writer.RawWrite(w, n.Value)
|
||||
} else {
|
||||
r.Writer.Write(w, n.Value)
|
||||
}
|
||||
}
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
|
||||
// RenderAttributes renders given node's attributes.
|
||||
func (r *Renderer) RenderAttributes(w util.BufWriter, node ast.Node) {
|
||||
|
||||
for _, attr := range node.Attributes() {
|
||||
_, _ = w.WriteString(" ")
|
||||
_, _ = w.Write(attr.Name)
|
||||
_, _ = w.WriteString(`="`)
|
||||
_, _ = w.Write(util.EscapeHTML(attr.Value.([]byte)))
|
||||
_ = w.WriteByte('"')
|
||||
}
|
||||
}
|
||||
|
||||
// A Writer interface wirtes textual contents to a writer.
|
||||
type Writer interface {
|
||||
// Write writes the given source to writer with resolving references and unescaping
|
||||
// backslash escaped characters.
|
||||
Write(writer util.BufWriter, source []byte)
|
||||
|
||||
// RawWrite wirtes the given source to writer without resolving references and
|
||||
// unescaping backslash escaped characters.
|
||||
RawWrite(writer util.BufWriter, source []byte)
|
||||
}
|
||||
|
||||
// defaultWriter is the stateless Writer implementation behind DefaultWriter.
type defaultWriter struct{}
|
||||
|
||||
func escapeRune(writer util.BufWriter, r rune) {
|
||||
if r < 256 {
|
||||
v := util.EscapeHTMLByte(byte(r))
|
||||
if v != nil {
|
||||
_, _ = writer.Write(v)
|
||||
return
|
||||
}
|
||||
}
|
||||
_, _ = writer.WriteRune(util.ToValidRune(r))
|
||||
}
|
||||
|
||||
func (d *defaultWriter) RawWrite(writer util.BufWriter, source []byte) {
|
||||
n := 0
|
||||
l := len(source)
|
||||
for i := 0; i < l; i++ {
|
||||
v := util.EscapeHTMLByte(source[i])
|
||||
if v != nil {
|
||||
_, _ = writer.Write(source[i-n : i])
|
||||
n = 0
|
||||
_, _ = writer.Write(v)
|
||||
continue
|
||||
}
|
||||
n++
|
||||
}
|
||||
if n != 0 {
|
||||
_, _ = writer.Write(source[l-n:])
|
||||
}
|
||||
}
|
||||
|
||||
func (d *defaultWriter) Write(writer util.BufWriter, source []byte) {
|
||||
escaped := false
|
||||
var ok bool
|
||||
limit := len(source)
|
||||
n := 0
|
||||
for i := 0; i < limit; i++ {
|
||||
c := source[i]
|
||||
if escaped {
|
||||
if util.IsPunct(c) {
|
||||
d.RawWrite(writer, source[n:i-1])
|
||||
n = i
|
||||
escaped = false
|
||||
continue
|
||||
}
|
||||
}
|
||||
if c == '&' {
|
||||
pos := i
|
||||
next := i + 1
|
||||
if next < limit && source[next] == '#' {
|
||||
nnext := next + 1
|
||||
if nnext < limit {
|
||||
nc := source[nnext]
|
||||
// code point like #x22;
|
||||
if nnext < limit && nc == 'x' || nc == 'X' {
|
||||
start := nnext + 1
|
||||
i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsHexDecimal)
|
||||
if ok && i < limit && source[i] == ';' {
|
||||
v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 16, 32)
|
||||
d.RawWrite(writer, source[n:pos])
|
||||
n = i + 1
|
||||
escapeRune(writer, rune(v))
|
||||
continue
|
||||
}
|
||||
// code point like #1234;
|
||||
} else if nc >= '0' && nc <= '9' {
|
||||
start := nnext
|
||||
i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsNumeric)
|
||||
if ok && i < limit && i-start < 8 && source[i] == ';' {
|
||||
v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 0, 32)
|
||||
d.RawWrite(writer, source[n:pos])
|
||||
n = i + 1
|
||||
escapeRune(writer, rune(v))
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
start := next
|
||||
i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsAlphaNumeric)
|
||||
// entity reference
|
||||
if ok && i < limit && source[i] == ';' {
|
||||
name := util.BytesToReadOnlyString(source[start:i])
|
||||
entity, ok := util.LookUpHTML5EntityByName(name)
|
||||
if ok {
|
||||
d.RawWrite(writer, source[n:pos])
|
||||
n = i + 1
|
||||
d.RawWrite(writer, entity.Characters)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
i = next - 1
|
||||
}
|
||||
if c == '\\' {
|
||||
escaped = true
|
||||
continue
|
||||
}
|
||||
escaped = false
|
||||
}
|
||||
d.RawWrite(writer, source[n:])
|
||||
}
|
||||
|
||||
// DefaultWriter is a default implementation of the Writer.
|
||||
var DefaultWriter = &defaultWriter{}
|
||||
|
||||
// Lower-case byte prefixes consulted by IsDangerousURL.
var (
	bDataImage = []byte("data:image/")
	bPng       = []byte("png;")
	bGif       = []byte("gif;")
	bJpeg      = []byte("jpeg;")
	bWebp      = []byte("webp;")
	bJs        = []byte("javascript:")
	bVb        = []byte("vbscript:")
	bFile      = []byte("file:")
	bData      = []byte("data:")
)

// hasPrefixFold reports whether s starts with prefix, ignoring letter case.
func hasPrefixFold(s, prefix []byte) bool {
	return len(s) >= len(prefix) && bytes.EqualFold(s[:len(prefix)], prefix)
}

// IsDangerousURL returns true if the given url seems a potentially dangerous
// url, otherwise false.
//
// URLs using the javascript:, vbscript:, file: or data: scheme are reported
// as dangerous, except data:image/ URLs carrying png, gif, jpeg or webp
// content. Prefixes are matched case-insensitively; the previous
// case-sensitive comparison let spellings such as "JavaScript:" through.
func IsDangerousURL(url []byte) bool {
	if hasPrefixFold(url, bDataImage) {
		v := url[len(bDataImage):]
		if hasPrefixFold(v, bPng) || hasPrefixFold(v, bGif) ||
			hasPrefixFold(v, bJpeg) || hasPrefixFold(v, bWebp) {
			return false
		}
		return true
	}
	return hasPrefixFold(url, bJs) || hasPrefixFold(url, bVb) ||
		hasPrefixFold(url, bFile) || hasPrefixFold(url, bData)
}
|
||||
Loading…
Reference in a new issue