From dd89404e047394c43ae668de815c3abe55be7831 Mon Sep 17 00:00:00 2001 From: yuin Date: Fri, 26 Apr 2019 20:27:01 +0900 Subject: [PATCH] first commit --- .gitignore | 13 + Makefile | 7 + README.md | 129 + _benchmark/_data.md | 9702 ++++++++++++++++++++++++++++++++ _benchmark/benchmark_test.go | 55 + _test/spec.json | 5194 +++++++++++++++++ ast/ast.go | 342 ++ ast/block.go | 364 ++ ast/inline.go | 371 ++ commonmark_test.go | 53 + extension/ast/strikethrough.go | 23 + extension/ast/table.go | 113 + extension/ast/tasklist.go | 27 + extension/gfm.go | 32 + extension/linkify.go | 125 + extension/strikethrough.go | 111 + extension/table.go | 233 + extension/tasklist.go | 118 + go.mod | 1 + markdown.go | 144 + parser/atx_heading.go | 146 + parser/auto_link.go | 46 + parser/blockquote.go | 65 + parser/code_block.go | 75 + parser/code_span.go | 87 + parser/delimiter.go | 232 + parser/emphasis.go | 54 + parser/fcode_block.go | 96 + parser/html_block.go | 278 + parser/link.go | 366 ++ parser/link_ref.go | 163 + parser/list.go | 241 + parser/list_item.go | 81 + parser/paragraph.go | 62 + parser/parser.go | 987 ++++ parser/raw_html.go | 126 + parser/setext_headings.go | 109 + parser/themantic_break.go | 71 + renderer/html/html.go | 588 ++ renderer/renderer.go | 161 + text/reader.go | 492 ++ text/segment.go | 209 + util/html5entities.go | 2142 +++++++ util/util.go | 538 ++ util/util_safe.go | 13 + util/util_unsafe.go | 20 + 46 files changed, 24605 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 README.md create mode 100644 _benchmark/_data.md create mode 100644 _benchmark/benchmark_test.go create mode 100644 _test/spec.json create mode 100644 ast/ast.go create mode 100644 ast/block.go create mode 100644 ast/inline.go create mode 100644 commonmark_test.go create mode 100644 extension/ast/strikethrough.go create mode 100644 extension/ast/table.go create mode 100644 extension/ast/tasklist.go create mode 100644 extension/gfm.go create mode 100644 extension/linkify.go create mode 100644 extension/strikethrough.go create mode 100644 extension/table.go create mode 100644 extension/tasklist.go create mode 100644 go.mod create mode 100644 markdown.go create mode 100644 parser/atx_heading.go create mode 100644 parser/auto_link.go create mode 100644 parser/blockquote.go create mode 100644 parser/code_block.go create mode 100644 parser/code_span.go create mode 100644 parser/delimiter.go create mode 100644 parser/emphasis.go create mode 100644 parser/fcode_block.go create mode 100644 parser/html_block.go create mode 100644 parser/link.go create mode 100644 parser/link_ref.go create mode 100644 parser/list.go create mode 100644 parser/list_item.go create mode 100644 parser/paragraph.go create mode 100644 parser/parser.go create mode 100644 parser/raw_html.go create mode 100644 parser/setext_headings.go create mode 100644 parser/themantic_break.go create mode 100644 renderer/html/html.go create mode 100644 renderer/renderer.go create mode 100644 text/reader.go create mode 100644 text/segment.go create mode 100644 util/html5entities.go create mode 100644 util/util.go create mode 100644 util/util_safe.go create mode 100644 util/util_unsafe.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6e4db92 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, build with `go test -c` +*.test +*.pprof + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..2ac9f47 --- /dev/null +++ b/Makefile @@ -0,0 +1,7 @@ +.PHONY: test + +test: + go test -coverprofile=profile.out -coverpkg=github.com/yuin/goldmark,github.com/yuin/goldmark/ast,github.com/yuin/goldmark/extension,github.com/yuin/goldmark/extension/ast,github.com/yuin/goldmark/parser,github.com/yuin/goldmark/renderer,github.com/yuin/goldmark/renderer/html,github.com/yuin/goldmark/text,github.com/yuin/goldmark/util ./... + +cov: test + go tool cover -html=profile.out diff --git a/README.md b/README.md new file mode 100644 index 0000000..001ff50 --- /dev/null +++ b/README.md @@ -0,0 +1,129 @@ +goldmark +========================================== + +> A markdown parser written in Go. Easy to extend, standard compliant, well structured. + +goldmark is compliant to CommonMark 0.29. + +Motivation +---------------------- +I need a markdown parser for Go that meets following conditions: + +- Easy to extend. + - Markdown is poor in document expressions compared with other light markup languages like restructuredText. + - We have extended a markdown syntax. i.e. : PHPMarkdownExtra, Github Flavored Markdown. +- Standard compliant. + - Markdown has many dialects. + - Github Flavored Markdown is widely used and it is based on CommonMark aside from whether CommonMark is good specification or not. + - CommonMark is too complicated and hard to implement. +- Well structured. + - AST based, and preserves source potision of nodes. +- Written in pure Go. + +[golang-commonmark](https://gitlab.com/golang-commonmark/markdown) may be a good choice, but it seems copy of the [markdown-it](https://github.com/markdown-it) . + +[blackfriday.v2](https://github.com/russross/blackfriday/tree/v2) is a fast and widely used implementation, but it is not CommonMark compliant and can not be extended from outside of the package since it's AST is not interfaces but structs. + +As mentioned above, CommonMark is too complicated and hard to implement, So Markdown parsers base on CommonMark barely exist. + +Usage +---------------------- + +Convert Markdown documents with the CommonMark compliant mode: + +```go +var buf bytes.Buffer +if err := goldmark.Convert(source, &buf); err != nil { + panic(err) +} +``` + +Customize a parser and a renderer: + +```go +md := goldmark.NewMarkdown( + goldmark.WithExtensions(extension.GFM), + goldmark.WithParserOptions( + parser.WithHeadingID(), + ), + goldmark.WithRendererOptions( + html.WithSoftLineBreak(true), + html.WithXHTML(true), + ), + ) +var buf bytes.Buffer +if err := md.Convert(source, &buf); err != nil { + panic(err) +} +``` + +Parser and Renderer options +------------------------------ + +### Parser options + +| Functional option | Type | Description | +| ----------------- | ---- | ----------- | +| `parser.WithBlockParsers` | List of `util.PrioritizedSlice` whose elements are `parser.BlockParser` | Parsers for parsing block level elements. | +| `parser.WithInlineParsers` | List of `util.PrioritizedSlice` whose elements are `parser.InlineParser` | Parsers for parsing inline level elements. | +| `parser.WithParagraphTransformers` | List of `util.PrioritizedSlice` whose elements are `parser.ParagraphTransformer` | Transformers for transforming paragraph nodes. | +| `parser.WithHeadingID` | `-` | Enables custom heading ids( `{#custom-id}` ) and auto heading ids. | +| `parser.WithFilterTags` | `...string` | HTML tag names forbidden in HTML blocks and Raw HTMLs. | + +### HTML Renderer options + +| Functional option | Type | Description | +| ----------------- | ---- | ----------- | +| `html.WithWriter` | `html.Writer` | `html.Writer` for writing contents to an `io.Writer`. | +| `html.WithSoftLineBreak` | `-` | Render new lines as `
` if true, otherwise `\n` .| +| `html.WithXHTML` | `-` | Render as XHTML. | + +### Built-in extensions + +- `extension.Table` +- `extension.Strikethrough` +- `extension.Linkify` +- `extension.TaskList` +- `extension.GFM` + - This extension enables Table, Strikethrough, Linkify and TaskList. + In addition, this extension sets some tags to `parser.FilterTags` . + +Create extensions +-------------------- +**TODO** + +See `extension` directory for examples of extensions. + +Summary: + +1. Define AST Node as a struct in which `ast.BaseBlock` or `ast.BaseInline` is embedded. +2. Write a parser that implements `parser.BlockParser` or `parser.InlineParser`. +3. Write a renderer that implements `renderer.NodeRenderer`. +4. Define your goldmark extension that implements `goldmark.Extender`. + +Benchmark +-------------------- +You can run this benchmark in the `_benchmark` directory. + +blackfriday v2 is fastest, but it is not CommonMark compiliant and not an extensible. + +Though goldmark builds clean extensible AST structure and get full compliance with +Commonmark, it is resonably fast and less memory consumption. + +``` +BenchmarkGoldMark-4 200 7291402 ns/op 2259603 B/op 16867 allocs/op +BenchmarkGolangCommonMark-4 200 7709939 ns/op 3053760 B/op 18682 allocs/op +BenchmarkBlackFriday-4 300 5776369 ns/op 3356386 B/op 17480 allocs/op +``` + +Donation +-------------------- +BTC: 1NEDSyUmo4SMTDP83JJQSWi1MvQUGGNMZB + +License +-------------------- +MIT + +Author +-------------------- +Yusuke Inuzuka diff --git a/_benchmark/_data.md b/_benchmark/_data.md new file mode 100644 index 0000000..87db650 --- /dev/null +++ b/_benchmark/_data.md @@ -0,0 +1,9702 @@ +# Introduction + +## What is Markdown? + +Markdown is a plain text format for writing structured documents, +based on conventions for indicating formatting in email +and usenet posts. It was developed by John Gruber (with +help from Aaron Swartz) and released in 2004 in the form of a +[syntax description](http://daringfireball.net/projects/markdown/syntax) +and a Perl script (`Markdown.pl`) for converting Markdown to +HTML. In the next decade, dozens of implementations were +developed in many languages. Some extended the original +Markdown syntax with conventions for footnotes, tables, and +other document elements. Some allowed Markdown documents to be +rendered in formats other than HTML. Websites like Reddit, +StackOverflow, and GitHub had millions of people using Markdown. +And Markdown started to be used beyond the web, to author books, +articles, slide shows, letters, and lecture notes. + +What distinguishes Markdown from many other lightweight markup +syntaxes, which are often easier to write, is its readability. +As Gruber writes: + +> The overriding design goal for Markdown's formatting syntax is +> to make it as readable as possible. The idea is that a +> Markdown-formatted document should be publishable as-is, as +> plain text, without looking like it's been marked up with tags +> or formatting instructions. +> () + +The point can be illustrated by comparing a sample of +[AsciiDoc](http://www.methods.co.nz/asciidoc/) with +an equivalent sample of Markdown. Here is a sample of +AsciiDoc from the AsciiDoc manual: + +``` +1. List item one. ++ +List item one continued with a second paragraph followed by an +Indented block. ++ +................. +$ ls *.sh +$ mv *.sh ~/tmp +................. ++ +List item continued with a third paragraph. + +2. List item two continued with an open block. ++ +-- +This paragraph is part of the preceding list item. + +a. This list is nested and does not require explicit item +continuation. ++ +This paragraph is part of the preceding list item. + +b. List item b. + +This paragraph belongs to item two of the outer list. +-- +``` + +And here is the equivalent in Markdown: +``` +1. List item one. + + List item one continued with a second paragraph followed by an + Indented block. + + $ ls *.sh + $ mv *.sh ~/tmp + + List item continued with a third paragraph. + +2. List item two continued with an open block. + + This paragraph is part of the preceding list item. + + 1. This list is nested and does not require explicit item continuation. + + This paragraph is part of the preceding list item. + + 2. List item b. + + This paragraph belongs to item two of the outer list. +``` + +The AsciiDoc version is, arguably, easier to write. You don't need +to worry about indentation. But the Markdown version is much easier +to read. The nesting of list items is apparent to the eye in the +source, not just in the processed document. + +## Why is a spec needed? + +John Gruber's [canonical description of Markdown's +syntax](http://daringfireball.net/projects/markdown/syntax) +does not specify the syntax unambiguously. Here are some examples of +questions it does not answer: + +1. How much indentation is needed for a sublist? The spec says that + continuation paragraphs need to be indented four spaces, but is + not fully explicit about sublists. It is natural to think that + they, too, must be indented four spaces, but `Markdown.pl` does + not require that. This is hardly a "corner case," and divergences + between implementations on this issue often lead to surprises for + users in real documents. (See [this comment by John + Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).) + +2. Is a blank line needed before a block quote or heading? + Most implementations do not require the blank line. However, + this can lead to unexpected results in hard-wrapped text, and + also to ambiguities in parsing (note that some implementations + put the heading inside the blockquote, while others do not). + (John Gruber has also spoken [in favor of requiring the blank + lines](http://article.gmane.org/gmane.text.markdown.general/2146).) + +3. Is a blank line needed before an indented code block? + (`Markdown.pl` requires it, but this is not mentioned in the + documentation, and some implementations do not require it.) + + ``` markdown + paragraph + code? + ``` + +4. What is the exact rule for determining when list items get + wrapped in `

` tags? Can a list be partially "loose" and partially + "tight"? What should we do with a list like this? + + ``` markdown + 1. one + + 2. two + 3. three + ``` + + Or this? + + ``` markdown + 1. one + - a + + - b + 2. two + ``` + + (There are some relevant comments by John Gruber + [here](http://article.gmane.org/gmane.text.markdown.general/2554).) + +5. Can list markers be indented? Can ordered list markers be right-aligned? + + ``` markdown + 8. item 1 + 9. item 2 + 10. item 2a + ``` + +6. Is this one list with a thematic break in its second item, + or two lists separated by a thematic break? + + ``` markdown + * a + * * * * * + * b + ``` + +7. When list markers change from numbers to bullets, do we have + two lists or one? (The Markdown syntax description suggests two, + but the perl scripts and many other implementations produce one.) + + ``` markdown + 1. fee + 2. fie + - foe + - fum + ``` + +8. What are the precedence rules for the markers of inline structure? + For example, is the following a valid link, or does the code span + take precedence ? + + ``` markdown + [a backtick (`)](/url) and [another backtick (`)](/url). + ``` + +9. What are the precedence rules for markers of emphasis and strong + emphasis? For example, how should the following be parsed? + + ``` markdown + *foo *bar* baz* + ``` + +10. What are the precedence rules between block-level and inline-level + structure? For example, how should the following be parsed? + + ``` markdown + - `a long code span can contain a hyphen like this + - and it can screw things up` + ``` + +11. Can list items include section headings? (`Markdown.pl` does not + allow this, but does allow blockquotes to include headings.) + + ``` markdown + - # Heading + ``` + +12. Can list items be empty? + + ``` markdown + * a + * + * b + ``` + +13. Can link references be defined inside block quotes or list items? + + ``` markdown + > Blockquote [foo]. + > + > [foo]: /url + ``` + +14. If there are multiple definitions for the same reference, which takes + precedence? + + ``` markdown + [foo]: /url1 + [foo]: /url2 + + [foo][] + ``` + +In the absence of a spec, early implementers consulted `Markdown.pl` +to resolve these ambiguities. But `Markdown.pl` was quite buggy, and +gave manifestly bad results in many cases, so it was not a +satisfactory replacement for a spec. + +Because there is no unambiguous spec, implementations have diverged +considerably. As a result, users are often surprised to find that +a document that renders one way on one system (say, a GitHub wiki) +renders differently on another (say, converting to docbook using +pandoc). To make matters worse, because nothing in Markdown counts +as a "syntax error," the divergence often isn't discovered right away. + +## About this document + +This document attempts to specify Markdown syntax unambiguously. +It contains many examples with side-by-side Markdown and +HTML. These are intended to double as conformance tests. An +accompanying script `spec_tests.py` can be used to run the tests +against any Markdown program: + + python test/spec_tests.py --spec spec.txt --program PROGRAM + +Since this document describes how Markdown is to be parsed into +an abstract syntax tree, it would have made sense to use an abstract +representation of the syntax tree instead of HTML. But HTML is capable +of representing the structural distinctions we need to make, and the +choice of HTML for the tests makes it possible to run the tests against +an implementation without writing an abstract syntax tree renderer. + +This document is generated from a text file, `spec.txt`, written +in Markdown with a small extension for the side-by-side tests. +The script `tools/makespec.py` can be used to convert `spec.txt` into +HTML or CommonMark (which can then be converted into other formats). + +In the examples, the `→` character is used to represent tabs. + +# Preliminaries + +## Characters and lines + +Any sequence of [characters] is a valid CommonMark +document. + +A [character](@) is a Unicode code point. Although some +code points (for example, combining accents) do not correspond to +characters in an intuitive sense, all code points count as characters +for purposes of this spec. + +This spec does not specify an encoding; it thinks of lines as composed +of [characters] rather than bytes. A conforming parser may be limited +to a certain encoding. + +A [line](@) is a sequence of zero or more [characters] +other than newline (`U+000A`) or carriage return (`U+000D`), +followed by a [line ending] or by the end of file. + +A [line ending](@) is a newline (`U+000A`), a carriage return +(`U+000D`) not followed by a newline, or a carriage return and a +following newline. + +A line containing no characters, or a line containing only spaces +(`U+0020`) or tabs (`U+0009`), is called a [blank line](@). + +The following definitions of character classes will be used in this spec: + +A [whitespace character](@) is a space +(`U+0020`), tab (`U+0009`), newline (`U+000A`), line tabulation (`U+000B`), +form feed (`U+000C`), or carriage return (`U+000D`). + +[Whitespace](@) is a sequence of one or more [whitespace +characters]. + +A [Unicode whitespace character](@) is +any code point in the Unicode `Zs` general category, or a tab (`U+0009`), +carriage return (`U+000D`), newline (`U+000A`), or form feed +(`U+000C`). + +[Unicode whitespace](@) is a sequence of one +or more [Unicode whitespace characters]. + +A [space](@) is `U+0020`. + +A [non-whitespace character](@) is any character +that is not a [whitespace character]. + +An [ASCII punctuation character](@) +is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`, +`*`, `+`, `,`, `-`, `.`, `/` (U+0021–2F), +`:`, `;`, `<`, `=`, `>`, `?`, `@` (U+003A–0040), +`[`, `\`, `]`, `^`, `_`, `` ` `` (U+005B–0060), +`{`, `|`, `}`, or `~` (U+007B–007E). + +A [punctuation character](@) is an [ASCII +punctuation character] or anything in +the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`. + +## Tabs + +Tabs in lines are not expanded to [spaces]. However, +in contexts where whitespace helps to define block structure, +tabs behave as if they were replaced by spaces with a tab stop +of 4 characters. + +Thus, for example, a tab can be used instead of four spaces +in an indented code block. (Note, however, that internal +tabs are passed through as literal tabs, not expanded to +spaces.) + +```````````````````````````````` example +→foo→baz→→bim +. +

foo→baz→→bim
+
+```````````````````````````````` + +```````````````````````````````` example + →foo→baz→→bim +. +
foo→baz→→bim
+
+```````````````````````````````` + +```````````````````````````````` example + a→a + ὐ→a +. +
a→a
+ὐ→a
+
+```````````````````````````````` + +In the following example, a continuation paragraph of a list +item is indented with a tab; this has exactly the same effect +as indentation with four spaces would: + +```````````````````````````````` example + - foo + +→bar +. + +```````````````````````````````` + +```````````````````````````````` example +- foo + +→→bar +. + +```````````````````````````````` + +Normally the `>` that begins a block quote may be followed +optionally by a space, which is not considered part of the +content. In the following case `>` is followed by a tab, +which is treated as if it were expanded into three spaces. +Since one of these spaces is considered part of the +delimiter, `foo` is considered to be indented six spaces +inside the block quote context, so we get an indented +code block starting with two spaces. + +```````````````````````````````` example +>→→foo +. +
+
  foo
+
+
+```````````````````````````````` + +```````````````````````````````` example +-→→foo +. + +```````````````````````````````` + + +```````````````````````````````` example + foo +→bar +. +
foo
+bar
+
+```````````````````````````````` + +```````````````````````````````` example + - foo + - bar +→ - baz +. + +```````````````````````````````` + +```````````````````````````````` example +#→Foo +. +

Foo

+```````````````````````````````` + +```````````````````````````````` example +*→*→*→ +. +
+```````````````````````````````` + + +## Insecure characters + +For security reasons, the Unicode character `U+0000` must be replaced +with the REPLACEMENT CHARACTER (`U+FFFD`). + +# Blocks and inlines + +We can think of a document as a sequence of +[blocks](@)---structural elements like paragraphs, block +quotations, lists, headings, rules, and code blocks. Some blocks (like +block quotes and list items) contain other blocks; others (like +headings and paragraphs) contain [inline](@) content---text, +links, emphasized text, images, code spans, and so on. + +## Precedence + +Indicators of block structure always take precedence over indicators +of inline structure. So, for example, the following is a list with +two items, not a list with one item containing a code span: + +```````````````````````````````` example +- `one +- two` +. + +```````````````````````````````` + + +This means that parsing can proceed in two steps: first, the block +structure of the document can be discerned; second, text lines inside +paragraphs, headings, and other block constructs can be parsed for inline +structure. The second step requires information about link reference +definitions that will be available only at the end of the first +step. Note that the first step requires processing lines in sequence, +but the second can be parallelized, since the inline parsing of +one block element does not affect the inline parsing of any other. + +## Container blocks and leaf blocks + +We can divide blocks into two types: +[container blocks](@), +which can contain other blocks, and [leaf blocks](@), +which cannot. + +# Leaf blocks + +This section describes the different kinds of leaf block that make up a +Markdown document. + +## Thematic breaks + +A line consisting of 0-3 spaces of indentation, followed by a sequence +of three or more matching `-`, `_`, or `*` characters, each followed +optionally by any number of spaces or tabs, forms a +[thematic break](@). + +```````````````````````````````` example +*** +--- +___ +. +
+
+
+```````````````````````````````` + + +Wrong characters: + +```````````````````````````````` example ++++ +. +

+++

+```````````````````````````````` + + +```````````````````````````````` example +=== +. +

===

+```````````````````````````````` + + +Not enough characters: + +```````````````````````````````` example +-- +** +__ +. +

-- +** +__

+```````````````````````````````` + + +One to three spaces indent are allowed: + +```````````````````````````````` example + *** + *** + *** +. +
+
+
+```````````````````````````````` + + +Four spaces is too many: + +```````````````````````````````` example + *** +. +
***
+
+```````````````````````````````` + + +```````````````````````````````` example +Foo + *** +. +

Foo +***

+```````````````````````````````` + + +More than three characters may be used: + +```````````````````````````````` example +_____________________________________ +. +
+```````````````````````````````` + + +Spaces are allowed between the characters: + +```````````````````````````````` example + - - - +. +
+```````````````````````````````` + + +```````````````````````````````` example + ** * ** * ** * ** +. +
+```````````````````````````````` + + +```````````````````````````````` example +- - - - +. +
+```````````````````````````````` + + +Spaces are allowed at the end: + +```````````````````````````````` example +- - - - +. +
+```````````````````````````````` + + +However, no other characters may occur in the line: + +```````````````````````````````` example +_ _ _ _ a + +a------ + +---a--- +. +

_ _ _ _ a

+

a------

+

---a---

+```````````````````````````````` + + +It is required that all of the [non-whitespace characters] be the same. +So, this is not a thematic break: + +```````````````````````````````` example + *-* +. +

-

+```````````````````````````````` + + +Thematic breaks do not need blank lines before or after: + +```````````````````````````````` example +- foo +*** +- bar +. + +
+ +```````````````````````````````` + + +Thematic breaks can interrupt a paragraph: + +```````````````````````````````` example +Foo +*** +bar +. +

Foo

+
+

bar

+```````````````````````````````` + + +If a line of dashes that meets the above conditions for being a +thematic break could also be interpreted as the underline of a [setext +heading], the interpretation as a +[setext heading] takes precedence. Thus, for example, +this is a setext heading, not a paragraph followed by a thematic break: + +```````````````````````````````` example +Foo +--- +bar +. +

Foo

+

bar

+```````````````````````````````` + + +When both a thematic break and a list item are possible +interpretations of a line, the thematic break takes precedence: + +```````````````````````````````` example +* Foo +* * * +* Bar +. + +
+ +```````````````````````````````` + + +If you want a thematic break in a list item, use a different bullet: + +```````````````````````````````` example +- Foo +- * * * +. + +```````````````````````````````` + + +## ATX headings + +An [ATX heading](@) +consists of a string of characters, parsed as inline content, between an +opening sequence of 1--6 unescaped `#` characters and an optional +closing sequence of any number of unescaped `#` characters. +The opening sequence of `#` characters must be followed by a +[space] or by the end of line. The optional closing sequence of `#`s must be +preceded by a [space] and may be followed by spaces only. The opening +`#` character may be indented 0-3 spaces. The raw contents of the +heading are stripped of leading and trailing spaces before being parsed +as inline content. The heading level is equal to the number of `#` +characters in the opening sequence. + +Simple headings: + +```````````````````````````````` example +# foo +## foo +### foo +#### foo +##### foo +###### foo +. +

foo

+

foo

+

foo

+

foo

+
foo
+
foo
+```````````````````````````````` + + +More than six `#` characters is not a heading: + +```````````````````````````````` example +####### foo +. +

####### foo

+```````````````````````````````` + + +At least one space is required between the `#` characters and the +heading's contents, unless the heading is empty. Note that many +implementations currently do not require the space. However, the +space was required by the +[original ATX implementation](http://www.aaronsw.com/2002/atx/atx.py), +and it helps prevent things like the following from being parsed as +headings: + +```````````````````````````````` example +#5 bolt + +#hashtag +. +

#5 bolt

+

#hashtag

+```````````````````````````````` + + +This is not a heading, because the first `#` is escaped: + +```````````````````````````````` example +\## foo +. +

## foo

+```````````````````````````````` + + +Contents are parsed as inlines: + +```````````````````````````````` example +# foo *bar* \*baz\* +. +

foo bar *baz*

+```````````````````````````````` + + +Leading and trailing [whitespace] is ignored in parsing inline content: + +```````````````````````````````` example +# foo +. +

foo

+```````````````````````````````` + + +One to three spaces indentation are allowed: + +```````````````````````````````` example + ### foo + ## foo + # foo +. +

foo

+

foo

+

foo

+```````````````````````````````` + + +Four spaces are too much: + +```````````````````````````````` example + # foo +. +
# foo
+
+```````````````````````````````` + + +```````````````````````````````` example +foo + # bar +. +

foo +# bar

+```````````````````````````````` + + +A closing sequence of `#` characters is optional: + +```````````````````````````````` example +## foo ## + ### bar ### +. +

foo

+

bar

+```````````````````````````````` + + +It need not be the same length as the opening sequence: + +```````````````````````````````` example +# foo ################################## +##### foo ## +. +

foo

+
foo
+```````````````````````````````` + + +Spaces are allowed after the closing sequence: + +```````````````````````````````` example +### foo ### +. +

foo

+```````````````````````````````` + + +A sequence of `#` characters with anything but [spaces] following it +is not a closing sequence, but counts as part of the contents of the +heading: + +```````````````````````````````` example +### foo ### b +. +

foo ### b

+```````````````````````````````` + + +The closing sequence must be preceded by a space: + +```````````````````````````````` example +# foo# +. +

foo#

+```````````````````````````````` + + +Backslash-escaped `#` characters do not count as part +of the closing sequence: + +```````````````````````````````` example +### foo \### +## foo #\## +# foo \# +. +

foo ###

+

foo ###

+

foo #

+```````````````````````````````` + + +ATX headings need not be separated from surrounding content by blank +lines, and they can interrupt paragraphs: + +```````````````````````````````` example +**** +## foo +**** +. +
+

foo

+
+```````````````````````````````` + + +```````````````````````````````` example +Foo bar +# baz +Bar foo +. +

Foo bar

+

baz

+

Bar foo

+```````````````````````````````` + + +ATX headings can be empty: + +```````````````````````````````` example +## +# +### ### +. +

+

+

+```````````````````````````````` + + +## Setext headings + +A [setext heading](@) consists of one or more +lines of text, each containing at least one [non-whitespace +character], with no more than 3 spaces indentation, followed by +a [setext heading underline]. The lines of text must be such +that, were they not followed by the setext heading underline, +they would be interpreted as a paragraph: they cannot be +interpretable as a [code fence], [ATX heading][ATX headings], +[block quote][block quotes], [thematic break][thematic breaks], +[list item][list items], or [HTML block][HTML blocks]. + +A [setext heading underline](@) is a sequence of +`=` characters or a sequence of `-` characters, with no more than 3 +spaces indentation and any number of trailing spaces. If a line +containing a single `-` can be interpreted as an +empty [list items], it should be interpreted this way +and not as a [setext heading underline]. + +The heading is a level 1 heading if `=` characters are used in +the [setext heading underline], and a level 2 heading if `-` +characters are used. The contents of the heading are the result +of parsing the preceding lines of text as CommonMark inline +content. + +In general, a setext heading need not be preceded or followed by a +blank line. However, it cannot interrupt a paragraph, so when a +setext heading comes after a paragraph, a blank line is needed between +them. + +Simple examples: + +```````````````````````````````` example +Foo *bar* +========= + +Foo *bar* +--------- +. +

Foo bar

+

Foo bar

+```````````````````````````````` + + +The content of the header may span more than one line: + +```````````````````````````````` example +Foo *bar +baz* +==== +. +

Foo bar +baz

+```````````````````````````````` + +The contents are the result of parsing the headings's raw +content as inlines. The heading's raw content is formed by +concatenating the lines and removing initial and final +[whitespace]. + +```````````````````````````````` example + Foo *bar +baz*→ +==== +. +

Foo bar +baz

+```````````````````````````````` + + +The underlining can be any length: + +```````````````````````````````` example +Foo +------------------------- + +Foo += +. +

Foo

+

Foo

+```````````````````````````````` + + +The heading content can be indented up to three spaces, and need +not line up with the underlining: + +```````````````````````````````` example + Foo +--- + + Foo +----- + + Foo + === +. +

Foo

+

Foo

+

Foo

+```````````````````````````````` + + +Four spaces indent is too much: + +```````````````````````````````` example + Foo + --- + + Foo +--- +. +
Foo
+---
+
+Foo
+
+
+```````````````````````````````` + + +The setext heading underline can be indented up to three spaces, and +may have trailing spaces: + +```````````````````````````````` example +Foo + ---- +. +

Foo

+```````````````````````````````` + + +Four spaces is too much: + +```````````````````````````````` example +Foo + --- +. +

Foo +---

+```````````````````````````````` + + +The setext heading underline cannot contain internal spaces: + +```````````````````````````````` example +Foo += = + +Foo +--- - +. +

Foo += =

+

Foo

+
+```````````````````````````````` + + +Trailing spaces in the content line do not cause a line break: + +```````````````````````````````` example +Foo +----- +. +

Foo

+```````````````````````````````` + + +Nor does a backslash at the end: + +```````````````````````````````` example +Foo\ +---- +. +

Foo\

+```````````````````````````````` + + +Since indicators of block structure take precedence over +indicators of inline structure, the following are setext headings: + +```````````````````````````````` example +`Foo +---- +` + + +. +

`Foo

+

`

+

<a title="a lot

+

of dashes"/>

+```````````````````````````````` + + +The setext heading underline cannot be a [lazy continuation +line] in a list item or block quote: + +```````````````````````````````` example +> Foo +--- +. +
+

Foo

+
+
+```````````````````````````````` + + +```````````````````````````````` example +> foo +bar +=== +. +
+

foo +bar +===

+
+```````````````````````````````` + + +```````````````````````````````` example +- Foo +--- +. +
    +
  • Foo
  • +
+
+```````````````````````````````` + + +A blank line is needed between a paragraph and a following +setext heading, since otherwise the paragraph becomes part +of the heading's content: + +```````````````````````````````` example +Foo +Bar +--- +. +

Foo +Bar

+```````````````````````````````` + + +But in general a blank line is not required before or after +setext headings: + +```````````````````````````````` example +--- +Foo +--- +Bar +--- +Baz +. +
+

Foo

+

Bar

+

Baz

+```````````````````````````````` + + +Setext headings cannot be empty: + +```````````````````````````````` example + +==== +. +

====

+```````````````````````````````` + + +Setext heading text lines must not be interpretable as block +constructs other than paragraphs. So, the line of dashes +in these examples gets interpreted as a thematic break: + +```````````````````````````````` example +--- +--- +. +
+
+```````````````````````````````` + + +```````````````````````````````` example +- foo +----- +. +
    +
  • foo
  • +
+
+```````````````````````````````` + + +```````````````````````````````` example + foo +--- +. +
foo
+
+
+```````````````````````````````` + + +```````````````````````````````` example +> foo +----- +. +
+

foo

+
+
+```````````````````````````````` + + +If you want a heading with `> foo` as its literal text, you can +use backslash escapes: + +```````````````````````````````` example +\> foo +------ +. +

> foo

+```````````````````````````````` + + +**Compatibility note:** Most existing Markdown implementations +do not allow the text of setext headings to span multiple lines. +But there is no consensus about how to interpret + +``` markdown +Foo +bar +--- +baz +``` + +One can find four different interpretations: + +1. paragraph "Foo", heading "bar", paragraph "baz" +2. paragraph "Foo bar", thematic break, paragraph "baz" +3. paragraph "Foo bar --- baz" +4. heading "Foo bar", paragraph "baz" + +We find interpretation 4 most natural, and interpretation 4 +increases the expressive power of CommonMark, by allowing +multiline headings. Authors who want interpretation 1 can +put a blank line after the first paragraph: + +```````````````````````````````` example +Foo + +bar +--- +baz +. +

Foo

+

bar

+

baz

+```````````````````````````````` + + +Authors who want interpretation 2 can put blank lines around +the thematic break, + +```````````````````````````````` example +Foo +bar + +--- + +baz +. +

Foo +bar

+
+

baz

+```````````````````````````````` + + +or use a thematic break that cannot count as a [setext heading +underline], such as + +```````````````````````````````` example +Foo +bar +* * * +baz +. +

Foo +bar

+
+

baz

+```````````````````````````````` + + +Authors who want interpretation 3 can use backslash escapes: + +```````````````````````````````` example +Foo +bar +\--- +baz +. +

Foo +bar +--- +baz

+```````````````````````````````` + + +## Indented code blocks + +An [indented code block](@) is composed of one or more +[indented chunks] separated by blank lines. +An [indented chunk](@) is a sequence of non-blank lines, +each indented four or more spaces. The contents of the code block are +the literal contents of the lines, including trailing +[line endings], minus four spaces of indentation. +An indented code block has no [info string]. + +An indented code block cannot interrupt a paragraph, so there must be +a blank line between a paragraph and a following indented code block. +(A blank line is not needed, however, between a code block and a following +paragraph.) + +```````````````````````````````` example + a simple + indented code block +. +
a simple
+  indented code block
+
+```````````````````````````````` + + +If there is any ambiguity between an interpretation of indentation +as a code block and as indicating that material belongs to a [list +item][list items], the list item interpretation takes precedence: + +```````````````````````````````` example + - foo + + bar +. +
    +
  • +

    foo

    +

    bar

    +
  • +
+```````````````````````````````` + + +```````````````````````````````` example +1. foo + + - bar +. +
    +
  1. +

    foo

    +
      +
    • bar
    • +
    +
  2. +
+```````````````````````````````` + + + +The contents of a code block are literal text, and do not get parsed +as Markdown: + +```````````````````````````````` example +
+ *hi* + + - one +. +
<a/>
+*hi*
+
+- one
+
+```````````````````````````````` + + +Here we have three chunks separated by blank lines: + +```````````````````````````````` example + chunk1 + + chunk2 + + + + chunk3 +. +
chunk1
+
+chunk2
+
+
+
+chunk3
+
+```````````````````````````````` + + +Any initial spaces beyond four will be included in the content, even +in interior blank lines: + +```````````````````````````````` example + chunk1 + + chunk2 +. +
chunk1
+  
+  chunk2
+
+```````````````````````````````` + + +An indented code block cannot interrupt a paragraph. (This +allows hanging indents and the like.) + +```````````````````````````````` example +Foo + bar + +. +

Foo +bar

+```````````````````````````````` + + +However, any non-blank line with fewer than four leading spaces ends +the code block immediately. So a paragraph may occur immediately +after indented code: + +```````````````````````````````` example + foo +bar +. +
foo
+
+

bar

+```````````````````````````````` + + +And indented code can occur immediately before and after other kinds of +blocks: + +```````````````````````````````` example +# Heading + foo +Heading +------ + foo +---- +. +

Heading

+
foo
+
+

Heading

+
foo
+
+
+```````````````````````````````` + + +The first line can be indented more than four spaces: + +```````````````````````````````` example + foo + bar +. +
    foo
+bar
+
+```````````````````````````````` + + +Blank lines preceding or following an indented code block +are not included in it: + +```````````````````````````````` example + + + foo + + +. +
foo
+
+```````````````````````````````` + + +Trailing spaces are included in the code block's content: + +```````````````````````````````` example + foo +. +
foo  
+
+```````````````````````````````` + + + +## Fenced code blocks + +A [code fence](@) is a sequence +of at least three consecutive backtick characters (`` ` ``) or +tildes (`~`). (Tildes and backticks cannot be mixed.) +A [fenced code block](@) +begins with a code fence, indented no more than three spaces. + +The line with the opening code fence may optionally contain some text +following the code fence; this is trimmed of leading and trailing +whitespace and called the [info string](@). If the [info string] comes +after a backtick fence, it may not contain any backtick +characters. (The reason for this restriction is that otherwise +some inline code would be incorrectly interpreted as the +beginning of a fenced code block.) + +The content of the code block consists of all subsequent lines, until +a closing [code fence] of the same type as the code block +began with (backticks or tildes), and with at least as many backticks +or tildes as the opening code fence. If the leading code fence is +indented N spaces, then up to N spaces of indentation are removed from +each line of the content (if present). (If a content line is not +indented, it is preserved unchanged. If it is indented less than N +spaces, all of the indentation is removed.) + +The closing code fence may be indented up to three spaces, and may be +followed only by spaces, which are ignored. If the end of the +containing block (or document) is reached and no closing code fence +has been found, the code block contains all of the lines after the +opening code fence until the end of the containing block (or +document). (An alternative spec would require backtracking in the +event that a closing code fence is not found. But this makes parsing +much less efficient, and there seems to be no real down side to the +behavior described here.) + +A fenced code block may interrupt a paragraph, and does not require +a blank line either before or after. + +The content of a code fence is treated as literal text, not parsed +as inlines. The first word of the [info string] is typically used to +specify the language of the code sample, and rendered in the `class` +attribute of the `code` tag. However, this spec does not mandate any +particular treatment of the [info string]. + +Here is a simple example with backticks: + +```````````````````````````````` example +``` +< + > +``` +. +
<
+ >
+
+```````````````````````````````` + + +With tildes: + +```````````````````````````````` example +~~~ +< + > +~~~ +. +
<
+ >
+
+```````````````````````````````` + +Fewer than three backticks is not enough: + +```````````````````````````````` example +`` +foo +`` +. +

foo

+```````````````````````````````` + +The closing code fence must use the same character as the opening +fence: + +```````````````````````````````` example +``` +aaa +~~~ +``` +. +
aaa
+~~~
+
+```````````````````````````````` + + +```````````````````````````````` example +~~~ +aaa +``` +~~~ +. +
aaa
+```
+
+```````````````````````````````` + + +The closing code fence must be at least as long as the opening fence: + +```````````````````````````````` example +```` +aaa +``` +`````` +. +
aaa
+```
+
+```````````````````````````````` + + +```````````````````````````````` example +~~~~ +aaa +~~~ +~~~~ +. +
aaa
+~~~
+
+```````````````````````````````` + + +Unclosed code blocks are closed by the end of the document +(or the enclosing [block quote][block quotes] or [list item][list items]): + +```````````````````````````````` example +``` +. +
+```````````````````````````````` + + +```````````````````````````````` example +````` + +``` +aaa +. +

+```
+aaa
+
+```````````````````````````````` + + +```````````````````````````````` example +> ``` +> aaa + +bbb +. +
+
aaa
+
+
+

bbb

+```````````````````````````````` + + +A code block can have all empty lines as its content: + +```````````````````````````````` example +``` + + +``` +. +

+  
+
+```````````````````````````````` + + +A code block can be empty: + +```````````````````````````````` example +``` +``` +. +
+```````````````````````````````` + + +Fences can be indented. If the opening fence is indented, +content lines will have equivalent opening indentation removed, +if present: + +```````````````````````````````` example + ``` + aaa +aaa +``` +. +
aaa
+aaa
+
+```````````````````````````````` + + +```````````````````````````````` example + ``` +aaa + aaa +aaa + ``` +. +
aaa
+aaa
+aaa
+
+```````````````````````````````` + + +```````````````````````````````` example + ``` + aaa + aaa + aaa + ``` +. +
aaa
+ aaa
+aaa
+
+```````````````````````````````` + + +Four spaces indentation produces an indented code block: + +```````````````````````````````` example + ``` + aaa + ``` +. +
```
+aaa
+```
+
+```````````````````````````````` + + +Closing fences may be indented by 0-3 spaces, and their indentation +need not match that of the opening fence: + +```````````````````````````````` example +``` +aaa + ``` +. +
aaa
+
+```````````````````````````````` + + +```````````````````````````````` example + ``` +aaa + ``` +. +
aaa
+
+```````````````````````````````` + + +This is not a closing fence, because it is indented 4 spaces: + +```````````````````````````````` example +``` +aaa + ``` +. +
aaa
+    ```
+
+```````````````````````````````` + + + +Code fences (opening and closing) cannot contain internal spaces: + +```````````````````````````````` example +``` ``` +aaa +. +

+aaa

+```````````````````````````````` + + +```````````````````````````````` example +~~~~~~ +aaa +~~~ ~~ +. +
aaa
+~~~ ~~
+
+```````````````````````````````` + + +Fenced code blocks can interrupt paragraphs, and can be followed +directly by paragraphs, without a blank line between: + +```````````````````````````````` example +foo +``` +bar +``` +baz +. +

foo

+
bar
+
+

baz

+```````````````````````````````` + + +Other blocks can also occur before and after fenced code blocks +without an intervening blank line: + +```````````````````````````````` example +foo +--- +~~~ +bar +~~~ +# baz +. +

foo

+
bar
+
+

baz

+```````````````````````````````` + + +An [info string] can be provided after the opening code fence. +Although this spec doesn't mandate any particular treatment of +the info string, the first word is typically used to specify +the language of the code block. In HTML output, the language is +normally indicated by adding a class to the `code` element consisting +of `language-` followed by the language name. + +```````````````````````````````` example +```ruby +def foo(x) + return 3 +end +``` +. +
def foo(x)
+  return 3
+end
+
+```````````````````````````````` + + +```````````````````````````````` example +~~~~ ruby startline=3 $%@#$ +def foo(x) + return 3 +end +~~~~~~~ +. +
def foo(x)
+  return 3
+end
+
+```````````````````````````````` + + +```````````````````````````````` example +````; +```` +. +
+```````````````````````````````` + + +[Info strings] for backtick code blocks cannot contain backticks: + +```````````````````````````````` example +``` aa ``` +foo +. +

aa +foo

+```````````````````````````````` + + +[Info strings] for tilde code blocks can contain backticks and tildes: + +```````````````````````````````` example +~~~ aa ``` ~~~ +foo +~~~ +. +
foo
+
+```````````````````````````````` + + +Closing code fences cannot have [info strings]: + +```````````````````````````````` example +``` +``` aaa +``` +. +
``` aaa
+
+```````````````````````````````` + + + +## HTML blocks + +An [HTML block](@) is a group of lines that is treated +as raw HTML (and will not be escaped in HTML output). + +There are seven kinds of [HTML block], which can be defined by their +start and end conditions. The block begins with a line that meets a +[start condition](@) (after up to three spaces optional indentation). +It ends with the first subsequent line that meets a matching [end +condition](@), or the last line of the document, or the last line of +the [container block](#container-blocks) containing the current HTML +block, if no line is encountered that meets the [end condition]. If +the first line meets both the [start condition] and the [end +condition], the block will contain just that line. + +1. **Start condition:** line begins with the string ``, or the end of the line.\ +**End condition:** line contains an end tag +``, ``, or `` (case-insensitive; it +need not match the start tag). + +2. **Start condition:** line begins with the string ``. + +3. **Start condition:** line begins with the string ``. + +4. **Start condition:** line begins with the string ``. + +5. **Start condition:** line begins with the string +``. + +6. **Start condition:** line begins the string `<` or ``, or +the string `/>`.\ +**End condition:** line is followed by a [blank line]. + +7. **Start condition:** line begins with a complete [open tag] +(with any [tag name] other than `script`, +`style`, or `pre`) or a complete [closing tag], +followed only by [whitespace] or the end of the line.\ +**End condition:** line is followed by a [blank line]. + +HTML blocks continue until they are closed by their appropriate +[end condition], or the last line of the document or other [container +block](#container-blocks). This means any HTML **within an HTML +block** that might otherwise be recognised as a start condition will +be ignored by the parser and passed through as-is, without changing +the parser's state. + +For instance, `
` within a HTML block started by `` will not affect
+the parser state; as the HTML block was started in by start condition 6, it
+will end at any blank line. This can be surprising:
+
+```````````````````````````````` example
+
+
+**Hello**,
+
+_world_.
+
+
+. +
+
+**Hello**,
+

world. +

+
+```````````````````````````````` + +In this case, the HTML block is terminated by the newline — the `**Hello**` +text remains verbatim — and regular parsing resumes, with a paragraph, +emphasised `world` and inline and block HTML following. + +All types of [HTML blocks] except type 7 may interrupt +a paragraph. Blocks of type 7 may not interrupt a paragraph. +(This restriction is intended to prevent unwanted interpretation +of long tags inside a wrapped paragraph as starting HTML blocks.) + +Some simple examples follow. Here are some basic HTML blocks +of type 6: + +```````````````````````````````` example + + + + +
+ hi +
+ +okay. +. + + + + +
+ hi +
+

okay.

+```````````````````````````````` + + +```````````````````````````````` example +
+ *hello* + +. +
+ *hello* + +```````````````````````````````` + + +A block can also start with a closing tag: + +```````````````````````````````` example +
+*foo* +. +
+*foo* +```````````````````````````````` + + +Here we have two HTML blocks with a Markdown paragraph between them: + +```````````````````````````````` example +
+ +*Markdown* + +
+. +
+

Markdown

+
+```````````````````````````````` + + +The tag on the first line can be partial, as long +as it is split where there would be whitespace: + +```````````````````````````````` example +
+
+. +
+
+```````````````````````````````` + + +```````````````````````````````` example +
+
+. +
+
+```````````````````````````````` + + +An open tag need not be closed: +```````````````````````````````` example +
+*foo* + +*bar* +. +
+*foo* +

bar

+```````````````````````````````` + + + +A partial tag need not even be completed (garbage +in, garbage out): + +```````````````````````````````` example +
+. + +```````````````````````````````` + + +```````````````````````````````` example +
+foo +
+. +
+foo +
+```````````````````````````````` + + +Everything until the next blank line or end of document +gets included in the HTML block. So, in the following +example, what looks like a Markdown code block +is actually part of the HTML block, which continues until a blank +line or the end of the document is reached: + +```````````````````````````````` example +
+``` c +int x = 33; +``` +. +
+``` c +int x = 33; +``` +```````````````````````````````` + + +To start an [HTML block] with a tag that is *not* in the +list of block-level tags in (6), you must put the tag by +itself on the first line (and it must be complete): + +```````````````````````````````` example + +*bar* + +. + +*bar* + +```````````````````````````````` + + +In type 7 blocks, the [tag name] can be anything: + +```````````````````````````````` example + +*bar* + +. + +*bar* + +```````````````````````````````` + + +```````````````````````````````` example + +*bar* + +. + +*bar* + +```````````````````````````````` + + +```````````````````````````````` example + +*bar* +. + +*bar* +```````````````````````````````` + + +These rules are designed to allow us to work with tags that +can function as either block-level or inline-level tags. +The `` tag is a nice example. We can surround content with +`` tags in three different ways. In this case, we get a raw +HTML block, because the `` tag is on a line by itself: + +```````````````````````````````` example + +*foo* + +. + +*foo* + +```````````````````````````````` + + +In this case, we get a raw HTML block that just includes +the `` tag (because it ends with the following blank +line). So the contents get interpreted as CommonMark: + +```````````````````````````````` example + + +*foo* + + +. + +

foo

+
+```````````````````````````````` + + +Finally, in this case, the `` tags are interpreted +as [raw HTML] *inside* the CommonMark paragraph. (Because +the tag is not on a line by itself, we get inline HTML +rather than an [HTML block].) + +```````````````````````````````` example +*foo* +. +

foo

+```````````````````````````````` + + +HTML tags designed to contain literal content +(`script`, `style`, `pre`), comments, processing instructions, +and declarations are treated somewhat differently. +Instead of ending at the first blank line, these blocks +end at the first line containing a corresponding end tag. +As a result, these blocks can contain blank lines: + +A pre tag (type 1): + +```````````````````````````````` example +

+import Text.HTML.TagSoup
+
+main :: IO ()
+main = print $ parseTags tags
+
+okay +. +

+import Text.HTML.TagSoup
+
+main :: IO ()
+main = print $ parseTags tags
+
+

okay

+```````````````````````````````` + + +A script tag (type 1): + +```````````````````````````````` example + +okay +. + +

okay

+```````````````````````````````` + + +A style tag (type 1): + +```````````````````````````````` example + +okay +. + +

okay

+```````````````````````````````` + + +If there is no matching end tag, the block will end at the +end of the document (or the enclosing [block quote][block quotes] +or [list item][list items]): + +```````````````````````````````` example + +*foo* +. + +

foo

+```````````````````````````````` + + +```````````````````````````````` example +*bar* +*baz* +. +*bar* +

baz

+```````````````````````````````` + + +Note that anything on the last line after the +end tag will be included in the [HTML block]: + +```````````````````````````````` example +1. *bar* +. +1. *bar* +```````````````````````````````` + + +A comment (type 2): + +```````````````````````````````` example + +okay +. + +

okay

+```````````````````````````````` + + + +A processing instruction (type 3): + +```````````````````````````````` example +'; + +?> +okay +. +'; + +?> +

okay

+```````````````````````````````` + + +A declaration (type 4): + +```````````````````````````````` example + +. + +```````````````````````````````` + + +CDATA (type 5): + +```````````````````````````````` example + +okay +. + +

okay

+```````````````````````````````` + + +The opening tag can be indented 1-3 spaces, but not 4: + +```````````````````````````````` example + + + +. + +
<!-- foo -->
+
+```````````````````````````````` + + +```````````````````````````````` example +
+ +
+. +
+
<div>
+
+```````````````````````````````` + + +An HTML block of types 1--6 can interrupt a paragraph, and need not be +preceded by a blank line. + +```````````````````````````````` example +Foo +
+bar +
+. +

Foo

+
+bar +
+```````````````````````````````` + + +However, a following blank line is needed, except at the end of +a document, and except for blocks of types 1--5, [above][HTML +block]: + +```````````````````````````````` example +
+bar +
+*foo* +. +
+bar +
+*foo* +```````````````````````````````` + + +HTML blocks of type 7 cannot interrupt a paragraph: + +```````````````````````````````` example +Foo + +baz +. +

Foo + +baz

+```````````````````````````````` + + +This rule differs from John Gruber's original Markdown syntax +specification, which says: + +> The only restrictions are that block-level HTML elements — +> e.g. `
`, ``, `
`, `

`, etc. — must be separated from +> surrounding content by blank lines, and the start and end tags of the +> block should not be indented with tabs or spaces. + +In some ways Gruber's rule is more restrictive than the one given +here: + +- It requires that an HTML block be preceded by a blank line. +- It does not allow the start tag to be indented. +- It requires a matching end tag, which it also does not allow to + be indented. + +Most Markdown implementations (including some of Gruber's own) do not +respect all of these restrictions. + +There is one respect, however, in which Gruber's rule is more liberal +than the one given here, since it allows blank lines to occur inside +an HTML block. There are two reasons for disallowing them here. +First, it removes the need to parse balanced tags, which is +expensive and can require backtracking from the end of the document +if no matching end tag is found. Second, it provides a very simple +and flexible way of including Markdown content inside HTML tags: +simply separate the Markdown from the HTML using blank lines: + +Compare: + +```````````````````````````````` example +

+ +*Emphasized* text. + +
+. +
+

Emphasized text.

+
+```````````````````````````````` + + +```````````````````````````````` example +
+*Emphasized* text. +
+. +
+*Emphasized* text. +
+```````````````````````````````` + + +Some Markdown implementations have adopted a convention of +interpreting content inside tags as text if the open tag has +the attribute `markdown=1`. The rule given above seems a simpler and +more elegant way of achieving the same expressive power, which is also +much simpler to parse. + +The main potential drawback is that one can no longer paste HTML +blocks into Markdown documents with 100% reliability. However, +*in most cases* this will work fine, because the blank lines in +HTML are usually followed by HTML block tags. For example: + +```````````````````````````````` example +
+ + + + + + + +
+Hi +
+. + + + + +
+Hi +
+```````````````````````````````` + + +There are problems, however, if the inner tags are indented +*and* separated by spaces, as then they will be interpreted as +an indented code block: + +```````````````````````````````` example + + + + + + + + +
+ Hi +
+. + + +
<td>
+  Hi
+</td>
+
+ +
+```````````````````````````````` + + +Fortunately, blank lines are usually not necessary and can be +deleted. The exception is inside `
` tags, but as described
+[above][HTML blocks], raw HTML blocks starting with `
`
+*can* contain blank lines.
+
+## Link reference definitions
+
+A [link reference definition](@)
+consists of a [link label], indented up to three spaces, followed
+by a colon (`:`), optional [whitespace] (including up to one
+[line ending]), a [link destination],
+optional [whitespace] (including up to one
+[line ending]), and an optional [link
+title], which if it is present must be separated
+from the [link destination] by [whitespace].
+No further [non-whitespace characters] may occur on the line.
+
+A [link reference definition]
+does not correspond to a structural element of a document.  Instead, it
+defines a label which can be used in [reference links]
+and reference-style [images] elsewhere in the document.  [Link
+reference definitions] can come either before or after the links that use
+them.
+
+```````````````````````````````` example
+[foo]: /url "title"
+
+[foo]
+.
+

foo

+```````````````````````````````` + + +```````````````````````````````` example + [foo]: + /url + 'the title' + +[foo] +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +[Foo*bar\]]:my_(url) 'title (with parens)' + +[Foo*bar\]] +. +

Foo*bar]

+```````````````````````````````` + + +```````````````````````````````` example +[Foo bar]: + +'title' + +[Foo bar] +. +

Foo bar

+```````````````````````````````` + + +The title may extend over multiple lines: + +```````````````````````````````` example +[foo]: /url ' +title +line1 +line2 +' + +[foo] +. +

foo

+```````````````````````````````` + + +However, it may not contain a [blank line]: + +```````````````````````````````` example +[foo]: /url 'title + +with blank line' + +[foo] +. +

[foo]: /url 'title

+

with blank line'

+

[foo]

+```````````````````````````````` + + +The title may be omitted: + +```````````````````````````````` example +[foo]: +/url + +[foo] +. +

foo

+```````````````````````````````` + + +The link destination may not be omitted: + +```````````````````````````````` example +[foo]: + +[foo] +. +

[foo]:

+

[foo]

+```````````````````````````````` + + However, an empty link destination may be specified using + angle brackets: + +```````````````````````````````` example +[foo]: <> + +[foo] +. +

foo

+```````````````````````````````` + +The title must be separated from the link destination by +whitespace: + +```````````````````````````````` example +[foo]: (baz) + +[foo] +. +

[foo]: (baz)

+

[foo]

+```````````````````````````````` + + +Both title and destination can contain backslash escapes +and literal backslashes: + +```````````````````````````````` example +[foo]: /url\bar\*baz "foo\"bar\baz" + +[foo] +. +

foo

+```````````````````````````````` + + +A link can come before its corresponding definition: + +```````````````````````````````` example +[foo] + +[foo]: url +. +

foo

+```````````````````````````````` + + +If there are several matching definitions, the first one takes +precedence: + +```````````````````````````````` example +[foo] + +[foo]: first +[foo]: second +. +

foo

+```````````````````````````````` + + +As noted in the section on [Links], matching of labels is +case-insensitive (see [matches]). + +```````````````````````````````` example +[FOO]: /url + +[Foo] +. +

Foo

+```````````````````````````````` + + +```````````````````````````````` example +[ΑΓΩ]: /φου + +[αγω] +. +

αγω

+```````````````````````````````` + + +Here is a link reference definition with no corresponding link. +It contributes nothing to the document. + +```````````````````````````````` example +[foo]: /url +. +```````````````````````````````` + + +Here is another one: + +```````````````````````````````` example +[ +foo +]: /url +bar +. +

bar

+```````````````````````````````` + + +This is not a link reference definition, because there are +[non-whitespace characters] after the title: + +```````````````````````````````` example +[foo]: /url "title" ok +. +

[foo]: /url "title" ok

+```````````````````````````````` + + +This is a link reference definition, but it has no title: + +```````````````````````````````` example +[foo]: /url +"title" ok +. +

"title" ok

+```````````````````````````````` + + +This is not a link reference definition, because it is indented +four spaces: + +```````````````````````````````` example + [foo]: /url "title" + +[foo] +. +
[foo]: /url "title"
+
+

[foo]

+```````````````````````````````` + + +This is not a link reference definition, because it occurs inside +a code block: + +```````````````````````````````` example +``` +[foo]: /url +``` + +[foo] +. +
[foo]: /url
+
+

[foo]

+```````````````````````````````` + + +A [link reference definition] cannot interrupt a paragraph. + +```````````````````````````````` example +Foo +[bar]: /baz + +[bar] +. +

Foo +[bar]: /baz

+

[bar]

+```````````````````````````````` + + +However, it can directly follow other block elements, such as headings +and thematic breaks, and it need not be followed by a blank line. + +```````````````````````````````` example +# [Foo] +[foo]: /url +> bar +. +

Foo

+
+

bar

+
+```````````````````````````````` + +```````````````````````````````` example +[foo]: /url +bar +=== +[foo] +. +

bar

+

foo

+```````````````````````````````` + +```````````````````````````````` example +[foo]: /url +=== +[foo] +. +

=== +foo

+```````````````````````````````` + + +Several [link reference definitions] +can occur one after another, without intervening blank lines. + +```````````````````````````````` example +[foo]: /foo-url "foo" +[bar]: /bar-url + "bar" +[baz]: /baz-url + +[foo], +[bar], +[baz] +. +

foo, +bar, +baz

+```````````````````````````````` + + +[Link reference definitions] can occur +inside block containers, like lists and block quotations. They +affect the entire document, not just the container in which they +are defined: + +```````````````````````````````` example +[foo] + +> [foo]: /url +. +

foo

+
+
+```````````````````````````````` + + +Whether something is a [link reference definition] is +independent of whether the link reference it defines is +used in the document. Thus, for example, the following +document contains just a link reference definition, and +no visible content: + +```````````````````````````````` example +[foo]: /url +. +```````````````````````````````` + + +## Paragraphs + +A sequence of non-blank lines that cannot be interpreted as other +kinds of blocks forms a [paragraph](@). +The contents of the paragraph are the result of parsing the +paragraph's raw content as inlines. The paragraph's raw content +is formed by concatenating the lines and removing initial and final +[whitespace]. + +A simple example with two paragraphs: + +```````````````````````````````` example +aaa + +bbb +. +

aaa

+

bbb

+```````````````````````````````` + + +Paragraphs can contain multiple lines, but no blank lines: + +```````````````````````````````` example +aaa +bbb + +ccc +ddd +. +

aaa +bbb

+

ccc +ddd

+```````````````````````````````` + + +Multiple blank lines between paragraph have no effect: + +```````````````````````````````` example +aaa + + +bbb +. +

aaa

+

bbb

+```````````````````````````````` + + +Leading spaces are skipped: + +```````````````````````````````` example + aaa + bbb +. +

aaa +bbb

+```````````````````````````````` + + +Lines after the first may be indented any amount, since indented +code blocks cannot interrupt paragraphs. + +```````````````````````````````` example +aaa + bbb + ccc +. +

aaa +bbb +ccc

+```````````````````````````````` + + +However, the first line may be indented at most three spaces, +or an indented code block will be triggered: + +```````````````````````````````` example + aaa +bbb +. +

aaa +bbb

+```````````````````````````````` + + +```````````````````````````````` example + aaa +bbb +. +
aaa
+
+

bbb

+```````````````````````````````` + + +Final spaces are stripped before inline parsing, so a paragraph +that ends with two or more spaces will not end with a [hard line +break]: + +```````````````````````````````` example +aaa +bbb +. +

aaa
+bbb

+```````````````````````````````` + + +## Blank lines + +[Blank lines] between block-level elements are ignored, +except for the role they play in determining whether a [list] +is [tight] or [loose]. + +Blank lines at the beginning and end of the document are also ignored. + +```````````````````````````````` example + + +aaa + + +# aaa + + +. +

aaa

+

aaa

+```````````````````````````````` + + + +# Container blocks + +A [container block](#container-blocks) is a block that has other +blocks as its contents. There are two basic kinds of container blocks: +[block quotes] and [list items]. +[Lists] are meta-containers for [list items]. + +We define the syntax for container blocks recursively. The general +form of the definition is: + +> If X is a sequence of blocks, then the result of +> transforming X in such-and-such a way is a container of type Y +> with these blocks as its content. + +So, we explain what counts as a block quote or list item by explaining +how these can be *generated* from their contents. This should suffice +to define the syntax, although it does not give a recipe for *parsing* +these constructions. (A recipe is provided below in the section entitled +[A parsing strategy](#appendix-a-parsing-strategy).) + +## Block quotes + +A [block quote marker](@) +consists of 0-3 spaces of initial indent, plus (a) the character `>` together +with a following space, or (b) a single character `>` not followed by a space. + +The following rules define [block quotes]: + +1. **Basic case.** If a string of lines *Ls* constitute a sequence + of blocks *Bs*, then the result of prepending a [block quote + marker] to the beginning of each line in *Ls* + is a [block quote](#block-quotes) containing *Bs*. + +2. **Laziness.** If a string of lines *Ls* constitute a [block + quote](#block-quotes) with contents *Bs*, then the result of deleting + the initial [block quote marker] from one or + more lines in which the next [non-whitespace character] after the [block + quote marker] is [paragraph continuation + text] is a block quote with *Bs* as its content. + [Paragraph continuation text](@) is text + that will be parsed as part of the content of a paragraph, but does + not occur at the beginning of the paragraph. + +3. **Consecutiveness.** A document cannot contain two [block + quotes] in a row unless there is a [blank line] between them. + +Nothing else counts as a [block quote](#block-quotes). + +Here is a simple example: + +```````````````````````````````` example +> # Foo +> bar +> baz +. +
+

Foo

+

bar +baz

+
+```````````````````````````````` + + +The spaces after the `>` characters can be omitted: + +```````````````````````````````` example +># Foo +>bar +> baz +. +
+

Foo

+

bar +baz

+
+```````````````````````````````` + + +The `>` characters can be indented 1-3 spaces: + +```````````````````````````````` example + > # Foo + > bar + > baz +. +
+

Foo

+

bar +baz

+
+```````````````````````````````` + + +Four spaces gives us a code block: + +```````````````````````````````` example + > # Foo + > bar + > baz +. +
> # Foo
+> bar
+> baz
+
+```````````````````````````````` + + +The Laziness clause allows us to omit the `>` before +[paragraph continuation text]: + +```````````````````````````````` example +> # Foo +> bar +baz +. +
+

Foo

+

bar +baz

+
+```````````````````````````````` + + +A block quote can contain some lazy and some non-lazy +continuation lines: + +```````````````````````````````` example +> bar +baz +> foo +. +
+

bar +baz +foo

+
+```````````````````````````````` + + +Laziness only applies to lines that would have been continuations of +paragraphs had they been prepended with [block quote markers]. +For example, the `> ` cannot be omitted in the second line of + +``` markdown +> foo +> --- +``` + +without changing the meaning: + +```````````````````````````````` example +> foo +--- +. +
+

foo

+
+
+```````````````````````````````` + + +Similarly, if we omit the `> ` in the second line of + +``` markdown +> - foo +> - bar +``` + +then the block quote ends after the first line: + +```````````````````````````````` example +> - foo +- bar +. +
+
    +
  • foo
  • +
+
+
    +
  • bar
  • +
+```````````````````````````````` + + +For the same reason, we can't omit the `> ` in front of +subsequent lines of an indented or fenced code block: + +```````````````````````````````` example +> foo + bar +. +
+
foo
+
+
+
bar
+
+```````````````````````````````` + + +```````````````````````````````` example +> ``` +foo +``` +. +
+
+
+

foo

+
+```````````````````````````````` + + +Note that in the following case, we have a [lazy +continuation line]: + +```````````````````````````````` example +> foo + - bar +. +
+

foo +- bar

+
+```````````````````````````````` + + +To see why, note that in + +```markdown +> foo +> - bar +``` + +the `- bar` is indented too far to start a list, and can't +be an indented code block because indented code blocks cannot +interrupt paragraphs, so it is [paragraph continuation text]. + +A block quote can be empty: + +```````````````````````````````` example +> +. +
+
+```````````````````````````````` + + +```````````````````````````````` example +> +> +> +. +
+
+```````````````````````````````` + + +A block quote can have initial or final blank lines: + +```````````````````````````````` example +> +> foo +> +. +
+

foo

+
+```````````````````````````````` + + +A blank line always separates block quotes: + +```````````````````````````````` example +> foo + +> bar +. +
+

foo

+
+
+

bar

+
+```````````````````````````````` + + +(Most current Markdown implementations, including John Gruber's +original `Markdown.pl`, will parse this example as a single block quote +with two paragraphs. But it seems better to allow the author to decide +whether two block quotes or one are wanted.) + +Consecutiveness means that if we put these block quotes together, +we get a single block quote: + +```````````````````````````````` example +> foo +> bar +. +
+

foo +bar

+
+```````````````````````````````` + + +To get a block quote with two paragraphs, use: + +```````````````````````````````` example +> foo +> +> bar +. +
+

foo

+

bar

+
+```````````````````````````````` + + +Block quotes can interrupt paragraphs: + +```````````````````````````````` example +foo +> bar +. +

foo

+
+

bar

+
+```````````````````````````````` + + +In general, blank lines are not needed before or after block +quotes: + +```````````````````````````````` example +> aaa +*** +> bbb +. +
+

aaa

+
+
+
+

bbb

+
+```````````````````````````````` + + +However, because of laziness, a blank line is needed between +a block quote and a following paragraph: + +```````````````````````````````` example +> bar +baz +. +
+

bar +baz

+
+```````````````````````````````` + + +```````````````````````````````` example +> bar + +baz +. +
+

bar

+
+

baz

+```````````````````````````````` + + +```````````````````````````````` example +> bar +> +baz +. +
+

bar

+
+

baz

+```````````````````````````````` + + +It is a consequence of the Laziness rule that any number +of initial `>`s may be omitted on a continuation line of a +nested block quote: + +```````````````````````````````` example +> > > foo +bar +. +
+
+
+

foo +bar

+
+
+
+```````````````````````````````` + + +```````````````````````````````` example +>>> foo +> bar +>>baz +. +
+
+
+

foo +bar +baz

+
+
+
+```````````````````````````````` + + +When including an indented code block in a block quote, +remember that the [block quote marker] includes +both the `>` and a following space. So *five spaces* are needed after +the `>`: + +```````````````````````````````` example +> code + +> not code +. +
+
code
+
+
+
+

not code

+
+```````````````````````````````` + + + +## List items + +A [list marker](@) is a +[bullet list marker] or an [ordered list marker]. + +A [bullet list marker](@) +is a `-`, `+`, or `*` character. + +An [ordered list marker](@) +is a sequence of 1--9 arabic digits (`0-9`), followed by either a +`.` character or a `)` character. (The reason for the length +limit is that with 10 digits we start seeing integer overflows +in some browsers.) + +The following rules define [list items]: + +1. **Basic case.** If a sequence of lines *Ls* constitute a sequence of + blocks *Bs* starting with a [non-whitespace character], and *M* is a + list marker of width *W* followed by 1 ≤ *N* ≤ 4 spaces, then the result + of prepending *M* and the following spaces to the first line of + *Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a + list item with *Bs* as its contents. The type of the list item + (bullet or ordered) is determined by the type of its list marker. + If the list item is ordered, then it is also assigned a start + number, based on the ordered list marker. + + Exceptions: + + 1. When the first list item in a [list] interrupts + a paragraph---that is, when it starts on a line that would + otherwise count as [paragraph continuation text]---then (a) + the lines *Ls* must not begin with a blank line, and (b) if + the list item is ordered, the start number must be 1. + 2. If any line is a [thematic break][thematic breaks] then + that line is not a list item. + +For example, let *Ls* be the lines + +```````````````````````````````` example +A paragraph +with two lines. + + indented code + +> A block quote. +. +

A paragraph +with two lines.

+
indented code
+
+
+

A block quote.

+
+```````````````````````````````` + + +And let *M* be the marker `1.`, and *N* = 2. Then rule #1 says +that the following is an ordered list item with start number 1, +and the same contents as *Ls*: + +```````````````````````````````` example +1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
    +
  1. +

    A paragraph +with two lines.

    +
    indented code
    +
    +
    +

    A block quote.

    +
    +
  2. +
+```````````````````````````````` + + +The most important thing to notice is that the position of +the text after the list marker determines how much indentation +is needed in subsequent blocks in the list item. If the list +marker takes up two spaces, and there are three spaces between +the list marker and the next [non-whitespace character], then blocks +must be indented five spaces in order to fall under the list +item. + +Here are some examples showing how far content must be indented to be +put under the list item: + +```````````````````````````````` example +- one + + two +. +
    +
  • one
  • +
+

two

+```````````````````````````````` + + +```````````````````````````````` example +- one + + two +. +
    +
  • +

    one

    +

    two

    +
  • +
+```````````````````````````````` + + +```````````````````````````````` example + - one + + two +. +
    +
  • one
  • +
+
 two
+
+```````````````````````````````` + + +```````````````````````````````` example + - one + + two +. +
    +
  • +

    one

    +

    two

    +
  • +
+```````````````````````````````` + + +It is tempting to think of this in terms of columns: the continuation +blocks must be indented at least to the column of the first +[non-whitespace character] after the list marker. However, that is not quite right. +The spaces after the list marker determine how much relative indentation +is needed. Which column this indentation reaches will depend on +how the list item is embedded in other constructions, as shown by +this example: + +```````````````````````````````` example + > > 1. one +>> +>> two +. +
+
+
    +
  1. +

    one

    +

    two

    +
  2. +
+
+
+```````````````````````````````` + + +Here `two` occurs in the same column as the list marker `1.`, +but is actually contained in the list item, because there is +sufficient indentation after the last containing blockquote marker. + +The converse is also possible. In the following example, the word `two` +occurs far to the right of the initial text of the list item, `one`, but +it is not considered part of the list item, because it is not indented +far enough past the blockquote marker: + +```````````````````````````````` example +>>- one +>> + > > two +. +
+
+
    +
  • one
  • +
+

two

+
+
+```````````````````````````````` + + +Note that at least one space is needed between the list marker and +any following content, so these are not list items: + +```````````````````````````````` example +-one + +2.two +. +

-one

+

2.two

+```````````````````````````````` + + +A list item may contain blocks that are separated by more than +one blank line. + +```````````````````````````````` example +- foo + + + bar +. +
    +
  • +

    foo

    +

    bar

    +
  • +
+```````````````````````````````` + + +A list item may contain any kind of block: + +```````````````````````````````` example +1. foo + + ``` + bar + ``` + + baz + + > bam +. +
    +
  1. +

    foo

    +
    bar
    +
    +

    baz

    +
    +

    bam

    +
    +
  2. +
+```````````````````````````````` + + +A list item that contains an indented code block will preserve +empty lines within the code block verbatim. + +```````````````````````````````` example +- Foo + + bar + + + baz +. +
    +
  • +

    Foo

    +
    bar
    +
    +
    +baz
    +
    +
  • +
+```````````````````````````````` + +Note that ordered list start numbers must be nine digits or less: + +```````````````````````````````` example +123456789. ok +. +
    +
  1. ok
  2. +
+```````````````````````````````` + + +```````````````````````````````` example +1234567890. not ok +. +

1234567890. not ok

+```````````````````````````````` + + +A start number may begin with 0s: + +```````````````````````````````` example +0. ok +. +
    +
  1. ok
  2. +
+```````````````````````````````` + + +```````````````````````````````` example +003. ok +. +
    +
  1. ok
  2. +
+```````````````````````````````` + + +A start number may not be negative: + +```````````````````````````````` example +-1. not ok +. +

-1. not ok

+```````````````````````````````` + + + +2. **Item starting with indented code.** If a sequence of lines *Ls* + constitute a sequence of blocks *Bs* starting with an indented code + block, and *M* is a list marker of width *W* followed by + one space, then the result of prepending *M* and the following + space to the first line of *Ls*, and indenting subsequent lines of + *Ls* by *W + 1* spaces, is a list item with *Bs* as its contents. + If a line is empty, then it need not be indented. The type of the + list item (bullet or ordered) is determined by the type of its list + marker. If the list item is ordered, then it is also assigned a + start number, based on the ordered list marker. + +An indented code block will have to be indented four spaces beyond +the edge of the region where text will be included in the list item. +In the following case that is 6 spaces: + +```````````````````````````````` example +- foo + + bar +. +
    +
  • +

    foo

    +
    bar
    +
    +
  • +
+```````````````````````````````` + + +And in this case it is 11 spaces: + +```````````````````````````````` example + 10. foo + + bar +. +
    +
  1. +

    foo

    +
    bar
    +
    +
  2. +
+```````````````````````````````` + + +If the *first* block in the list item is an indented code block, +then by rule #2, the contents must be indented *one* space after the +list marker: + +```````````````````````````````` example + indented code + +paragraph + + more code +. +
indented code
+
+

paragraph

+
more code
+
+```````````````````````````````` + + +```````````````````````````````` example +1. indented code + + paragraph + + more code +. +
    +
  1. +
    indented code
    +
    +

    paragraph

    +
    more code
    +
    +
  2. +
+```````````````````````````````` + + +Note that an additional space indent is interpreted as space +inside the code block: + +```````````````````````````````` example +1. indented code + + paragraph + + more code +. +
    +
  1. +
     indented code
    +
    +

    paragraph

    +
    more code
    +
    +
  2. +
+```````````````````````````````` + + +Note that rules #1 and #2 only apply to two cases: (a) cases +in which the lines to be included in a list item begin with a +[non-whitespace character], and (b) cases in which +they begin with an indented code +block. In a case like the following, where the first block begins with +a three-space indent, the rules do not allow us to form a list item by +indenting the whole thing and prepending a list marker: + +```````````````````````````````` example + foo + +bar +. +

foo

+

bar

+```````````````````````````````` + + +```````````````````````````````` example +- foo + + bar +. +
    +
  • foo
  • +
+

bar

+```````````````````````````````` + + +This is not a significant restriction, because when a block begins +with 1-3 spaces indent, the indentation can always be removed without +a change in interpretation, allowing rule #1 to be applied. So, in +the above case: + +```````````````````````````````` example +- foo + + bar +. +
    +
  • +

    foo

    +

    bar

    +
  • +
+```````````````````````````````` + + +3. **Item starting with a blank line.** If a sequence of lines *Ls* + starting with a single [blank line] constitute a (possibly empty) + sequence of blocks *Bs*, not separated from each other by more than + one blank line, and *M* is a list marker of width *W*, + then the result of prepending *M* to the first line of *Ls*, and + indenting subsequent lines of *Ls* by *W + 1* spaces, is a list + item with *Bs* as its contents. + If a line is empty, then it need not be indented. The type of the + list item (bullet or ordered) is determined by the type of its list + marker. If the list item is ordered, then it is also assigned a + start number, based on the ordered list marker. + +Here are some list items that start with a blank line but are not empty: + +```````````````````````````````` example +- + foo +- + ``` + bar + ``` +- + baz +. +
    +
  • foo
  • +
  • +
    bar
    +
    +
  • +
  • +
    baz
    +
    +
  • +
+```````````````````````````````` + +When the list item starts with a blank line, the number of spaces +following the list marker doesn't change the required indentation: + +```````````````````````````````` example +- + foo +. +
    +
  • foo
  • +
+```````````````````````````````` + + +A list item can begin with at most one blank line. +In the following example, `foo` is not part of the list +item: + +```````````````````````````````` example +- + + foo +. +
    +
  • +
+

foo

+```````````````````````````````` + + +Here is an empty bullet list item: + +```````````````````````````````` example +- foo +- +- bar +. +
    +
  • foo
  • +
  • +
  • bar
  • +
+```````````````````````````````` + + +It does not matter whether there are spaces following the [list marker]: + +```````````````````````````````` example +- foo +- +- bar +. +
    +
  • foo
  • +
  • +
  • bar
  • +
+```````````````````````````````` + + +Here is an empty ordered list item: + +```````````````````````````````` example +1. foo +2. +3. bar +. +
    +
  1. foo
  2. +
  3. +
  4. bar
  5. +
+```````````````````````````````` + + +A list may start or end with an empty list item: + +```````````````````````````````` example +* +. +
    +
  • +
+```````````````````````````````` + +However, an empty list item cannot interrupt a paragraph: + +```````````````````````````````` example +foo +* + +foo +1. +. +

foo +*

+

foo +1.

+```````````````````````````````` + + +4. **Indentation.** If a sequence of lines *Ls* constitutes a list item + according to rule #1, #2, or #3, then the result of indenting each line + of *Ls* by 1-3 spaces (the same for each line) also constitutes a + list item with the same contents and attributes. If a line is + empty, then it need not be indented. + +Indented one space: + +```````````````````````````````` example + 1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
    +
  1. +

    A paragraph +with two lines.

    +
    indented code
    +
    +
    +

    A block quote.

    +
    +
  2. +
+```````````````````````````````` + + +Indented two spaces: + +```````````````````````````````` example + 1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
    +
  1. +

    A paragraph +with two lines.

    +
    indented code
    +
    +
    +

    A block quote.

    +
    +
  2. +
+```````````````````````````````` + + +Indented three spaces: + +```````````````````````````````` example + 1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
    +
  1. +

    A paragraph +with two lines.

    +
    indented code
    +
    +
    +

    A block quote.

    +
    +
  2. +
+```````````````````````````````` + + +Four spaces indent gives a code block: + +```````````````````````````````` example + 1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
1.  A paragraph
+    with two lines.
+
+        indented code
+
+    > A block quote.
+
+```````````````````````````````` + + + +5. **Laziness.** If a string of lines *Ls* constitute a [list + item](#list-items) with contents *Bs*, then the result of deleting + some or all of the indentation from one or more lines in which the + next [non-whitespace character] after the indentation is + [paragraph continuation text] is a + list item with the same contents and attributes. The unindented + lines are called + [lazy continuation line](@)s. + +Here is an example with [lazy continuation lines]: + +```````````````````````````````` example + 1. A paragraph +with two lines. + + indented code + + > A block quote. +. +
    +
  1. +

    A paragraph +with two lines.

    +
    indented code
    +
    +
    +

    A block quote.

    +
    +
  2. +
+```````````````````````````````` + + +Indentation can be partially deleted: + +```````````````````````````````` example + 1. A paragraph + with two lines. +. +
    +
  1. A paragraph +with two lines.
  2. +
+```````````````````````````````` + + +These examples show how laziness can work in nested structures: + +```````````````````````````````` example +> 1. > Blockquote +continued here. +. +
+
    +
  1. +
    +

    Blockquote +continued here.

    +
    +
  2. +
+
+```````````````````````````````` + + +```````````````````````````````` example +> 1. > Blockquote +> continued here. +. +
+
    +
  1. +
    +

    Blockquote +continued here.

    +
    +
  2. +
+
+```````````````````````````````` + + + +6. **That's all.** Nothing that is not counted as a list item by rules + #1--5 counts as a [list item](#list-items). + +The rules for sublists follow from the general rules +[above][List items]. A sublist must be indented the same number +of spaces a paragraph would need to be in order to be included +in the list item. + +So, in this case we need two spaces indent: + +```````````````````````````````` example +- foo + - bar + - baz + - boo +. +
    +
  • foo +
      +
    • bar +
        +
      • baz +
          +
        • boo
        • +
        +
      • +
      +
    • +
    +
  • +
+```````````````````````````````` + + +One is not enough: + +```````````````````````````````` example +- foo + - bar + - baz + - boo +. +
    +
  • foo
  • +
  • bar
  • +
  • baz
  • +
  • boo
  • +
+```````````````````````````````` + + +Here we need four, because the list marker is wider: + +```````````````````````````````` example +10) foo + - bar +. +
    +
  1. foo +
      +
    • bar
    • +
    +
  2. +
+```````````````````````````````` + + +Three is not enough: + +```````````````````````````````` example +10) foo + - bar +. +
    +
  1. foo
  2. +
+
    +
  • bar
  • +
+```````````````````````````````` + + +A list may be the first block in a list item: + +```````````````````````````````` example +- - foo +. +
    +
  • +
      +
    • foo
    • +
    +
  • +
+```````````````````````````````` + + +```````````````````````````````` example +1. - 2. foo +. +
    +
  1. +
      +
    • +
        +
      1. foo
      2. +
      +
    • +
    +
  2. +
+```````````````````````````````` + + +A list item can contain a heading: + +```````````````````````````````` example +- # Foo +- Bar + --- + baz +. +
    +
  • +

    Foo

    +
  • +
  • +

    Bar

    +baz
  • +
+```````````````````````````````` + + +### Motivation + +John Gruber's Markdown spec says the following about list items: + +1. "List markers typically start at the left margin, but may be indented + by up to three spaces. List markers must be followed by one or more + spaces or a tab." + +2. "To make lists look nice, you can wrap items with hanging indents.... + But if you don't want to, you don't have to." + +3. "List items may consist of multiple paragraphs. Each subsequent + paragraph in a list item must be indented by either 4 spaces or one + tab." + +4. "It looks nice if you indent every line of the subsequent paragraphs, + but here again, Markdown will allow you to be lazy." + +5. "To put a blockquote within a list item, the blockquote's `>` + delimiters need to be indented." + +6. "To put a code block within a list item, the code block needs to be + indented twice — 8 spaces or two tabs." + +These rules specify that a paragraph under a list item must be indented +four spaces (presumably, from the left margin, rather than the start of +the list marker, but this is not said), and that code under a list item +must be indented eight spaces instead of the usual four. They also say +that a block quote must be indented, but not by how much; however, the +example given has four spaces indentation. Although nothing is said +about other kinds of block-level content, it is certainly reasonable to +infer that *all* block elements under a list item, including other +lists, must be indented four spaces. This principle has been called the +*four-space rule*. + +The four-space rule is clear and principled, and if the reference +implementation `Markdown.pl` had followed it, it probably would have +become the standard. However, `Markdown.pl` allowed paragraphs and +sublists to start with only two spaces indentation, at least on the +outer level. Worse, its behavior was inconsistent: a sublist of an +outer-level list needed two spaces indentation, but a sublist of this +sublist needed three spaces. It is not surprising, then, that different +implementations of Markdown have developed very different rules for +determining what comes under a list item. (Pandoc and python-Markdown, +for example, stuck with Gruber's syntax description and the four-space +rule, while discount, redcarpet, marked, PHP Markdown, and others +followed `Markdown.pl`'s behavior more closely.) + +Unfortunately, given the divergences between implementations, there +is no way to give a spec for list items that will be guaranteed not +to break any existing documents. However, the spec given here should +correctly handle lists formatted with either the four-space rule or +the more forgiving `Markdown.pl` behavior, provided they are laid out +in a way that is natural for a human to read. + +The strategy here is to let the width and indentation of the list marker +determine the indentation necessary for blocks to fall under the list +item, rather than having a fixed and arbitrary number. The writer can +think of the body of the list item as a unit which gets indented to the +right enough to fit the list marker (and any indentation on the list +marker). (The laziness rule, #5, then allows continuation lines to be +unindented if needed.) + +This rule is superior, we claim, to any rule requiring a fixed level of +indentation from the margin. The four-space rule is clear but +unnatural. It is quite unintuitive that + +``` markdown +- foo + + bar + + - baz +``` + +should be parsed as two lists with an intervening paragraph, + +``` html +
    +
  • foo
  • +
+

bar

+
    +
  • baz
  • +
+``` + +as the four-space rule demands, rather than a single list, + +``` html +
    +
  • +

    foo

    +

    bar

    +
      +
    • baz
    • +
    +
  • +
+``` + +The choice of four spaces is arbitrary. It can be learned, but it is +not likely to be guessed, and it trips up beginners regularly. + +Would it help to adopt a two-space rule? The problem is that such +a rule, together with the rule allowing 1--3 spaces indentation of the +initial list marker, allows text that is indented *less than* the +original list marker to be included in the list item. For example, +`Markdown.pl` parses + +``` markdown + - one + + two +``` + +as a single list item, with `two` a continuation paragraph: + +``` html +
    +
  • +

    one

    +

    two

    +
  • +
+``` + +and similarly + +``` markdown +> - one +> +> two +``` + +as + +``` html +
+
    +
  • +

    one

    +

    two

    +
  • +
+
+``` + +This is extremely unintuitive. + +Rather than requiring a fixed indent from the margin, we could require +a fixed indent (say, two spaces, or even one space) from the list marker (which +may itself be indented). This proposal would remove the last anomaly +discussed. Unlike the spec presented above, it would count the following +as a list item with a subparagraph, even though the paragraph `bar` +is not indented as far as the first paragraph `foo`: + +``` markdown + 10. foo + + bar +``` + +Arguably this text does read like a list item with `bar` as a subparagraph, +which may count in favor of the proposal. However, on this proposal indented +code would have to be indented six spaces after the list marker. And this +would break a lot of existing Markdown, which has the pattern: + +``` markdown +1. foo + + indented code +``` + +where the code is indented eight spaces. The spec above, by contrast, will +parse this text as expected, since the code block's indentation is measured +from the beginning of `foo`. + +The one case that needs special treatment is a list item that *starts* +with indented code. How much indentation is required in that case, since +we don't have a "first paragraph" to measure from? Rule #2 simply stipulates +that in such cases, we require one space indentation from the list marker +(and then the normal four spaces for the indented code). This will match the +four-space rule in cases where the list marker plus its initial indentation +takes four spaces (a common case), but diverge in other cases. + +## Lists + +A [list](@) is a sequence of one or more +list items [of the same type]. The list items +may be separated by any number of blank lines. + +Two list items are [of the same type](@) +if they begin with a [list marker] of the same type. +Two list markers are of the +same type if (a) they are bullet list markers using the same character +(`-`, `+`, or `*`) or (b) they are ordered list numbers with the same +delimiter (either `.` or `)`). + +A list is an [ordered list](@) +if its constituent list items begin with +[ordered list markers], and a +[bullet list](@) if its constituent list +items begin with [bullet list markers]. + +The [start number](@) +of an [ordered list] is determined by the list number of +its initial list item. The numbers of subsequent list items are +disregarded. + +A list is [loose](@) if any of its constituent +list items are separated by blank lines, or if any of its constituent +list items directly contain two block-level elements with a blank line +between them. Otherwise a list is [tight](@). +(The difference in HTML output is that paragraphs in a loose list are +wrapped in `

` tags, while paragraphs in a tight list are not.) + +Changing the bullet or ordered list delimiter starts a new list: + +```````````````````````````````` example +- foo +- bar ++ baz +. +

    +
  • foo
  • +
  • bar
  • +
+
    +
  • baz
  • +
+```````````````````````````````` + + +```````````````````````````````` example +1. foo +2. bar +3) baz +. +
    +
  1. foo
  2. +
  3. bar
  4. +
+
    +
  1. baz
  2. +
+```````````````````````````````` + + +In CommonMark, a list can interrupt a paragraph. That is, +no blank line is needed to separate a paragraph from a following +list: + +```````````````````````````````` example +Foo +- bar +- baz +. +

Foo

+
    +
  • bar
  • +
  • baz
  • +
+```````````````````````````````` + +`Markdown.pl` does not allow this, through fear of triggering a list +via a numeral in a hard-wrapped line: + +``` markdown +The number of windows in my house is +14. The number of doors is 6. +``` + +Oddly, though, `Markdown.pl` *does* allow a blockquote to +interrupt a paragraph, even though the same considerations might +apply. + +In CommonMark, we do allow lists to interrupt paragraphs, for +two reasons. First, it is natural and not uncommon for people +to start lists without blank lines: + +``` markdown +I need to buy +- new shoes +- a coat +- a plane ticket +``` + +Second, we are attracted to a + +> [principle of uniformity](@): +> if a chunk of text has a certain +> meaning, it will continue to have the same meaning when put into a +> container block (such as a list item or blockquote). + +(Indeed, the spec for [list items] and [block quotes] presupposes +this principle.) This principle implies that if + +``` markdown + * I need to buy + - new shoes + - a coat + - a plane ticket +``` + +is a list item containing a paragraph followed by a nested sublist, +as all Markdown implementations agree it is (though the paragraph +may be rendered without `

` tags, since the list is "tight"), +then + +``` markdown +I need to buy +- new shoes +- a coat +- a plane ticket +``` + +by itself should be a paragraph followed by a nested sublist. + +Since it is well established Markdown practice to allow lists to +interrupt paragraphs inside list items, the [principle of +uniformity] requires us to allow this outside list items as +well. ([reStructuredText](http://docutils.sourceforge.net/rst.html) +takes a different approach, requiring blank lines before lists +even inside other list items.) + +In order to solve of unwanted lists in paragraphs with +hard-wrapped numerals, we allow only lists starting with `1` to +interrupt paragraphs. Thus, + +```````````````````````````````` example +The number of windows in my house is +14. The number of doors is 6. +. +

The number of windows in my house is +14. The number of doors is 6.

+```````````````````````````````` + +We may still get an unintended result in cases like + +```````````````````````````````` example +The number of windows in my house is +1. The number of doors is 6. +. +

The number of windows in my house is

+
    +
  1. The number of doors is 6.
  2. +
+```````````````````````````````` + +but this rule should prevent most spurious list captures. + +There can be any number of blank lines between items: + +```````````````````````````````` example +- foo + +- bar + + +- baz +. +
    +
  • +

    foo

    +
  • +
  • +

    bar

    +
  • +
  • +

    baz

    +
  • +
+```````````````````````````````` + +```````````````````````````````` example +- foo + - bar + - baz + + + bim +. +
    +
  • foo +
      +
    • bar +
        +
      • +

        baz

        +

        bim

        +
      • +
      +
    • +
    +
  • +
+```````````````````````````````` + + +To separate consecutive lists of the same type, or to separate a +list from an indented code block that would otherwise be parsed +as a subparagraph of the final list item, you can insert a blank HTML +comment: + +```````````````````````````````` example +- foo +- bar + + + +- baz +- bim +. +
    +
  • foo
  • +
  • bar
  • +
+ +
    +
  • baz
  • +
  • bim
  • +
+```````````````````````````````` + + +```````````````````````````````` example +- foo + + notcode + +- foo + + + + code +. +
    +
  • +

    foo

    +

    notcode

    +
  • +
  • +

    foo

    +
  • +
+ +
code
+
+```````````````````````````````` + + +List items need not be indented to the same level. The following +list items will be treated as items at the same list level, +since none is indented enough to belong to the previous list +item: + +```````````````````````````````` example +- a + - b + - c + - d + - e + - f +- g +. +
    +
  • a
  • +
  • b
  • +
  • c
  • +
  • d
  • +
  • e
  • +
  • f
  • +
  • g
  • +
+```````````````````````````````` + + +```````````````````````````````` example +1. a + + 2. b + + 3. c +. +
    +
  1. +

    a

    +
  2. +
  3. +

    b

    +
  4. +
  5. +

    c

    +
  6. +
+```````````````````````````````` + +Note, however, that list items may not be indented more than +three spaces. Here `- e` is treated as a paragraph continuation +line, because it is indented more than three spaces: + +```````````````````````````````` example +- a + - b + - c + - d + - e +. +
    +
  • a
  • +
  • b
  • +
  • c
  • +
  • d +- e
  • +
+```````````````````````````````` + +And here, `3. c` is treated as in indented code block, +because it is indented four spaces and preceded by a +blank line. + +```````````````````````````````` example +1. a + + 2. b + + 3. c +. +
    +
  1. +

    a

    +
  2. +
  3. +

    b

    +
  4. +
+
3. c
+
+```````````````````````````````` + + +This is a loose list, because there is a blank line between +two of the list items: + +```````````````````````````````` example +- a +- b + +- c +. +
    +
  • +

    a

    +
  • +
  • +

    b

    +
  • +
  • +

    c

    +
  • +
+```````````````````````````````` + + +So is this, with a empty second item: + +```````````````````````````````` example +* a +* + +* c +. +
    +
  • +

    a

    +
  • +
  • +
  • +

    c

    +
  • +
+```````````````````````````````` + + +These are loose lists, even though there is no space between the items, +because one of the items directly contains two block-level elements +with a blank line between them: + +```````````````````````````````` example +- a +- b + + c +- d +. +
    +
  • +

    a

    +
  • +
  • +

    b

    +

    c

    +
  • +
  • +

    d

    +
  • +
+```````````````````````````````` + + +```````````````````````````````` example +- a +- b + + [ref]: /url +- d +. +
    +
  • +

    a

    +
  • +
  • +

    b

    +
  • +
  • +

    d

    +
  • +
+```````````````````````````````` + + +This is a tight list, because the blank lines are in a code block: + +```````````````````````````````` example +- a +- ``` + b + + + ``` +- c +. +
    +
  • a
  • +
  • +
    b
    +
    +
    +
    +
  • +
  • c
  • +
+```````````````````````````````` + + +This is a tight list, because the blank line is between two +paragraphs of a sublist. So the sublist is loose while +the outer list is tight: + +```````````````````````````````` example +- a + - b + + c +- d +. +
    +
  • a +
      +
    • +

      b

      +

      c

      +
    • +
    +
  • +
  • d
  • +
+```````````````````````````````` + + +This is a tight list, because the blank line is inside the +block quote: + +```````````````````````````````` example +* a + > b + > +* c +. +
    +
  • a +
    +

    b

    +
    +
  • +
  • c
  • +
+```````````````````````````````` + + +This list is tight, because the consecutive block elements +are not separated by blank lines: + +```````````````````````````````` example +- a + > b + ``` + c + ``` +- d +. +
    +
  • a +
    +

    b

    +
    +
    c
    +
    +
  • +
  • d
  • +
+```````````````````````````````` + + +A single-paragraph list is tight: + +```````````````````````````````` example +- a +. +
    +
  • a
  • +
+```````````````````````````````` + + +```````````````````````````````` example +- a + - b +. +
    +
  • a +
      +
    • b
    • +
    +
  • +
+```````````````````````````````` + + +This list is loose, because of the blank line between the +two block elements in the list item: + +```````````````````````````````` example +1. ``` + foo + ``` + + bar +. +
    +
  1. +
    foo
    +
    +

    bar

    +
  2. +
+```````````````````````````````` + + +Here the outer list is loose, the inner list tight: + +```````````````````````````````` example +* foo + * bar + + baz +. +
    +
  • +

    foo

    +
      +
    • bar
    • +
    +

    baz

    +
  • +
+```````````````````````````````` + + +```````````````````````````````` example +- a + - b + - c + +- d + - e + - f +. +
    +
  • +

    a

    +
      +
    • b
    • +
    • c
    • +
    +
  • +
  • +

    d

    +
      +
    • e
    • +
    • f
    • +
    +
  • +
+```````````````````````````````` + + +# Inlines + +Inlines are parsed sequentially from the beginning of the character +stream to the end (left to right, in left-to-right languages). +Thus, for example, in + +```````````````````````````````` example +`hi`lo` +. +

hilo`

+```````````````````````````````` + +`hi` is parsed as code, leaving the backtick at the end as a literal +backtick. + + +## Backslash escapes + +Any ASCII punctuation character may be backslash-escaped: + +```````````````````````````````` example +\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~ +. +

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

+```````````````````````````````` + + +Backslashes before other characters are treated as literal +backslashes: + +```````````````````````````````` example +\→\A\a\ \3\φ\« +. +

\→\A\a\ \3\φ\«

+```````````````````````````````` + + +Escaped characters are treated as regular characters and do +not have their usual Markdown meanings: + +```````````````````````````````` example +\*not emphasized* +\
not a tag +\[not a link](/foo) +\`not code` +1\. not a list +\* not a list +\# not a heading +\[foo]: /url "not a reference" +\ö not a character entity +. +

*not emphasized* +<br/> not a tag +[not a link](/foo) +`not code` +1. not a list +* not a list +# not a heading +[foo]: /url "not a reference" +&ouml; not a character entity

+```````````````````````````````` + + +If a backslash is itself escaped, the following character is not: + +```````````````````````````````` example +\\*emphasis* +. +

\emphasis

+```````````````````````````````` + + +A backslash at the end of the line is a [hard line break]: + +```````````````````````````````` example +foo\ +bar +. +

foo
+bar

+```````````````````````````````` + + +Backslash escapes do not work in code blocks, code spans, autolinks, or +raw HTML: + +```````````````````````````````` example +`` \[\` `` +. +

\[\`

+```````````````````````````````` + + +```````````````````````````````` example + \[\] +. +
\[\]
+
+```````````````````````````````` + + +```````````````````````````````` example +~~~ +\[\] +~~~ +. +
\[\]
+
+```````````````````````````````` + + +```````````````````````````````` example + +. +

http://example.com?find=\*

+```````````````````````````````` + + +```````````````````````````````` example + +. + +```````````````````````````````` + + +But they work in all other contexts, including URLs and link titles, +link references, and [info strings] in [fenced code blocks]: + +```````````````````````````````` example +[foo](/bar\* "ti\*tle") +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +[foo] + +[foo]: /bar\* "ti\*tle" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +``` foo\+bar +foo +``` +. +
foo
+
+```````````````````````````````` + + + +## Entity and numeric character references + +Valid HTML entity references and numeric character references +can be used in place of the corresponding Unicode character, +with the following exceptions: + +- Entity and character references are not recognized in code + blocks and code spans. + +- Entity and character references cannot stand in place of + special characters that define structural elements in + CommonMark. For example, although `*` can be used + in place of a literal `*` character, `*` cannot replace + `*` in emphasis delimiters, bullet list markers, or thematic + breaks. + +Conforming CommonMark parsers need not store information about +whether a particular character was represented in the source +using a Unicode character or an entity reference. + +[Entity references](@) consist of `&` + any of the valid +HTML5 entity names + `;`. The +document +is used as an authoritative source for the valid entity +references and their corresponding code points. + +```````````````````````````````` example +  & © Æ Ď +¾ ℋ ⅆ +∲ ≧̸ +. +

  & © Æ Ď +¾ ℋ ⅆ +∲ ≧̸

+```````````````````````````````` + + +[Decimal numeric character +references](@) +consist of `&#` + a string of 1--7 arabic digits + `;`. A +numeric character reference is parsed as the corresponding +Unicode character. Invalid Unicode code points will be replaced by +the REPLACEMENT CHARACTER (`U+FFFD`). For security reasons, +the code point `U+0000` will also be replaced by `U+FFFD`. + +```````````````````````````````` example +# Ӓ Ϡ � +. +

# Ӓ Ϡ �

+```````````````````````````````` + + +[Hexadecimal numeric character +references](@) consist of `&#` + +either `X` or `x` + a string of 1-6 hexadecimal digits + `;`. +They too are parsed as the corresponding Unicode character (this +time specified with a hexadecimal numeral instead of decimal). + +```````````````````````````````` example +" ആ ಫ +. +

" ആ ಫ

+```````````````````````````````` + + +Here are some nonentities: + +```````````````````````````````` example +  &x; &#; &#x; +� +&#abcdef0; +&ThisIsNotDefined; &hi?; +. +

&nbsp &x; &#; &#x; +&#87654321; +&#abcdef0; +&ThisIsNotDefined; &hi?;

+```````````````````````````````` + + +Although HTML5 does accept some entity references +without a trailing semicolon (such as `©`), these are not +recognized here, because it makes the grammar too ambiguous: + +```````````````````````````````` example +© +. +

&copy

+```````````````````````````````` + + +Strings that are not on the list of HTML5 named entities are not +recognized as entity references either: + +```````````````````````````````` example +&MadeUpEntity; +. +

&MadeUpEntity;

+```````````````````````````````` + + +Entity and numeric character references are recognized in any +context besides code spans or code blocks, including +URLs, [link titles], and [fenced code block][] [info strings]: + +```````````````````````````````` example + +. + +```````````````````````````````` + + +```````````````````````````````` example +[foo](/föö "föö") +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +[foo] + +[foo]: /föö "föö" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +``` föö +foo +``` +. +
foo
+
+```````````````````````````````` + + +Entity and numeric character references are treated as literal +text in code spans and code blocks: + +```````````````````````````````` example +`föö` +. +

f&ouml;&ouml;

+```````````````````````````````` + + +```````````````````````````````` example + föfö +. +
f&ouml;f&ouml;
+
+```````````````````````````````` + + +Entity and numeric character references cannot be used +in place of symbols indicating structure in CommonMark +documents. + +```````````````````````````````` example +*foo* +*foo* +. +

*foo* +foo

+```````````````````````````````` + +```````````````````````````````` example +* foo + +* foo +. +

* foo

+
    +
  • foo
  • +
+```````````````````````````````` + +```````````````````````````````` example +foo bar +. +

foo + +bar

+```````````````````````````````` + +```````````````````````````````` example + foo +. +

→foo

+```````````````````````````````` + + +```````````````````````````````` example +[a](url "tit") +. +

[a](url "tit")

+```````````````````````````````` + + +## Code spans + +A [backtick string](@) +is a string of one or more backtick characters (`` ` ``) that is neither +preceded nor followed by a backtick. + +A [code span](@) begins with a backtick string and ends with +a backtick string of equal length. The contents of the code span are +the characters between the two backtick strings, normalized in the +following ways: + +- First, [line endings] are converted to [spaces]. +- If the resulting string both begins *and* ends with a [space] + character, but does not consist entirely of [space] + characters, a single [space] character is removed from the + front and back. This allows you to include code that begins + or ends with backtick characters, which must be separated by + whitespace from the opening or closing backtick strings. + +This is a simple code span: + +```````````````````````````````` example +`foo` +. +

foo

+```````````````````````````````` + + +Here two backticks are used, because the code contains a backtick. +This example also illustrates stripping of a single leading and +trailing space: + +```````````````````````````````` example +`` foo ` bar `` +. +

foo ` bar

+```````````````````````````````` + + +This example shows the motivation for stripping leading and trailing +spaces: + +```````````````````````````````` example +` `` ` +. +

``

+```````````````````````````````` + +Note that only *one* space is stripped: + +```````````````````````````````` example +` `` ` +. +

``

+```````````````````````````````` + +The stripping only happens if the space is on both +sides of the string: + +```````````````````````````````` example +` a` +. +

a

+```````````````````````````````` + +Only [spaces], and not [unicode whitespace] in general, are +stripped in this way: + +```````````````````````````````` example +` b ` +. +

 b 

+```````````````````````````````` + +No stripping occurs if the code span contains only spaces: + +```````````````````````````````` example +` ` +` ` +. +

  +

+```````````````````````````````` + + +[Line endings] are treated like spaces: + +```````````````````````````````` example +`` +foo +bar +baz +`` +. +

foo bar baz

+```````````````````````````````` + +```````````````````````````````` example +`` +foo +`` +. +

foo

+```````````````````````````````` + + +Interior spaces are not collapsed: + +```````````````````````````````` example +`foo bar +baz` +. +

foo bar baz

+```````````````````````````````` + +Note that browsers will typically collapse consecutive spaces +when rendering `` elements, so it is recommended that +the following CSS be used: + + code{white-space: pre-wrap;} + + +Note that backslash escapes do not work in code spans. All backslashes +are treated literally: + +```````````````````````````````` example +`foo\`bar` +. +

foo\bar`

+```````````````````````````````` + + +Backslash escapes are never needed, because one can always choose a +string of *n* backtick characters as delimiters, where the code does +not contain any strings of exactly *n* backtick characters. + +```````````````````````````````` example +``foo`bar`` +. +

foo`bar

+```````````````````````````````` + +```````````````````````````````` example +` foo `` bar ` +. +

foo `` bar

+```````````````````````````````` + + +Code span backticks have higher precedence than any other inline +constructs except HTML tags and autolinks. Thus, for example, this is +not parsed as emphasized text, since the second `*` is part of a code +span: + +```````````````````````````````` example +*foo`*` +. +

*foo*

+```````````````````````````````` + + +And this is not parsed as a link: + +```````````````````````````````` example +[not a `link](/foo`) +. +

[not a link](/foo)

+```````````````````````````````` + + +Code spans, HTML tags, and autolinks have the same precedence. +Thus, this is code: + +```````````````````````````````` example +`` +. +

<a href="">`

+```````````````````````````````` + + +But this is an HTML tag: + +```````````````````````````````` example +
` +. +

`

+```````````````````````````````` + + +And this is code: + +```````````````````````````````` example +`` +. +

<http://foo.bar.baz>`

+```````````````````````````````` + + +But this is an autolink: + +```````````````````````````````` example +` +. +

http://foo.bar.`baz`

+```````````````````````````````` + + +When a backtick string is not closed by a matching backtick string, +we just have literal backticks: + +```````````````````````````````` example +```foo`` +. +

```foo``

+```````````````````````````````` + + +```````````````````````````````` example +`foo +. +

`foo

+```````````````````````````````` + +The following case also illustrates the need for opening and +closing backtick strings to be equal in length: + +```````````````````````````````` example +`foo``bar`` +. +

`foobar

+```````````````````````````````` + + +## Emphasis and strong emphasis + +John Gruber's original [Markdown syntax +description](http://daringfireball.net/projects/markdown/syntax#em) says: + +> Markdown treats asterisks (`*`) and underscores (`_`) as indicators of +> emphasis. Text wrapped with one `*` or `_` will be wrapped with an HTML +> `` tag; double `*`'s or `_`'s will be wrapped with an HTML `` +> tag. + +This is enough for most users, but these rules leave much undecided, +especially when it comes to nested emphasis. The original +`Markdown.pl` test suite makes it clear that triple `***` and +`___` delimiters can be used for strong emphasis, and most +implementations have also allowed the following patterns: + +``` markdown +***strong emph*** +***strong** in emph* +***emph* in strong** +**in strong *emph*** +*in emph **strong*** +``` + +The following patterns are less widely supported, but the intent +is clear and they are useful (especially in contexts like bibliography +entries): + +``` markdown +*emph *with emph* in it* +**strong **with strong** in it** +``` + +Many implementations have also restricted intraword emphasis to +the `*` forms, to avoid unwanted emphasis in words containing +internal underscores. (It is best practice to put these in code +spans, but users often do not.) + +``` markdown +internal emphasis: foo*bar*baz +no emphasis: foo_bar_baz +``` + +The rules given below capture all of these patterns, while allowing +for efficient parsing strategies that do not backtrack. + +First, some definitions. A [delimiter run](@) is either +a sequence of one or more `*` characters that is not preceded or +followed by a non-backslash-escaped `*` character, or a sequence +of one or more `_` characters that is not preceded or followed by +a non-backslash-escaped `_` character. + +A [left-flanking delimiter run](@) is +a [delimiter run] that is (1) not followed by [Unicode whitespace], +and either (2a) not followed by a [punctuation character], or +(2b) followed by a [punctuation character] and +preceded by [Unicode whitespace] or a [punctuation character]. +For purposes of this definition, the beginning and the end of +the line count as Unicode whitespace. + +A [right-flanking delimiter run](@) is +a [delimiter run] that is (1) not preceded by [Unicode whitespace], +and either (2a) not preceded by a [punctuation character], or +(2b) preceded by a [punctuation character] and +followed by [Unicode whitespace] or a [punctuation character]. +For purposes of this definition, the beginning and the end of +the line count as Unicode whitespace. + +Here are some examples of delimiter runs. + + - left-flanking but not right-flanking: + + ``` + ***abc + _abc + **"abc" + _"abc" + ``` + + - right-flanking but not left-flanking: + + ``` + abc*** + abc_ + "abc"** + "abc"_ + ``` + + - Both left and right-flanking: + + ``` + abc***def + "abc"_"def" + ``` + + - Neither left nor right-flanking: + + ``` + abc *** def + a _ b + ``` + +(The idea of distinguishing left-flanking and right-flanking +delimiter runs based on the character before and the character +after comes from Roopesh Chander's +[vfmd](http://www.vfmd.org/vfmd-spec/specification/#procedure-for-identifying-emphasis-tags). +vfmd uses the terminology "emphasis indicator string" instead of "delimiter +run," and its rules for distinguishing left- and right-flanking runs +are a bit more complex than the ones given here.) + +The following rules define emphasis and strong emphasis: + +1. A single `*` character [can open emphasis](@) + iff (if and only if) it is part of a [left-flanking delimiter run]. + +2. A single `_` character [can open emphasis] iff + it is part of a [left-flanking delimiter run] + and either (a) not part of a [right-flanking delimiter run] + or (b) part of a [right-flanking delimiter run] + preceded by punctuation. + +3. A single `*` character [can close emphasis](@) + iff it is part of a [right-flanking delimiter run]. + +4. A single `_` character [can close emphasis] iff + it is part of a [right-flanking delimiter run] + and either (a) not part of a [left-flanking delimiter run] + or (b) part of a [left-flanking delimiter run] + followed by punctuation. + +5. A double `**` [can open strong emphasis](@) + iff it is part of a [left-flanking delimiter run]. + +6. A double `__` [can open strong emphasis] iff + it is part of a [left-flanking delimiter run] + and either (a) not part of a [right-flanking delimiter run] + or (b) part of a [right-flanking delimiter run] + preceded by punctuation. + +7. A double `**` [can close strong emphasis](@) + iff it is part of a [right-flanking delimiter run]. + +8. A double `__` [can close strong emphasis] iff + it is part of a [right-flanking delimiter run] + and either (a) not part of a [left-flanking delimiter run] + or (b) part of a [left-flanking delimiter run] + followed by punctuation. + +9. Emphasis begins with a delimiter that [can open emphasis] and ends + with a delimiter that [can close emphasis], and that uses the same + character (`_` or `*`) as the opening delimiter. The + opening and closing delimiters must belong to separate + [delimiter runs]. If one of the delimiters can both + open and close emphasis, then the sum of the lengths of the + delimiter runs containing the opening and closing delimiters + must not be a multiple of 3 unless both lengths are + multiples of 3. + +10. Strong emphasis begins with a delimiter that + [can open strong emphasis] and ends with a delimiter that + [can close strong emphasis], and that uses the same character + (`_` or `*`) as the opening delimiter. The + opening and closing delimiters must belong to separate + [delimiter runs]. If one of the delimiters can both open + and close strong emphasis, then the sum of the lengths of + the delimiter runs containing the opening and closing + delimiters must not be a multiple of 3 unless both lengths + are multiples of 3. + +11. A literal `*` character cannot occur at the beginning or end of + `*`-delimited emphasis or `**`-delimited strong emphasis, unless it + is backslash-escaped. + +12. A literal `_` character cannot occur at the beginning or end of + `_`-delimited emphasis or `__`-delimited strong emphasis, unless it + is backslash-escaped. + +Where rules 1--12 above are compatible with multiple parsings, +the following principles resolve ambiguity: + +13. The number of nestings should be minimized. Thus, for example, + an interpretation `...` is always preferred to + `...`. + +14. An interpretation `...` is always + preferred to `...`. + +15. When two potential emphasis or strong emphasis spans overlap, + so that the second begins before the first ends and ends after + the first ends, the first takes precedence. Thus, for example, + `*foo _bar* baz_` is parsed as `foo _bar baz_` rather + than `*foo bar* baz`. + +16. When there are two potential emphasis or strong emphasis spans + with the same closing delimiter, the shorter one (the one that + opens later) takes precedence. Thus, for example, + `**foo **bar baz**` is parsed as `**foo bar baz` + rather than `foo **bar baz`. + +17. Inline code spans, links, images, and HTML tags group more tightly + than emphasis. So, when there is a choice between an interpretation + that contains one of these elements and one that does not, the + former always wins. Thus, for example, `*[foo*](bar)` is + parsed as `*foo*` rather than as + `[foo](bar)`. + +These rules can be illustrated through a series of examples. + +Rule 1: + +```````````````````````````````` example +*foo bar* +. +

foo bar

+```````````````````````````````` + + +This is not emphasis, because the opening `*` is followed by +whitespace, and hence not part of a [left-flanking delimiter run]: + +```````````````````````````````` example +a * foo bar* +. +

a * foo bar*

+```````````````````````````````` + + +This is not emphasis, because the opening `*` is preceded +by an alphanumeric and followed by punctuation, and hence +not part of a [left-flanking delimiter run]: + +```````````````````````````````` example +a*"foo"* +. +

a*"foo"*

+```````````````````````````````` + + +Unicode nonbreaking spaces count as whitespace, too: + +```````````````````````````````` example +* a * +. +

* a *

+```````````````````````````````` + + +Intraword emphasis with `*` is permitted: + +```````````````````````````````` example +foo*bar* +. +

foobar

+```````````````````````````````` + + +```````````````````````````````` example +5*6*78 +. +

5678

+```````````````````````````````` + + +Rule 2: + +```````````````````````````````` example +_foo bar_ +. +

foo bar

+```````````````````````````````` + + +This is not emphasis, because the opening `_` is followed by +whitespace: + +```````````````````````````````` example +_ foo bar_ +. +

_ foo bar_

+```````````````````````````````` + + +This is not emphasis, because the opening `_` is preceded +by an alphanumeric and followed by punctuation: + +```````````````````````````````` example +a_"foo"_ +. +

a_"foo"_

+```````````````````````````````` + + +Emphasis with `_` is not allowed inside words: + +```````````````````````````````` example +foo_bar_ +. +

foo_bar_

+```````````````````````````````` + + +```````````````````````````````` example +5_6_78 +. +

5_6_78

+```````````````````````````````` + + +```````````````````````````````` example +пристаням_стремятся_ +. +

пристаням_стремятся_

+```````````````````````````````` + + +Here `_` does not generate emphasis, because the first delimiter run +is right-flanking and the second left-flanking: + +```````````````````````````````` example +aa_"bb"_cc +. +

aa_"bb"_cc

+```````````````````````````````` + + +This is emphasis, even though the opening delimiter is +both left- and right-flanking, because it is preceded by +punctuation: + +```````````````````````````````` example +foo-_(bar)_ +. +

foo-(bar)

+```````````````````````````````` + + +Rule 3: + +This is not emphasis, because the closing delimiter does +not match the opening delimiter: + +```````````````````````````````` example +_foo* +. +

_foo*

+```````````````````````````````` + + +This is not emphasis, because the closing `*` is preceded by +whitespace: + +```````````````````````````````` example +*foo bar * +. +

*foo bar *

+```````````````````````````````` + + +A newline also counts as whitespace: + +```````````````````````````````` example +*foo bar +* +. +

*foo bar +*

+```````````````````````````````` + + +This is not emphasis, because the second `*` is +preceded by punctuation and followed by an alphanumeric +(hence it is not part of a [right-flanking delimiter run]: + +```````````````````````````````` example +*(*foo) +. +

*(*foo)

+```````````````````````````````` + + +The point of this restriction is more easily appreciated +with this example: + +```````````````````````````````` example +*(*foo*)* +. +

(foo)

+```````````````````````````````` + + +Intraword emphasis with `*` is allowed: + +```````````````````````````````` example +*foo*bar +. +

foobar

+```````````````````````````````` + + + +Rule 4: + +This is not emphasis, because the closing `_` is preceded by +whitespace: + +```````````````````````````````` example +_foo bar _ +. +

_foo bar _

+```````````````````````````````` + + +This is not emphasis, because the second `_` is +preceded by punctuation and followed by an alphanumeric: + +```````````````````````````````` example +_(_foo) +. +

_(_foo)

+```````````````````````````````` + + +This is emphasis within emphasis: + +```````````````````````````````` example +_(_foo_)_ +. +

(foo)

+```````````````````````````````` + + +Intraword emphasis is disallowed for `_`: + +```````````````````````````````` example +_foo_bar +. +

_foo_bar

+```````````````````````````````` + + +```````````````````````````````` example +_пристаням_стремятся +. +

_пристаням_стремятся

+```````````````````````````````` + + +```````````````````````````````` example +_foo_bar_baz_ +. +

foo_bar_baz

+```````````````````````````````` + + +This is emphasis, even though the closing delimiter is +both left- and right-flanking, because it is followed by +punctuation: + +```````````````````````````````` example +_(bar)_. +. +

(bar).

+```````````````````````````````` + + +Rule 5: + +```````````````````````````````` example +**foo bar** +. +

foo bar

+```````````````````````````````` + + +This is not strong emphasis, because the opening delimiter is +followed by whitespace: + +```````````````````````````````` example +** foo bar** +. +

** foo bar**

+```````````````````````````````` + + +This is not strong emphasis, because the opening `**` is preceded +by an alphanumeric and followed by punctuation, and hence +not part of a [left-flanking delimiter run]: + +```````````````````````````````` example +a**"foo"** +. +

a**"foo"**

+```````````````````````````````` + + +Intraword strong emphasis with `**` is permitted: + +```````````````````````````````` example +foo**bar** +. +

foobar

+```````````````````````````````` + + +Rule 6: + +```````````````````````````````` example +__foo bar__ +. +

foo bar

+```````````````````````````````` + + +This is not strong emphasis, because the opening delimiter is +followed by whitespace: + +```````````````````````````````` example +__ foo bar__ +. +

__ foo bar__

+```````````````````````````````` + + +A newline counts as whitespace: +```````````````````````````````` example +__ +foo bar__ +. +

__ +foo bar__

+```````````````````````````````` + + +This is not strong emphasis, because the opening `__` is preceded +by an alphanumeric and followed by punctuation: + +```````````````````````````````` example +a__"foo"__ +. +

a__"foo"__

+```````````````````````````````` + + +Intraword strong emphasis is forbidden with `__`: + +```````````````````````````````` example +foo__bar__ +. +

foo__bar__

+```````````````````````````````` + + +```````````````````````````````` example +5__6__78 +. +

5__6__78

+```````````````````````````````` + + +```````````````````````````````` example +пристаням__стремятся__ +. +

пристаням__стремятся__

+```````````````````````````````` + + +```````````````````````````````` example +__foo, __bar__, baz__ +. +

foo, bar, baz

+```````````````````````````````` + + +This is strong emphasis, even though the opening delimiter is +both left- and right-flanking, because it is preceded by +punctuation: + +```````````````````````````````` example +foo-__(bar)__ +. +

foo-(bar)

+```````````````````````````````` + + + +Rule 7: + +This is not strong emphasis, because the closing delimiter is preceded +by whitespace: + +```````````````````````````````` example +**foo bar ** +. +

**foo bar **

+```````````````````````````````` + + +(Nor can it be interpreted as an emphasized `*foo bar *`, because of +Rule 11.) + +This is not strong emphasis, because the second `**` is +preceded by punctuation and followed by an alphanumeric: + +```````````````````````````````` example +**(**foo) +. +

**(**foo)

+```````````````````````````````` + + +The point of this restriction is more easily appreciated +with these examples: + +```````````````````````````````` example +*(**foo**)* +. +

(foo)

+```````````````````````````````` + + +```````````````````````````````` example +**Gomphocarpus (*Gomphocarpus physocarpus*, syn. +*Asclepias physocarpa*)** +. +

Gomphocarpus (Gomphocarpus physocarpus, syn. +Asclepias physocarpa)

+```````````````````````````````` + + +```````````````````````````````` example +**foo "*bar*" foo** +. +

foo "bar" foo

+```````````````````````````````` + + +Intraword emphasis: + +```````````````````````````````` example +**foo**bar +. +

foobar

+```````````````````````````````` + + +Rule 8: + +This is not strong emphasis, because the closing delimiter is +preceded by whitespace: + +```````````````````````````````` example +__foo bar __ +. +

__foo bar __

+```````````````````````````````` + + +This is not strong emphasis, because the second `__` is +preceded by punctuation and followed by an alphanumeric: + +```````````````````````````````` example +__(__foo) +. +

__(__foo)

+```````````````````````````````` + + +The point of this restriction is more easily appreciated +with this example: + +```````````````````````````````` example +_(__foo__)_ +. +

(foo)

+```````````````````````````````` + + +Intraword strong emphasis is forbidden with `__`: + +```````````````````````````````` example +__foo__bar +. +

__foo__bar

+```````````````````````````````` + + +```````````````````````````````` example +__пристаням__стремятся +. +

__пристаням__стремятся

+```````````````````````````````` + + +```````````````````````````````` example +__foo__bar__baz__ +. +

foo__bar__baz

+```````````````````````````````` + + +This is strong emphasis, even though the closing delimiter is +both left- and right-flanking, because it is followed by +punctuation: + +```````````````````````````````` example +__(bar)__. +. +

(bar).

+```````````````````````````````` + + +Rule 9: + +Any nonempty sequence of inline elements can be the contents of an +emphasized span. + +```````````````````````````````` example +*foo [bar](/url)* +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo +bar* +. +

foo +bar

+```````````````````````````````` + + +In particular, emphasis and strong emphasis can be nested +inside emphasis: + +```````````````````````````````` example +_foo __bar__ baz_ +. +

foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +_foo _bar_ baz_ +. +

foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +__foo_ bar_ +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo *bar** +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo **bar** baz* +. +

foo bar baz

+```````````````````````````````` + +```````````````````````````````` example +*foo**bar**baz* +. +

foobarbaz

+```````````````````````````````` + +Note that in the preceding case, the interpretation + +``` markdown +

foobarbaz

+``` + + +is precluded by the condition that a delimiter that +can both open and close (like the `*` after `foo`) +cannot form emphasis if the sum of the lengths of +the delimiter runs containing the opening and +closing delimiters is a multiple of 3 unless +both lengths are multiples of 3. + + +For the same reason, we don't get two consecutive +emphasis sections in this example: + +```````````````````````````````` example +*foo**bar* +. +

foo**bar

+```````````````````````````````` + + +The same condition ensures that the following +cases are all strong emphasis nested inside +emphasis, even when the interior spaces are +omitted: + + +```````````````````````````````` example +***foo** bar* +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo **bar*** +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo**bar*** +. +

foobar

+```````````````````````````````` + + +When the lengths of the interior closing and opening +delimiter runs are *both* multiples of 3, though, +they can match to create emphasis: + +```````````````````````````````` example +foo***bar***baz +. +

foobarbaz

+```````````````````````````````` + +```````````````````````````````` example +foo******bar*********baz +. +

foobar***baz

+```````````````````````````````` + + +Indefinite levels of nesting are possible: + +```````````````````````````````` example +*foo **bar *baz* bim** bop* +. +

foo bar baz bim bop

+```````````````````````````````` + + +```````````````````````````````` example +*foo [*bar*](/url)* +. +

foo bar

+```````````````````````````````` + + +There can be no empty emphasis or strong emphasis: + +```````````````````````````````` example +** is not an empty emphasis +. +

** is not an empty emphasis

+```````````````````````````````` + + +```````````````````````````````` example +**** is not an empty strong emphasis +. +

**** is not an empty strong emphasis

+```````````````````````````````` + + + +Rule 10: + +Any nonempty sequence of inline elements can be the contents of an +strongly emphasized span. + +```````````````````````````````` example +**foo [bar](/url)** +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +**foo +bar** +. +

foo +bar

+```````````````````````````````` + + +In particular, emphasis and strong emphasis can be nested +inside strong emphasis: + +```````````````````````````````` example +__foo _bar_ baz__ +. +

foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +__foo __bar__ baz__ +. +

foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +____foo__ bar__ +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +**foo **bar**** +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +**foo *bar* baz** +. +

foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +**foo*bar*baz** +. +

foobarbaz

+```````````````````````````````` + + +```````````````````````````````` example +***foo* bar** +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +**foo *bar*** +. +

foo bar

+```````````````````````````````` + + +Indefinite levels of nesting are possible: + +```````````````````````````````` example +**foo *bar **baz** +bim* bop** +. +

foo bar baz +bim bop

+```````````````````````````````` + + +```````````````````````````````` example +**foo [*bar*](/url)** +. +

foo bar

+```````````````````````````````` + + +There can be no empty emphasis or strong emphasis: + +```````````````````````````````` example +__ is not an empty emphasis +. +

__ is not an empty emphasis

+```````````````````````````````` + + +```````````````````````````````` example +____ is not an empty strong emphasis +. +

____ is not an empty strong emphasis

+```````````````````````````````` + + + +Rule 11: + +```````````````````````````````` example +foo *** +. +

foo ***

+```````````````````````````````` + + +```````````````````````````````` example +foo *\** +. +

foo *

+```````````````````````````````` + + +```````````````````````````````` example +foo *_* +. +

foo _

+```````````````````````````````` + + +```````````````````````````````` example +foo ***** +. +

foo *****

+```````````````````````````````` + + +```````````````````````````````` example +foo **\*** +. +

foo *

+```````````````````````````````` + + +```````````````````````````````` example +foo **_** +. +

foo _

+```````````````````````````````` + + +Note that when delimiters do not match evenly, Rule 11 determines +that the excess literal `*` characters will appear outside of the +emphasis, rather than inside it: + +```````````````````````````````` example +**foo* +. +

*foo

+```````````````````````````````` + + +```````````````````````````````` example +*foo** +. +

foo*

+```````````````````````````````` + + +```````````````````````````````` example +***foo** +. +

*foo

+```````````````````````````````` + + +```````````````````````````````` example +****foo* +. +

***foo

+```````````````````````````````` + + +```````````````````````````````` example +**foo*** +. +

foo*

+```````````````````````````````` + + +```````````````````````````````` example +*foo**** +. +

foo***

+```````````````````````````````` + + + +Rule 12: + +```````````````````````````````` example +foo ___ +. +

foo ___

+```````````````````````````````` + + +```````````````````````````````` example +foo _\__ +. +

foo _

+```````````````````````````````` + + +```````````````````````````````` example +foo _*_ +. +

foo *

+```````````````````````````````` + + +```````````````````````````````` example +foo _____ +. +

foo _____

+```````````````````````````````` + + +```````````````````````````````` example +foo __\___ +. +

foo _

+```````````````````````````````` + + +```````````````````````````````` example +foo __*__ +. +

foo *

+```````````````````````````````` + + +```````````````````````````````` example +__foo_ +. +

_foo

+```````````````````````````````` + + +Note that when delimiters do not match evenly, Rule 12 determines +that the excess literal `_` characters will appear outside of the +emphasis, rather than inside it: + +```````````````````````````````` example +_foo__ +. +

foo_

+```````````````````````````````` + + +```````````````````````````````` example +___foo__ +. +

_foo

+```````````````````````````````` + + +```````````````````````````````` example +____foo_ +. +

___foo

+```````````````````````````````` + + +```````````````````````````````` example +__foo___ +. +

foo_

+```````````````````````````````` + + +```````````````````````````````` example +_foo____ +. +

foo___

+```````````````````````````````` + + +Rule 13 implies that if you want emphasis nested directly inside +emphasis, you must use different delimiters: + +```````````````````````````````` example +**foo** +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +*_foo_* +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +__foo__ +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +_*foo*_ +. +

foo

+```````````````````````````````` + + +However, strong emphasis within strong emphasis is possible without +switching delimiters: + +```````````````````````````````` example +****foo**** +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +____foo____ +. +

foo

+```````````````````````````````` + + + +Rule 13 can be applied to arbitrarily long sequences of +delimiters: + +```````````````````````````````` example +******foo****** +. +

foo

+```````````````````````````````` + + +Rule 14: + +```````````````````````````````` example +***foo*** +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +_____foo_____ +. +

foo

+```````````````````````````````` + + +Rule 15: + +```````````````````````````````` example +*foo _bar* baz_ +. +

foo _bar baz_

+```````````````````````````````` + + +```````````````````````````````` example +*foo __bar *baz bim__ bam* +. +

foo bar *baz bim bam

+```````````````````````````````` + + +Rule 16: + +```````````````````````````````` example +**foo **bar baz** +. +

**foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +*foo *bar baz* +. +

*foo bar baz

+```````````````````````````````` + + +Rule 17: + +```````````````````````````````` example +*[bar*](/url) +. +

*bar*

+```````````````````````````````` + + +```````````````````````````````` example +_foo [bar_](/url) +. +

_foo bar_

+```````````````````````````````` + + +```````````````````````````````` example +* +. +

*

+```````````````````````````````` + + +```````````````````````````````` example +** +. +

**

+```````````````````````````````` + + +```````````````````````````````` example +__ +. +

__

+```````````````````````````````` + + +```````````````````````````````` example +*a `*`* +. +

a *

+```````````````````````````````` + + +```````````````````````````````` example +_a `_`_ +. +

a _

+```````````````````````````````` + + +```````````````````````````````` example +**a +. +

**ahttp://foo.bar/?q=**

+```````````````````````````````` + + +```````````````````````````````` example +__a +. +

__ahttp://foo.bar/?q=__

+```````````````````````````````` + + + +## Links + +A link contains [link text] (the visible text), a [link destination] +(the URI that is the link destination), and optionally a [link title]. +There are two basic kinds of links in Markdown. In [inline links] the +destination and title are given immediately after the link text. In +[reference links] the destination and title are defined elsewhere in +the document. + +A [link text](@) consists of a sequence of zero or more +inline elements enclosed by square brackets (`[` and `]`). The +following rules apply: + +- Links may not contain other links, at any level of nesting. If + multiple otherwise valid link definitions appear nested inside each + other, the inner-most definition is used. + +- Brackets are allowed in the [link text] only if (a) they + are backslash-escaped or (b) they appear as a matched pair of brackets, + with an open bracket `[`, a sequence of zero or more inlines, and + a close bracket `]`. + +- Backtick [code spans], [autolinks], and raw [HTML tags] bind more tightly + than the brackets in link text. Thus, for example, + `` [foo`]` `` could not be a link text, since the second `]` + is part of a code span. + +- The brackets in link text bind more tightly than markers for + [emphasis and strong emphasis]. Thus, for example, `*[foo*](url)` is a link. + +A [link destination](@) consists of either + +- a sequence of zero or more characters between an opening `<` and a + closing `>` that contains no line breaks or unescaped + `<` or `>` characters, or + +- a nonempty sequence of characters that does not start with + `<`, does not include ASCII space or control characters, and + includes parentheses only if (a) they are backslash-escaped or + (b) they are part of a balanced pair of unescaped parentheses. + (Implementations may impose limits on parentheses nesting to + avoid performance issues, but at least three levels of nesting + should be supported.) + +A [link title](@) consists of either + +- a sequence of zero or more characters between straight double-quote + characters (`"`), including a `"` character only if it is + backslash-escaped, or + +- a sequence of zero or more characters between straight single-quote + characters (`'`), including a `'` character only if it is + backslash-escaped, or + +- a sequence of zero or more characters between matching parentheses + (`(...)`), including a `(` or `)` character only if it is + backslash-escaped. + +Although [link titles] may span multiple lines, they may not contain +a [blank line]. + +An [inline link](@) consists of a [link text] followed immediately +by a left parenthesis `(`, optional [whitespace], an optional +[link destination], an optional [link title] separated from the link +destination by [whitespace], optional [whitespace], and a right +parenthesis `)`. The link's text consists of the inlines contained +in the [link text] (excluding the enclosing square brackets). +The link's URI consists of the link destination, excluding enclosing +`<...>` if present, with backslash-escapes in effect as described +above. The link's title consists of the link title, excluding its +enclosing delimiters, with backslash-escapes in effect as described +above. + +Here is a simple inline link: + +```````````````````````````````` example +[link](/uri "title") +. +

link

+```````````````````````````````` + + +The title may be omitted: + +```````````````````````````````` example +[link](/uri) +. +

link

+```````````````````````````````` + + +Both the title and the destination may be omitted: + +```````````````````````````````` example +[link]() +. +

link

+```````````````````````````````` + + +```````````````````````````````` example +[link](<>) +. +

link

+```````````````````````````````` + +The destination can only contain spaces if it is +enclosed in pointy brackets: + +```````````````````````````````` example +[link](/my uri) +. +

[link](/my uri)

+```````````````````````````````` + +```````````````````````````````` example +[link](
) +. +

link

+```````````````````````````````` + +The destination cannot contain line breaks, +even if enclosed in pointy brackets: + +```````````````````````````````` example +[link](foo +bar) +. +

[link](foo +bar)

+```````````````````````````````` + +```````````````````````````````` example +[link]() +. +

[link]()

+```````````````````````````````` + +The destination can contain `)` if it is enclosed +in pointy brackets: + +```````````````````````````````` example +[a]() +. +

a

+```````````````````````````````` + +Pointy brackets that enclose links must be unescaped: + +```````````````````````````````` example +[link]() +. +

[link](<foo>)

+```````````````````````````````` + +These are not links, because the opening pointy bracket +is not matched properly: + +```````````````````````````````` example +[a]( +[a](c) +. +

[a](<b)c +[a](<b)c> +[a](c)

+```````````````````````````````` + +Parentheses inside the link destination may be escaped: + +```````````````````````````````` example +[link](\(foo\)) +. +

link

+```````````````````````````````` + +Any number of parentheses are allowed without escaping, as long as they are +balanced: + +```````````````````````````````` example +[link](foo(and(bar))) +. +

link

+```````````````````````````````` + +However, if you have unbalanced parentheses, you need to escape or use the +`<...>` form: + +```````````````````````````````` example +[link](foo\(and\(bar\)) +. +

link

+```````````````````````````````` + + +```````````````````````````````` example +[link]() +. +

link

+```````````````````````````````` + + +Parentheses and other symbols can also be escaped, as usual +in Markdown: + +```````````````````````````````` example +[link](foo\)\:) +. +

link

+```````````````````````````````` + + +A link can contain fragment identifiers and queries: + +```````````````````````````````` example +[link](#fragment) + +[link](http://example.com#fragment) + +[link](http://example.com?foo=3#frag) +. +

link

+

link

+

link

+```````````````````````````````` + + +Note that a backslash before a non-escapable character is +just a backslash: + +```````````````````````````````` example +[link](foo\bar) +. +

link

+```````````````````````````````` + + +URL-escaping should be left alone inside the destination, as all +URL-escaped characters are also valid URL characters. Entity and +numerical character references in the destination will be parsed +into the corresponding Unicode code points, as usual. These may +be optionally URL-escaped when written as HTML, but this spec +does not enforce any particular policy for rendering URLs in +HTML or other formats. Renderers may make different decisions +about how to escape or normalize URLs in the output. + +```````````````````````````````` example +[link](foo%20bä) +. +

link

+```````````````````````````````` + + +Note that, because titles can often be parsed as destinations, +if you try to omit the destination and keep the title, you'll +get unexpected results: + +```````````````````````````````` example +[link]("title") +. +

link

+```````````````````````````````` + + +Titles may be in single quotes, double quotes, or parentheses: + +```````````````````````````````` example +[link](/url "title") +[link](/url 'title') +[link](/url (title)) +. +

link +link +link

+```````````````````````````````` + + +Backslash escapes and entity and numeric character references +may be used in titles: + +```````````````````````````````` example +[link](/url "title \""") +. +

link

+```````````````````````````````` + + +Titles must be separated from the link using a [whitespace]. +Other [Unicode whitespace] like non-breaking space doesn't work. + +```````````````````````````````` example +[link](/url "title") +. +

link

+```````````````````````````````` + + +Nested balanced quotes are not allowed without escaping: + +```````````````````````````````` example +[link](/url "title "and" title") +. +

[link](/url "title "and" title")

+```````````````````````````````` + + +But it is easy to work around this by using a different quote type: + +```````````````````````````````` example +[link](/url 'title "and" title') +. +

link

+```````````````````````````````` + + +(Note: `Markdown.pl` did allow double quotes inside a double-quoted +title, and its test suite included a test demonstrating this. +But it is hard to see a good rationale for the extra complexity this +brings, since there are already many ways---backslash escaping, +entity and numeric character references, or using a different +quote type for the enclosing title---to write titles containing +double quotes. `Markdown.pl`'s handling of titles has a number +of other strange features. For example, it allows single-quoted +titles in inline links, but not reference links. And, in +reference links but not inline links, it allows a title to begin +with `"` and end with `)`. `Markdown.pl` 1.0.1 even allows +titles with no closing quotation mark, though 1.0.2b8 does not. +It seems preferable to adopt a simple, rational rule that works +the same way in inline links and link reference definitions.) + +[Whitespace] is allowed around the destination and title: + +```````````````````````````````` example +[link]( /uri + "title" ) +. +

link

+```````````````````````````````` + + +But it is not allowed between the link text and the +following parenthesis: + +```````````````````````````````` example +[link] (/uri) +. +

[link] (/uri)

+```````````````````````````````` + + +The link text may contain balanced brackets, but not unbalanced ones, +unless they are escaped: + +```````````````````````````````` example +[link [foo [bar]]](/uri) +. +

link [foo [bar]]

+```````````````````````````````` + + +```````````````````````````````` example +[link] bar](/uri) +. +

[link] bar](/uri)

+```````````````````````````````` + + +```````````````````````````````` example +[link [bar](/uri) +. +

[link bar

+```````````````````````````````` + + +```````````````````````````````` example +[link \[bar](/uri) +. +

link [bar

+```````````````````````````````` + + +The link text may contain inline content: + +```````````````````````````````` example +[link *foo **bar** `#`*](/uri) +. +

link foo bar #

+```````````````````````````````` + + +```````````````````````````````` example +[![moon](moon.jpg)](/uri) +. +

moon

+```````````````````````````````` + + +However, links may not contain other links, at any level of nesting. + +```````````````````````````````` example +[foo [bar](/uri)](/uri) +. +

[foo bar](/uri)

+```````````````````````````````` + + +```````````````````````````````` example +[foo *[bar [baz](/uri)](/uri)*](/uri) +. +

[foo [bar baz](/uri)](/uri)

+```````````````````````````````` + + +```````````````````````````````` example +![[[foo](uri1)](uri2)](uri3) +. +

[foo](uri2)

+```````````````````````````````` + + +These cases illustrate the precedence of link text grouping over +emphasis grouping: + +```````````````````````````````` example +*[foo*](/uri) +. +

*foo*

+```````````````````````````````` + + +```````````````````````````````` example +[foo *bar](baz*) +. +

foo *bar

+```````````````````````````````` + + +Note that brackets that *aren't* part of links do not take +precedence: + +```````````````````````````````` example +*foo [bar* baz] +. +

foo [bar baz]

+```````````````````````````````` + + +These cases illustrate the precedence of HTML tags, code spans, +and autolinks over link grouping: + +```````````````````````````````` example +[foo +. +

[foo

+```````````````````````````````` + + +```````````````````````````````` example +[foo`](/uri)` +. +

[foo](/uri)

+```````````````````````````````` + + +```````````````````````````````` example +[foo +. +

[foohttp://example.com/?search=](uri)

+```````````````````````````````` + + +There are three kinds of [reference link](@)s: +[full](#full-reference-link), [collapsed](#collapsed-reference-link), +and [shortcut](#shortcut-reference-link). + +A [full reference link](@) +consists of a [link text] immediately followed by a [link label] +that [matches] a [link reference definition] elsewhere in the document. + +A [link label](@) begins with a left bracket (`[`) and ends +with the first right bracket (`]`) that is not backslash-escaped. +Between these brackets there must be at least one [non-whitespace character]. +Unescaped square bracket characters are not allowed inside the +opening and closing square brackets of [link labels]. A link +label can have at most 999 characters inside the square +brackets. + +One label [matches](@) +another just in case their normalized forms are equal. To normalize a +label, strip off the opening and closing brackets, +perform the *Unicode case fold*, strip leading and trailing +[whitespace] and collapse consecutive internal +[whitespace] to a single space. If there are multiple +matching reference link definitions, the one that comes first in the +document is used. (It is desirable in such cases to emit a warning.) + +The contents of the first link label are parsed as inlines, which are +used as the link's text. The link's URI and title are provided by the +matching [link reference definition]. + +Here is a simple example: + +```````````````````````````````` example +[foo][bar] + +[bar]: /url "title" +. +

foo

+```````````````````````````````` + + +The rules for the [link text] are the same as with +[inline links]. Thus: + +The link text may contain balanced brackets, but not unbalanced ones, +unless they are escaped: + +```````````````````````````````` example +[link [foo [bar]]][ref] + +[ref]: /uri +. +

link [foo [bar]]

+```````````````````````````````` + + +```````````````````````````````` example +[link \[bar][ref] + +[ref]: /uri +. +

link [bar

+```````````````````````````````` + + +The link text may contain inline content: + +```````````````````````````````` example +[link *foo **bar** `#`*][ref] + +[ref]: /uri +. +

link foo bar #

+```````````````````````````````` + + +```````````````````````````````` example +[![moon](moon.jpg)][ref] + +[ref]: /uri +. +

moon

+```````````````````````````````` + + +However, links may not contain other links, at any level of nesting. + +```````````````````````````````` example +[foo [bar](/uri)][ref] + +[ref]: /uri +. +

[foo bar]ref

+```````````````````````````````` + + +```````````````````````````````` example +[foo *bar [baz][ref]*][ref] + +[ref]: /uri +. +

[foo bar baz]ref

+```````````````````````````````` + + +(In the examples above, we have two [shortcut reference links] +instead of one [full reference link].) + +The following cases illustrate the precedence of link text grouping over +emphasis grouping: + +```````````````````````````````` example +*[foo*][ref] + +[ref]: /uri +. +

*foo*

+```````````````````````````````` + + +```````````````````````````````` example +[foo *bar][ref] + +[ref]: /uri +. +

foo *bar

+```````````````````````````````` + + +These cases illustrate the precedence of HTML tags, code spans, +and autolinks over link grouping: + +```````````````````````````````` example +[foo + +[ref]: /uri +. +

[foo

+```````````````````````````````` + + +```````````````````````````````` example +[foo`][ref]` + +[ref]: /uri +. +

[foo][ref]

+```````````````````````````````` + + +```````````````````````````````` example +[foo + +[ref]: /uri +. +

[foohttp://example.com/?search=][ref]

+```````````````````````````````` + + +Matching is case-insensitive: + +```````````````````````````````` example +[foo][BaR] + +[bar]: /url "title" +. +

foo

+```````````````````````````````` + + +Unicode case fold is used: + +```````````````````````````````` example +[Толпой][Толпой] is a Russian word. + +[ТОЛПОЙ]: /url +. +

Толпой is a Russian word.

+```````````````````````````````` + + +Consecutive internal [whitespace] is treated as one space for +purposes of determining matching: + +```````````````````````````````` example +[Foo + bar]: /url + +[Baz][Foo bar] +. +

Baz

+```````````````````````````````` + + +No [whitespace] is allowed between the [link text] and the +[link label]: + +```````````````````````````````` example +[foo] [bar] + +[bar]: /url "title" +. +

[foo] bar

+```````````````````````````````` + + +```````````````````````````````` example +[foo] +[bar] + +[bar]: /url "title" +. +

[foo] +bar

+```````````````````````````````` + + +This is a departure from John Gruber's original Markdown syntax +description, which explicitly allows whitespace between the link +text and the link label. It brings reference links in line with +[inline links], which (according to both original Markdown and +this spec) cannot have whitespace after the link text. More +importantly, it prevents inadvertent capture of consecutive +[shortcut reference links]. If whitespace is allowed between the +link text and the link label, then in the following we will have +a single reference link, not two shortcut reference links, as +intended: + +``` markdown +[foo] +[bar] + +[foo]: /url1 +[bar]: /url2 +``` + +(Note that [shortcut reference links] were introduced by Gruber +himself in a beta version of `Markdown.pl`, but never included +in the official syntax description. Without shortcut reference +links, it is harmless to allow space between the link text and +link label; but once shortcut references are introduced, it is +too dangerous to allow this, as it frequently leads to +unintended results.) + +When there are multiple matching [link reference definitions], +the first is used: + +```````````````````````````````` example +[foo]: /url1 + +[foo]: /url2 + +[bar][foo] +. +

bar

+```````````````````````````````` + + +Note that matching is performed on normalized strings, not parsed +inline content. So the following does not match, even though the +labels define equivalent inline content: + +```````````````````````````````` example +[bar][foo\!] + +[foo!]: /url +. +

[bar][foo!]

+```````````````````````````````` + + +[Link labels] cannot contain brackets, unless they are +backslash-escaped: + +```````````````````````````````` example +[foo][ref[] + +[ref[]: /uri +. +

[foo][ref[]

+

[ref[]: /uri

+```````````````````````````````` + + +```````````````````````````````` example +[foo][ref[bar]] + +[ref[bar]]: /uri +. +

[foo][ref[bar]]

+

[ref[bar]]: /uri

+```````````````````````````````` + + +```````````````````````````````` example +[[[foo]]] + +[[[foo]]]: /url +. +

[[[foo]]]

+

[[[foo]]]: /url

+```````````````````````````````` + + +```````````````````````````````` example +[foo][ref\[] + +[ref\[]: /uri +. +

foo

+```````````````````````````````` + + +Note that in this example `]` is not backslash-escaped: + +```````````````````````````````` example +[bar\\]: /uri + +[bar\\] +. +

bar\

+```````````````````````````````` + + +A [link label] must contain at least one [non-whitespace character]: + +```````````````````````````````` example +[] + +[]: /uri +. +

[]

+

[]: /uri

+```````````````````````````````` + + +```````````````````````````````` example +[ + ] + +[ + ]: /uri +. +

[ +]

+

[ +]: /uri

+```````````````````````````````` + + +A [collapsed reference link](@) +consists of a [link label] that [matches] a +[link reference definition] elsewhere in the +document, followed by the string `[]`. +The contents of the first link label are parsed as inlines, +which are used as the link's text. The link's URI and title are +provided by the matching reference link definition. Thus, +`[foo][]` is equivalent to `[foo][foo]`. + +```````````````````````````````` example +[foo][] + +[foo]: /url "title" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +[*foo* bar][] + +[*foo* bar]: /url "title" +. +

foo bar

+```````````````````````````````` + + +The link labels are case-insensitive: + +```````````````````````````````` example +[Foo][] + +[foo]: /url "title" +. +

Foo

+```````````````````````````````` + + + +As with full reference links, [whitespace] is not +allowed between the two sets of brackets: + +```````````````````````````````` example +[foo] +[] + +[foo]: /url "title" +. +

foo +[]

+```````````````````````````````` + + +A [shortcut reference link](@) +consists of a [link label] that [matches] a +[link reference definition] elsewhere in the +document and is not followed by `[]` or a link label. +The contents of the first link label are parsed as inlines, +which are used as the link's text. The link's URI and title +are provided by the matching link reference definition. +Thus, `[foo]` is equivalent to `[foo][]`. + +```````````````````````````````` example +[foo] + +[foo]: /url "title" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +[*foo* bar] + +[*foo* bar]: /url "title" +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +[[*foo* bar]] + +[*foo* bar]: /url "title" +. +

[foo bar]

+```````````````````````````````` + + +```````````````````````````````` example +[[bar [foo] + +[foo]: /url +. +

[[bar foo

+```````````````````````````````` + + +The link labels are case-insensitive: + +```````````````````````````````` example +[Foo] + +[foo]: /url "title" +. +

Foo

+```````````````````````````````` + + +A space after the link text should be preserved: + +```````````````````````````````` example +[foo] bar + +[foo]: /url +. +

foo bar

+```````````````````````````````` + + +If you just want bracketed text, you can backslash-escape the +opening bracket to avoid links: + +```````````````````````````````` example +\[foo] + +[foo]: /url "title" +. +

[foo]

+```````````````````````````````` + + +Note that this is a link, because a link label ends with the first +following closing bracket: + +```````````````````````````````` example +[foo*]: /url + +*[foo*] +. +

*foo*

+```````````````````````````````` + + +Full and compact references take precedence over shortcut +references: + +```````````````````````````````` example +[foo][bar] + +[foo]: /url1 +[bar]: /url2 +. +

foo

+```````````````````````````````` + +```````````````````````````````` example +[foo][] + +[foo]: /url1 +. +

foo

+```````````````````````````````` + +Inline links also take precedence: + +```````````````````````````````` example +[foo]() + +[foo]: /url1 +. +

foo

+```````````````````````````````` + +```````````````````````````````` example +[foo](not a link) + +[foo]: /url1 +. +

foo(not a link)

+```````````````````````````````` + +In the following case `[bar][baz]` is parsed as a reference, +`[foo]` as normal text: + +```````````````````````````````` example +[foo][bar][baz] + +[baz]: /url +. +

[foo]bar

+```````````````````````````````` + + +Here, though, `[foo][bar]` is parsed as a reference, since +`[bar]` is defined: + +```````````````````````````````` example +[foo][bar][baz] + +[baz]: /url1 +[bar]: /url2 +. +

foobaz

+```````````````````````````````` + + +Here `[foo]` is not parsed as a shortcut reference, because it +is followed by a link label (even though `[bar]` is not defined): + +```````````````````````````````` example +[foo][bar][baz] + +[baz]: /url1 +[foo]: /url2 +. +

[foo]bar

+```````````````````````````````` + + + +## Images + +Syntax for images is like the syntax for links, with one +difference. Instead of [link text], we have an +[image description](@). The rules for this are the +same as for [link text], except that (a) an +image description starts with `![` rather than `[`, and +(b) an image description may contain links. +An image description has inline elements +as its contents. When an image is rendered to HTML, +this is standardly used as the image's `alt` attribute. + +```````````````````````````````` example +![foo](/url "title") +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +![foo *bar*] + +[foo *bar*]: train.jpg "train & tracks" +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +![foo ![bar](/url)](/url2) +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +![foo [bar](/url)](/url2) +. +

foo bar

+```````````````````````````````` + + +Though this spec is concerned with parsing, not rendering, it is +recommended that in rendering to HTML, only the plain string content +of the [image description] be used. Note that in +the above example, the alt attribute's value is `foo bar`, not `foo +[bar](/url)` or `foo bar`. Only the plain string +content is rendered, without formatting. + +```````````````````````````````` example +![foo *bar*][] + +[foo *bar*]: train.jpg "train & tracks" +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +![foo *bar*][foobar] + +[FOOBAR]: train.jpg "train & tracks" +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +![foo](train.jpg) +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +My ![foo bar](/path/to/train.jpg "title" ) +. +

My foo bar

+```````````````````````````````` + + +```````````````````````````````` example +![foo]() +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +![](/url) +. +

+```````````````````````````````` + + +Reference-style: + +```````````````````````````````` example +![foo][bar] + +[bar]: /url +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +![foo][bar] + +[BAR]: /url +. +

foo

+```````````````````````````````` + + +Collapsed: + +```````````````````````````````` example +![foo][] + +[foo]: /url "title" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +![*foo* bar][] + +[*foo* bar]: /url "title" +. +

foo bar

+```````````````````````````````` + + +The labels are case-insensitive: + +```````````````````````````````` example +![Foo][] + +[foo]: /url "title" +. +

Foo

+```````````````````````````````` + + +As with reference links, [whitespace] is not allowed +between the two sets of brackets: + +```````````````````````````````` example +![foo] +[] + +[foo]: /url "title" +. +

foo +[]

+```````````````````````````````` + + +Shortcut: + +```````````````````````````````` example +![foo] + +[foo]: /url "title" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +![*foo* bar] + +[*foo* bar]: /url "title" +. +

foo bar

+```````````````````````````````` + + +Note that link labels cannot contain unescaped brackets: + +```````````````````````````````` example +![[foo]] + +[[foo]]: /url "title" +. +

![[foo]]

+

[[foo]]: /url "title"

+```````````````````````````````` + + +The link labels are case-insensitive: + +```````````````````````````````` example +![Foo] + +[foo]: /url "title" +. +

Foo

+```````````````````````````````` + + +If you just want a literal `!` followed by bracketed text, you can +backslash-escape the opening `[`: + +```````````````````````````````` example +!\[foo] + +[foo]: /url "title" +. +

![foo]

+```````````````````````````````` + + +If you want a link after a literal `!`, backslash-escape the +`!`: + +```````````````````````````````` example +\![foo] + +[foo]: /url "title" +. +

!foo

+```````````````````````````````` + + +## Autolinks + +[Autolink](@)s are absolute URIs and email addresses inside +`<` and `>`. They are parsed as links, with the URL or email address +as the link label. + +A [URI autolink](@) consists of `<`, followed by an +[absolute URI] followed by `>`. It is parsed as +a link to the URI, with the URI as the link's label. + +An [absolute URI](@), +for these purposes, consists of a [scheme] followed by a colon (`:`) +followed by zero or more characters other than ASCII +[whitespace] and control characters, `<`, and `>`. If +the URI includes these characters, they must be percent-encoded +(e.g. `%20` for a space). + +For purposes of this spec, a [scheme](@) is any sequence +of 2--32 characters beginning with an ASCII letter and followed +by any combination of ASCII letters, digits, or the symbols plus +("+"), period ("."), or hyphen ("-"). + +Here are some valid autolinks: + +```````````````````````````````` example + +. +

http://foo.bar.baz

+```````````````````````````````` + + +```````````````````````````````` example + +. +

http://foo.bar.baz/test?q=hello&id=22&boolean

+```````````````````````````````` + + +```````````````````````````````` example + +. +

irc://foo.bar:2233/baz

+```````````````````````````````` + + +Uppercase is also fine: + +```````````````````````````````` example + +. +

MAILTO:FOO@BAR.BAZ

+```````````````````````````````` + + +Note that many strings that count as [absolute URIs] for +purposes of this spec are not valid URIs, because their +schemes are not registered or because of other problems +with their syntax: + +```````````````````````````````` example + +. +

a+b+c:d

+```````````````````````````````` + + +```````````````````````````````` example + +. +

made-up-scheme://foo,bar

+```````````````````````````````` + + +```````````````````````````````` example + +. +

http://../

+```````````````````````````````` + + +```````````````````````````````` example + +. +

localhost:5001/foo

+```````````````````````````````` + + +Spaces are not allowed in autolinks: + +```````````````````````````````` example + +. +

<http://foo.bar/baz bim>

+```````````````````````````````` + + +Backslash-escapes do not work inside autolinks: + +```````````````````````````````` example + +. +

http://example.com/\[\

+```````````````````````````````` + + +An [email autolink](@) +consists of `<`, followed by an [email address], +followed by `>`. The link's label is the email address, +and the URL is `mailto:` followed by the email address. + +An [email address](@), +for these purposes, is anything that matches +the [non-normative regex from the HTML5 +spec](https://html.spec.whatwg.org/multipage/forms.html#e-mail-state-(type=email)): + + /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])? + (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/ + +Examples of email autolinks: + +```````````````````````````````` example + +. +

foo@bar.example.com

+```````````````````````````````` + + +```````````````````````````````` example + +. +

foo+special@Bar.baz-bar0.com

+```````````````````````````````` + + +Backslash-escapes do not work inside email autolinks: + +```````````````````````````````` example + +. +

<foo+@bar.example.com>

+```````````````````````````````` + + +These are not autolinks: + +```````````````````````````````` example +<> +. +

<>

+```````````````````````````````` + + +```````````````````````````````` example +< http://foo.bar > +. +

< http://foo.bar >

+```````````````````````````````` + + +```````````````````````````````` example + +. +

<m:abc>

+```````````````````````````````` + + +```````````````````````````````` example + +. +

<foo.bar.baz>

+```````````````````````````````` + + +```````````````````````````````` example +http://example.com +. +

http://example.com

+```````````````````````````````` + + +```````````````````````````````` example +foo@bar.example.com +. +

foo@bar.example.com

+```````````````````````````````` + + +## Raw HTML + +Text between `<` and `>` that looks like an HTML tag is parsed as a +raw HTML tag and will be rendered in HTML without escaping. +Tag and attribute names are not limited to current HTML tags, +so custom tags (and even, say, DocBook tags) may be used. + +Here is the grammar for tags: + +A [tag name](@) consists of an ASCII letter +followed by zero or more ASCII letters, digits, or +hyphens (`-`). + +An [attribute](@) consists of [whitespace], +an [attribute name], and an optional +[attribute value specification]. + +An [attribute name](@) +consists of an ASCII letter, `_`, or `:`, followed by zero or more ASCII +letters, digits, `_`, `.`, `:`, or `-`. (Note: This is the XML +specification restricted to ASCII. HTML5 is laxer.) + +An [attribute value specification](@) +consists of optional [whitespace], +a `=` character, optional [whitespace], and an [attribute +value]. + +An [attribute value](@) +consists of an [unquoted attribute value], +a [single-quoted attribute value], or a [double-quoted attribute value]. + +An [unquoted attribute value](@) +is a nonempty string of characters not +including [whitespace], `"`, `'`, `=`, `<`, `>`, or `` ` ``. + +A [single-quoted attribute value](@) +consists of `'`, zero or more +characters not including `'`, and a final `'`. + +A [double-quoted attribute value](@) +consists of `"`, zero or more +characters not including `"`, and a final `"`. + +An [open tag](@) consists of a `<` character, a [tag name], +zero or more [attributes], optional [whitespace], an optional `/` +character, and a `>` character. + +A [closing tag](@) consists of the string ``. + +An [HTML comment](@) consists of ``, +where *text* does not start with `>` or `->`, does not end with `-`, +and does not contain `--`. (See the +[HTML5 spec](http://www.w3.org/TR/html5/syntax.html#comments).) + +A [processing instruction](@) +consists of the string ``, and the string +`?>`. + +A [declaration](@) consists of the +string ``, and the character `>`. + +A [CDATA section](@) consists of +the string ``, and the string `]]>`. + +An [HTML tag](@) consists of an [open tag], a [closing tag], +an [HTML comment], a [processing instruction], a [declaration], +or a [CDATA section]. + +Here are some simple open tags: + +```````````````````````````````` example + +. +

+```````````````````````````````` + + +Empty elements: + +```````````````````````````````` example + +. +

+```````````````````````````````` + + +[Whitespace] is allowed: + +```````````````````````````````` example + +. +

+```````````````````````````````` + + +With attributes: + +```````````````````````````````` example + +. +

+```````````````````````````````` + + +Custom tag names can be used: + +```````````````````````````````` example +Foo +. +

Foo

+```````````````````````````````` + + +Illegal tag names, not parsed as HTML: + +```````````````````````````````` example +<33> <__> +. +

<33> <__>

+```````````````````````````````` + + +Illegal attribute names: + +```````````````````````````````` example +
+. +

<a h*#ref="hi">

+```````````````````````````````` + + +Illegal attribute values: + +```````````````````````````````` example +
+. +

</a href="foo">

+```````````````````````````````` + + +Comments: + +```````````````````````````````` example +foo +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +foo +. +

foo <!-- not a comment -- two hyphens -->

+```````````````````````````````` + + +Not comments: + +```````````````````````````````` example +foo foo --> + +foo +. +

foo <!--> foo -->

+

foo <!-- foo--->

+```````````````````````````````` + + +Processing instructions: + +```````````````````````````````` example +foo +. +

foo

+```````````````````````````````` + + +Declarations: + +```````````````````````````````` example +foo +. +

foo

+```````````````````````````````` + + +CDATA sections: + +```````````````````````````````` example +foo &<]]> +. +

foo &<]]>

+```````````````````````````````` + + +Entity and numeric character references are preserved in HTML +attributes: + +```````````````````````````````` example +foo
+. +

foo

+```````````````````````````````` + + +Backslash escapes do not work in HTML attributes: + +```````````````````````````````` example +foo +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example + +. +

<a href=""">

+```````````````````````````````` + + +## Hard line breaks + +A line break (not in a code span or HTML tag) that is preceded +by two or more spaces and does not occur at the end of a block +is parsed as a [hard line break](@) (rendered +in HTML as a `
` tag): + +```````````````````````````````` example +foo +baz +. +

foo
+baz

+```````````````````````````````` + + +For a more visible alternative, a backslash before the +[line ending] may be used instead of two spaces: + +```````````````````````````````` example +foo\ +baz +. +

foo
+baz

+```````````````````````````````` + + +More than two spaces can be used: + +```````````````````````````````` example +foo +baz +. +

foo
+baz

+```````````````````````````````` + + +Leading spaces at the beginning of the next line are ignored: + +```````````````````````````````` example +foo + bar +. +

foo
+bar

+```````````````````````````````` + + +```````````````````````````````` example +foo\ + bar +. +

foo
+bar

+```````````````````````````````` + + +Line breaks can occur inside emphasis, links, and other constructs +that allow inline content: + +```````````````````````````````` example +*foo +bar* +. +

foo
+bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo\ +bar* +. +

foo
+bar

+```````````````````````````````` + + +Line breaks do not occur inside code spans + +```````````````````````````````` example +`code +span` +. +

code span

+```````````````````````````````` + + +```````````````````````````````` example +`code\ +span` +. +

code\ span

+```````````````````````````````` + + +or HTML tags: + +```````````````````````````````` example +
+. +

+```````````````````````````````` + + +```````````````````````````````` example + +. +

+```````````````````````````````` + + +Hard line breaks are for separating inline content within a block. +Neither syntax for hard line breaks works at the end of a paragraph or +other block element: + +```````````````````````````````` example +foo\ +. +

foo\

+```````````````````````````````` + + +```````````````````````````````` example +foo +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +### foo\ +. +

foo\

+```````````````````````````````` + + +```````````````````````````````` example +### foo +. +

foo

+```````````````````````````````` + + +## Soft line breaks + +A regular line break (not in a code span or HTML tag) that is not +preceded by two or more spaces or a backslash is parsed as a +[softbreak](@). (A softbreak may be rendered in HTML either as a +[line ending] or as a space. The result will be the same in +browsers. In the examples here, a [line ending] will be used.) + +```````````````````````````````` example +foo +baz +. +

foo +baz

+```````````````````````````````` + + +Spaces at the end of the line and beginning of the next line are +removed: + +```````````````````````````````` example +foo + baz +. +

foo +baz

+```````````````````````````````` + + +A conforming parser may render a soft line break in HTML either as a +line break or as a space. + +A renderer may also provide an option to render soft line breaks +as hard line breaks. + +## Textual content + +Any characters not given an interpretation by the above rules will +be parsed as plain textual content. + +```````````````````````````````` example +hello $.;'there +. +

hello $.;'there

+```````````````````````````````` + + +```````````````````````````````` example +Foo χρῆν +. +

Foo χρῆν

+```````````````````````````````` + + +Internal spaces are preserved verbatim: + +```````````````````````````````` example +Multiple spaces +. +

Multiple spaces

+```````````````````````````````` + + + + +# Appendix: A parsing strategy + +In this appendix we describe some features of the parsing strategy +used in the CommonMark reference implementations. + +## Overview + +Parsing has two phases: + +1. In the first phase, lines of input are consumed and the block +structure of the document---its division into paragraphs, block quotes, +list items, and so on---is constructed. Text is assigned to these +blocks but not parsed. Link reference definitions are parsed and a +map of links is constructed. + +2. In the second phase, the raw text contents of paragraphs and headings +are parsed into sequences of Markdown inline elements (strings, +code spans, links, emphasis, and so on), using the map of link +references constructed in phase 1. + +At each point in processing, the document is represented as a tree of +**blocks**. The root of the tree is a `document` block. The `document` +may have any number of other blocks as **children**. These children +may, in turn, have other blocks as children. The last child of a block +is normally considered **open**, meaning that subsequent lines of input +can alter its contents. (Blocks that are not open are **closed**.) +Here, for example, is a possible document tree, with the open blocks +marked by arrows: + +``` tree +-> document + -> block_quote + paragraph + "Lorem ipsum dolor\nsit amet." + -> list (type=bullet tight=true bullet_char=-) + list_item + paragraph + "Qui *quodsi iracundia*" + -> list_item + -> paragraph + "aliquando id" +``` + +## Phase 1: block structure + +Each line that is processed has an effect on this tree. The line is +analyzed and, depending on its contents, the document may be altered +in one or more of the following ways: + +1. One or more open blocks may be closed. +2. One or more new blocks may be created as children of the + last open block. +3. Text may be added to the last (deepest) open block remaining + on the tree. + +Once a line has been incorporated into the tree in this way, +it can be discarded, so input can be read in a stream. + +For each line, we follow this procedure: + +1. First we iterate through the open blocks, starting with the +root document, and descending through last children down to the last +open block. Each block imposes a condition that the line must satisfy +if the block is to remain open. For example, a block quote requires a +`>` character. A paragraph requires a non-blank line. +In this phase we may match all or just some of the open +blocks. But we cannot close unmatched blocks yet, because we may have a +[lazy continuation line]. + +2. Next, after consuming the continuation markers for existing +blocks, we look for new block starts (e.g. `>` for a block quote). +If we encounter a new block start, we close any blocks unmatched +in step 1 before creating the new block as a child of the last +matched block. + +3. Finally, we look at the remainder of the line (after block +markers like `>`, list markers, and indentation have been consumed). +This is text that can be incorporated into the last open +block (a paragraph, code block, heading, or raw HTML). + +Setext headings are formed when we see a line of a paragraph +that is a [setext heading underline]. + +Reference link definitions are detected when a paragraph is closed; +the accumulated text lines are parsed to see if they begin with +one or more reference link definitions. Any remainder becomes a +normal paragraph. + +We can see how this works by considering how the tree above is +generated by four lines of Markdown: + +``` markdown +> Lorem ipsum dolor +sit amet. +> - Qui *quodsi iracundia* +> - aliquando id +``` + +At the outset, our document model is just + +``` tree +-> document +``` + +The first line of our text, + +``` markdown +> Lorem ipsum dolor +``` + +causes a `block_quote` block to be created as a child of our +open `document` block, and a `paragraph` block as a child of +the `block_quote`. Then the text is added to the last open +block, the `paragraph`: + +``` tree +-> document + -> block_quote + -> paragraph + "Lorem ipsum dolor" +``` + +The next line, + +``` markdown +sit amet. +``` + +is a "lazy continuation" of the open `paragraph`, so it gets added +to the paragraph's text: + +``` tree +-> document + -> block_quote + -> paragraph + "Lorem ipsum dolor\nsit amet." +``` + +The third line, + +``` markdown +> - Qui *quodsi iracundia* +``` + +causes the `paragraph` block to be closed, and a new `list` block +opened as a child of the `block_quote`. A `list_item` is also +added as a child of the `list`, and a `paragraph` as a child of +the `list_item`. The text is then added to the new `paragraph`: + +``` tree +-> document + -> block_quote + paragraph + "Lorem ipsum dolor\nsit amet." + -> list (type=bullet tight=true bullet_char=-) + -> list_item + -> paragraph + "Qui *quodsi iracundia*" +``` + +The fourth line, + +``` markdown +> - aliquando id +``` + +causes the `list_item` (and its child the `paragraph`) to be closed, +and a new `list_item` opened up as child of the `list`. A `paragraph` +is added as a child of the new `list_item`, to contain the text. +We thus obtain the final tree: + +``` tree +-> document + -> block_quote + paragraph + "Lorem ipsum dolor\nsit amet." + -> list (type=bullet tight=true bullet_char=-) + list_item + paragraph + "Qui *quodsi iracundia*" + -> list_item + -> paragraph + "aliquando id" +``` + +## Phase 2: inline structure + +Once all of the input has been parsed, all open blocks are closed. + +We then "walk the tree," visiting every node, and parse raw +string contents of paragraphs and headings as inlines. At this +point we have seen all the link reference definitions, so we can +resolve reference links as we go. + +``` tree +document + block_quote + paragraph + str "Lorem ipsum dolor" + softbreak + str "sit amet." + list (type=bullet tight=true bullet_char=-) + list_item + paragraph + str "Qui " + emph + str "quodsi iracundia" + list_item + paragraph + str "aliquando id" +``` + +Notice how the [line ending] in the first paragraph has +been parsed as a `softbreak`, and the asterisks in the first list item +have become an `emph`. + +### An algorithm for parsing nested emphasis and links + +By far the trickiest part of inline parsing is handling emphasis, +strong emphasis, links, and images. This is done using the following +algorithm. + +When we're parsing inlines and we hit either + +- a run of `*` or `_` characters, or +- a `[` or `![` + +we insert a text node with these symbols as its literal content, and we +add a pointer to this text node to the [delimiter stack](@). + +The [delimiter stack] is a doubly linked list. Each +element contains a pointer to a text node, plus information about + +- the type of delimiter (`[`, `![`, `*`, `_`) +- the number of delimiters, +- whether the delimiter is "active" (all are active to start), and +- whether the delimiter is a potential opener, a potential closer, + or both (which depends on what sort of characters precede + and follow the delimiters). + +When we hit a `]` character, we call the *look for link or image* +procedure (see below). + +When we hit the end of the input, we call the *process emphasis* +procedure (see below), with `stack_bottom` = NULL. + +#### *look for link or image* + +Starting at the top of the delimiter stack, we look backwards +through the stack for an opening `[` or `![` delimiter. + +- If we don't find one, we return a literal text node `]`. + +- If we do find one, but it's not *active*, we remove the inactive + delimiter from the stack, and return a literal text node `]`. + +- If we find one and it's active, then we parse ahead to see if + we have an inline link/image, reference link/image, compact reference + link/image, or shortcut reference link/image. + + + If we don't, then we remove the opening delimiter from the + delimiter stack and return a literal text node `]`. + + + If we do, then + + * We return a link or image node whose children are the inlines + after the text node pointed to by the opening delimiter. + + * We run *process emphasis* on these inlines, with the `[` opener + as `stack_bottom`. + + * We remove the opening delimiter. + + * If we have a link (and not an image), we also set all + `[` delimiters before the opening delimiter to *inactive*. (This + will prevent us from getting links within links.) + +#### *process emphasis* + +Parameter `stack_bottom` sets a lower bound to how far we +descend in the [delimiter stack]. If it is NULL, we can +go all the way to the bottom. Otherwise, we stop before +visiting `stack_bottom`. + +Let `current_position` point to the element on the [delimiter stack] +just above `stack_bottom` (or the first element if `stack_bottom` +is NULL). + +We keep track of the `openers_bottom` for each delimiter +type (`*`, `_`) and each length of the closing delimiter run +(modulo 3). Initialize this to `stack_bottom`. + +Then we repeat the following until we run out of potential +closers: + +- Move `current_position` forward in the delimiter stack (if needed) + until we find the first potential closer with delimiter `*` or `_`. + (This will be the potential closer closest + to the beginning of the input -- the first one in parse order.) + +- Now, look back in the stack (staying above `stack_bottom` and + the `openers_bottom` for this delimiter type) for the + first matching potential opener ("matching" means same delimiter). + +- If one is found: + + + Figure out whether we have emphasis or strong emphasis: + if both closer and opener spans have length >= 2, we have + strong, otherwise regular. + + + Insert an emph or strong emph node accordingly, after + the text node corresponding to the opener. + + + Remove any delimiters between the opener and closer from + the delimiter stack. + + + Remove 1 (for regular emph) or 2 (for strong emph) delimiters + from the opening and closing text nodes. If they become empty + as a result, remove them and remove the corresponding element + of the delimiter stack. If the closing node is removed, reset + `current_position` to the next element in the stack. + +- If none is found: + + + Set `openers_bottom` to the element before `current_position`. + (We know that there are no openers for this kind of closer up to and + including this point, so this puts a lower bound on future searches.) + + + If the closer at `current_position` is not a potential opener, + remove it from the delimiter stack (since we know it can't + be a closer either). + + + Advance `current_position` to the next element in the stack. + +After we're done, we remove all delimiters above `stack_bottom` from the +delimiter stack. + diff --git a/_benchmark/benchmark_test.go b/_benchmark/benchmark_test.go new file mode 100644 index 0000000..16749ff --- /dev/null +++ b/_benchmark/benchmark_test.go @@ -0,0 +1,55 @@ +package main + +import ( + "bytes" + "io/ioutil" + "testing" + + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/renderer/html" + + "gitlab.com/golang-commonmark/markdown" + + "gopkg.in/russross/blackfriday.v2" +) + +func BenchmarkGoldMark(b *testing.B) { + b.ResetTimer() + source, err := ioutil.ReadFile("_data.md") + if err != nil { + panic(err) + } + markdown := goldmark.New(goldmark.WithRendererOptions(html.WithXHTML())) + var out bytes.Buffer + markdown.Convert([]byte(""), &out) + + for i := 0; i < b.N; i++ { + out.Reset() + if err := markdown.Convert(source, &out); err != nil { + panic(err) + } + } +} + +func BenchmarkGolangCommonMark(b *testing.B) { + b.ResetTimer() + source, err := ioutil.ReadFile("_data.md") + if err != nil { + panic(err) + } + md := markdown.New(markdown.XHTMLOutput(true)) + for i := 0; i < b.N; i++ { + md.RenderToString(source) + } +} + +func BenchmarkBlackFriday(b *testing.B) { + b.ResetTimer() + source, err := ioutil.ReadFile("_data.md") + if err != nil { + panic(err) + } + for i := 0; i < b.N; i++ { + blackfriday.Run(source) + } +} diff --git a/_test/spec.json b/_test/spec.json new file mode 100644 index 0000000..222fee4 --- /dev/null +++ b/_test/spec.json @@ -0,0 +1,5194 @@ +[ + { + "markdown": "\tfoo\tbaz\t\tbim\n", + "html": "
foo\tbaz\t\tbim\n
\n", + "example": 1, + "start_line": 352, + "end_line": 357, + "section": "Tabs" + }, + { + "markdown": " \tfoo\tbaz\t\tbim\n", + "html": "
foo\tbaz\t\tbim\n
\n", + "example": 2, + "start_line": 359, + "end_line": 364, + "section": "Tabs" + }, + { + "markdown": " a\ta\n ὐ\ta\n", + "html": "
a\ta\nὐ\ta\n
\n", + "example": 3, + "start_line": 366, + "end_line": 373, + "section": "Tabs" + }, + { + "markdown": " - foo\n\n\tbar\n", + "html": "
    \n
  • \n

    foo

    \n

    bar

    \n
  • \n
\n", + "example": 4, + "start_line": 379, + "end_line": 390, + "section": "Tabs" + }, + { + "markdown": "- foo\n\n\t\tbar\n", + "html": "
    \n
  • \n

    foo

    \n
      bar\n
    \n
  • \n
\n", + "example": 5, + "start_line": 392, + "end_line": 404, + "section": "Tabs" + }, + { + "markdown": ">\t\tfoo\n", + "html": "
\n
  foo\n
\n
\n", + "example": 6, + "start_line": 415, + "end_line": 422, + "section": "Tabs" + }, + { + "markdown": "-\t\tfoo\n", + "html": "
    \n
  • \n
      foo\n
    \n
  • \n
\n", + "example": 7, + "start_line": 424, + "end_line": 433, + "section": "Tabs" + }, + { + "markdown": " foo\n\tbar\n", + "html": "
foo\nbar\n
\n", + "example": 8, + "start_line": 436, + "end_line": 443, + "section": "Tabs" + }, + { + "markdown": " - foo\n - bar\n\t - baz\n", + "html": "
    \n
  • foo\n
      \n
    • bar\n
        \n
      • baz
      • \n
      \n
    • \n
    \n
  • \n
\n", + "example": 9, + "start_line": 445, + "end_line": 461, + "section": "Tabs" + }, + { + "markdown": "#\tFoo\n", + "html": "

Foo

\n", + "example": 10, + "start_line": 463, + "end_line": 467, + "section": "Tabs" + }, + { + "markdown": "*\t*\t*\t\n", + "html": "
\n", + "example": 11, + "start_line": 469, + "end_line": 473, + "section": "Tabs" + }, + { + "markdown": "- `one\n- two`\n", + "html": "
    \n
  • `one
  • \n
  • two`
  • \n
\n", + "example": 12, + "start_line": 496, + "end_line": 504, + "section": "Precedence" + }, + { + "markdown": "***\n---\n___\n", + "html": "
\n
\n
\n", + "example": 13, + "start_line": 535, + "end_line": 543, + "section": "Thematic breaks" + }, + { + "markdown": "+++\n", + "html": "

+++

\n", + "example": 14, + "start_line": 548, + "end_line": 552, + "section": "Thematic breaks" + }, + { + "markdown": "===\n", + "html": "

===

\n", + "example": 15, + "start_line": 555, + "end_line": 559, + "section": "Thematic breaks" + }, + { + "markdown": "--\n**\n__\n", + "html": "

--\n**\n__

\n", + "example": 16, + "start_line": 564, + "end_line": 572, + "section": "Thematic breaks" + }, + { + "markdown": " ***\n ***\n ***\n", + "html": "
\n
\n
\n", + "example": 17, + "start_line": 577, + "end_line": 585, + "section": "Thematic breaks" + }, + { + "markdown": " ***\n", + "html": "
***\n
\n", + "example": 18, + "start_line": 590, + "end_line": 595, + "section": "Thematic breaks" + }, + { + "markdown": "Foo\n ***\n", + "html": "

Foo\n***

\n", + "example": 19, + "start_line": 598, + "end_line": 604, + "section": "Thematic breaks" + }, + { + "markdown": "_____________________________________\n", + "html": "
\n", + "example": 20, + "start_line": 609, + "end_line": 613, + "section": "Thematic breaks" + }, + { + "markdown": " - - -\n", + "html": "
\n", + "example": 21, + "start_line": 618, + "end_line": 622, + "section": "Thematic breaks" + }, + { + "markdown": " ** * ** * ** * **\n", + "html": "
\n", + "example": 22, + "start_line": 625, + "end_line": 629, + "section": "Thematic breaks" + }, + { + "markdown": "- - - -\n", + "html": "
\n", + "example": 23, + "start_line": 632, + "end_line": 636, + "section": "Thematic breaks" + }, + { + "markdown": "- - - - \n", + "html": "
\n", + "example": 24, + "start_line": 641, + "end_line": 645, + "section": "Thematic breaks" + }, + { + "markdown": "_ _ _ _ a\n\na------\n\n---a---\n", + "html": "

_ _ _ _ a

\n

a------

\n

---a---

\n", + "example": 25, + "start_line": 650, + "end_line": 660, + "section": "Thematic breaks" + }, + { + "markdown": " *-*\n", + "html": "

-

\n", + "example": 26, + "start_line": 666, + "end_line": 670, + "section": "Thematic breaks" + }, + { + "markdown": "- foo\n***\n- bar\n", + "html": "
    \n
  • foo
  • \n
\n
\n
    \n
  • bar
  • \n
\n", + "example": 27, + "start_line": 675, + "end_line": 687, + "section": "Thematic breaks" + }, + { + "markdown": "Foo\n***\nbar\n", + "html": "

Foo

\n
\n

bar

\n", + "example": 28, + "start_line": 692, + "end_line": 700, + "section": "Thematic breaks" + }, + { + "markdown": "Foo\n---\nbar\n", + "html": "

Foo

\n

bar

\n", + "example": 29, + "start_line": 709, + "end_line": 716, + "section": "Thematic breaks" + }, + { + "markdown": "* Foo\n* * *\n* Bar\n", + "html": "
    \n
  • Foo
  • \n
\n
\n
    \n
  • Bar
  • \n
\n", + "example": 30, + "start_line": 722, + "end_line": 734, + "section": "Thematic breaks" + }, + { + "markdown": "- Foo\n- * * *\n", + "html": "
    \n
  • Foo
  • \n
  • \n
    \n
  • \n
\n", + "example": 31, + "start_line": 739, + "end_line": 749, + "section": "Thematic breaks" + }, + { + "markdown": "# foo\n## foo\n### foo\n#### foo\n##### foo\n###### foo\n", + "html": "

foo

\n

foo

\n

foo

\n

foo

\n
foo
\n
foo
\n", + "example": 32, + "start_line": 768, + "end_line": 782, + "section": "ATX headings" + }, + { + "markdown": "####### foo\n", + "html": "

####### foo

\n", + "example": 33, + "start_line": 787, + "end_line": 791, + "section": "ATX headings" + }, + { + "markdown": "#5 bolt\n\n#hashtag\n", + "html": "

#5 bolt

\n

#hashtag

\n", + "example": 34, + "start_line": 802, + "end_line": 809, + "section": "ATX headings" + }, + { + "markdown": "\\## foo\n", + "html": "

## foo

\n", + "example": 35, + "start_line": 814, + "end_line": 818, + "section": "ATX headings" + }, + { + "markdown": "# foo *bar* \\*baz\\*\n", + "html": "

foo bar *baz*

\n", + "example": 36, + "start_line": 823, + "end_line": 827, + "section": "ATX headings" + }, + { + "markdown": "# foo \n", + "html": "

foo

\n", + "example": 37, + "start_line": 832, + "end_line": 836, + "section": "ATX headings" + }, + { + "markdown": " ### foo\n ## foo\n # foo\n", + "html": "

foo

\n

foo

\n

foo

\n", + "example": 38, + "start_line": 841, + "end_line": 849, + "section": "ATX headings" + }, + { + "markdown": " # foo\n", + "html": "
# foo\n
\n", + "example": 39, + "start_line": 854, + "end_line": 859, + "section": "ATX headings" + }, + { + "markdown": "foo\n # bar\n", + "html": "

foo\n# bar

\n", + "example": 40, + "start_line": 862, + "end_line": 868, + "section": "ATX headings" + }, + { + "markdown": "## foo ##\n ### bar ###\n", + "html": "

foo

\n

bar

\n", + "example": 41, + "start_line": 873, + "end_line": 879, + "section": "ATX headings" + }, + { + "markdown": "# foo ##################################\n##### foo ##\n", + "html": "

foo

\n
foo
\n", + "example": 42, + "start_line": 884, + "end_line": 890, + "section": "ATX headings" + }, + { + "markdown": "### foo ### \n", + "html": "

foo

\n", + "example": 43, + "start_line": 895, + "end_line": 899, + "section": "ATX headings" + }, + { + "markdown": "### foo ### b\n", + "html": "

foo ### b

\n", + "example": 44, + "start_line": 906, + "end_line": 910, + "section": "ATX headings" + }, + { + "markdown": "# foo#\n", + "html": "

foo#

\n", + "example": 45, + "start_line": 915, + "end_line": 919, + "section": "ATX headings" + }, + { + "markdown": "### foo \\###\n## foo #\\##\n# foo \\#\n", + "html": "

foo ###

\n

foo ###

\n

foo #

\n", + "example": 46, + "start_line": 925, + "end_line": 933, + "section": "ATX headings" + }, + { + "markdown": "****\n## foo\n****\n", + "html": "
\n

foo

\n
\n", + "example": 47, + "start_line": 939, + "end_line": 947, + "section": "ATX headings" + }, + { + "markdown": "Foo bar\n# baz\nBar foo\n", + "html": "

Foo bar

\n

baz

\n

Bar foo

\n", + "example": 48, + "start_line": 950, + "end_line": 958, + "section": "ATX headings" + }, + { + "markdown": "## \n#\n### ###\n", + "html": "

\n

\n

\n", + "example": 49, + "start_line": 963, + "end_line": 971, + "section": "ATX headings" + }, + { + "markdown": "Foo *bar*\n=========\n\nFoo *bar*\n---------\n", + "html": "

Foo bar

\n

Foo bar

\n", + "example": 50, + "start_line": 1006, + "end_line": 1015, + "section": "Setext headings" + }, + { + "markdown": "Foo *bar\nbaz*\n====\n", + "html": "

Foo bar\nbaz

\n", + "example": 51, + "start_line": 1020, + "end_line": 1027, + "section": "Setext headings" + }, + { + "markdown": " Foo *bar\nbaz*\t\n====\n", + "html": "

Foo bar\nbaz

\n", + "example": 52, + "start_line": 1034, + "end_line": 1041, + "section": "Setext headings" + }, + { + "markdown": "Foo\n-------------------------\n\nFoo\n=\n", + "html": "

Foo

\n

Foo

\n", + "example": 53, + "start_line": 1046, + "end_line": 1055, + "section": "Setext headings" + }, + { + "markdown": " Foo\n---\n\n Foo\n-----\n\n Foo\n ===\n", + "html": "

Foo

\n

Foo

\n

Foo

\n", + "example": 54, + "start_line": 1061, + "end_line": 1074, + "section": "Setext headings" + }, + { + "markdown": " Foo\n ---\n\n Foo\n---\n", + "html": "
Foo\n---\n\nFoo\n
\n
\n", + "example": 55, + "start_line": 1079, + "end_line": 1092, + "section": "Setext headings" + }, + { + "markdown": "Foo\n ---- \n", + "html": "

Foo

\n", + "example": 56, + "start_line": 1098, + "end_line": 1103, + "section": "Setext headings" + }, + { + "markdown": "Foo\n ---\n", + "html": "

Foo\n---

\n", + "example": 57, + "start_line": 1108, + "end_line": 1114, + "section": "Setext headings" + }, + { + "markdown": "Foo\n= =\n\nFoo\n--- -\n", + "html": "

Foo\n= =

\n

Foo

\n
\n", + "example": 58, + "start_line": 1119, + "end_line": 1130, + "section": "Setext headings" + }, + { + "markdown": "Foo \n-----\n", + "html": "

Foo

\n", + "example": 59, + "start_line": 1135, + "end_line": 1140, + "section": "Setext headings" + }, + { + "markdown": "Foo\\\n----\n", + "html": "

Foo\\

\n", + "example": 60, + "start_line": 1145, + "end_line": 1150, + "section": "Setext headings" + }, + { + "markdown": "`Foo\n----\n`\n\n
\n", + "html": "

`Foo

\n

`

\n

<a title="a lot

\n

of dashes"/>

\n", + "example": 61, + "start_line": 1156, + "end_line": 1169, + "section": "Setext headings" + }, + { + "markdown": "> Foo\n---\n", + "html": "
\n

Foo

\n
\n
\n", + "example": 62, + "start_line": 1175, + "end_line": 1183, + "section": "Setext headings" + }, + { + "markdown": "> foo\nbar\n===\n", + "html": "
\n

foo\nbar\n===

\n
\n", + "example": 63, + "start_line": 1186, + "end_line": 1196, + "section": "Setext headings" + }, + { + "markdown": "- Foo\n---\n", + "html": "
    \n
  • Foo
  • \n
\n
\n", + "example": 64, + "start_line": 1199, + "end_line": 1207, + "section": "Setext headings" + }, + { + "markdown": "Foo\nBar\n---\n", + "html": "

Foo\nBar

\n", + "example": 65, + "start_line": 1214, + "end_line": 1221, + "section": "Setext headings" + }, + { + "markdown": "---\nFoo\n---\nBar\n---\nBaz\n", + "html": "
\n

Foo

\n

Bar

\n

Baz

\n", + "example": 66, + "start_line": 1227, + "end_line": 1239, + "section": "Setext headings" + }, + { + "markdown": "\n====\n", + "html": "

====

\n", + "example": 67, + "start_line": 1244, + "end_line": 1249, + "section": "Setext headings" + }, + { + "markdown": "---\n---\n", + "html": "
\n
\n", + "example": 68, + "start_line": 1256, + "end_line": 1262, + "section": "Setext headings" + }, + { + "markdown": "- foo\n-----\n", + "html": "
    \n
  • foo
  • \n
\n
\n", + "example": 69, + "start_line": 1265, + "end_line": 1273, + "section": "Setext headings" + }, + { + "markdown": " foo\n---\n", + "html": "
foo\n
\n
\n", + "example": 70, + "start_line": 1276, + "end_line": 1283, + "section": "Setext headings" + }, + { + "markdown": "> foo\n-----\n", + "html": "
\n

foo

\n
\n
\n", + "example": 71, + "start_line": 1286, + "end_line": 1294, + "section": "Setext headings" + }, + { + "markdown": "\\> foo\n------\n", + "html": "

> foo

\n", + "example": 72, + "start_line": 1300, + "end_line": 1305, + "section": "Setext headings" + }, + { + "markdown": "Foo\n\nbar\n---\nbaz\n", + "html": "

Foo

\n

bar

\n

baz

\n", + "example": 73, + "start_line": 1331, + "end_line": 1341, + "section": "Setext headings" + }, + { + "markdown": "Foo\nbar\n\n---\n\nbaz\n", + "html": "

Foo\nbar

\n
\n

baz

\n", + "example": 74, + "start_line": 1347, + "end_line": 1359, + "section": "Setext headings" + }, + { + "markdown": "Foo\nbar\n* * *\nbaz\n", + "html": "

Foo\nbar

\n
\n

baz

\n", + "example": 75, + "start_line": 1365, + "end_line": 1375, + "section": "Setext headings" + }, + { + "markdown": "Foo\nbar\n\\---\nbaz\n", + "html": "

Foo\nbar\n---\nbaz

\n", + "example": 76, + "start_line": 1380, + "end_line": 1390, + "section": "Setext headings" + }, + { + "markdown": " a simple\n indented code block\n", + "html": "
a simple\n  indented code block\n
\n", + "example": 77, + "start_line": 1408, + "end_line": 1415, + "section": "Indented code blocks" + }, + { + "markdown": " - foo\n\n bar\n", + "html": "
    \n
  • \n

    foo

    \n

    bar

    \n
  • \n
\n", + "example": 78, + "start_line": 1422, + "end_line": 1433, + "section": "Indented code blocks" + }, + { + "markdown": "1. foo\n\n - bar\n", + "html": "
    \n
  1. \n

    foo

    \n
      \n
    • bar
    • \n
    \n
  2. \n
\n", + "example": 79, + "start_line": 1436, + "end_line": 1449, + "section": "Indented code blocks" + }, + { + "markdown": "
\n *hi*\n\n - one\n", + "html": "
<a/>\n*hi*\n\n- one\n
\n", + "example": 80, + "start_line": 1456, + "end_line": 1467, + "section": "Indented code blocks" + }, + { + "markdown": " chunk1\n\n chunk2\n \n \n \n chunk3\n", + "html": "
chunk1\n\nchunk2\n\n\n\nchunk3\n
\n", + "example": 81, + "start_line": 1472, + "end_line": 1489, + "section": "Indented code blocks" + }, + { + "markdown": " chunk1\n \n chunk2\n", + "html": "
chunk1\n  \n  chunk2\n
\n", + "example": 82, + "start_line": 1495, + "end_line": 1504, + "section": "Indented code blocks" + }, + { + "markdown": "Foo\n bar\n\n", + "html": "

Foo\nbar

\n", + "example": 83, + "start_line": 1510, + "end_line": 1517, + "section": "Indented code blocks" + }, + { + "markdown": " foo\nbar\n", + "html": "
foo\n
\n

bar

\n", + "example": 84, + "start_line": 1524, + "end_line": 1531, + "section": "Indented code blocks" + }, + { + "markdown": "# Heading\n foo\nHeading\n------\n foo\n----\n", + "html": "

Heading

\n
foo\n
\n

Heading

\n
foo\n
\n
\n", + "example": 85, + "start_line": 1537, + "end_line": 1552, + "section": "Indented code blocks" + }, + { + "markdown": " foo\n bar\n", + "html": "
    foo\nbar\n
\n", + "example": 86, + "start_line": 1557, + "end_line": 1564, + "section": "Indented code blocks" + }, + { + "markdown": "\n \n foo\n \n\n", + "html": "
foo\n
\n", + "example": 87, + "start_line": 1570, + "end_line": 1579, + "section": "Indented code blocks" + }, + { + "markdown": " foo \n", + "html": "
foo  \n
\n", + "example": 88, + "start_line": 1584, + "end_line": 1589, + "section": "Indented code blocks" + }, + { + "markdown": "```\n<\n >\n```\n", + "html": "
<\n >\n
\n", + "example": 89, + "start_line": 1639, + "end_line": 1648, + "section": "Fenced code blocks" + }, + { + "markdown": "~~~\n<\n >\n~~~\n", + "html": "
<\n >\n
\n", + "example": 90, + "start_line": 1653, + "end_line": 1662, + "section": "Fenced code blocks" + }, + { + "markdown": "``\nfoo\n``\n", + "html": "

foo

\n", + "example": 91, + "start_line": 1666, + "end_line": 1672, + "section": "Fenced code blocks" + }, + { + "markdown": "```\naaa\n~~~\n```\n", + "html": "
aaa\n~~~\n
\n", + "example": 92, + "start_line": 1677, + "end_line": 1686, + "section": "Fenced code blocks" + }, + { + "markdown": "~~~\naaa\n```\n~~~\n", + "html": "
aaa\n```\n
\n", + "example": 93, + "start_line": 1689, + "end_line": 1698, + "section": "Fenced code blocks" + }, + { + "markdown": "````\naaa\n```\n``````\n", + "html": "
aaa\n```\n
\n", + "example": 94, + "start_line": 1703, + "end_line": 1712, + "section": "Fenced code blocks" + }, + { + "markdown": "~~~~\naaa\n~~~\n~~~~\n", + "html": "
aaa\n~~~\n
\n", + "example": 95, + "start_line": 1715, + "end_line": 1724, + "section": "Fenced code blocks" + }, + { + "markdown": "```\n", + "html": "
\n", + "example": 96, + "start_line": 1730, + "end_line": 1734, + "section": "Fenced code blocks" + }, + { + "markdown": "`````\n\n```\naaa\n", + "html": "
\n```\naaa\n
\n", + "example": 97, + "start_line": 1737, + "end_line": 1747, + "section": "Fenced code blocks" + }, + { + "markdown": "> ```\n> aaa\n\nbbb\n", + "html": "
\n
aaa\n
\n
\n

bbb

\n", + "example": 98, + "start_line": 1750, + "end_line": 1761, + "section": "Fenced code blocks" + }, + { + "markdown": "```\n\n \n```\n", + "html": "
\n  \n
\n", + "example": 99, + "start_line": 1766, + "end_line": 1775, + "section": "Fenced code blocks" + }, + { + "markdown": "```\n```\n", + "html": "
\n", + "example": 100, + "start_line": 1780, + "end_line": 1785, + "section": "Fenced code blocks" + }, + { + "markdown": " ```\n aaa\naaa\n```\n", + "html": "
aaa\naaa\n
\n", + "example": 101, + "start_line": 1792, + "end_line": 1801, + "section": "Fenced code blocks" + }, + { + "markdown": " ```\naaa\n aaa\naaa\n ```\n", + "html": "
aaa\naaa\naaa\n
\n", + "example": 102, + "start_line": 1804, + "end_line": 1815, + "section": "Fenced code blocks" + }, + { + "markdown": " ```\n aaa\n aaa\n aaa\n ```\n", + "html": "
aaa\n aaa\naaa\n
\n", + "example": 103, + "start_line": 1818, + "end_line": 1829, + "section": "Fenced code blocks" + }, + { + "markdown": " ```\n aaa\n ```\n", + "html": "
```\naaa\n```\n
\n", + "example": 104, + "start_line": 1834, + "end_line": 1843, + "section": "Fenced code blocks" + }, + { + "markdown": "```\naaa\n ```\n", + "html": "
aaa\n
\n", + "example": 105, + "start_line": 1849, + "end_line": 1856, + "section": "Fenced code blocks" + }, + { + "markdown": " ```\naaa\n ```\n", + "html": "
aaa\n
\n", + "example": 106, + "start_line": 1859, + "end_line": 1866, + "section": "Fenced code blocks" + }, + { + "markdown": "```\naaa\n ```\n", + "html": "
aaa\n    ```\n
\n", + "example": 107, + "start_line": 1871, + "end_line": 1879, + "section": "Fenced code blocks" + }, + { + "markdown": "``` ```\naaa\n", + "html": "

\naaa

\n", + "example": 108, + "start_line": 1885, + "end_line": 1891, + "section": "Fenced code blocks" + }, + { + "markdown": "~~~~~~\naaa\n~~~ ~~\n", + "html": "
aaa\n~~~ ~~\n
\n", + "example": 109, + "start_line": 1894, + "end_line": 1902, + "section": "Fenced code blocks" + }, + { + "markdown": "foo\n```\nbar\n```\nbaz\n", + "html": "

foo

\n
bar\n
\n

baz

\n", + "example": 110, + "start_line": 1908, + "end_line": 1919, + "section": "Fenced code blocks" + }, + { + "markdown": "foo\n---\n~~~\nbar\n~~~\n# baz\n", + "html": "

foo

\n
bar\n
\n

baz

\n", + "example": 111, + "start_line": 1925, + "end_line": 1937, + "section": "Fenced code blocks" + }, + { + "markdown": "```ruby\ndef foo(x)\n return 3\nend\n```\n", + "html": "
def foo(x)\n  return 3\nend\n
\n", + "example": 112, + "start_line": 1947, + "end_line": 1958, + "section": "Fenced code blocks" + }, + { + "markdown": "~~~~ ruby startline=3 $%@#$\ndef foo(x)\n return 3\nend\n~~~~~~~\n", + "html": "
def foo(x)\n  return 3\nend\n
\n", + "example": 113, + "start_line": 1961, + "end_line": 1972, + "section": "Fenced code blocks" + }, + { + "markdown": "````;\n````\n", + "html": "
\n", + "example": 114, + "start_line": 1975, + "end_line": 1980, + "section": "Fenced code blocks" + }, + { + "markdown": "``` aa ```\nfoo\n", + "html": "

aa\nfoo

\n", + "example": 115, + "start_line": 1985, + "end_line": 1991, + "section": "Fenced code blocks" + }, + { + "markdown": "~~~ aa ``` ~~~\nfoo\n~~~\n", + "html": "
foo\n
\n", + "example": 116, + "start_line": 1996, + "end_line": 2003, + "section": "Fenced code blocks" + }, + { + "markdown": "```\n``` aaa\n```\n", + "html": "
``` aaa\n
\n", + "example": 117, + "start_line": 2008, + "end_line": 2015, + "section": "Fenced code blocks" + }, + { + "markdown": "
\n
\n**Hello**,\n\n_world_.\n
\n
\n", + "html": "
\n
\n**Hello**,\n

world.\n

\n
\n", + "example": 118, + "start_line": 2087, + "end_line": 2102, + "section": "HTML blocks" + }, + { + "markdown": "\n \n \n \n
\n hi\n
\n\nokay.\n", + "html": "\n \n \n \n
\n hi\n
\n

okay.

\n", + "example": 119, + "start_line": 2116, + "end_line": 2135, + "section": "HTML blocks" + }, + { + "markdown": "
\n*foo*\n", + "example": 121, + "start_line": 2151, + "end_line": 2157, + "section": "HTML blocks" + }, + { + "markdown": "
\n\n*Markdown*\n\n
\n", + "html": "
\n

Markdown

\n
\n", + "example": 122, + "start_line": 2162, + "end_line": 2172, + "section": "HTML blocks" + }, + { + "markdown": "
\n
\n", + "html": "
\n
\n", + "example": 123, + "start_line": 2178, + "end_line": 2186, + "section": "HTML blocks" + }, + { + "markdown": "
\n
\n", + "html": "
\n
\n", + "example": 124, + "start_line": 2189, + "end_line": 2197, + "section": "HTML blocks" + }, + { + "markdown": "
\n*foo*\n\n*bar*\n", + "html": "
\n*foo*\n

bar

\n", + "example": 125, + "start_line": 2201, + "end_line": 2210, + "section": "HTML blocks" + }, + { + "markdown": "
\n", + "html": "\n", + "example": 129, + "start_line": 2250, + "end_line": 2254, + "section": "HTML blocks" + }, + { + "markdown": "
\nfoo\n
\n", + "html": "
\nfoo\n
\n", + "example": 130, + "start_line": 2257, + "end_line": 2265, + "section": "HTML blocks" + }, + { + "markdown": "
\n``` c\nint x = 33;\n```\n", + "html": "
\n``` c\nint x = 33;\n```\n", + "example": 131, + "start_line": 2274, + "end_line": 2284, + "section": "HTML blocks" + }, + { + "markdown": "\n*bar*\n\n", + "html": "\n*bar*\n\n", + "example": 132, + "start_line": 2291, + "end_line": 2299, + "section": "HTML blocks" + }, + { + "markdown": "\n*bar*\n\n", + "html": "\n*bar*\n\n", + "example": 133, + "start_line": 2304, + "end_line": 2312, + "section": "HTML blocks" + }, + { + "markdown": "\n*bar*\n\n", + "html": "\n*bar*\n\n", + "example": 134, + "start_line": 2315, + "end_line": 2323, + "section": "HTML blocks" + }, + { + "markdown": "\n*bar*\n", + "html": "\n*bar*\n", + "example": 135, + "start_line": 2326, + "end_line": 2332, + "section": "HTML blocks" + }, + { + "markdown": "\n*foo*\n\n", + "html": "\n*foo*\n\n", + "example": 136, + "start_line": 2341, + "end_line": 2349, + "section": "HTML blocks" + }, + { + "markdown": "\n\n*foo*\n\n\n", + "html": "\n

foo

\n
\n", + "example": 137, + "start_line": 2356, + "end_line": 2366, + "section": "HTML blocks" + }, + { + "markdown": "*foo*\n", + "html": "

foo

\n", + "example": 138, + "start_line": 2374, + "end_line": 2378, + "section": "HTML blocks" + }, + { + "markdown": "
\nimport Text.HTML.TagSoup\n\nmain :: IO ()\nmain = print $ parseTags tags\n
\nokay\n", + "html": "
\nimport Text.HTML.TagSoup\n\nmain :: IO ()\nmain = print $ parseTags tags\n
\n

okay

\n", + "example": 139, + "start_line": 2390, + "end_line": 2406, + "section": "HTML blocks" + }, + { + "markdown": "\nokay\n", + "html": "\n

okay

\n", + "example": 140, + "start_line": 2411, + "end_line": 2425, + "section": "HTML blocks" + }, + { + "markdown": "\nh1 {color:red;}\n\np {color:blue;}\n\nokay\n", + "html": "\nh1 {color:red;}\n\np {color:blue;}\n\n

okay

\n", + "example": 141, + "start_line": 2430, + "end_line": 2446, + "section": "HTML blocks" + }, + { + "markdown": "\n\nfoo\n", + "html": "\n\nfoo\n", + "example": 142, + "start_line": 2453, + "end_line": 2463, + "section": "HTML blocks" + }, + { + "markdown": ">
\n> foo\n\nbar\n", + "html": "
\n
\nfoo\n
\n

bar

\n", + "example": 143, + "start_line": 2466, + "end_line": 2477, + "section": "HTML blocks" + }, + { + "markdown": "-
\n- foo\n", + "html": "
    \n
  • \n
    \n
  • \n
  • foo
  • \n
\n", + "example": 144, + "start_line": 2480, + "end_line": 2490, + "section": "HTML blocks" + }, + { + "markdown": "\n*foo*\n", + "html": "\n

foo

\n", + "example": 145, + "start_line": 2495, + "end_line": 2501, + "section": "HTML blocks" + }, + { + "markdown": "*bar*\n*baz*\n", + "html": "*bar*\n

baz

\n", + "example": 146, + "start_line": 2504, + "end_line": 2510, + "section": "HTML blocks" + }, + { + "markdown": "1. *bar*\n", + "html": "1. *bar*\n", + "example": 147, + "start_line": 2516, + "end_line": 2524, + "section": "HTML blocks" + }, + { + "markdown": "\nokay\n", + "html": "\n

okay

\n", + "example": 148, + "start_line": 2529, + "end_line": 2541, + "section": "HTML blocks" + }, + { + "markdown": "';\n\n?>\nokay\n", + "html": "';\n\n?>\n

okay

\n", + "example": 149, + "start_line": 2547, + "end_line": 2561, + "section": "HTML blocks" + }, + { + "markdown": "\n", + "html": "\n", + "example": 150, + "start_line": 2566, + "end_line": 2570, + "section": "HTML blocks" + }, + { + "markdown": "\nokay\n", + "html": "\n

okay

\n", + "example": 151, + "start_line": 2575, + "end_line": 2603, + "section": "HTML blocks" + }, + { + "markdown": " \n\n \n", + "html": " \n
<!-- foo -->\n
\n", + "example": 152, + "start_line": 2608, + "end_line": 2616, + "section": "HTML blocks" + }, + { + "markdown": "
\n\n
\n", + "html": "
\n
<div>\n
\n", + "example": 153, + "start_line": 2619, + "end_line": 2627, + "section": "HTML blocks" + }, + { + "markdown": "Foo\n
\nbar\n
\n", + "html": "

Foo

\n
\nbar\n
\n", + "example": 154, + "start_line": 2633, + "end_line": 2643, + "section": "HTML blocks" + }, + { + "markdown": "
\nbar\n
\n*foo*\n", + "html": "
\nbar\n
\n*foo*\n", + "example": 155, + "start_line": 2650, + "end_line": 2660, + "section": "HTML blocks" + }, + { + "markdown": "Foo\n\nbaz\n", + "html": "

Foo\n\nbaz

\n", + "example": 156, + "start_line": 2665, + "end_line": 2673, + "section": "HTML blocks" + }, + { + "markdown": "
\n\n*Emphasized* text.\n\n
\n", + "html": "
\n

Emphasized text.

\n
\n", + "example": 157, + "start_line": 2706, + "end_line": 2716, + "section": "HTML blocks" + }, + { + "markdown": "
\n*Emphasized* text.\n
\n", + "html": "
\n*Emphasized* text.\n
\n", + "example": 158, + "start_line": 2719, + "end_line": 2727, + "section": "HTML blocks" + }, + { + "markdown": "\n\n\n\n\n\n\n\n
\nHi\n
\n", + "html": "\n\n\n\n
\nHi\n
\n", + "example": 159, + "start_line": 2741, + "end_line": 2761, + "section": "HTML blocks" + }, + { + "markdown": "\n\n \n\n \n\n \n\n
\n Hi\n
\n", + "html": "\n \n
<td>\n  Hi\n</td>\n
\n \n
\n", + "example": 160, + "start_line": 2768, + "end_line": 2789, + "section": "HTML blocks" + }, + { + "markdown": "[foo]: /url \"title\"\n\n[foo]\n", + "html": "

foo

\n", + "example": 161, + "start_line": 2816, + "end_line": 2822, + "section": "Link reference definitions" + }, + { + "markdown": " [foo]: \n /url \n 'the title' \n\n[foo]\n", + "html": "

foo

\n", + "example": 162, + "start_line": 2825, + "end_line": 2833, + "section": "Link reference definitions" + }, + { + "markdown": "[Foo*bar\\]]:my_(url) 'title (with parens)'\n\n[Foo*bar\\]]\n", + "html": "

Foo*bar]

\n", + "example": 163, + "start_line": 2836, + "end_line": 2842, + "section": "Link reference definitions" + }, + { + "markdown": "[Foo bar]:\n\n'title'\n\n[Foo bar]\n", + "html": "

Foo bar

\n", + "example": 164, + "start_line": 2845, + "end_line": 2853, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: /url '\ntitle\nline1\nline2\n'\n\n[foo]\n", + "html": "

foo

\n", + "example": 165, + "start_line": 2858, + "end_line": 2872, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: /url 'title\n\nwith blank line'\n\n[foo]\n", + "html": "

[foo]: /url 'title

\n

with blank line'

\n

[foo]

\n", + "example": 166, + "start_line": 2877, + "end_line": 2887, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]:\n/url\n\n[foo]\n", + "html": "

foo

\n", + "example": 167, + "start_line": 2892, + "end_line": 2899, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]:\n\n[foo]\n", + "html": "

[foo]:

\n

[foo]

\n", + "example": 168, + "start_line": 2904, + "end_line": 2911, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: <>\n\n[foo]\n", + "html": "

foo

\n", + "example": 169, + "start_line": 2916, + "end_line": 2922, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: (baz)\n\n[foo]\n", + "html": "

[foo]: (baz)

\n

[foo]

\n", + "example": 170, + "start_line": 2927, + "end_line": 2934, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: /url\\bar\\*baz \"foo\\\"bar\\baz\"\n\n[foo]\n", + "html": "

foo

\n", + "example": 171, + "start_line": 2940, + "end_line": 2946, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]\n\n[foo]: url\n", + "html": "

foo

\n", + "example": 172, + "start_line": 2951, + "end_line": 2957, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]\n\n[foo]: first\n[foo]: second\n", + "html": "

foo

\n", + "example": 173, + "start_line": 2963, + "end_line": 2970, + "section": "Link reference definitions" + }, + { + "markdown": "[FOO]: /url\n\n[Foo]\n", + "html": "

Foo

\n", + "example": 174, + "start_line": 2976, + "end_line": 2982, + "section": "Link reference definitions" + }, + { + "markdown": "[ΑΓΩ]: /φου\n\n[αγω]\n", + "html": "

αγω

\n", + "example": 175, + "start_line": 2985, + "end_line": 2991, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: /url\n", + "html": "", + "example": 176, + "start_line": 2997, + "end_line": 3000, + "section": "Link reference definitions" + }, + { + "markdown": "[\nfoo\n]: /url\nbar\n", + "html": "

bar

\n", + "example": 177, + "start_line": 3005, + "end_line": 3012, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: /url \"title\" ok\n", + "html": "

[foo]: /url "title" ok

\n", + "example": 178, + "start_line": 3018, + "end_line": 3022, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: /url\n\"title\" ok\n", + "html": "

"title" ok

\n", + "example": 179, + "start_line": 3027, + "end_line": 3032, + "section": "Link reference definitions" + }, + { + "markdown": " [foo]: /url \"title\"\n\n[foo]\n", + "html": "
[foo]: /url "title"\n
\n

[foo]

\n", + "example": 180, + "start_line": 3038, + "end_line": 3046, + "section": "Link reference definitions" + }, + { + "markdown": "```\n[foo]: /url\n```\n\n[foo]\n", + "html": "
[foo]: /url\n
\n

[foo]

\n", + "example": 181, + "start_line": 3052, + "end_line": 3062, + "section": "Link reference definitions" + }, + { + "markdown": "Foo\n[bar]: /baz\n\n[bar]\n", + "html": "

Foo\n[bar]: /baz

\n

[bar]

\n", + "example": 182, + "start_line": 3067, + "end_line": 3076, + "section": "Link reference definitions" + }, + { + "markdown": "# [Foo]\n[foo]: /url\n> bar\n", + "html": "

Foo

\n
\n

bar

\n
\n", + "example": 183, + "start_line": 3082, + "end_line": 3091, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: /url\nbar\n===\n[foo]\n", + "html": "

bar

\n

foo

\n", + "example": 184, + "start_line": 3093, + "end_line": 3101, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: /url\n===\n[foo]\n", + "html": "

===\nfoo

\n", + "example": 185, + "start_line": 3103, + "end_line": 3110, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: /foo-url \"foo\"\n[bar]: /bar-url\n \"bar\"\n[baz]: /baz-url\n\n[foo],\n[bar],\n[baz]\n", + "html": "

foo,\nbar,\nbaz

\n", + "example": 186, + "start_line": 3116, + "end_line": 3129, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]\n\n> [foo]: /url\n", + "html": "

foo

\n
\n
\n", + "example": 187, + "start_line": 3137, + "end_line": 3145, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: /url\n", + "html": "", + "example": 188, + "start_line": 3154, + "end_line": 3157, + "section": "Link reference definitions" + }, + { + "markdown": "aaa\n\nbbb\n", + "html": "

aaa

\n

bbb

\n", + "example": 189, + "start_line": 3171, + "end_line": 3178, + "section": "Paragraphs" + }, + { + "markdown": "aaa\nbbb\n\nccc\nddd\n", + "html": "

aaa\nbbb

\n

ccc\nddd

\n", + "example": 190, + "start_line": 3183, + "end_line": 3194, + "section": "Paragraphs" + }, + { + "markdown": "aaa\n\n\nbbb\n", + "html": "

aaa

\n

bbb

\n", + "example": 191, + "start_line": 3199, + "end_line": 3207, + "section": "Paragraphs" + }, + { + "markdown": " aaa\n bbb\n", + "html": "

aaa\nbbb

\n", + "example": 192, + "start_line": 3212, + "end_line": 3218, + "section": "Paragraphs" + }, + { + "markdown": "aaa\n bbb\n ccc\n", + "html": "

aaa\nbbb\nccc

\n", + "example": 193, + "start_line": 3224, + "end_line": 3232, + "section": "Paragraphs" + }, + { + "markdown": " aaa\nbbb\n", + "html": "

aaa\nbbb

\n", + "example": 194, + "start_line": 3238, + "end_line": 3244, + "section": "Paragraphs" + }, + { + "markdown": " aaa\nbbb\n", + "html": "
aaa\n
\n

bbb

\n", + "example": 195, + "start_line": 3247, + "end_line": 3254, + "section": "Paragraphs" + }, + { + "markdown": "aaa \nbbb \n", + "html": "

aaa
\nbbb

\n", + "example": 196, + "start_line": 3261, + "end_line": 3267, + "section": "Paragraphs" + }, + { + "markdown": " \n\naaa\n \n\n# aaa\n\n \n", + "html": "

aaa

\n

aaa

\n", + "example": 197, + "start_line": 3278, + "end_line": 3290, + "section": "Blank lines" + }, + { + "markdown": "> # Foo\n> bar\n> baz\n", + "html": "
\n

Foo

\n

bar\nbaz

\n
\n", + "example": 198, + "start_line": 3344, + "end_line": 3354, + "section": "Block quotes" + }, + { + "markdown": "># Foo\n>bar\n> baz\n", + "html": "
\n

Foo

\n

bar\nbaz

\n
\n", + "example": 199, + "start_line": 3359, + "end_line": 3369, + "section": "Block quotes" + }, + { + "markdown": " > # Foo\n > bar\n > baz\n", + "html": "
\n

Foo

\n

bar\nbaz

\n
\n", + "example": 200, + "start_line": 3374, + "end_line": 3384, + "section": "Block quotes" + }, + { + "markdown": " > # Foo\n > bar\n > baz\n", + "html": "
> # Foo\n> bar\n> baz\n
\n", + "example": 201, + "start_line": 3389, + "end_line": 3398, + "section": "Block quotes" + }, + { + "markdown": "> # Foo\n> bar\nbaz\n", + "html": "
\n

Foo

\n

bar\nbaz

\n
\n", + "example": 202, + "start_line": 3404, + "end_line": 3414, + "section": "Block quotes" + }, + { + "markdown": "> bar\nbaz\n> foo\n", + "html": "
\n

bar\nbaz\nfoo

\n
\n", + "example": 203, + "start_line": 3420, + "end_line": 3430, + "section": "Block quotes" + }, + { + "markdown": "> foo\n---\n", + "html": "
\n

foo

\n
\n
\n", + "example": 204, + "start_line": 3444, + "end_line": 3452, + "section": "Block quotes" + }, + { + "markdown": "> - foo\n- bar\n", + "html": "
\n
    \n
  • foo
  • \n
\n
\n
    \n
  • bar
  • \n
\n", + "example": 205, + "start_line": 3464, + "end_line": 3476, + "section": "Block quotes" + }, + { + "markdown": "> foo\n bar\n", + "html": "
\n
foo\n
\n
\n
bar\n
\n", + "example": 206, + "start_line": 3482, + "end_line": 3492, + "section": "Block quotes" + }, + { + "markdown": "> ```\nfoo\n```\n", + "html": "
\n
\n
\n

foo

\n
\n", + "example": 207, + "start_line": 3495, + "end_line": 3505, + "section": "Block quotes" + }, + { + "markdown": "> foo\n - bar\n", + "html": "
\n

foo\n- bar

\n
\n", + "example": 208, + "start_line": 3511, + "end_line": 3519, + "section": "Block quotes" + }, + { + "markdown": ">\n", + "html": "
\n
\n", + "example": 209, + "start_line": 3535, + "end_line": 3540, + "section": "Block quotes" + }, + { + "markdown": ">\n> \n> \n", + "html": "
\n
\n", + "example": 210, + "start_line": 3543, + "end_line": 3550, + "section": "Block quotes" + }, + { + "markdown": ">\n> foo\n> \n", + "html": "
\n

foo

\n
\n", + "example": 211, + "start_line": 3555, + "end_line": 3563, + "section": "Block quotes" + }, + { + "markdown": "> foo\n\n> bar\n", + "html": "
\n

foo

\n
\n
\n

bar

\n
\n", + "example": 212, + "start_line": 3568, + "end_line": 3579, + "section": "Block quotes" + }, + { + "markdown": "> foo\n> bar\n", + "html": "
\n

foo\nbar

\n
\n", + "example": 213, + "start_line": 3590, + "end_line": 3598, + "section": "Block quotes" + }, + { + "markdown": "> foo\n>\n> bar\n", + "html": "
\n

foo

\n

bar

\n
\n", + "example": 214, + "start_line": 3603, + "end_line": 3612, + "section": "Block quotes" + }, + { + "markdown": "foo\n> bar\n", + "html": "

foo

\n
\n

bar

\n
\n", + "example": 215, + "start_line": 3617, + "end_line": 3625, + "section": "Block quotes" + }, + { + "markdown": "> aaa\n***\n> bbb\n", + "html": "
\n

aaa

\n
\n
\n
\n

bbb

\n
\n", + "example": 216, + "start_line": 3631, + "end_line": 3643, + "section": "Block quotes" + }, + { + "markdown": "> bar\nbaz\n", + "html": "
\n

bar\nbaz

\n
\n", + "example": 217, + "start_line": 3649, + "end_line": 3657, + "section": "Block quotes" + }, + { + "markdown": "> bar\n\nbaz\n", + "html": "
\n

bar

\n
\n

baz

\n", + "example": 218, + "start_line": 3660, + "end_line": 3669, + "section": "Block quotes" + }, + { + "markdown": "> bar\n>\nbaz\n", + "html": "
\n

bar

\n
\n

baz

\n", + "example": 219, + "start_line": 3672, + "end_line": 3681, + "section": "Block quotes" + }, + { + "markdown": "> > > foo\nbar\n", + "html": "
\n
\n
\n

foo\nbar

\n
\n
\n
\n", + "example": 220, + "start_line": 3688, + "end_line": 3700, + "section": "Block quotes" + }, + { + "markdown": ">>> foo\n> bar\n>>baz\n", + "html": "
\n
\n
\n

foo\nbar\nbaz

\n
\n
\n
\n", + "example": 221, + "start_line": 3703, + "end_line": 3717, + "section": "Block quotes" + }, + { + "markdown": "> code\n\n> not code\n", + "html": "
\n
code\n
\n
\n
\n

not code

\n
\n", + "example": 222, + "start_line": 3725, + "end_line": 3737, + "section": "Block quotes" + }, + { + "markdown": "A paragraph\nwith two lines.\n\n indented code\n\n> A block quote.\n", + "html": "

A paragraph\nwith two lines.

\n
indented code\n
\n
\n

A block quote.

\n
\n", + "example": 223, + "start_line": 3779, + "end_line": 3794, + "section": "List items" + }, + { + "markdown": "1. A paragraph\n with two lines.\n\n indented code\n\n > A block quote.\n", + "html": "
    \n
  1. \n

    A paragraph\nwith two lines.

    \n
    indented code\n
    \n
    \n

    A block quote.

    \n
    \n
  2. \n
\n", + "example": 224, + "start_line": 3801, + "end_line": 3820, + "section": "List items" + }, + { + "markdown": "- one\n\n two\n", + "html": "
    \n
  • one
  • \n
\n

two

\n", + "example": 225, + "start_line": 3834, + "end_line": 3843, + "section": "List items" + }, + { + "markdown": "- one\n\n two\n", + "html": "
    \n
  • \n

    one

    \n

    two

    \n
  • \n
\n", + "example": 226, + "start_line": 3846, + "end_line": 3857, + "section": "List items" + }, + { + "markdown": " - one\n\n two\n", + "html": "
    \n
  • one
  • \n
\n
 two\n
\n", + "example": 227, + "start_line": 3860, + "end_line": 3870, + "section": "List items" + }, + { + "markdown": " - one\n\n two\n", + "html": "
    \n
  • \n

    one

    \n

    two

    \n
  • \n
\n", + "example": 228, + "start_line": 3873, + "end_line": 3884, + "section": "List items" + }, + { + "markdown": " > > 1. one\n>>\n>> two\n", + "html": "
\n
\n
    \n
  1. \n

    one

    \n

    two

    \n
  2. \n
\n
\n
\n", + "example": 229, + "start_line": 3895, + "end_line": 3910, + "section": "List items" + }, + { + "markdown": ">>- one\n>>\n > > two\n", + "html": "
\n
\n
    \n
  • one
  • \n
\n

two

\n
\n
\n", + "example": 230, + "start_line": 3922, + "end_line": 3935, + "section": "List items" + }, + { + "markdown": "-one\n\n2.two\n", + "html": "

-one

\n

2.two

\n", + "example": 231, + "start_line": 3941, + "end_line": 3948, + "section": "List items" + }, + { + "markdown": "- foo\n\n\n bar\n", + "html": "
    \n
  • \n

    foo

    \n

    bar

    \n
  • \n
\n", + "example": 232, + "start_line": 3954, + "end_line": 3966, + "section": "List items" + }, + { + "markdown": "1. foo\n\n ```\n bar\n ```\n\n baz\n\n > bam\n", + "html": "
    \n
  1. \n

    foo

    \n
    bar\n
    \n

    baz

    \n
    \n

    bam

    \n
    \n
  2. \n
\n", + "example": 233, + "start_line": 3971, + "end_line": 3993, + "section": "List items" + }, + { + "markdown": "- Foo\n\n bar\n\n\n baz\n", + "html": "
    \n
  • \n

    Foo

    \n
    bar\n\n\nbaz\n
    \n
  • \n
\n", + "example": 234, + "start_line": 3999, + "end_line": 4017, + "section": "List items" + }, + { + "markdown": "123456789. ok\n", + "html": "
    \n
  1. ok
  2. \n
\n", + "example": 235, + "start_line": 4021, + "end_line": 4027, + "section": "List items" + }, + { + "markdown": "1234567890. not ok\n", + "html": "

1234567890. not ok

\n", + "example": 236, + "start_line": 4030, + "end_line": 4034, + "section": "List items" + }, + { + "markdown": "0. ok\n", + "html": "
    \n
  1. ok
  2. \n
\n", + "example": 237, + "start_line": 4039, + "end_line": 4045, + "section": "List items" + }, + { + "markdown": "003. ok\n", + "html": "
    \n
  1. ok
  2. \n
\n", + "example": 238, + "start_line": 4048, + "end_line": 4054, + "section": "List items" + }, + { + "markdown": "-1. not ok\n", + "html": "

-1. not ok

\n", + "example": 239, + "start_line": 4059, + "end_line": 4063, + "section": "List items" + }, + { + "markdown": "- foo\n\n bar\n", + "html": "
    \n
  • \n

    foo

    \n
    bar\n
    \n
  • \n
\n", + "example": 240, + "start_line": 4082, + "end_line": 4094, + "section": "List items" + }, + { + "markdown": " 10. foo\n\n bar\n", + "html": "
    \n
  1. \n

    foo

    \n
    bar\n
    \n
  2. \n
\n", + "example": 241, + "start_line": 4099, + "end_line": 4111, + "section": "List items" + }, + { + "markdown": " indented code\n\nparagraph\n\n more code\n", + "html": "
indented code\n
\n

paragraph

\n
more code\n
\n", + "example": 242, + "start_line": 4118, + "end_line": 4130, + "section": "List items" + }, + { + "markdown": "1. indented code\n\n paragraph\n\n more code\n", + "html": "
    \n
  1. \n
    indented code\n
    \n

    paragraph

    \n
    more code\n
    \n
  2. \n
\n", + "example": 243, + "start_line": 4133, + "end_line": 4149, + "section": "List items" + }, + { + "markdown": "1. indented code\n\n paragraph\n\n more code\n", + "html": "
    \n
  1. \n
     indented code\n
    \n

    paragraph

    \n
    more code\n
    \n
  2. \n
\n", + "example": 244, + "start_line": 4155, + "end_line": 4171, + "section": "List items" + }, + { + "markdown": " foo\n\nbar\n", + "html": "

foo

\n

bar

\n", + "example": 245, + "start_line": 4182, + "end_line": 4189, + "section": "List items" + }, + { + "markdown": "- foo\n\n bar\n", + "html": "
    \n
  • foo
  • \n
\n

bar

\n", + "example": 246, + "start_line": 4192, + "end_line": 4201, + "section": "List items" + }, + { + "markdown": "- foo\n\n bar\n", + "html": "
    \n
  • \n

    foo

    \n

    bar

    \n
  • \n
\n", + "example": 247, + "start_line": 4209, + "end_line": 4220, + "section": "List items" + }, + { + "markdown": "-\n foo\n-\n ```\n bar\n ```\n-\n baz\n", + "html": "
    \n
  • foo
  • \n
  • \n
    bar\n
    \n
  • \n
  • \n
    baz\n
    \n
  • \n
\n", + "example": 248, + "start_line": 4237, + "end_line": 4258, + "section": "List items" + }, + { + "markdown": "- \n foo\n", + "html": "
    \n
  • foo
  • \n
\n", + "example": 249, + "start_line": 4263, + "end_line": 4270, + "section": "List items" + }, + { + "markdown": "-\n\n foo\n", + "html": "
    \n
  • \n
\n

foo

\n", + "example": 250, + "start_line": 4277, + "end_line": 4286, + "section": "List items" + }, + { + "markdown": "- foo\n-\n- bar\n", + "html": "
    \n
  • foo
  • \n
  • \n
  • bar
  • \n
\n", + "example": 251, + "start_line": 4291, + "end_line": 4301, + "section": "List items" + }, + { + "markdown": "- foo\n- \n- bar\n", + "html": "
    \n
  • foo
  • \n
  • \n
  • bar
  • \n
\n", + "example": 252, + "start_line": 4306, + "end_line": 4316, + "section": "List items" + }, + { + "markdown": "1. foo\n2.\n3. bar\n", + "html": "
    \n
  1. foo
  2. \n
  3. \n
  4. bar
  5. \n
\n", + "example": 253, + "start_line": 4321, + "end_line": 4331, + "section": "List items" + }, + { + "markdown": "*\n", + "html": "
    \n
  • \n
\n", + "example": 254, + "start_line": 4336, + "end_line": 4342, + "section": "List items" + }, + { + "markdown": "foo\n*\n\nfoo\n1.\n", + "html": "

foo\n*

\n

foo\n1.

\n", + "example": 255, + "start_line": 4346, + "end_line": 4357, + "section": "List items" + }, + { + "markdown": " 1. A paragraph\n with two lines.\n\n indented code\n\n > A block quote.\n", + "html": "
    \n
  1. \n

    A paragraph\nwith two lines.

    \n
    indented code\n
    \n
    \n

    A block quote.

    \n
    \n
  2. \n
\n", + "example": 256, + "start_line": 4368, + "end_line": 4387, + "section": "List items" + }, + { + "markdown": " 1. A paragraph\n with two lines.\n\n indented code\n\n > A block quote.\n", + "html": "
    \n
  1. \n

    A paragraph\nwith two lines.

    \n
    indented code\n
    \n
    \n

    A block quote.

    \n
    \n
  2. \n
\n", + "example": 257, + "start_line": 4392, + "end_line": 4411, + "section": "List items" + }, + { + "markdown": " 1. A paragraph\n with two lines.\n\n indented code\n\n > A block quote.\n", + "html": "
    \n
  1. \n

    A paragraph\nwith two lines.

    \n
    indented code\n
    \n
    \n

    A block quote.

    \n
    \n
  2. \n
\n", + "example": 258, + "start_line": 4416, + "end_line": 4435, + "section": "List items" + }, + { + "markdown": " 1. A paragraph\n with two lines.\n\n indented code\n\n > A block quote.\n", + "html": "
1.  A paragraph\n    with two lines.\n\n        indented code\n\n    > A block quote.\n
\n", + "example": 259, + "start_line": 4440, + "end_line": 4455, + "section": "List items" + }, + { + "markdown": " 1. A paragraph\nwith two lines.\n\n indented code\n\n > A block quote.\n", + "html": "
    \n
  1. \n

    A paragraph\nwith two lines.

    \n
    indented code\n
    \n
    \n

    A block quote.

    \n
    \n
  2. \n
\n", + "example": 260, + "start_line": 4470, + "end_line": 4489, + "section": "List items" + }, + { + "markdown": " 1. A paragraph\n with two lines.\n", + "html": "
    \n
  1. A paragraph\nwith two lines.
  2. \n
\n", + "example": 261, + "start_line": 4494, + "end_line": 4502, + "section": "List items" + }, + { + "markdown": "> 1. > Blockquote\ncontinued here.\n", + "html": "
\n
    \n
  1. \n
    \n

    Blockquote\ncontinued here.

    \n
    \n
  2. \n
\n
\n", + "example": 262, + "start_line": 4507, + "end_line": 4521, + "section": "List items" + }, + { + "markdown": "> 1. > Blockquote\n> continued here.\n", + "html": "
\n
    \n
  1. \n
    \n

    Blockquote\ncontinued here.

    \n
    \n
  2. \n
\n
\n", + "example": 263, + "start_line": 4524, + "end_line": 4538, + "section": "List items" + }, + { + "markdown": "- foo\n - bar\n - baz\n - boo\n", + "html": "
    \n
  • foo\n
      \n
    • bar\n
        \n
      • baz\n
          \n
        • boo
        • \n
        \n
      • \n
      \n
    • \n
    \n
  • \n
\n", + "example": 264, + "start_line": 4552, + "end_line": 4573, + "section": "List items" + }, + { + "markdown": "- foo\n - bar\n - baz\n - boo\n", + "html": "
    \n
  • foo
  • \n
  • bar
  • \n
  • baz
  • \n
  • boo
  • \n
\n", + "example": 265, + "start_line": 4578, + "end_line": 4590, + "section": "List items" + }, + { + "markdown": "10) foo\n - bar\n", + "html": "
    \n
  1. foo\n
      \n
    • bar
    • \n
    \n
  2. \n
\n", + "example": 266, + "start_line": 4595, + "end_line": 4606, + "section": "List items" + }, + { + "markdown": "10) foo\n - bar\n", + "html": "
    \n
  1. foo
  2. \n
\n
    \n
  • bar
  • \n
\n", + "example": 267, + "start_line": 4611, + "end_line": 4621, + "section": "List items" + }, + { + "markdown": "- - foo\n", + "html": "
    \n
  • \n
      \n
    • foo
    • \n
    \n
  • \n
\n", + "example": 268, + "start_line": 4626, + "end_line": 4636, + "section": "List items" + }, + { + "markdown": "1. - 2. foo\n", + "html": "
    \n
  1. \n
      \n
    • \n
        \n
      1. foo
      2. \n
      \n
    • \n
    \n
  2. \n
\n", + "example": 269, + "start_line": 4639, + "end_line": 4653, + "section": "List items" + }, + { + "markdown": "- # Foo\n- Bar\n ---\n baz\n", + "html": "
    \n
  • \n

    Foo

    \n
  • \n
  • \n

    Bar

    \nbaz
  • \n
\n", + "example": 270, + "start_line": 4658, + "end_line": 4672, + "section": "List items" + }, + { + "markdown": "- foo\n- bar\n+ baz\n", + "html": "
    \n
  • foo
  • \n
  • bar
  • \n
\n
    \n
  • baz
  • \n
\n", + "example": 271, + "start_line": 4894, + "end_line": 4906, + "section": "Lists" + }, + { + "markdown": "1. foo\n2. bar\n3) baz\n", + "html": "
    \n
  1. foo
  2. \n
  3. bar
  4. \n
\n
    \n
  1. baz
  2. \n
\n", + "example": 272, + "start_line": 4909, + "end_line": 4921, + "section": "Lists" + }, + { + "markdown": "Foo\n- bar\n- baz\n", + "html": "

Foo

\n
    \n
  • bar
  • \n
  • baz
  • \n
\n", + "example": 273, + "start_line": 4928, + "end_line": 4938, + "section": "Lists" + }, + { + "markdown": "The number of windows in my house is\n14. The number of doors is 6.\n", + "html": "

The number of windows in my house is\n14. The number of doors is 6.

\n", + "example": 274, + "start_line": 5005, + "end_line": 5011, + "section": "Lists" + }, + { + "markdown": "The number of windows in my house is\n1. The number of doors is 6.\n", + "html": "

The number of windows in my house is

\n
    \n
  1. The number of doors is 6.
  2. \n
\n", + "example": 275, + "start_line": 5015, + "end_line": 5023, + "section": "Lists" + }, + { + "markdown": "- foo\n\n- bar\n\n\n- baz\n", + "html": "
    \n
  • \n

    foo

    \n
  • \n
  • \n

    bar

    \n
  • \n
  • \n

    baz

    \n
  • \n
\n", + "example": 276, + "start_line": 5029, + "end_line": 5048, + "section": "Lists" + }, + { + "markdown": "- foo\n - bar\n - baz\n\n\n bim\n", + "html": "
    \n
  • foo\n
      \n
    • bar\n
        \n
      • \n

        baz

        \n

        bim

        \n
      • \n
      \n
    • \n
    \n
  • \n
\n", + "example": 277, + "start_line": 5050, + "end_line": 5072, + "section": "Lists" + }, + { + "markdown": "- foo\n- bar\n\n\n\n- baz\n- bim\n", + "html": "
    \n
  • foo
  • \n
  • bar
  • \n
\n\n
    \n
  • baz
  • \n
  • bim
  • \n
\n", + "example": 278, + "start_line": 5080, + "end_line": 5098, + "section": "Lists" + }, + { + "markdown": "- foo\n\n notcode\n\n- foo\n\n\n\n code\n", + "html": "
    \n
  • \n

    foo

    \n

    notcode

    \n
  • \n
  • \n

    foo

    \n
  • \n
\n\n
code\n
\n", + "example": 279, + "start_line": 5101, + "end_line": 5124, + "section": "Lists" + }, + { + "markdown": "- a\n - b\n - c\n - d\n - e\n - f\n- g\n", + "html": "
    \n
  • a
  • \n
  • b
  • \n
  • c
  • \n
  • d
  • \n
  • e
  • \n
  • f
  • \n
  • g
  • \n
\n", + "example": 280, + "start_line": 5132, + "end_line": 5150, + "section": "Lists" + }, + { + "markdown": "1. a\n\n 2. b\n\n 3. c\n", + "html": "
    \n
  1. \n

    a

    \n
  2. \n
  3. \n

    b

    \n
  4. \n
  5. \n

    c

    \n
  6. \n
\n", + "example": 281, + "start_line": 5153, + "end_line": 5171, + "section": "Lists" + }, + { + "markdown": "- a\n - b\n - c\n - d\n - e\n", + "html": "
    \n
  • a
  • \n
  • b
  • \n
  • c
  • \n
  • d\n- e
  • \n
\n", + "example": 282, + "start_line": 5177, + "end_line": 5191, + "section": "Lists" + }, + { + "markdown": "1. a\n\n 2. b\n\n 3. c\n", + "html": "
    \n
  1. \n

    a

    \n
  2. \n
  3. \n

    b

    \n
  4. \n
\n
3. c\n
\n", + "example": 283, + "start_line": 5197, + "end_line": 5214, + "section": "Lists" + }, + { + "markdown": "- a\n- b\n\n- c\n", + "html": "
    \n
  • \n

    a

    \n
  • \n
  • \n

    b

    \n
  • \n
  • \n

    c

    \n
  • \n
\n", + "example": 284, + "start_line": 5220, + "end_line": 5237, + "section": "Lists" + }, + { + "markdown": "* a\n*\n\n* c\n", + "html": "
    \n
  • \n

    a

    \n
  • \n
  • \n
  • \n

    c

    \n
  • \n
\n", + "example": 285, + "start_line": 5242, + "end_line": 5257, + "section": "Lists" + }, + { + "markdown": "- a\n- b\n\n c\n- d\n", + "html": "
    \n
  • \n

    a

    \n
  • \n
  • \n

    b

    \n

    c

    \n
  • \n
  • \n

    d

    \n
  • \n
\n", + "example": 286, + "start_line": 5264, + "end_line": 5283, + "section": "Lists" + }, + { + "markdown": "- a\n- b\n\n [ref]: /url\n- d\n", + "html": "
    \n
  • \n

    a

    \n
  • \n
  • \n

    b

    \n
  • \n
  • \n

    d

    \n
  • \n
\n", + "example": 287, + "start_line": 5286, + "end_line": 5304, + "section": "Lists" + }, + { + "markdown": "- a\n- ```\n b\n\n\n ```\n- c\n", + "html": "
    \n
  • a
  • \n
  • \n
    b\n\n\n
    \n
  • \n
  • c
  • \n
\n", + "example": 288, + "start_line": 5309, + "end_line": 5328, + "section": "Lists" + }, + { + "markdown": "- a\n - b\n\n c\n- d\n", + "html": "
    \n
  • a\n
      \n
    • \n

      b

      \n

      c

      \n
    • \n
    \n
  • \n
  • d
  • \n
\n", + "example": 289, + "start_line": 5335, + "end_line": 5353, + "section": "Lists" + }, + { + "markdown": "* a\n > b\n >\n* c\n", + "html": "
    \n
  • a\n
    \n

    b

    \n
    \n
  • \n
  • c
  • \n
\n", + "example": 290, + "start_line": 5359, + "end_line": 5373, + "section": "Lists" + }, + { + "markdown": "- a\n > b\n ```\n c\n ```\n- d\n", + "html": "
    \n
  • a\n
    \n

    b

    \n
    \n
    c\n
    \n
  • \n
  • d
  • \n
\n", + "example": 291, + "start_line": 5379, + "end_line": 5397, + "section": "Lists" + }, + { + "markdown": "- a\n", + "html": "
    \n
  • a
  • \n
\n", + "example": 292, + "start_line": 5402, + "end_line": 5408, + "section": "Lists" + }, + { + "markdown": "- a\n - b\n", + "html": "
    \n
  • a\n
      \n
    • b
    • \n
    \n
  • \n
\n", + "example": 293, + "start_line": 5411, + "end_line": 5422, + "section": "Lists" + }, + { + "markdown": "1. ```\n foo\n ```\n\n bar\n", + "html": "
    \n
  1. \n
    foo\n
    \n

    bar

    \n
  2. \n
\n", + "example": 294, + "start_line": 5428, + "end_line": 5442, + "section": "Lists" + }, + { + "markdown": "* foo\n * bar\n\n baz\n", + "html": "
    \n
  • \n

    foo

    \n
      \n
    • bar
    • \n
    \n

    baz

    \n
  • \n
\n", + "example": 295, + "start_line": 5447, + "end_line": 5462, + "section": "Lists" + }, + { + "markdown": "- a\n - b\n - c\n\n- d\n - e\n - f\n", + "html": "
    \n
  • \n

    a

    \n
      \n
    • b
    • \n
    • c
    • \n
    \n
  • \n
  • \n

    d

    \n
      \n
    • e
    • \n
    • f
    • \n
    \n
  • \n
\n", + "example": 296, + "start_line": 5465, + "end_line": 5490, + "section": "Lists" + }, + { + "markdown": "`hi`lo`\n", + "html": "

hilo`

\n", + "example": 297, + "start_line": 5499, + "end_line": 5503, + "section": "Inlines" + }, + { + "markdown": "\\!\\\"\\#\\$\\%\\&\\'\\(\\)\\*\\+\\,\\-\\.\\/\\:\\;\\<\\=\\>\\?\\@\\[\\\\\\]\\^\\_\\`\\{\\|\\}\\~\n", + "html": "

!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~

\n", + "example": 298, + "start_line": 5513, + "end_line": 5517, + "section": "Backslash escapes" + }, + { + "markdown": "\\\t\\A\\a\\ \\3\\φ\\«\n", + "html": "

\\\t\\A\\a\\ \\3\\φ\\«

\n", + "example": 299, + "start_line": 5523, + "end_line": 5527, + "section": "Backslash escapes" + }, + { + "markdown": "\\*not emphasized*\n\\
not a tag\n\\[not a link](/foo)\n\\`not code`\n1\\. not a list\n\\* not a list\n\\# not a heading\n\\[foo]: /url \"not a reference\"\n\\ö not a character entity\n", + "html": "

*not emphasized*\n<br/> not a tag\n[not a link](/foo)\n`not code`\n1. not a list\n* not a list\n# not a heading\n[foo]: /url "not a reference"\n&ouml; not a character entity

\n", + "example": 300, + "start_line": 5533, + "end_line": 5553, + "section": "Backslash escapes" + }, + { + "markdown": "\\\\*emphasis*\n", + "html": "

\\emphasis

\n", + "example": 301, + "start_line": 5558, + "end_line": 5562, + "section": "Backslash escapes" + }, + { + "markdown": "foo\\\nbar\n", + "html": "

foo
\nbar

\n", + "example": 302, + "start_line": 5567, + "end_line": 5573, + "section": "Backslash escapes" + }, + { + "markdown": "`` \\[\\` ``\n", + "html": "

\\[\\`

\n", + "example": 303, + "start_line": 5579, + "end_line": 5583, + "section": "Backslash escapes" + }, + { + "markdown": " \\[\\]\n", + "html": "
\\[\\]\n
\n", + "example": 304, + "start_line": 5586, + "end_line": 5591, + "section": "Backslash escapes" + }, + { + "markdown": "~~~\n\\[\\]\n~~~\n", + "html": "
\\[\\]\n
\n", + "example": 305, + "start_line": 5594, + "end_line": 5601, + "section": "Backslash escapes" + }, + { + "markdown": "\n", + "html": "

http://example.com?find=\\*

\n", + "example": 306, + "start_line": 5604, + "end_line": 5608, + "section": "Backslash escapes" + }, + { + "markdown": "\n", + "html": "\n", + "example": 307, + "start_line": 5611, + "end_line": 5615, + "section": "Backslash escapes" + }, + { + "markdown": "[foo](/bar\\* \"ti\\*tle\")\n", + "html": "

foo

\n", + "example": 308, + "start_line": 5621, + "end_line": 5625, + "section": "Backslash escapes" + }, + { + "markdown": "[foo]\n\n[foo]: /bar\\* \"ti\\*tle\"\n", + "html": "

foo

\n", + "example": 309, + "start_line": 5628, + "end_line": 5634, + "section": "Backslash escapes" + }, + { + "markdown": "``` foo\\+bar\nfoo\n```\n", + "html": "
foo\n
\n", + "example": 310, + "start_line": 5637, + "end_line": 5644, + "section": "Backslash escapes" + }, + { + "markdown": "  & © Æ Ď\n¾ ℋ ⅆ\n∲ ≧̸\n", + "html": "

  & © Æ Ď\n¾ ℋ ⅆ\n∲ ≧̸

\n", + "example": 311, + "start_line": 5674, + "end_line": 5682, + "section": "Entity and numeric character references" + }, + { + "markdown": "# Ӓ Ϡ �\n", + "html": "

# Ӓ Ϡ �

\n", + "example": 312, + "start_line": 5693, + "end_line": 5697, + "section": "Entity and numeric character references" + }, + { + "markdown": "" ആ ಫ\n", + "html": "

" ആ ಫ

\n", + "example": 313, + "start_line": 5706, + "end_line": 5710, + "section": "Entity and numeric character references" + }, + { + "markdown": "  &x; &#; &#x;\n�\n&#abcdef0;\n&ThisIsNotDefined; &hi?;\n", + "html": "

&nbsp &x; &#; &#x;\n&#987654321;\n&#abcdef0;\n&ThisIsNotDefined; &hi?;

\n", + "example": 314, + "start_line": 5715, + "end_line": 5725, + "section": "Entity and numeric character references" + }, + { + "markdown": "©\n", + "html": "

&copy

\n", + "example": 315, + "start_line": 5732, + "end_line": 5736, + "section": "Entity and numeric character references" + }, + { + "markdown": "&MadeUpEntity;\n", + "html": "

&MadeUpEntity;

\n", + "example": 316, + "start_line": 5742, + "end_line": 5746, + "section": "Entity and numeric character references" + }, + { + "markdown": "\n", + "html": "\n", + "example": 317, + "start_line": 5753, + "end_line": 5757, + "section": "Entity and numeric character references" + }, + { + "markdown": "[foo](/föö \"föö\")\n", + "html": "

foo

\n", + "example": 318, + "start_line": 5760, + "end_line": 5764, + "section": "Entity and numeric character references" + }, + { + "markdown": "[foo]\n\n[foo]: /föö \"föö\"\n", + "html": "

foo

\n", + "example": 319, + "start_line": 5767, + "end_line": 5773, + "section": "Entity and numeric character references" + }, + { + "markdown": "``` föö\nfoo\n```\n", + "html": "
foo\n
\n", + "example": 320, + "start_line": 5776, + "end_line": 5783, + "section": "Entity and numeric character references" + }, + { + "markdown": "`föö`\n", + "html": "

f&ouml;&ouml;

\n", + "example": 321, + "start_line": 5789, + "end_line": 5793, + "section": "Entity and numeric character references" + }, + { + "markdown": " föfö\n", + "html": "
f&ouml;f&ouml;\n
\n", + "example": 322, + "start_line": 5796, + "end_line": 5801, + "section": "Entity and numeric character references" + }, + { + "markdown": "*foo*\n*foo*\n", + "html": "

*foo*\nfoo

\n", + "example": 323, + "start_line": 5808, + "end_line": 5814, + "section": "Entity and numeric character references" + }, + { + "markdown": "* foo\n\n* foo\n", + "html": "

* foo

\n
    \n
  • foo
  • \n
\n", + "example": 324, + "start_line": 5816, + "end_line": 5825, + "section": "Entity and numeric character references" + }, + { + "markdown": "foo bar\n", + "html": "

foo\n\nbar

\n", + "example": 325, + "start_line": 5827, + "end_line": 5833, + "section": "Entity and numeric character references" + }, + { + "markdown": " foo\n", + "html": "

\tfoo

\n", + "example": 326, + "start_line": 5835, + "end_line": 5839, + "section": "Entity and numeric character references" + }, + { + "markdown": "[a](url "tit")\n", + "html": "

[a](url "tit")

\n", + "example": 327, + "start_line": 5842, + "end_line": 5846, + "section": "Entity and numeric character references" + }, + { + "markdown": "`foo`\n", + "html": "

foo

\n", + "example": 328, + "start_line": 5870, + "end_line": 5874, + "section": "Code spans" + }, + { + "markdown": "`` foo ` bar ``\n", + "html": "

foo ` bar

\n", + "example": 329, + "start_line": 5881, + "end_line": 5885, + "section": "Code spans" + }, + { + "markdown": "` `` `\n", + "html": "

``

\n", + "example": 330, + "start_line": 5891, + "end_line": 5895, + "section": "Code spans" + }, + { + "markdown": "` `` `\n", + "html": "

``

\n", + "example": 331, + "start_line": 5899, + "end_line": 5903, + "section": "Code spans" + }, + { + "markdown": "` a`\n", + "html": "

a

\n", + "example": 332, + "start_line": 5908, + "end_line": 5912, + "section": "Code spans" + }, + { + "markdown": "` b `\n", + "html": "

 b 

\n", + "example": 333, + "start_line": 5917, + "end_line": 5921, + "section": "Code spans" + }, + { + "markdown": "` `\n` `\n", + "html": "

 \n

\n", + "example": 334, + "start_line": 5925, + "end_line": 5931, + "section": "Code spans" + }, + { + "markdown": "``\nfoo\nbar \nbaz\n``\n", + "html": "

foo bar baz

\n", + "example": 335, + "start_line": 5936, + "end_line": 5944, + "section": "Code spans" + }, + { + "markdown": "``\nfoo \n``\n", + "html": "

foo

\n", + "example": 336, + "start_line": 5946, + "end_line": 5952, + "section": "Code spans" + }, + { + "markdown": "`foo bar \nbaz`\n", + "html": "

foo bar baz

\n", + "example": 337, + "start_line": 5957, + "end_line": 5962, + "section": "Code spans" + }, + { + "markdown": "`foo\\`bar`\n", + "html": "

foo\\bar`

\n", + "example": 338, + "start_line": 5974, + "end_line": 5978, + "section": "Code spans" + }, + { + "markdown": "``foo`bar``\n", + "html": "

foo`bar

\n", + "example": 339, + "start_line": 5985, + "end_line": 5989, + "section": "Code spans" + }, + { + "markdown": "` foo `` bar `\n", + "html": "

foo `` bar

\n", + "example": 340, + "start_line": 5991, + "end_line": 5995, + "section": "Code spans" + }, + { + "markdown": "*foo`*`\n", + "html": "

*foo*

\n", + "example": 341, + "start_line": 6003, + "end_line": 6007, + "section": "Code spans" + }, + { + "markdown": "[not a `link](/foo`)\n", + "html": "

[not a link](/foo)

\n", + "example": 342, + "start_line": 6012, + "end_line": 6016, + "section": "Code spans" + }, + { + "markdown": "``\n", + "html": "

<a href="">`

\n", + "example": 343, + "start_line": 6022, + "end_line": 6026, + "section": "Code spans" + }, + { + "markdown": "
`\n", + "html": "

`

\n", + "example": 344, + "start_line": 6031, + "end_line": 6035, + "section": "Code spans" + }, + { + "markdown": "``\n", + "html": "

<http://foo.bar.baz>`

\n", + "example": 345, + "start_line": 6040, + "end_line": 6044, + "section": "Code spans" + }, + { + "markdown": "`\n", + "html": "

http://foo.bar.`baz`

\n", + "example": 346, + "start_line": 6049, + "end_line": 6053, + "section": "Code spans" + }, + { + "markdown": "```foo``\n", + "html": "

```foo``

\n", + "example": 347, + "start_line": 6059, + "end_line": 6063, + "section": "Code spans" + }, + { + "markdown": "`foo\n", + "html": "

`foo

\n", + "example": 348, + "start_line": 6066, + "end_line": 6070, + "section": "Code spans" + }, + { + "markdown": "`foo``bar``\n", + "html": "

`foobar

\n", + "example": 349, + "start_line": 6075, + "end_line": 6079, + "section": "Code spans" + }, + { + "markdown": "*foo bar*\n", + "html": "

foo bar

\n", + "example": 350, + "start_line": 6292, + "end_line": 6296, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "a * foo bar*\n", + "html": "

a * foo bar*

\n", + "example": 351, + "start_line": 6302, + "end_line": 6306, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "a*\"foo\"*\n", + "html": "

a*"foo"*

\n", + "example": 352, + "start_line": 6313, + "end_line": 6317, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "* a *\n", + "html": "

* a *

\n", + "example": 353, + "start_line": 6322, + "end_line": 6326, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo*bar*\n", + "html": "

foobar

\n", + "example": 354, + "start_line": 6331, + "end_line": 6335, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "5*6*78\n", + "html": "

5678

\n", + "example": 355, + "start_line": 6338, + "end_line": 6342, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_foo bar_\n", + "html": "

foo bar

\n", + "example": 356, + "start_line": 6347, + "end_line": 6351, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_ foo bar_\n", + "html": "

_ foo bar_

\n", + "example": 357, + "start_line": 6357, + "end_line": 6361, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "a_\"foo\"_\n", + "html": "

a_"foo"_

\n", + "example": 358, + "start_line": 6367, + "end_line": 6371, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo_bar_\n", + "html": "

foo_bar_

\n", + "example": 359, + "start_line": 6376, + "end_line": 6380, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "5_6_78\n", + "html": "

5_6_78

\n", + "example": 360, + "start_line": 6383, + "end_line": 6387, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "пристаням_стремятся_\n", + "html": "

пристаням_стремятся_

\n", + "example": 361, + "start_line": 6390, + "end_line": 6394, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "aa_\"bb\"_cc\n", + "html": "

aa_"bb"_cc

\n", + "example": 362, + "start_line": 6400, + "end_line": 6404, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo-_(bar)_\n", + "html": "

foo-(bar)

\n", + "example": 363, + "start_line": 6411, + "end_line": 6415, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_foo*\n", + "html": "

_foo*

\n", + "example": 364, + "start_line": 6423, + "end_line": 6427, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo bar *\n", + "html": "

*foo bar *

\n", + "example": 365, + "start_line": 6433, + "end_line": 6437, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo bar\n*\n", + "html": "

*foo bar\n*

\n", + "example": 366, + "start_line": 6442, + "end_line": 6448, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*(*foo)\n", + "html": "

*(*foo)

\n", + "example": 367, + "start_line": 6455, + "end_line": 6459, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*(*foo*)*\n", + "html": "

(foo)

\n", + "example": 368, + "start_line": 6465, + "end_line": 6469, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo*bar\n", + "html": "

foobar

\n", + "example": 369, + "start_line": 6474, + "end_line": 6478, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_foo bar _\n", + "html": "

_foo bar _

\n", + "example": 370, + "start_line": 6487, + "end_line": 6491, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_(_foo)\n", + "html": "

_(_foo)

\n", + "example": 371, + "start_line": 6497, + "end_line": 6501, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_(_foo_)_\n", + "html": "

(foo)

\n", + "example": 372, + "start_line": 6506, + "end_line": 6510, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_foo_bar\n", + "html": "

_foo_bar

\n", + "example": 373, + "start_line": 6515, + "end_line": 6519, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_пристаням_стремятся\n", + "html": "

_пристаням_стремятся

\n", + "example": 374, + "start_line": 6522, + "end_line": 6526, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_foo_bar_baz_\n", + "html": "

foo_bar_baz

\n", + "example": 375, + "start_line": 6529, + "end_line": 6533, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_(bar)_.\n", + "html": "

(bar).

\n", + "example": 376, + "start_line": 6540, + "end_line": 6544, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo bar**\n", + "html": "

foo bar

\n", + "example": 377, + "start_line": 6549, + "end_line": 6553, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "** foo bar**\n", + "html": "

** foo bar**

\n", + "example": 378, + "start_line": 6559, + "end_line": 6563, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "a**\"foo\"**\n", + "html": "

a**"foo"**

\n", + "example": 379, + "start_line": 6570, + "end_line": 6574, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo**bar**\n", + "html": "

foobar

\n", + "example": 380, + "start_line": 6579, + "end_line": 6583, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo bar__\n", + "html": "

foo bar

\n", + "example": 381, + "start_line": 6588, + "end_line": 6592, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__ foo bar__\n", + "html": "

__ foo bar__

\n", + "example": 382, + "start_line": 6598, + "end_line": 6602, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__\nfoo bar__\n", + "html": "

__\nfoo bar__

\n", + "example": 383, + "start_line": 6606, + "end_line": 6612, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "a__\"foo\"__\n", + "html": "

a__"foo"__

\n", + "example": 384, + "start_line": 6618, + "end_line": 6622, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo__bar__\n", + "html": "

foo__bar__

\n", + "example": 385, + "start_line": 6627, + "end_line": 6631, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "5__6__78\n", + "html": "

5__6__78

\n", + "example": 386, + "start_line": 6634, + "end_line": 6638, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "пристаням__стремятся__\n", + "html": "

пристаням__стремятся__

\n", + "example": 387, + "start_line": 6641, + "end_line": 6645, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo, __bar__, baz__\n", + "html": "

foo, bar, baz

\n", + "example": 388, + "start_line": 6648, + "end_line": 6652, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo-__(bar)__\n", + "html": "

foo-(bar)

\n", + "example": 389, + "start_line": 6659, + "end_line": 6663, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo bar **\n", + "html": "

**foo bar **

\n", + "example": 390, + "start_line": 6672, + "end_line": 6676, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**(**foo)\n", + "html": "

**(**foo)

\n", + "example": 391, + "start_line": 6685, + "end_line": 6689, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*(**foo**)*\n", + "html": "

(foo)

\n", + "example": 392, + "start_line": 6695, + "end_line": 6699, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**Gomphocarpus (*Gomphocarpus physocarpus*, syn.\n*Asclepias physocarpa*)**\n", + "html": "

Gomphocarpus (Gomphocarpus physocarpus, syn.\nAsclepias physocarpa)

\n", + "example": 393, + "start_line": 6702, + "end_line": 6708, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo \"*bar*\" foo**\n", + "html": "

foo "bar" foo

\n", + "example": 394, + "start_line": 6711, + "end_line": 6715, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo**bar\n", + "html": "

foobar

\n", + "example": 395, + "start_line": 6720, + "end_line": 6724, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo bar __\n", + "html": "

__foo bar __

\n", + "example": 396, + "start_line": 6732, + "end_line": 6736, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__(__foo)\n", + "html": "

__(__foo)

\n", + "example": 397, + "start_line": 6742, + "end_line": 6746, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_(__foo__)_\n", + "html": "

(foo)

\n", + "example": 398, + "start_line": 6752, + "end_line": 6756, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo__bar\n", + "html": "

__foo__bar

\n", + "example": 399, + "start_line": 6761, + "end_line": 6765, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__пристаням__стремятся\n", + "html": "

__пристаням__стремятся

\n", + "example": 400, + "start_line": 6768, + "end_line": 6772, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo__bar__baz__\n", + "html": "

foo__bar__baz

\n", + "example": 401, + "start_line": 6775, + "end_line": 6779, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__(bar)__.\n", + "html": "

(bar).

\n", + "example": 402, + "start_line": 6786, + "end_line": 6790, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo [bar](/url)*\n", + "html": "

foo bar

\n", + "example": 403, + "start_line": 6798, + "end_line": 6802, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo\nbar*\n", + "html": "

foo\nbar

\n", + "example": 404, + "start_line": 6805, + "end_line": 6811, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_foo __bar__ baz_\n", + "html": "

foo bar baz

\n", + "example": 405, + "start_line": 6817, + "end_line": 6821, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_foo _bar_ baz_\n", + "html": "

foo bar baz

\n", + "example": 406, + "start_line": 6824, + "end_line": 6828, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo_ bar_\n", + "html": "

foo bar

\n", + "example": 407, + "start_line": 6831, + "end_line": 6835, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo *bar**\n", + "html": "

foo bar

\n", + "example": 408, + "start_line": 6838, + "end_line": 6842, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo **bar** baz*\n", + "html": "

foo bar baz

\n", + "example": 409, + "start_line": 6845, + "end_line": 6849, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo**bar**baz*\n", + "html": "

foobarbaz

\n", + "example": 410, + "start_line": 6851, + "end_line": 6855, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo**bar*\n", + "html": "

foo**bar

\n", + "example": 411, + "start_line": 6875, + "end_line": 6879, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "***foo** bar*\n", + "html": "

foo bar

\n", + "example": 412, + "start_line": 6888, + "end_line": 6892, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo **bar***\n", + "html": "

foo bar

\n", + "example": 413, + "start_line": 6895, + "end_line": 6899, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo**bar***\n", + "html": "

foobar

\n", + "example": 414, + "start_line": 6902, + "end_line": 6906, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo***bar***baz\n", + "html": "

foobarbaz

\n", + "example": 415, + "start_line": 6913, + "end_line": 6917, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo******bar*********baz\n", + "html": "

foobar***baz

\n", + "example": 416, + "start_line": 6919, + "end_line": 6923, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo **bar *baz* bim** bop*\n", + "html": "

foo bar baz bim bop

\n", + "example": 417, + "start_line": 6928, + "end_line": 6932, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo [*bar*](/url)*\n", + "html": "

foo bar

\n", + "example": 418, + "start_line": 6935, + "end_line": 6939, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "** is not an empty emphasis\n", + "html": "

** is not an empty emphasis

\n", + "example": 419, + "start_line": 6944, + "end_line": 6948, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**** is not an empty strong emphasis\n", + "html": "

**** is not an empty strong emphasis

\n", + "example": 420, + "start_line": 6951, + "end_line": 6955, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo [bar](/url)**\n", + "html": "

foo bar

\n", + "example": 421, + "start_line": 6964, + "end_line": 6968, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo\nbar**\n", + "html": "

foo\nbar

\n", + "example": 422, + "start_line": 6971, + "end_line": 6977, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo _bar_ baz__\n", + "html": "

foo bar baz

\n", + "example": 423, + "start_line": 6983, + "end_line": 6987, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo __bar__ baz__\n", + "html": "

foo bar baz

\n", + "example": 424, + "start_line": 6990, + "end_line": 6994, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "____foo__ bar__\n", + "html": "

foo bar

\n", + "example": 425, + "start_line": 6997, + "end_line": 7001, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo **bar****\n", + "html": "

foo bar

\n", + "example": 426, + "start_line": 7004, + "end_line": 7008, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo *bar* baz**\n", + "html": "

foo bar baz

\n", + "example": 427, + "start_line": 7011, + "end_line": 7015, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo*bar*baz**\n", + "html": "

foobarbaz

\n", + "example": 428, + "start_line": 7018, + "end_line": 7022, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "***foo* bar**\n", + "html": "

foo bar

\n", + "example": 429, + "start_line": 7025, + "end_line": 7029, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo *bar***\n", + "html": "

foo bar

\n", + "example": 430, + "start_line": 7032, + "end_line": 7036, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo *bar **baz**\nbim* bop**\n", + "html": "

foo bar baz\nbim bop

\n", + "example": 431, + "start_line": 7041, + "end_line": 7047, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo [*bar*](/url)**\n", + "html": "

foo bar

\n", + "example": 432, + "start_line": 7050, + "end_line": 7054, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__ is not an empty emphasis\n", + "html": "

__ is not an empty emphasis

\n", + "example": 433, + "start_line": 7059, + "end_line": 7063, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "____ is not an empty strong emphasis\n", + "html": "

____ is not an empty strong emphasis

\n", + "example": 434, + "start_line": 7066, + "end_line": 7070, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo ***\n", + "html": "

foo ***

\n", + "example": 435, + "start_line": 7076, + "end_line": 7080, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo *\\**\n", + "html": "

foo *

\n", + "example": 436, + "start_line": 7083, + "end_line": 7087, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo *_*\n", + "html": "

foo _

\n", + "example": 437, + "start_line": 7090, + "end_line": 7094, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo *****\n", + "html": "

foo *****

\n", + "example": 438, + "start_line": 7097, + "end_line": 7101, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo **\\***\n", + "html": "

foo *

\n", + "example": 439, + "start_line": 7104, + "end_line": 7108, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo **_**\n", + "html": "

foo _

\n", + "example": 440, + "start_line": 7111, + "end_line": 7115, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo*\n", + "html": "

*foo

\n", + "example": 441, + "start_line": 7122, + "end_line": 7126, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo**\n", + "html": "

foo*

\n", + "example": 442, + "start_line": 7129, + "end_line": 7133, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "***foo**\n", + "html": "

*foo

\n", + "example": 443, + "start_line": 7136, + "end_line": 7140, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "****foo*\n", + "html": "

***foo

\n", + "example": 444, + "start_line": 7143, + "end_line": 7147, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo***\n", + "html": "

foo*

\n", + "example": 445, + "start_line": 7150, + "end_line": 7154, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo****\n", + "html": "

foo***

\n", + "example": 446, + "start_line": 7157, + "end_line": 7161, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo ___\n", + "html": "

foo ___

\n", + "example": 447, + "start_line": 7167, + "end_line": 7171, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo _\\__\n", + "html": "

foo _

\n", + "example": 448, + "start_line": 7174, + "end_line": 7178, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo _*_\n", + "html": "

foo *

\n", + "example": 449, + "start_line": 7181, + "end_line": 7185, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo _____\n", + "html": "

foo _____

\n", + "example": 450, + "start_line": 7188, + "end_line": 7192, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo __\\___\n", + "html": "

foo _

\n", + "example": 451, + "start_line": 7195, + "end_line": 7199, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo __*__\n", + "html": "

foo *

\n", + "example": 452, + "start_line": 7202, + "end_line": 7206, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo_\n", + "html": "

_foo

\n", + "example": 453, + "start_line": 7209, + "end_line": 7213, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_foo__\n", + "html": "

foo_

\n", + "example": 454, + "start_line": 7220, + "end_line": 7224, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "___foo__\n", + "html": "

_foo

\n", + "example": 455, + "start_line": 7227, + "end_line": 7231, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "____foo_\n", + "html": "

___foo

\n", + "example": 456, + "start_line": 7234, + "end_line": 7238, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo___\n", + "html": "

foo_

\n", + "example": 457, + "start_line": 7241, + "end_line": 7245, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_foo____\n", + "html": "

foo___

\n", + "example": 458, + "start_line": 7248, + "end_line": 7252, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo**\n", + "html": "

foo

\n", + "example": 459, + "start_line": 7258, + "end_line": 7262, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*_foo_*\n", + "html": "

foo

\n", + "example": 460, + "start_line": 7265, + "end_line": 7269, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo__\n", + "html": "

foo

\n", + "example": 461, + "start_line": 7272, + "end_line": 7276, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_*foo*_\n", + "html": "

foo

\n", + "example": 462, + "start_line": 7279, + "end_line": 7283, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "****foo****\n", + "html": "

foo

\n", + "example": 463, + "start_line": 7289, + "end_line": 7293, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "____foo____\n", + "html": "

foo

\n", + "example": 464, + "start_line": 7296, + "end_line": 7300, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "******foo******\n", + "html": "

foo

\n", + "example": 465, + "start_line": 7307, + "end_line": 7311, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "***foo***\n", + "html": "

foo

\n", + "example": 466, + "start_line": 7316, + "end_line": 7320, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_____foo_____\n", + "html": "

foo

\n", + "example": 467, + "start_line": 7323, + "end_line": 7327, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo _bar* baz_\n", + "html": "

foo _bar baz_

\n", + "example": 468, + "start_line": 7332, + "end_line": 7336, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo __bar *baz bim__ bam*\n", + "html": "

foo bar *baz bim bam

\n", + "example": 469, + "start_line": 7339, + "end_line": 7343, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo **bar baz**\n", + "html": "

**foo bar baz

\n", + "example": 470, + "start_line": 7348, + "end_line": 7352, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo *bar baz*\n", + "html": "

*foo bar baz

\n", + "example": 471, + "start_line": 7355, + "end_line": 7359, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*[bar*](/url)\n", + "html": "

*bar*

\n", + "example": 472, + "start_line": 7364, + "end_line": 7368, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_foo [bar_](/url)\n", + "html": "

_foo bar_

\n", + "example": 473, + "start_line": 7371, + "end_line": 7375, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*\n", + "html": "

*

\n", + "example": 474, + "start_line": 7378, + "end_line": 7382, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**\n", + "html": "

**

\n", + "example": 475, + "start_line": 7385, + "end_line": 7389, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__\n", + "html": "

__

\n", + "example": 476, + "start_line": 7392, + "end_line": 7396, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*a `*`*\n", + "html": "

a *

\n", + "example": 477, + "start_line": 7399, + "end_line": 7403, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_a `_`_\n", + "html": "

a _

\n", + "example": 478, + "start_line": 7406, + "end_line": 7410, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**a\n", + "html": "

**ahttp://foo.bar/?q=**

\n", + "example": 479, + "start_line": 7413, + "end_line": 7417, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__a\n", + "html": "

__ahttp://foo.bar/?q=__

\n", + "example": 480, + "start_line": 7420, + "end_line": 7424, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "[link](/uri \"title\")\n", + "html": "

link

\n", + "example": 481, + "start_line": 7503, + "end_line": 7507, + "section": "Links" + }, + { + "markdown": "[link](/uri)\n", + "html": "

link

\n", + "example": 482, + "start_line": 7512, + "end_line": 7516, + "section": "Links" + }, + { + "markdown": "[link]()\n", + "html": "

link

\n", + "example": 483, + "start_line": 7521, + "end_line": 7525, + "section": "Links" + }, + { + "markdown": "[link](<>)\n", + "html": "

link

\n", + "example": 484, + "start_line": 7528, + "end_line": 7532, + "section": "Links" + }, + { + "markdown": "[link](/my uri)\n", + "html": "

[link](/my uri)

\n", + "example": 485, + "start_line": 7537, + "end_line": 7541, + "section": "Links" + }, + { + "markdown": "[link](
)\n", + "html": "

link

\n", + "example": 486, + "start_line": 7543, + "end_line": 7547, + "section": "Links" + }, + { + "markdown": "[link](foo\nbar)\n", + "html": "

[link](foo\nbar)

\n", + "example": 487, + "start_line": 7552, + "end_line": 7558, + "section": "Links" + }, + { + "markdown": "[link]()\n", + "html": "

[link]()

\n", + "example": 488, + "start_line": 7560, + "end_line": 7566, + "section": "Links" + }, + { + "markdown": "[a]()\n", + "html": "

a

\n", + "example": 489, + "start_line": 7571, + "end_line": 7575, + "section": "Links" + }, + { + "markdown": "[link]()\n", + "html": "

[link](<foo>)

\n", + "example": 490, + "start_line": 7579, + "end_line": 7583, + "section": "Links" + }, + { + "markdown": "[a](\n[a](c)\n", + "html": "

[a](<b)c\n[a](<b)c>\n[a](c)

\n", + "example": 491, + "start_line": 7588, + "end_line": 7596, + "section": "Links" + }, + { + "markdown": "[link](\\(foo\\))\n", + "html": "

link

\n", + "example": 492, + "start_line": 7600, + "end_line": 7604, + "section": "Links" + }, + { + "markdown": "[link](foo(and(bar)))\n", + "html": "

link

\n", + "example": 493, + "start_line": 7609, + "end_line": 7613, + "section": "Links" + }, + { + "markdown": "[link](foo\\(and\\(bar\\))\n", + "html": "

link

\n", + "example": 494, + "start_line": 7618, + "end_line": 7622, + "section": "Links" + }, + { + "markdown": "[link]()\n", + "html": "

link

\n", + "example": 495, + "start_line": 7625, + "end_line": 7629, + "section": "Links" + }, + { + "markdown": "[link](foo\\)\\:)\n", + "html": "

link

\n", + "example": 496, + "start_line": 7635, + "end_line": 7639, + "section": "Links" + }, + { + "markdown": "[link](#fragment)\n\n[link](http://example.com#fragment)\n\n[link](http://example.com?foo=3#frag)\n", + "html": "

link

\n

link

\n

link

\n", + "example": 497, + "start_line": 7644, + "end_line": 7654, + "section": "Links" + }, + { + "markdown": "[link](foo\\bar)\n", + "html": "

link

\n", + "example": 498, + "start_line": 7660, + "end_line": 7664, + "section": "Links" + }, + { + "markdown": "[link](foo%20bä)\n", + "html": "

link

\n", + "example": 499, + "start_line": 7676, + "end_line": 7680, + "section": "Links" + }, + { + "markdown": "[link](\"title\")\n", + "html": "

link

\n", + "example": 500, + "start_line": 7687, + "end_line": 7691, + "section": "Links" + }, + { + "markdown": "[link](/url \"title\")\n[link](/url 'title')\n[link](/url (title))\n", + "html": "

link\nlink\nlink

\n", + "example": 501, + "start_line": 7696, + "end_line": 7704, + "section": "Links" + }, + { + "markdown": "[link](/url \"title \\\""\")\n", + "html": "

link

\n", + "example": 502, + "start_line": 7710, + "end_line": 7714, + "section": "Links" + }, + { + "markdown": "[link](/url \"title\")\n", + "html": "

link

\n", + "example": 503, + "start_line": 7720, + "end_line": 7724, + "section": "Links" + }, + { + "markdown": "[link](/url \"title \"and\" title\")\n", + "html": "

[link](/url "title "and" title")

\n", + "example": 504, + "start_line": 7729, + "end_line": 7733, + "section": "Links" + }, + { + "markdown": "[link](/url 'title \"and\" title')\n", + "html": "

link

\n", + "example": 505, + "start_line": 7738, + "end_line": 7742, + "section": "Links" + }, + { + "markdown": "[link]( /uri\n \"title\" )\n", + "html": "

link

\n", + "example": 506, + "start_line": 7762, + "end_line": 7767, + "section": "Links" + }, + { + "markdown": "[link] (/uri)\n", + "html": "

[link] (/uri)

\n", + "example": 507, + "start_line": 7773, + "end_line": 7777, + "section": "Links" + }, + { + "markdown": "[link [foo [bar]]](/uri)\n", + "html": "

link [foo [bar]]

\n", + "example": 508, + "start_line": 7783, + "end_line": 7787, + "section": "Links" + }, + { + "markdown": "[link] bar](/uri)\n", + "html": "

[link] bar](/uri)

\n", + "example": 509, + "start_line": 7790, + "end_line": 7794, + "section": "Links" + }, + { + "markdown": "[link [bar](/uri)\n", + "html": "

[link bar

\n", + "example": 510, + "start_line": 7797, + "end_line": 7801, + "section": "Links" + }, + { + "markdown": "[link \\[bar](/uri)\n", + "html": "

link [bar

\n", + "example": 511, + "start_line": 7804, + "end_line": 7808, + "section": "Links" + }, + { + "markdown": "[link *foo **bar** `#`*](/uri)\n", + "html": "

link foo bar #

\n", + "example": 512, + "start_line": 7813, + "end_line": 7817, + "section": "Links" + }, + { + "markdown": "[![moon](moon.jpg)](/uri)\n", + "html": "

\"moon\"

\n", + "example": 513, + "start_line": 7820, + "end_line": 7824, + "section": "Links" + }, + { + "markdown": "[foo [bar](/uri)](/uri)\n", + "html": "

[foo bar](/uri)

\n", + "example": 514, + "start_line": 7829, + "end_line": 7833, + "section": "Links" + }, + { + "markdown": "[foo *[bar [baz](/uri)](/uri)*](/uri)\n", + "html": "

[foo [bar baz](/uri)](/uri)

\n", + "example": 515, + "start_line": 7836, + "end_line": 7840, + "section": "Links" + }, + { + "markdown": "![[[foo](uri1)](uri2)](uri3)\n", + "html": "

\"[foo](uri2)\"

\n", + "example": 516, + "start_line": 7843, + "end_line": 7847, + "section": "Links" + }, + { + "markdown": "*[foo*](/uri)\n", + "html": "

*foo*

\n", + "example": 517, + "start_line": 7853, + "end_line": 7857, + "section": "Links" + }, + { + "markdown": "[foo *bar](baz*)\n", + "html": "

foo *bar

\n", + "example": 518, + "start_line": 7860, + "end_line": 7864, + "section": "Links" + }, + { + "markdown": "*foo [bar* baz]\n", + "html": "

foo [bar baz]

\n", + "example": 519, + "start_line": 7870, + "end_line": 7874, + "section": "Links" + }, + { + "markdown": "[foo \n", + "html": "

[foo

\n", + "example": 520, + "start_line": 7880, + "end_line": 7884, + "section": "Links" + }, + { + "markdown": "[foo`](/uri)`\n", + "html": "

[foo](/uri)

\n", + "example": 521, + "start_line": 7887, + "end_line": 7891, + "section": "Links" + }, + { + "markdown": "[foo\n", + "html": "

[foohttp://example.com/?search=](uri)

\n", + "example": 522, + "start_line": 7894, + "end_line": 7898, + "section": "Links" + }, + { + "markdown": "[foo][bar]\n\n[bar]: /url \"title\"\n", + "html": "

foo

\n", + "example": 523, + "start_line": 7932, + "end_line": 7938, + "section": "Links" + }, + { + "markdown": "[link [foo [bar]]][ref]\n\n[ref]: /uri\n", + "html": "

link [foo [bar]]

\n", + "example": 524, + "start_line": 7947, + "end_line": 7953, + "section": "Links" + }, + { + "markdown": "[link \\[bar][ref]\n\n[ref]: /uri\n", + "html": "

link [bar

\n", + "example": 525, + "start_line": 7956, + "end_line": 7962, + "section": "Links" + }, + { + "markdown": "[link *foo **bar** `#`*][ref]\n\n[ref]: /uri\n", + "html": "

link foo bar #

\n", + "example": 526, + "start_line": 7967, + "end_line": 7973, + "section": "Links" + }, + { + "markdown": "[![moon](moon.jpg)][ref]\n\n[ref]: /uri\n", + "html": "

\"moon\"

\n", + "example": 527, + "start_line": 7976, + "end_line": 7982, + "section": "Links" + }, + { + "markdown": "[foo [bar](/uri)][ref]\n\n[ref]: /uri\n", + "html": "

[foo bar]ref

\n", + "example": 528, + "start_line": 7987, + "end_line": 7993, + "section": "Links" + }, + { + "markdown": "[foo *bar [baz][ref]*][ref]\n\n[ref]: /uri\n", + "html": "

[foo bar baz]ref

\n", + "example": 529, + "start_line": 7996, + "end_line": 8002, + "section": "Links" + }, + { + "markdown": "*[foo*][ref]\n\n[ref]: /uri\n", + "html": "

*foo*

\n", + "example": 530, + "start_line": 8011, + "end_line": 8017, + "section": "Links" + }, + { + "markdown": "[foo *bar][ref]\n\n[ref]: /uri\n", + "html": "

foo *bar

\n", + "example": 531, + "start_line": 8020, + "end_line": 8026, + "section": "Links" + }, + { + "markdown": "[foo \n\n[ref]: /uri\n", + "html": "

[foo

\n", + "example": 532, + "start_line": 8032, + "end_line": 8038, + "section": "Links" + }, + { + "markdown": "[foo`][ref]`\n\n[ref]: /uri\n", + "html": "

[foo][ref]

\n", + "example": 533, + "start_line": 8041, + "end_line": 8047, + "section": "Links" + }, + { + "markdown": "[foo\n\n[ref]: /uri\n", + "html": "

[foohttp://example.com/?search=][ref]

\n", + "example": 534, + "start_line": 8050, + "end_line": 8056, + "section": "Links" + }, + { + "markdown": "[foo][BaR]\n\n[bar]: /url \"title\"\n", + "html": "

foo

\n", + "example": 535, + "start_line": 8061, + "end_line": 8067, + "section": "Links" + }, + { + "markdown": "[Толпой][Толпой] is a Russian word.\n\n[ТОЛПОЙ]: /url\n", + "html": "

Толпой is a Russian word.

\n", + "example": 536, + "start_line": 8072, + "end_line": 8078, + "section": "Links" + }, + { + "markdown": "[Foo\n bar]: /url\n\n[Baz][Foo bar]\n", + "html": "

Baz

\n", + "example": 537, + "start_line": 8084, + "end_line": 8091, + "section": "Links" + }, + { + "markdown": "[foo] [bar]\n\n[bar]: /url \"title\"\n", + "html": "

[foo] bar

\n", + "example": 538, + "start_line": 8097, + "end_line": 8103, + "section": "Links" + }, + { + "markdown": "[foo]\n[bar]\n\n[bar]: /url \"title\"\n", + "html": "

[foo]\nbar

\n", + "example": 539, + "start_line": 8106, + "end_line": 8114, + "section": "Links" + }, + { + "markdown": "[foo]: /url1\n\n[foo]: /url2\n\n[bar][foo]\n", + "html": "

bar

\n", + "example": 540, + "start_line": 8147, + "end_line": 8155, + "section": "Links" + }, + { + "markdown": "[bar][foo\\!]\n\n[foo!]: /url\n", + "html": "

[bar][foo!]

\n", + "example": 541, + "start_line": 8162, + "end_line": 8168, + "section": "Links" + }, + { + "markdown": "[foo][ref[]\n\n[ref[]: /uri\n", + "html": "

[foo][ref[]

\n

[ref[]: /uri

\n", + "example": 542, + "start_line": 8174, + "end_line": 8181, + "section": "Links" + }, + { + "markdown": "[foo][ref[bar]]\n\n[ref[bar]]: /uri\n", + "html": "

[foo][ref[bar]]

\n

[ref[bar]]: /uri

\n", + "example": 543, + "start_line": 8184, + "end_line": 8191, + "section": "Links" + }, + { + "markdown": "[[[foo]]]\n\n[[[foo]]]: /url\n", + "html": "

[[[foo]]]

\n

[[[foo]]]: /url

\n", + "example": 544, + "start_line": 8194, + "end_line": 8201, + "section": "Links" + }, + { + "markdown": "[foo][ref\\[]\n\n[ref\\[]: /uri\n", + "html": "

foo

\n", + "example": 545, + "start_line": 8204, + "end_line": 8210, + "section": "Links" + }, + { + "markdown": "[bar\\\\]: /uri\n\n[bar\\\\]\n", + "html": "

bar\\

\n", + "example": 546, + "start_line": 8215, + "end_line": 8221, + "section": "Links" + }, + { + "markdown": "[]\n\n[]: /uri\n", + "html": "

[]

\n

[]: /uri

\n", + "example": 547, + "start_line": 8226, + "end_line": 8233, + "section": "Links" + }, + { + "markdown": "[\n ]\n\n[\n ]: /uri\n", + "html": "

[\n]

\n

[\n]: /uri

\n", + "example": 548, + "start_line": 8236, + "end_line": 8247, + "section": "Links" + }, + { + "markdown": "[foo][]\n\n[foo]: /url \"title\"\n", + "html": "

foo

\n", + "example": 549, + "start_line": 8259, + "end_line": 8265, + "section": "Links" + }, + { + "markdown": "[*foo* bar][]\n\n[*foo* bar]: /url \"title\"\n", + "html": "

foo bar

\n", + "example": 550, + "start_line": 8268, + "end_line": 8274, + "section": "Links" + }, + { + "markdown": "[Foo][]\n\n[foo]: /url \"title\"\n", + "html": "

Foo

\n", + "example": 551, + "start_line": 8279, + "end_line": 8285, + "section": "Links" + }, + { + "markdown": "[foo] \n[]\n\n[foo]: /url \"title\"\n", + "html": "

foo\n[]

\n", + "example": 552, + "start_line": 8292, + "end_line": 8300, + "section": "Links" + }, + { + "markdown": "[foo]\n\n[foo]: /url \"title\"\n", + "html": "

foo

\n", + "example": 553, + "start_line": 8312, + "end_line": 8318, + "section": "Links" + }, + { + "markdown": "[*foo* bar]\n\n[*foo* bar]: /url \"title\"\n", + "html": "

foo bar

\n", + "example": 554, + "start_line": 8321, + "end_line": 8327, + "section": "Links" + }, + { + "markdown": "[[*foo* bar]]\n\n[*foo* bar]: /url \"title\"\n", + "html": "

[foo bar]

\n", + "example": 555, + "start_line": 8330, + "end_line": 8336, + "section": "Links" + }, + { + "markdown": "[[bar [foo]\n\n[foo]: /url\n", + "html": "

[[bar foo

\n", + "example": 556, + "start_line": 8339, + "end_line": 8345, + "section": "Links" + }, + { + "markdown": "[Foo]\n\n[foo]: /url \"title\"\n", + "html": "

Foo

\n", + "example": 557, + "start_line": 8350, + "end_line": 8356, + "section": "Links" + }, + { + "markdown": "[foo] bar\n\n[foo]: /url\n", + "html": "

foo bar

\n", + "example": 558, + "start_line": 8361, + "end_line": 8367, + "section": "Links" + }, + { + "markdown": "\\[foo]\n\n[foo]: /url \"title\"\n", + "html": "

[foo]

\n", + "example": 559, + "start_line": 8373, + "end_line": 8379, + "section": "Links" + }, + { + "markdown": "[foo*]: /url\n\n*[foo*]\n", + "html": "

*foo*

\n", + "example": 560, + "start_line": 8385, + "end_line": 8391, + "section": "Links" + }, + { + "markdown": "[foo][bar]\n\n[foo]: /url1\n[bar]: /url2\n", + "html": "

foo

\n", + "example": 561, + "start_line": 8397, + "end_line": 8404, + "section": "Links" + }, + { + "markdown": "[foo][]\n\n[foo]: /url1\n", + "html": "

foo

\n", + "example": 562, + "start_line": 8406, + "end_line": 8412, + "section": "Links" + }, + { + "markdown": "[foo]()\n\n[foo]: /url1\n", + "html": "

foo

\n", + "example": 563, + "start_line": 8416, + "end_line": 8422, + "section": "Links" + }, + { + "markdown": "[foo](not a link)\n\n[foo]: /url1\n", + "html": "

foo(not a link)

\n", + "example": 564, + "start_line": 8424, + "end_line": 8430, + "section": "Links" + }, + { + "markdown": "[foo][bar][baz]\n\n[baz]: /url\n", + "html": "

[foo]bar

\n", + "example": 565, + "start_line": 8435, + "end_line": 8441, + "section": "Links" + }, + { + "markdown": "[foo][bar][baz]\n\n[baz]: /url1\n[bar]: /url2\n", + "html": "

foobaz

\n", + "example": 566, + "start_line": 8447, + "end_line": 8454, + "section": "Links" + }, + { + "markdown": "[foo][bar][baz]\n\n[baz]: /url1\n[foo]: /url2\n", + "html": "

[foo]bar

\n", + "example": 567, + "start_line": 8460, + "end_line": 8467, + "section": "Links" + }, + { + "markdown": "![foo](/url \"title\")\n", + "html": "

\"foo\"

\n", + "example": 568, + "start_line": 8483, + "end_line": 8487, + "section": "Images" + }, + { + "markdown": "![foo *bar*]\n\n[foo *bar*]: train.jpg \"train & tracks\"\n", + "html": "

\"foo

\n", + "example": 569, + "start_line": 8490, + "end_line": 8496, + "section": "Images" + }, + { + "markdown": "![foo ![bar](/url)](/url2)\n", + "html": "

\"foo

\n", + "example": 570, + "start_line": 8499, + "end_line": 8503, + "section": "Images" + }, + { + "markdown": "![foo [bar](/url)](/url2)\n", + "html": "

\"foo

\n", + "example": 571, + "start_line": 8506, + "end_line": 8510, + "section": "Images" + }, + { + "markdown": "![foo *bar*][]\n\n[foo *bar*]: train.jpg \"train & tracks\"\n", + "html": "

\"foo

\n", + "example": 572, + "start_line": 8520, + "end_line": 8526, + "section": "Images" + }, + { + "markdown": "![foo *bar*][foobar]\n\n[FOOBAR]: train.jpg \"train & tracks\"\n", + "html": "

\"foo

\n", + "example": 573, + "start_line": 8529, + "end_line": 8535, + "section": "Images" + }, + { + "markdown": "![foo](train.jpg)\n", + "html": "

\"foo\"

\n", + "example": 574, + "start_line": 8538, + "end_line": 8542, + "section": "Images" + }, + { + "markdown": "My ![foo bar](/path/to/train.jpg \"title\" )\n", + "html": "

My \"foo

\n", + "example": 575, + "start_line": 8545, + "end_line": 8549, + "section": "Images" + }, + { + "markdown": "![foo]()\n", + "html": "

\"foo\"

\n", + "example": 576, + "start_line": 8552, + "end_line": 8556, + "section": "Images" + }, + { + "markdown": "![](/url)\n", + "html": "

\"\"

\n", + "example": 577, + "start_line": 8559, + "end_line": 8563, + "section": "Images" + }, + { + "markdown": "![foo][bar]\n\n[bar]: /url\n", + "html": "

\"foo\"

\n", + "example": 578, + "start_line": 8568, + "end_line": 8574, + "section": "Images" + }, + { + "markdown": "![foo][bar]\n\n[BAR]: /url\n", + "html": "

\"foo\"

\n", + "example": 579, + "start_line": 8577, + "end_line": 8583, + "section": "Images" + }, + { + "markdown": "![foo][]\n\n[foo]: /url \"title\"\n", + "html": "

\"foo\"

\n", + "example": 580, + "start_line": 8588, + "end_line": 8594, + "section": "Images" + }, + { + "markdown": "![*foo* bar][]\n\n[*foo* bar]: /url \"title\"\n", + "html": "

\"foo

\n", + "example": 581, + "start_line": 8597, + "end_line": 8603, + "section": "Images" + }, + { + "markdown": "![Foo][]\n\n[foo]: /url \"title\"\n", + "html": "

\"Foo\"

\n", + "example": 582, + "start_line": 8608, + "end_line": 8614, + "section": "Images" + }, + { + "markdown": "![foo] \n[]\n\n[foo]: /url \"title\"\n", + "html": "

\"foo\"\n[]

\n", + "example": 583, + "start_line": 8620, + "end_line": 8628, + "section": "Images" + }, + { + "markdown": "![foo]\n\n[foo]: /url \"title\"\n", + "html": "

\"foo\"

\n", + "example": 584, + "start_line": 8633, + "end_line": 8639, + "section": "Images" + }, + { + "markdown": "![*foo* bar]\n\n[*foo* bar]: /url \"title\"\n", + "html": "

\"foo

\n", + "example": 585, + "start_line": 8642, + "end_line": 8648, + "section": "Images" + }, + { + "markdown": "![[foo]]\n\n[[foo]]: /url \"title\"\n", + "html": "

![[foo]]

\n

[[foo]]: /url "title"

\n", + "example": 586, + "start_line": 8653, + "end_line": 8660, + "section": "Images" + }, + { + "markdown": "![Foo]\n\n[foo]: /url \"title\"\n", + "html": "

\"Foo\"

\n", + "example": 587, + "start_line": 8665, + "end_line": 8671, + "section": "Images" + }, + { + "markdown": "!\\[foo]\n\n[foo]: /url \"title\"\n", + "html": "

![foo]

\n", + "example": 588, + "start_line": 8677, + "end_line": 8683, + "section": "Images" + }, + { + "markdown": "\\![foo]\n\n[foo]: /url \"title\"\n", + "html": "

!foo

\n", + "example": 589, + "start_line": 8689, + "end_line": 8695, + "section": "Images" + }, + { + "markdown": "\n", + "html": "

http://foo.bar.baz

\n", + "example": 590, + "start_line": 8722, + "end_line": 8726, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

http://foo.bar.baz/test?q=hello&id=22&boolean

\n", + "example": 591, + "start_line": 8729, + "end_line": 8733, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

irc://foo.bar:2233/baz

\n", + "example": 592, + "start_line": 8736, + "end_line": 8740, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

MAILTO:FOO@BAR.BAZ

\n", + "example": 593, + "start_line": 8745, + "end_line": 8749, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

a+b+c:d

\n", + "example": 594, + "start_line": 8757, + "end_line": 8761, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

made-up-scheme://foo,bar

\n", + "example": 595, + "start_line": 8764, + "end_line": 8768, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

http://../

\n", + "example": 596, + "start_line": 8771, + "end_line": 8775, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

localhost:5001/foo

\n", + "example": 597, + "start_line": 8778, + "end_line": 8782, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

<http://foo.bar/baz bim>

\n", + "example": 598, + "start_line": 8787, + "end_line": 8791, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

http://example.com/\\[\\

\n", + "example": 599, + "start_line": 8796, + "end_line": 8800, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

foo@bar.example.com

\n", + "example": 600, + "start_line": 8818, + "end_line": 8822, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

foo+special@Bar.baz-bar0.com

\n", + "example": 601, + "start_line": 8825, + "end_line": 8829, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

<foo+@bar.example.com>

\n", + "example": 602, + "start_line": 8834, + "end_line": 8838, + "section": "Autolinks" + }, + { + "markdown": "<>\n", + "html": "

<>

\n", + "example": 603, + "start_line": 8843, + "end_line": 8847, + "section": "Autolinks" + }, + { + "markdown": "< http://foo.bar >\n", + "html": "

< http://foo.bar >

\n", + "example": 604, + "start_line": 8850, + "end_line": 8854, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

<m:abc>

\n", + "example": 605, + "start_line": 8857, + "end_line": 8861, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

<foo.bar.baz>

\n", + "example": 606, + "start_line": 8864, + "end_line": 8868, + "section": "Autolinks" + }, + { + "markdown": "http://example.com\n", + "html": "

http://example.com

\n", + "example": 607, + "start_line": 8871, + "end_line": 8875, + "section": "Autolinks" + }, + { + "markdown": "foo@bar.example.com\n", + "html": "

foo@bar.example.com

\n", + "example": 608, + "start_line": 8878, + "end_line": 8882, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

\n", + "example": 609, + "start_line": 8960, + "end_line": 8964, + "section": "Raw HTML" + }, + { + "markdown": "\n", + "html": "

\n", + "example": 610, + "start_line": 8969, + "end_line": 8973, + "section": "Raw HTML" + }, + { + "markdown": "\n", + "html": "

\n", + "example": 611, + "start_line": 8978, + "end_line": 8984, + "section": "Raw HTML" + }, + { + "markdown": "\n", + "html": "

\n", + "example": 612, + "start_line": 8989, + "end_line": 8995, + "section": "Raw HTML" + }, + { + "markdown": "Foo \n", + "html": "

Foo

\n", + "example": 613, + "start_line": 9000, + "end_line": 9004, + "section": "Raw HTML" + }, + { + "markdown": "<33> <__>\n", + "html": "

<33> <__>

\n", + "example": 614, + "start_line": 9009, + "end_line": 9013, + "section": "Raw HTML" + }, + { + "markdown": "
\n", + "html": "

<a h*#ref="hi">

\n", + "example": 615, + "start_line": 9018, + "end_line": 9022, + "section": "Raw HTML" + }, + { + "markdown": "
\n", + "html": "

<a href="hi'> <a href=hi'>

\n", + "example": 616, + "start_line": 9027, + "end_line": 9031, + "section": "Raw HTML" + }, + { + "markdown": "< a><\nfoo>\n\n", + "html": "

< a><\nfoo><bar/ >\n<foo bar=baz\nbim!bop />

\n", + "example": 617, + "start_line": 9036, + "end_line": 9046, + "section": "Raw HTML" + }, + { + "markdown": "
\n", + "html": "

<a href='bar'title=title>

\n", + "example": 618, + "start_line": 9051, + "end_line": 9055, + "section": "Raw HTML" + }, + { + "markdown": "
\n", + "html": "

\n", + "example": 619, + "start_line": 9060, + "end_line": 9064, + "section": "Raw HTML" + }, + { + "markdown": "\n", + "html": "

</a href="foo">

\n", + "example": 620, + "start_line": 9069, + "end_line": 9073, + "section": "Raw HTML" + }, + { + "markdown": "foo \n", + "html": "

foo

\n", + "example": 621, + "start_line": 9078, + "end_line": 9084, + "section": "Raw HTML" + }, + { + "markdown": "foo \n", + "html": "

foo <!-- not a comment -- two hyphens -->

\n", + "example": 622, + "start_line": 9087, + "end_line": 9091, + "section": "Raw HTML" + }, + { + "markdown": "foo foo -->\n\nfoo \n", + "html": "

foo <!--> foo -->

\n

foo <!-- foo--->

\n", + "example": 623, + "start_line": 9096, + "end_line": 9103, + "section": "Raw HTML" + }, + { + "markdown": "foo \n", + "html": "

foo

\n", + "example": 624, + "start_line": 9108, + "end_line": 9112, + "section": "Raw HTML" + }, + { + "markdown": "foo \n", + "html": "

foo

\n", + "example": 625, + "start_line": 9117, + "end_line": 9121, + "section": "Raw HTML" + }, + { + "markdown": "foo &<]]>\n", + "html": "

foo &<]]>

\n", + "example": 626, + "start_line": 9126, + "end_line": 9130, + "section": "Raw HTML" + }, + { + "markdown": "foo \n", + "html": "

foo

\n", + "example": 627, + "start_line": 9136, + "end_line": 9140, + "section": "Raw HTML" + }, + { + "markdown": "foo \n", + "html": "

foo

\n", + "example": 628, + "start_line": 9145, + "end_line": 9149, + "section": "Raw HTML" + }, + { + "markdown": "\n", + "html": "

<a href=""">

\n", + "example": 629, + "start_line": 9152, + "end_line": 9156, + "section": "Raw HTML" + }, + { + "markdown": "foo \nbaz\n", + "html": "

foo
\nbaz

\n", + "example": 630, + "start_line": 9166, + "end_line": 9172, + "section": "Hard line breaks" + }, + { + "markdown": "foo\\\nbaz\n", + "html": "

foo
\nbaz

\n", + "example": 631, + "start_line": 9178, + "end_line": 9184, + "section": "Hard line breaks" + }, + { + "markdown": "foo \nbaz\n", + "html": "

foo
\nbaz

\n", + "example": 632, + "start_line": 9189, + "end_line": 9195, + "section": "Hard line breaks" + }, + { + "markdown": "foo \n bar\n", + "html": "

foo
\nbar

\n", + "example": 633, + "start_line": 9200, + "end_line": 9206, + "section": "Hard line breaks" + }, + { + "markdown": "foo\\\n bar\n", + "html": "

foo
\nbar

\n", + "example": 634, + "start_line": 9209, + "end_line": 9215, + "section": "Hard line breaks" + }, + { + "markdown": "*foo \nbar*\n", + "html": "

foo
\nbar

\n", + "example": 635, + "start_line": 9221, + "end_line": 9227, + "section": "Hard line breaks" + }, + { + "markdown": "*foo\\\nbar*\n", + "html": "

foo
\nbar

\n", + "example": 636, + "start_line": 9230, + "end_line": 9236, + "section": "Hard line breaks" + }, + { + "markdown": "`code \nspan`\n", + "html": "

code span

\n", + "example": 637, + "start_line": 9241, + "end_line": 9246, + "section": "Hard line breaks" + }, + { + "markdown": "`code\\\nspan`\n", + "html": "

code\\ span

\n", + "example": 638, + "start_line": 9249, + "end_line": 9254, + "section": "Hard line breaks" + }, + { + "markdown": "
\n", + "html": "

\n", + "example": 639, + "start_line": 9259, + "end_line": 9265, + "section": "Hard line breaks" + }, + { + "markdown": "\n", + "html": "

\n", + "example": 640, + "start_line": 9268, + "end_line": 9274, + "section": "Hard line breaks" + }, + { + "markdown": "foo\\\n", + "html": "

foo\\

\n", + "example": 641, + "start_line": 9281, + "end_line": 9285, + "section": "Hard line breaks" + }, + { + "markdown": "foo \n", + "html": "

foo

\n", + "example": 642, + "start_line": 9288, + "end_line": 9292, + "section": "Hard line breaks" + }, + { + "markdown": "### foo\\\n", + "html": "

foo\\

\n", + "example": 643, + "start_line": 9295, + "end_line": 9299, + "section": "Hard line breaks" + }, + { + "markdown": "### foo \n", + "html": "

foo

\n", + "example": 644, + "start_line": 9302, + "end_line": 9306, + "section": "Hard line breaks" + }, + { + "markdown": "foo\nbaz\n", + "html": "

foo\nbaz

\n", + "example": 645, + "start_line": 9317, + "end_line": 9323, + "section": "Soft line breaks" + }, + { + "markdown": "foo \n baz\n", + "html": "

foo\nbaz

\n", + "example": 646, + "start_line": 9329, + "end_line": 9335, + "section": "Soft line breaks" + }, + { + "markdown": "hello $.;'there\n", + "html": "

hello $.;'there

\n", + "example": 647, + "start_line": 9349, + "end_line": 9353, + "section": "Textual content" + }, + { + "markdown": "Foo χρῆν\n", + "html": "

Foo χρῆν

\n", + "example": 648, + "start_line": 9356, + "end_line": 9360, + "section": "Textual content" + }, + { + "markdown": "Multiple spaces\n", + "html": "

Multiple spaces

\n", + "example": 649, + "start_line": 9365, + "end_line": 9369, + "section": "Textual content" + } +] \ No newline at end of file diff --git a/ast/ast.go b/ast/ast.go new file mode 100644 index 0000000..73ec675 --- /dev/null +++ b/ast/ast.go @@ -0,0 +1,342 @@ +// Package ast defines AST nodes that represent markdown elements. +package ast + +import ( + "bytes" + "fmt" + textm "github.com/yuin/goldmark/text" + "strings" +) + +// A NodeType indicates what type a node belongs to. +type NodeType int + +const ( + // BlockNode indicates that a node is kind of block nodes. + BlockNode NodeType = iota + 1 + // InlineNode indicates that a node is kind of inline nodes. + InlineNode +) + +// A Node interface defines basic AST node functionalities. +type Node interface { + // Type returns a type of this node. + Type() NodeType + + // NextSibling returns a next sibling node of this node. + NextSibling() Node + + // PreviousSibling returns a previous sibling node of this node. + PreviousSibling() Node + + // Parent returns a parent node of this node. + Parent() Node + + // SetParent sets a parent node to this node. + SetParent(Node) + + // SetPreviousSibling sets a previous sibling node to this node. + SetPreviousSibling(Node) + + // SetNextSibling sets a next sibling node to this node. + SetNextSibling(Node) + + // HasChildren returns true if this node has any children, otherwise false. + HasChildren() bool + + // ChildCount returns a total number of children. + ChildCount() int + + // FirstChild returns a first child of this node. + FirstChild() Node + + // LastChild returns a last child of this node. + LastChild() Node + + // AppendChild append a node child to the tail of the children. + AppendChild(self, child Node) + + // RemoveChild removes a node child from this node. + // If a node child is not children of this node, RemoveChild nothing to do. + RemoveChild(self, child Node) + + // RemoveChildren removes all children from this node. + RemoveChildren(self Node) + + // ReplaceChild replace a node v1 with a node insertee. + // If v1 is not children of this node, ReplaceChild append a insetee to the + // tail of the children. + ReplaceChild(self, v1, insertee Node) + + // InsertBefore inserts a node insertee before a node v1. + // If v1 is not children of this node, InsertBefore append a insetee to the + // tail of the children. + InsertBefore(self, v1, insertee Node) + + // InsertAfterinserts a node insertee after a node v1. + // If v1 is not children of this node, InsertBefore append a insetee to the + // tail of the children. + InsertAfter(self, v1, insertee Node) + + // Dump dumps an AST tree structure to stdout. + // This function completely aimed for debugging. + // level is a indent level. Implementer should indent informations with + // 2 * level spaces. + Dump(source []byte, level int) + + // Text returns text values of this node. + Text(source []byte) []byte + + // HasBlankPreviousLines returns true if the row before this node is blank, + // otherwise false. + // This method is valid only for block nodes. + HasBlankPreviousLines() bool + + // SetBlankPreviousLines sets whether the row before this node is blank. + // This method is valid only for block nodes. + SetBlankPreviousLines(v bool) + + // Lines returns text segments that hold positions in a source. + // This method is valid only for block nodes. + Lines() *textm.Segments + + // SetLines sets text segments that hold positions in a source. + // This method is valid only for block nodes. + SetLines(*textm.Segments) + + // IsRaw returns true if contents should be rendered as 'raw' contents. + IsRaw() bool +} + +// A BaseNode struct implements the Node interface. +type BaseNode struct { + firstChild Node + lastChild Node + parent Node + next Node + prev Node +} + +func ensureIsolated(v Node) { + if p := v.Parent(); p != nil { + p.RemoveChild(p, v) + } +} + +// HasChildren implements Node.HasChildren . +func (n *BaseNode) HasChildren() bool { + return n.firstChild != nil +} + +// SetPreviousSibling implements Node.SetPreviousSibling . +func (n *BaseNode) SetPreviousSibling(v Node) { + n.prev = v +} + +// SetNextSibling implements Node.SetNextSibling . +func (n *BaseNode) SetNextSibling(v Node) { + n.next = v +} + +// PreviousSibling implements Node.PreviousSibling . +func (n *BaseNode) PreviousSibling() Node { + return n.prev +} + +// NextSibling implements Node.NextSibling . +func (n *BaseNode) NextSibling() Node { + return n.next +} + +// RemoveChild implements Node.RemoveChild . +func (n *BaseNode) RemoveChild(self, v Node) { + if v.Parent() != self { + return + } + prev := v.PreviousSibling() + next := v.NextSibling() + if prev != nil { + prev.SetNextSibling(next) + } else { + n.firstChild = next + } + if next != nil { + next.SetPreviousSibling(prev) + } else { + n.lastChild = prev + } + v.SetParent(nil) + v.SetPreviousSibling(nil) + v.SetNextSibling(nil) +} + +// RemoveChildren implements Node.RemoveChildren . +func (n *BaseNode) RemoveChildren(self Node) { + for c := n.firstChild; c != nil; c = c.NextSibling() { + c.SetParent(nil) + c.SetPreviousSibling(nil) + c.SetNextSibling(nil) + } + n.firstChild = nil + n.lastChild = nil +} + +// FirstChild implements Node.FirstChild . +func (n *BaseNode) FirstChild() Node { + return n.firstChild +} + +// LastChild implements Node.LastChild . +func (n *BaseNode) LastChild() Node { + return n.lastChild +} + +// ChildCount implements Node.ChildCount . +func (n *BaseNode) ChildCount() int { + count := 0 + for c := n.firstChild; c != nil; c = c.NextSibling() { + count++ + } + return count +} + +// Parent implements Node.Parent . +func (n *BaseNode) Parent() Node { + return n.parent +} + +// SetParent implements Node.SetParent . +func (n *BaseNode) SetParent(v Node) { + n.parent = v +} + +// AppendChild implements Node.AppendChild . +func (n *BaseNode) AppendChild(self, v Node) { + ensureIsolated(v) + if n.firstChild == nil { + n.firstChild = v + v.SetNextSibling(nil) + v.SetPreviousSibling(nil) + } else { + last := n.lastChild + last.SetNextSibling(v) + v.SetPreviousSibling(last) + } + v.SetParent(self) + n.lastChild = v +} + +// ReplaceChild implements Node.ReplaceChild . +func (n *BaseNode) ReplaceChild(self, v1, insertee Node) { + n.InsertBefore(self, v1, insertee) + n.RemoveChild(self, v1) +} + +// InsertAfter implements Node.InsertAfter . +func (n *BaseNode) InsertAfter(self, v1, insertee Node) { + n.InsertBefore(self, v1.NextSibling(), insertee) +} + +// InsertBefore implements Node.InsertBefore . +func (n *BaseNode) InsertBefore(self, v1, insertee Node) { + if v1 == nil { + n.AppendChild(self, insertee) + return + } + ensureIsolated(insertee) + if v1.Parent() == self { + c := v1 + prev := c.PreviousSibling() + if prev != nil { + prev.SetNextSibling(insertee) + insertee.SetPreviousSibling(prev) + } else { + n.firstChild = insertee + insertee.SetPreviousSibling(nil) + } + insertee.SetNextSibling(c) + c.SetPreviousSibling(insertee) + insertee.SetParent(self) + } +} + +// Text implements Node.Text . +func (n *BaseNode) Text(source []byte) []byte { + var buf bytes.Buffer + for c := n.firstChild; c != nil; c = c.NextSibling() { + buf.Write(c.Text(source)) + } + return buf.Bytes() +} + +// DumpHelper is a helper function to implement Node.Dump. +// name is a name of the node. +// kv is pairs of an attribute name and an attribute value. +// cb is a function called after wrote a name and attributes. +func DumpHelper(v Node, source []byte, level int, name string, kv map[string]string, cb func(int)) { + indent := strings.Repeat(" ", level) + fmt.Printf("%s%s {\n", indent, name) + indent2 := strings.Repeat(" ", level+1) + if v.Type() == BlockNode { + fmt.Printf("%sRawText: \"", indent2) + for i := 0; i < v.Lines().Len(); i++ { + line := v.Lines().At(i) + fmt.Printf("%s", line.Value(source)) + } + fmt.Printf("\"\n") + fmt.Printf("%sHasBlankPreviousLines: %v\n", indent2, v.HasBlankPreviousLines()) + } + if kv != nil { + for name, value := range kv { + fmt.Printf("%s%s: %s\n", indent2, name, value) + } + } + if cb != nil { + cb(level + 1) + } + for c := v.FirstChild(); c != nil; c = c.NextSibling() { + c.Dump(source, level+1) + } + fmt.Printf("%s}\n", indent) +} + +// WalkStatus represents a current status of the Walk function. +type WalkStatus int + +const ( + // WalkStop indicates no more walking needed. + WalkStop = iota + 1 + + // WalkSkipChildren indicates that Walk wont walk on children of current + // node. + WalkSkipChildren + + // WalkContinue indicates that Walk can continue to walk. + WalkContinue +) + +// Walker is a function that will be called when Walk find a +// new node. +// entering is set true before walks children, false after walked children. +// If Walker returns error, Walk function immediately stop walking. +type Walker func(n Node, entering bool) (WalkStatus, error) + +// Walk walks a AST tree by the depth first search algorighm. +func Walk(n Node, walker Walker) error { + status, err := walker(n, true) + if err != nil || status == WalkStop { + return err + } + if status != WalkSkipChildren { + for c := n.FirstChild(); c != nil; c = c.NextSibling() { + if err := Walk(c, walker); err != nil { + return err + } + } + } + status, err = walker(n, false) + if err != nil || status == WalkStop { + return err + } + return nil +} diff --git a/ast/block.go b/ast/block.go new file mode 100644 index 0000000..281d146 --- /dev/null +++ b/ast/block.go @@ -0,0 +1,364 @@ +package ast + +import ( + "fmt" + textm "github.com/yuin/goldmark/text" + "strings" +) + +// A BaseBlock struct implements the Node interface. +type BaseBlock struct { + BaseNode + blankPreviousLines bool + lines *textm.Segments +} + +// Type implements Node.Type +func (b *BaseBlock) Type() NodeType { + return BlockNode +} + +// IsRaw implements Node.IsRaw +func (b *BaseBlock) IsRaw() bool { + return false +} + +// HasBlankPreviousLines implements Node.HasBlankPreviousLines. +func (b *BaseBlock) HasBlankPreviousLines() bool { + return b.blankPreviousLines +} + +// SetBlankPreviousLines implements Node.SetBlankPreviousLines. +func (b *BaseBlock) SetBlankPreviousLines(v bool) { + b.blankPreviousLines = v +} + +// Lines implements Node.Lines +func (b *BaseBlock) Lines() *textm.Segments { + if b.lines == nil { + b.lines = textm.NewSegments() + } + return b.lines +} + +// SetLines implements Node.SetLines +func (b *BaseBlock) SetLines(v *textm.Segments) { + b.lines = v +} + +// A Document struct is a root node of Markdown text. +type Document struct { + BaseBlock +} + +// Dump impelements Node.Dump . +func (n *Document) Dump(source []byte, level int) { + DumpHelper(n, source, level, "Document", nil, nil) +} + +// NewDocument returns a new Document node. +func NewDocument() *Document { + return &Document{ + BaseBlock: BaseBlock{}, + } +} + +// A TextBlock struct is a node whose lines +// should be rendered without any containers. +type TextBlock struct { + BaseBlock +} + +// Dump impelements Node.Dump . +func (n *TextBlock) Dump(source []byte, level int) { + DumpHelper(n, source, level, "TextBlock", nil, nil) +} + +// NewTextBlock returns a new TextBlock node. +func NewTextBlock() *TextBlock { + return &TextBlock{ + BaseBlock: BaseBlock{}, + } +} + +// A Paragraph struct represents a paragraph of Markdown text. +type Paragraph struct { + BaseBlock +} + +// Dump impelements Node.Dump . +func (n *Paragraph) Dump(source []byte, level int) { + DumpHelper(n, source, level, "Paragraph", nil, nil) +} + +// NewParagraph returns a new Paragraph node. +func NewParagraph() *Paragraph { + return &Paragraph{ + BaseBlock: BaseBlock{}, + } +} + +// IsParagraph returns true if given node implements the Paragraph interface, +// otherwise false. +func IsParagraph(node Node) bool { + _, ok := node.(*Paragraph) + return ok +} + +// A Heading struct represents headings like SetextHeading and ATXHeading. +type Heading struct { + BaseBlock + // Level returns a level of this heading. + // This value is between 1 and 6. + Level int + + // ID returns an ID of this heading. + ID []byte +} + +// Dump impelements Node.Dump . +func (n *Heading) Dump(source []byte, level int) { + m := map[string]string{ + "Level": fmt.Sprintf("%d", n.Level), + } + DumpHelper(n, source, level, "Heading", m, nil) +} + +// NewHeading returns a new Heading node. +func NewHeading(level int) *Heading { + return &Heading{ + BaseBlock: BaseBlock{}, + Level: level, + } +} + +// A ThemanticBreak struct represents a themantic break of Markdown text. +type ThemanticBreak struct { + BaseBlock +} + +// Dump impelements Node.Dump . +func (n *ThemanticBreak) Dump(source []byte, level int) { + DumpHelper(n, source, level, "ThemanticBreak", nil, nil) +} + +// NewThemanticBreak returns a new ThemanticBreak node. +func NewThemanticBreak() *ThemanticBreak { + return &ThemanticBreak{ + BaseBlock: BaseBlock{}, + } +} + +// A CodeBlock interface represents an indented code block of Markdown text. +type CodeBlock struct { + BaseBlock +} + +// IsRaw implements Node.IsRaw. +func (n *CodeBlock) IsRaw() bool { + return true +} + +// Dump impelements Node.Dump . +func (n *CodeBlock) Dump(source []byte, level int) { + DumpHelper(n, source, level, "CodeBlock", nil, nil) +} + +// NewCodeBlock returns a new CodeBlock node. +func NewCodeBlock() *CodeBlock { + return &CodeBlock{ + BaseBlock: BaseBlock{}, + } +} + +// A FencedCodeBlock struct represents a fenced code block of Markdown text. +type FencedCodeBlock struct { + BaseBlock + // Info returns a info text of this fenced code block. + Info *Text +} + +// IsRaw implements Node.IsRaw. +func (n *FencedCodeBlock) IsRaw() bool { + return true +} + +// Dump impelements Node.Dump . +func (n *FencedCodeBlock) Dump(source []byte, level int) { + m := map[string]string{} + if n.Info != nil { + m["Info"] = fmt.Sprintf("\"%s\"", n.Info.Text(source)) + } + DumpHelper(n, source, level, "FencedCodeBlock", m, nil) +} + +// NewFencedCodeBlock return a new FencedCodeBlock node. +func NewFencedCodeBlock(info *Text) *FencedCodeBlock { + return &FencedCodeBlock{ + BaseBlock: BaseBlock{}, + Info: info, + } +} + +// A Blockquote struct represents an blockquote block of Markdown text. +type Blockquote struct { + BaseBlock +} + +// Dump impelements Node.Dump . +func (n *Blockquote) Dump(source []byte, level int) { + DumpHelper(n, source, level, "Blockquote", nil, nil) +} + +// NewBlockquote returns a new Blockquote node. +func NewBlockquote() *Blockquote { + return &Blockquote{ + BaseBlock: BaseBlock{}, + } +} + +// A List structr represents a list of Markdown text. +type List struct { + BaseBlock + + // Marker is a markar character like '-', '+', ')' and '.'. + Marker byte + + // IsTight is a true if this list is a 'tight' list. + // See https://spec.commonmark.org/0.29/#loose for details. + IsTight bool + + // Start is an initial number of this ordered list. + // If this list is not an ordered list, Start is 0. + Start int +} + +// IsOrdered returns true if this list is an ordered list, otherwise false. +func (l *List) IsOrdered() bool { + return l.Marker == '.' || l.Marker == ')' +} + +// CanContinue returns true if this list can continue with +// given mark and a list type, otherwise false. +func (l *List) CanContinue(marker byte, isOrdered bool) bool { + return marker == l.Marker && isOrdered == l.IsOrdered() +} + +// Dump implements Node.Dump. +func (l *List) Dump(source []byte, level int) { + name := "List" + if l.IsOrdered() { + name = "OrderedList" + } + m := map[string]string{ + "Marker": fmt.Sprintf("%c", l.Marker), + "Tight": fmt.Sprintf("%v", l.IsTight), + } + if l.IsOrdered() { + m["Start"] = fmt.Sprintf("%d", l.Start) + } + DumpHelper(l, source, level, name, m, nil) +} + +// NewList returns a new List node. +func NewList(marker byte) *List { + return &List{ + BaseBlock: BaseBlock{}, + Marker: marker, + IsTight: true, + } +} + +// A ListItem struct represents a list item of Markdown text. +type ListItem struct { + BaseBlock + + // Offset is an offset potision of this item. + Offset int +} + +// Dump implements Node.Dump. +func (n *ListItem) Dump(source []byte, level int) { + DumpHelper(n, source, level, "ListItem", nil, nil) +} + +// NewListItem returns a new ListItem node. +func NewListItem(offset int) *ListItem { + return &ListItem{ + BaseBlock: BaseBlock{}, + Offset: offset, + } +} + +// HTMLBlockType represents kinds of an html blocks. +// See https://spec.commonmark.org/0.29/#html-blocks +type HTMLBlockType int + +const ( + // HTMLBlockType1 represents type 1 html blocks + HTMLBlockType1 = iota + 1 + // HTMLBlockType2 represents type 2 html blocks + HTMLBlockType2 + // HTMLBlockType3 represents type 3 html blocks + HTMLBlockType3 + // HTMLBlockType4 represents type 4 html blocks + HTMLBlockType4 + // HTMLBlockType5 represents type 5 html blocks + HTMLBlockType5 + // HTMLBlockType6 represents type 6 html blocks + HTMLBlockType6 + // HTMLBlockType7 represents type 7 html blocks + HTMLBlockType7 +) + +// An HTMLBlock struct represents an html block of Markdown text. +type HTMLBlock struct { + BaseBlock + + // Type is a type of this html block. + HTMLBlockType HTMLBlockType + + // ClosureLine is a line that closes this html block. + ClosureLine textm.Segment +} + +// IsRaw implements Node.IsRaw. +func (n *HTMLBlock) IsRaw() bool { + return true +} + +// HasClosure returns true if this html block has a closure line, +// otherwise false. +func (n *HTMLBlock) HasClosure() bool { + return n.ClosureLine.Start >= 0 +} + +// Dump implements Node.Dump. +func (n *HTMLBlock) Dump(source []byte, level int) { + indent := strings.Repeat(" ", level) + fmt.Printf("%s%s {\n", indent, "HTMLBlock") + indent2 := strings.Repeat(" ", level+1) + fmt.Printf("%sRawText: \"", indent2) + for i := 0; i < n.Lines().Len(); i++ { + s := n.Lines().At(i) + fmt.Print(string(source[s.Start:s.Stop])) + } + fmt.Printf("\"\n") + for c := n.FirstChild(); c != nil; c = c.NextSibling() { + c.Dump(source, level+1) + } + if n.HasClosure() { + cl := n.ClosureLine + fmt.Printf("%sClosure: \"%s\"\n", indent2, string(cl.Value(source))) + } + fmt.Printf("%s}\n", indent) +} + +// NewHTMLBlock returns a new HTMLBlock node. +func NewHTMLBlock(typ HTMLBlockType) *HTMLBlock { + return &HTMLBlock{ + BaseBlock: BaseBlock{}, + HTMLBlockType: typ, + ClosureLine: textm.NewSegment(-1, -1), + } +} diff --git a/ast/inline.go b/ast/inline.go new file mode 100644 index 0000000..43ce81f --- /dev/null +++ b/ast/inline.go @@ -0,0 +1,371 @@ +package ast + +import ( + "fmt" + "strings" + + textm "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +// A BaseInline struct implements the Node interface. +type BaseInline struct { + BaseNode +} + +// Type implements Node.Type +func (b *BaseInline) Type() NodeType { + return InlineNode +} + +// IsRaw implements Node.IsRaw +func (b *BaseInline) IsRaw() bool { + return false +} + +// HasBlankPreviousLines implements Node.HasBlankPreviousLines. +func (b *BaseInline) HasBlankPreviousLines() bool { + panic("can not call with inline nodes.") +} + +// SetBlankPreviousLines implements Node.SetBlankPreviousLines. +func (b *BaseInline) SetBlankPreviousLines(v bool) { + panic("can not call with inline nodes.") +} + +// Lines implements Node.Lines +func (b *BaseInline) Lines() *textm.Segments { + panic("can not call with inline nodes.") +} + +// SetLines implements Node.SetLines +func (b *BaseInline) SetLines(v *textm.Segments) { + panic("can not call with inline nodes.") +} + +// A Text struct represents a textual content of the Markdown text. +type Text struct { + BaseInline + // Segment is a position in a source text. + Segment textm.Segment + + flags uint8 +} + +const ( + textSoftLineBreak = 1 << iota + textHardLineBreak + textRaw +) + +// Inline implements Inline.Inline. +func (n *Text) Inline() { +} + +// SoftLineBreak returns true if this node ends with a new line, +// otherwise false. +func (n *Text) SoftLineBreak() bool { + return n.flags&textSoftLineBreak != 0 +} + +// SetSoftLineBreak sets whether this node ends with a new line. +func (n *Text) SetSoftLineBreak(v bool) { + if v { + n.flags |= textSoftLineBreak + } else { + n.flags = n.flags &^ textHardLineBreak + } +} + +// IsRaw returns true if this text should be rendered without unescaping +// back slash escapes and resolving references. +func (n *Text) IsRaw() bool { + return n.flags&textRaw != 0 +} + +// SetRaw sets whether this text should be rendered as raw contents. +func (n *Text) SetRaw(v bool) { + if v { + n.flags |= textRaw + } else { + n.flags = n.flags &^ textRaw + } +} + +// HardLineBreak returns true if this node ends with a hard line break. +// See https://spec.commonmark.org/0.29/#hard-line-breaks for details. +func (n *Text) HardLineBreak() bool { + return n.flags&textHardLineBreak != 0 +} + +// SetHardLineBreak sets whether this node ends with a hard line break. +func (n *Text) SetHardLineBreak(v bool) { + if v { + n.flags |= textHardLineBreak + } else { + n.flags = n.flags &^ textHardLineBreak + } +} + +// Merge merges a Node n into this node. +// Merge returns true if given node has been merged, otherwise false. +func (n *Text) Merge(node Node, source []byte) bool { + t, ok := node.(*Text) + if !ok { + return false + } + if n.Segment.Stop != t.Segment.Start || t.Segment.Padding != 0 || source[n.Segment.Stop-1] == '\n' || t.IsRaw() != n.IsRaw() { + return false + } + n.Segment.Stop = t.Segment.Stop + n.SetSoftLineBreak(t.SoftLineBreak()) + n.SetHardLineBreak(t.HardLineBreak()) + return true +} + +// Text implements Node.Text. +func (n *Text) Text(source []byte) []byte { + return n.Segment.Value(source) +} + +// Dump implements Node.Dump. +func (n *Text) Dump(source []byte, level int) { + fmt.Printf("%sText: \"%s\"\n", strings.Repeat(" ", level), strings.TrimRight(string(n.Text(source)), "\n")) +} + +// NewText returns a new Text node. +func NewText() *Text { + return &Text{ + BaseInline: BaseInline{}, + } +} + +// NewTextSegment returns a new Text node with given source potision. +func NewTextSegment(v textm.Segment) *Text { + return &Text{ + BaseInline: BaseInline{}, + Segment: v, + } +} + +// NewRawTextSegment returns a new Text node with given source position. +// The new node should be rendered as raw contents. +func NewRawTextSegment(v textm.Segment) *Text { + t := &Text{ + BaseInline: BaseInline{}, + Segment: v, + } + t.SetRaw(true) + return t +} + +// MergeOrAppendTextSegment merges a given s into the last child of the parent if +// it can be merged, otherwise creates a new Text node and appends it to after current +// last child. +func MergeOrAppendTextSegment(parent Node, s textm.Segment) { + last := parent.LastChild() + t, ok := last.(*Text) + if ok && t.Segment.Stop == s.Start && !t.SoftLineBreak() { + ts := t.Segment + t.Segment = ts.WithStop(s.Stop) + } else { + parent.AppendChild(parent, NewTextSegment(s)) + } +} + +// MergeOrReplaceTextSegment merges a given s into a previous sibling of the node n +// if a previous sibling of the node n is *Text, otherwise replaces Node n with s. +func MergeOrReplaceTextSegment(parent Node, n Node, s textm.Segment) { + prev := n.PreviousSibling() + if t, ok := prev.(*Text); ok && t.Segment.Stop == s.Start && !t.SoftLineBreak() { + t.Segment = t.Segment.WithStop(s.Stop) + parent.RemoveChild(parent, n) + } else { + parent.ReplaceChild(parent, n, NewTextSegment(s)) + } +} + +// A CodeSpan struct represents a code span of Markdown text. +type CodeSpan struct { + BaseInline +} + +// Inline implements Inline.Inline . +func (n *CodeSpan) Inline() { +} + +// IsBlank returns true if this node consists of spaces, otherwise false. +func (n *CodeSpan) IsBlank(source []byte) bool { + for c := n.FirstChild(); c != nil; c = c.NextSibling() { + text := c.(*Text).Segment + if !util.IsBlank(text.Value(source)) { + return false + } + } + return true +} + +// Dump implements Node.Dump +func (n *CodeSpan) Dump(source []byte, level int) { + DumpHelper(n, source, level, "CodeSpan", nil, nil) +} + +// NewCodeSpan returns a new CodeSpan node. +func NewCodeSpan() *CodeSpan { + return &CodeSpan{ + BaseInline: BaseInline{}, + } +} + +// An Emphasis struct represents an emphasis of Markdown text. +type Emphasis struct { + BaseInline + + // Level is a level of the emphasis. + Level int +} + +// Inline implements Inline.Inline. +func (n *Emphasis) Inline() { +} + +// Dump implements Node.Dump. +func (n *Emphasis) Dump(source []byte, level int) { + DumpHelper(n, source, level, fmt.Sprintf("Emphasis(%d)", n.Level), nil, nil) +} + +// NewEmphasis returns a new Emphasis node with given level. +func NewEmphasis(level int) *Emphasis { + return &Emphasis{ + BaseInline: BaseInline{}, + Level: level, + } +} + +type baseLink struct { + BaseInline + + // Destination is a destination(URL) of this link. + Destination []byte + + // Title is a title of this link. + Title []byte +} + +// Inline implements Inline.Inline. +func (n *baseLink) Inline() { +} + +func (n *baseLink) Dump(source []byte, level int) { + m := map[string]string{} + m["Destination"] = string(n.Destination) + m["Title"] = string(n.Title) + DumpHelper(n, source, level, "Link", m, nil) +} + +// A Link struct represents a link of the Markdown text. +type Link struct { + baseLink +} + +// NewLink returns a new Link node. +func NewLink() *Link { + c := &Link{ + baseLink: baseLink{ + BaseInline: BaseInline{}, + }, + } + return c +} + +// An Image struct represents an image of the Markdown text. +type Image struct { + baseLink +} + +// Dump implements Node.Dump. +func (n *Image) Dump(source []byte, level int) { + m := map[string]string{} + m["Destination"] = string(n.Destination) + m["Title"] = string(n.Title) + DumpHelper(n, source, level, "Image", m, nil) +} + +// NewImage returns a new Image node. +func NewImage(link *Link) *Image { + c := &Image{ + baseLink: baseLink{ + BaseInline: BaseInline{}, + }, + } + c.Destination = link.Destination + c.Title = link.Title + for n := link.FirstChild(); n != nil; { + next := n.NextSibling() + link.RemoveChild(link, n) + c.AppendChild(c, n) + n = next + } + + return c +} + +// AutoLinkType defines kind of auto links. +type AutoLinkType int + +const ( + // AutoLinkEmail indicates that an autolink is an email address. + AutoLinkEmail = iota + 1 + // AutoLinkURL indicates that an autolink is a generic URL. + AutoLinkURL +) + +// An AutoLink struct represents an autolink of the Markdown text. +type AutoLink struct { + BaseInline + // Value is a link text of this node. + Value *Text + // Type is a type of this autolink. + AutoLinkType AutoLinkType +} + +// Inline implements Inline.Inline. +func (n *AutoLink) Inline() {} + +// Dump implenets Node.Dump +func (n *AutoLink) Dump(source []byte, level int) { + segment := n.Value.Segment + m := map[string]string{ + "Value": string(segment.Value(source)), + } + DumpHelper(n, source, level, "AutoLink", m, nil) +} + +// NewAutoLink returns a new AutoLink node. +func NewAutoLink(typ AutoLinkType, value *Text) *AutoLink { + return &AutoLink{ + BaseInline: BaseInline{}, + Value: value, + AutoLinkType: typ, + } +} + +// A RawHTML struct represents an inline raw HTML of the Markdown text. +type RawHTML struct { + BaseInline +} + +// Inline implements Inline.Inline. +func (n *RawHTML) Inline() {} + +// Dump implements Node.Dump. +func (n *RawHTML) Dump(source []byte, level int) { + DumpHelper(n, source, level, "RawHTML", nil, nil) +} + +// NewRawHTML returns a new RawHTML node. +func NewRawHTML() *RawHTML { + return &RawHTML{ + BaseInline: BaseInline{}, + } +} diff --git a/commonmark_test.go b/commonmark_test.go new file mode 100644 index 0000000..0318cc1 --- /dev/null +++ b/commonmark_test.go @@ -0,0 +1,53 @@ +package goldmark + +import ( + "bytes" + "encoding/json" + "io/ioutil" + "testing" + + "github.com/yuin/goldmark/renderer/html" +) + +type commonmarkSpecTestCase struct { + Markdown string `json:"markdown"` + HTML string `json:"html"` + Example int `json:"example"` + StartLine int `json:"start_line"` + EndLine int `json:"end_line"` + Section string `json:"section"` +} + +func TestSpec(t *testing.T) { + bs, err := ioutil.ReadFile("_test/spec.json") + if err != nil { + panic(err) + } + var testCases []commonmarkSpecTestCase + if err := json.Unmarshal(bs, &testCases); err != nil { + panic(err) + } + markdown := New(WithRendererOptions(html.WithXHTML())) + for _, testCase := range testCases { + var out bytes.Buffer + if err := markdown.Convert([]byte(testCase.Markdown), &out); err != nil { + panic(err) + } + if !bytes.Equal(bytes.TrimSpace(out.Bytes()), bytes.TrimSpace([]byte(testCase.HTML))) { + format := `============= case %d ================ +Markdown: +----------- +%s + +Expected: +---------- +%s + +Actual +--------- +%s +` + t.Errorf(format, testCase.Example, testCase.Markdown, testCase.HTML, out.Bytes()) + } + } +} diff --git a/extension/ast/strikethrough.go b/extension/ast/strikethrough.go new file mode 100644 index 0000000..e6f45a5 --- /dev/null +++ b/extension/ast/strikethrough.go @@ -0,0 +1,23 @@ +// Package ast defines AST nodes that represents extension's elements +package ast + +import ( + gast "github.com/yuin/goldmark/ast" +) + +// A Strikethrough struct represents a strikethrough of GFM text. +type Strikethrough struct { + gast.BaseInline +} + +func (n *Strikethrough) Inline() { +} + +func (n *Strikethrough) Dump(source []byte, level int) { + gast.DumpHelper(n, source, level, "Strikethrough", nil, nil) +} + +// NewStrikethrough returns a new Strikethrough node. +func NewStrikethrough() *Strikethrough { + return &Strikethrough{} +} diff --git a/extension/ast/table.go b/extension/ast/table.go new file mode 100644 index 0000000..7d205ef --- /dev/null +++ b/extension/ast/table.go @@ -0,0 +1,113 @@ +package ast + +import ( + "fmt" + gast "github.com/yuin/goldmark/ast" + "strings" +) + +// Alignment is a text alignment of table cells. +type Alignment int + +const ( + // AlignLeft indicates text should be left justified. + AlignLeft Alignment = iota + 1 + + // AlignRight indicates text should be right justified. + AlignRight + + // AlignCenter indicates text should be centered. + AlignCenter + + // AlignNone indicates text should be aligned by default manner. + AlignNone +) + +func (a Alignment) String() string { + switch a { + case AlignLeft: + return "left" + case AlignRight: + return "right" + case AlignCenter: + return "center" + case AlignNone: + return "none" + } + return "" +} + +// A Table struct represents a table of Markdown(GFM) text. +type Table struct { + gast.BaseBlock + + // Alignments returns alignments of the columns. + Alignments []Alignment +} + +// Dump implements Node.Dump +func (n *Table) Dump(source []byte, level int) { + gast.DumpHelper(n, source, level, "Table", nil, func(level int) { + indent := strings.Repeat(" ", level) + fmt.Printf("%sAlignments {\n", indent) + for i, alignment := range n.Alignments { + indent2 := strings.Repeat(" ", level+1) + fmt.Printf("%s%s", indent2, alignment.String()) + if i != len(n.Alignments)-1 { + fmt.Println("") + } + } + fmt.Printf("\n%s}\n", indent) + }) +} + +// NewTable returns a new Table node. +func NewTable() *Table { + return &Table{ + Alignments: []Alignment{}, + } +} + +// A TableRow struct represents a table row of Markdown(GFM) text. +type TableRow struct { + gast.BaseBlock + Alignments []Alignment +} + +// Dump implements Node.Dump. +func (n *TableRow) Dump(source []byte, level int) { + gast.DumpHelper(n, source, level, "TableRow", nil, nil) +} + +// NewTableRow returns a new TableRow node. +func NewTableRow(alignments []Alignment) *TableRow { + return &TableRow{} +} + +// A TableHeader struct represents a table header of Markdown(GFM) text. +type TableHeader struct { + *TableRow +} + +// NewTableHeader returns a new TableHeader node. +func NewTableHeader(row *TableRow) *TableHeader { + return &TableHeader{row} +} + +// A TableCell struct represents a table cell of a Markdown(GFM) text. +type TableCell struct { + gast.BaseBlock + Alignment Alignment +} + +// Dump implements Node.Dump. +func (n *TableCell) Dump(source []byte, level int) { + gast.DumpHelper(n, source, level, "TableCell", nil, nil) +} + +// NewTableCell returns a new TableCell node. +func NewTableCell() *TableCell { + return &TableCell{ + Alignment: AlignNone, + } +} diff --git a/extension/ast/tasklist.go b/extension/ast/tasklist.go new file mode 100644 index 0000000..38f7be4 --- /dev/null +++ b/extension/ast/tasklist.go @@ -0,0 +1,27 @@ +package ast + +import ( + "fmt" + gast "github.com/yuin/goldmark/ast" +) + +// A TaskCheckBox struct represents a checkbox of a task list. +type TaskCheckBox struct { + gast.BaseInline + IsChecked bool +} + +// Dump impelemtns Node.Dump. +func (n *TaskCheckBox) Dump(source []byte, level int) { + m := map[string]string{ + "Checked": fmt.Sprintf("%v", n.IsChecked), + } + gast.DumpHelper(n, source, level, "TaskCheckBox", m, nil) +} + +// NewTaskCheckBox returns a new TaskCheckBox node. +func NewTaskCheckBox(checked bool) *TaskCheckBox { + return &TaskCheckBox{ + IsChecked: checked, + } +} diff --git a/extension/gfm.go b/extension/gfm.go new file mode 100644 index 0000000..3b9bda5 --- /dev/null +++ b/extension/gfm.go @@ -0,0 +1,32 @@ +package extension + +import ( + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/parser" +) + +type gfm struct { +} + +// GFM is an extension that provides Github Flavored markdown functionalities. +var GFM = &gfm{} + +var filterTags = []string{ + "title", + "textarea", + "style", + "xmp", + "iframe", + "noembed", + "noframes", + "script", + "plaintext", +} + +func (e *gfm) Extend(m goldmark.Markdown) { + m.Parser().AddOption(parser.WithFilterTags(filterTags...)) + Linkify.Extend(m) + Table.Extend(m) + Strikethrough.Extend(m) + TaskList.Extend(m) +} diff --git a/extension/linkify.go b/extension/linkify.go new file mode 100644 index 0000000..ec1dd07 --- /dev/null +++ b/extension/linkify.go @@ -0,0 +1,125 @@ +package extension + +import ( + "bytes" + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" + "regexp" +) + +var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b(?:[-a-zA-Z0-9@:%_\+.~#?&//=\(\);]*)`) + +var urlRegexp = regexp.MustCompile(`^(?:http|https|ftp):\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=\(\);]*)`) + +var emailRegexp = regexp.MustCompile(`^[a-zA-Z0-9\.\-_\+]+@([a-zA-Z0-9\.\-_]+)`) + +type linkifyParser struct { +} + +var defaultLinkifyParser = &linkifyParser{} + +// NewLinkifyParser return a new InlineParser can parse +// text that seems like a URL. +func NewLinkifyParser() parser.InlineParser { + return defaultLinkifyParser +} + +func (s *linkifyParser) Trigger() []byte { + // ' ' indicates any white spaces and a line head + return []byte{' ', '*', '_', '~', '('} +} + +func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node { + line, segment := block.PeekLine() + consumes := 0 + start := segment.Start + c := line[0] + // advance if current position is not a line head. + if c == ' ' || c == '*' || c == '_' || c == '~' || c == '(' { + consumes++ + start++ + line = line[1:] + } + + var m []int + typ := ast.AutoLinkType(ast.AutoLinkEmail) + typ = ast.AutoLinkURL + m = urlRegexp.FindSubmatchIndex(line) + if m == nil { + m = wwwURLRegxp.FindSubmatchIndex(line) + } + if m != nil { + lastChar := line[m[1]-1] + if lastChar == '.' { + m[1]-- + } else if lastChar == ')' { + closing := 0 + for i := m[1] - 1; i >= m[0]; i-- { + if line[i] == ')' { + closing++ + } else if line[i] == '(' { + closing-- + } + } + if closing > 0 { + m[1]-- + } + } else if lastChar == ';' { + i := m[1] - 2 + for ; i >= m[0]; i-- { + if util.IsAlphaNumeric(line[i]) { + continue + } + break + } + if i != m[1]-2 { + if line[i] == '&' { + m[1] -= m[1] - i + } + } + } + } + if m == nil { + typ = ast.AutoLinkEmail + m = emailRegexp.FindSubmatchIndex(line) + if m == nil || bytes.IndexByte(line[m[2]:m[3]], '.') < 0 { + return nil + } + lastChar := line[m[1]-1] + if lastChar == '.' { + m[1]-- + } else if lastChar == '-' || lastChar == '_' { + return nil + } + } + if m == nil { + return nil + } + if consumes != 0 { + s := segment.WithStop(segment.Start + 1) + ast.MergeOrAppendTextSegment(parent, s) + } + consumes += m[1] + block.Advance(consumes) + n := ast.NewTextSegment(text.NewSegment(start, start+m[1])) + return ast.NewAutoLink(typ, n) +} + +func (s *linkifyParser) CloseBlock(parent ast.Node, pc parser.Context) { + // nothing to do +} + +type linkify struct { +} + +// Linkify is an extension that allow you to parse text that seems like a URL. +var Linkify = &linkify{} + +func (e *linkify) Extend(m goldmark.Markdown) { + m.Parser().AddOption(parser.WithInlineParsers( + util.Prioritized(NewLinkifyParser(), 999), + )) +} diff --git a/extension/strikethrough.go b/extension/strikethrough.go new file mode 100644 index 0000000..9e70fe5 --- /dev/null +++ b/extension/strikethrough.go @@ -0,0 +1,111 @@ +package extension + +import ( + "github.com/yuin/goldmark" + gast "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/extension/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +type strikethroughDelimiterProcessor struct { +} + +func (p *strikethroughDelimiterProcessor) IsDelimiter(b byte) bool { + return b == '~' +} + +func (p *strikethroughDelimiterProcessor) CanOpenCloser(opener, closer *parser.Delimiter) bool { + return opener.Char == closer.Char +} + +func (p *strikethroughDelimiterProcessor) OnMatch(consumes int) gast.Node { + return ast.NewStrikethrough() +} + +var defaultStrikethroughDelimiterProcessor = &strikethroughDelimiterProcessor{} + +type strikethroughParser struct { +} + +var defaultStrikethroughParser = &strikethroughParser{} + +// NewStrikethroughParser return a new InlineParser that parses +// strikethrough expressions. +func NewStrikethroughParser() parser.InlineParser { + return defaultStrikethroughParser +} + +func (s *strikethroughParser) Trigger() []byte { + return []byte{'~'} +} + +func (s *strikethroughParser) Parse(parent gast.Node, block text.Reader, pc parser.Context) gast.Node { + before := block.PrecendingCharacter() + line, segment := block.PeekLine() + node := parser.ScanDelimiter(line, before, 2, defaultStrikethroughDelimiterProcessor) + if node == nil { + return nil + } + node.Segment = segment.WithStop(segment.Start + node.OriginalLength) + block.Advance(node.OriginalLength) + pc.PushDelimiter(node) + return node +} + +func (s *strikethroughParser) CloseBlock(parent gast.Node, pc parser.Context) { + // nothing to do +} + +// StrikethroughHTMLRenderer is a renderer.NodeRenderer implementation that +// renders Strikethrough nodes. +type StrikethroughHTMLRenderer struct { + html.Config +} + +// NewStrikethroughHTMLRenderer returns a new StrikethroughHTMLRenderer. +func NewStrikethroughHTMLRenderer(opts ...html.Option) renderer.NodeRenderer { + r := &StrikethroughHTMLRenderer{ + Config: html.NewConfig(), + } + for _, opt := range opts { + opt.SetHTMLOption(&r.Config) + } + return r +} + +// Render implements renderer.NodeRenderer.Render. +func (r *StrikethroughHTMLRenderer) Render(writer util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) { + switch node := n.(type) { + case *ast.Strikethrough: + return r.renderStrikethrough(writer, source, node, entering), nil + } + return gast.WalkContinue, renderer.NotSupported +} + +func (r *StrikethroughHTMLRenderer) renderStrikethrough(w util.BufWriter, source []byte, n *ast.Strikethrough, entering bool) gast.WalkStatus { + if entering { + w.WriteString("") + } else { + w.WriteString("") + } + return gast.WalkContinue +} + +type strikethrough struct { +} + +// Strikethrough is an extension that allow you to use strikethrough expression like '~~text~~' . +var Strikethrough = &strikethrough{} + +func (e *strikethrough) Extend(m goldmark.Markdown) { + m.Parser().AddOption(parser.WithInlineParsers( + util.Prioritized(NewStrikethroughParser(), 500), + )) + m.Renderer().AddOption(renderer.WithNodeRenderers( + util.Prioritized(NewStrikethroughHTMLRenderer(), 500), + )) +} diff --git a/extension/table.go b/extension/table.go new file mode 100644 index 0000000..3cc65c6 --- /dev/null +++ b/extension/table.go @@ -0,0 +1,233 @@ +package extension + +import ( + "bytes" + "fmt" + "regexp" + + "github.com/yuin/goldmark" + gast "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/extension/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +var tableDelimRegexp = regexp.MustCompile(`^[\s\-\|\:]+$`) +var tableDelimLeft = regexp.MustCompile(`^\s*\:\-+\s*$`) +var tableDelimRight = regexp.MustCompile(`^\s*\-+\:\s*$`) +var tableDelimCenter = regexp.MustCompile(`^\s*\:\-+\:\s*$`) +var tableDelimNone = regexp.MustCompile(`^\s*\-+\s*$`) + +type tableParagraphTransformer struct { +} + +var defaultTableParagraphTransformer = &tableParagraphTransformer{} + +// NewTableParagraphTransformer returns a new ParagraphTransformer +// that can transform pargraphs into tables. +func NewTableParagraphTransformer() parser.ParagraphTransformer { + return defaultTableParagraphTransformer +} + +func (b *tableParagraphTransformer) Transform(node *gast.Paragraph, pc parser.Context) { + lines := node.Lines() + if lines.Len() < 2 { + return + } + alignments := b.parseDelimiter(lines.At(1), pc) + if alignments == nil { + return + } + header := b.parseRow(lines.At(0), alignments, pc) + if header == nil || len(alignments) != header.ChildCount() { + return + } + table := ast.NewTable() + table.Alignments = alignments + table.AppendChild(table, ast.NewTableHeader(header)) + if lines.Len() > 2 { + for i := 2; i < lines.Len(); i++ { + table.AppendChild(table, b.parseRow(lines.At(i), alignments, pc)) + } + } + node.Parent().InsertBefore(node.Parent(), node, table) + node.Parent().RemoveChild(node.Parent(), node) + return +} + +func (b *tableParagraphTransformer) parseRow(segment text.Segment, alignments []ast.Alignment, pc parser.Context) *ast.TableRow { + line := segment.Value(pc.Source()) + pos := 0 + pos += util.TrimLeftSpaceLength(line) + limit := len(line) + limit -= util.TrimRightSpaceLength(line) + row := ast.NewTableRow(alignments) + if len(line) > 0 && line[pos] == '|' { + pos++ + } + if len(line) > 0 && line[limit-1] == '|' { + limit-- + } + for i := 0; pos < limit; i++ { + closure := util.FindClosure(line[pos:], byte(0), '|', true, false) + if closure < 0 { + closure = len(line[pos:]) + } + node := ast.NewTableCell() + segment := text.NewSegment(segment.Start+pos, segment.Start+pos+closure) + segment = segment.TrimLeftSpace(pc.Source()) + segment = segment.TrimRightSpace(pc.Source()) + node.Lines().Append(segment) + node.Alignment = alignments[i] + row.AppendChild(row, node) + pos += closure + 1 + } + return row +} + +func (b *tableParagraphTransformer) parseDelimiter(segment text.Segment, pc parser.Context) []ast.Alignment { + line := segment.Value(pc.Source()) + if !tableDelimRegexp.Match(line) { + return nil + } + cols := bytes.Split(line, []byte{'|'}) + if util.IsBlank(cols[0]) { + cols = cols[1:] + } + if len(cols) > 0 && util.IsBlank(cols[len(cols)-1]) { + cols = cols[:len(cols)-1] + } + + var alignments []ast.Alignment + for _, col := range cols { + if tableDelimLeft.Match(col) { + if alignments == nil { + alignments = []ast.Alignment{} + } + alignments = append(alignments, ast.AlignLeft) + } else if tableDelimRight.Match(col) { + if alignments == nil { + alignments = []ast.Alignment{} + } + alignments = append(alignments, ast.AlignRight) + } else if tableDelimCenter.Match(col) { + if alignments == nil { + alignments = []ast.Alignment{} + } + alignments = append(alignments, ast.AlignCenter) + } else if tableDelimNone.Match(col) { + if alignments == nil { + alignments = []ast.Alignment{} + } + alignments = append(alignments, ast.AlignNone) + } else { + return nil + } + } + return alignments +} + +// TableHTMLRenderer is a renderer.NodeRenderer implementation that +// renders Table nodes. +type TableHTMLRenderer struct { + html.Config +} + +// NewTableHTMLRenderer returns a new TableHTMLRenderer. +func NewTableHTMLRenderer(opts ...html.Option) renderer.NodeRenderer { + r := &TableHTMLRenderer{ + Config: html.NewConfig(), + } + for _, opt := range opts { + opt.SetHTMLOption(&r.Config) + } + return r +} + +// Render implements renderer.Renderer.Render. +func (r *TableHTMLRenderer) Render(writer util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) { + switch node := n.(type) { + case *ast.Table: + return r.renderTable(writer, source, node, entering), nil + case *ast.TableHeader: + return r.renderTableHeader(writer, source, node, entering), nil + case *ast.TableRow: + return r.renderTableRow(writer, source, node, entering), nil + case *ast.TableCell: + return r.renderTableCell(writer, source, node, entering), nil + } + return gast.WalkContinue, renderer.NotSupported +} + +func (r *TableHTMLRenderer) renderTable(w util.BufWriter, source []byte, n *ast.Table, entering bool) gast.WalkStatus { + if entering { + w.WriteString("\n") + } else { + w.WriteString("
\n") + } + return gast.WalkContinue +} + +func (r *TableHTMLRenderer) renderTableHeader(w util.BufWriter, source []byte, n *ast.TableHeader, entering bool) gast.WalkStatus { + if entering { + w.WriteString("\n") + w.WriteString("\n") + } else { + w.WriteString("\n") + w.WriteString("\n") + if n.NextSibling() != nil { + w.WriteString("\n") + } + if n.Parent().LastChild() == n { + w.WriteString("\n") + } + } + return gast.WalkContinue +} + +func (r *TableHTMLRenderer) renderTableRow(w util.BufWriter, source []byte, n *ast.TableRow, entering bool) gast.WalkStatus { + if entering { + w.WriteString("\n") + } else { + w.WriteString("\n") + if n.Parent().LastChild() == n { + w.WriteString("\n") + } + } + return gast.WalkContinue +} + +func (r *TableHTMLRenderer) renderTableCell(w util.BufWriter, source []byte, n *ast.TableCell, entering bool) gast.WalkStatus { + tag := "td" + if n.Parent().Parent().FirstChild() == n.Parent() { + tag = "th" + } + if entering { + align := "" + if n.Alignment != ast.AlignNone { + align = fmt.Sprintf(` align="%s"`, n.Alignment.String()) + } + fmt.Fprintf(w, "<%s%s>", tag, align) + } else { + fmt.Fprintf(w, "\n", tag) + } + return gast.WalkContinue +} + +type table struct { +} + +// Table is an extension that allow you to use GFM tables . +var Table = &table{} + +func (e *table) Extend(m goldmark.Markdown) { + m.Parser().AddOption(parser.WithParagraphTransformers( + util.Prioritized(NewTableParagraphTransformer(), 200), + )) + m.Renderer().AddOption(renderer.WithNodeRenderers( + util.Prioritized(NewTableHTMLRenderer(), 500), + )) +} diff --git a/extension/tasklist.go b/extension/tasklist.go new file mode 100644 index 0000000..1de59f9 --- /dev/null +++ b/extension/tasklist.go @@ -0,0 +1,118 @@ +package extension + +import ( + "github.com/yuin/goldmark" + gast "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/extension/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" + "regexp" +) + +var taskListRegexp = regexp.MustCompile(`^\[([\sxX])\]\s*`) + +type taskCheckBoxParser struct { +} + +var defaultTaskCheckBoxParser = &taskCheckBoxParser{} + +// NewTaskCheckBoxParser returns a new InlineParser that can parse +// checkboxes in list items. +// This parser must take precedence over the parser.LinkParser. +func NewTaskCheckBoxParser() parser.InlineParser { + return defaultTaskCheckBoxParser +} + +func (s *taskCheckBoxParser) Trigger() []byte { + return []byte{'['} +} + +func (s *taskCheckBoxParser) Parse(parent gast.Node, block text.Reader, pc parser.Context) gast.Node { + // Given AST structure must be like + // - List + // - ListItem : parent.Parent + // - TextBlock : parent + // (current line) + if parent.Parent() == nil || parent.Parent().FirstChild() != parent { + return nil + } + + if _, ok := parent.Parent().(*gast.ListItem); !ok { + return nil + } + line, _ := block.PeekLine() + m := taskListRegexp.FindSubmatchIndex(line) + if m == nil { + return nil + } + value := line[m[2]:m[3]][0] + block.Advance(m[1]) + checked := value == 'x' || value == 'X' + return ast.NewTaskCheckBox(checked) +} + +func (s *taskCheckBoxParser) CloseBlock(parent gast.Node, pc parser.Context) { + // nothing to do +} + +// TaskCheckBoxHTMLRenderer is a renderer.NodeRenderer implementation that +// renders checkboxes in list items. +type TaskCheckBoxHTMLRenderer struct { + html.Config +} + +// NewTaskCheckBoxHTMLRenderer returns a new TaskCheckBoxHTMLRenderer. +func NewTaskCheckBoxHTMLRenderer(opts ...html.Option) renderer.NodeRenderer { + r := &TaskCheckBoxHTMLRenderer{ + Config: html.NewConfig(), + } + for _, opt := range opts { + opt.SetHTMLOption(&r.Config) + } + return r +} + +// Render implements renderer.NodeRenderer.Render. +func (r *TaskCheckBoxHTMLRenderer) Render(writer util.BufWriter, source []byte, n gast.Node, entering bool) (gast.WalkStatus, error) { + switch node := n.(type) { + case *ast.TaskCheckBox: + return r.renderTaskCheckBox(writer, source, node, entering), nil + } + return gast.WalkContinue, renderer.NotSupported +} + +func (r *TaskCheckBoxHTMLRenderer) renderTaskCheckBox(w util.BufWriter, source []byte, n *ast.TaskCheckBox, entering bool) gast.WalkStatus { + if !entering { + return gast.WalkContinue + } + + if n.IsChecked { + w.WriteString(`") + } else { + w.WriteString(">") + } + return gast.WalkContinue +} + +type taskList struct { +} + +// TaskList is an extension that allow you to use GFM task lists. +var TaskList = &taskList{} + +func (e *taskList) Extend(m goldmark.Markdown) { + m.Parser().AddOption(parser.WithInlineParsers( + util.Prioritized(NewTaskCheckBoxParser(), 0), + )) + m.Renderer().AddOption(renderer.WithNodeRenderers( + util.Prioritized(NewTaskCheckBoxHTMLRenderer(), 500), + )) +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..96f419a --- /dev/null +++ b/go.mod @@ -0,0 +1 @@ +module github.com/yuin/goldmark diff --git a/markdown.go b/markdown.go new file mode 100644 index 0000000..117e90f --- /dev/null +++ b/markdown.go @@ -0,0 +1,144 @@ +// Package goldmark implements functions to convert markdown text to a desired format. +package goldmark + +import ( + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" + "io" +) + +// DefaultParser returns a new Parser that is configured by default values. +func DefaultParser() parser.Parser { + return parser.NewParser(parser.WithBlockParsers(parser.DefaultBlockParsers()...), + parser.WithInlineParsers(parser.DefaultInlineParsers()...), + parser.WithParagraphTransformers(parser.DefaultParagraphTransformers()...), + ) +} + +// DefaultRenderer returns a new Renderer that is configured by default values. +func DefaultRenderer() renderer.Renderer { + return renderer.NewRenderer(renderer.WithNodeRenderers(util.Prioritized(html.NewRenderer(), 1000))) +} + +var defaultMarkdown = New() + +// Convert interprets a UTF-8 bytes source in Markdown and +// write rendered contents to a writer w. +func Convert(source []byte, w io.Writer) error { + return defaultMarkdown.Convert(source, w) +} + +// A Markdown interface offers functions to convert Markdown text to +// a desired format. +type Markdown interface { + // Convert interprets a UTF-8 bytes source in Markdown and write rendered + // contents to a writer w. + Convert(source []byte, writer io.Writer) error + + // Parser returns a Parser that will be used for conversion. + Parser() parser.Parser + + // SetParser sets a Parser to this object. + SetParser(parser.Parser) + + // Parser returns a Renderer that will be used for conversion. + Renderer() renderer.Renderer + + // SetRenderer sets a Renderer to this object. + SetRenderer(renderer.Renderer) +} + +// Option is a functional option type for Markdown objects. +type Option func(*markdown) + +// WithExtensions adds extensions. +func WithExtensions(ext ...Extender) Option { + return func(m *markdown) { + m.extensions = append(m.extensions, ext...) + } +} + +// WithParser allows you to override the default parser. +func WithParser(p parser.Parser) Option { + return func(m *markdown) { + m.parser = p + } +} + +// WithParserOptions applies options for the parser. +func WithParserOptions(opts ...parser.Option) Option { + return func(m *markdown) { + for _, opt := range opts { + m.parser.AddOption(opt) + } + } +} + +// WithRenderer allows you to override the default renderer. +func WithRenderer(r renderer.Renderer) Option { + return func(m *markdown) { + m.renderer = r + } +} + +// WithRendererOptions applies options for the renderer. +func WithRendererOptions(opts ...renderer.Option) Option { + return func(m *markdown) { + for _, opt := range opts { + m.renderer.AddOption(opt) + } + } +} + +type markdown struct { + parser parser.Parser + renderer renderer.Renderer + extensions []Extender +} + +// New returns a new Markdown with given options. +func New(options ...Option) Markdown { + md := &markdown{ + parser: DefaultParser(), + renderer: DefaultRenderer(), + extensions: []Extender{}, + } + for _, opt := range options { + opt(md) + } + for _, e := range md.extensions { + e.Extend(md) + } + return md +} + +func (m *markdown) Convert(source []byte, writer io.Writer) error { + reader := text.NewReader(source) + doc, _ := m.parser.Parse(reader) + return m.renderer.Render(writer, reader.Source(), doc) +} + +func (m *markdown) Parser() parser.Parser { + return m.parser +} + +func (m *markdown) SetParser(v parser.Parser) { + m.parser = v +} + +func (m *markdown) Renderer() renderer.Renderer { + return m.renderer +} + +func (m *markdown) SetRenderer(v renderer.Renderer) { + m.renderer = v +} + +// An Extender interface is used for extending Markdown. +type Extender interface { + // Extend extends the Markdown. + Extend(Markdown) +} diff --git a/parser/atx_heading.go b/parser/atx_heading.go new file mode 100644 index 0000000..46adbee --- /dev/null +++ b/parser/atx_heading.go @@ -0,0 +1,146 @@ +package parser + +import ( + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" + "regexp" +) + +// A HeadingConfig struct is a data structure that holds configuration of the renderers related to headings. +type HeadingConfig struct { + HeadingID bool +} + +// SetOption implements SetOptioner. +func (b *HeadingConfig) SetOption(name OptionName, value interface{}) { + switch name { + case HeadingID: + b.HeadingID = true + } +} + +// A HeadingOption interface sets options for heading parsers. +type HeadingOption interface { + SetHeadingOption(*HeadingConfig) +} + +// HeadingID is an option name that enables custom and auto IDs for headings. +var HeadingID OptionName = "HeadingID" + +type withHeadingID struct { +} + +func (o *withHeadingID) SetConfig(c *Config) { + c.Options[HeadingID] = true +} + +func (o *withHeadingID) SetHeadingOption(p *HeadingConfig) { + p.HeadingID = true +} + +// WithHeadingID is a functional option that enables custom heading ids and +// auto generated heading ids. +func WithHeadingID() interface { + Option + HeadingOption +} { + return &withHeadingID{} +} + +var atxHeadingRegexp = regexp.MustCompile(`^[ ]{0,3}(#{1,6})(?:\s+(.*?)\s*([\s]#+\s*)?)?\n?$`) + +type atxHeadingParser struct { + HeadingConfig +} + +// NewATXHeadingParser return a new BlockParser that can parse ATX headings. +func NewATXHeadingParser(opts ...HeadingOption) BlockParser { + p := &atxHeadingParser{} + for _, o := range opts { + o.SetHeadingOption(&p.HeadingConfig) + } + return p +} + +func (b *atxHeadingParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) { + line, segment := reader.PeekLine() + pos := pc.BlockOffset() + i := pos + for ; i < len(line) && line[i] == '#'; i++ { + } + level := i - pos + if i == pos || level > 6 { + return nil, NoChildren + } + l := util.TrimLeftSpaceLength(line[i:]) + if l == 0 { + return nil, NoChildren + } + start := i + l + stop := len(line) - util.TrimRightSpaceLength(line) + if stop <= start { // empty headings like '##[space]' + stop = start + 1 + } else { + i = stop - 1 + for ; line[i] == '#' && i >= start; i-- { + } + if i != stop-1 && !util.IsSpace(line[i]) { + i = stop - 1 + } + i++ + stop = i + } + + node := ast.NewHeading(level) + if len(util.TrimRight(line[start:stop], []byte{'#'})) != 0 { // empty heading like '### ###' + node.Lines().Append(text.NewSegment(segment.Start+start, segment.Start+stop)) + } + return node, NoChildren +} + +func (b *atxHeadingParser) Continue(node ast.Node, reader text.Reader, pc Context) State { + return Close +} + +func (b *atxHeadingParser) Close(node ast.Node, pc Context) { + if !b.HeadingID { + return + } + parseOrGenerateHeadingID(node.(*ast.Heading), pc) +} + +func (b *atxHeadingParser) CanInterruptParagraph() bool { + return true +} + +func (b *atxHeadingParser) CanAcceptIndentedLine() bool { + return false +} + +var headingIDRegexp = regexp.MustCompile(`^(.*[^\\])({#([^}]+)}\s*)\n?$`) +var headingIDMap = NewContextKey() + +func parseOrGenerateHeadingID(node *ast.Heading, pc Context) { + existsv := pc.Get(headingIDMap) + var exists map[string]bool + if existsv == nil { + exists = map[string]bool{} + pc.Set(headingIDMap, exists) + } else { + exists = existsv.(map[string]bool) + } + lastIndex := node.Lines().Len() - 1 + lastLine := node.Lines().At(lastIndex) + line := lastLine.Value(pc.Source()) + m := headingIDRegexp.FindSubmatchIndex(line) + var headingID []byte + if m != nil { + headingID = line[m[6]:m[7]] + lastLine.Stop -= m[5] - m[4] + node.Lines().Set(lastIndex, lastLine) + } else { + headingID = util.GenerateLinkID(line, exists) + } + node.ID = headingID +} diff --git a/parser/auto_link.go b/parser/auto_link.go new file mode 100644 index 0000000..f7656bc --- /dev/null +++ b/parser/auto_link.go @@ -0,0 +1,46 @@ +package parser + +import ( + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" + "regexp" +) + +type autoLinkParser struct { +} + +var defaultAutoLinkParser = &autoLinkParser{} + +// NewAutoLinkParser returns a new InlineParser that parses autolinks +// surrounded by '<' and '>' . +func NewAutoLinkParser() InlineParser { + return defaultAutoLinkParser +} + +func (s *autoLinkParser) Trigger() []byte { + return []byte{'<'} +} + +var emailAutoLinkRegexp = regexp.MustCompile(`^<([a-zA-Z0-9.!#$%&'*+\/=?^_` + "`" + `{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>`) + +var autoLinkRegexp = regexp.MustCompile(`(?i)^<[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*>`) + +func (s *autoLinkParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.Node { + line, segment := block.PeekLine() + match := emailAutoLinkRegexp.FindSubmatchIndex(line) + typ := ast.AutoLinkType(ast.AutoLinkEmail) + if match == nil { + match = autoLinkRegexp.FindSubmatchIndex(line) + typ = ast.AutoLinkURL + } + if match == nil { + return nil + } + value := ast.NewTextSegment(text.NewSegment(segment.Start+1, segment.Start+match[1]-1)) + block.Advance(match[1]) + return ast.NewAutoLink(typ, value) +} + +func (s *autoLinkParser) CloseBlock(parent ast.Node, pc Context) { + // nothing to do +} diff --git a/parser/blockquote.go b/parser/blockquote.go new file mode 100644 index 0000000..e77530f --- /dev/null +++ b/parser/blockquote.go @@ -0,0 +1,65 @@ +package parser + +import ( + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +type blockquoteParser struct { +} + +var defaultBlockquoteParser = &blockquoteParser{} + +// NewBlockquoteParser returns a new BlockParser that +// parses blockquotes. +func NewBlockquoteParser() BlockParser { + return defaultBlockquoteParser +} + +func (b *blockquoteParser) process(reader text.Reader) bool { + line, _ := reader.PeekLine() + w, pos := util.IndentWidth(line, 0) + if w > 3 || pos >= len(line) || line[pos] != '>' { + return false + } + pos++ + if pos >= len(line) || line[pos] == '\n' { + reader.Advance(pos) + return true + } + if line[pos] == ' ' || line[pos] == '\t' { + pos++ + } + reader.Advance(pos) + if line[pos-1] == '\t' { + reader.SetPadding(2) + } + return true +} + +func (b *blockquoteParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) { + if b.process(reader) { + return ast.NewBlockquote(), HasChildren + } + return nil, NoChildren +} + +func (b *blockquoteParser) Continue(node ast.Node, reader text.Reader, pc Context) State { + if b.process(reader) { + return Continue | HasChildren + } + return Close +} + +func (b *blockquoteParser) Close(node ast.Node, pc Context) { + // nothing to do +} + +func (b *blockquoteParser) CanInterruptParagraph() bool { + return true +} + +func (b *blockquoteParser) CanAcceptIndentedLine() bool { + return false +} diff --git a/parser/code_block.go b/parser/code_block.go new file mode 100644 index 0000000..a630978 --- /dev/null +++ b/parser/code_block.go @@ -0,0 +1,75 @@ +package parser + +import ( + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +type codeBlockParser struct { +} + +// CodeBlockParser is a BlockParser implementation that parses indented code blocks. +var defaultCodeBlockParser = &codeBlockParser{} + +// NewCodeBlockParser returns a new BlockParser that +// parses code blocks. +func NewCodeBlockParser() BlockParser { + return defaultCodeBlockParser +} + +func (b *codeBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) { + line, segment := reader.PeekLine() + pos, padding := util.IndentPosition(line, reader.LineOffset(), 4) + if pos < 0 { + return nil, NoChildren + } + node := ast.NewCodeBlock() + reader.AdvanceAndSetPadding(pos, padding) + _, segment = reader.PeekLine() + node.Lines().Append(segment) + reader.Advance(segment.Len() - 1) + return node, NoChildren + +} + +func (b *codeBlockParser) Continue(node ast.Node, reader text.Reader, pc Context) State { + line, segment := reader.PeekLine() + if util.IsBlank(line) { + node.Lines().Append(segment.TrimLeftSpaceWidth(4, reader.Source())) + return Continue | NoChildren + } + pos, padding := util.IndentPosition(line, reader.LineOffset(), 4) + if pos < 0 { + return Close + } + reader.AdvanceAndSetPadding(pos, padding) + _, segment = reader.PeekLine() + node.Lines().Append(segment) + reader.Advance(segment.Len() - 1) + return Continue | NoChildren +} + +func (b *codeBlockParser) Close(node ast.Node, pc Context) { + // trim trailing blank lines + lines := node.Lines() + length := lines.Len() - 1 + source := pc.Source() + for { + line := lines.At(length) + if util.IsBlank(line.Value(source)) { + length-- + } else { + break + } + } + lines.SetSliced(0, length+1) +} + +func (b *codeBlockParser) CanInterruptParagraph() bool { + return false +} + +func (b *codeBlockParser) CanAcceptIndentedLine() bool { + return true +} diff --git a/parser/code_span.go b/parser/code_span.go new file mode 100644 index 0000000..e5452a4 --- /dev/null +++ b/parser/code_span.go @@ -0,0 +1,87 @@ +package parser + +import ( + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +type codeSpanParser struct { +} + +var defaultCodeSpanParser = &codeSpanParser{} + +// NewCodeSpanParser return a new InlineParser that parses inline codes +// surrounded by '`' . +func NewCodeSpanParser() InlineParser { + return defaultCodeSpanParser +} + +func (s *codeSpanParser) Trigger() []byte { + return []byte{'`'} +} + +func (s *codeSpanParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.Node { + line, startSegment := block.PeekLine() + opener := 0 + for ; opener < len(line) && line[opener] == '`'; opener++ { + } + block.Advance(opener) + l, pos := block.Position() + node := ast.NewCodeSpan() + for { + line, segment := block.PeekLine() + if line == nil { + block.SetPosition(l, pos) + return ast.NewTextSegment(startSegment.WithStop(startSegment.Start + opener)) + } + for i := 0; i < len(line); i++ { + c := line[i] + if c == '`' { + oldi := i + for ; i < len(line) && line[i] == '`'; i++ { + } + closure := i - oldi + if closure == opener && (i+1 >= len(line) || line[i+1] != '`') { + segment := segment.WithStop(segment.Start + i - closure) + if !segment.IsEmpty() { + node.AppendChild(node, ast.NewRawTextSegment(segment)) + } + block.Advance(i) + goto end + } + } + } + if !util.IsBlank(line) { + node.AppendChild(node, ast.NewRawTextSegment(segment)) + } + block.AdvanceLine() + } +end: + if !node.IsBlank(pc.Source()) { + // trim first halfspace and last halfspace + segment := node.FirstChild().(*ast.Text).Segment + shouldTrimmed := true + if !(!segment.IsEmpty() && pc.Source()[segment.Start] == ' ') { + shouldTrimmed = false + } + segment = node.LastChild().(*ast.Text).Segment + if !(!segment.IsEmpty() && pc.Source()[segment.Stop-1] == ' ') { + shouldTrimmed = false + } + if shouldTrimmed { + t := node.FirstChild().(*ast.Text) + segment := t.Segment + t.Segment = segment.WithStart(segment.Start + 1) + t = node.LastChild().(*ast.Text) + segment = node.LastChild().(*ast.Text).Segment + t.Segment = segment.WithStop(segment.Stop - 1) + } + + } + return node +} + +func (s *codeSpanParser) CloseBlock(parent ast.Node, pc Context) { + // nothing to do +} diff --git a/parser/delimiter.go b/parser/delimiter.go new file mode 100644 index 0000000..f86c677 --- /dev/null +++ b/parser/delimiter.go @@ -0,0 +1,232 @@ +package parser + +import ( + "fmt" + "strings" + "unicode" + + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +// A DelimiterProcessor interface provides a set of functions about +// Deliiter nodes. +type DelimiterProcessor interface { + // IsDelimiter returns true if given character is a delimiter, otherwise false. + IsDelimiter(byte) bool + + // CanOpenCloser returns true if given opener can close given closer, otherwise false. + CanOpenCloser(opener, closer *Delimiter) bool + + // OnMatch will be called when new matched delimiter found. + // OnMatch should return a new Node correspond to the matched delimiter. + OnMatch(consumes int) ast.Node +} + +// A Delimiter struct represents a delimiter like '*' of the Markdown text. +type Delimiter struct { + ast.BaseInline + + Segment text.Segment + + // CanOpen is set true if this delimiter can open a span for a new node. + // See https://spec.commonmark.org/0.29/#can-open-emphasis for details. + CanOpen bool + + // CanClose is set true if this delimiter can close a span for a new node. + // See https://spec.commonmark.org/0.29/#can-open-emphasis for details. + CanClose bool + + // Length is a remaining length of this delmiter. + Length int + + // OriginalLength is a original length of this delimiter. + OriginalLength int + + // Char is a character of this delimiter. + Char byte + + // PreviousDelimiter is a previous sibling delimiter node of this delimiter. + PreviousDelimiter *Delimiter + + // NextDelimiter is a next sibling delimiter node of this delimiter. + NextDelimiter *Delimiter + + // Processor is a DelimiterProcessor associated with this delimiter. + Processor DelimiterProcessor +} + +// Inline implements Inline.Inline. +func (d *Delimiter) Inline() {} + +// Dump implements Node.Dump. +func (d *Delimiter) Dump(source []byte, level int) { + fmt.Printf("%sDelimiter: \"%s\"\n", strings.Repeat(" ", level), string(d.Text(source))) +} + +// Text implements Node.Text +func (d *Delimiter) Text(source []byte) []byte { + return d.Segment.Value(source) +} + +// ConsumeCharacters consumes delimiters. +func (d *Delimiter) ConsumeCharacters(n int) { + d.Length -= n + d.Segment = d.Segment.WithStop(d.Segment.Start + d.Length) +} + +// CalcComsumption calculates how many characters should be used for opening +// a new span correspond to given closer. +func (d *Delimiter) CalcComsumption(closer *Delimiter) int { + if (d.CanClose || closer.CanOpen) && (d.OriginalLength+closer.OriginalLength)%3 == 0 && closer.OriginalLength%3 != 0 { + return 0 + } + if d.Length >= 2 && closer.Length >= 2 { + return 2 + } + return 1 +} + +// NewDelimiter returns a new Delimiter node. +func NewDelimiter(canOpen, canClose bool, length int, char byte, processor DelimiterProcessor) *Delimiter { + c := &Delimiter{ + BaseInline: ast.BaseInline{}, + CanOpen: canOpen, + CanClose: canClose, + Length: length, + OriginalLength: length, + Char: char, + PreviousDelimiter: nil, + NextDelimiter: nil, + Processor: processor, + } + return c +} + +// ScanDelimiter scans a delimiter by given DelimiterProcessor. +func ScanDelimiter(line []byte, before rune, min int, processor DelimiterProcessor) *Delimiter { + i := 0 + c := line[i] + j := i + if !processor.IsDelimiter(c) { + return nil + } + for ; j < len(line) && c == line[j]; j++ { + } + if (j - i) >= min { + after := rune(' ') + if j != len(line) { + after = util.ToRune(line, j) + } + + isLeft, isRight, canOpen, canClose := false, false, false, false + beforeIsPunctuation := unicode.IsPunct(before) + beforeIsWhitespace := unicode.IsSpace(before) + afterIsPunctuation := unicode.IsPunct(after) + afterIsWhitespace := unicode.IsSpace(after) + + isLeft = !afterIsWhitespace && + (!afterIsPunctuation || beforeIsWhitespace || beforeIsPunctuation) + isRight = !beforeIsWhitespace && + (!beforeIsPunctuation || afterIsWhitespace || afterIsPunctuation) + + if line[i] == '_' { + canOpen = isLeft && (!isRight || beforeIsPunctuation) + canClose = isRight && (!isLeft || afterIsPunctuation) + } else { + canOpen = isLeft + canClose = isRight + } + return NewDelimiter(canOpen, canClose, j-i, c, processor) + } + return nil +} + +// ProcessDelimiters processes the delimiter list in the context. +// Processing will be stop when reaching the bottom. +// +// If you implement an inline parser that can have other inline nodes as +// children, you should call this function when nesting span has closed. +func ProcessDelimiters(bottom ast.Node, pc Context) { + if pc.LastDelimiter() == nil { + return + } + var closer *Delimiter + if bottom != nil { + for c := pc.LastDelimiter().PreviousSibling(); c != nil; { + if d, ok := c.(*Delimiter); ok { + closer = d + } + prev := c.PreviousSibling() + if prev == bottom { + break + } + c = prev + } + } else { + closer = pc.FirstDelimiter() + } + if closer == nil { + pc.ClearDelimiters(bottom) + return + } + for closer != nil { + if !closer.CanClose { + closer = closer.NextDelimiter + continue + } + consume := 0 + found := false + maybeOpener := false + var opener *Delimiter + for opener = closer.PreviousDelimiter; opener != nil; opener = opener.PreviousDelimiter { + if opener.CanOpen && opener.Processor.CanOpenCloser(opener, closer) { + maybeOpener = true + consume = opener.CalcComsumption(closer) + if consume > 0 { + found = true + break + } + } + } + if !found { + if !maybeOpener && !closer.CanOpen { + pc.RemoveDelimiter(closer) + } + closer = closer.NextDelimiter + continue + } + opener.ConsumeCharacters(consume) + closer.ConsumeCharacters(consume) + + node := opener.Processor.OnMatch(consume) + + parent := opener.Parent() + child := opener.NextSibling() + + for child != nil && child != closer { + next := child.NextSibling() + node.AppendChild(node, child) + child = next + } + parent.InsertAfter(parent, opener, node) + + for c := opener.NextDelimiter; c != nil && c != closer; { + next := c.NextDelimiter + pc.RemoveDelimiter(c) + c = next + } + + if opener.Length == 0 { + pc.RemoveDelimiter(opener) + } + + if closer.Length == 0 { + next := closer.NextDelimiter + pc.RemoveDelimiter(closer) + closer = next + } + } + pc.ClearDelimiters(bottom) +} diff --git a/parser/emphasis.go b/parser/emphasis.go new file mode 100644 index 0000000..73b32f2 --- /dev/null +++ b/parser/emphasis.go @@ -0,0 +1,54 @@ +package parser + +import ( + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" +) + +type emphasisDelimiterProcessor struct { +} + +func (p *emphasisDelimiterProcessor) IsDelimiter(b byte) bool { + return b == '*' || b == '_' +} + +func (p *emphasisDelimiterProcessor) CanOpenCloser(opener, closer *Delimiter) bool { + return opener.Char == closer.Char +} + +func (p *emphasisDelimiterProcessor) OnMatch(consumes int) ast.Node { + return ast.NewEmphasis(consumes) +} + +var defaultEmphasisDelimiterProcessor = &emphasisDelimiterProcessor{} + +type emphasisParser struct { +} + +var defaultEmphasisParser = &emphasisParser{} + +// NewEmphasisParser return a new InlineParser that parses emphasises. +func NewEmphasisParser() InlineParser { + return defaultEmphasisParser +} + +func (s *emphasisParser) Trigger() []byte { + return []byte{'*', '_'} +} + +func (s *emphasisParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.Node { + before := block.PrecendingCharacter() + line, segment := block.PeekLine() + node := ScanDelimiter(line, before, 1, defaultEmphasisDelimiterProcessor) + if node == nil { + return nil + } + node.Segment = segment.WithStop(segment.Start + node.OriginalLength) + block.Advance(node.OriginalLength) + pc.PushDelimiter(node) + return node +} + +func (s *emphasisParser) CloseBlock(parent ast.Node, pc Context) { + // nothing to do +} diff --git a/parser/fcode_block.go b/parser/fcode_block.go new file mode 100644 index 0000000..2e117c8 --- /dev/null +++ b/parser/fcode_block.go @@ -0,0 +1,96 @@ +package parser + +import ( + "bytes" + + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +type fencedCodeBlockParser struct { +} + +var defaultFencedCodeBlockParser = &fencedCodeBlockParser{} + +// NewFencedCodeBlockParser returns a new BlockParser that +// parses fenced code blocks. +func NewFencedCodeBlockParser() BlockParser { + return defaultFencedCodeBlockParser +} + +type fenceData struct { + char byte + indent int + length int +} + +var fencedCodeBlockInfoKey = NewContextKey() + +func (b *fencedCodeBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) { + line, segment := reader.PeekLine() + pos := pc.BlockOffset() + if line[pos] != '`' && line[pos] != '~' { + return nil, NoChildren + } + findent := pos + fenceChar := line[pos] + i := pos + for ; i < len(line) && line[i] == fenceChar; i++ { + } + oFenceLength := i - pos + if oFenceLength < 3 { + return nil, NoChildren + } + var info *ast.Text + if i < len(line)-1 { + rest := line[i:] + left := util.TrimLeftSpaceLength(rest) + right := util.TrimRightSpaceLength(rest) + infoStart, infoStop := segment.Start+i+left, segment.Stop-right + value := rest[left : len(rest)-right] + if fenceChar == '`' && bytes.IndexByte(value, '`') > -1 { + return nil, NoChildren + } else if infoStart != infoStop { + info = ast.NewTextSegment(text.NewSegment(infoStart, infoStop)) + } + } + pc.Set(fencedCodeBlockInfoKey, &fenceData{fenceChar, findent, oFenceLength}) + node := ast.NewFencedCodeBlock(info) + return node, NoChildren + +} + +func (b *fencedCodeBlockParser) Continue(node ast.Node, reader text.Reader, pc Context) State { + line, segment := reader.PeekLine() + fdata := pc.Get(fencedCodeBlockInfoKey).(*fenceData) + w, pos := util.IndentWidth(line, 0) + if w < 4 { + i := pos + for ; i < len(line) && line[i] == fdata.char; i++ { + } + length := i - pos + if length >= fdata.length && util.IsBlank(line[i:]) { + reader.Advance(segment.Stop - segment.Start - 1 - segment.Padding) + return Close + } + } + + pos, padding := util.DedentPosition(line, fdata.indent) + seg := text.NewSegmentPadding(segment.Start+pos, segment.Stop, padding) + node.Lines().Append(seg) + reader.AdvanceAndSetPadding(segment.Stop-segment.Start-pos-1, padding) + return Continue | NoChildren +} + +func (b *fencedCodeBlockParser) Close(node ast.Node, pc Context) { + pc.Set(fencedCodeBlockInfoKey, nil) +} + +func (b *fencedCodeBlockParser) CanInterruptParagraph() bool { + return true +} + +func (b *fencedCodeBlockParser) CanAcceptIndentedLine() bool { + return false +} diff --git a/parser/html_block.go b/parser/html_block.go new file mode 100644 index 0000000..1acf79d --- /dev/null +++ b/parser/html_block.go @@ -0,0 +1,278 @@ +package parser + +import ( + "bytes" + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" + "regexp" + "strings" +) + +// An HTMLConfig struct is a data structure that holds configuration of the renderers related to raw htmls. +type HTMLConfig struct { + FilterTags map[string]bool +} + +// SetOption implements SetOptioner. +func (b *HTMLConfig) SetOption(name OptionName, value interface{}) { + switch name { + case FilterTags: + b.FilterTags = value.(map[string]bool) + } +} + +// A HTMLOption interface sets options for the raw HTML parsers. +type HTMLOption interface { + SetHTMLOption(*HTMLConfig) +} + +// FilterTags is an otpion name that specify forbidden tag names. +const FilterTags OptionName = "FilterTags" + +type withFilterTags struct { + value map[string]bool +} + +func (o *withFilterTags) SetConfig(c *Config) { + c.Options[FilterTags] = o.value +} + +func (o *withFilterTags) SetHTMLOption(p *HTMLConfig) { + p.FilterTags = o.value +} + +// WithFilterTags is a functional otpion that specify forbidden tag names. +func WithFilterTags(names ...string) interface { + Option + HTMLOption +} { + m := map[string]bool{} + for _, name := range names { + m[name] = true + } + return &withFilterTags{m} +} + +var allowedBlockTags = map[string]bool{ + "address": true, + "article": true, + "aside": true, + "base": true, + "basefont": true, + "blockquote": true, + "body": true, + "caption": true, + "center": true, + "col": true, + "colgroup": true, + "dd": true, + "details": true, + "dialog": true, + "dir": true, + "div": true, + "dl": true, + "dt": true, + "fieldset": true, + "figcaption": true, + "figure": true, + "footer": true, + "form": true, + "frame": true, + "frameset": true, + "h1": true, + "h2": true, + "h3": true, + "h4": true, + "h5": true, + "h6": true, + "head": true, + "header": true, + "hr": true, + "html": true, + "iframe": true, + "legend": true, + "li": true, + "link": true, + "main": true, + "menu": true, + "menuitem": true, + "meta": true, + "nav": true, + "noframes": true, + "ol": true, + "optgroup": true, + "option": true, + "p": true, + "param": true, + "section": true, + "source": true, + "summary": true, + "table": true, + "tbody": true, + "td": true, + "tfoot": true, + "th": true, + "thead": true, + "title": true, + "tr": true, + "track": true, + "ul": true, +} + +var htmlBlockType1OpenRegexp = regexp.MustCompile(`(?i)^[ ]{0,3}<(script|pre|style)(?:\s.*|>.*|/>.*|)\n?$`) +var htmlBlockType1CloseRegexp = regexp.MustCompile(`(?i)^[ ]{0,3}(?:[^ ].*|).*`) + +var htmlBlockType2OpenRegexp = regexp.MustCompile(`^[ ]{0,3}'} + +var htmlBlockType3OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\?`) +var htmlBlockType3Close = []byte{'?', '>'} + +var htmlBlockType4OpenRegexp = regexp.MustCompile(`^[ ]{0,3}'} + +var htmlBlockType5OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\!\[CDATA\[`) +var htmlBlockType5Close = []byte{']', ']', '>'} + +var htmlBlockType6Regexp = regexp.MustCompile(`^[ ]{0,3}.*|/>.*|)\n?$`) + +var htmlBlockType7Regexp = regexp.MustCompile(`^[ ]{0,3}<(/)?([a-zA-Z0-9]+)(` + attributePattern + `*)(:?>|/>)\s*\n?$`) + +type htmlBlockParser struct { + HTMLConfig +} + +// NewHTMLBlockParser return a new BlockParser that can parse html +// blocks. +func NewHTMLBlockParser(opts ...HTMLOption) BlockParser { + p := &htmlBlockParser{} + for _, o := range opts { + o.SetHTMLOption(&p.HTMLConfig) + } + return p +} + +func (b *htmlBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) { + var node *ast.HTMLBlock + line, segment := reader.PeekLine() + last := pc.LastOpenedBlock().Node + if pos := pc.BlockOffset(); line[pos] != '<' { + return nil, NoChildren + } + + tagName := "" + if m := htmlBlockType1OpenRegexp.FindSubmatchIndex(line); m != nil { + tagName = string(line[m[2]:m[3]]) + node = ast.NewHTMLBlock(ast.HTMLBlockType1) + } else if htmlBlockType2OpenRegexp.Match(line) { + node = ast.NewHTMLBlock(ast.HTMLBlockType2) + } else if htmlBlockType3OpenRegexp.Match(line) { + node = ast.NewHTMLBlock(ast.HTMLBlockType3) + } else if htmlBlockType4OpenRegexp.Match(line) { + node = ast.NewHTMLBlock(ast.HTMLBlockType4) + } else if htmlBlockType5OpenRegexp.Match(line) { + node = ast.NewHTMLBlock(ast.HTMLBlockType5) + } else if match := htmlBlockType7Regexp.FindSubmatchIndex(line); match != nil { + isCloseTag := match[2] > -1 && bytes.Equal(line[match[2]:match[3]], []byte("/")) + hasAttr := match[6] != match[7] + tagName = strings.ToLower(string(line[match[4]:match[5]])) + _, ok := allowedBlockTags[strings.ToLower(string(tagName))] + if ok { + node = ast.NewHTMLBlock(ast.HTMLBlockType6) + } else if tagName != "script" && tagName != "style" && tagName != "pre" && !ast.IsParagraph(last) && !(isCloseTag && hasAttr) { // type 7 can not interrupt paragraph + node = ast.NewHTMLBlock(ast.HTMLBlockType7) + } + } + if node == nil { + if match := htmlBlockType6Regexp.FindSubmatchIndex(line); match != nil { + tagName = string(line[match[2]:match[3]]) + _, ok := allowedBlockTags[strings.ToLower(tagName)] + if ok { + node = ast.NewHTMLBlock(ast.HTMLBlockType6) + } + } + } + if node != nil { + if b.FilterTags != nil { + if _, ok := b.FilterTags[tagName]; ok { + return nil, NoChildren + } + } + reader.Advance(segment.Len() - 1) + node.Lines().Append(segment) + return node, NoChildren + } + return nil, NoChildren +} + +func (b *htmlBlockParser) Continue(node ast.Node, reader text.Reader, pc Context) State { + htmlBlock := node.(*ast.HTMLBlock) + lines := htmlBlock.Lines() + line, segment := reader.PeekLine() + var closurePattern []byte + + switch htmlBlock.HTMLBlockType { + case ast.HTMLBlockType1: + if lines.Len() == 1 { + firstLine := lines.At(0) + if htmlBlockType1CloseRegexp.Match(firstLine.Value(reader.Source())) { + return Close + } + } + if htmlBlockType1CloseRegexp.Match(line) { + htmlBlock.ClosureLine = segment + reader.Advance(segment.Len() - 1) + return Close + } + case ast.HTMLBlockType2: + closurePattern = htmlBlockType2Close + fallthrough + case ast.HTMLBlockType3: + if closurePattern == nil { + closurePattern = htmlBlockType3Close + } + fallthrough + case ast.HTMLBlockType4: + if closurePattern == nil { + closurePattern = htmlBlockType4Close + } + fallthrough + case ast.HTMLBlockType5: + if closurePattern == nil { + closurePattern = htmlBlockType5Close + } + + if lines.Len() == 1 { + firstLine := lines.At(0) + if bytes.Contains(firstLine.Value(reader.Source()), closurePattern) { + return Close + } + } + if bytes.Contains(line, closurePattern) { + htmlBlock.ClosureLine = segment + reader.Advance(segment.Len() - 1) + return Close + } + + case ast.HTMLBlockType6, ast.HTMLBlockType7: + if util.IsBlank(line) { + return Close + } + } + node.Lines().Append(segment) + reader.Advance(segment.Len() - 1) + return Continue | NoChildren +} + +func (b *htmlBlockParser) Close(node ast.Node, pc Context) { + // nothing to do +} + +func (b *htmlBlockParser) CanInterruptParagraph() bool { + return true +} + +func (b *htmlBlockParser) CanAcceptIndentedLine() bool { + return false +} diff --git a/parser/link.go b/parser/link.go new file mode 100644 index 0000000..e8cec9b --- /dev/null +++ b/parser/link.go @@ -0,0 +1,366 @@ +package parser + +import ( + "fmt" + "regexp" + "strings" + + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +var linkLabelStateKey = NewContextKey() + +type linkLabelState struct { + ast.BaseInline + + Segment text.Segment + + IsImage bool + + Prev *linkLabelState + + Next *linkLabelState + + First *linkLabelState + + Last *linkLabelState +} + +func newLinkLabelState(segment text.Segment, isImage bool) *linkLabelState { + return &linkLabelState{ + Segment: segment, + IsImage: isImage, + } +} + +func (s *linkLabelState) Text(source []byte) []byte { + return s.Segment.Value(source) +} + +func (s *linkLabelState) Dump(source []byte, level int) { + fmt.Printf("%slinkLabelState: \"%s\"\n", strings.Repeat(" ", level), s.Text(source)) +} + +func pushLinkLabelState(pc Context, v *linkLabelState) { + tlist := pc.Get(linkLabelStateKey) + var list *linkLabelState + if tlist == nil { + list = v + v.First = v + v.Last = v + pc.Set(linkLabelStateKey, list) + } else { + list = tlist.(*linkLabelState) + l := list.Last + list.Last = v + l.Next = v + v.Prev = l + } +} + +func removeLinkLabelState(pc Context, d *linkLabelState) { + tlist := pc.Get(linkLabelStateKey) + var list *linkLabelState + if tlist == nil { + return + } + list = tlist.(*linkLabelState) + + if d.Prev == nil { + list = d.Next + if list != nil { + list.First = d + list.Last = d.Last + list.Prev = nil + pc.Set(linkLabelStateKey, list) + } else { + pc.Set(linkLabelStateKey, nil) + } + } else { + d.Prev.Next = d.Next + if d.Next != nil { + d.Next.Prev = d.Prev + } + } + if list != nil && d.Next == nil { + list.Last = d.Prev + } + d.Next = nil + d.Prev = nil + d.First = nil + d.Last = nil +} + +type linkParser struct { +} + +var defaultLinkParser = &linkParser{} + +// NewLinkParser return a new InlineParser that parses links. +func NewLinkParser() InlineParser { + return defaultLinkParser +} + +func (s *linkParser) Trigger() []byte { + return []byte{'!', '[', ']'} +} + +var linkDestinationRegexp = regexp.MustCompile(`\s*([^\s].+)`) +var linkTitleRegexp = regexp.MustCompile(`\s+(\)|["'\(].+)`) +var linkBottom = NewContextKey() + +func (s *linkParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.Node { + line, segment := block.PeekLine() + if line[0] == '!' && len(line) > 1 && line[1] == '[' { + block.Advance(1) + pc.Set(linkBottom, pc.LastDelimiter()) + return processLinkLabelOpen(block, segment.Start+1, true, pc) + } + if line[0] == '[' { + pc.Set(linkBottom, pc.LastDelimiter()) + return processLinkLabelOpen(block, segment.Start, false, pc) + } + + // line[0] == ']' + tlist := pc.Get(linkLabelStateKey) + if tlist == nil { + return nil + } + last := tlist.(*linkLabelState).Last + if last == nil { + return nil + } + block.Advance(1) + removeLinkLabelState(pc, last) + if s.containsLink(last) { // a link in a link text is not allowed + ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment) + return nil + } + labelValue := block.Value(text.NewSegment(last.Segment.Start+1, segment.Start)) + if util.IsBlank(labelValue) && !last.IsImage { + ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment) + return nil + } + + c := block.Peek() + l, pos := block.Position() + var link *ast.Link + var hasValue bool + if c == '(' { // normal link + link = s.parseLink(parent, last, block, pc) + } else if c == '[' { // reference link + link, hasValue = s.parseReferenceLink(parent, last, block, pc) + if link == nil && hasValue { + ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment) + return nil + } + } + + if link == nil { + // maybe shortcut reference link + block.SetPosition(l, pos) + ssegment := text.NewSegment(last.Segment.Stop, segment.Start) + maybeReference := block.Value(ssegment) + ref, ok := pc.Reference(util.ToLinkReference(maybeReference)) + if !ok { + ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment) + return nil + } + link = ast.NewLink() + s.processLinkLabel(parent, link, last, pc) + link.Title = ref.Title() + link.Destination = ref.Destination() + } + if last.IsImage { + last.Parent().RemoveChild(last.Parent(), last) + return ast.NewImage(link) + } + last.Parent().RemoveChild(last.Parent(), last) + return link +} + +func (s *linkParser) containsLink(last *linkLabelState) bool { + if last.IsImage { + return false + } + var c ast.Node + for c = last; c != nil; c = c.NextSibling() { + if _, ok := c.(*ast.Link); ok { + return true + } + } + return false +} + +func processLinkLabelOpen(block text.Reader, pos int, isImage bool, pc Context) *linkLabelState { + start := pos + if isImage { + start-- + } + state := newLinkLabelState(text.NewSegment(start, pos+1), isImage) + pushLinkLabelState(pc, state) + block.Advance(1) + return state +} + +func (s *linkParser) processLinkLabel(parent ast.Node, link *ast.Link, last *linkLabelState, pc Context) { + var bottom ast.Node + if v := pc.Get(linkBottom); v != nil { + bottom = v.(ast.Node) + } + pc.Set(linkBottom, nil) + ProcessDelimiters(bottom, pc) + for c := last.NextSibling(); c != nil; { + next := c.NextSibling() + parent.RemoveChild(parent, c) + link.AppendChild(link, c) + c = next + } +} + +func (s *linkParser) parseReferenceLink(parent ast.Node, last *linkLabelState, block text.Reader, pc Context) (*ast.Link, bool) { + _, orgpos := block.Position() + block.Advance(1) // skip '[' + line, segment := block.PeekLine() + endIndex := util.FindClosure(line, '[', ']', false, true) + if endIndex < 0 { + return nil, false + } + + block.Advance(endIndex + 1) + ssegment := segment.WithStop(segment.Start + endIndex) + maybeReference := block.Value(ssegment) + if util.IsBlank(maybeReference) { // collapsed reference link + ssegment = text.NewSegment(last.Segment.Stop, orgpos.Start-1) + maybeReference = block.Value(ssegment) + } + + ref, ok := pc.Reference(util.ToLinkReference(maybeReference)) + if !ok { + return nil, true + } + + link := ast.NewLink() + s.processLinkLabel(parent, link, last, pc) + link.Title = ref.Title() + link.Destination = ref.Destination() + return link, true +} + +func (s *linkParser) parseLink(parent ast.Node, last *linkLabelState, block text.Reader, pc Context) *ast.Link { + block.Advance(1) // skip '(' + block.SkipSpaces() + var title []byte + var destination []byte + var ok bool + if block.Peek() == ')' { // empty link like '[link]()' + block.Advance(1) + } else { + destination, ok = parseLinkDestination(block) + if !ok { + return nil + } + block.SkipSpaces() + if block.Peek() == ')' { + block.Advance(1) + } else { + title, ok = parseLinkTitle(block) + if !ok { + return nil + } + block.SkipSpaces() + if block.Peek() == ')' { + block.Advance(1) + } else { + return nil + } + } + } + + link := ast.NewLink() + s.processLinkLabel(parent, link, last, pc) + link.Destination = destination + link.Title = title + return link +} + +func parseLinkDestination(block text.Reader) ([]byte, bool) { + block.SkipSpaces() + line, _ := block.PeekLine() + buf := []byte{} + if block.Peek() == '<' { + i := 1 + for i < len(line) { + c := line[i] + if c == '\\' && i < len(line)-1 && util.IsPunct(line[i+1]) { + buf = append(buf, '\\', line[i+1]) + i += 2 + continue + } else if c == '>' { + block.Advance(i + 1) + return line[1:i], true + } + buf = append(buf, c) + i++ + } + return nil, false + } + opened := 0 + i := 0 + for i < len(line) { + c := line[i] + if c == '\\' && i < len(line)-1 && util.IsPunct(line[i+1]) { + buf = append(buf, '\\', line[i+1]) + i += 2 + continue + } else if c == '(' { + opened++ + } else if c == ')' { + opened-- + if opened < 0 { + break + } + } else if util.IsSpace(c) { + break + } + buf = append(buf, c) + i++ + } + block.Advance(i) + return line[:i], len(line[:i]) != 0 +} + +func parseLinkTitle(block text.Reader) ([]byte, bool) { + block.SkipSpaces() + opener := block.Peek() + if opener != '"' && opener != '\'' && opener != '(' { + return nil, false + } + closer := opener + if opener == '(' { + closer = ')' + } + line, _ := block.PeekLine() + pos := util.FindClosure(line[1:], opener, closer, false, true) + if pos < 0 { + return nil, false + } + pos += 2 // opener + closer + block.Advance(pos) + return line[1 : pos-1], true +} + +func (s *linkParser) CloseBlock(parent ast.Node, pc Context) { + tlist := pc.Get(linkLabelStateKey) + if tlist == nil { + return + } + for s := tlist.(*linkLabelState); s != nil; { + next := s.Next + removeLinkLabelState(pc, s) + s.Parent().ReplaceChild(s.Parent(), s, ast.NewTextSegment(s.Segment)) + s = next + } +} diff --git a/parser/link_ref.go b/parser/link_ref.go new file mode 100644 index 0000000..acaf798 --- /dev/null +++ b/parser/link_ref.go @@ -0,0 +1,163 @@ +package parser + +import ( + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +type linkReferenceParagraphTransformer struct { +} + +// LinkReferenceParagraphTransformer is a ParagraphTransformer implementation +// that parses and extracts link reference from paragraphs. +var LinkReferenceParagraphTransformer = &linkReferenceParagraphTransformer{} + +func (p *linkReferenceParagraphTransformer) Transform(node *ast.Paragraph, pc Context) { + lines := node.Lines() + block := text.NewBlockReader(pc.Source(), lines) + removes := [][2]int{} + for { + start, end := parseLinkReferenceDefinition(block, pc) + if start > -1 { + if start == end { + end++ + } + removes = append(removes, [2]int{start, end}) + continue + } + break + } + + offset := 0 + for _, remove := range removes { + if lines.Len() == 0 { + break + } + s := lines.Sliced(remove[1]-offset, lines.Len()) + lines.SetSliced(0, remove[0]-offset) + lines.AppendAll(s) + offset = remove[1] + } + + if lines.Len() == 0 { + t := ast.NewTextBlock() + t.SetBlankPreviousLines(node.HasBlankPreviousLines()) + node.Parent().ReplaceChild(node.Parent(), node, t) + return + } + + node.SetLines(lines) +} + +func parseLinkReferenceDefinition(block text.Reader, pc Context) (int, int) { + block.SkipSpaces() + line, segment := block.PeekLine() + if line == nil { + return -1, -1 + } + startLine, _ := block.Position() + width, pos := util.IndentWidth(line, 0) + if width > 3 { + return -1, -1 + } + if width != 0 { + pos++ + } + if line[pos] != '[' { + return -1, -1 + } + open := segment.Start + pos + 1 + closes := -1 + block.Advance(pos + 1) + for { + line, segment = block.PeekLine() + if line == nil { + return -1, -1 + } + closure := util.FindClosure(line, '[', ']', false, false) + if closure > -1 { + closes = segment.Start + closure + next := closure + 1 + if next >= len(line) || line[next] != ':' { + return -1, -1 + } + block.Advance(next + 1) + break + } + block.AdvanceLine() + } + if closes < 0 { + return -1, -1 + } + label := block.Value(text.NewSegment(open, closes)) + if util.IsBlank(label) { + return -1, -1 + } + block.SkipSpaces() + destination, ok := parseLinkDestination(block) + if !ok { + return -1, -1 + } + line, segment = block.PeekLine() + isNewLine := line == nil || util.IsBlank(line) + + endLine, _ := block.Position() + _, spaces, _ := block.SkipSpaces() + opener := block.Peek() + if opener != '"' && opener != '\'' && opener != '(' { + if !isNewLine { + return -1, -1 + } + ref := NewReference(label, destination, nil) + pc.AddReference(ref) + return startLine, endLine + 1 + } + if spaces == 0 { + return -1, -1 + } + block.Advance(1) + open = -1 + closes = -1 + closer := opener + if opener == '(' { + closer = ')' + } + for { + line, segment = block.PeekLine() + if line == nil { + return -1, -1 + } + if open < 0 { + open = segment.Start + } + closure := util.FindClosure(line, opener, closer, false, true) + if closure > -1 { + closes = segment.Start + closure + block.Advance(closure + 1) + break + } + block.AdvanceLine() + } + if closes < 0 { + return -1, -1 + } + + line, segment = block.PeekLine() + if line != nil && !util.IsBlank(line) { + if !isNewLine { + return -1, -1 + } + title := block.Value(text.NewSegment(open, closes)) + ref := NewReference(label, destination, title) + pc.AddReference(ref) + return startLine, endLine + } + + title := block.Value(text.NewSegment(open, closes)) + + endLine, _ = block.Position() + ref := NewReference(label, destination, title) + pc.AddReference(ref) + return startLine, endLine + 1 +} diff --git a/parser/list.go b/parser/list.go new file mode 100644 index 0000000..9d78706 --- /dev/null +++ b/parser/list.go @@ -0,0 +1,241 @@ +package parser + +import ( + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" + "strconv" +) + +type listItemType int + +const ( + notList listItemType = iota + bulletList + orderedList +) + +// Same as +// `^(([ ]*)([\-\*\+]))(\s+.*)?\n?$`.FindSubmatchIndex or +// `^(([ ]*)(\d{1,9}[\.\)]))(\s+.*)?\n?$`.FindSubmatchIndex +func parseListItem(line []byte) ([6]int, listItemType) { + i := 0 + l := len(line) + ret := [6]int{} + for ; i < l && line[i] == ' '; i++ { + c := line[i] + if c == '\t' { + return ret, notList + } + } + if i > 3 { + return ret, notList + } + ret[0] = 0 + ret[1] = i + ret[2] = i + var typ listItemType + if i < l && line[i] == '-' || line[i] == '*' || line[i] == '+' { + i++ + ret[3] = i + typ = bulletList + } else if i < l { + for ; i < l && util.IsNumeric(line[i]); i++ { + } + ret[3] = i + if ret[3] == ret[2] || ret[3]-ret[2] > 9 { + return ret, notList + } + if i < l && line[i] == '.' || line[i] == ')' { + i++ + ret[3] = i + } else { + return ret, notList + } + typ = orderedList + } else { + return ret, notList + } + if line[i] != '\n' { + w, _ := util.IndentWidth(line[i:], 0) + if w == 0 { + return ret, notList + } + } + ret[4] = i + ret[5] = len(line) + if line[ret[5]-1] == '\n' && line[i] != '\n' { + ret[5]-- + } + return ret, typ +} + +func matchesListItem(source []byte, strict bool) ([6]int, listItemType) { + m, typ := parseListItem(source) + if typ != notList && (!strict || strict && m[1] < 4) { + return m, typ + } + return m, notList +} + +func calcListOffset(source []byte, match [6]int) int { + offset := 0 + if util.IsBlank(source[match[4]:]) { // list item starts with a blank line + offset = 1 + } else { + offset, _ = util.IndentWidth(source[match[4]:], match[2]) + if offset > 4 { // offseted codeblock + offset = 1 + } + } + return offset +} + +func lastOffset(node ast.Node) int { + lastChild := node.LastChild() + if lastChild != nil { + return lastChild.(*ast.ListItem).Offset + } + return 0 +} + +type listParser struct { +} + +var defaultListParser = &listParser{} + +// NewListParser returns a new BlockParser that +// parses lists. +// This parser must take predecence over the ListItemParser. +func NewListParser() BlockParser { + return defaultListParser +} + +func (b *listParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) { + last := pc.LastOpenedBlock().Node + if _, lok := last.(*ast.List); lok || pc.Get(skipListParser) != nil { + pc.Set(skipListParser, nil) + return nil, NoChildren + } + line, _ := reader.PeekLine() + match, typ := matchesListItem(line, true) + if typ == notList { + return nil, NoChildren + } + start := -1 + if typ == orderedList { + number := line[match[2] : match[3]-1] + start, _ = strconv.Atoi(string(number)) + } + + if ast.IsParagraph(last) && last.Parent() == parent { + // we allow only lists starting with 1 to interrupt paragraphs. + if typ == orderedList && start != 1 { + return nil, NoChildren + } + //an empty list item cannot interrupt a paragraph: + if match[5]-match[4] == 1 { + return nil, NoChildren + } + } + + marker := line[match[3]-1] + node := ast.NewList(marker) + if start > -1 { + node.Start = start + } + return node, HasChildren +} + +func (b *listParser) Continue(node ast.Node, reader text.Reader, pc Context) State { + list := node.(*ast.List) + line, _ := reader.PeekLine() + if util.IsBlank(line) { + // A list item can begin with at most one blank line + if node.ChildCount() == 1 && node.LastChild().ChildCount() == 0 { + return Close + } + return Continue | HasChildren + } + // Themantic Breaks take predecence over lists + if isThemanticBreak(line) { + isHeading := false + last := pc.LastOpenedBlock().Node + if ast.IsParagraph(last) { + c, ok := matchesSetextHeadingBar(line) + if ok && c == '-' { + isHeading = true + } + } + if !isHeading { + return Close + } + } + + // "offset" means a width that bar indicates. + // - aaaaaaaa + // |----| + // + // If the indent is less than the last offset like + // - a + // - b <--- current line + // it maybe a new child of the list. + offset := lastOffset(node) + indent, _ := util.IndentWidth(line, 0) + + if indent < offset { + if indent < 4 { + match, typ := matchesListItem(line, false) // may have a leading spaces more than 3 + if typ != notList && match[1]-offset < 4 { + marker := line[match[3]-1] + if !list.CanContinue(marker, typ == orderedList) { + return Close + } + return Continue | HasChildren + } + } + return Close + } + return Continue | HasChildren +} + +func (b *listParser) Close(node ast.Node, pc Context) { + list := node.(*ast.List) + + for c := node.FirstChild(); c != nil && list.IsTight; c = c.NextSibling() { + if c.FirstChild() != nil && c.FirstChild() != c.LastChild() { + for c1 := c.FirstChild().NextSibling(); c1 != nil; c1 = c1.NextSibling() { + if bl, ok := c1.(ast.Node); ok && bl.HasBlankPreviousLines() { + list.IsTight = false + break + } + } + } + if c != node.FirstChild() { + if bl, ok := c.(ast.Node); ok && bl.HasBlankPreviousLines() { + list.IsTight = false + } + } + } + + if list.IsTight { + for child := node.FirstChild(); child != nil; child = child.NextSibling() { + for gc := child.FirstChild(); gc != nil; gc = gc.NextSibling() { + paragraph, ok := gc.(*ast.Paragraph) + if ok { + textBlock := ast.NewTextBlock() + textBlock.SetLines(paragraph.Lines()) + child.ReplaceChild(child, paragraph, textBlock) + } + } + } + } +} + +func (b *listParser) CanInterruptParagraph() bool { + return true +} + +func (b *listParser) CanAcceptIndentedLine() bool { + return false +} diff --git a/parser/list_item.go b/parser/list_item.go new file mode 100644 index 0000000..5a05af6 --- /dev/null +++ b/parser/list_item.go @@ -0,0 +1,81 @@ +package parser + +import ( + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +type listItemParser struct { +} + +var defaultListItemParser = &listItemParser{} + +// NewListItemParser returns a new BlockParser that +// parses list items. +func NewListItemParser() BlockParser { + return defaultListItemParser +} + +var skipListParser = NewContextKey() +var skipListParserValue interface{} = true + +func (b *listItemParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) { + list, lok := parent.(*ast.List) + if !lok { // list item must be a child of a list + return nil, NoChildren + } + offset := lastOffset(list) + line, _ := reader.PeekLine() + match, typ := matchesListItem(line, false) + if typ == notList { + return nil, NoChildren + } + if match[1]-offset > 3 { + return nil, NoChildren + } + itemOffset := calcListOffset(line, match) + node := ast.NewListItem(match[3] + itemOffset) + if match[5]-match[4] == 1 { + return node, NoChildren + } + + pos, padding := util.IndentPosition(line[match[4]:], match[4], itemOffset) + child := match[3] + pos + reader.AdvanceAndSetPadding(child, padding) + return node, HasChildren +} + +func (b *listItemParser) Continue(node ast.Node, reader text.Reader, pc Context) State { + line, _ := reader.PeekLine() + if util.IsBlank(line) { + return Continue | HasChildren + } + + indent, _ := util.IndentWidth(line, reader.LineOffset()) + offset := lastOffset(node.Parent()) + if indent < offset && indent < 4 { + _, typ := matchesListItem(line, true) + // new list item found + if typ != notList { + pc.Set(skipListParser, skipListParserValue) + } + return Close + } + pos, padding := util.IndentPosition(line, reader.LineOffset(), offset) + reader.AdvanceAndSetPadding(pos, padding) + + return Continue | HasChildren +} + +func (b *listItemParser) Close(node ast.Node, pc Context) { + // nothing to do +} + +func (b *listItemParser) CanInterruptParagraph() bool { + return true +} + +func (b *listItemParser) CanAcceptIndentedLine() bool { + return false +} diff --git a/parser/paragraph.go b/parser/paragraph.go new file mode 100644 index 0000000..12a62a6 --- /dev/null +++ b/parser/paragraph.go @@ -0,0 +1,62 @@ +package parser + +import ( + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" +) + +type paragraphParser struct { +} + +var defaultParagraphParser = ¶graphParser{} + +// NewParagraphParser returns a new BlockParser that +// parses paragraphs. +func NewParagraphParser() BlockParser { + return defaultParagraphParser +} + +func (b *paragraphParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) { + _, segment := reader.PeekLine() + segment = segment.TrimLeftSpace(reader.Source()) + if segment.IsEmpty() { + return nil, NoChildren + } + node := ast.NewParagraph() + node.Lines().Append(segment) + reader.Advance(segment.Len() - 1) + return node, NoChildren +} + +func (b *paragraphParser) Continue(node ast.Node, reader text.Reader, pc Context) State { + _, segment := reader.PeekLine() + segment = segment.TrimLeftSpace(reader.Source()) + if segment.IsEmpty() { + return Close + } + node.Lines().Append(segment) + reader.Advance(segment.Len() - 1) + return Continue | NoChildren +} + +func (b *paragraphParser) Close(node ast.Node, pc Context) { + lines := node.Lines() + if lines.Len() != 0 { + // trim trailing spaces + length := lines.Len() + lastLine := node.Lines().At(length - 1) + node.Lines().Set(length-1, lastLine.TrimRightSpace(pc.Source())) + } + if lines.Len() == 0 { + node.Parent().RemoveChild(node.Parent(), node) + return + } +} + +func (b *paragraphParser) CanInterruptParagraph() bool { + return false +} + +func (b *paragraphParser) CanAcceptIndentedLine() bool { + return false +} diff --git a/parser/parser.go b/parser/parser.go new file mode 100644 index 0000000..51ff945 --- /dev/null +++ b/parser/parser.go @@ -0,0 +1,987 @@ +// Package parser contains stuff that are related to parsing a Markdown text. +package parser + +import ( + "fmt" + "strings" + "sync" + "sync/atomic" + + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +// A Reference interface represents a link reference in Markdown text. +type Reference interface { + // String implements Stringer. + String() string + + // Label returns a label of the reference. + Label() []byte + + // Destination returns a destination(URL) of the reference. + Destination() []byte + + // Title returns a title of the reference. + Title() []byte +} + +type reference struct { + label []byte + destination []byte + title []byte +} + +// NewReference returns a new Reference. +func NewReference(label, destination, title []byte) Reference { + return &reference{label, destination, title} +} + +func (r *reference) Label() []byte { + return r.label +} + +func (r *reference) Destination() []byte { + return r.destination +} + +func (r *reference) Title() []byte { + return r.title +} + +func (r *reference) String() string { + return fmt.Sprintf("Reference{Label:%s, Destination:%s, Title:%s}", r.label, r.destination, r.title) +} + +// ContextKey is a key that is used to set arbitary values to the context. +type ContextKey int32 + +// New returns a new ContextKey value. +func (c *ContextKey) New() ContextKey { + return ContextKey(atomic.AddInt32((*int32)(c), 1)) +} + +var contextKey ContextKey + +// NewContextKey return a new ContextKey value. +func NewContextKey() ContextKey { + return contextKey.New() +} + +// A Context interface holds a information that are necessary to parse +// Markdown text. +type Context interface { + // String implements Stringer. + String() string + + // Source returns a source of Markdown text. + Source() []byte + + // Get returns a value associated with given key. + Get(ContextKey) interface{} + + // Set sets given value to the context. + Set(ContextKey, interface{}) + + // AddReference adds given reference to this context. + AddReference(Reference) + + // Reference returns (a reference, true) if a reference associated with + // given label exists, otherwise (nil, false). + Reference(label string) (Reference, bool) + + // References returns a list of references. + References() []Reference + + // BlockOffset returns a first non-space character position on current line. + // This value is valid only for BlockParser.Open. + BlockOffset() int + + // BlockOffset sets a first non-space character position on current line. + // This value is valid only for BlockParser.Open. + SetBlockOffset(int) + + // FirstDelimiter returns a first delimiter of the current delimiter list. + FirstDelimiter() *Delimiter + + // LastDelimiter returns a last delimiter of the current delimiter list. + LastDelimiter() *Delimiter + + // PushDelimiter appends given delimiter to the tail of the current + // delimiter list. + PushDelimiter(delimiter *Delimiter) + + // RemoveDelimiter removes given delimiter from the current delimiter list. + RemoveDelimiter(d *Delimiter) + + // ClearDelimiters clears the current delimiter list. + ClearDelimiters(bottom ast.Node) + + // OpenedBlocks returns a list of nodes that are currently in parsing. + OpenedBlocks() []Block + + // SetOpenedBlocks sets a list of nodes that are currently in parsing. + SetOpenedBlocks([]Block) + + // LastOpenedBlock returns a last node that is currently in parsing. + LastOpenedBlock() Block + + // SetLastOpenedBlock sets a last node that is currently in parsing. + SetLastOpenedBlock(Block) +} + +// A Result interface holds a result of parsing Markdown text. +type Result interface { + // Reference returns (a reference, true) if a reference associated with + // given label exists, otherwise (nil, false). + Reference(label string) (Reference, bool) +} + +type parseContext struct { + store []interface{} + source []byte + refs map[string]Reference + blockOffset int + delimiters *Delimiter + lastDelimiter *Delimiter + openedBlocks []Block + lastOpenedBlock Block +} + +func newContext(source []byte) Context { + return &parseContext{ + store: make([]interface{}, contextKey+1), + source: source, + refs: map[string]Reference{}, + blockOffset: 0, + delimiters: nil, + lastDelimiter: nil, + openedBlocks: []Block{}, + lastOpenedBlock: Block{}, + } +} + +func (p *parseContext) Get(key ContextKey) interface{} { + return p.store[key] +} + +func (p *parseContext) Set(key ContextKey, value interface{}) { + p.store[key] = value +} + +func (p *parseContext) BlockOffset() int { + return p.blockOffset +} + +func (p *parseContext) SetBlockOffset(v int) { + p.blockOffset = v +} + +func (p *parseContext) Source() []byte { + return p.source +} + +func (p *parseContext) LastDelimiter() *Delimiter { + return p.lastDelimiter +} + +func (p *parseContext) FirstDelimiter() *Delimiter { + return p.delimiters +} + +func (p *parseContext) PushDelimiter(d *Delimiter) { + if p.delimiters == nil { + p.delimiters = d + p.lastDelimiter = d + } else { + l := p.lastDelimiter + p.lastDelimiter = d + l.NextDelimiter = d + d.PreviousDelimiter = l + } +} + +func (p *parseContext) RemoveDelimiter(d *Delimiter) { + if d.PreviousDelimiter == nil { + p.delimiters = d.NextDelimiter + } else { + d.PreviousDelimiter.NextDelimiter = d.NextDelimiter + if d.NextDelimiter != nil { + d.NextDelimiter.PreviousDelimiter = d.PreviousDelimiter + } + } + if d.NextDelimiter == nil { + p.lastDelimiter = d.PreviousDelimiter + } + if p.delimiters != nil { + p.delimiters.PreviousDelimiter = nil + } + if p.lastDelimiter != nil { + p.lastDelimiter.NextDelimiter = nil + } + d.NextDelimiter = nil + d.PreviousDelimiter = nil + if d.Length != 0 { + ast.MergeOrReplaceTextSegment(d.Parent(), d, d.Segment) + } else { + d.Parent().RemoveChild(d.Parent(), d) + } +} + +func (p *parseContext) ClearDelimiters(bottom ast.Node) { + if p.lastDelimiter == nil { + return + } + var c ast.Node + for c = p.lastDelimiter; c != nil && c != bottom; { + prev := c.PreviousSibling() + if d, ok := c.(*Delimiter); ok { + p.RemoveDelimiter(d) + } + c = prev + } +} + +func (p *parseContext) AddReference(ref Reference) { + key := util.ToLinkReference(ref.Label()) + if _, ok := p.refs[key]; !ok { + p.refs[key] = ref + } +} + +func (p *parseContext) Reference(label string) (Reference, bool) { + v, ok := p.refs[label] + return v, ok +} + +func (p *parseContext) References() []Reference { + ret := make([]Reference, 0, len(p.refs)) + for _, v := range p.refs { + ret = append(ret, v) + } + return ret +} + +func (p *parseContext) String() string { + refs := []string{} + for _, r := range p.refs { + refs = append(refs, r.String()) + } + + return fmt.Sprintf("Context{Store:%#v, Refs:%s}", p.store, strings.Join(refs, ",")) +} + +func (p *parseContext) OpenedBlocks() []Block { + return p.openedBlocks +} + +func (p *parseContext) SetOpenedBlocks(v []Block) { + p.openedBlocks = v +} + +func (p *parseContext) LastOpenedBlock() Block { + return p.lastOpenedBlock +} + +func (p *parseContext) SetLastOpenedBlock(v Block) { + p.lastOpenedBlock = v +} + +// State represents parser's state. +// State is designed to use as a bit flag. +type State int + +const ( + none State = 1 << iota + + // Continue indicates parser can continue parsing. + Continue + + // Close indicates parser cannot parse anymore. + Close + + // HasChildren indicates parser may have child blocks. + HasChildren + + // NoChildren indicates parser does not have child blocks. + NoChildren +) + +// A Config struct is a data structure that holds configuration of the Parser. +type Config struct { + Options map[OptionName]interface{} + BlockParsers util.PrioritizedSlice /**/ + InlineParsers util.PrioritizedSlice /**/ + ParagraphTransformers util.PrioritizedSlice /**/ + ASTTransformers util.PrioritizedSlice /**/ +} + +// NewConfig returns a new Config. +func NewConfig() *Config { + return &Config{ + Options: map[OptionName]interface{}{}, + BlockParsers: util.PrioritizedSlice{}, + InlineParsers: util.PrioritizedSlice{}, + ParagraphTransformers: util.PrioritizedSlice{}, + ASTTransformers: util.PrioritizedSlice{}, + } +} + +// An Option interface is a functional option type for the Parser. +type Option interface { + SetConfig(*Config) +} + +// OptionName is a name of parser options. +type OptionName string + +// A Parser interface parses Markdown text into AST nodes. +type Parser interface { + // Parse parses given Markdown text into AST nodes. + Parse(reader text.Reader) (ast.Node, Result) + + // AddOption adds given option to thie parser. + AddOption(Option) +} + +// A SetOptioner interface sets given option to the object. +type SetOptioner interface { + // SetOption sets given option to the object. + // Unacceptable options may be passed. + // Thus implementations must ignore unacceptable options. + SetOption(name OptionName, value interface{}) +} + +// A BlockParser interface parses a block level element like Paragraph, List, +// Blockquote etc. +type BlockParser interface { + // Open parses the current line and returns a result of parsing. + // + // Open must not parse beyond the current line. + // If Open has been able to parse the current line, Open must advance a reader + // position by consumed byte length. + // + // If Open has not been able to parse the current line, Open should returns + // (nil, NoChildren). If Open has been able to parse the current line, Open + // should returns a new Block node and returns HasChildren or NoChildren. + Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) + + // Continue parses the current line and returns a result of parsing. + // + // Continue must not parse beyond the current line. + // If Continue has been able to parse the current line, Continue must advance + // a reader position by consumed byte length. + // + // If Continue has not been able to parse the current line, Continue should + // returns Close. If Continue has been able to parse the current line, + // Continue should returns (Continue | NoChildren) or + // (Continue | HasChildren) + Continue(node ast.Node, reader text.Reader, pc Context) State + + // Close will be called when the parser returns Close. + Close(node ast.Node, pc Context) + + // CanInterruptParagraph returns true if the parser can interrupt pargraphs, + // otherwise false. + CanInterruptParagraph() bool + + // CanAcceptIndentedLine returns true if the parser can open new node when + // given line is being indented more than 3 spaces. + CanAcceptIndentedLine() bool +} + +// An InlineParser interface parses an inline level element like CodeSpan, Link etc. +type InlineParser interface { + // Trigger returns a list of characters that triggers Parse method of + // this parser. + // Trigger characters must be a punctuation or a halfspace. + // Halfspaces triggers this parser when character is any spaces characters or + // a head of line + Trigger() []byte + + // Parse parse given block into an inline node. + // + // Parse can parse beyond the current line. + // If Parse has been able to parse the current line, it must advance a reader + // position by consumed byte length. + Parse(parent ast.Node, block text.Reader, pc Context) ast.Node + + // CloseBlock will be called when a block is closed. + CloseBlock(parent ast.Node, pc Context) +} + +// A ParagraphTransformer transforms parsed Paragraph nodes. +// For example, link references are searched in parsed Paragraphs. +type ParagraphTransformer interface { + // Transform transforms given paragraph. + Transform(node *ast.Paragraph, pc Context) +} + +// ASTTransformer transforms entire Markdown document AST tree. +type ASTTransformer interface { + // Transform transforms given AST tree. + Transform(node *ast.Document, pc Context) +} + +// DefaultBlockParsers returns a new list of default BlockParsers. +// Priorities of default BlockParsers are: +// +// SetextHeadingParser, 100 +// ThemanticBreakParser, 200 +// ListParser, 300 +// ListItemParser, 400 +// CodeBlockParser, 500 +// ATXHeadingParser, 600 +// FencedCodeBlockParser, 700 +// BlockquoteParser, 800 +// HTMLBlockParser, 900 +// ParagraphParser, 1000 +func DefaultBlockParsers() []util.PrioritizedValue { + return []util.PrioritizedValue{ + util.Prioritized(NewSetextHeadingParser(), 100), + util.Prioritized(NewThemanticBreakParser(), 200), + util.Prioritized(NewListParser(), 300), + util.Prioritized(NewListItemParser(), 400), + util.Prioritized(NewCodeBlockParser(), 500), + util.Prioritized(NewATXHeadingParser(), 600), + util.Prioritized(NewFencedCodeBlockParser(), 700), + util.Prioritized(NewBlockquoteParser(), 800), + util.Prioritized(NewHTMLBlockParser(), 900), + util.Prioritized(NewParagraphParser(), 1000), + } +} + +// DefaultInlineParsers returns a new list of default InlineParsers. +// Priorities of default InlineParsers are: +// +// CodeSpanParser, 100 +// LinkParser, 200 +// AutoLinkParser, 300 +// RawHTMLParser, 400 +// EmphasisParser, 500 +func DefaultInlineParsers() []util.PrioritizedValue { + return []util.PrioritizedValue{ + util.Prioritized(NewCodeSpanParser(), 100), + util.Prioritized(NewLinkParser(), 200), + util.Prioritized(NewAutoLinkParser(), 300), + util.Prioritized(NewRawHTMLParser(), 400), + util.Prioritized(NewEmphasisParser(), 500), + } +} + +// DefaultParagraphTransformers returns a new list of default ParagraphTransformers. +// Priorities of default ParagraphTransformers are: +// +// LinkReferenceParagraphTransformer, 100 +func DefaultParagraphTransformers() []util.PrioritizedValue { + return []util.PrioritizedValue{ + util.Prioritized(LinkReferenceParagraphTransformer, 100), + } +} + +// A Block struct holds a node and correspond parser pair. +type Block struct { + // Node is a BlockNode. + Node ast.Node + // Parser is a BlockParser. + Parser BlockParser +} + +type parser struct { + options map[OptionName]interface{} + blockParsers []BlockParser + inlineParsers [256][]InlineParser + inlineParsersList []InlineParser + paragraphTransformers []ParagraphTransformer + astTransformers []ASTTransformer + config *Config + initSync sync.Once +} + +type withBlockParsers struct { + value []util.PrioritizedValue +} + +func (o *withBlockParsers) SetConfig(c *Config) { + c.BlockParsers = append(c.BlockParsers, o.value...) +} + +// WithBlockParsers is a functional option that allow you to add +// BlockParsers to the parser. +func WithBlockParsers(bs ...util.PrioritizedValue) Option { + return &withBlockParsers{bs} +} + +type withInlineParsers struct { + value []util.PrioritizedValue +} + +func (o *withInlineParsers) SetConfig(c *Config) { + c.InlineParsers = append(c.InlineParsers, o.value...) +} + +// WithInlineParsers is a functional option that allow you to add +// InlineParsers to the parser. +func WithInlineParsers(bs ...util.PrioritizedValue) Option { + return &withInlineParsers{bs} +} + +type withParagraphTransformers struct { + value []util.PrioritizedValue +} + +func (o *withParagraphTransformers) SetConfig(c *Config) { + c.ParagraphTransformers = append(c.ParagraphTransformers, o.value...) +} + +// WithParagraphTransformers is a functional option that allow you to add +// ParagraphTransformers to the parser. +func WithParagraphTransformers(ps ...util.PrioritizedValue) Option { + return &withParagraphTransformers{ps} +} + +type withASTTransformers struct { + value []util.PrioritizedValue +} + +func (o *withASTTransformers) SetConfig(c *Config) { + c.ASTTransformers = append(c.ASTTransformers, o.value...) +} + +// WithASTTransformers is a functional option that allow you to add +// ASTTransformers to the parser. +func WithASTTransformers(ps ...util.PrioritizedValue) Option { + return &withASTTransformers{ps} +} + +type withOption struct { + name OptionName + value interface{} +} + +func (o *withOption) SetConfig(c *Config) { + c.Options[o.name] = o.value +} + +// WithOption is a functional option that allow you to set +// an arbitary option to the parser. +func WithOption(name OptionName, value interface{}) Option { + return &withOption{name, value} +} + +// NewParser returns a new Parser with given options. +func NewParser(options ...Option) Parser { + config := NewConfig() + for _, opt := range options { + opt.SetConfig(config) + } + + p := &parser{ + options: map[OptionName]interface{}{}, + config: config, + } + + return p +} + +func (p *parser) AddOption(o Option) { + o.SetConfig(p.config) +} + +func (p *parser) addBlockParser(v util.PrioritizedValue, options map[OptionName]interface{}) { + bp, ok := v.Value.(BlockParser) + if !ok { + panic(fmt.Sprintf("%v is not a BlockParser", v.Value)) + } + so, ok := v.Value.(SetOptioner) + if ok { + for oname, ovalue := range options { + so.SetOption(oname, ovalue) + } + } + p.blockParsers = append(p.blockParsers, bp) +} + +func (p *parser) addInlineParser(v util.PrioritizedValue, options map[OptionName]interface{}) { + ip, ok := v.Value.(InlineParser) + if !ok { + panic(fmt.Sprintf("%v is not a InlineParser", v.Value)) + } + tcs := ip.Trigger() + so, ok := v.Value.(SetOptioner) + if ok { + for oname, ovalue := range options { + so.SetOption(oname, ovalue) + } + } + p.inlineParsersList = append(p.inlineParsersList, ip) + for _, tc := range tcs { + if p.inlineParsers[tc] == nil { + p.inlineParsers[tc] = []InlineParser{} + } + p.inlineParsers[tc] = append(p.inlineParsers[tc], ip) + } +} + +func (p *parser) addParagraphTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) { + pt, ok := v.Value.(ParagraphTransformer) + if !ok { + panic(fmt.Sprintf("%v is not a ParagraphTransformer", v.Value)) + } + so, ok := v.Value.(SetOptioner) + if ok { + for oname, ovalue := range options { + so.SetOption(oname, ovalue) + } + } + p.paragraphTransformers = append(p.paragraphTransformers, pt) +} + +func (p *parser) addASTTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) { + at, ok := v.Value.(ASTTransformer) + if !ok { + panic(fmt.Sprintf("%v is not a ASTTransformer", v.Value)) + } + so, ok := v.Value.(SetOptioner) + if ok { + for oname, ovalue := range options { + so.SetOption(oname, ovalue) + } + } + p.astTransformers = append(p.astTransformers, at) +} + +func (p *parser) Parse(reader text.Reader) (ast.Node, Result) { + p.initSync.Do(func() { + p.config.BlockParsers.Sort() + for _, v := range p.config.BlockParsers { + p.addBlockParser(v, p.config.Options) + } + p.config.InlineParsers.Sort() + for _, v := range p.config.InlineParsers { + p.addInlineParser(v, p.config.Options) + } + p.config.ParagraphTransformers.Sort() + for _, v := range p.config.ParagraphTransformers { + p.addParagraphTransformer(v, p.config.Options) + } + p.config.ASTTransformers.Sort() + for _, v := range p.config.ASTTransformers { + p.addASTTransformer(v, p.config.Options) + } + p.config = nil + }) + root := ast.NewDocument() + pc := newContext(reader.Source()) + p.parseBlocks(root, reader, pc) + blockReader := text.NewBlockReader(reader.Source(), nil) + p.walkBlock(root, func(node ast.Node) { + p.parseBlock(blockReader, node, pc) + }) + for _, at := range p.astTransformers { + at.Transform(root, pc) + } + //root.Dump(reader.Source(), 0) + return root, pc +} + +func (p *parser) transformParagraph(node *ast.Paragraph, pc Context) { + for _, pt := range p.paragraphTransformers { + pt.Transform(node, pc) + if node.Parent() == nil { + break + } + } +} + +func (p *parser) closeBlocks(from, to int, pc Context) { + blocks := pc.OpenedBlocks() + last := pc.LastOpenedBlock() + for i := from; i >= to; i-- { + node := blocks[i].Node + if node.Parent() != nil { + blocks[i].Parser.Close(blocks[i].Node, pc) + paragraph, ok := node.(*ast.Paragraph) + if ok && node.Parent() != nil { + p.transformParagraph(paragraph, pc) + } + } + } + if from == len(blocks)-1 { + blocks = blocks[0:to] + } else { + blocks = append(blocks[0:to], blocks[from+1:]...) + } + l := len(blocks) + if l == 0 { + last.Node = nil + } else { + last = blocks[l-1] + } + pc.SetOpenedBlocks(blocks) + pc.SetLastOpenedBlock(last) +} + +type blockOpenResult int + +const ( + paragraphContinuation blockOpenResult = iota + 1 + newBlocksOpened + noBlocksOpened +) + +func (p *parser) openBlocks(parent ast.Node, blankLine bool, reader text.Reader, pc Context) blockOpenResult { + result := blockOpenResult(noBlocksOpened) + continuable := false + lastBlock := pc.LastOpenedBlock() + if lastBlock.Node != nil { + continuable = ast.IsParagraph(lastBlock.Node) + } +retry: + shouldPeek := true + var currentLineNum int + var w int + var pos int + var line []byte + for _, bp := range p.blockParsers { + if shouldPeek { + currentLineNum, _ = reader.Position() + line, _ = reader.PeekLine() + w, pos = util.IndentWidth(line, 0) + pc.SetBlockOffset(pos) + shouldPeek = false + if line == nil || line[0] == '\n' { + break + } + } + if continuable && result == noBlocksOpened && !bp.CanInterruptParagraph() { + continue + } + if w > 3 && !bp.CanAcceptIndentedLine() { + continue + } + last := pc.LastOpenedBlock().Node + node, state := bp.Open(parent, reader, pc) + if l, _ := reader.Position(); l != currentLineNum { + panic("BlockParser.Open must not advance position beyond the current line") + } + if node != nil { + shouldPeek = true + node.SetBlankPreviousLines(blankLine) + if last != nil && last.Parent() == nil { + lastPos := len(pc.OpenedBlocks()) - 1 + p.closeBlocks(lastPos, lastPos, pc) + } + parent.AppendChild(parent, node) + result = newBlocksOpened + be := Block{node, bp} + pc.SetOpenedBlocks(append(pc.OpenedBlocks(), be)) + pc.SetLastOpenedBlock(be) + if state == HasChildren { + parent = node + goto retry // try child block + } + break // no children, can not open more blocks on this line + } + } + if result == noBlocksOpened && continuable { + state := lastBlock.Parser.Continue(lastBlock.Node, reader, pc) + if state&Continue != 0 { + result = paragraphContinuation + } + } + return result +} + +type lineStat struct { + lineNum int + level int + isBlank bool +} + +func isBlankLine(lineNum, level int, stats []lineStat) ([]lineStat, bool) { + ret := false + for i := len(stats) - 1 - level; i >= 0; i-- { + s := stats[i] + if s.lineNum == lineNum && s.level == level { + ret = s.isBlank + continue + } + if s.lineNum < lineNum { + return stats[i:], ret + } + } + return stats[0:0], ret +} + +func (p *parser) parseBlocks(parent ast.Node, reader text.Reader, pc Context) { + pc.SetLastOpenedBlock(Block{}) + pc.SetOpenedBlocks([]Block{}) + blankLines := make([]lineStat, 0, 64) + isBlank := false + for { // process blocks separated by blank lines + _, lines, ok := reader.SkipBlankLines() + if !ok { + return + } + // first, we try to open blocks + if p.openBlocks(parent, lines != 0, reader, pc) != newBlocksOpened { + return + } + lineNum, _ := reader.Position() + for i := 0; i < len(pc.OpenedBlocks()); i++ { + blankLines = append(blankLines, lineStat{lineNum - 1, i, lines != 0}) + } + reader.AdvanceLine() + for len(pc.OpenedBlocks()) != 0 { // process opened blocks line by line + lastIndex := len(pc.OpenedBlocks()) - 1 + for i := 0; i < len(pc.OpenedBlocks()); i++ { + be := pc.OpenedBlocks()[i] + line, _ := reader.PeekLine() + if line == nil { + p.closeBlocks(lastIndex, 0, pc) + reader.AdvanceLine() + return + } + lineNum, _ := reader.Position() + blankLines = append(blankLines, lineStat{lineNum, i, util.IsBlank(line)}) + // If node is a paragraph, p.openBlocks determines whether it is continuable. + // So we do not process paragraphs here. + if !ast.IsParagraph(be.Node) { + state := be.Parser.Continue(be.Node, reader, pc) + if state&Continue != 0 { + // When current node is a container block and has no children, + // we try to open new child nodes + if state&HasChildren != 0 && i == lastIndex { + blankLines, isBlank = isBlankLine(lineNum-1, i, blankLines) + p.openBlocks(be.Node, isBlank, reader, pc) + break + } + continue + } + } + // current node may be closed or lazy continuation + blankLines, isBlank = isBlankLine(lineNum-1, i, blankLines) + thisParent := parent + if i != 0 { + thisParent = pc.OpenedBlocks()[i-1].Node + } + result := p.openBlocks(thisParent, isBlank, reader, pc) + if result != paragraphContinuation { + p.closeBlocks(lastIndex, i, pc) + } + break + } + + reader.AdvanceLine() + } + } +} + +func (p *parser) walkBlock(block ast.Node, cb func(node ast.Node)) { + for c := block.FirstChild(); c != nil; c = c.NextSibling() { + p.walkBlock(c, cb) + } + cb(block) +} + +func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context) { + if parent.IsRaw() { + return + } + escaped := false + source := block.Source() + block.Reset(parent.Lines()) + for { + retry: + line, _ := block.PeekLine() + if line == nil { + break + } + lineLength := len(line) + l, startPosition := block.Position() + n := 0 + softLinebreak := false + for i := 0; i < lineLength; i++ { + c := line[i] + if c == '\n' { + softLinebreak = true + break + } + isSpace := util.IsSpace(c) + isPunct := util.IsPunct(c) + if (isPunct && !escaped) || isSpace || i == 0 { + parserChar := c + if isSpace || (i == 0 && !isPunct) { + parserChar = ' ' + } + ips := p.inlineParsers[parserChar] + if ips != nil { + block.Advance(n) + n = 0 + savedLine, savedPosition := block.Position() + if i != 0 { + _, currentPosition := block.Position() + ast.MergeOrAppendTextSegment(parent, startPosition.Between(currentPosition)) + _, startPosition = block.Position() + } + var inlineNode ast.Node + for _, ip := range ips { + inlineNode = ip.Parse(parent, block, pc) + if inlineNode != nil { + break + } + block.SetPosition(savedLine, savedPosition) + } + if inlineNode != nil { + parent.AppendChild(parent, inlineNode) + goto retry + } + } + } + if escaped { + escaped = false + n++ + continue + } + + if c == '\\' { + escaped = true + n++ + continue + } + + escaped = false + n++ + } + if n != 0 { + block.Advance(n) + } + currentL, currentPosition := block.Position() + if l != currentL { + continue + } + diff := startPosition.Between(currentPosition) + stop := diff.Stop + hardlineBreak := false + if lineLength > 2 && line[lineLength-2] == '\\' && softLinebreak { // ends with \\n + stop-- + hardlineBreak = true + } else if lineLength > 3 && line[lineLength-3] == ' ' && line[lineLength-2] == ' ' && softLinebreak { // ends with [space][space]\n + hardlineBreak = true + } + rest := diff.WithStop(stop) + text := ast.NewTextSegment(rest.TrimRightSpace(source)) + text.SetSoftLineBreak(softLinebreak) + text.SetHardLineBreak(hardlineBreak) + parent.AppendChild(parent, text) + block.AdvanceLine() + } + + ProcessDelimiters(nil, pc) + for _, ip := range p.inlineParsersList { + ip.CloseBlock(parent, pc) + } + +} diff --git a/parser/raw_html.go b/parser/raw_html.go new file mode 100644 index 0000000..32908ba --- /dev/null +++ b/parser/raw_html.go @@ -0,0 +1,126 @@ +package parser + +import ( + "bytes" + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" + "regexp" +) + +type rawHTMLParser struct { + HTMLConfig +} + +// NewRawHTMLParser return a new InlineParser that can parse +// inline htmls +func NewRawHTMLParser(opts ...HTMLOption) InlineParser { + p := &rawHTMLParser{} + for _, o := range opts { + o.SetHTMLOption(&p.HTMLConfig) + } + return p +} + +func (s *rawHTMLParser) Trigger() []byte { + return []byte{'<'} +} + +func (s *rawHTMLParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.Node { + line, _ := block.PeekLine() + if len(line) > 1 && util.IsAlphaNumeric(line[1]) { + return s.parseMultiLineRegexp(openTagRegexp, block, pc) + } + if len(line) > 2 && line[1] == '/' && util.IsAlphaNumeric(line[2]) { + return s.parseMultiLineRegexp(closeTagRegexp, block, pc) + } + if bytes.HasPrefix(line, []byte("|`) +var processingInstructionRegexp = regexp.MustCompile(`^(?:<\?).*?(?:\?>)`) +var declRegexp = regexp.MustCompile(`^]*>`) +var cdataRegexp = regexp.MustCompile(``) + +func (s *rawHTMLParser) parseSingleLineRegexp(reg *regexp.Regexp, block text.Reader, pc Context) ast.Node { + line, segment := block.PeekLine() + match := reg.FindSubmatchIndex(line) + if match == nil { + return nil + } + node := ast.NewRawHTML() + node.AppendChild(node, ast.NewRawTextSegment(segment.WithStop(segment.Start+match[1]))) + block.Advance(match[1]) + return node +} + +var dummyMatch = [][]byte{} + +func (s *rawHTMLParser) parseMultiLineRegexp(reg *regexp.Regexp, block text.Reader, pc Context) ast.Node { + sline, ssegment := block.Position() + var m [][]byte + if s.FilterTags != nil { + m = block.FindSubMatch(reg) + } else { + if block.Match(reg) { + m = dummyMatch + } + } + + if m != nil { + if s.FilterTags != nil { + tagName := string(m[1]) + if _, ok := s.FilterTags[tagName]; ok { + return nil + } + } + node := ast.NewRawHTML() + eline, esegment := block.Position() + block.SetPosition(sline, ssegment) + for { + line, segment := block.PeekLine() + if line == nil { + break + } + l, _ := block.Position() + start := segment.Start + if l == sline { + start = ssegment.Start + } + end := segment.Stop + if l == eline { + end = esegment.Start + } + + node.AppendChild(node, ast.NewRawTextSegment(text.NewSegment(start, end))) + if l == eline { + block.Advance(end - start) + break + } else { + block.AdvanceLine() + } + } + return node + } + return nil +} + +func (s *rawHTMLParser) CloseBlock(parent ast.Node, pc Context) { + // nothing to do +} diff --git a/parser/setext_headings.go b/parser/setext_headings.go new file mode 100644 index 0000000..6f78d63 --- /dev/null +++ b/parser/setext_headings.go @@ -0,0 +1,109 @@ +package parser + +import ( + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +var temporaryParagraphKey = NewContextKey() + +type setextHeadingParser struct { + HeadingConfig +} + +func matchesSetextHeadingBar(line []byte) (byte, bool) { + start := 0 + end := len(line) + space := util.TrimLeftLength(line, []byte{' '}) + if space > 3 { + return 0, false + } + start += space + level1 := util.TrimLeftLength(line[start:end], []byte{'='}) + c := byte('=') + var level2 int + if level1 == 0 { + level2 = util.TrimLeftLength(line[start:end], []byte{'-'}) + c = '-' + } + end -= util.TrimRightSpaceLength(line[start:end]) + if !((level1 > 0 && start+level1 == end) || (level2 > 0 && start+level2 == end)) { + return 0, false + } + return c, true +} + +// NewSetextHeadingParser return a new BlockParser that can parse Setext headings. +func NewSetextHeadingParser(opts ...HeadingOption) BlockParser { + p := &setextHeadingParser{} + for _, o := range opts { + o.SetHeadingOption(&p.HeadingConfig) + } + return p +} + +func (b *setextHeadingParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) { + last := pc.LastOpenedBlock().Node + if last == nil { + return nil, NoChildren + } + paragraph, ok := last.(*ast.Paragraph) + if !ok || paragraph.Parent() != parent { + return nil, NoChildren + } + line, segment := reader.PeekLine() + c, ok := matchesSetextHeadingBar(line) + if !ok { + return nil, NoChildren + } + level := 1 + if c == '-' { + level = 2 + } + node := ast.NewHeading(level) + node.Lines().Append(segment) + pc.Set(temporaryParagraphKey, paragraph) + return node, NoChildren +} + +func (b *setextHeadingParser) Continue(node ast.Node, reader text.Reader, pc Context) State { + return Close +} + +func (b *setextHeadingParser) Close(node ast.Node, pc Context) { + heading := node.(*ast.Heading) + segment := node.Lines().At(0) + heading.Lines().Clear() + tmp := pc.Get(temporaryParagraphKey).(*ast.Paragraph) + pc.Set(temporaryParagraphKey, nil) + if tmp.Lines().Len() == 0 { + next := heading.NextSibling() + segment = segment.TrimLeftSpace(pc.Source()) + if next == nil || !ast.IsParagraph(next) { + para := ast.NewParagraph() + para.Lines().Append(segment) + heading.Parent().InsertAfter(heading.Parent(), heading, para) + } else { + next.(ast.Node).Lines().Unshift(segment) + } + heading.Parent().RemoveChild(heading.Parent(), heading) + } else { + heading.SetLines(tmp.Lines()) + heading.SetBlankPreviousLines(tmp.HasBlankPreviousLines()) + tmp.Parent().RemoveChild(tmp.Parent(), tmp) + } + + if !b.HeadingID { + return + } + parseOrGenerateHeadingID(heading, pc) +} + +func (b *setextHeadingParser) CanInterruptParagraph() bool { + return true +} + +func (b *setextHeadingParser) CanAcceptIndentedLine() bool { + return false +} diff --git a/parser/themantic_break.go b/parser/themantic_break.go new file mode 100644 index 0000000..fe82362 --- /dev/null +++ b/parser/themantic_break.go @@ -0,0 +1,71 @@ +package parser + +import ( + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +type themanticBreakParser struct { +} + +var defaultThemanticBreakParser = &themanticBreakParser{} + +// NewThemanticBreakParser returns a new BlockParser that +// parses themantic breaks. +func NewThemanticBreakParser() BlockParser { + return defaultThemanticBreakParser +} + +func isThemanticBreak(line []byte) bool { + w, pos := util.IndentWidth(line, 0) + if w > 3 { + return false + } + mark := byte(0) + count := 0 + for i := pos; i < len(line); i++ { + c := line[i] + if util.IsSpace(c) { + continue + } + if mark == 0 { + mark = c + count = 1 + if mark == '*' || mark == '-' || mark == '_' { + continue + } + return false + } + if c != mark { + return false + } + count++ + } + return count > 2 +} + +func (b *themanticBreakParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) { + line, segment := reader.PeekLine() + if isThemanticBreak(line) { + reader.Advance(segment.Len() - 1) + return ast.NewThemanticBreak(), NoChildren + } + return nil, NoChildren +} + +func (b *themanticBreakParser) Continue(node ast.Node, reader text.Reader, pc Context) State { + return Close +} + +func (b *themanticBreakParser) Close(node ast.Node, pc Context) { + // nothing to do +} + +func (b *themanticBreakParser) CanInterruptParagraph() bool { + return true +} + +func (b *themanticBreakParser) CanAcceptIndentedLine() bool { + return false +} diff --git a/renderer/html/html.go b/renderer/html/html.go new file mode 100644 index 0000000..3fa5470 --- /dev/null +++ b/renderer/html/html.go @@ -0,0 +1,588 @@ +package html + +import ( + "bytes" + "fmt" + "strconv" + + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/renderer" + "github.com/yuin/goldmark/util" +) + +// A Config struct has configurations for the HTML based renderers. +type Config struct { + Writer Writer + SoftLineBreak bool + XHTML bool +} + +// NewConfig returns a new Config with defaults. +func NewConfig() Config { + return Config{ + Writer: DefaultWriter, + SoftLineBreak: false, + XHTML: false, + } +} + +// SetOption implements renderer.NodeRenderer.SetOption. +func (c *Config) SetOption(name renderer.OptionName, value interface{}) { + switch name { + case SoftLineBreak: + c.SoftLineBreak = value.(bool) + case XHTML: + c.XHTML = value.(bool) + case TextWriter: + c.Writer = value.(Writer) + } +} + +// An Option interface sets options for HTML based renderers. +type Option interface { + SetHTMLOption(*Config) +} + +// TextWriter is an option name used in WithWriter. +const TextWriter renderer.OptionName = "Writer" + +type withWriter struct { + value Writer +} + +func (o *withWriter) SetConfig(c *renderer.Config) { + c.Options[TextWriter] = o.value +} + +func (o *withWriter) SetHTMLOption(c *Config) { + c.Writer = o.value +} + +// WithWriter is a functional option that allow you to set given writer to +// the renderer. +func WithWriter(writer Writer) interface { + renderer.Option + Option +} { + return &withWriter{writer} +} + +// SoftLineBreak is an option name used in WithSoftLineBreak. +const SoftLineBreak renderer.OptionName = "SoftLineBreak" + +type withSoftLineBreak struct { +} + +func (o *withSoftLineBreak) SetConfig(c *renderer.Config) { + c.Options[SoftLineBreak] = true +} + +func (o *withSoftLineBreak) SetHTMLOption(c *Config) { + c.SoftLineBreak = true +} + +// WithSoftLineBreak is a functional option that indicates whether softline breaks +// should be rendered as '
'. +func WithSoftLineBreak() interface { + renderer.Option + Option +} { + return &withSoftLineBreak{} +} + +// XHTML is an option name used in WithXHTML. +const XHTML renderer.OptionName = "XHTML" + +type withXHTML struct { +} + +func (o *withXHTML) SetConfig(c *renderer.Config) { + c.Options[XHTML] = true +} + +func (o *withXHTML) SetHTMLOption(c *Config) { + c.XHTML = true +} + +// WithXHTML is a functional option indicates that nodes should be rendered in +// xhtml instead of HTML5. +func WithXHTML() interface { + Option + renderer.Option +} { + return &withXHTML{} +} + +// A Renderer struct is an implementation of renderer.NodeRenderer that renders +// nodes as (X)HTML. +type Renderer struct { + Config +} + +// NewRenderer returns a new Renderer with given options. +func NewRenderer(opts ...Option) renderer.NodeRenderer { + r := &Renderer{ + Config: NewConfig(), + } + + for _, opt := range opts { + opt.SetHTMLOption(&r.Config) + } + return r +} + +// Render implements renderer.NodeRenderer.Render. +func (r *Renderer) Render(writer util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) { + switch node := n.(type) { + + // blocks + + case *ast.Document: + return r.renderDocument(writer, source, node, entering), nil + case *ast.Heading: + return r.renderHeading(writer, source, node, entering), nil + case *ast.Blockquote: + return r.renderBlockquote(writer, source, node, entering), nil + case *ast.CodeBlock: + return r.renderCodeBlock(writer, source, node, entering), nil + case *ast.FencedCodeBlock: + return r.renderFencedCodeBlock(writer, source, node, entering), nil + case *ast.HTMLBlock: + return r.renderHTMLBlock(writer, source, node, entering), nil + case *ast.List: + return r.renderList(writer, source, node, entering), nil + case *ast.ListItem: + return r.renderListItem(writer, source, node, entering), nil + case *ast.Paragraph: + return r.renderParagraph(writer, source, node, entering), nil + case *ast.TextBlock: + return r.renderTextBlock(writer, source, node, entering), nil + case *ast.ThemanticBreak: + return r.renderThemanticBreak(writer, source, node, entering), nil + // inlines + + case *ast.AutoLink: + return r.renderAutoLink(writer, source, node, entering), nil + case *ast.CodeSpan: + return r.renderCodeSpan(writer, source, node, entering), nil + case *ast.Emphasis: + return r.renderEmphasis(writer, source, node, entering), nil + case *ast.Image: + return r.renderImage(writer, source, node, entering), nil + case *ast.Link: + return r.renderLink(writer, source, node, entering), nil + case *ast.RawHTML: + return r.renderRawHTML(writer, source, node, entering), nil + case *ast.Text: + return r.renderText(writer, source, node, entering), nil + } + return ast.WalkContinue, renderer.NotSupported +} +func (r *Renderer) writeLines(w util.BufWriter, source []byte, n ast.Node) { + l := n.Lines().Len() + for i := 0; i < l; i++ { + line := n.Lines().At(i) + r.Writer.RawWrite(w, line.Value(source)) + } +} + +func (r *Renderer) renderDocument(w util.BufWriter, source []byte, n *ast.Document, entering bool) ast.WalkStatus { + // nothing to do + return ast.WalkContinue +} + +func (r *Renderer) renderHeading(w util.BufWriter, source []byte, n *ast.Heading, entering bool) ast.WalkStatus { + if entering { + w.WriteString("') + } else { + w.WriteString("\n") + } + return ast.WalkContinue +} + +func (r *Renderer) renderBlockquote(w util.BufWriter, source []byte, n *ast.Blockquote, entering bool) ast.WalkStatus { + if entering { + w.WriteString("
\n") + } else { + w.WriteString("
\n") + } + return ast.WalkContinue +} + +func (r *Renderer) renderCodeBlock(w util.BufWriter, source []byte, n *ast.CodeBlock, entering bool) ast.WalkStatus { + if entering { + w.WriteString("
")
+		r.writeLines(w, source, n)
+	} else {
+		w.WriteString("
\n") + } + return ast.WalkContinue +} + +func (r *Renderer) renderFencedCodeBlock(w util.BufWriter, source []byte, n *ast.FencedCodeBlock, entering bool) ast.WalkStatus { + if entering { + w.WriteString("
\n") + } + return ast.WalkContinue +} + +func (r *Renderer) renderHTMLBlock(w util.BufWriter, source []byte, n *ast.HTMLBlock, entering bool) ast.WalkStatus { + if entering { + l := n.Lines().Len() + for i := 0; i < l; i++ { + line := n.Lines().At(i) + w.Write(line.Value(source)) + } + } else { + if n.HasClosure() { + closure := n.ClosureLine + w.Write(closure.Value(source)) + } + } + return ast.WalkContinue +} + +func (r *Renderer) renderList(w util.BufWriter, source []byte, n *ast.List, entering bool) ast.WalkStatus { + tag := "ul" + if n.IsOrdered() { + tag = "ol" + } + if entering { + w.WriteByte('<') + w.WriteString(tag) + if n.IsOrdered() && n.Start != 1 { + fmt.Fprintf(w, " start=\"%d\">\n", n.Start) + } else { + w.WriteString(">\n") + } + } else { + w.WriteString("\n") + } + return ast.WalkContinue +} + +func (r *Renderer) renderListItem(w util.BufWriter, source []byte, n *ast.ListItem, entering bool) ast.WalkStatus { + if entering { + w.WriteString("
  • ") + fc := n.FirstChild() + if fc != nil { + if _, ok := fc.(*ast.TextBlock); !ok { + w.WriteByte('\n') + } + } + } else { + w.WriteString("
  • \n") + } + return ast.WalkContinue +} + +func (r *Renderer) renderParagraph(w util.BufWriter, source []byte, n *ast.Paragraph, entering bool) ast.WalkStatus { + if entering { + w.WriteString("

    ") + } else { + w.WriteString("

    \n") + } + return ast.WalkContinue +} + +func (r *Renderer) renderTextBlock(w util.BufWriter, source []byte, n *ast.TextBlock, entering bool) ast.WalkStatus { + if !entering { + if _, ok := n.NextSibling().(ast.Node); ok && n.FirstChild() != nil { + w.WriteByte('\n') + } + return ast.WalkContinue + } + return ast.WalkContinue +} + +func (r *Renderer) renderThemanticBreak(w util.BufWriter, source []byte, n *ast.ThemanticBreak, entering bool) ast.WalkStatus { + if !entering { + return ast.WalkContinue + } + if r.XHTML { + w.WriteString("
    \n") + } else { + w.WriteString("
    \n") + } + return ast.WalkContinue +} + +func (r *Renderer) renderAutoLink(w util.BufWriter, source []byte, n *ast.AutoLink, entering bool) ast.WalkStatus { + if !entering { + return ast.WalkContinue + } + w.WriteString(``) + w.Write(util.EscapeHTML(value)) + w.WriteString(``) + return ast.WalkContinue +} + +func (r *Renderer) renderCodeSpan(w util.BufWriter, source []byte, n *ast.CodeSpan, entering bool) ast.WalkStatus { + if entering { + w.WriteString("") + for c := n.FirstChild(); c != nil; c = c.NextSibling() { + segment := c.(*ast.Text).Segment + value := segment.Value(source) + if bytes.HasSuffix(value, []byte("\n")) { + r.Writer.RawWrite(w, value[:len(value)-1]) + if c != n.LastChild() { + r.Writer.RawWrite(w, []byte(" ")) + } + } else { + r.Writer.RawWrite(w, value) + } + } + return ast.WalkSkipChildren + } + w.WriteString("") + return ast.WalkContinue +} + +func (r *Renderer) renderEmphasis(w util.BufWriter, source []byte, n *ast.Emphasis, entering bool) ast.WalkStatus { + tag := "em" + if n.Level == 2 { + tag = "strong" + } + if entering { + w.WriteByte('<') + w.WriteString(tag) + w.WriteByte('>') + } else { + w.WriteString("') + } + return ast.WalkContinue +} + +func (r *Renderer) renderLink(w util.BufWriter, source []byte, n *ast.Link, entering bool) ast.WalkStatus { + if entering { + w.WriteString("') + } else { + w.WriteString("") + } + return ast.WalkContinue +} +func (r *Renderer) renderImage(w util.BufWriter, source []byte, n *ast.Image, entering bool) ast.WalkStatus { + if !entering { + return ast.WalkContinue + } + w.WriteString("`)
+	w.Write(n.Text(source))
+	w.WriteByte('") + } else { + w.WriteString(">") + } + return ast.WalkSkipChildren +} + +func (r *Renderer) renderRawHTML(w util.BufWriter, source []byte, n *ast.RawHTML, entering bool) ast.WalkStatus { + return ast.WalkContinue +} + +func (r *Renderer) renderText(w util.BufWriter, source []byte, n *ast.Text, entering bool) ast.WalkStatus { + if !entering { + return ast.WalkContinue + } + segment := n.Segment + if n.IsRaw() { + w.Write(segment.Value(source)) + } else { + r.Writer.Write(w, segment.Value(source)) + if n.HardLineBreak() || (n.SoftLineBreak() && r.SoftLineBreak) { + if r.XHTML { + w.WriteString("
    \n") + } else { + w.WriteString("
    \n") + } + } else if n.SoftLineBreak() { + w.WriteByte('\n') + } + } + return ast.WalkContinue +} + +func readWhile(source []byte, index [2]int, pred func(byte) bool) (int, bool) { + j := index[0] + ok := false + for ; j < index[1]; j++ { + c1 := source[j] + if pred(c1) { + ok = true + continue + } + break + } + return j, ok +} + +// A Writer interface wirtes textual contents to a writer. +type Writer interface { + // Write writes given source to writer with resolving references and unescaping + // backslash escaped characters. + Write(writer util.BufWriter, source []byte) + + // RawWrite wirtes given source to writer without resolving references and + // unescaping backslash escaped characters. + RawWrite(writer util.BufWriter, source []byte) +} + +type defaultWriter struct { +} + +var htmlEscaleTable = [256][]byte{nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("""), nil, nil, nil, []byte("&"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("<"), nil, []byte(">"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil} + +func escapeRune(writer util.BufWriter, r rune) { + if r < 256 { + v := htmlEscaleTable[byte(r)] + if v != nil { + writer.Write(v) + return + } + } + writer.WriteRune(util.ToValidRune(r)) +} + +func (d *defaultWriter) RawWrite(writer util.BufWriter, source []byte) { + n := 0 + l := len(source) + for i := 0; i < l; i++ { + v := htmlEscaleTable[source[i]] + if v != nil { + writer.Write(source[i-n : i]) + n = 0 + writer.Write(v) + continue + } + n++ + } + if n != 0 { + writer.Write(source[l-n:]) + } +} + +func (d *defaultWriter) Write(writer util.BufWriter, source []byte) { + escaped := false + ok := false + limit := len(source) + n := 0 + for i := 0; i < limit; i++ { + c := source[i] + if escaped { + if util.IsPunct(c) { + d.RawWrite(writer, source[n:i-1]) + n = i + escaped = false + continue + } + } + if c == '&' { + pos := i + next := i + 1 + if next < limit && source[next] == '#' { + nnext := next + 1 + nc := source[nnext] + // code point like #x22; + if nnext < limit && nc == 'x' || nc == 'X' { + start := nnext + 1 + i, ok = readWhile(source, [2]int{start, limit}, util.IsHexDecimal) + if ok && i < limit && source[i] == ';' { + v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 16, 32) + d.RawWrite(writer, source[n:pos]) + n = i + 1 + escapeRune(writer, rune(v)) + continue + } + // code point like #1234; + } else if nc >= '0' && nc <= '9' { + start := nnext + i, ok = readWhile(source, [2]int{start, limit}, util.IsNumeric) + if ok && i < limit && i-start < 8 && source[i] == ';' { + v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 0, 32) + d.RawWrite(writer, source[n:pos]) + n = i + 1 + escapeRune(writer, rune(v)) + continue + } + } + } else { + start := next + i, ok = readWhile(source, [2]int{start, limit}, util.IsAlphaNumeric) + // entity reference + if ok && i < limit && source[i] == ';' { + name := util.BytesToReadOnlyString(source[start:i]) + entity, ok := util.LookUpHTML5EntityByName(name) + if ok { + d.RawWrite(writer, source[n:pos]) + n = i + 1 + d.RawWrite(writer, entity.Characters) + continue + } + } + } + i = next - 1 + } + if c == '\\' { + escaped = true + continue + } + escaped = false + } + d.RawWrite(writer, source[n:len(source)]) +} + +// DefaultWriter is a default implementation of the Writer. +var DefaultWriter = &defaultWriter{} diff --git a/renderer/renderer.go b/renderer/renderer.go new file mode 100644 index 0000000..5dff406 --- /dev/null +++ b/renderer/renderer.go @@ -0,0 +1,161 @@ +// Package renderer renders given AST to certain formats. +package renderer + +import ( + "bufio" + "io" + + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/util" + + "sync" +) + +// A Config struct is a data structure that holds configuration of the Renderer. +type Config struct { + Options map[OptionName]interface{} + NodeRenderers util.PrioritizedSlice +} + +// NewConfig returns a new Config +func NewConfig() *Config { + return &Config{ + Options: map[OptionName]interface{}{}, + NodeRenderers: util.PrioritizedSlice{}, + } +} + +type notSupported struct { +} + +func (e *notSupported) Error() string { + return "not supported by this parser" +} + +// NotSupported indicates given node can not be rendered by this NodeRenderer. +var NotSupported = ¬Supported{} + +// An OptionName is a name of the option. +type OptionName string + +// An Option interface is a functional option type for the Renderer. +type Option interface { + SetConfig(*Config) +} + +type withNodeRenderers struct { + value []util.PrioritizedValue +} + +func (o *withNodeRenderers) SetConfig(c *Config) { + c.NodeRenderers = append(c.NodeRenderers, o.value...) +} + +// WithNodeRenderers is a functional option that allow you to add +// NodeRenderers to the renderer. +func WithNodeRenderers(ps ...util.PrioritizedValue) Option { + return &withNodeRenderers{ps} +} + +type withOption struct { + name OptionName + value interface{} +} + +func (o *withOption) SetConfig(c *Config) { + c.Options[o.name] = o.value +} + +// WithOption is a functional option that allow you to set +// an arbitary option to the parser. +func WithOption(name OptionName, value interface{}) Option { + return &withOption{name, value} +} + +// A SetOptioner interface sets given option to the object. +type SetOptioner interface { + // SetOption sets given option to the object. + // Unacceptable options may be passed. + // Thus implementations must ignore unacceptable options. + SetOption(name OptionName, value interface{}) +} + +// A NodeRenderer interface renders given AST node to given writer. +type NodeRenderer interface { + // Render renders given AST node to given writer. + Render(writer util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) +} + +// A Renderer interface renders given AST node to given +// writer with given Renderer. +type Renderer interface { + Render(w io.Writer, source []byte, n ast.Node) error + + // AddOption adds given option to thie parser. + AddOption(Option) +} + +type renderer struct { + config *Config + options map[OptionName]interface{} + nodeRenderers []NodeRenderer + initSync sync.Once +} + +// NewRenderer returns a new Renderer with given options. +func NewRenderer(options ...Option) Renderer { + config := NewConfig() + for _, opt := range options { + opt.SetConfig(config) + } + + r := &renderer{ + options: map[OptionName]interface{}{}, + config: config, + } + + return r +} + +func (r *renderer) AddOption(o Option) { + o.SetConfig(r.config) +} + +// Render renders given AST node to given writer with given Renderer. +func (r *renderer) Render(w io.Writer, source []byte, n ast.Node) error { + r.initSync.Do(func() { + r.options = r.config.Options + r.config.NodeRenderers.Sort() + r.nodeRenderers = make([]NodeRenderer, 0, len(r.config.NodeRenderers)) + for _, v := range r.config.NodeRenderers { + nr, _ := v.Value.(NodeRenderer) + if se, ok := v.Value.(SetOptioner); ok { + for oname, ovalue := range r.options { + se.SetOption(oname, ovalue) + } + } + r.nodeRenderers = append(r.nodeRenderers, nr) + } + r.config = nil + }) + writer, ok := w.(util.BufWriter) + if !ok { + writer = bufio.NewWriter(w) + } + err := ast.Walk(n, func(n ast.Node, entering bool) (ast.WalkStatus, error) { + var s ast.WalkStatus + var err error + for _, nr := range r.nodeRenderers { + s, err = nr.Render(writer, source, n, entering) + if err == NotSupported { + continue + } + break + } + return s, err + }) + if err != nil { + return err + } + return writer.Flush() +} diff --git a/text/reader.go b/text/reader.go new file mode 100644 index 0000000..3d99b1b --- /dev/null +++ b/text/reader.go @@ -0,0 +1,492 @@ +package text + +import ( + "github.com/yuin/goldmark/util" + "io" + "regexp" + "unicode/utf8" +) + +const invalidValue = -1 + +// EOF indicates the end of file. +const EOF = byte(0xff) + +// A Reader interface provides abstracted method for reading text. +type Reader interface { + io.RuneReader + + // Source returns a source of the reader. + Source() []byte + + // Peek returns a byte at current position without advancing the internal pointer. + Peek() byte + + // PeekLine returns the current line without advancing the internal pointer. + PeekLine() ([]byte, Segment) + + // PrecendingCharacter returns a character just before current internal pointer. + PrecendingCharacter() rune + + // Value returns a value of given segment. + Value(Segment) []byte + + // LineOffset returns a distance from the line head to current position. + LineOffset() int + + // Position returns current line number and position. + Position() (int, Segment) + + // SetPosition sets current line number and position. + SetPosition(int, Segment) + + // SetPadding sets padding to the reader. + SetPadding(int) + + // Advance advances the internal pointer. + Advance(int) + + // AdvanceAndSetPadding advances the internal pointer and add padding to the + // reader. + AdvanceAndSetPadding(int, int) + + // AdvanceLine advances the internal pointer to the next line head. + AdvanceLine() + + // SkipSpaces skips space characters and returns a non-blank line. + // If it reaches EOF, returns false. + SkipSpaces() (Segment, int, bool) + + // SkipSpaces skips blank lines and returns a non-blank line. + // If it reaches EOF, returns false. + SkipBlankLines() (Segment, int, bool) + + // Match performs regular expression matching to current line. + Match(reg *regexp.Regexp) bool + + // Match performs regular expression searching to current line. + FindSubMatch(reg *regexp.Regexp) [][]byte +} + +type reader struct { + source []byte + sourceLength int + line int + peekedLine []byte + pos Segment + head int +} + +// NewReader return a new Reader that can read UTF-8 bytes . +func NewReader(source []byte) Reader { + r := &reader{ + source: source, + sourceLength: len(source), + line: -1, + head: 0, + } + r.AdvanceLine() + return r +} + +func (r *reader) Source() []byte { + return r.source +} + +func (r *reader) Value(seg Segment) []byte { + return seg.Value(r.source) +} + +func (r *reader) Peek() byte { + if r.pos.Start >= 0 && r.pos.Start < r.sourceLength { + if r.pos.Padding != 0 { + return space[0] + } + return r.source[r.pos.Start] + } + return EOF +} + +func (r *reader) PeekLine() ([]byte, Segment) { + if r.pos.Start >= 0 && r.pos.Start < r.sourceLength { + if r.peekedLine == nil { + r.peekedLine = r.pos.Value(r.Source()) + } + return r.peekedLine, r.pos + } + return nil, r.pos +} + +// io.RuneReader interface +func (r *reader) ReadRune() (rune, int, error) { + return readRuneReader(r) +} + +func (r *reader) LineOffset() int { + v := r.pos.Start - r.head + if r.pos.Padding > 0 { + v += util.TabWidth(v) - r.pos.Padding + } + return v +} + +func (r *reader) PrecendingCharacter() rune { + if r.pos.Start <= 0 { + if r.pos.Padding != 0 { + return rune(' ') + } + return rune('\n') + } + i := r.pos.Start - 1 + for ; i >= 0; i-- { + if utf8.RuneStart(r.source[i]) { + break + } + } + rn, _ := utf8.DecodeRune(r.source[i:]) + return rn +} + +func (r *reader) Advance(n int) { + if n < len(r.peekedLine) && r.pos.Padding == 0 { + r.pos.Start += n + r.peekedLine = nil + return + } + r.peekedLine = nil + l := r.sourceLength + for ; n > 0 && r.pos.Start < l; n-- { + if r.pos.Padding != 0 { + r.pos.Padding-- + continue + } + if r.source[r.pos.Start] == '\n' { + r.AdvanceLine() + continue + } + r.pos.Start++ + } +} + +func (r *reader) AdvanceAndSetPadding(n, padding int) { + r.Advance(n) + if padding > r.pos.Padding { + r.SetPadding(padding) + } +} + +func (r *reader) AdvanceLine() { + r.peekedLine = nil + r.pos.Start = r.pos.Stop + r.head = r.pos.Start + if r.pos.Start < 0 { + return + } + r.pos.Stop = invalidValue + for i := r.pos.Start; i < r.sourceLength; i++ { + c := r.source[i] + if c == '\n' { + r.pos.Stop = i + 1 + break + } + } + r.line++ + r.pos.Padding = 0 +} + +func (r *reader) Position() (int, Segment) { + return r.line, r.pos +} + +func (r *reader) SetPosition(line int, pos Segment) { + r.line = line + r.pos = pos +} + +func (r *reader) SetPadding(v int) { + r.pos.Padding = v +} + +func (r *reader) SkipSpaces() (Segment, int, bool) { + return skipSpacesReader(r) +} + +func (r *reader) SkipBlankLines() (Segment, int, bool) { + return skipBlankLinesReader(r) +} + +func (r *reader) Match(reg *regexp.Regexp) bool { + return matchReader(r, reg) +} + +func (r *reader) FindSubMatch(reg *regexp.Regexp) [][]byte { + return findSubMatchReader(r, reg) +} + +// A BlockReader interface is a reader that is optimized for Blocks. +type BlockReader interface { + Reader + Reset(segment *Segments) +} + +type blockReader struct { + source []byte + segments *Segments + segmentsLength int + line int + pos Segment + head int + last int +} + +// NewBlockReader returns a new BlockReader. +func NewBlockReader(source []byte, segments *Segments) BlockReader { + r := &blockReader{ + source: source, + } + if segments != nil { + r.Reset(segments) + } + return r +} + +// Reset resets current state and sets new segments to the reader. +func (r *blockReader) Reset(segments *Segments) { + r.segments = segments + r.segmentsLength = segments.Len() + r.line = -1 + r.head = 0 + r.last = 0 + r.pos.Start = -1 + r.pos.Stop = -1 + r.pos.Padding = 0 + if r.segmentsLength > 0 { + last := r.segments.At(r.segmentsLength - 1) + r.last = last.Stop + } + r.AdvanceLine() +} + +func (r *blockReader) Source() []byte { + return r.source +} + +func (r *blockReader) Value(seg Segment) []byte { + line := r.segmentsLength - 1 + ret := make([]byte, 0, seg.Stop-seg.Start+1) + for ; line >= 0; line-- { + if seg.Start >= r.segments.At(line).Start { + break + } + } + i := seg.Start + for ; line < r.segmentsLength; line++ { + s := r.segments.At(line) + if i < 0 { + i = s.Start + } + ret = s.ConcatPadding(ret) + for ; i < seg.Stop && i < s.Stop; i++ { + ret = append(ret, r.source[i]) + } + i = -1 + if s.Stop > seg.Stop { + break + } + } + return ret +} + +// io.RuneReader interface +func (r *blockReader) ReadRune() (rune, int, error) { + return readRuneReader(r) +} + +func (r *blockReader) PrecendingCharacter() rune { + if r.pos.Padding != 0 { + return rune(' ') + } + if r.pos.Start <= 0 { + return rune('\n') + } + i := r.pos.Start - 1 + for ; i >= 0; i-- { + if utf8.RuneStart(r.source[i]) { + break + } + } + rn, _ := utf8.DecodeRune(r.source[i:]) + return rn +} + +func (r *blockReader) LineOffset() int { + v := r.pos.Start - r.head + if r.pos.Padding > 0 { + v += util.TabWidth(v) - r.pos.Padding + } + return v +} + +func (r *blockReader) Peek() byte { + if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last { + if r.pos.Padding != 0 { + return space[0] + } + return r.source[r.pos.Start] + } + return EOF +} + +func (r *blockReader) PeekLine() ([]byte, Segment) { + if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last { + return r.pos.Value(r.source), r.pos + } + return nil, r.pos +} + +func (r *blockReader) Advance(n int) { + if n < r.pos.Stop-r.pos.Start && r.pos.Padding == 0 { + r.pos.Start += n + return + } + + for ; n > 0; n-- { + if r.pos.Padding != 0 { + r.pos.Padding-- + continue + } + if r.pos.Start >= r.pos.Stop-1 && r.pos.Stop < r.last { + r.AdvanceLine() + continue + } + r.pos.Start++ + } +} + +func (r *blockReader) AdvanceAndSetPadding(n, padding int) { + r.Advance(n) + if padding > r.pos.Padding { + r.SetPadding(padding) + } +} + +func (r *blockReader) AdvanceLine() { + r.SetPosition(r.line+1, NewSegment(invalidValue, invalidValue)) + r.head = r.pos.Start +} + +func (r *blockReader) Position() (int, Segment) { + return r.line, r.pos +} + +func (r *blockReader) SetPosition(line int, pos Segment) { + r.line = line + if pos.Start == invalidValue { + if r.line < r.segmentsLength { + r.pos = r.segments.At(line) + } + } else { + r.pos = pos + } +} + +func (r *blockReader) SetPadding(v int) { + r.pos.Padding = v +} + +func (r *blockReader) SkipSpaces() (Segment, int, bool) { + return skipSpacesReader(r) +} + +func (r *blockReader) SkipBlankLines() (Segment, int, bool) { + return skipBlankLinesReader(r) +} + +func (r *blockReader) Match(reg *regexp.Regexp) bool { + return matchReader(r, reg) +} + +func (r *blockReader) FindSubMatch(reg *regexp.Regexp) [][]byte { + return findSubMatchReader(r, reg) +} + +func skipBlankLinesReader(r Reader) (Segment, int, bool) { + lines := 0 + for { + line, seg := r.PeekLine() + if line == nil { + return seg, lines, false + } + if util.IsBlank(line) { + lines++ + r.AdvanceLine() + } else { + return seg, lines, true + } + } +} + +func skipSpacesReader(r Reader) (Segment, int, bool) { + chars := 0 + for { + line, segment := r.PeekLine() + if line == nil { + return segment, chars, false + } + for i, c := range line { + if util.IsSpace(c) { + chars++ + r.Advance(1) + continue + } + return segment.WithStart(segment.Start + i + 1), chars, true + } + } +} + +func matchReader(r Reader, reg *regexp.Regexp) bool { + oldline, oldseg := r.Position() + match := reg.FindReaderSubmatchIndex(r) + r.SetPosition(oldline, oldseg) + if match == nil { + return false + } + r.Advance(match[1] - match[0]) + return true +} + +func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte { + oldline, oldseg := r.Position() + match := reg.FindReaderSubmatchIndex(r) + r.SetPosition(oldline, oldseg) + if match == nil { + return nil + } + runes := make([]rune, 0, match[1]-match[0]) + for i := 0; i < match[1]; { + r, size, _ := readRuneReader(r) + i += size + runes = append(runes, r) + } + result := [][]byte{} + for i := 0; i < len(match); i += 2 { + result = append(result, []byte(string(runes[match[i]:match[i+1]]))) + } + + r.SetPosition(oldline, oldseg) + r.Advance(match[1] - match[0]) + return result +} + +func readRuneReader(r Reader) (rune, int, error) { + line, _ := r.PeekLine() + if line == nil { + return 0, 0, io.EOF + } + rn, size := utf8.DecodeRune(line) + if rn == utf8.RuneError { + return 0, 0, io.EOF + } + r.Advance(size) + return rn, size, nil +} diff --git a/text/segment.go b/text/segment.go new file mode 100644 index 0000000..2a9b571 --- /dev/null +++ b/text/segment.go @@ -0,0 +1,209 @@ +package text + +import ( + "bytes" + "github.com/yuin/goldmark/util" +) + +var space = []byte(" ") + +// A Segment struct holds information about source potisions. +type Segment struct { + // Start is a start position of the segment. + Start int + + // Stop is a stop position of the segment. + // This value should be excluded. + Stop int + + // Padding is a padding length of the segment. + Padding int +} + +// NewSegment return a new Segment. +func NewSegment(start, stop int) Segment { + return Segment{ + Start: start, + Stop: stop, + Padding: 0, + } +} + +// NewSegmentPadding returns a new Segment with given padding. +func NewSegmentPadding(start, stop, n int) Segment { + return Segment{ + Start: start, + Stop: stop, + Padding: n, + } +} + +// Value returns a value of the segment. +func (t *Segment) Value(buffer []byte) []byte { + if t.Padding == 0 { + return buffer[t.Start:t.Stop] + } + result := make([]byte, 0, t.Padding+t.Stop-t.Start+1) + result = append(result, bytes.Repeat(space, t.Padding)...) + return append(result, buffer[t.Start:t.Stop]...) +} + +// Len returns a length of the segment. +func (t *Segment) Len() int { + return t.Stop - t.Start + t.Padding +} + +// Between returns a segment between this segment and given segment. +func (t *Segment) Between(other Segment) Segment { + if t.Stop != other.Stop { + panic("invalid state") + } + return NewSegmentPadding( + t.Start, + other.Start, + t.Padding-other.Padding, + ) +} + +// IsEmpty returns true if this segment is empty, otherwise false. +func (t *Segment) IsEmpty() bool { + return t.Start >= t.Stop && t.Padding == 0 +} + +// TrimRightSpace returns a new segment by slicing off all trailing +// space characters. +func (t *Segment) TrimRightSpace(buffer []byte) Segment { + v := buffer[t.Start:t.Stop] + l := util.TrimRightSpaceLength(v) + if l == len(v) { + return NewSegment(t.Start, t.Start) + } + return NewSegmentPadding(t.Start, t.Stop-l, t.Padding) +} + +// TrimLeftSpace returns a new segment by slicing off all leading +// space characters including padding. +func (t *Segment) TrimLeftSpace(buffer []byte) Segment { + v := buffer[t.Start:t.Stop] + l := util.TrimLeftSpaceLength(v) + return NewSegment(t.Start+l, t.Stop) +} + +// TrimLeftSpaceWidth returns a new segment by slicing off leading space +// characters until given width. +func (t *Segment) TrimLeftSpaceWidth(width int, buffer []byte) Segment { + padding := t.Padding + for ; width > 0; width-- { + if padding == 0 { + break + } + padding-- + } + if width == 0 { + return NewSegmentPadding(t.Start, t.Stop, padding) + } + text := buffer[t.Start:t.Stop] + start := t.Start + for _, c := range text { + if start >= t.Stop-1 || width <= 0 { + break + } + if c == ' ' { + width-- + } else if c == '\t' { + width -= 4 + } else { + break + } + start++ + } + if width < 0 { + padding = width * -1 + } + return NewSegmentPadding(start, t.Stop, padding) +} + +// WithStart returns a new Segment with same value except Start. +func (t *Segment) WithStart(v int) Segment { + return NewSegmentPadding(v, t.Stop, t.Padding) +} + +// WithStop returns a new Segment with same value except Stop. +func (t *Segment) WithStop(v int) Segment { + return NewSegmentPadding(t.Start, v, t.Padding) +} + +// ConcatPadding concats the padding to given slice. +func (t *Segment) ConcatPadding(v []byte) []byte { + if t.Padding > 0 { + return append(v, bytes.Repeat(space, t.Padding)...) + } + return v +} + +// Segments is a collection of the Segment. +type Segments struct { + values []Segment +} + +// NewSegments return a new Segments. +func NewSegments() *Segments { + return &Segments{ + values: nil, + } +} + +// Append appends given segment after the tail of the collection. +func (s *Segments) Append(t Segment) { + if s.values == nil { + s.values = make([]Segment, 0, 20) + } + s.values = append(s.values, t) +} + +// AppendAll appends all elements of given segments after the tail of the collection. +func (s *Segments) AppendAll(t []Segment) { + if s.values == nil { + s.values = make([]Segment, 0, 20) + } + s.values = append(s.values, t...) +} + +// Len returns the length of the collection. +func (s *Segments) Len() int { + if s.values == nil { + return 0 + } + return len(s.values) +} + +// At returns a segment at given index. +func (s *Segments) At(i int) Segment { + return s.values[i] +} + +// Set sets given Segment. +func (s *Segments) Set(i int, v Segment) { + s.values[i] = v +} + +// SetSliced replace the collection with a subsliced value. +func (s *Segments) SetSliced(lo, hi int) { + s.values = s.values[lo:hi] +} + +// Sliced returns a subslice of the collection. +func (s *Segments) Sliced(lo, hi int) []Segment { + return s.values[lo:hi] +} + +// Clear delete all element of the collction. +func (s *Segments) Clear() { + s.values = nil +} + +// Unshift insert given Segment to head of the collection. +func (s *Segments) Unshift(v Segment) { + s.values = append(s.values[0:1], s.values[0:]...) + s.values[0] = v +} diff --git a/util/html5entities.go b/util/html5entities.go new file mode 100644 index 0000000..6ebdd74 --- /dev/null +++ b/util/html5entities.go @@ -0,0 +1,2142 @@ +package util + +// An HTML5Entity struct represents HTML5 entitites. +type HTML5Entity struct { + Name string + CodePoints []int + Characters []byte +} + +// LookUpHTML5EntityByName returns (an HTML5Entity, true) if an entity named +// given name is found, otherwise (nil, false) +func LookUpHTML5EntityByName(name string) (*HTML5Entity, bool) { + v, ok := html5entities[name] + return v, ok +} + +var html5entities = map[string]*HTML5Entity{ + "AElig": &HTML5Entity{Name: "AElig", CodePoints: []int{198}, Characters: []byte{0xc3, 0x86}}, + "AMP": &HTML5Entity{Name: "AMP", CodePoints: []int{38}, Characters: []byte{0x26}}, + "Aacute": &HTML5Entity{Name: "Aacute", CodePoints: []int{193}, Characters: []byte{0xc3, 0x81}}, + "Acirc": &HTML5Entity{Name: "Acirc", CodePoints: []int{194}, Characters: []byte{0xc3, 0x82}}, + "Acy": &HTML5Entity{Name: "Acy", CodePoints: []int{1040}, Characters: []byte{0xd0, 0x90}}, + "Afr": &HTML5Entity{Name: "Afr", CodePoints: []int{120068}, Characters: []byte{0xf0, 0x9d, 0x94, 0x84}}, + "Agrave": &HTML5Entity{Name: "Agrave", CodePoints: []int{192}, Characters: []byte{0xc3, 0x80}}, + "Alpha": &HTML5Entity{Name: "Alpha", CodePoints: []int{913}, Characters: []byte{0xce, 0x91}}, + "Amacr": &HTML5Entity{Name: "Amacr", CodePoints: []int{256}, Characters: []byte{0xc4, 0x80}}, + "And": &HTML5Entity{Name: "And", CodePoints: []int{10835}, Characters: []byte{0xe2, 0xa9, 0x93}}, + "Aogon": &HTML5Entity{Name: "Aogon", CodePoints: []int{260}, Characters: []byte{0xc4, 0x84}}, + "Aopf": &HTML5Entity{Name: "Aopf", CodePoints: []int{120120}, Characters: []byte{0xf0, 0x9d, 0x94, 0xb8}}, + "ApplyFunction": &HTML5Entity{Name: "ApplyFunction", CodePoints: []int{8289}, Characters: []byte{0xe2, 0x81, 0xa1}}, + "Aring": &HTML5Entity{Name: "Aring", CodePoints: []int{197}, Characters: []byte{0xc3, 0x85}}, + "Ascr": &HTML5Entity{Name: "Ascr", CodePoints: []int{119964}, Characters: []byte{0xf0, 0x9d, 0x92, 0x9c}}, + "Assign": &HTML5Entity{Name: "Assign", CodePoints: []int{8788}, Characters: []byte{0xe2, 0x89, 0x94}}, + "Atilde": &HTML5Entity{Name: "Atilde", CodePoints: []int{195}, Characters: []byte{0xc3, 0x83}}, + "Auml": &HTML5Entity{Name: "Auml", CodePoints: []int{196}, Characters: []byte{0xc3, 0x84}}, + "Backslash": &HTML5Entity{Name: "Backslash", CodePoints: []int{8726}, Characters: []byte{0xe2, 0x88, 0x96}}, + "Barv": &HTML5Entity{Name: "Barv", CodePoints: []int{10983}, Characters: []byte{0xe2, 0xab, 0xa7}}, + "Barwed": &HTML5Entity{Name: "Barwed", CodePoints: []int{8966}, Characters: []byte{0xe2, 0x8c, 0x86}}, + "Bcy": &HTML5Entity{Name: "Bcy", CodePoints: []int{1041}, Characters: []byte{0xd0, 0x91}}, + "Because": &HTML5Entity{Name: "Because", CodePoints: []int{8757}, Characters: []byte{0xe2, 0x88, 0xb5}}, + "Bernoullis": &HTML5Entity{Name: "Bernoullis", CodePoints: []int{8492}, Characters: []byte{0xe2, 0x84, 0xac}}, + "Beta": &HTML5Entity{Name: "Beta", CodePoints: []int{914}, Characters: []byte{0xce, 0x92}}, + "Bfr": &HTML5Entity{Name: "Bfr", CodePoints: []int{120069}, Characters: []byte{0xf0, 0x9d, 0x94, 0x85}}, + "Bopf": &HTML5Entity{Name: "Bopf", CodePoints: []int{120121}, Characters: []byte{0xf0, 0x9d, 0x94, 0xb9}}, + "Breve": &HTML5Entity{Name: "Breve", CodePoints: []int{728}, Characters: []byte{0xcb, 0x98}}, + "Bscr": &HTML5Entity{Name: "Bscr", CodePoints: []int{8492}, Characters: []byte{0xe2, 0x84, 0xac}}, + "Bumpeq": &HTML5Entity{Name: "Bumpeq", CodePoints: []int{8782}, Characters: []byte{0xe2, 0x89, 0x8e}}, + "CHcy": &HTML5Entity{Name: "CHcy", CodePoints: []int{1063}, Characters: []byte{0xd0, 0xa7}}, + "COPY": &HTML5Entity{Name: "COPY", CodePoints: []int{169}, Characters: []byte{0xc2, 0xa9}}, + "Cacute": &HTML5Entity{Name: "Cacute", CodePoints: []int{262}, Characters: []byte{0xc4, 0x86}}, + "Cap": &HTML5Entity{Name: "Cap", CodePoints: []int{8914}, Characters: []byte{0xe2, 0x8b, 0x92}}, + "CapitalDifferentialD": &HTML5Entity{Name: "CapitalDifferentialD", CodePoints: []int{8517}, Characters: []byte{0xe2, 0x85, 0x85}}, + "Cayleys": &HTML5Entity{Name: "Cayleys", CodePoints: []int{8493}, Characters: []byte{0xe2, 0x84, 0xad}}, + "Ccaron": &HTML5Entity{Name: "Ccaron", CodePoints: []int{268}, Characters: []byte{0xc4, 0x8c}}, + "Ccedil": &HTML5Entity{Name: "Ccedil", CodePoints: []int{199}, Characters: []byte{0xc3, 0x87}}, + "Ccirc": &HTML5Entity{Name: "Ccirc", CodePoints: []int{264}, Characters: []byte{0xc4, 0x88}}, + "Cconint": &HTML5Entity{Name: "Cconint", CodePoints: []int{8752}, Characters: []byte{0xe2, 0x88, 0xb0}}, + "Cdot": &HTML5Entity{Name: "Cdot", CodePoints: []int{266}, Characters: []byte{0xc4, 0x8a}}, + "Cedilla": &HTML5Entity{Name: "Cedilla", CodePoints: []int{184}, Characters: []byte{0xc2, 0xb8}}, + "CenterDot": &HTML5Entity{Name: "CenterDot", CodePoints: []int{183}, Characters: []byte{0xc2, 0xb7}}, + "Cfr": &HTML5Entity{Name: "Cfr", CodePoints: []int{8493}, Characters: []byte{0xe2, 0x84, 0xad}}, + "Chi": &HTML5Entity{Name: "Chi", CodePoints: []int{935}, Characters: []byte{0xce, 0xa7}}, + "CircleDot": &HTML5Entity{Name: "CircleDot", CodePoints: []int{8857}, Characters: []byte{0xe2, 0x8a, 0x99}}, + "CircleMinus": &HTML5Entity{Name: "CircleMinus", CodePoints: []int{8854}, Characters: []byte{0xe2, 0x8a, 0x96}}, + "CirclePlus": &HTML5Entity{Name: "CirclePlus", CodePoints: []int{8853}, Characters: []byte{0xe2, 0x8a, 0x95}}, + "CircleTimes": &HTML5Entity{Name: "CircleTimes", CodePoints: []int{8855}, Characters: []byte{0xe2, 0x8a, 0x97}}, + "ClockwiseContourIntegral": &HTML5Entity{Name: "ClockwiseContourIntegral", CodePoints: []int{8754}, Characters: []byte{0xe2, 0x88, 0xb2}}, + "CloseCurlyDoubleQuote": &HTML5Entity{Name: "CloseCurlyDoubleQuote", CodePoints: []int{8221}, Characters: []byte{0xe2, 0x80, 0x9d}}, + "CloseCurlyQuote": &HTML5Entity{Name: "CloseCurlyQuote", CodePoints: []int{8217}, Characters: []byte{0xe2, 0x80, 0x99}}, + "Colon": &HTML5Entity{Name: "Colon", CodePoints: []int{8759}, Characters: []byte{0xe2, 0x88, 0xb7}}, + "Colone": &HTML5Entity{Name: "Colone", CodePoints: []int{10868}, Characters: []byte{0xe2, 0xa9, 0xb4}}, + "Congruent": &HTML5Entity{Name: "Congruent", CodePoints: []int{8801}, Characters: []byte{0xe2, 0x89, 0xa1}}, + "Conint": &HTML5Entity{Name: "Conint", CodePoints: []int{8751}, Characters: []byte{0xe2, 0x88, 0xaf}}, + "ContourIntegral": &HTML5Entity{Name: "ContourIntegral", CodePoints: []int{8750}, Characters: []byte{0xe2, 0x88, 0xae}}, + "Copf": &HTML5Entity{Name: "Copf", CodePoints: []int{8450}, Characters: []byte{0xe2, 0x84, 0x82}}, + "Coproduct": &HTML5Entity{Name: "Coproduct", CodePoints: []int{8720}, Characters: []byte{0xe2, 0x88, 0x90}}, + "CounterClockwiseContourIntegral": &HTML5Entity{Name: "CounterClockwiseContourIntegral", CodePoints: []int{8755}, Characters: []byte{0xe2, 0x88, 0xb3}}, + "Cross": &HTML5Entity{Name: "Cross", CodePoints: []int{10799}, Characters: []byte{0xe2, 0xa8, 0xaf}}, + "Cscr": &HTML5Entity{Name: "Cscr", CodePoints: []int{119966}, Characters: []byte{0xf0, 0x9d, 0x92, 0x9e}}, + "Cup": &HTML5Entity{Name: "Cup", CodePoints: []int{8915}, Characters: []byte{0xe2, 0x8b, 0x93}}, + "CupCap": &HTML5Entity{Name: "CupCap", CodePoints: []int{8781}, Characters: []byte{0xe2, 0x89, 0x8d}}, + "DD": &HTML5Entity{Name: "DD", CodePoints: []int{8517}, Characters: []byte{0xe2, 0x85, 0x85}}, + "DDotrahd": &HTML5Entity{Name: "DDotrahd", CodePoints: []int{10513}, Characters: []byte{0xe2, 0xa4, 0x91}}, + "DJcy": &HTML5Entity{Name: "DJcy", CodePoints: []int{1026}, Characters: []byte{0xd0, 0x82}}, + "DScy": &HTML5Entity{Name: "DScy", CodePoints: []int{1029}, Characters: []byte{0xd0, 0x85}}, + "DZcy": &HTML5Entity{Name: "DZcy", CodePoints: []int{1039}, Characters: []byte{0xd0, 0x8f}}, + "Dagger": &HTML5Entity{Name: "Dagger", CodePoints: []int{8225}, Characters: []byte{0xe2, 0x80, 0xa1}}, + "Darr": &HTML5Entity{Name: "Darr", CodePoints: []int{8609}, Characters: []byte{0xe2, 0x86, 0xa1}}, + "Dashv": &HTML5Entity{Name: "Dashv", CodePoints: []int{10980}, Characters: []byte{0xe2, 0xab, 0xa4}}, + "Dcaron": &HTML5Entity{Name: "Dcaron", CodePoints: []int{270}, Characters: []byte{0xc4, 0x8e}}, + "Dcy": &HTML5Entity{Name: "Dcy", CodePoints: []int{1044}, Characters: []byte{0xd0, 0x94}}, + "Del": &HTML5Entity{Name: "Del", CodePoints: []int{8711}, Characters: []byte{0xe2, 0x88, 0x87}}, + "Delta": &HTML5Entity{Name: "Delta", CodePoints: []int{916}, Characters: []byte{0xce, 0x94}}, + "Dfr": &HTML5Entity{Name: "Dfr", CodePoints: []int{120071}, Characters: []byte{0xf0, 0x9d, 0x94, 0x87}}, + "DiacriticalAcute": &HTML5Entity{Name: "DiacriticalAcute", CodePoints: []int{180}, Characters: []byte{0xc2, 0xb4}}, + "DiacriticalDot": &HTML5Entity{Name: "DiacriticalDot", CodePoints: []int{729}, Characters: []byte{0xcb, 0x99}}, + "DiacriticalDoubleAcute": &HTML5Entity{Name: "DiacriticalDoubleAcute", CodePoints: []int{733}, Characters: []byte{0xcb, 0x9d}}, + "DiacriticalGrave": &HTML5Entity{Name: "DiacriticalGrave", CodePoints: []int{96}, Characters: []byte{0x60}}, + "DiacriticalTilde": &HTML5Entity{Name: "DiacriticalTilde", CodePoints: []int{732}, Characters: []byte{0xcb, 0x9c}}, + "Diamond": &HTML5Entity{Name: "Diamond", CodePoints: []int{8900}, Characters: []byte{0xe2, 0x8b, 0x84}}, + "DifferentialD": &HTML5Entity{Name: "DifferentialD", CodePoints: []int{8518}, Characters: []byte{0xe2, 0x85, 0x86}}, + "Dopf": &HTML5Entity{Name: "Dopf", CodePoints: []int{120123}, Characters: []byte{0xf0, 0x9d, 0x94, 0xbb}}, + "Dot": &HTML5Entity{Name: "Dot", CodePoints: []int{168}, Characters: []byte{0xc2, 0xa8}}, + "DotDot": &HTML5Entity{Name: "DotDot", CodePoints: []int{8412}, Characters: []byte{0xe2, 0x83, 0x9c}}, + "DotEqual": &HTML5Entity{Name: "DotEqual", CodePoints: []int{8784}, Characters: []byte{0xe2, 0x89, 0x90}}, + "DoubleContourIntegral": &HTML5Entity{Name: "DoubleContourIntegral", CodePoints: []int{8751}, Characters: []byte{0xe2, 0x88, 0xaf}}, + "DoubleDot": &HTML5Entity{Name: "DoubleDot", CodePoints: []int{168}, Characters: []byte{0xc2, 0xa8}}, + "DoubleDownArrow": &HTML5Entity{Name: "DoubleDownArrow", CodePoints: []int{8659}, Characters: []byte{0xe2, 0x87, 0x93}}, + "DoubleLeftArrow": &HTML5Entity{Name: "DoubleLeftArrow", CodePoints: []int{8656}, Characters: []byte{0xe2, 0x87, 0x90}}, + "DoubleLeftRightArrow": &HTML5Entity{Name: "DoubleLeftRightArrow", CodePoints: []int{8660}, Characters: []byte{0xe2, 0x87, 0x94}}, + "DoubleLeftTee": &HTML5Entity{Name: "DoubleLeftTee", CodePoints: []int{10980}, Characters: []byte{0xe2, 0xab, 0xa4}}, + "DoubleLongLeftArrow": &HTML5Entity{Name: "DoubleLongLeftArrow", CodePoints: []int{10232}, Characters: []byte{0xe2, 0x9f, 0xb8}}, + "DoubleLongLeftRightArrow": &HTML5Entity{Name: "DoubleLongLeftRightArrow", CodePoints: []int{10234}, Characters: []byte{0xe2, 0x9f, 0xba}}, + "DoubleLongRightArrow": &HTML5Entity{Name: "DoubleLongRightArrow", CodePoints: []int{10233}, Characters: []byte{0xe2, 0x9f, 0xb9}}, + "DoubleRightArrow": &HTML5Entity{Name: "DoubleRightArrow", CodePoints: []int{8658}, Characters: []byte{0xe2, 0x87, 0x92}}, + "DoubleRightTee": &HTML5Entity{Name: "DoubleRightTee", CodePoints: []int{8872}, Characters: []byte{0xe2, 0x8a, 0xa8}}, + "DoubleUpArrow": &HTML5Entity{Name: "DoubleUpArrow", CodePoints: []int{8657}, Characters: []byte{0xe2, 0x87, 0x91}}, + "DoubleUpDownArrow": &HTML5Entity{Name: "DoubleUpDownArrow", CodePoints: []int{8661}, Characters: []byte{0xe2, 0x87, 0x95}}, + "DoubleVerticalBar": &HTML5Entity{Name: "DoubleVerticalBar", CodePoints: []int{8741}, Characters: []byte{0xe2, 0x88, 0xa5}}, + "DownArrow": &HTML5Entity{Name: "DownArrow", CodePoints: []int{8595}, Characters: []byte{0xe2, 0x86, 0x93}}, + "DownArrowBar": &HTML5Entity{Name: "DownArrowBar", CodePoints: []int{10515}, Characters: []byte{0xe2, 0xa4, 0x93}}, + "DownArrowUpArrow": &HTML5Entity{Name: "DownArrowUpArrow", CodePoints: []int{8693}, Characters: []byte{0xe2, 0x87, 0xb5}}, + "DownBreve": &HTML5Entity{Name: "DownBreve", CodePoints: []int{785}, Characters: []byte{0xcc, 0x91}}, + "DownLeftRightVector": &HTML5Entity{Name: "DownLeftRightVector", CodePoints: []int{10576}, Characters: []byte{0xe2, 0xa5, 0x90}}, + "DownLeftTeeVector": &HTML5Entity{Name: "DownLeftTeeVector", CodePoints: []int{10590}, Characters: []byte{0xe2, 0xa5, 0x9e}}, + "DownLeftVector": &HTML5Entity{Name: "DownLeftVector", CodePoints: []int{8637}, Characters: []byte{0xe2, 0x86, 0xbd}}, + "DownLeftVectorBar": &HTML5Entity{Name: "DownLeftVectorBar", CodePoints: []int{10582}, Characters: []byte{0xe2, 0xa5, 0x96}}, + "DownRightTeeVector": &HTML5Entity{Name: "DownRightTeeVector", CodePoints: []int{10591}, Characters: []byte{0xe2, 0xa5, 0x9f}}, + "DownRightVector": &HTML5Entity{Name: "DownRightVector", CodePoints: []int{8641}, Characters: []byte{0xe2, 0x87, 0x81}}, + "DownRightVectorBar": &HTML5Entity{Name: "DownRightVectorBar", CodePoints: []int{10583}, Characters: []byte{0xe2, 0xa5, 0x97}}, + "DownTee": &HTML5Entity{Name: "DownTee", CodePoints: []int{8868}, Characters: []byte{0xe2, 0x8a, 0xa4}}, + "DownTeeArrow": &HTML5Entity{Name: "DownTeeArrow", CodePoints: []int{8615}, Characters: []byte{0xe2, 0x86, 0xa7}}, + "Downarrow": &HTML5Entity{Name: "Downarrow", CodePoints: []int{8659}, Characters: []byte{0xe2, 0x87, 0x93}}, + "Dscr": &HTML5Entity{Name: "Dscr", CodePoints: []int{119967}, Characters: []byte{0xf0, 0x9d, 0x92, 0x9f}}, + "Dstrok": &HTML5Entity{Name: "Dstrok", CodePoints: []int{272}, Characters: []byte{0xc4, 0x90}}, + "ENG": &HTML5Entity{Name: "ENG", CodePoints: []int{330}, Characters: []byte{0xc5, 0x8a}}, + "ETH": &HTML5Entity{Name: "ETH", CodePoints: []int{208}, Characters: []byte{0xc3, 0x90}}, + "Eacute": &HTML5Entity{Name: "Eacute", CodePoints: []int{201}, Characters: []byte{0xc3, 0x89}}, + "Ecaron": &HTML5Entity{Name: "Ecaron", CodePoints: []int{282}, Characters: []byte{0xc4, 0x9a}}, + "Ecirc": &HTML5Entity{Name: "Ecirc", CodePoints: []int{202}, Characters: []byte{0xc3, 0x8a}}, + "Ecy": &HTML5Entity{Name: "Ecy", CodePoints: []int{1069}, Characters: []byte{0xd0, 0xad}}, + "Edot": &HTML5Entity{Name: "Edot", CodePoints: []int{278}, Characters: []byte{0xc4, 0x96}}, + "Efr": &HTML5Entity{Name: "Efr", CodePoints: []int{120072}, Characters: []byte{0xf0, 0x9d, 0x94, 0x88}}, + "Egrave": &HTML5Entity{Name: "Egrave", CodePoints: []int{200}, Characters: []byte{0xc3, 0x88}}, + "Element": &HTML5Entity{Name: "Element", CodePoints: []int{8712}, Characters: []byte{0xe2, 0x88, 0x88}}, + "Emacr": &HTML5Entity{Name: "Emacr", CodePoints: []int{274}, Characters: []byte{0xc4, 0x92}}, + "EmptySmallSquare": &HTML5Entity{Name: "EmptySmallSquare", CodePoints: []int{9723}, Characters: []byte{0xe2, 0x97, 0xbb}}, + "EmptyVerySmallSquare": &HTML5Entity{Name: "EmptyVerySmallSquare", CodePoints: []int{9643}, Characters: []byte{0xe2, 0x96, 0xab}}, + "Eogon": &HTML5Entity{Name: "Eogon", CodePoints: []int{280}, Characters: []byte{0xc4, 0x98}}, + "Eopf": &HTML5Entity{Name: "Eopf", CodePoints: []int{120124}, Characters: []byte{0xf0, 0x9d, 0x94, 0xbc}}, + "Epsilon": &HTML5Entity{Name: "Epsilon", CodePoints: []int{917}, Characters: []byte{0xce, 0x95}}, + "Equal": &HTML5Entity{Name: "Equal", CodePoints: []int{10869}, Characters: []byte{0xe2, 0xa9, 0xb5}}, + "EqualTilde": &HTML5Entity{Name: "EqualTilde", CodePoints: []int{8770}, Characters: []byte{0xe2, 0x89, 0x82}}, + "Equilibrium": &HTML5Entity{Name: "Equilibrium", CodePoints: []int{8652}, Characters: []byte{0xe2, 0x87, 0x8c}}, + "Escr": &HTML5Entity{Name: "Escr", CodePoints: []int{8496}, Characters: []byte{0xe2, 0x84, 0xb0}}, + "Esim": &HTML5Entity{Name: "Esim", CodePoints: []int{10867}, Characters: []byte{0xe2, 0xa9, 0xb3}}, + "Eta": &HTML5Entity{Name: "Eta", CodePoints: []int{919}, Characters: []byte{0xce, 0x97}}, + "Euml": &HTML5Entity{Name: "Euml", CodePoints: []int{203}, Characters: []byte{0xc3, 0x8b}}, + "Exists": &HTML5Entity{Name: "Exists", CodePoints: []int{8707}, Characters: []byte{0xe2, 0x88, 0x83}}, + "ExponentialE": &HTML5Entity{Name: "ExponentialE", CodePoints: []int{8519}, Characters: []byte{0xe2, 0x85, 0x87}}, + "Fcy": &HTML5Entity{Name: "Fcy", CodePoints: []int{1060}, Characters: []byte{0xd0, 0xa4}}, + "Ffr": &HTML5Entity{Name: "Ffr", CodePoints: []int{120073}, Characters: []byte{0xf0, 0x9d, 0x94, 0x89}}, + "FilledSmallSquare": &HTML5Entity{Name: "FilledSmallSquare", CodePoints: []int{9724}, Characters: []byte{0xe2, 0x97, 0xbc}}, + "FilledVerySmallSquare": &HTML5Entity{Name: "FilledVerySmallSquare", CodePoints: []int{9642}, Characters: []byte{0xe2, 0x96, 0xaa}}, + "Fopf": &HTML5Entity{Name: "Fopf", CodePoints: []int{120125}, Characters: []byte{0xf0, 0x9d, 0x94, 0xbd}}, + "ForAll": &HTML5Entity{Name: "ForAll", CodePoints: []int{8704}, Characters: []byte{0xe2, 0x88, 0x80}}, + "Fouriertrf": &HTML5Entity{Name: "Fouriertrf", CodePoints: []int{8497}, Characters: []byte{0xe2, 0x84, 0xb1}}, + "Fscr": &HTML5Entity{Name: "Fscr", CodePoints: []int{8497}, Characters: []byte{0xe2, 0x84, 0xb1}}, + "GJcy": &HTML5Entity{Name: "GJcy", CodePoints: []int{1027}, Characters: []byte{0xd0, 0x83}}, + "GT": &HTML5Entity{Name: "GT", CodePoints: []int{62}, Characters: []byte{0x3e}}, + "Gamma": &HTML5Entity{Name: "Gamma", CodePoints: []int{915}, Characters: []byte{0xce, 0x93}}, + "Gammad": &HTML5Entity{Name: "Gammad", CodePoints: []int{988}, Characters: []byte{0xcf, 0x9c}}, + "Gbreve": &HTML5Entity{Name: "Gbreve", CodePoints: []int{286}, Characters: []byte{0xc4, 0x9e}}, + "Gcedil": &HTML5Entity{Name: "Gcedil", CodePoints: []int{290}, Characters: []byte{0xc4, 0xa2}}, + "Gcirc": &HTML5Entity{Name: "Gcirc", CodePoints: []int{284}, Characters: []byte{0xc4, 0x9c}}, + "Gcy": &HTML5Entity{Name: "Gcy", CodePoints: []int{1043}, Characters: []byte{0xd0, 0x93}}, + "Gdot": &HTML5Entity{Name: "Gdot", CodePoints: []int{288}, Characters: []byte{0xc4, 0xa0}}, + "Gfr": &HTML5Entity{Name: "Gfr", CodePoints: []int{120074}, Characters: []byte{0xf0, 0x9d, 0x94, 0x8a}}, + "Gg": &HTML5Entity{Name: "Gg", CodePoints: []int{8921}, Characters: []byte{0xe2, 0x8b, 0x99}}, + "Gopf": &HTML5Entity{Name: "Gopf", CodePoints: []int{120126}, Characters: []byte{0xf0, 0x9d, 0x94, 0xbe}}, + "GreaterEqual": &HTML5Entity{Name: "GreaterEqual", CodePoints: []int{8805}, Characters: []byte{0xe2, 0x89, 0xa5}}, + "GreaterEqualLess": &HTML5Entity{Name: "GreaterEqualLess", CodePoints: []int{8923}, Characters: []byte{0xe2, 0x8b, 0x9b}}, + "GreaterFullEqual": &HTML5Entity{Name: "GreaterFullEqual", CodePoints: []int{8807}, Characters: []byte{0xe2, 0x89, 0xa7}}, + "GreaterGreater": &HTML5Entity{Name: "GreaterGreater", CodePoints: []int{10914}, Characters: []byte{0xe2, 0xaa, 0xa2}}, + "GreaterLess": &HTML5Entity{Name: "GreaterLess", CodePoints: []int{8823}, Characters: []byte{0xe2, 0x89, 0xb7}}, + "GreaterSlantEqual": &HTML5Entity{Name: "GreaterSlantEqual", CodePoints: []int{10878}, Characters: []byte{0xe2, 0xa9, 0xbe}}, + "GreaterTilde": &HTML5Entity{Name: "GreaterTilde", CodePoints: []int{8819}, Characters: []byte{0xe2, 0x89, 0xb3}}, + "Gscr": &HTML5Entity{Name: "Gscr", CodePoints: []int{119970}, Characters: []byte{0xf0, 0x9d, 0x92, 0xa2}}, + "Gt": &HTML5Entity{Name: "Gt", CodePoints: []int{8811}, Characters: []byte{0xe2, 0x89, 0xab}}, + "HARDcy": &HTML5Entity{Name: "HARDcy", CodePoints: []int{1066}, Characters: []byte{0xd0, 0xaa}}, + "Hacek": &HTML5Entity{Name: "Hacek", CodePoints: []int{711}, Characters: []byte{0xcb, 0x87}}, + "Hat": &HTML5Entity{Name: "Hat", CodePoints: []int{94}, Characters: []byte{0x5e}}, + "Hcirc": &HTML5Entity{Name: "Hcirc", CodePoints: []int{292}, Characters: []byte{0xc4, 0xa4}}, + "Hfr": &HTML5Entity{Name: "Hfr", CodePoints: []int{8460}, Characters: []byte{0xe2, 0x84, 0x8c}}, + "HilbertSpace": &HTML5Entity{Name: "HilbertSpace", CodePoints: []int{8459}, Characters: []byte{0xe2, 0x84, 0x8b}}, + "Hopf": &HTML5Entity{Name: "Hopf", CodePoints: []int{8461}, Characters: []byte{0xe2, 0x84, 0x8d}}, + "HorizontalLine": &HTML5Entity{Name: "HorizontalLine", CodePoints: []int{9472}, Characters: []byte{0xe2, 0x94, 0x80}}, + "Hscr": &HTML5Entity{Name: "Hscr", CodePoints: []int{8459}, Characters: []byte{0xe2, 0x84, 0x8b}}, + "Hstrok": &HTML5Entity{Name: "Hstrok", CodePoints: []int{294}, Characters: []byte{0xc4, 0xa6}}, + "HumpDownHump": &HTML5Entity{Name: "HumpDownHump", CodePoints: []int{8782}, Characters: []byte{0xe2, 0x89, 0x8e}}, + "HumpEqual": &HTML5Entity{Name: "HumpEqual", CodePoints: []int{8783}, Characters: []byte{0xe2, 0x89, 0x8f}}, + "IEcy": &HTML5Entity{Name: "IEcy", CodePoints: []int{1045}, Characters: []byte{0xd0, 0x95}}, + "IJlig": &HTML5Entity{Name: "IJlig", CodePoints: []int{306}, Characters: []byte{0xc4, 0xb2}}, + "IOcy": &HTML5Entity{Name: "IOcy", CodePoints: []int{1025}, Characters: []byte{0xd0, 0x81}}, + "Iacute": &HTML5Entity{Name: "Iacute", CodePoints: []int{205}, Characters: []byte{0xc3, 0x8d}}, + "Icirc": &HTML5Entity{Name: "Icirc", CodePoints: []int{206}, Characters: []byte{0xc3, 0x8e}}, + "Icy": &HTML5Entity{Name: "Icy", CodePoints: []int{1048}, Characters: []byte{0xd0, 0x98}}, + "Idot": &HTML5Entity{Name: "Idot", CodePoints: []int{304}, Characters: []byte{0xc4, 0xb0}}, + "Ifr": &HTML5Entity{Name: "Ifr", CodePoints: []int{8465}, Characters: []byte{0xe2, 0x84, 0x91}}, + "Igrave": &HTML5Entity{Name: "Igrave", CodePoints: []int{204}, Characters: []byte{0xc3, 0x8c}}, + "Im": &HTML5Entity{Name: "Im", CodePoints: []int{8465}, Characters: []byte{0xe2, 0x84, 0x91}}, + "Imacr": &HTML5Entity{Name: "Imacr", CodePoints: []int{298}, Characters: []byte{0xc4, 0xaa}}, + "ImaginaryI": &HTML5Entity{Name: "ImaginaryI", CodePoints: []int{8520}, Characters: []byte{0xe2, 0x85, 0x88}}, + "Implies": &HTML5Entity{Name: "Implies", CodePoints: []int{8658}, Characters: []byte{0xe2, 0x87, 0x92}}, + "Int": &HTML5Entity{Name: "Int", CodePoints: []int{8748}, Characters: []byte{0xe2, 0x88, 0xac}}, + "Integral": &HTML5Entity{Name: "Integral", CodePoints: []int{8747}, Characters: []byte{0xe2, 0x88, 0xab}}, + "Intersection": &HTML5Entity{Name: "Intersection", CodePoints: []int{8898}, Characters: []byte{0xe2, 0x8b, 0x82}}, + "InvisibleComma": &HTML5Entity{Name: "InvisibleComma", CodePoints: []int{8291}, Characters: []byte{0xe2, 0x81, 0xa3}}, + "InvisibleTimes": &HTML5Entity{Name: "InvisibleTimes", CodePoints: []int{8290}, Characters: []byte{0xe2, 0x81, 0xa2}}, + "Iogon": &HTML5Entity{Name: "Iogon", CodePoints: []int{302}, Characters: []byte{0xc4, 0xae}}, + "Iopf": &HTML5Entity{Name: "Iopf", CodePoints: []int{120128}, Characters: []byte{0xf0, 0x9d, 0x95, 0x80}}, + "Iota": &HTML5Entity{Name: "Iota", CodePoints: []int{921}, Characters: []byte{0xce, 0x99}}, + "Iscr": &HTML5Entity{Name: "Iscr", CodePoints: []int{8464}, Characters: []byte{0xe2, 0x84, 0x90}}, + "Itilde": &HTML5Entity{Name: "Itilde", CodePoints: []int{296}, Characters: []byte{0xc4, 0xa8}}, + "Iukcy": &HTML5Entity{Name: "Iukcy", CodePoints: []int{1030}, Characters: []byte{0xd0, 0x86}}, + "Iuml": &HTML5Entity{Name: "Iuml", CodePoints: []int{207}, Characters: []byte{0xc3, 0x8f}}, + "Jcirc": &HTML5Entity{Name: "Jcirc", CodePoints: []int{308}, Characters: []byte{0xc4, 0xb4}}, + "Jcy": &HTML5Entity{Name: "Jcy", CodePoints: []int{1049}, Characters: []byte{0xd0, 0x99}}, + "Jfr": &HTML5Entity{Name: "Jfr", CodePoints: []int{120077}, Characters: []byte{0xf0, 0x9d, 0x94, 0x8d}}, + "Jopf": &HTML5Entity{Name: "Jopf", CodePoints: []int{120129}, Characters: []byte{0xf0, 0x9d, 0x95, 0x81}}, + "Jscr": &HTML5Entity{Name: "Jscr", CodePoints: []int{119973}, Characters: []byte{0xf0, 0x9d, 0x92, 0xa5}}, + "Jsercy": &HTML5Entity{Name: "Jsercy", CodePoints: []int{1032}, Characters: []byte{0xd0, 0x88}}, + "Jukcy": &HTML5Entity{Name: "Jukcy", CodePoints: []int{1028}, Characters: []byte{0xd0, 0x84}}, + "KHcy": &HTML5Entity{Name: "KHcy", CodePoints: []int{1061}, Characters: []byte{0xd0, 0xa5}}, + "KJcy": &HTML5Entity{Name: "KJcy", CodePoints: []int{1036}, Characters: []byte{0xd0, 0x8c}}, + "Kappa": &HTML5Entity{Name: "Kappa", CodePoints: []int{922}, Characters: []byte{0xce, 0x9a}}, + "Kcedil": &HTML5Entity{Name: "Kcedil", CodePoints: []int{310}, Characters: []byte{0xc4, 0xb6}}, + "Kcy": &HTML5Entity{Name: "Kcy", CodePoints: []int{1050}, Characters: []byte{0xd0, 0x9a}}, + "Kfr": &HTML5Entity{Name: "Kfr", CodePoints: []int{120078}, Characters: []byte{0xf0, 0x9d, 0x94, 0x8e}}, + "Kopf": &HTML5Entity{Name: "Kopf", CodePoints: []int{120130}, Characters: []byte{0xf0, 0x9d, 0x95, 0x82}}, + "Kscr": &HTML5Entity{Name: "Kscr", CodePoints: []int{119974}, Characters: []byte{0xf0, 0x9d, 0x92, 0xa6}}, + "LJcy": &HTML5Entity{Name: "LJcy", CodePoints: []int{1033}, Characters: []byte{0xd0, 0x89}}, + "LT": &HTML5Entity{Name: "LT", CodePoints: []int{60}, Characters: []byte{0x3c}}, + "Lacute": &HTML5Entity{Name: "Lacute", CodePoints: []int{313}, Characters: []byte{0xc4, 0xb9}}, + "Lambda": &HTML5Entity{Name: "Lambda", CodePoints: []int{923}, Characters: []byte{0xce, 0x9b}}, + "Lang": &HTML5Entity{Name: "Lang", CodePoints: []int{10218}, Characters: []byte{0xe2, 0x9f, 0xaa}}, + "Laplacetrf": &HTML5Entity{Name: "Laplacetrf", CodePoints: []int{8466}, Characters: []byte{0xe2, 0x84, 0x92}}, + "Larr": &HTML5Entity{Name: "Larr", CodePoints: []int{8606}, Characters: []byte{0xe2, 0x86, 0x9e}}, + "Lcaron": &HTML5Entity{Name: "Lcaron", CodePoints: []int{317}, Characters: []byte{0xc4, 0xbd}}, + "Lcedil": &HTML5Entity{Name: "Lcedil", CodePoints: []int{315}, Characters: []byte{0xc4, 0xbb}}, + "Lcy": &HTML5Entity{Name: "Lcy", CodePoints: []int{1051}, Characters: []byte{0xd0, 0x9b}}, + "LeftAngleBracket": &HTML5Entity{Name: "LeftAngleBracket", CodePoints: []int{10216}, Characters: []byte{0xe2, 0x9f, 0xa8}}, + "LeftArrow": &HTML5Entity{Name: "LeftArrow", CodePoints: []int{8592}, Characters: []byte{0xe2, 0x86, 0x90}}, + "LeftArrowBar": &HTML5Entity{Name: "LeftArrowBar", CodePoints: []int{8676}, Characters: []byte{0xe2, 0x87, 0xa4}}, + "LeftArrowRightArrow": &HTML5Entity{Name: "LeftArrowRightArrow", CodePoints: []int{8646}, Characters: []byte{0xe2, 0x87, 0x86}}, + "LeftCeiling": &HTML5Entity{Name: "LeftCeiling", CodePoints: []int{8968}, Characters: []byte{0xe2, 0x8c, 0x88}}, + "LeftDoubleBracket": &HTML5Entity{Name: "LeftDoubleBracket", CodePoints: []int{10214}, Characters: []byte{0xe2, 0x9f, 0xa6}}, + "LeftDownTeeVector": &HTML5Entity{Name: "LeftDownTeeVector", CodePoints: []int{10593}, Characters: []byte{0xe2, 0xa5, 0xa1}}, + "LeftDownVector": &HTML5Entity{Name: "LeftDownVector", CodePoints: []int{8643}, Characters: []byte{0xe2, 0x87, 0x83}}, + "LeftDownVectorBar": &HTML5Entity{Name: "LeftDownVectorBar", CodePoints: []int{10585}, Characters: []byte{0xe2, 0xa5, 0x99}}, + "LeftFloor": &HTML5Entity{Name: "LeftFloor", CodePoints: []int{8970}, Characters: []byte{0xe2, 0x8c, 0x8a}}, + "LeftRightArrow": &HTML5Entity{Name: "LeftRightArrow", CodePoints: []int{8596}, Characters: []byte{0xe2, 0x86, 0x94}}, + "LeftRightVector": &HTML5Entity{Name: "LeftRightVector", CodePoints: []int{10574}, Characters: []byte{0xe2, 0xa5, 0x8e}}, + "LeftTee": &HTML5Entity{Name: "LeftTee", CodePoints: []int{8867}, Characters: []byte{0xe2, 0x8a, 0xa3}}, + "LeftTeeArrow": &HTML5Entity{Name: "LeftTeeArrow", CodePoints: []int{8612}, Characters: []byte{0xe2, 0x86, 0xa4}}, + "LeftTeeVector": &HTML5Entity{Name: "LeftTeeVector", CodePoints: []int{10586}, Characters: []byte{0xe2, 0xa5, 0x9a}}, + "LeftTriangle": &HTML5Entity{Name: "LeftTriangle", CodePoints: []int{8882}, Characters: []byte{0xe2, 0x8a, 0xb2}}, + "LeftTriangleBar": &HTML5Entity{Name: "LeftTriangleBar", CodePoints: []int{10703}, Characters: []byte{0xe2, 0xa7, 0x8f}}, + "LeftTriangleEqual": &HTML5Entity{Name: "LeftTriangleEqual", CodePoints: []int{8884}, Characters: []byte{0xe2, 0x8a, 0xb4}}, + "LeftUpDownVector": &HTML5Entity{Name: "LeftUpDownVector", CodePoints: []int{10577}, Characters: []byte{0xe2, 0xa5, 0x91}}, + "LeftUpTeeVector": &HTML5Entity{Name: "LeftUpTeeVector", CodePoints: []int{10592}, Characters: []byte{0xe2, 0xa5, 0xa0}}, + "LeftUpVector": &HTML5Entity{Name: "LeftUpVector", CodePoints: []int{8639}, Characters: []byte{0xe2, 0x86, 0xbf}}, + "LeftUpVectorBar": &HTML5Entity{Name: "LeftUpVectorBar", CodePoints: []int{10584}, Characters: []byte{0xe2, 0xa5, 0x98}}, + "LeftVector": &HTML5Entity{Name: "LeftVector", CodePoints: []int{8636}, Characters: []byte{0xe2, 0x86, 0xbc}}, + "LeftVectorBar": &HTML5Entity{Name: "LeftVectorBar", CodePoints: []int{10578}, Characters: []byte{0xe2, 0xa5, 0x92}}, + "Leftarrow": &HTML5Entity{Name: "Leftarrow", CodePoints: []int{8656}, Characters: []byte{0xe2, 0x87, 0x90}}, + "Leftrightarrow": &HTML5Entity{Name: "Leftrightarrow", CodePoints: []int{8660}, Characters: []byte{0xe2, 0x87, 0x94}}, + "LessEqualGreater": &HTML5Entity{Name: "LessEqualGreater", CodePoints: []int{8922}, Characters: []byte{0xe2, 0x8b, 0x9a}}, + "LessFullEqual": &HTML5Entity{Name: "LessFullEqual", CodePoints: []int{8806}, Characters: []byte{0xe2, 0x89, 0xa6}}, + "LessGreater": &HTML5Entity{Name: "LessGreater", CodePoints: []int{8822}, Characters: []byte{0xe2, 0x89, 0xb6}}, + "LessLess": &HTML5Entity{Name: "LessLess", CodePoints: []int{10913}, Characters: []byte{0xe2, 0xaa, 0xa1}}, + "LessSlantEqual": &HTML5Entity{Name: "LessSlantEqual", CodePoints: []int{10877}, Characters: []byte{0xe2, 0xa9, 0xbd}}, + "LessTilde": &HTML5Entity{Name: "LessTilde", CodePoints: []int{8818}, Characters: []byte{0xe2, 0x89, 0xb2}}, + "Lfr": &HTML5Entity{Name: "Lfr", CodePoints: []int{120079}, Characters: []byte{0xf0, 0x9d, 0x94, 0x8f}}, + "Ll": &HTML5Entity{Name: "Ll", CodePoints: []int{8920}, Characters: []byte{0xe2, 0x8b, 0x98}}, + "Lleftarrow": &HTML5Entity{Name: "Lleftarrow", CodePoints: []int{8666}, Characters: []byte{0xe2, 0x87, 0x9a}}, + "Lmidot": &HTML5Entity{Name: "Lmidot", CodePoints: []int{319}, Characters: []byte{0xc4, 0xbf}}, + "LongLeftArrow": &HTML5Entity{Name: "LongLeftArrow", CodePoints: []int{10229}, Characters: []byte{0xe2, 0x9f, 0xb5}}, + "LongLeftRightArrow": &HTML5Entity{Name: "LongLeftRightArrow", CodePoints: []int{10231}, Characters: []byte{0xe2, 0x9f, 0xb7}}, + "LongRightArrow": &HTML5Entity{Name: "LongRightArrow", CodePoints: []int{10230}, Characters: []byte{0xe2, 0x9f, 0xb6}}, + "Longleftarrow": &HTML5Entity{Name: "Longleftarrow", CodePoints: []int{10232}, Characters: []byte{0xe2, 0x9f, 0xb8}}, + "Longleftrightarrow": &HTML5Entity{Name: "Longleftrightarrow", CodePoints: []int{10234}, Characters: []byte{0xe2, 0x9f, 0xba}}, + "Longrightarrow": &HTML5Entity{Name: "Longrightarrow", CodePoints: []int{10233}, Characters: []byte{0xe2, 0x9f, 0xb9}}, + "Lopf": &HTML5Entity{Name: "Lopf", CodePoints: []int{120131}, Characters: []byte{0xf0, 0x9d, 0x95, 0x83}}, + "LowerLeftArrow": &HTML5Entity{Name: "LowerLeftArrow", CodePoints: []int{8601}, Characters: []byte{0xe2, 0x86, 0x99}}, + "LowerRightArrow": &HTML5Entity{Name: "LowerRightArrow", CodePoints: []int{8600}, Characters: []byte{0xe2, 0x86, 0x98}}, + "Lscr": &HTML5Entity{Name: "Lscr", CodePoints: []int{8466}, Characters: []byte{0xe2, 0x84, 0x92}}, + "Lsh": &HTML5Entity{Name: "Lsh", CodePoints: []int{8624}, Characters: []byte{0xe2, 0x86, 0xb0}}, + "Lstrok": &HTML5Entity{Name: "Lstrok", CodePoints: []int{321}, Characters: []byte{0xc5, 0x81}}, + "Lt": &HTML5Entity{Name: "Lt", CodePoints: []int{8810}, Characters: []byte{0xe2, 0x89, 0xaa}}, + "Map": &HTML5Entity{Name: "Map", CodePoints: []int{10501}, Characters: []byte{0xe2, 0xa4, 0x85}}, + "Mcy": &HTML5Entity{Name: "Mcy", CodePoints: []int{1052}, Characters: []byte{0xd0, 0x9c}}, + "MediumSpace": &HTML5Entity{Name: "MediumSpace", CodePoints: []int{8287}, Characters: []byte{0xe2, 0x81, 0x9f}}, + "Mellintrf": &HTML5Entity{Name: "Mellintrf", CodePoints: []int{8499}, Characters: []byte{0xe2, 0x84, 0xb3}}, + "Mfr": &HTML5Entity{Name: "Mfr", CodePoints: []int{120080}, Characters: []byte{0xf0, 0x9d, 0x94, 0x90}}, + "MinusPlus": &HTML5Entity{Name: "MinusPlus", CodePoints: []int{8723}, Characters: []byte{0xe2, 0x88, 0x93}}, + "Mopf": &HTML5Entity{Name: "Mopf", CodePoints: []int{120132}, Characters: []byte{0xf0, 0x9d, 0x95, 0x84}}, + "Mscr": &HTML5Entity{Name: "Mscr", CodePoints: []int{8499}, Characters: []byte{0xe2, 0x84, 0xb3}}, + "Mu": &HTML5Entity{Name: "Mu", CodePoints: []int{924}, Characters: []byte{0xce, 0x9c}}, + "NJcy": &HTML5Entity{Name: "NJcy", CodePoints: []int{1034}, Characters: []byte{0xd0, 0x8a}}, + "Nacute": &HTML5Entity{Name: "Nacute", CodePoints: []int{323}, Characters: []byte{0xc5, 0x83}}, + "Ncaron": &HTML5Entity{Name: "Ncaron", CodePoints: []int{327}, Characters: []byte{0xc5, 0x87}}, + "Ncedil": &HTML5Entity{Name: "Ncedil", CodePoints: []int{325}, Characters: []byte{0xc5, 0x85}}, + "Ncy": &HTML5Entity{Name: "Ncy", CodePoints: []int{1053}, Characters: []byte{0xd0, 0x9d}}, + "NegativeMediumSpace": &HTML5Entity{Name: "NegativeMediumSpace", CodePoints: []int{8203}, Characters: []byte{0xe2, 0x80, 0x8b}}, + "NegativeThickSpace": &HTML5Entity{Name: "NegativeThickSpace", CodePoints: []int{8203}, Characters: []byte{0xe2, 0x80, 0x8b}}, + "NegativeThinSpace": &HTML5Entity{Name: "NegativeThinSpace", CodePoints: []int{8203}, Characters: []byte{0xe2, 0x80, 0x8b}}, + "NegativeVeryThinSpace": &HTML5Entity{Name: "NegativeVeryThinSpace", CodePoints: []int{8203}, Characters: []byte{0xe2, 0x80, 0x8b}}, + "NestedGreaterGreater": &HTML5Entity{Name: "NestedGreaterGreater", CodePoints: []int{8811}, Characters: []byte{0xe2, 0x89, 0xab}}, + "NestedLessLess": &HTML5Entity{Name: "NestedLessLess", CodePoints: []int{8810}, Characters: []byte{0xe2, 0x89, 0xaa}}, + "NewLine": &HTML5Entity{Name: "NewLine", CodePoints: []int{10}, Characters: []byte{0xa}}, + "Nfr": &HTML5Entity{Name: "Nfr", CodePoints: []int{120081}, Characters: []byte{0xf0, 0x9d, 0x94, 0x91}}, + "NoBreak": &HTML5Entity{Name: "NoBreak", CodePoints: []int{8288}, Characters: []byte{0xe2, 0x81, 0xa0}}, + "NonBreakingSpace": &HTML5Entity{Name: "NonBreakingSpace", CodePoints: []int{160}, Characters: []byte{0xc2, 0xa0}}, + "Nopf": &HTML5Entity{Name: "Nopf", CodePoints: []int{8469}, Characters: []byte{0xe2, 0x84, 0x95}}, + "Not": &HTML5Entity{Name: "Not", CodePoints: []int{10988}, Characters: []byte{0xe2, 0xab, 0xac}}, + "NotCongruent": &HTML5Entity{Name: "NotCongruent", CodePoints: []int{8802}, Characters: []byte{0xe2, 0x89, 0xa2}}, + "NotCupCap": &HTML5Entity{Name: "NotCupCap", CodePoints: []int{8813}, Characters: []byte{0xe2, 0x89, 0xad}}, + "NotDoubleVerticalBar": &HTML5Entity{Name: "NotDoubleVerticalBar", CodePoints: []int{8742}, Characters: []byte{0xe2, 0x88, 0xa6}}, + "NotElement": &HTML5Entity{Name: "NotElement", CodePoints: []int{8713}, Characters: []byte{0xe2, 0x88, 0x89}}, + "NotEqual": &HTML5Entity{Name: "NotEqual", CodePoints: []int{8800}, Characters: []byte{0xe2, 0x89, 0xa0}}, + "NotEqualTilde": &HTML5Entity{Name: "NotEqualTilde", CodePoints: []int{8770, 824}, Characters: []byte{0xe2, 0x89, 0x82, 0xcc, 0xb8}}, + "NotExists": &HTML5Entity{Name: "NotExists", CodePoints: []int{8708}, Characters: []byte{0xe2, 0x88, 0x84}}, + "NotGreater": &HTML5Entity{Name: "NotGreater", CodePoints: []int{8815}, Characters: []byte{0xe2, 0x89, 0xaf}}, + "NotGreaterEqual": &HTML5Entity{Name: "NotGreaterEqual", CodePoints: []int{8817}, Characters: []byte{0xe2, 0x89, 0xb1}}, + "NotGreaterFullEqual": &HTML5Entity{Name: "NotGreaterFullEqual", CodePoints: []int{8807, 824}, Characters: []byte{0xe2, 0x89, 0xa7, 0xcc, 0xb8}}, + "NotGreaterGreater": &HTML5Entity{Name: "NotGreaterGreater", CodePoints: []int{8811, 824}, Characters: []byte{0xe2, 0x89, 0xab, 0xcc, 0xb8}}, + "NotGreaterLess": &HTML5Entity{Name: "NotGreaterLess", CodePoints: []int{8825}, Characters: []byte{0xe2, 0x89, 0xb9}}, + "NotGreaterSlantEqual": &HTML5Entity{Name: "NotGreaterSlantEqual", CodePoints: []int{10878, 824}, Characters: []byte{0xe2, 0xa9, 0xbe, 0xcc, 0xb8}}, + "NotGreaterTilde": &HTML5Entity{Name: "NotGreaterTilde", CodePoints: []int{8821}, Characters: []byte{0xe2, 0x89, 0xb5}}, + "NotHumpDownHump": &HTML5Entity{Name: "NotHumpDownHump", CodePoints: []int{8782, 824}, Characters: []byte{0xe2, 0x89, 0x8e, 0xcc, 0xb8}}, + "NotHumpEqual": &HTML5Entity{Name: "NotHumpEqual", CodePoints: []int{8783, 824}, Characters: []byte{0xe2, 0x89, 0x8f, 0xcc, 0xb8}}, + "NotLeftTriangle": &HTML5Entity{Name: "NotLeftTriangle", CodePoints: []int{8938}, Characters: []byte{0xe2, 0x8b, 0xaa}}, + "NotLeftTriangleBar": &HTML5Entity{Name: "NotLeftTriangleBar", CodePoints: []int{10703, 824}, Characters: []byte{0xe2, 0xa7, 0x8f, 0xcc, 0xb8}}, + "NotLeftTriangleEqual": &HTML5Entity{Name: "NotLeftTriangleEqual", CodePoints: []int{8940}, Characters: []byte{0xe2, 0x8b, 0xac}}, + "NotLess": &HTML5Entity{Name: "NotLess", CodePoints: []int{8814}, Characters: []byte{0xe2, 0x89, 0xae}}, + "NotLessEqual": &HTML5Entity{Name: "NotLessEqual", CodePoints: []int{8816}, Characters: []byte{0xe2, 0x89, 0xb0}}, + "NotLessGreater": &HTML5Entity{Name: "NotLessGreater", CodePoints: []int{8824}, Characters: []byte{0xe2, 0x89, 0xb8}}, + "NotLessLess": &HTML5Entity{Name: "NotLessLess", CodePoints: []int{8810, 824}, Characters: []byte{0xe2, 0x89, 0xaa, 0xcc, 0xb8}}, + "NotLessSlantEqual": &HTML5Entity{Name: "NotLessSlantEqual", CodePoints: []int{10877, 824}, Characters: []byte{0xe2, 0xa9, 0xbd, 0xcc, 0xb8}}, + "NotLessTilde": &HTML5Entity{Name: "NotLessTilde", CodePoints: []int{8820}, Characters: []byte{0xe2, 0x89, 0xb4}}, + "NotNestedGreaterGreater": &HTML5Entity{Name: "NotNestedGreaterGreater", CodePoints: []int{10914, 824}, Characters: []byte{0xe2, 0xaa, 0xa2, 0xcc, 0xb8}}, + "NotNestedLessLess": &HTML5Entity{Name: "NotNestedLessLess", CodePoints: []int{10913, 824}, Characters: []byte{0xe2, 0xaa, 0xa1, 0xcc, 0xb8}}, + "NotPrecedes": &HTML5Entity{Name: "NotPrecedes", CodePoints: []int{8832}, Characters: []byte{0xe2, 0x8a, 0x80}}, + "NotPrecedesEqual": &HTML5Entity{Name: "NotPrecedesEqual", CodePoints: []int{10927, 824}, Characters: []byte{0xe2, 0xaa, 0xaf, 0xcc, 0xb8}}, + "NotPrecedesSlantEqual": &HTML5Entity{Name: "NotPrecedesSlantEqual", CodePoints: []int{8928}, Characters: []byte{0xe2, 0x8b, 0xa0}}, + "NotReverseElement": &HTML5Entity{Name: "NotReverseElement", CodePoints: []int{8716}, Characters: []byte{0xe2, 0x88, 0x8c}}, + "NotRightTriangle": &HTML5Entity{Name: "NotRightTriangle", CodePoints: []int{8939}, Characters: []byte{0xe2, 0x8b, 0xab}}, + "NotRightTriangleBar": &HTML5Entity{Name: "NotRightTriangleBar", CodePoints: []int{10704, 824}, Characters: []byte{0xe2, 0xa7, 0x90, 0xcc, 0xb8}}, + "NotRightTriangleEqual": &HTML5Entity{Name: "NotRightTriangleEqual", CodePoints: []int{8941}, Characters: []byte{0xe2, 0x8b, 0xad}}, + "NotSquareSubset": &HTML5Entity{Name: "NotSquareSubset", CodePoints: []int{8847, 824}, Characters: []byte{0xe2, 0x8a, 0x8f, 0xcc, 0xb8}}, + "NotSquareSubsetEqual": &HTML5Entity{Name: "NotSquareSubsetEqual", CodePoints: []int{8930}, Characters: []byte{0xe2, 0x8b, 0xa2}}, + "NotSquareSuperset": &HTML5Entity{Name: "NotSquareSuperset", CodePoints: []int{8848, 824}, Characters: []byte{0xe2, 0x8a, 0x90, 0xcc, 0xb8}}, + "NotSquareSupersetEqual": &HTML5Entity{Name: "NotSquareSupersetEqual", CodePoints: []int{8931}, Characters: []byte{0xe2, 0x8b, 0xa3}}, + "NotSubset": &HTML5Entity{Name: "NotSubset", CodePoints: []int{8834, 8402}, Characters: []byte{0xe2, 0x8a, 0x82, 0xe2, 0x83, 0x92}}, + "NotSubsetEqual": &HTML5Entity{Name: "NotSubsetEqual", CodePoints: []int{8840}, Characters: []byte{0xe2, 0x8a, 0x88}}, + "NotSucceeds": &HTML5Entity{Name: "NotSucceeds", CodePoints: []int{8833}, Characters: []byte{0xe2, 0x8a, 0x81}}, + "NotSucceedsEqual": &HTML5Entity{Name: "NotSucceedsEqual", CodePoints: []int{10928, 824}, Characters: []byte{0xe2, 0xaa, 0xb0, 0xcc, 0xb8}}, + "NotSucceedsSlantEqual": &HTML5Entity{Name: "NotSucceedsSlantEqual", CodePoints: []int{8929}, Characters: []byte{0xe2, 0x8b, 0xa1}}, + "NotSucceedsTilde": &HTML5Entity{Name: "NotSucceedsTilde", CodePoints: []int{8831, 824}, Characters: []byte{0xe2, 0x89, 0xbf, 0xcc, 0xb8}}, + "NotSuperset": &HTML5Entity{Name: "NotSuperset", CodePoints: []int{8835, 8402}, Characters: []byte{0xe2, 0x8a, 0x83, 0xe2, 0x83, 0x92}}, + "NotSupersetEqual": &HTML5Entity{Name: "NotSupersetEqual", CodePoints: []int{8841}, Characters: []byte{0xe2, 0x8a, 0x89}}, + "NotTilde": &HTML5Entity{Name: "NotTilde", CodePoints: []int{8769}, Characters: []byte{0xe2, 0x89, 0x81}}, + "NotTildeEqual": &HTML5Entity{Name: "NotTildeEqual", CodePoints: []int{8772}, Characters: []byte{0xe2, 0x89, 0x84}}, + "NotTildeFullEqual": &HTML5Entity{Name: "NotTildeFullEqual", CodePoints: []int{8775}, Characters: []byte{0xe2, 0x89, 0x87}}, + "NotTildeTilde": &HTML5Entity{Name: "NotTildeTilde", CodePoints: []int{8777}, Characters: []byte{0xe2, 0x89, 0x89}}, + "NotVerticalBar": &HTML5Entity{Name: "NotVerticalBar", CodePoints: []int{8740}, Characters: []byte{0xe2, 0x88, 0xa4}}, + "Nscr": &HTML5Entity{Name: "Nscr", CodePoints: []int{119977}, Characters: []byte{0xf0, 0x9d, 0x92, 0xa9}}, + "Ntilde": &HTML5Entity{Name: "Ntilde", CodePoints: []int{209}, Characters: []byte{0xc3, 0x91}}, + "Nu": &HTML5Entity{Name: "Nu", CodePoints: []int{925}, Characters: []byte{0xce, 0x9d}}, + "OElig": &HTML5Entity{Name: "OElig", CodePoints: []int{338}, Characters: []byte{0xc5, 0x92}}, + "Oacute": &HTML5Entity{Name: "Oacute", CodePoints: []int{211}, Characters: []byte{0xc3, 0x93}}, + "Ocirc": &HTML5Entity{Name: "Ocirc", CodePoints: []int{212}, Characters: []byte{0xc3, 0x94}}, + "Ocy": &HTML5Entity{Name: "Ocy", CodePoints: []int{1054}, Characters: []byte{0xd0, 0x9e}}, + "Odblac": &HTML5Entity{Name: "Odblac", CodePoints: []int{336}, Characters: []byte{0xc5, 0x90}}, + "Ofr": &HTML5Entity{Name: "Ofr", CodePoints: []int{120082}, Characters: []byte{0xf0, 0x9d, 0x94, 0x92}}, + "Ograve": &HTML5Entity{Name: "Ograve", CodePoints: []int{210}, Characters: []byte{0xc3, 0x92}}, + "Omacr": &HTML5Entity{Name: "Omacr", CodePoints: []int{332}, Characters: []byte{0xc5, 0x8c}}, + "Omega": &HTML5Entity{Name: "Omega", CodePoints: []int{937}, Characters: []byte{0xce, 0xa9}}, + "Omicron": &HTML5Entity{Name: "Omicron", CodePoints: []int{927}, Characters: []byte{0xce, 0x9f}}, + "Oopf": &HTML5Entity{Name: "Oopf", CodePoints: []int{120134}, Characters: []byte{0xf0, 0x9d, 0x95, 0x86}}, + "OpenCurlyDoubleQuote": &HTML5Entity{Name: "OpenCurlyDoubleQuote", CodePoints: []int{8220}, Characters: []byte{0xe2, 0x80, 0x9c}}, + "OpenCurlyQuote": &HTML5Entity{Name: "OpenCurlyQuote", CodePoints: []int{8216}, Characters: []byte{0xe2, 0x80, 0x98}}, + "Or": &HTML5Entity{Name: "Or", CodePoints: []int{10836}, Characters: []byte{0xe2, 0xa9, 0x94}}, + "Oscr": &HTML5Entity{Name: "Oscr", CodePoints: []int{119978}, Characters: []byte{0xf0, 0x9d, 0x92, 0xaa}}, + "Oslash": &HTML5Entity{Name: "Oslash", CodePoints: []int{216}, Characters: []byte{0xc3, 0x98}}, + "Otilde": &HTML5Entity{Name: "Otilde", CodePoints: []int{213}, Characters: []byte{0xc3, 0x95}}, + "Otimes": &HTML5Entity{Name: "Otimes", CodePoints: []int{10807}, Characters: []byte{0xe2, 0xa8, 0xb7}}, + "Ouml": &HTML5Entity{Name: "Ouml", CodePoints: []int{214}, Characters: []byte{0xc3, 0x96}}, + "OverBar": &HTML5Entity{Name: "OverBar", CodePoints: []int{8254}, Characters: []byte{0xe2, 0x80, 0xbe}}, + "OverBrace": &HTML5Entity{Name: "OverBrace", CodePoints: []int{9182}, Characters: []byte{0xe2, 0x8f, 0x9e}}, + "OverBracket": &HTML5Entity{Name: "OverBracket", CodePoints: []int{9140}, Characters: []byte{0xe2, 0x8e, 0xb4}}, + "OverParenthesis": &HTML5Entity{Name: "OverParenthesis", CodePoints: []int{9180}, Characters: []byte{0xe2, 0x8f, 0x9c}}, + "PartialD": &HTML5Entity{Name: "PartialD", CodePoints: []int{8706}, Characters: []byte{0xe2, 0x88, 0x82}}, + "Pcy": &HTML5Entity{Name: "Pcy", CodePoints: []int{1055}, Characters: []byte{0xd0, 0x9f}}, + "Pfr": &HTML5Entity{Name: "Pfr", CodePoints: []int{120083}, Characters: []byte{0xf0, 0x9d, 0x94, 0x93}}, + "Phi": &HTML5Entity{Name: "Phi", CodePoints: []int{934}, Characters: []byte{0xce, 0xa6}}, + "Pi": &HTML5Entity{Name: "Pi", CodePoints: []int{928}, Characters: []byte{0xce, 0xa0}}, + "PlusMinus": &HTML5Entity{Name: "PlusMinus", CodePoints: []int{177}, Characters: []byte{0xc2, 0xb1}}, + "Poincareplane": &HTML5Entity{Name: "Poincareplane", CodePoints: []int{8460}, Characters: []byte{0xe2, 0x84, 0x8c}}, + "Popf": &HTML5Entity{Name: "Popf", CodePoints: []int{8473}, Characters: []byte{0xe2, 0x84, 0x99}}, + "Pr": &HTML5Entity{Name: "Pr", CodePoints: []int{10939}, Characters: []byte{0xe2, 0xaa, 0xbb}}, + "Precedes": &HTML5Entity{Name: "Precedes", CodePoints: []int{8826}, Characters: []byte{0xe2, 0x89, 0xba}}, + "PrecedesEqual": &HTML5Entity{Name: "PrecedesEqual", CodePoints: []int{10927}, Characters: []byte{0xe2, 0xaa, 0xaf}}, + "PrecedesSlantEqual": &HTML5Entity{Name: "PrecedesSlantEqual", CodePoints: []int{8828}, Characters: []byte{0xe2, 0x89, 0xbc}}, + "PrecedesTilde": &HTML5Entity{Name: "PrecedesTilde", CodePoints: []int{8830}, Characters: []byte{0xe2, 0x89, 0xbe}}, + "Prime": &HTML5Entity{Name: "Prime", CodePoints: []int{8243}, Characters: []byte{0xe2, 0x80, 0xb3}}, + "Product": &HTML5Entity{Name: "Product", CodePoints: []int{8719}, Characters: []byte{0xe2, 0x88, 0x8f}}, + "Proportion": &HTML5Entity{Name: "Proportion", CodePoints: []int{8759}, Characters: []byte{0xe2, 0x88, 0xb7}}, + "Proportional": &HTML5Entity{Name: "Proportional", CodePoints: []int{8733}, Characters: []byte{0xe2, 0x88, 0x9d}}, + "Pscr": &HTML5Entity{Name: "Pscr", CodePoints: []int{119979}, Characters: []byte{0xf0, 0x9d, 0x92, 0xab}}, + "Psi": &HTML5Entity{Name: "Psi", CodePoints: []int{936}, Characters: []byte{0xce, 0xa8}}, + "QUOT": &HTML5Entity{Name: "QUOT", CodePoints: []int{34}, Characters: []byte{0x22}}, + "Qfr": &HTML5Entity{Name: "Qfr", CodePoints: []int{120084}, Characters: []byte{0xf0, 0x9d, 0x94, 0x94}}, + "Qopf": &HTML5Entity{Name: "Qopf", CodePoints: []int{8474}, Characters: []byte{0xe2, 0x84, 0x9a}}, + "Qscr": &HTML5Entity{Name: "Qscr", CodePoints: []int{119980}, Characters: []byte{0xf0, 0x9d, 0x92, 0xac}}, + "RBarr": &HTML5Entity{Name: "RBarr", CodePoints: []int{10512}, Characters: []byte{0xe2, 0xa4, 0x90}}, + "REG": &HTML5Entity{Name: "REG", CodePoints: []int{174}, Characters: []byte{0xc2, 0xae}}, + "Racute": &HTML5Entity{Name: "Racute", CodePoints: []int{340}, Characters: []byte{0xc5, 0x94}}, + "Rang": &HTML5Entity{Name: "Rang", CodePoints: []int{10219}, Characters: []byte{0xe2, 0x9f, 0xab}}, + "Rarr": &HTML5Entity{Name: "Rarr", CodePoints: []int{8608}, Characters: []byte{0xe2, 0x86, 0xa0}}, + "Rarrtl": &HTML5Entity{Name: "Rarrtl", CodePoints: []int{10518}, Characters: []byte{0xe2, 0xa4, 0x96}}, + "Rcaron": &HTML5Entity{Name: "Rcaron", CodePoints: []int{344}, Characters: []byte{0xc5, 0x98}}, + "Rcedil": &HTML5Entity{Name: "Rcedil", CodePoints: []int{342}, Characters: []byte{0xc5, 0x96}}, + "Rcy": &HTML5Entity{Name: "Rcy", CodePoints: []int{1056}, Characters: []byte{0xd0, 0xa0}}, + "Re": &HTML5Entity{Name: "Re", CodePoints: []int{8476}, Characters: []byte{0xe2, 0x84, 0x9c}}, + "ReverseElement": &HTML5Entity{Name: "ReverseElement", CodePoints: []int{8715}, Characters: []byte{0xe2, 0x88, 0x8b}}, + "ReverseEquilibrium": &HTML5Entity{Name: "ReverseEquilibrium", CodePoints: []int{8651}, Characters: []byte{0xe2, 0x87, 0x8b}}, + "ReverseUpEquilibrium": &HTML5Entity{Name: "ReverseUpEquilibrium", CodePoints: []int{10607}, Characters: []byte{0xe2, 0xa5, 0xaf}}, + "Rfr": &HTML5Entity{Name: "Rfr", CodePoints: []int{8476}, Characters: []byte{0xe2, 0x84, 0x9c}}, + "Rho": &HTML5Entity{Name: "Rho", CodePoints: []int{929}, Characters: []byte{0xce, 0xa1}}, + "RightAngleBracket": &HTML5Entity{Name: "RightAngleBracket", CodePoints: []int{10217}, Characters: []byte{0xe2, 0x9f, 0xa9}}, + "RightArrow": &HTML5Entity{Name: "RightArrow", CodePoints: []int{8594}, Characters: []byte{0xe2, 0x86, 0x92}}, + "RightArrowBar": &HTML5Entity{Name: "RightArrowBar", CodePoints: []int{8677}, Characters: []byte{0xe2, 0x87, 0xa5}}, + "RightArrowLeftArrow": &HTML5Entity{Name: "RightArrowLeftArrow", CodePoints: []int{8644}, Characters: []byte{0xe2, 0x87, 0x84}}, + "RightCeiling": &HTML5Entity{Name: "RightCeiling", CodePoints: []int{8969}, Characters: []byte{0xe2, 0x8c, 0x89}}, + "RightDoubleBracket": &HTML5Entity{Name: "RightDoubleBracket", CodePoints: []int{10215}, Characters: []byte{0xe2, 0x9f, 0xa7}}, + "RightDownTeeVector": &HTML5Entity{Name: "RightDownTeeVector", CodePoints: []int{10589}, Characters: []byte{0xe2, 0xa5, 0x9d}}, + "RightDownVector": &HTML5Entity{Name: "RightDownVector", CodePoints: []int{8642}, Characters: []byte{0xe2, 0x87, 0x82}}, + "RightDownVectorBar": &HTML5Entity{Name: "RightDownVectorBar", CodePoints: []int{10581}, Characters: []byte{0xe2, 0xa5, 0x95}}, + "RightFloor": &HTML5Entity{Name: "RightFloor", CodePoints: []int{8971}, Characters: []byte{0xe2, 0x8c, 0x8b}}, + "RightTee": &HTML5Entity{Name: "RightTee", CodePoints: []int{8866}, Characters: []byte{0xe2, 0x8a, 0xa2}}, + "RightTeeArrow": &HTML5Entity{Name: "RightTeeArrow", CodePoints: []int{8614}, Characters: []byte{0xe2, 0x86, 0xa6}}, + "RightTeeVector": &HTML5Entity{Name: "RightTeeVector", CodePoints: []int{10587}, Characters: []byte{0xe2, 0xa5, 0x9b}}, + "RightTriangle": &HTML5Entity{Name: "RightTriangle", CodePoints: []int{8883}, Characters: []byte{0xe2, 0x8a, 0xb3}}, + "RightTriangleBar": &HTML5Entity{Name: "RightTriangleBar", CodePoints: []int{10704}, Characters: []byte{0xe2, 0xa7, 0x90}}, + "RightTriangleEqual": &HTML5Entity{Name: "RightTriangleEqual", CodePoints: []int{8885}, Characters: []byte{0xe2, 0x8a, 0xb5}}, + "RightUpDownVector": &HTML5Entity{Name: "RightUpDownVector", CodePoints: []int{10575}, Characters: []byte{0xe2, 0xa5, 0x8f}}, + "RightUpTeeVector": &HTML5Entity{Name: "RightUpTeeVector", CodePoints: []int{10588}, Characters: []byte{0xe2, 0xa5, 0x9c}}, + "RightUpVector": &HTML5Entity{Name: "RightUpVector", CodePoints: []int{8638}, Characters: []byte{0xe2, 0x86, 0xbe}}, + "RightUpVectorBar": &HTML5Entity{Name: "RightUpVectorBar", CodePoints: []int{10580}, Characters: []byte{0xe2, 0xa5, 0x94}}, + "RightVector": &HTML5Entity{Name: "RightVector", CodePoints: []int{8640}, Characters: []byte{0xe2, 0x87, 0x80}}, + "RightVectorBar": &HTML5Entity{Name: "RightVectorBar", CodePoints: []int{10579}, Characters: []byte{0xe2, 0xa5, 0x93}}, + "Rightarrow": &HTML5Entity{Name: "Rightarrow", CodePoints: []int{8658}, Characters: []byte{0xe2, 0x87, 0x92}}, + "Ropf": &HTML5Entity{Name: "Ropf", CodePoints: []int{8477}, Characters: []byte{0xe2, 0x84, 0x9d}}, + "RoundImplies": &HTML5Entity{Name: "RoundImplies", CodePoints: []int{10608}, Characters: []byte{0xe2, 0xa5, 0xb0}}, + "Rrightarrow": &HTML5Entity{Name: "Rrightarrow", CodePoints: []int{8667}, Characters: []byte{0xe2, 0x87, 0x9b}}, + "Rscr": &HTML5Entity{Name: "Rscr", CodePoints: []int{8475}, Characters: []byte{0xe2, 0x84, 0x9b}}, + "Rsh": &HTML5Entity{Name: "Rsh", CodePoints: []int{8625}, Characters: []byte{0xe2, 0x86, 0xb1}}, + "RuleDelayed": &HTML5Entity{Name: "RuleDelayed", CodePoints: []int{10740}, Characters: []byte{0xe2, 0xa7, 0xb4}}, + "SHCHcy": &HTML5Entity{Name: "SHCHcy", CodePoints: []int{1065}, Characters: []byte{0xd0, 0xa9}}, + "SHcy": &HTML5Entity{Name: "SHcy", CodePoints: []int{1064}, Characters: []byte{0xd0, 0xa8}}, + "SOFTcy": &HTML5Entity{Name: "SOFTcy", CodePoints: []int{1068}, Characters: []byte{0xd0, 0xac}}, + "Sacute": &HTML5Entity{Name: "Sacute", CodePoints: []int{346}, Characters: []byte{0xc5, 0x9a}}, + "Sc": &HTML5Entity{Name: "Sc", CodePoints: []int{10940}, Characters: []byte{0xe2, 0xaa, 0xbc}}, + "Scaron": &HTML5Entity{Name: "Scaron", CodePoints: []int{352}, Characters: []byte{0xc5, 0xa0}}, + "Scedil": &HTML5Entity{Name: "Scedil", CodePoints: []int{350}, Characters: []byte{0xc5, 0x9e}}, + "Scirc": &HTML5Entity{Name: "Scirc", CodePoints: []int{348}, Characters: []byte{0xc5, 0x9c}}, + "Scy": &HTML5Entity{Name: "Scy", CodePoints: []int{1057}, Characters: []byte{0xd0, 0xa1}}, + "Sfr": &HTML5Entity{Name: "Sfr", CodePoints: []int{120086}, Characters: []byte{0xf0, 0x9d, 0x94, 0x96}}, + "ShortDownArrow": &HTML5Entity{Name: "ShortDownArrow", CodePoints: []int{8595}, Characters: []byte{0xe2, 0x86, 0x93}}, + "ShortLeftArrow": &HTML5Entity{Name: "ShortLeftArrow", CodePoints: []int{8592}, Characters: []byte{0xe2, 0x86, 0x90}}, + "ShortRightArrow": &HTML5Entity{Name: "ShortRightArrow", CodePoints: []int{8594}, Characters: []byte{0xe2, 0x86, 0x92}}, + "ShortUpArrow": &HTML5Entity{Name: "ShortUpArrow", CodePoints: []int{8593}, Characters: []byte{0xe2, 0x86, 0x91}}, + "Sigma": &HTML5Entity{Name: "Sigma", CodePoints: []int{931}, Characters: []byte{0xce, 0xa3}}, + "SmallCircle": &HTML5Entity{Name: "SmallCircle", CodePoints: []int{8728}, Characters: []byte{0xe2, 0x88, 0x98}}, + "Sopf": &HTML5Entity{Name: "Sopf", CodePoints: []int{120138}, Characters: []byte{0xf0, 0x9d, 0x95, 0x8a}}, + "Sqrt": &HTML5Entity{Name: "Sqrt", CodePoints: []int{8730}, Characters: []byte{0xe2, 0x88, 0x9a}}, + "Square": &HTML5Entity{Name: "Square", CodePoints: []int{9633}, Characters: []byte{0xe2, 0x96, 0xa1}}, + "SquareIntersection": &HTML5Entity{Name: "SquareIntersection", CodePoints: []int{8851}, Characters: []byte{0xe2, 0x8a, 0x93}}, + "SquareSubset": &HTML5Entity{Name: "SquareSubset", CodePoints: []int{8847}, Characters: []byte{0xe2, 0x8a, 0x8f}}, + "SquareSubsetEqual": &HTML5Entity{Name: "SquareSubsetEqual", CodePoints: []int{8849}, Characters: []byte{0xe2, 0x8a, 0x91}}, + "SquareSuperset": &HTML5Entity{Name: "SquareSuperset", CodePoints: []int{8848}, Characters: []byte{0xe2, 0x8a, 0x90}}, + "SquareSupersetEqual": &HTML5Entity{Name: "SquareSupersetEqual", CodePoints: []int{8850}, Characters: []byte{0xe2, 0x8a, 0x92}}, + "SquareUnion": &HTML5Entity{Name: "SquareUnion", CodePoints: []int{8852}, Characters: []byte{0xe2, 0x8a, 0x94}}, + "Sscr": &HTML5Entity{Name: "Sscr", CodePoints: []int{119982}, Characters: []byte{0xf0, 0x9d, 0x92, 0xae}}, + "Star": &HTML5Entity{Name: "Star", CodePoints: []int{8902}, Characters: []byte{0xe2, 0x8b, 0x86}}, + "Sub": &HTML5Entity{Name: "Sub", CodePoints: []int{8912}, Characters: []byte{0xe2, 0x8b, 0x90}}, + "Subset": &HTML5Entity{Name: "Subset", CodePoints: []int{8912}, Characters: []byte{0xe2, 0x8b, 0x90}}, + "SubsetEqual": &HTML5Entity{Name: "SubsetEqual", CodePoints: []int{8838}, Characters: []byte{0xe2, 0x8a, 0x86}}, + "Succeeds": &HTML5Entity{Name: "Succeeds", CodePoints: []int{8827}, Characters: []byte{0xe2, 0x89, 0xbb}}, + "SucceedsEqual": &HTML5Entity{Name: "SucceedsEqual", CodePoints: []int{10928}, Characters: []byte{0xe2, 0xaa, 0xb0}}, + "SucceedsSlantEqual": &HTML5Entity{Name: "SucceedsSlantEqual", CodePoints: []int{8829}, Characters: []byte{0xe2, 0x89, 0xbd}}, + "SucceedsTilde": &HTML5Entity{Name: "SucceedsTilde", CodePoints: []int{8831}, Characters: []byte{0xe2, 0x89, 0xbf}}, + "SuchThat": &HTML5Entity{Name: "SuchThat", CodePoints: []int{8715}, Characters: []byte{0xe2, 0x88, 0x8b}}, + "Sum": &HTML5Entity{Name: "Sum", CodePoints: []int{8721}, Characters: []byte{0xe2, 0x88, 0x91}}, + "Sup": &HTML5Entity{Name: "Sup", CodePoints: []int{8913}, Characters: []byte{0xe2, 0x8b, 0x91}}, + "Superset": &HTML5Entity{Name: "Superset", CodePoints: []int{8835}, Characters: []byte{0xe2, 0x8a, 0x83}}, + "SupersetEqual": &HTML5Entity{Name: "SupersetEqual", CodePoints: []int{8839}, Characters: []byte{0xe2, 0x8a, 0x87}}, + "Supset": &HTML5Entity{Name: "Supset", CodePoints: []int{8913}, Characters: []byte{0xe2, 0x8b, 0x91}}, + "THORN": &HTML5Entity{Name: "THORN", CodePoints: []int{222}, Characters: []byte{0xc3, 0x9e}}, + "TRADE": &HTML5Entity{Name: "TRADE", CodePoints: []int{8482}, Characters: []byte{0xe2, 0x84, 0xa2}}, + "TSHcy": &HTML5Entity{Name: "TSHcy", CodePoints: []int{1035}, Characters: []byte{0xd0, 0x8b}}, + "TScy": &HTML5Entity{Name: "TScy", CodePoints: []int{1062}, Characters: []byte{0xd0, 0xa6}}, + "Tab": &HTML5Entity{Name: "Tab", CodePoints: []int{9}, Characters: []byte{0x9}}, + "Tau": &HTML5Entity{Name: "Tau", CodePoints: []int{932}, Characters: []byte{0xce, 0xa4}}, + "Tcaron": &HTML5Entity{Name: "Tcaron", CodePoints: []int{356}, Characters: []byte{0xc5, 0xa4}}, + "Tcedil": &HTML5Entity{Name: "Tcedil", CodePoints: []int{354}, Characters: []byte{0xc5, 0xa2}}, + "Tcy": &HTML5Entity{Name: "Tcy", CodePoints: []int{1058}, Characters: []byte{0xd0, 0xa2}}, + "Tfr": &HTML5Entity{Name: "Tfr", CodePoints: []int{120087}, Characters: []byte{0xf0, 0x9d, 0x94, 0x97}}, + "Therefore": &HTML5Entity{Name: "Therefore", CodePoints: []int{8756}, Characters: []byte{0xe2, 0x88, 0xb4}}, + "Theta": &HTML5Entity{Name: "Theta", CodePoints: []int{920}, Characters: []byte{0xce, 0x98}}, + "ThickSpace": &HTML5Entity{Name: "ThickSpace", CodePoints: []int{8287, 8202}, Characters: []byte{0xe2, 0x81, 0x9f, 0xe2, 0x80, 0x8a}}, + "ThinSpace": &HTML5Entity{Name: "ThinSpace", CodePoints: []int{8201}, Characters: []byte{0xe2, 0x80, 0x89}}, + "Tilde": &HTML5Entity{Name: "Tilde", CodePoints: []int{8764}, Characters: []byte{0xe2, 0x88, 0xbc}}, + "TildeEqual": &HTML5Entity{Name: "TildeEqual", CodePoints: []int{8771}, Characters: []byte{0xe2, 0x89, 0x83}}, + "TildeFullEqual": &HTML5Entity{Name: "TildeFullEqual", CodePoints: []int{8773}, Characters: []byte{0xe2, 0x89, 0x85}}, + "TildeTilde": &HTML5Entity{Name: "TildeTilde", CodePoints: []int{8776}, Characters: []byte{0xe2, 0x89, 0x88}}, + "Topf": &HTML5Entity{Name: "Topf", CodePoints: []int{120139}, Characters: []byte{0xf0, 0x9d, 0x95, 0x8b}}, + "TripleDot": &HTML5Entity{Name: "TripleDot", CodePoints: []int{8411}, Characters: []byte{0xe2, 0x83, 0x9b}}, + "Tscr": &HTML5Entity{Name: "Tscr", CodePoints: []int{119983}, Characters: []byte{0xf0, 0x9d, 0x92, 0xaf}}, + "Tstrok": &HTML5Entity{Name: "Tstrok", CodePoints: []int{358}, Characters: []byte{0xc5, 0xa6}}, + "Uacute": &HTML5Entity{Name: "Uacute", CodePoints: []int{218}, Characters: []byte{0xc3, 0x9a}}, + "Uarr": &HTML5Entity{Name: "Uarr", CodePoints: []int{8607}, Characters: []byte{0xe2, 0x86, 0x9f}}, + "Uarrocir": &HTML5Entity{Name: "Uarrocir", CodePoints: []int{10569}, Characters: []byte{0xe2, 0xa5, 0x89}}, + "Ubrcy": &HTML5Entity{Name: "Ubrcy", CodePoints: []int{1038}, Characters: []byte{0xd0, 0x8e}}, + "Ubreve": &HTML5Entity{Name: "Ubreve", CodePoints: []int{364}, Characters: []byte{0xc5, 0xac}}, + "Ucirc": &HTML5Entity{Name: "Ucirc", CodePoints: []int{219}, Characters: []byte{0xc3, 0x9b}}, + "Ucy": &HTML5Entity{Name: "Ucy", CodePoints: []int{1059}, Characters: []byte{0xd0, 0xa3}}, + "Udblac": &HTML5Entity{Name: "Udblac", CodePoints: []int{368}, Characters: []byte{0xc5, 0xb0}}, + "Ufr": &HTML5Entity{Name: "Ufr", CodePoints: []int{120088}, Characters: []byte{0xf0, 0x9d, 0x94, 0x98}}, + "Ugrave": &HTML5Entity{Name: "Ugrave", CodePoints: []int{217}, Characters: []byte{0xc3, 0x99}}, + "Umacr": &HTML5Entity{Name: "Umacr", CodePoints: []int{362}, Characters: []byte{0xc5, 0xaa}}, + "UnderBar": &HTML5Entity{Name: "UnderBar", CodePoints: []int{95}, Characters: []byte{0x5f}}, + "UnderBrace": &HTML5Entity{Name: "UnderBrace", CodePoints: []int{9183}, Characters: []byte{0xe2, 0x8f, 0x9f}}, + "UnderBracket": &HTML5Entity{Name: "UnderBracket", CodePoints: []int{9141}, Characters: []byte{0xe2, 0x8e, 0xb5}}, + "UnderParenthesis": &HTML5Entity{Name: "UnderParenthesis", CodePoints: []int{9181}, Characters: []byte{0xe2, 0x8f, 0x9d}}, + "Union": &HTML5Entity{Name: "Union", CodePoints: []int{8899}, Characters: []byte{0xe2, 0x8b, 0x83}}, + "UnionPlus": &HTML5Entity{Name: "UnionPlus", CodePoints: []int{8846}, Characters: []byte{0xe2, 0x8a, 0x8e}}, + "Uogon": &HTML5Entity{Name: "Uogon", CodePoints: []int{370}, Characters: []byte{0xc5, 0xb2}}, + "Uopf": &HTML5Entity{Name: "Uopf", CodePoints: []int{120140}, Characters: []byte{0xf0, 0x9d, 0x95, 0x8c}}, + "UpArrow": &HTML5Entity{Name: "UpArrow", CodePoints: []int{8593}, Characters: []byte{0xe2, 0x86, 0x91}}, + "UpArrowBar": &HTML5Entity{Name: "UpArrowBar", CodePoints: []int{10514}, Characters: []byte{0xe2, 0xa4, 0x92}}, + "UpArrowDownArrow": &HTML5Entity{Name: "UpArrowDownArrow", CodePoints: []int{8645}, Characters: []byte{0xe2, 0x87, 0x85}}, + "UpDownArrow": &HTML5Entity{Name: "UpDownArrow", CodePoints: []int{8597}, Characters: []byte{0xe2, 0x86, 0x95}}, + "UpEquilibrium": &HTML5Entity{Name: "UpEquilibrium", CodePoints: []int{10606}, Characters: []byte{0xe2, 0xa5, 0xae}}, + "UpTee": &HTML5Entity{Name: "UpTee", CodePoints: []int{8869}, Characters: []byte{0xe2, 0x8a, 0xa5}}, + "UpTeeArrow": &HTML5Entity{Name: "UpTeeArrow", CodePoints: []int{8613}, Characters: []byte{0xe2, 0x86, 0xa5}}, + "Uparrow": &HTML5Entity{Name: "Uparrow", CodePoints: []int{8657}, Characters: []byte{0xe2, 0x87, 0x91}}, + "Updownarrow": &HTML5Entity{Name: "Updownarrow", CodePoints: []int{8661}, Characters: []byte{0xe2, 0x87, 0x95}}, + "UpperLeftArrow": &HTML5Entity{Name: "UpperLeftArrow", CodePoints: []int{8598}, Characters: []byte{0xe2, 0x86, 0x96}}, + "UpperRightArrow": &HTML5Entity{Name: "UpperRightArrow", CodePoints: []int{8599}, Characters: []byte{0xe2, 0x86, 0x97}}, + "Upsi": &HTML5Entity{Name: "Upsi", CodePoints: []int{978}, Characters: []byte{0xcf, 0x92}}, + "Upsilon": &HTML5Entity{Name: "Upsilon", CodePoints: []int{933}, Characters: []byte{0xce, 0xa5}}, + "Uring": &HTML5Entity{Name: "Uring", CodePoints: []int{366}, Characters: []byte{0xc5, 0xae}}, + "Uscr": &HTML5Entity{Name: "Uscr", CodePoints: []int{119984}, Characters: []byte{0xf0, 0x9d, 0x92, 0xb0}}, + "Utilde": &HTML5Entity{Name: "Utilde", CodePoints: []int{360}, Characters: []byte{0xc5, 0xa8}}, + "Uuml": &HTML5Entity{Name: "Uuml", CodePoints: []int{220}, Characters: []byte{0xc3, 0x9c}}, + "VDash": &HTML5Entity{Name: "VDash", CodePoints: []int{8875}, Characters: []byte{0xe2, 0x8a, 0xab}}, + "Vbar": &HTML5Entity{Name: "Vbar", CodePoints: []int{10987}, Characters: []byte{0xe2, 0xab, 0xab}}, + "Vcy": &HTML5Entity{Name: "Vcy", CodePoints: []int{1042}, Characters: []byte{0xd0, 0x92}}, + "Vdash": &HTML5Entity{Name: "Vdash", CodePoints: []int{8873}, Characters: []byte{0xe2, 0x8a, 0xa9}}, + "Vdashl": &HTML5Entity{Name: "Vdashl", CodePoints: []int{10982}, Characters: []byte{0xe2, 0xab, 0xa6}}, + "Vee": &HTML5Entity{Name: "Vee", CodePoints: []int{8897}, Characters: []byte{0xe2, 0x8b, 0x81}}, + "Verbar": &HTML5Entity{Name: "Verbar", CodePoints: []int{8214}, Characters: []byte{0xe2, 0x80, 0x96}}, + "Vert": &HTML5Entity{Name: "Vert", CodePoints: []int{8214}, Characters: []byte{0xe2, 0x80, 0x96}}, + "VerticalBar": &HTML5Entity{Name: "VerticalBar", CodePoints: []int{8739}, Characters: []byte{0xe2, 0x88, 0xa3}}, + "VerticalLine": &HTML5Entity{Name: "VerticalLine", CodePoints: []int{124}, Characters: []byte{0x7c}}, + "VerticalSeparator": &HTML5Entity{Name: "VerticalSeparator", CodePoints: []int{10072}, Characters: []byte{0xe2, 0x9d, 0x98}}, + "VerticalTilde": &HTML5Entity{Name: "VerticalTilde", CodePoints: []int{8768}, Characters: []byte{0xe2, 0x89, 0x80}}, + "VeryThinSpace": &HTML5Entity{Name: "VeryThinSpace", CodePoints: []int{8202}, Characters: []byte{0xe2, 0x80, 0x8a}}, + "Vfr": &HTML5Entity{Name: "Vfr", CodePoints: []int{120089}, Characters: []byte{0xf0, 0x9d, 0x94, 0x99}}, + "Vopf": &HTML5Entity{Name: "Vopf", CodePoints: []int{120141}, Characters: []byte{0xf0, 0x9d, 0x95, 0x8d}}, + "Vscr": &HTML5Entity{Name: "Vscr", CodePoints: []int{119985}, Characters: []byte{0xf0, 0x9d, 0x92, 0xb1}}, + "Vvdash": &HTML5Entity{Name: "Vvdash", CodePoints: []int{8874}, Characters: []byte{0xe2, 0x8a, 0xaa}}, + "Wcirc": &HTML5Entity{Name: "Wcirc", CodePoints: []int{372}, Characters: []byte{0xc5, 0xb4}}, + "Wedge": &HTML5Entity{Name: "Wedge", CodePoints: []int{8896}, Characters: []byte{0xe2, 0x8b, 0x80}}, + "Wfr": &HTML5Entity{Name: "Wfr", CodePoints: []int{120090}, Characters: []byte{0xf0, 0x9d, 0x94, 0x9a}}, + "Wopf": &HTML5Entity{Name: "Wopf", CodePoints: []int{120142}, Characters: []byte{0xf0, 0x9d, 0x95, 0x8e}}, + "Wscr": &HTML5Entity{Name: "Wscr", CodePoints: []int{119986}, Characters: []byte{0xf0, 0x9d, 0x92, 0xb2}}, + "Xfr": &HTML5Entity{Name: "Xfr", CodePoints: []int{120091}, Characters: []byte{0xf0, 0x9d, 0x94, 0x9b}}, + "Xi": &HTML5Entity{Name: "Xi", CodePoints: []int{926}, Characters: []byte{0xce, 0x9e}}, + "Xopf": &HTML5Entity{Name: "Xopf", CodePoints: []int{120143}, Characters: []byte{0xf0, 0x9d, 0x95, 0x8f}}, + "Xscr": &HTML5Entity{Name: "Xscr", CodePoints: []int{119987}, Characters: []byte{0xf0, 0x9d, 0x92, 0xb3}}, + "YAcy": &HTML5Entity{Name: "YAcy", CodePoints: []int{1071}, Characters: []byte{0xd0, 0xaf}}, + "YIcy": &HTML5Entity{Name: "YIcy", CodePoints: []int{1031}, Characters: []byte{0xd0, 0x87}}, + "YUcy": &HTML5Entity{Name: "YUcy", CodePoints: []int{1070}, Characters: []byte{0xd0, 0xae}}, + "Yacute": &HTML5Entity{Name: "Yacute", CodePoints: []int{221}, Characters: []byte{0xc3, 0x9d}}, + "Ycirc": &HTML5Entity{Name: "Ycirc", CodePoints: []int{374}, Characters: []byte{0xc5, 0xb6}}, + "Ycy": &HTML5Entity{Name: "Ycy", CodePoints: []int{1067}, Characters: []byte{0xd0, 0xab}}, + "Yfr": &HTML5Entity{Name: "Yfr", CodePoints: []int{120092}, Characters: []byte{0xf0, 0x9d, 0x94, 0x9c}}, + "Yopf": &HTML5Entity{Name: "Yopf", CodePoints: []int{120144}, Characters: []byte{0xf0, 0x9d, 0x95, 0x90}}, + "Yscr": &HTML5Entity{Name: "Yscr", CodePoints: []int{119988}, Characters: []byte{0xf0, 0x9d, 0x92, 0xb4}}, + "Yuml": &HTML5Entity{Name: "Yuml", CodePoints: []int{376}, Characters: []byte{0xc5, 0xb8}}, + "ZHcy": &HTML5Entity{Name: "ZHcy", CodePoints: []int{1046}, Characters: []byte{0xd0, 0x96}}, + "Zacute": &HTML5Entity{Name: "Zacute", CodePoints: []int{377}, Characters: []byte{0xc5, 0xb9}}, + "Zcaron": &HTML5Entity{Name: "Zcaron", CodePoints: []int{381}, Characters: []byte{0xc5, 0xbd}}, + "Zcy": &HTML5Entity{Name: "Zcy", CodePoints: []int{1047}, Characters: []byte{0xd0, 0x97}}, + "Zdot": &HTML5Entity{Name: "Zdot", CodePoints: []int{379}, Characters: []byte{0xc5, 0xbb}}, + "ZeroWidthSpace": &HTML5Entity{Name: "ZeroWidthSpace", CodePoints: []int{8203}, Characters: []byte{0xe2, 0x80, 0x8b}}, + "Zeta": &HTML5Entity{Name: "Zeta", CodePoints: []int{918}, Characters: []byte{0xce, 0x96}}, + "Zfr": &HTML5Entity{Name: "Zfr", CodePoints: []int{8488}, Characters: []byte{0xe2, 0x84, 0xa8}}, + "Zopf": &HTML5Entity{Name: "Zopf", CodePoints: []int{8484}, Characters: []byte{0xe2, 0x84, 0xa4}}, + "Zscr": &HTML5Entity{Name: "Zscr", CodePoints: []int{119989}, Characters: []byte{0xf0, 0x9d, 0x92, 0xb5}}, + "aacute": &HTML5Entity{Name: "aacute", CodePoints: []int{225}, Characters: []byte{0xc3, 0xa1}}, + "abreve": &HTML5Entity{Name: "abreve", CodePoints: []int{259}, Characters: []byte{0xc4, 0x83}}, + "ac": &HTML5Entity{Name: "ac", CodePoints: []int{8766}, Characters: []byte{0xe2, 0x88, 0xbe}}, + "acE": &HTML5Entity{Name: "acE", CodePoints: []int{8766, 819}, Characters: []byte{0xe2, 0x88, 0xbe, 0xcc, 0xb3}}, + "acd": &HTML5Entity{Name: "acd", CodePoints: []int{8767}, Characters: []byte{0xe2, 0x88, 0xbf}}, + "acirc": &HTML5Entity{Name: "acirc", CodePoints: []int{226}, Characters: []byte{0xc3, 0xa2}}, + "acute": &HTML5Entity{Name: "acute", CodePoints: []int{180}, Characters: []byte{0xc2, 0xb4}}, + "acy": &HTML5Entity{Name: "acy", CodePoints: []int{1072}, Characters: []byte{0xd0, 0xb0}}, + "aelig": &HTML5Entity{Name: "aelig", CodePoints: []int{230}, Characters: []byte{0xc3, 0xa6}}, + "af": &HTML5Entity{Name: "af", CodePoints: []int{8289}, Characters: []byte{0xe2, 0x81, 0xa1}}, + "afr": &HTML5Entity{Name: "afr", CodePoints: []int{120094}, Characters: []byte{0xf0, 0x9d, 0x94, 0x9e}}, + "agrave": &HTML5Entity{Name: "agrave", CodePoints: []int{224}, Characters: []byte{0xc3, 0xa0}}, + "alefsym": &HTML5Entity{Name: "alefsym", CodePoints: []int{8501}, Characters: []byte{0xe2, 0x84, 0xb5}}, + "aleph": &HTML5Entity{Name: "aleph", CodePoints: []int{8501}, Characters: []byte{0xe2, 0x84, 0xb5}}, + "alpha": &HTML5Entity{Name: "alpha", CodePoints: []int{945}, Characters: []byte{0xce, 0xb1}}, + "amacr": &HTML5Entity{Name: "amacr", CodePoints: []int{257}, Characters: []byte{0xc4, 0x81}}, + "amalg": &HTML5Entity{Name: "amalg", CodePoints: []int{10815}, Characters: []byte{0xe2, 0xa8, 0xbf}}, + "amp": &HTML5Entity{Name: "amp", CodePoints: []int{38}, Characters: []byte{0x26}}, + "and": &HTML5Entity{Name: "and", CodePoints: []int{8743}, Characters: []byte{0xe2, 0x88, 0xa7}}, + "andand": &HTML5Entity{Name: "andand", CodePoints: []int{10837}, Characters: []byte{0xe2, 0xa9, 0x95}}, + "andd": &HTML5Entity{Name: "andd", CodePoints: []int{10844}, Characters: []byte{0xe2, 0xa9, 0x9c}}, + "andslope": &HTML5Entity{Name: "andslope", CodePoints: []int{10840}, Characters: []byte{0xe2, 0xa9, 0x98}}, + "andv": &HTML5Entity{Name: "andv", CodePoints: []int{10842}, Characters: []byte{0xe2, 0xa9, 0x9a}}, + "ang": &HTML5Entity{Name: "ang", CodePoints: []int{8736}, Characters: []byte{0xe2, 0x88, 0xa0}}, + "ange": &HTML5Entity{Name: "ange", CodePoints: []int{10660}, Characters: []byte{0xe2, 0xa6, 0xa4}}, + "angle": &HTML5Entity{Name: "angle", CodePoints: []int{8736}, Characters: []byte{0xe2, 0x88, 0xa0}}, + "angmsd": &HTML5Entity{Name: "angmsd", CodePoints: []int{8737}, Characters: []byte{0xe2, 0x88, 0xa1}}, + "angmsdaa": &HTML5Entity{Name: "angmsdaa", CodePoints: []int{10664}, Characters: []byte{0xe2, 0xa6, 0xa8}}, + "angmsdab": &HTML5Entity{Name: "angmsdab", CodePoints: []int{10665}, Characters: []byte{0xe2, 0xa6, 0xa9}}, + "angmsdac": &HTML5Entity{Name: "angmsdac", CodePoints: []int{10666}, Characters: []byte{0xe2, 0xa6, 0xaa}}, + "angmsdad": &HTML5Entity{Name: "angmsdad", CodePoints: []int{10667}, Characters: []byte{0xe2, 0xa6, 0xab}}, + "angmsdae": &HTML5Entity{Name: "angmsdae", CodePoints: []int{10668}, Characters: []byte{0xe2, 0xa6, 0xac}}, + "angmsdaf": &HTML5Entity{Name: "angmsdaf", CodePoints: []int{10669}, Characters: []byte{0xe2, 0xa6, 0xad}}, + "angmsdag": &HTML5Entity{Name: "angmsdag", CodePoints: []int{10670}, Characters: []byte{0xe2, 0xa6, 0xae}}, + "angmsdah": &HTML5Entity{Name: "angmsdah", CodePoints: []int{10671}, Characters: []byte{0xe2, 0xa6, 0xaf}}, + "angrt": &HTML5Entity{Name: "angrt", CodePoints: []int{8735}, Characters: []byte{0xe2, 0x88, 0x9f}}, + "angrtvb": &HTML5Entity{Name: "angrtvb", CodePoints: []int{8894}, Characters: []byte{0xe2, 0x8a, 0xbe}}, + "angrtvbd": &HTML5Entity{Name: "angrtvbd", CodePoints: []int{10653}, Characters: []byte{0xe2, 0xa6, 0x9d}}, + "angsph": &HTML5Entity{Name: "angsph", CodePoints: []int{8738}, Characters: []byte{0xe2, 0x88, 0xa2}}, + "angst": &HTML5Entity{Name: "angst", CodePoints: []int{197}, Characters: []byte{0xc3, 0x85}}, + "angzarr": &HTML5Entity{Name: "angzarr", CodePoints: []int{9084}, Characters: []byte{0xe2, 0x8d, 0xbc}}, + "aogon": &HTML5Entity{Name: "aogon", CodePoints: []int{261}, Characters: []byte{0xc4, 0x85}}, + "aopf": &HTML5Entity{Name: "aopf", CodePoints: []int{120146}, Characters: []byte{0xf0, 0x9d, 0x95, 0x92}}, + "ap": &HTML5Entity{Name: "ap", CodePoints: []int{8776}, Characters: []byte{0xe2, 0x89, 0x88}}, + "apE": &HTML5Entity{Name: "apE", CodePoints: []int{10864}, Characters: []byte{0xe2, 0xa9, 0xb0}}, + "apacir": &HTML5Entity{Name: "apacir", CodePoints: []int{10863}, Characters: []byte{0xe2, 0xa9, 0xaf}}, + "ape": &HTML5Entity{Name: "ape", CodePoints: []int{8778}, Characters: []byte{0xe2, 0x89, 0x8a}}, + "apid": &HTML5Entity{Name: "apid", CodePoints: []int{8779}, Characters: []byte{0xe2, 0x89, 0x8b}}, + "apos": &HTML5Entity{Name: "apos", CodePoints: []int{39}, Characters: []byte{0x27}}, + "approx": &HTML5Entity{Name: "approx", CodePoints: []int{8776}, Characters: []byte{0xe2, 0x89, 0x88}}, + "approxeq": &HTML5Entity{Name: "approxeq", CodePoints: []int{8778}, Characters: []byte{0xe2, 0x89, 0x8a}}, + "aring": &HTML5Entity{Name: "aring", CodePoints: []int{229}, Characters: []byte{0xc3, 0xa5}}, + "ascr": &HTML5Entity{Name: "ascr", CodePoints: []int{119990}, Characters: []byte{0xf0, 0x9d, 0x92, 0xb6}}, + "ast": &HTML5Entity{Name: "ast", CodePoints: []int{42}, Characters: []byte{0x2a}}, + "asymp": &HTML5Entity{Name: "asymp", CodePoints: []int{8776}, Characters: []byte{0xe2, 0x89, 0x88}}, + "asympeq": &HTML5Entity{Name: "asympeq", CodePoints: []int{8781}, Characters: []byte{0xe2, 0x89, 0x8d}}, + "atilde": &HTML5Entity{Name: "atilde", CodePoints: []int{227}, Characters: []byte{0xc3, 0xa3}}, + "auml": &HTML5Entity{Name: "auml", CodePoints: []int{228}, Characters: []byte{0xc3, 0xa4}}, + "awconint": &HTML5Entity{Name: "awconint", CodePoints: []int{8755}, Characters: []byte{0xe2, 0x88, 0xb3}}, + "awint": &HTML5Entity{Name: "awint", CodePoints: []int{10769}, Characters: []byte{0xe2, 0xa8, 0x91}}, + "bNot": &HTML5Entity{Name: "bNot", CodePoints: []int{10989}, Characters: []byte{0xe2, 0xab, 0xad}}, + "backcong": &HTML5Entity{Name: "backcong", CodePoints: []int{8780}, Characters: []byte{0xe2, 0x89, 0x8c}}, + "backepsilon": &HTML5Entity{Name: "backepsilon", CodePoints: []int{1014}, Characters: []byte{0xcf, 0xb6}}, + "backprime": &HTML5Entity{Name: "backprime", CodePoints: []int{8245}, Characters: []byte{0xe2, 0x80, 0xb5}}, + "backsim": &HTML5Entity{Name: "backsim", CodePoints: []int{8765}, Characters: []byte{0xe2, 0x88, 0xbd}}, + "backsimeq": &HTML5Entity{Name: "backsimeq", CodePoints: []int{8909}, Characters: []byte{0xe2, 0x8b, 0x8d}}, + "barvee": &HTML5Entity{Name: "barvee", CodePoints: []int{8893}, Characters: []byte{0xe2, 0x8a, 0xbd}}, + "barwed": &HTML5Entity{Name: "barwed", CodePoints: []int{8965}, Characters: []byte{0xe2, 0x8c, 0x85}}, + "barwedge": &HTML5Entity{Name: "barwedge", CodePoints: []int{8965}, Characters: []byte{0xe2, 0x8c, 0x85}}, + "bbrk": &HTML5Entity{Name: "bbrk", CodePoints: []int{9141}, Characters: []byte{0xe2, 0x8e, 0xb5}}, + "bbrktbrk": &HTML5Entity{Name: "bbrktbrk", CodePoints: []int{9142}, Characters: []byte{0xe2, 0x8e, 0xb6}}, + "bcong": &HTML5Entity{Name: "bcong", CodePoints: []int{8780}, Characters: []byte{0xe2, 0x89, 0x8c}}, + "bcy": &HTML5Entity{Name: "bcy", CodePoints: []int{1073}, Characters: []byte{0xd0, 0xb1}}, + "bdquo": &HTML5Entity{Name: "bdquo", CodePoints: []int{8222}, Characters: []byte{0xe2, 0x80, 0x9e}}, + "becaus": &HTML5Entity{Name: "becaus", CodePoints: []int{8757}, Characters: []byte{0xe2, 0x88, 0xb5}}, + "because": &HTML5Entity{Name: "because", CodePoints: []int{8757}, Characters: []byte{0xe2, 0x88, 0xb5}}, + "bemptyv": &HTML5Entity{Name: "bemptyv", CodePoints: []int{10672}, Characters: []byte{0xe2, 0xa6, 0xb0}}, + "bepsi": &HTML5Entity{Name: "bepsi", CodePoints: []int{1014}, Characters: []byte{0xcf, 0xb6}}, + "bernou": &HTML5Entity{Name: "bernou", CodePoints: []int{8492}, Characters: []byte{0xe2, 0x84, 0xac}}, + "beta": &HTML5Entity{Name: "beta", CodePoints: []int{946}, Characters: []byte{0xce, 0xb2}}, + "beth": &HTML5Entity{Name: "beth", CodePoints: []int{8502}, Characters: []byte{0xe2, 0x84, 0xb6}}, + "between": &HTML5Entity{Name: "between", CodePoints: []int{8812}, Characters: []byte{0xe2, 0x89, 0xac}}, + "bfr": &HTML5Entity{Name: "bfr", CodePoints: []int{120095}, Characters: []byte{0xf0, 0x9d, 0x94, 0x9f}}, + "bigcap": &HTML5Entity{Name: "bigcap", CodePoints: []int{8898}, Characters: []byte{0xe2, 0x8b, 0x82}}, + "bigcirc": &HTML5Entity{Name: "bigcirc", CodePoints: []int{9711}, Characters: []byte{0xe2, 0x97, 0xaf}}, + "bigcup": &HTML5Entity{Name: "bigcup", CodePoints: []int{8899}, Characters: []byte{0xe2, 0x8b, 0x83}}, + "bigodot": &HTML5Entity{Name: "bigodot", CodePoints: []int{10752}, Characters: []byte{0xe2, 0xa8, 0x80}}, + "bigoplus": &HTML5Entity{Name: "bigoplus", CodePoints: []int{10753}, Characters: []byte{0xe2, 0xa8, 0x81}}, + "bigotimes": &HTML5Entity{Name: "bigotimes", CodePoints: []int{10754}, Characters: []byte{0xe2, 0xa8, 0x82}}, + "bigsqcup": &HTML5Entity{Name: "bigsqcup", CodePoints: []int{10758}, Characters: []byte{0xe2, 0xa8, 0x86}}, + "bigstar": &HTML5Entity{Name: "bigstar", CodePoints: []int{9733}, Characters: []byte{0xe2, 0x98, 0x85}}, + "bigtriangledown": &HTML5Entity{Name: "bigtriangledown", CodePoints: []int{9661}, Characters: []byte{0xe2, 0x96, 0xbd}}, + "bigtriangleup": &HTML5Entity{Name: "bigtriangleup", CodePoints: []int{9651}, Characters: []byte{0xe2, 0x96, 0xb3}}, + "biguplus": &HTML5Entity{Name: "biguplus", CodePoints: []int{10756}, Characters: []byte{0xe2, 0xa8, 0x84}}, + "bigvee": &HTML5Entity{Name: "bigvee", CodePoints: []int{8897}, Characters: []byte{0xe2, 0x8b, 0x81}}, + "bigwedge": &HTML5Entity{Name: "bigwedge", CodePoints: []int{8896}, Characters: []byte{0xe2, 0x8b, 0x80}}, + "bkarow": &HTML5Entity{Name: "bkarow", CodePoints: []int{10509}, Characters: []byte{0xe2, 0xa4, 0x8d}}, + "blacklozenge": &HTML5Entity{Name: "blacklozenge", CodePoints: []int{10731}, Characters: []byte{0xe2, 0xa7, 0xab}}, + "blacksquare": &HTML5Entity{Name: "blacksquare", CodePoints: []int{9642}, Characters: []byte{0xe2, 0x96, 0xaa}}, + "blacktriangle": &HTML5Entity{Name: "blacktriangle", CodePoints: []int{9652}, Characters: []byte{0xe2, 0x96, 0xb4}}, + "blacktriangledown": &HTML5Entity{Name: "blacktriangledown", CodePoints: []int{9662}, Characters: []byte{0xe2, 0x96, 0xbe}}, + "blacktriangleleft": &HTML5Entity{Name: "blacktriangleleft", CodePoints: []int{9666}, Characters: []byte{0xe2, 0x97, 0x82}}, + "blacktriangleright": &HTML5Entity{Name: "blacktriangleright", CodePoints: []int{9656}, Characters: []byte{0xe2, 0x96, 0xb8}}, + "blank": &HTML5Entity{Name: "blank", CodePoints: []int{9251}, Characters: []byte{0xe2, 0x90, 0xa3}}, + "blk12": &HTML5Entity{Name: "blk12", CodePoints: []int{9618}, Characters: []byte{0xe2, 0x96, 0x92}}, + "blk14": &HTML5Entity{Name: "blk14", CodePoints: []int{9617}, Characters: []byte{0xe2, 0x96, 0x91}}, + "blk34": &HTML5Entity{Name: "blk34", CodePoints: []int{9619}, Characters: []byte{0xe2, 0x96, 0x93}}, + "block": &HTML5Entity{Name: "block", CodePoints: []int{9608}, Characters: []byte{0xe2, 0x96, 0x88}}, + "bne": &HTML5Entity{Name: "bne", CodePoints: []int{61, 8421}, Characters: []byte{0x3d, 0xe2, 0x83, 0xa5}}, + "bnequiv": &HTML5Entity{Name: "bnequiv", CodePoints: []int{8801, 8421}, Characters: []byte{0xe2, 0x89, 0xa1, 0xe2, 0x83, 0xa5}}, + "bnot": &HTML5Entity{Name: "bnot", CodePoints: []int{8976}, Characters: []byte{0xe2, 0x8c, 0x90}}, + "bopf": &HTML5Entity{Name: "bopf", CodePoints: []int{120147}, Characters: []byte{0xf0, 0x9d, 0x95, 0x93}}, + "bot": &HTML5Entity{Name: "bot", CodePoints: []int{8869}, Characters: []byte{0xe2, 0x8a, 0xa5}}, + "bottom": &HTML5Entity{Name: "bottom", CodePoints: []int{8869}, Characters: []byte{0xe2, 0x8a, 0xa5}}, + "bowtie": &HTML5Entity{Name: "bowtie", CodePoints: []int{8904}, Characters: []byte{0xe2, 0x8b, 0x88}}, + "boxDL": &HTML5Entity{Name: "boxDL", CodePoints: []int{9559}, Characters: []byte{0xe2, 0x95, 0x97}}, + "boxDR": &HTML5Entity{Name: "boxDR", CodePoints: []int{9556}, Characters: []byte{0xe2, 0x95, 0x94}}, + "boxDl": &HTML5Entity{Name: "boxDl", CodePoints: []int{9558}, Characters: []byte{0xe2, 0x95, 0x96}}, + "boxDr": &HTML5Entity{Name: "boxDr", CodePoints: []int{9555}, Characters: []byte{0xe2, 0x95, 0x93}}, + "boxH": &HTML5Entity{Name: "boxH", CodePoints: []int{9552}, Characters: []byte{0xe2, 0x95, 0x90}}, + "boxHD": &HTML5Entity{Name: "boxHD", CodePoints: []int{9574}, Characters: []byte{0xe2, 0x95, 0xa6}}, + "boxHU": &HTML5Entity{Name: "boxHU", CodePoints: []int{9577}, Characters: []byte{0xe2, 0x95, 0xa9}}, + "boxHd": &HTML5Entity{Name: "boxHd", CodePoints: []int{9572}, Characters: []byte{0xe2, 0x95, 0xa4}}, + "boxHu": &HTML5Entity{Name: "boxHu", CodePoints: []int{9575}, Characters: []byte{0xe2, 0x95, 0xa7}}, + "boxUL": &HTML5Entity{Name: "boxUL", CodePoints: []int{9565}, Characters: []byte{0xe2, 0x95, 0x9d}}, + "boxUR": &HTML5Entity{Name: "boxUR", CodePoints: []int{9562}, Characters: []byte{0xe2, 0x95, 0x9a}}, + "boxUl": &HTML5Entity{Name: "boxUl", CodePoints: []int{9564}, Characters: []byte{0xe2, 0x95, 0x9c}}, + "boxUr": &HTML5Entity{Name: "boxUr", CodePoints: []int{9561}, Characters: []byte{0xe2, 0x95, 0x99}}, + "boxV": &HTML5Entity{Name: "boxV", CodePoints: []int{9553}, Characters: []byte{0xe2, 0x95, 0x91}}, + "boxVH": &HTML5Entity{Name: "boxVH", CodePoints: []int{9580}, Characters: []byte{0xe2, 0x95, 0xac}}, + "boxVL": &HTML5Entity{Name: "boxVL", CodePoints: []int{9571}, Characters: []byte{0xe2, 0x95, 0xa3}}, + "boxVR": &HTML5Entity{Name: "boxVR", CodePoints: []int{9568}, Characters: []byte{0xe2, 0x95, 0xa0}}, + "boxVh": &HTML5Entity{Name: "boxVh", CodePoints: []int{9579}, Characters: []byte{0xe2, 0x95, 0xab}}, + "boxVl": &HTML5Entity{Name: "boxVl", CodePoints: []int{9570}, Characters: []byte{0xe2, 0x95, 0xa2}}, + "boxVr": &HTML5Entity{Name: "boxVr", CodePoints: []int{9567}, Characters: []byte{0xe2, 0x95, 0x9f}}, + "boxbox": &HTML5Entity{Name: "boxbox", CodePoints: []int{10697}, Characters: []byte{0xe2, 0xa7, 0x89}}, + "boxdL": &HTML5Entity{Name: "boxdL", CodePoints: []int{9557}, Characters: []byte{0xe2, 0x95, 0x95}}, + "boxdR": &HTML5Entity{Name: "boxdR", CodePoints: []int{9554}, Characters: []byte{0xe2, 0x95, 0x92}}, + "boxdl": &HTML5Entity{Name: "boxdl", CodePoints: []int{9488}, Characters: []byte{0xe2, 0x94, 0x90}}, + "boxdr": &HTML5Entity{Name: "boxdr", CodePoints: []int{9484}, Characters: []byte{0xe2, 0x94, 0x8c}}, + "boxh": &HTML5Entity{Name: "boxh", CodePoints: []int{9472}, Characters: []byte{0xe2, 0x94, 0x80}}, + "boxhD": &HTML5Entity{Name: "boxhD", CodePoints: []int{9573}, Characters: []byte{0xe2, 0x95, 0xa5}}, + "boxhU": &HTML5Entity{Name: "boxhU", CodePoints: []int{9576}, Characters: []byte{0xe2, 0x95, 0xa8}}, + "boxhd": &HTML5Entity{Name: "boxhd", CodePoints: []int{9516}, Characters: []byte{0xe2, 0x94, 0xac}}, + "boxhu": &HTML5Entity{Name: "boxhu", CodePoints: []int{9524}, Characters: []byte{0xe2, 0x94, 0xb4}}, + "boxminus": &HTML5Entity{Name: "boxminus", CodePoints: []int{8863}, Characters: []byte{0xe2, 0x8a, 0x9f}}, + "boxplus": &HTML5Entity{Name: "boxplus", CodePoints: []int{8862}, Characters: []byte{0xe2, 0x8a, 0x9e}}, + "boxtimes": &HTML5Entity{Name: "boxtimes", CodePoints: []int{8864}, Characters: []byte{0xe2, 0x8a, 0xa0}}, + "boxuL": &HTML5Entity{Name: "boxuL", CodePoints: []int{9563}, Characters: []byte{0xe2, 0x95, 0x9b}}, + "boxuR": &HTML5Entity{Name: "boxuR", CodePoints: []int{9560}, Characters: []byte{0xe2, 0x95, 0x98}}, + "boxul": &HTML5Entity{Name: "boxul", CodePoints: []int{9496}, Characters: []byte{0xe2, 0x94, 0x98}}, + "boxur": &HTML5Entity{Name: "boxur", CodePoints: []int{9492}, Characters: []byte{0xe2, 0x94, 0x94}}, + "boxv": &HTML5Entity{Name: "boxv", CodePoints: []int{9474}, Characters: []byte{0xe2, 0x94, 0x82}}, + "boxvH": &HTML5Entity{Name: "boxvH", CodePoints: []int{9578}, Characters: []byte{0xe2, 0x95, 0xaa}}, + "boxvL": &HTML5Entity{Name: "boxvL", CodePoints: []int{9569}, Characters: []byte{0xe2, 0x95, 0xa1}}, + "boxvR": &HTML5Entity{Name: "boxvR", CodePoints: []int{9566}, Characters: []byte{0xe2, 0x95, 0x9e}}, + "boxvh": &HTML5Entity{Name: "boxvh", CodePoints: []int{9532}, Characters: []byte{0xe2, 0x94, 0xbc}}, + "boxvl": &HTML5Entity{Name: "boxvl", CodePoints: []int{9508}, Characters: []byte{0xe2, 0x94, 0xa4}}, + "boxvr": &HTML5Entity{Name: "boxvr", CodePoints: []int{9500}, Characters: []byte{0xe2, 0x94, 0x9c}}, + "bprime": &HTML5Entity{Name: "bprime", CodePoints: []int{8245}, Characters: []byte{0xe2, 0x80, 0xb5}}, + "breve": &HTML5Entity{Name: "breve", CodePoints: []int{728}, Characters: []byte{0xcb, 0x98}}, + "brvbar": &HTML5Entity{Name: "brvbar", CodePoints: []int{166}, Characters: []byte{0xc2, 0xa6}}, + "bscr": &HTML5Entity{Name: "bscr", CodePoints: []int{119991}, Characters: []byte{0xf0, 0x9d, 0x92, 0xb7}}, + "bsemi": &HTML5Entity{Name: "bsemi", CodePoints: []int{8271}, Characters: []byte{0xe2, 0x81, 0x8f}}, + "bsim": &HTML5Entity{Name: "bsim", CodePoints: []int{8765}, Characters: []byte{0xe2, 0x88, 0xbd}}, + "bsime": &HTML5Entity{Name: "bsime", CodePoints: []int{8909}, Characters: []byte{0xe2, 0x8b, 0x8d}}, + "bsol": &HTML5Entity{Name: "bsol", CodePoints: []int{92}, Characters: []byte{0x5c}}, + "bsolb": &HTML5Entity{Name: "bsolb", CodePoints: []int{10693}, Characters: []byte{0xe2, 0xa7, 0x85}}, + "bsolhsub": &HTML5Entity{Name: "bsolhsub", CodePoints: []int{10184}, Characters: []byte{0xe2, 0x9f, 0x88}}, + "bull": &HTML5Entity{Name: "bull", CodePoints: []int{8226}, Characters: []byte{0xe2, 0x80, 0xa2}}, + "bullet": &HTML5Entity{Name: "bullet", CodePoints: []int{8226}, Characters: []byte{0xe2, 0x80, 0xa2}}, + "bump": &HTML5Entity{Name: "bump", CodePoints: []int{8782}, Characters: []byte{0xe2, 0x89, 0x8e}}, + "bumpE": &HTML5Entity{Name: "bumpE", CodePoints: []int{10926}, Characters: []byte{0xe2, 0xaa, 0xae}}, + "bumpe": &HTML5Entity{Name: "bumpe", CodePoints: []int{8783}, Characters: []byte{0xe2, 0x89, 0x8f}}, + "bumpeq": &HTML5Entity{Name: "bumpeq", CodePoints: []int{8783}, Characters: []byte{0xe2, 0x89, 0x8f}}, + "cacute": &HTML5Entity{Name: "cacute", CodePoints: []int{263}, Characters: []byte{0xc4, 0x87}}, + "cap": &HTML5Entity{Name: "cap", CodePoints: []int{8745}, Characters: []byte{0xe2, 0x88, 0xa9}}, + "capand": &HTML5Entity{Name: "capand", CodePoints: []int{10820}, Characters: []byte{0xe2, 0xa9, 0x84}}, + "capbrcup": &HTML5Entity{Name: "capbrcup", CodePoints: []int{10825}, Characters: []byte{0xe2, 0xa9, 0x89}}, + "capcap": &HTML5Entity{Name: "capcap", CodePoints: []int{10827}, Characters: []byte{0xe2, 0xa9, 0x8b}}, + "capcup": &HTML5Entity{Name: "capcup", CodePoints: []int{10823}, Characters: []byte{0xe2, 0xa9, 0x87}}, + "capdot": &HTML5Entity{Name: "capdot", CodePoints: []int{10816}, Characters: []byte{0xe2, 0xa9, 0x80}}, + "caps": &HTML5Entity{Name: "caps", CodePoints: []int{8745, 65024}, Characters: []byte{0xe2, 0x88, 0xa9, 0xef, 0xb8, 0x80}}, + "caret": &HTML5Entity{Name: "caret", CodePoints: []int{8257}, Characters: []byte{0xe2, 0x81, 0x81}}, + "caron": &HTML5Entity{Name: "caron", CodePoints: []int{711}, Characters: []byte{0xcb, 0x87}}, + "ccaps": &HTML5Entity{Name: "ccaps", CodePoints: []int{10829}, Characters: []byte{0xe2, 0xa9, 0x8d}}, + "ccaron": &HTML5Entity{Name: "ccaron", CodePoints: []int{269}, Characters: []byte{0xc4, 0x8d}}, + "ccedil": &HTML5Entity{Name: "ccedil", CodePoints: []int{231}, Characters: []byte{0xc3, 0xa7}}, + "ccirc": &HTML5Entity{Name: "ccirc", CodePoints: []int{265}, Characters: []byte{0xc4, 0x89}}, + "ccups": &HTML5Entity{Name: "ccups", CodePoints: []int{10828}, Characters: []byte{0xe2, 0xa9, 0x8c}}, + "ccupssm": &HTML5Entity{Name: "ccupssm", CodePoints: []int{10832}, Characters: []byte{0xe2, 0xa9, 0x90}}, + "cdot": &HTML5Entity{Name: "cdot", CodePoints: []int{267}, Characters: []byte{0xc4, 0x8b}}, + "cedil": &HTML5Entity{Name: "cedil", CodePoints: []int{184}, Characters: []byte{0xc2, 0xb8}}, + "cemptyv": &HTML5Entity{Name: "cemptyv", CodePoints: []int{10674}, Characters: []byte{0xe2, 0xa6, 0xb2}}, + "cent": &HTML5Entity{Name: "cent", CodePoints: []int{162}, Characters: []byte{0xc2, 0xa2}}, + "centerdot": &HTML5Entity{Name: "centerdot", CodePoints: []int{183}, Characters: []byte{0xc2, 0xb7}}, + "cfr": &HTML5Entity{Name: "cfr", CodePoints: []int{120096}, Characters: []byte{0xf0, 0x9d, 0x94, 0xa0}}, + "chcy": &HTML5Entity{Name: "chcy", CodePoints: []int{1095}, Characters: []byte{0xd1, 0x87}}, + "check": &HTML5Entity{Name: "check", CodePoints: []int{10003}, Characters: []byte{0xe2, 0x9c, 0x93}}, + "checkmark": &HTML5Entity{Name: "checkmark", CodePoints: []int{10003}, Characters: []byte{0xe2, 0x9c, 0x93}}, + "chi": &HTML5Entity{Name: "chi", CodePoints: []int{967}, Characters: []byte{0xcf, 0x87}}, + "cir": &HTML5Entity{Name: "cir", CodePoints: []int{9675}, Characters: []byte{0xe2, 0x97, 0x8b}}, + "cirE": &HTML5Entity{Name: "cirE", CodePoints: []int{10691}, Characters: []byte{0xe2, 0xa7, 0x83}}, + "circ": &HTML5Entity{Name: "circ", CodePoints: []int{710}, Characters: []byte{0xcb, 0x86}}, + "circeq": &HTML5Entity{Name: "circeq", CodePoints: []int{8791}, Characters: []byte{0xe2, 0x89, 0x97}}, + "circlearrowleft": &HTML5Entity{Name: "circlearrowleft", CodePoints: []int{8634}, Characters: []byte{0xe2, 0x86, 0xba}}, + "circlearrowright": &HTML5Entity{Name: "circlearrowright", CodePoints: []int{8635}, Characters: []byte{0xe2, 0x86, 0xbb}}, + "circledR": &HTML5Entity{Name: "circledR", CodePoints: []int{174}, Characters: []byte{0xc2, 0xae}}, + "circledS": &HTML5Entity{Name: "circledS", CodePoints: []int{9416}, Characters: []byte{0xe2, 0x93, 0x88}}, + "circledast": &HTML5Entity{Name: "circledast", CodePoints: []int{8859}, Characters: []byte{0xe2, 0x8a, 0x9b}}, + "circledcirc": &HTML5Entity{Name: "circledcirc", CodePoints: []int{8858}, Characters: []byte{0xe2, 0x8a, 0x9a}}, + "circleddash": &HTML5Entity{Name: "circleddash", CodePoints: []int{8861}, Characters: []byte{0xe2, 0x8a, 0x9d}}, + "cire": &HTML5Entity{Name: "cire", CodePoints: []int{8791}, Characters: []byte{0xe2, 0x89, 0x97}}, + "cirfnint": &HTML5Entity{Name: "cirfnint", CodePoints: []int{10768}, Characters: []byte{0xe2, 0xa8, 0x90}}, + "cirmid": &HTML5Entity{Name: "cirmid", CodePoints: []int{10991}, Characters: []byte{0xe2, 0xab, 0xaf}}, + "cirscir": &HTML5Entity{Name: "cirscir", CodePoints: []int{10690}, Characters: []byte{0xe2, 0xa7, 0x82}}, + "clubs": &HTML5Entity{Name: "clubs", CodePoints: []int{9827}, Characters: []byte{0xe2, 0x99, 0xa3}}, + "clubsuit": &HTML5Entity{Name: "clubsuit", CodePoints: []int{9827}, Characters: []byte{0xe2, 0x99, 0xa3}}, + "colon": &HTML5Entity{Name: "colon", CodePoints: []int{58}, Characters: []byte{0x3a}}, + "colone": &HTML5Entity{Name: "colone", CodePoints: []int{8788}, Characters: []byte{0xe2, 0x89, 0x94}}, + "coloneq": &HTML5Entity{Name: "coloneq", CodePoints: []int{8788}, Characters: []byte{0xe2, 0x89, 0x94}}, + "comma": &HTML5Entity{Name: "comma", CodePoints: []int{44}, Characters: []byte{0x2c}}, + "commat": &HTML5Entity{Name: "commat", CodePoints: []int{64}, Characters: []byte{0x40}}, + "comp": &HTML5Entity{Name: "comp", CodePoints: []int{8705}, Characters: []byte{0xe2, 0x88, 0x81}}, + "compfn": &HTML5Entity{Name: "compfn", CodePoints: []int{8728}, Characters: []byte{0xe2, 0x88, 0x98}}, + "complement": &HTML5Entity{Name: "complement", CodePoints: []int{8705}, Characters: []byte{0xe2, 0x88, 0x81}}, + "complexes": &HTML5Entity{Name: "complexes", CodePoints: []int{8450}, Characters: []byte{0xe2, 0x84, 0x82}}, + "cong": &HTML5Entity{Name: "cong", CodePoints: []int{8773}, Characters: []byte{0xe2, 0x89, 0x85}}, + "congdot": &HTML5Entity{Name: "congdot", CodePoints: []int{10861}, Characters: []byte{0xe2, 0xa9, 0xad}}, + "conint": &HTML5Entity{Name: "conint", CodePoints: []int{8750}, Characters: []byte{0xe2, 0x88, 0xae}}, + "copf": &HTML5Entity{Name: "copf", CodePoints: []int{120148}, Characters: []byte{0xf0, 0x9d, 0x95, 0x94}}, + "coprod": &HTML5Entity{Name: "coprod", CodePoints: []int{8720}, Characters: []byte{0xe2, 0x88, 0x90}}, + "copy": &HTML5Entity{Name: "copy", CodePoints: []int{169}, Characters: []byte{0xc2, 0xa9}}, + "copysr": &HTML5Entity{Name: "copysr", CodePoints: []int{8471}, Characters: []byte{0xe2, 0x84, 0x97}}, + "crarr": &HTML5Entity{Name: "crarr", CodePoints: []int{8629}, Characters: []byte{0xe2, 0x86, 0xb5}}, + "cross": &HTML5Entity{Name: "cross", CodePoints: []int{10007}, Characters: []byte{0xe2, 0x9c, 0x97}}, + "cscr": &HTML5Entity{Name: "cscr", CodePoints: []int{119992}, Characters: []byte{0xf0, 0x9d, 0x92, 0xb8}}, + "csub": &HTML5Entity{Name: "csub", CodePoints: []int{10959}, Characters: []byte{0xe2, 0xab, 0x8f}}, + "csube": &HTML5Entity{Name: "csube", CodePoints: []int{10961}, Characters: []byte{0xe2, 0xab, 0x91}}, + "csup": &HTML5Entity{Name: "csup", CodePoints: []int{10960}, Characters: []byte{0xe2, 0xab, 0x90}}, + "csupe": &HTML5Entity{Name: "csupe", CodePoints: []int{10962}, Characters: []byte{0xe2, 0xab, 0x92}}, + "ctdot": &HTML5Entity{Name: "ctdot", CodePoints: []int{8943}, Characters: []byte{0xe2, 0x8b, 0xaf}}, + "cudarrl": &HTML5Entity{Name: "cudarrl", CodePoints: []int{10552}, Characters: []byte{0xe2, 0xa4, 0xb8}}, + "cudarrr": &HTML5Entity{Name: "cudarrr", CodePoints: []int{10549}, Characters: []byte{0xe2, 0xa4, 0xb5}}, + "cuepr": &HTML5Entity{Name: "cuepr", CodePoints: []int{8926}, Characters: []byte{0xe2, 0x8b, 0x9e}}, + "cuesc": &HTML5Entity{Name: "cuesc", CodePoints: []int{8927}, Characters: []byte{0xe2, 0x8b, 0x9f}}, + "cularr": &HTML5Entity{Name: "cularr", CodePoints: []int{8630}, Characters: []byte{0xe2, 0x86, 0xb6}}, + "cularrp": &HTML5Entity{Name: "cularrp", CodePoints: []int{10557}, Characters: []byte{0xe2, 0xa4, 0xbd}}, + "cup": &HTML5Entity{Name: "cup", CodePoints: []int{8746}, Characters: []byte{0xe2, 0x88, 0xaa}}, + "cupbrcap": &HTML5Entity{Name: "cupbrcap", CodePoints: []int{10824}, Characters: []byte{0xe2, 0xa9, 0x88}}, + "cupcap": &HTML5Entity{Name: "cupcap", CodePoints: []int{10822}, Characters: []byte{0xe2, 0xa9, 0x86}}, + "cupcup": &HTML5Entity{Name: "cupcup", CodePoints: []int{10826}, Characters: []byte{0xe2, 0xa9, 0x8a}}, + "cupdot": &HTML5Entity{Name: "cupdot", CodePoints: []int{8845}, Characters: []byte{0xe2, 0x8a, 0x8d}}, + "cupor": &HTML5Entity{Name: "cupor", CodePoints: []int{10821}, Characters: []byte{0xe2, 0xa9, 0x85}}, + "cups": &HTML5Entity{Name: "cups", CodePoints: []int{8746, 65024}, Characters: []byte{0xe2, 0x88, 0xaa, 0xef, 0xb8, 0x80}}, + "curarr": &HTML5Entity{Name: "curarr", CodePoints: []int{8631}, Characters: []byte{0xe2, 0x86, 0xb7}}, + "curarrm": &HTML5Entity{Name: "curarrm", CodePoints: []int{10556}, Characters: []byte{0xe2, 0xa4, 0xbc}}, + "curlyeqprec": &HTML5Entity{Name: "curlyeqprec", CodePoints: []int{8926}, Characters: []byte{0xe2, 0x8b, 0x9e}}, + "curlyeqsucc": &HTML5Entity{Name: "curlyeqsucc", CodePoints: []int{8927}, Characters: []byte{0xe2, 0x8b, 0x9f}}, + "curlyvee": &HTML5Entity{Name: "curlyvee", CodePoints: []int{8910}, Characters: []byte{0xe2, 0x8b, 0x8e}}, + "curlywedge": &HTML5Entity{Name: "curlywedge", CodePoints: []int{8911}, Characters: []byte{0xe2, 0x8b, 0x8f}}, + "curren": &HTML5Entity{Name: "curren", CodePoints: []int{164}, Characters: []byte{0xc2, 0xa4}}, + "curvearrowleft": &HTML5Entity{Name: "curvearrowleft", CodePoints: []int{8630}, Characters: []byte{0xe2, 0x86, 0xb6}}, + "curvearrowright": &HTML5Entity{Name: "curvearrowright", CodePoints: []int{8631}, Characters: []byte{0xe2, 0x86, 0xb7}}, + "cuvee": &HTML5Entity{Name: "cuvee", CodePoints: []int{8910}, Characters: []byte{0xe2, 0x8b, 0x8e}}, + "cuwed": &HTML5Entity{Name: "cuwed", CodePoints: []int{8911}, Characters: []byte{0xe2, 0x8b, 0x8f}}, + "cwconint": &HTML5Entity{Name: "cwconint", CodePoints: []int{8754}, Characters: []byte{0xe2, 0x88, 0xb2}}, + "cwint": &HTML5Entity{Name: "cwint", CodePoints: []int{8753}, Characters: []byte{0xe2, 0x88, 0xb1}}, + "cylcty": &HTML5Entity{Name: "cylcty", CodePoints: []int{9005}, Characters: []byte{0xe2, 0x8c, 0xad}}, + "dArr": &HTML5Entity{Name: "dArr", CodePoints: []int{8659}, Characters: []byte{0xe2, 0x87, 0x93}}, + "dHar": &HTML5Entity{Name: "dHar", CodePoints: []int{10597}, Characters: []byte{0xe2, 0xa5, 0xa5}}, + "dagger": &HTML5Entity{Name: "dagger", CodePoints: []int{8224}, Characters: []byte{0xe2, 0x80, 0xa0}}, + "daleth": &HTML5Entity{Name: "daleth", CodePoints: []int{8504}, Characters: []byte{0xe2, 0x84, 0xb8}}, + "darr": &HTML5Entity{Name: "darr", CodePoints: []int{8595}, Characters: []byte{0xe2, 0x86, 0x93}}, + "dash": &HTML5Entity{Name: "dash", CodePoints: []int{8208}, Characters: []byte{0xe2, 0x80, 0x90}}, + "dashv": &HTML5Entity{Name: "dashv", CodePoints: []int{8867}, Characters: []byte{0xe2, 0x8a, 0xa3}}, + "dbkarow": &HTML5Entity{Name: "dbkarow", CodePoints: []int{10511}, Characters: []byte{0xe2, 0xa4, 0x8f}}, + "dblac": &HTML5Entity{Name: "dblac", CodePoints: []int{733}, Characters: []byte{0xcb, 0x9d}}, + "dcaron": &HTML5Entity{Name: "dcaron", CodePoints: []int{271}, Characters: []byte{0xc4, 0x8f}}, + "dcy": &HTML5Entity{Name: "dcy", CodePoints: []int{1076}, Characters: []byte{0xd0, 0xb4}}, + "dd": &HTML5Entity{Name: "dd", CodePoints: []int{8518}, Characters: []byte{0xe2, 0x85, 0x86}}, + "ddagger": &HTML5Entity{Name: "ddagger", CodePoints: []int{8225}, Characters: []byte{0xe2, 0x80, 0xa1}}, + "ddarr": &HTML5Entity{Name: "ddarr", CodePoints: []int{8650}, Characters: []byte{0xe2, 0x87, 0x8a}}, + "ddotseq": &HTML5Entity{Name: "ddotseq", CodePoints: []int{10871}, Characters: []byte{0xe2, 0xa9, 0xb7}}, + "deg": &HTML5Entity{Name: "deg", CodePoints: []int{176}, Characters: []byte{0xc2, 0xb0}}, + "delta": &HTML5Entity{Name: "delta", CodePoints: []int{948}, Characters: []byte{0xce, 0xb4}}, + "demptyv": &HTML5Entity{Name: "demptyv", CodePoints: []int{10673}, Characters: []byte{0xe2, 0xa6, 0xb1}}, + "dfisht": &HTML5Entity{Name: "dfisht", CodePoints: []int{10623}, Characters: []byte{0xe2, 0xa5, 0xbf}}, + "dfr": &HTML5Entity{Name: "dfr", CodePoints: []int{120097}, Characters: []byte{0xf0, 0x9d, 0x94, 0xa1}}, + "dharl": &HTML5Entity{Name: "dharl", CodePoints: []int{8643}, Characters: []byte{0xe2, 0x87, 0x83}}, + "dharr": &HTML5Entity{Name: "dharr", CodePoints: []int{8642}, Characters: []byte{0xe2, 0x87, 0x82}}, + "diam": &HTML5Entity{Name: "diam", CodePoints: []int{8900}, Characters: []byte{0xe2, 0x8b, 0x84}}, + "diamond": &HTML5Entity{Name: "diamond", CodePoints: []int{8900}, Characters: []byte{0xe2, 0x8b, 0x84}}, + "diamondsuit": &HTML5Entity{Name: "diamondsuit", CodePoints: []int{9830}, Characters: []byte{0xe2, 0x99, 0xa6}}, + "diams": &HTML5Entity{Name: "diams", CodePoints: []int{9830}, Characters: []byte{0xe2, 0x99, 0xa6}}, + "die": &HTML5Entity{Name: "die", CodePoints: []int{168}, Characters: []byte{0xc2, 0xa8}}, + "digamma": &HTML5Entity{Name: "digamma", CodePoints: []int{989}, Characters: []byte{0xcf, 0x9d}}, + "disin": &HTML5Entity{Name: "disin", CodePoints: []int{8946}, Characters: []byte{0xe2, 0x8b, 0xb2}}, + "div": &HTML5Entity{Name: "div", CodePoints: []int{247}, Characters: []byte{0xc3, 0xb7}}, + "divide": &HTML5Entity{Name: "divide", CodePoints: []int{247}, Characters: []byte{0xc3, 0xb7}}, + "divideontimes": &HTML5Entity{Name: "divideontimes", CodePoints: []int{8903}, Characters: []byte{0xe2, 0x8b, 0x87}}, + "divonx": &HTML5Entity{Name: "divonx", CodePoints: []int{8903}, Characters: []byte{0xe2, 0x8b, 0x87}}, + "djcy": &HTML5Entity{Name: "djcy", CodePoints: []int{1106}, Characters: []byte{0xd1, 0x92}}, + "dlcorn": &HTML5Entity{Name: "dlcorn", CodePoints: []int{8990}, Characters: []byte{0xe2, 0x8c, 0x9e}}, + "dlcrop": &HTML5Entity{Name: "dlcrop", CodePoints: []int{8973}, Characters: []byte{0xe2, 0x8c, 0x8d}}, + "dollar": &HTML5Entity{Name: "dollar", CodePoints: []int{36}, Characters: []byte{0x24}}, + "dopf": &HTML5Entity{Name: "dopf", CodePoints: []int{120149}, Characters: []byte{0xf0, 0x9d, 0x95, 0x95}}, + "dot": &HTML5Entity{Name: "dot", CodePoints: []int{729}, Characters: []byte{0xcb, 0x99}}, + "doteq": &HTML5Entity{Name: "doteq", CodePoints: []int{8784}, Characters: []byte{0xe2, 0x89, 0x90}}, + "doteqdot": &HTML5Entity{Name: "doteqdot", CodePoints: []int{8785}, Characters: []byte{0xe2, 0x89, 0x91}}, + "dotminus": &HTML5Entity{Name: "dotminus", CodePoints: []int{8760}, Characters: []byte{0xe2, 0x88, 0xb8}}, + "dotplus": &HTML5Entity{Name: "dotplus", CodePoints: []int{8724}, Characters: []byte{0xe2, 0x88, 0x94}}, + "dotsquare": &HTML5Entity{Name: "dotsquare", CodePoints: []int{8865}, Characters: []byte{0xe2, 0x8a, 0xa1}}, + "doublebarwedge": &HTML5Entity{Name: "doublebarwedge", CodePoints: []int{8966}, Characters: []byte{0xe2, 0x8c, 0x86}}, + "downarrow": &HTML5Entity{Name: "downarrow", CodePoints: []int{8595}, Characters: []byte{0xe2, 0x86, 0x93}}, + "downdownarrows": &HTML5Entity{Name: "downdownarrows", CodePoints: []int{8650}, Characters: []byte{0xe2, 0x87, 0x8a}}, + "downharpoonleft": &HTML5Entity{Name: "downharpoonleft", CodePoints: []int{8643}, Characters: []byte{0xe2, 0x87, 0x83}}, + "downharpoonright": &HTML5Entity{Name: "downharpoonright", CodePoints: []int{8642}, Characters: []byte{0xe2, 0x87, 0x82}}, + "drbkarow": &HTML5Entity{Name: "drbkarow", CodePoints: []int{10512}, Characters: []byte{0xe2, 0xa4, 0x90}}, + "drcorn": &HTML5Entity{Name: "drcorn", CodePoints: []int{8991}, Characters: []byte{0xe2, 0x8c, 0x9f}}, + "drcrop": &HTML5Entity{Name: "drcrop", CodePoints: []int{8972}, Characters: []byte{0xe2, 0x8c, 0x8c}}, + "dscr": &HTML5Entity{Name: "dscr", CodePoints: []int{119993}, Characters: []byte{0xf0, 0x9d, 0x92, 0xb9}}, + "dscy": &HTML5Entity{Name: "dscy", CodePoints: []int{1109}, Characters: []byte{0xd1, 0x95}}, + "dsol": &HTML5Entity{Name: "dsol", CodePoints: []int{10742}, Characters: []byte{0xe2, 0xa7, 0xb6}}, + "dstrok": &HTML5Entity{Name: "dstrok", CodePoints: []int{273}, Characters: []byte{0xc4, 0x91}}, + "dtdot": &HTML5Entity{Name: "dtdot", CodePoints: []int{8945}, Characters: []byte{0xe2, 0x8b, 0xb1}}, + "dtri": &HTML5Entity{Name: "dtri", CodePoints: []int{9663}, Characters: []byte{0xe2, 0x96, 0xbf}}, + "dtrif": &HTML5Entity{Name: "dtrif", CodePoints: []int{9662}, Characters: []byte{0xe2, 0x96, 0xbe}}, + "duarr": &HTML5Entity{Name: "duarr", CodePoints: []int{8693}, Characters: []byte{0xe2, 0x87, 0xb5}}, + "duhar": &HTML5Entity{Name: "duhar", CodePoints: []int{10607}, Characters: []byte{0xe2, 0xa5, 0xaf}}, + "dwangle": &HTML5Entity{Name: "dwangle", CodePoints: []int{10662}, Characters: []byte{0xe2, 0xa6, 0xa6}}, + "dzcy": &HTML5Entity{Name: "dzcy", CodePoints: []int{1119}, Characters: []byte{0xd1, 0x9f}}, + "dzigrarr": &HTML5Entity{Name: "dzigrarr", CodePoints: []int{10239}, Characters: []byte{0xe2, 0x9f, 0xbf}}, + "eDDot": &HTML5Entity{Name: "eDDot", CodePoints: []int{10871}, Characters: []byte{0xe2, 0xa9, 0xb7}}, + "eDot": &HTML5Entity{Name: "eDot", CodePoints: []int{8785}, Characters: []byte{0xe2, 0x89, 0x91}}, + "eacute": &HTML5Entity{Name: "eacute", CodePoints: []int{233}, Characters: []byte{0xc3, 0xa9}}, + "easter": &HTML5Entity{Name: "easter", CodePoints: []int{10862}, Characters: []byte{0xe2, 0xa9, 0xae}}, + "ecaron": &HTML5Entity{Name: "ecaron", CodePoints: []int{283}, Characters: []byte{0xc4, 0x9b}}, + "ecir": &HTML5Entity{Name: "ecir", CodePoints: []int{8790}, Characters: []byte{0xe2, 0x89, 0x96}}, + "ecirc": &HTML5Entity{Name: "ecirc", CodePoints: []int{234}, Characters: []byte{0xc3, 0xaa}}, + "ecolon": &HTML5Entity{Name: "ecolon", CodePoints: []int{8789}, Characters: []byte{0xe2, 0x89, 0x95}}, + "ecy": &HTML5Entity{Name: "ecy", CodePoints: []int{1101}, Characters: []byte{0xd1, 0x8d}}, + "edot": &HTML5Entity{Name: "edot", CodePoints: []int{279}, Characters: []byte{0xc4, 0x97}}, + "ee": &HTML5Entity{Name: "ee", CodePoints: []int{8519}, Characters: []byte{0xe2, 0x85, 0x87}}, + "efDot": &HTML5Entity{Name: "efDot", CodePoints: []int{8786}, Characters: []byte{0xe2, 0x89, 0x92}}, + "efr": &HTML5Entity{Name: "efr", CodePoints: []int{120098}, Characters: []byte{0xf0, 0x9d, 0x94, 0xa2}}, + "eg": &HTML5Entity{Name: "eg", CodePoints: []int{10906}, Characters: []byte{0xe2, 0xaa, 0x9a}}, + "egrave": &HTML5Entity{Name: "egrave", CodePoints: []int{232}, Characters: []byte{0xc3, 0xa8}}, + "egs": &HTML5Entity{Name: "egs", CodePoints: []int{10902}, Characters: []byte{0xe2, 0xaa, 0x96}}, + "egsdot": &HTML5Entity{Name: "egsdot", CodePoints: []int{10904}, Characters: []byte{0xe2, 0xaa, 0x98}}, + "el": &HTML5Entity{Name: "el", CodePoints: []int{10905}, Characters: []byte{0xe2, 0xaa, 0x99}}, + "elinters": &HTML5Entity{Name: "elinters", CodePoints: []int{9191}, Characters: []byte{0xe2, 0x8f, 0xa7}}, + "ell": &HTML5Entity{Name: "ell", CodePoints: []int{8467}, Characters: []byte{0xe2, 0x84, 0x93}}, + "els": &HTML5Entity{Name: "els", CodePoints: []int{10901}, Characters: []byte{0xe2, 0xaa, 0x95}}, + "elsdot": &HTML5Entity{Name: "elsdot", CodePoints: []int{10903}, Characters: []byte{0xe2, 0xaa, 0x97}}, + "emacr": &HTML5Entity{Name: "emacr", CodePoints: []int{275}, Characters: []byte{0xc4, 0x93}}, + "empty": &HTML5Entity{Name: "empty", CodePoints: []int{8709}, Characters: []byte{0xe2, 0x88, 0x85}}, + "emptyset": &HTML5Entity{Name: "emptyset", CodePoints: []int{8709}, Characters: []byte{0xe2, 0x88, 0x85}}, + "emptyv": &HTML5Entity{Name: "emptyv", CodePoints: []int{8709}, Characters: []byte{0xe2, 0x88, 0x85}}, + "emsp": &HTML5Entity{Name: "emsp", CodePoints: []int{8195}, Characters: []byte{0xe2, 0x80, 0x83}}, + "emsp13": &HTML5Entity{Name: "emsp13", CodePoints: []int{8196}, Characters: []byte{0xe2, 0x80, 0x84}}, + "emsp14": &HTML5Entity{Name: "emsp14", CodePoints: []int{8197}, Characters: []byte{0xe2, 0x80, 0x85}}, + "eng": &HTML5Entity{Name: "eng", CodePoints: []int{331}, Characters: []byte{0xc5, 0x8b}}, + "ensp": &HTML5Entity{Name: "ensp", CodePoints: []int{8194}, Characters: []byte{0xe2, 0x80, 0x82}}, + "eogon": &HTML5Entity{Name: "eogon", CodePoints: []int{281}, Characters: []byte{0xc4, 0x99}}, + "eopf": &HTML5Entity{Name: "eopf", CodePoints: []int{120150}, Characters: []byte{0xf0, 0x9d, 0x95, 0x96}}, + "epar": &HTML5Entity{Name: "epar", CodePoints: []int{8917}, Characters: []byte{0xe2, 0x8b, 0x95}}, + "eparsl": &HTML5Entity{Name: "eparsl", CodePoints: []int{10723}, Characters: []byte{0xe2, 0xa7, 0xa3}}, + "eplus": &HTML5Entity{Name: "eplus", CodePoints: []int{10865}, Characters: []byte{0xe2, 0xa9, 0xb1}}, + "epsi": &HTML5Entity{Name: "epsi", CodePoints: []int{949}, Characters: []byte{0xce, 0xb5}}, + "epsilon": &HTML5Entity{Name: "epsilon", CodePoints: []int{949}, Characters: []byte{0xce, 0xb5}}, + "epsiv": &HTML5Entity{Name: "epsiv", CodePoints: []int{1013}, Characters: []byte{0xcf, 0xb5}}, + "eqcirc": &HTML5Entity{Name: "eqcirc", CodePoints: []int{8790}, Characters: []byte{0xe2, 0x89, 0x96}}, + "eqcolon": &HTML5Entity{Name: "eqcolon", CodePoints: []int{8789}, Characters: []byte{0xe2, 0x89, 0x95}}, + "eqsim": &HTML5Entity{Name: "eqsim", CodePoints: []int{8770}, Characters: []byte{0xe2, 0x89, 0x82}}, + "eqslantgtr": &HTML5Entity{Name: "eqslantgtr", CodePoints: []int{10902}, Characters: []byte{0xe2, 0xaa, 0x96}}, + "eqslantless": &HTML5Entity{Name: "eqslantless", CodePoints: []int{10901}, Characters: []byte{0xe2, 0xaa, 0x95}}, + "equals": &HTML5Entity{Name: "equals", CodePoints: []int{61}, Characters: []byte{0x3d}}, + "equest": &HTML5Entity{Name: "equest", CodePoints: []int{8799}, Characters: []byte{0xe2, 0x89, 0x9f}}, + "equiv": &HTML5Entity{Name: "equiv", CodePoints: []int{8801}, Characters: []byte{0xe2, 0x89, 0xa1}}, + "equivDD": &HTML5Entity{Name: "equivDD", CodePoints: []int{10872}, Characters: []byte{0xe2, 0xa9, 0xb8}}, + "eqvparsl": &HTML5Entity{Name: "eqvparsl", CodePoints: []int{10725}, Characters: []byte{0xe2, 0xa7, 0xa5}}, + "erDot": &HTML5Entity{Name: "erDot", CodePoints: []int{8787}, Characters: []byte{0xe2, 0x89, 0x93}}, + "erarr": &HTML5Entity{Name: "erarr", CodePoints: []int{10609}, Characters: []byte{0xe2, 0xa5, 0xb1}}, + "escr": &HTML5Entity{Name: "escr", CodePoints: []int{8495}, Characters: []byte{0xe2, 0x84, 0xaf}}, + "esdot": &HTML5Entity{Name: "esdot", CodePoints: []int{8784}, Characters: []byte{0xe2, 0x89, 0x90}}, + "esim": &HTML5Entity{Name: "esim", CodePoints: []int{8770}, Characters: []byte{0xe2, 0x89, 0x82}}, + "eta": &HTML5Entity{Name: "eta", CodePoints: []int{951}, Characters: []byte{0xce, 0xb7}}, + "eth": &HTML5Entity{Name: "eth", CodePoints: []int{240}, Characters: []byte{0xc3, 0xb0}}, + "euml": &HTML5Entity{Name: "euml", CodePoints: []int{235}, Characters: []byte{0xc3, 0xab}}, + "euro": &HTML5Entity{Name: "euro", CodePoints: []int{8364}, Characters: []byte{0xe2, 0x82, 0xac}}, + "excl": &HTML5Entity{Name: "excl", CodePoints: []int{33}, Characters: []byte{0x21}}, + "exist": &HTML5Entity{Name: "exist", CodePoints: []int{8707}, Characters: []byte{0xe2, 0x88, 0x83}}, + "expectation": &HTML5Entity{Name: "expectation", CodePoints: []int{8496}, Characters: []byte{0xe2, 0x84, 0xb0}}, + "exponentiale": &HTML5Entity{Name: "exponentiale", CodePoints: []int{8519}, Characters: []byte{0xe2, 0x85, 0x87}}, + "fallingdotseq": &HTML5Entity{Name: "fallingdotseq", CodePoints: []int{8786}, Characters: []byte{0xe2, 0x89, 0x92}}, + "fcy": &HTML5Entity{Name: "fcy", CodePoints: []int{1092}, Characters: []byte{0xd1, 0x84}}, + "female": &HTML5Entity{Name: "female", CodePoints: []int{9792}, Characters: []byte{0xe2, 0x99, 0x80}}, + "ffilig": &HTML5Entity{Name: "ffilig", CodePoints: []int{64259}, Characters: []byte{0xef, 0xac, 0x83}}, + "fflig": &HTML5Entity{Name: "fflig", CodePoints: []int{64256}, Characters: []byte{0xef, 0xac, 0x80}}, + "ffllig": &HTML5Entity{Name: "ffllig", CodePoints: []int{64260}, Characters: []byte{0xef, 0xac, 0x84}}, + "ffr": &HTML5Entity{Name: "ffr", CodePoints: []int{120099}, Characters: []byte{0xf0, 0x9d, 0x94, 0xa3}}, + "filig": &HTML5Entity{Name: "filig", CodePoints: []int{64257}, Characters: []byte{0xef, 0xac, 0x81}}, + "fjlig": &HTML5Entity{Name: "fjlig", CodePoints: []int{102, 106}, Characters: []byte{0x66, 0x6a}}, + "flat": &HTML5Entity{Name: "flat", CodePoints: []int{9837}, Characters: []byte{0xe2, 0x99, 0xad}}, + "fllig": &HTML5Entity{Name: "fllig", CodePoints: []int{64258}, Characters: []byte{0xef, 0xac, 0x82}}, + "fltns": &HTML5Entity{Name: "fltns", CodePoints: []int{9649}, Characters: []byte{0xe2, 0x96, 0xb1}}, + "fnof": &HTML5Entity{Name: "fnof", CodePoints: []int{402}, Characters: []byte{0xc6, 0x92}}, + "fopf": &HTML5Entity{Name: "fopf", CodePoints: []int{120151}, Characters: []byte{0xf0, 0x9d, 0x95, 0x97}}, + "forall": &HTML5Entity{Name: "forall", CodePoints: []int{8704}, Characters: []byte{0xe2, 0x88, 0x80}}, + "fork": &HTML5Entity{Name: "fork", CodePoints: []int{8916}, Characters: []byte{0xe2, 0x8b, 0x94}}, + "forkv": &HTML5Entity{Name: "forkv", CodePoints: []int{10969}, Characters: []byte{0xe2, 0xab, 0x99}}, + "fpartint": &HTML5Entity{Name: "fpartint", CodePoints: []int{10765}, Characters: []byte{0xe2, 0xa8, 0x8d}}, + "frac12": &HTML5Entity{Name: "frac12", CodePoints: []int{189}, Characters: []byte{0xc2, 0xbd}}, + "frac13": &HTML5Entity{Name: "frac13", CodePoints: []int{8531}, Characters: []byte{0xe2, 0x85, 0x93}}, + "frac14": &HTML5Entity{Name: "frac14", CodePoints: []int{188}, Characters: []byte{0xc2, 0xbc}}, + "frac15": &HTML5Entity{Name: "frac15", CodePoints: []int{8533}, Characters: []byte{0xe2, 0x85, 0x95}}, + "frac16": &HTML5Entity{Name: "frac16", CodePoints: []int{8537}, Characters: []byte{0xe2, 0x85, 0x99}}, + "frac18": &HTML5Entity{Name: "frac18", CodePoints: []int{8539}, Characters: []byte{0xe2, 0x85, 0x9b}}, + "frac23": &HTML5Entity{Name: "frac23", CodePoints: []int{8532}, Characters: []byte{0xe2, 0x85, 0x94}}, + "frac25": &HTML5Entity{Name: "frac25", CodePoints: []int{8534}, Characters: []byte{0xe2, 0x85, 0x96}}, + "frac34": &HTML5Entity{Name: "frac34", CodePoints: []int{190}, Characters: []byte{0xc2, 0xbe}}, + "frac35": &HTML5Entity{Name: "frac35", CodePoints: []int{8535}, Characters: []byte{0xe2, 0x85, 0x97}}, + "frac38": &HTML5Entity{Name: "frac38", CodePoints: []int{8540}, Characters: []byte{0xe2, 0x85, 0x9c}}, + "frac45": &HTML5Entity{Name: "frac45", CodePoints: []int{8536}, Characters: []byte{0xe2, 0x85, 0x98}}, + "frac56": &HTML5Entity{Name: "frac56", CodePoints: []int{8538}, Characters: []byte{0xe2, 0x85, 0x9a}}, + "frac58": &HTML5Entity{Name: "frac58", CodePoints: []int{8541}, Characters: []byte{0xe2, 0x85, 0x9d}}, + "frac78": &HTML5Entity{Name: "frac78", CodePoints: []int{8542}, Characters: []byte{0xe2, 0x85, 0x9e}}, + "frasl": &HTML5Entity{Name: "frasl", CodePoints: []int{8260}, Characters: []byte{0xe2, 0x81, 0x84}}, + "frown": &HTML5Entity{Name: "frown", CodePoints: []int{8994}, Characters: []byte{0xe2, 0x8c, 0xa2}}, + "fscr": &HTML5Entity{Name: "fscr", CodePoints: []int{119995}, Characters: []byte{0xf0, 0x9d, 0x92, 0xbb}}, + "gE": &HTML5Entity{Name: "gE", CodePoints: []int{8807}, Characters: []byte{0xe2, 0x89, 0xa7}}, + "gEl": &HTML5Entity{Name: "gEl", CodePoints: []int{10892}, Characters: []byte{0xe2, 0xaa, 0x8c}}, + "gacute": &HTML5Entity{Name: "gacute", CodePoints: []int{501}, Characters: []byte{0xc7, 0xb5}}, + "gamma": &HTML5Entity{Name: "gamma", CodePoints: []int{947}, Characters: []byte{0xce, 0xb3}}, + "gammad": &HTML5Entity{Name: "gammad", CodePoints: []int{989}, Characters: []byte{0xcf, 0x9d}}, + "gap": &HTML5Entity{Name: "gap", CodePoints: []int{10886}, Characters: []byte{0xe2, 0xaa, 0x86}}, + "gbreve": &HTML5Entity{Name: "gbreve", CodePoints: []int{287}, Characters: []byte{0xc4, 0x9f}}, + "gcirc": &HTML5Entity{Name: "gcirc", CodePoints: []int{285}, Characters: []byte{0xc4, 0x9d}}, + "gcy": &HTML5Entity{Name: "gcy", CodePoints: []int{1075}, Characters: []byte{0xd0, 0xb3}}, + "gdot": &HTML5Entity{Name: "gdot", CodePoints: []int{289}, Characters: []byte{0xc4, 0xa1}}, + "ge": &HTML5Entity{Name: "ge", CodePoints: []int{8805}, Characters: []byte{0xe2, 0x89, 0xa5}}, + "gel": &HTML5Entity{Name: "gel", CodePoints: []int{8923}, Characters: []byte{0xe2, 0x8b, 0x9b}}, + "geq": &HTML5Entity{Name: "geq", CodePoints: []int{8805}, Characters: []byte{0xe2, 0x89, 0xa5}}, + "geqq": &HTML5Entity{Name: "geqq", CodePoints: []int{8807}, Characters: []byte{0xe2, 0x89, 0xa7}}, + "geqslant": &HTML5Entity{Name: "geqslant", CodePoints: []int{10878}, Characters: []byte{0xe2, 0xa9, 0xbe}}, + "ges": &HTML5Entity{Name: "ges", CodePoints: []int{10878}, Characters: []byte{0xe2, 0xa9, 0xbe}}, + "gescc": &HTML5Entity{Name: "gescc", CodePoints: []int{10921}, Characters: []byte{0xe2, 0xaa, 0xa9}}, + "gesdot": &HTML5Entity{Name: "gesdot", CodePoints: []int{10880}, Characters: []byte{0xe2, 0xaa, 0x80}}, + "gesdoto": &HTML5Entity{Name: "gesdoto", CodePoints: []int{10882}, Characters: []byte{0xe2, 0xaa, 0x82}}, + "gesdotol": &HTML5Entity{Name: "gesdotol", CodePoints: []int{10884}, Characters: []byte{0xe2, 0xaa, 0x84}}, + "gesl": &HTML5Entity{Name: "gesl", CodePoints: []int{8923, 65024}, Characters: []byte{0xe2, 0x8b, 0x9b, 0xef, 0xb8, 0x80}}, + "gesles": &HTML5Entity{Name: "gesles", CodePoints: []int{10900}, Characters: []byte{0xe2, 0xaa, 0x94}}, + "gfr": &HTML5Entity{Name: "gfr", CodePoints: []int{120100}, Characters: []byte{0xf0, 0x9d, 0x94, 0xa4}}, + "gg": &HTML5Entity{Name: "gg", CodePoints: []int{8811}, Characters: []byte{0xe2, 0x89, 0xab}}, + "ggg": &HTML5Entity{Name: "ggg", CodePoints: []int{8921}, Characters: []byte{0xe2, 0x8b, 0x99}}, + "gimel": &HTML5Entity{Name: "gimel", CodePoints: []int{8503}, Characters: []byte{0xe2, 0x84, 0xb7}}, + "gjcy": &HTML5Entity{Name: "gjcy", CodePoints: []int{1107}, Characters: []byte{0xd1, 0x93}}, + "gl": &HTML5Entity{Name: "gl", CodePoints: []int{8823}, Characters: []byte{0xe2, 0x89, 0xb7}}, + "glE": &HTML5Entity{Name: "glE", CodePoints: []int{10898}, Characters: []byte{0xe2, 0xaa, 0x92}}, + "gla": &HTML5Entity{Name: "gla", CodePoints: []int{10917}, Characters: []byte{0xe2, 0xaa, 0xa5}}, + "glj": &HTML5Entity{Name: "glj", CodePoints: []int{10916}, Characters: []byte{0xe2, 0xaa, 0xa4}}, + "gnE": &HTML5Entity{Name: "gnE", CodePoints: []int{8809}, Characters: []byte{0xe2, 0x89, 0xa9}}, + "gnap": &HTML5Entity{Name: "gnap", CodePoints: []int{10890}, Characters: []byte{0xe2, 0xaa, 0x8a}}, + "gnapprox": &HTML5Entity{Name: "gnapprox", CodePoints: []int{10890}, Characters: []byte{0xe2, 0xaa, 0x8a}}, + "gne": &HTML5Entity{Name: "gne", CodePoints: []int{10888}, Characters: []byte{0xe2, 0xaa, 0x88}}, + "gneq": &HTML5Entity{Name: "gneq", CodePoints: []int{10888}, Characters: []byte{0xe2, 0xaa, 0x88}}, + "gneqq": &HTML5Entity{Name: "gneqq", CodePoints: []int{8809}, Characters: []byte{0xe2, 0x89, 0xa9}}, + "gnsim": &HTML5Entity{Name: "gnsim", CodePoints: []int{8935}, Characters: []byte{0xe2, 0x8b, 0xa7}}, + "gopf": &HTML5Entity{Name: "gopf", CodePoints: []int{120152}, Characters: []byte{0xf0, 0x9d, 0x95, 0x98}}, + "grave": &HTML5Entity{Name: "grave", CodePoints: []int{96}, Characters: []byte{0x60}}, + "gscr": &HTML5Entity{Name: "gscr", CodePoints: []int{8458}, Characters: []byte{0xe2, 0x84, 0x8a}}, + "gsim": &HTML5Entity{Name: "gsim", CodePoints: []int{8819}, Characters: []byte{0xe2, 0x89, 0xb3}}, + "gsime": &HTML5Entity{Name: "gsime", CodePoints: []int{10894}, Characters: []byte{0xe2, 0xaa, 0x8e}}, + "gsiml": &HTML5Entity{Name: "gsiml", CodePoints: []int{10896}, Characters: []byte{0xe2, 0xaa, 0x90}}, + "gt": &HTML5Entity{Name: "gt", CodePoints: []int{62}, Characters: []byte{0x3e}}, + "gtcc": &HTML5Entity{Name: "gtcc", CodePoints: []int{10919}, Characters: []byte{0xe2, 0xaa, 0xa7}}, + "gtcir": &HTML5Entity{Name: "gtcir", CodePoints: []int{10874}, Characters: []byte{0xe2, 0xa9, 0xba}}, + "gtdot": &HTML5Entity{Name: "gtdot", CodePoints: []int{8919}, Characters: []byte{0xe2, 0x8b, 0x97}}, + "gtlPar": &HTML5Entity{Name: "gtlPar", CodePoints: []int{10645}, Characters: []byte{0xe2, 0xa6, 0x95}}, + "gtquest": &HTML5Entity{Name: "gtquest", CodePoints: []int{10876}, Characters: []byte{0xe2, 0xa9, 0xbc}}, + "gtrapprox": &HTML5Entity{Name: "gtrapprox", CodePoints: []int{10886}, Characters: []byte{0xe2, 0xaa, 0x86}}, + "gtrarr": &HTML5Entity{Name: "gtrarr", CodePoints: []int{10616}, Characters: []byte{0xe2, 0xa5, 0xb8}}, + "gtrdot": &HTML5Entity{Name: "gtrdot", CodePoints: []int{8919}, Characters: []byte{0xe2, 0x8b, 0x97}}, + "gtreqless": &HTML5Entity{Name: "gtreqless", CodePoints: []int{8923}, Characters: []byte{0xe2, 0x8b, 0x9b}}, + "gtreqqless": &HTML5Entity{Name: "gtreqqless", CodePoints: []int{10892}, Characters: []byte{0xe2, 0xaa, 0x8c}}, + "gtrless": &HTML5Entity{Name: "gtrless", CodePoints: []int{8823}, Characters: []byte{0xe2, 0x89, 0xb7}}, + "gtrsim": &HTML5Entity{Name: "gtrsim", CodePoints: []int{8819}, Characters: []byte{0xe2, 0x89, 0xb3}}, + "gvertneqq": &HTML5Entity{Name: "gvertneqq", CodePoints: []int{8809, 65024}, Characters: []byte{0xe2, 0x89, 0xa9, 0xef, 0xb8, 0x80}}, + "gvnE": &HTML5Entity{Name: "gvnE", CodePoints: []int{8809, 65024}, Characters: []byte{0xe2, 0x89, 0xa9, 0xef, 0xb8, 0x80}}, + "hArr": &HTML5Entity{Name: "hArr", CodePoints: []int{8660}, Characters: []byte{0xe2, 0x87, 0x94}}, + "hairsp": &HTML5Entity{Name: "hairsp", CodePoints: []int{8202}, Characters: []byte{0xe2, 0x80, 0x8a}}, + "half": &HTML5Entity{Name: "half", CodePoints: []int{189}, Characters: []byte{0xc2, 0xbd}}, + "hamilt": &HTML5Entity{Name: "hamilt", CodePoints: []int{8459}, Characters: []byte{0xe2, 0x84, 0x8b}}, + "hardcy": &HTML5Entity{Name: "hardcy", CodePoints: []int{1098}, Characters: []byte{0xd1, 0x8a}}, + "harr": &HTML5Entity{Name: "harr", CodePoints: []int{8596}, Characters: []byte{0xe2, 0x86, 0x94}}, + "harrcir": &HTML5Entity{Name: "harrcir", CodePoints: []int{10568}, Characters: []byte{0xe2, 0xa5, 0x88}}, + "harrw": &HTML5Entity{Name: "harrw", CodePoints: []int{8621}, Characters: []byte{0xe2, 0x86, 0xad}}, + "hbar": &HTML5Entity{Name: "hbar", CodePoints: []int{8463}, Characters: []byte{0xe2, 0x84, 0x8f}}, + "hcirc": &HTML5Entity{Name: "hcirc", CodePoints: []int{293}, Characters: []byte{0xc4, 0xa5}}, + "hearts": &HTML5Entity{Name: "hearts", CodePoints: []int{9829}, Characters: []byte{0xe2, 0x99, 0xa5}}, + "heartsuit": &HTML5Entity{Name: "heartsuit", CodePoints: []int{9829}, Characters: []byte{0xe2, 0x99, 0xa5}}, + "hellip": &HTML5Entity{Name: "hellip", CodePoints: []int{8230}, Characters: []byte{0xe2, 0x80, 0xa6}}, + "hercon": &HTML5Entity{Name: "hercon", CodePoints: []int{8889}, Characters: []byte{0xe2, 0x8a, 0xb9}}, + "hfr": &HTML5Entity{Name: "hfr", CodePoints: []int{120101}, Characters: []byte{0xf0, 0x9d, 0x94, 0xa5}}, + "hksearow": &HTML5Entity{Name: "hksearow", CodePoints: []int{10533}, Characters: []byte{0xe2, 0xa4, 0xa5}}, + "hkswarow": &HTML5Entity{Name: "hkswarow", CodePoints: []int{10534}, Characters: []byte{0xe2, 0xa4, 0xa6}}, + "hoarr": &HTML5Entity{Name: "hoarr", CodePoints: []int{8703}, Characters: []byte{0xe2, 0x87, 0xbf}}, + "homtht": &HTML5Entity{Name: "homtht", CodePoints: []int{8763}, Characters: []byte{0xe2, 0x88, 0xbb}}, + "hookleftarrow": &HTML5Entity{Name: "hookleftarrow", CodePoints: []int{8617}, Characters: []byte{0xe2, 0x86, 0xa9}}, + "hookrightarrow": &HTML5Entity{Name: "hookrightarrow", CodePoints: []int{8618}, Characters: []byte{0xe2, 0x86, 0xaa}}, + "hopf": &HTML5Entity{Name: "hopf", CodePoints: []int{120153}, Characters: []byte{0xf0, 0x9d, 0x95, 0x99}}, + "horbar": &HTML5Entity{Name: "horbar", CodePoints: []int{8213}, Characters: []byte{0xe2, 0x80, 0x95}}, + "hscr": &HTML5Entity{Name: "hscr", CodePoints: []int{119997}, Characters: []byte{0xf0, 0x9d, 0x92, 0xbd}}, + "hslash": &HTML5Entity{Name: "hslash", CodePoints: []int{8463}, Characters: []byte{0xe2, 0x84, 0x8f}}, + "hstrok": &HTML5Entity{Name: "hstrok", CodePoints: []int{295}, Characters: []byte{0xc4, 0xa7}}, + "hybull": &HTML5Entity{Name: "hybull", CodePoints: []int{8259}, Characters: []byte{0xe2, 0x81, 0x83}}, + "hyphen": &HTML5Entity{Name: "hyphen", CodePoints: []int{8208}, Characters: []byte{0xe2, 0x80, 0x90}}, + "iacute": &HTML5Entity{Name: "iacute", CodePoints: []int{237}, Characters: []byte{0xc3, 0xad}}, + "ic": &HTML5Entity{Name: "ic", CodePoints: []int{8291}, Characters: []byte{0xe2, 0x81, 0xa3}}, + "icirc": &HTML5Entity{Name: "icirc", CodePoints: []int{238}, Characters: []byte{0xc3, 0xae}}, + "icy": &HTML5Entity{Name: "icy", CodePoints: []int{1080}, Characters: []byte{0xd0, 0xb8}}, + "iecy": &HTML5Entity{Name: "iecy", CodePoints: []int{1077}, Characters: []byte{0xd0, 0xb5}}, + "iexcl": &HTML5Entity{Name: "iexcl", CodePoints: []int{161}, Characters: []byte{0xc2, 0xa1}}, + "iff": &HTML5Entity{Name: "iff", CodePoints: []int{8660}, Characters: []byte{0xe2, 0x87, 0x94}}, + "ifr": &HTML5Entity{Name: "ifr", CodePoints: []int{120102}, Characters: []byte{0xf0, 0x9d, 0x94, 0xa6}}, + "igrave": &HTML5Entity{Name: "igrave", CodePoints: []int{236}, Characters: []byte{0xc3, 0xac}}, + "ii": &HTML5Entity{Name: "ii", CodePoints: []int{8520}, Characters: []byte{0xe2, 0x85, 0x88}}, + "iiiint": &HTML5Entity{Name: "iiiint", CodePoints: []int{10764}, Characters: []byte{0xe2, 0xa8, 0x8c}}, + "iiint": &HTML5Entity{Name: "iiint", CodePoints: []int{8749}, Characters: []byte{0xe2, 0x88, 0xad}}, + "iinfin": &HTML5Entity{Name: "iinfin", CodePoints: []int{10716}, Characters: []byte{0xe2, 0xa7, 0x9c}}, + "iiota": &HTML5Entity{Name: "iiota", CodePoints: []int{8489}, Characters: []byte{0xe2, 0x84, 0xa9}}, + "ijlig": &HTML5Entity{Name: "ijlig", CodePoints: []int{307}, Characters: []byte{0xc4, 0xb3}}, + "imacr": &HTML5Entity{Name: "imacr", CodePoints: []int{299}, Characters: []byte{0xc4, 0xab}}, + "image": &HTML5Entity{Name: "image", CodePoints: []int{8465}, Characters: []byte{0xe2, 0x84, 0x91}}, + "imagline": &HTML5Entity{Name: "imagline", CodePoints: []int{8464}, Characters: []byte{0xe2, 0x84, 0x90}}, + "imagpart": &HTML5Entity{Name: "imagpart", CodePoints: []int{8465}, Characters: []byte{0xe2, 0x84, 0x91}}, + "imath": &HTML5Entity{Name: "imath", CodePoints: []int{305}, Characters: []byte{0xc4, 0xb1}}, + "imof": &HTML5Entity{Name: "imof", CodePoints: []int{8887}, Characters: []byte{0xe2, 0x8a, 0xb7}}, + "imped": &HTML5Entity{Name: "imped", CodePoints: []int{437}, Characters: []byte{0xc6, 0xb5}}, + "in": &HTML5Entity{Name: "in", CodePoints: []int{8712}, Characters: []byte{0xe2, 0x88, 0x88}}, + "incare": &HTML5Entity{Name: "incare", CodePoints: []int{8453}, Characters: []byte{0xe2, 0x84, 0x85}}, + "infin": &HTML5Entity{Name: "infin", CodePoints: []int{8734}, Characters: []byte{0xe2, 0x88, 0x9e}}, + "infintie": &HTML5Entity{Name: "infintie", CodePoints: []int{10717}, Characters: []byte{0xe2, 0xa7, 0x9d}}, + "inodot": &HTML5Entity{Name: "inodot", CodePoints: []int{305}, Characters: []byte{0xc4, 0xb1}}, + "int": &HTML5Entity{Name: "int", CodePoints: []int{8747}, Characters: []byte{0xe2, 0x88, 0xab}}, + "intcal": &HTML5Entity{Name: "intcal", CodePoints: []int{8890}, Characters: []byte{0xe2, 0x8a, 0xba}}, + "integers": &HTML5Entity{Name: "integers", CodePoints: []int{8484}, Characters: []byte{0xe2, 0x84, 0xa4}}, + "intercal": &HTML5Entity{Name: "intercal", CodePoints: []int{8890}, Characters: []byte{0xe2, 0x8a, 0xba}}, + "intlarhk": &HTML5Entity{Name: "intlarhk", CodePoints: []int{10775}, Characters: []byte{0xe2, 0xa8, 0x97}}, + "intprod": &HTML5Entity{Name: "intprod", CodePoints: []int{10812}, Characters: []byte{0xe2, 0xa8, 0xbc}}, + "iocy": &HTML5Entity{Name: "iocy", CodePoints: []int{1105}, Characters: []byte{0xd1, 0x91}}, + "iogon": &HTML5Entity{Name: "iogon", CodePoints: []int{303}, Characters: []byte{0xc4, 0xaf}}, + "iopf": &HTML5Entity{Name: "iopf", CodePoints: []int{120154}, Characters: []byte{0xf0, 0x9d, 0x95, 0x9a}}, + "iota": &HTML5Entity{Name: "iota", CodePoints: []int{953}, Characters: []byte{0xce, 0xb9}}, + "iprod": &HTML5Entity{Name: "iprod", CodePoints: []int{10812}, Characters: []byte{0xe2, 0xa8, 0xbc}}, + "iquest": &HTML5Entity{Name: "iquest", CodePoints: []int{191}, Characters: []byte{0xc2, 0xbf}}, + "iscr": &HTML5Entity{Name: "iscr", CodePoints: []int{119998}, Characters: []byte{0xf0, 0x9d, 0x92, 0xbe}}, + "isin": &HTML5Entity{Name: "isin", CodePoints: []int{8712}, Characters: []byte{0xe2, 0x88, 0x88}}, + "isinE": &HTML5Entity{Name: "isinE", CodePoints: []int{8953}, Characters: []byte{0xe2, 0x8b, 0xb9}}, + "isindot": &HTML5Entity{Name: "isindot", CodePoints: []int{8949}, Characters: []byte{0xe2, 0x8b, 0xb5}}, + "isins": &HTML5Entity{Name: "isins", CodePoints: []int{8948}, Characters: []byte{0xe2, 0x8b, 0xb4}}, + "isinsv": &HTML5Entity{Name: "isinsv", CodePoints: []int{8947}, Characters: []byte{0xe2, 0x8b, 0xb3}}, + "isinv": &HTML5Entity{Name: "isinv", CodePoints: []int{8712}, Characters: []byte{0xe2, 0x88, 0x88}}, + "it": &HTML5Entity{Name: "it", CodePoints: []int{8290}, Characters: []byte{0xe2, 0x81, 0xa2}}, + "itilde": &HTML5Entity{Name: "itilde", CodePoints: []int{297}, Characters: []byte{0xc4, 0xa9}}, + "iukcy": &HTML5Entity{Name: "iukcy", CodePoints: []int{1110}, Characters: []byte{0xd1, 0x96}}, + "iuml": &HTML5Entity{Name: "iuml", CodePoints: []int{239}, Characters: []byte{0xc3, 0xaf}}, + "jcirc": &HTML5Entity{Name: "jcirc", CodePoints: []int{309}, Characters: []byte{0xc4, 0xb5}}, + "jcy": &HTML5Entity{Name: "jcy", CodePoints: []int{1081}, Characters: []byte{0xd0, 0xb9}}, + "jfr": &HTML5Entity{Name: "jfr", CodePoints: []int{120103}, Characters: []byte{0xf0, 0x9d, 0x94, 0xa7}}, + "jmath": &HTML5Entity{Name: "jmath", CodePoints: []int{567}, Characters: []byte{0xc8, 0xb7}}, + "jopf": &HTML5Entity{Name: "jopf", CodePoints: []int{120155}, Characters: []byte{0xf0, 0x9d, 0x95, 0x9b}}, + "jscr": &HTML5Entity{Name: "jscr", CodePoints: []int{119999}, Characters: []byte{0xf0, 0x9d, 0x92, 0xbf}}, + "jsercy": &HTML5Entity{Name: "jsercy", CodePoints: []int{1112}, Characters: []byte{0xd1, 0x98}}, + "jukcy": &HTML5Entity{Name: "jukcy", CodePoints: []int{1108}, Characters: []byte{0xd1, 0x94}}, + "kappa": &HTML5Entity{Name: "kappa", CodePoints: []int{954}, Characters: []byte{0xce, 0xba}}, + "kappav": &HTML5Entity{Name: "kappav", CodePoints: []int{1008}, Characters: []byte{0xcf, 0xb0}}, + "kcedil": &HTML5Entity{Name: "kcedil", CodePoints: []int{311}, Characters: []byte{0xc4, 0xb7}}, + "kcy": &HTML5Entity{Name: "kcy", CodePoints: []int{1082}, Characters: []byte{0xd0, 0xba}}, + "kfr": &HTML5Entity{Name: "kfr", CodePoints: []int{120104}, Characters: []byte{0xf0, 0x9d, 0x94, 0xa8}}, + "kgreen": &HTML5Entity{Name: "kgreen", CodePoints: []int{312}, Characters: []byte{0xc4, 0xb8}}, + "khcy": &HTML5Entity{Name: "khcy", CodePoints: []int{1093}, Characters: []byte{0xd1, 0x85}}, + "kjcy": &HTML5Entity{Name: "kjcy", CodePoints: []int{1116}, Characters: []byte{0xd1, 0x9c}}, + "kopf": &HTML5Entity{Name: "kopf", CodePoints: []int{120156}, Characters: []byte{0xf0, 0x9d, 0x95, 0x9c}}, + "kscr": &HTML5Entity{Name: "kscr", CodePoints: []int{120000}, Characters: []byte{0xf0, 0x9d, 0x93, 0x80}}, + "lAarr": &HTML5Entity{Name: "lAarr", CodePoints: []int{8666}, Characters: []byte{0xe2, 0x87, 0x9a}}, + "lArr": &HTML5Entity{Name: "lArr", CodePoints: []int{8656}, Characters: []byte{0xe2, 0x87, 0x90}}, + "lAtail": &HTML5Entity{Name: "lAtail", CodePoints: []int{10523}, Characters: []byte{0xe2, 0xa4, 0x9b}}, + "lBarr": &HTML5Entity{Name: "lBarr", CodePoints: []int{10510}, Characters: []byte{0xe2, 0xa4, 0x8e}}, + "lE": &HTML5Entity{Name: "lE", CodePoints: []int{8806}, Characters: []byte{0xe2, 0x89, 0xa6}}, + "lEg": &HTML5Entity{Name: "lEg", CodePoints: []int{10891}, Characters: []byte{0xe2, 0xaa, 0x8b}}, + "lHar": &HTML5Entity{Name: "lHar", CodePoints: []int{10594}, Characters: []byte{0xe2, 0xa5, 0xa2}}, + "lacute": &HTML5Entity{Name: "lacute", CodePoints: []int{314}, Characters: []byte{0xc4, 0xba}}, + "laemptyv": &HTML5Entity{Name: "laemptyv", CodePoints: []int{10676}, Characters: []byte{0xe2, 0xa6, 0xb4}}, + "lagran": &HTML5Entity{Name: "lagran", CodePoints: []int{8466}, Characters: []byte{0xe2, 0x84, 0x92}}, + "lambda": &HTML5Entity{Name: "lambda", CodePoints: []int{955}, Characters: []byte{0xce, 0xbb}}, + "lang": &HTML5Entity{Name: "lang", CodePoints: []int{10216}, Characters: []byte{0xe2, 0x9f, 0xa8}}, + "langd": &HTML5Entity{Name: "langd", CodePoints: []int{10641}, Characters: []byte{0xe2, 0xa6, 0x91}}, + "langle": &HTML5Entity{Name: "langle", CodePoints: []int{10216}, Characters: []byte{0xe2, 0x9f, 0xa8}}, + "lap": &HTML5Entity{Name: "lap", CodePoints: []int{10885}, Characters: []byte{0xe2, 0xaa, 0x85}}, + "laquo": &HTML5Entity{Name: "laquo", CodePoints: []int{171}, Characters: []byte{0xc2, 0xab}}, + "larr": &HTML5Entity{Name: "larr", CodePoints: []int{8592}, Characters: []byte{0xe2, 0x86, 0x90}}, + "larrb": &HTML5Entity{Name: "larrb", CodePoints: []int{8676}, Characters: []byte{0xe2, 0x87, 0xa4}}, + "larrbfs": &HTML5Entity{Name: "larrbfs", CodePoints: []int{10527}, Characters: []byte{0xe2, 0xa4, 0x9f}}, + "larrfs": &HTML5Entity{Name: "larrfs", CodePoints: []int{10525}, Characters: []byte{0xe2, 0xa4, 0x9d}}, + "larrhk": &HTML5Entity{Name: "larrhk", CodePoints: []int{8617}, Characters: []byte{0xe2, 0x86, 0xa9}}, + "larrlp": &HTML5Entity{Name: "larrlp", CodePoints: []int{8619}, Characters: []byte{0xe2, 0x86, 0xab}}, + "larrpl": &HTML5Entity{Name: "larrpl", CodePoints: []int{10553}, Characters: []byte{0xe2, 0xa4, 0xb9}}, + "larrsim": &HTML5Entity{Name: "larrsim", CodePoints: []int{10611}, Characters: []byte{0xe2, 0xa5, 0xb3}}, + "larrtl": &HTML5Entity{Name: "larrtl", CodePoints: []int{8610}, Characters: []byte{0xe2, 0x86, 0xa2}}, + "lat": &HTML5Entity{Name: "lat", CodePoints: []int{10923}, Characters: []byte{0xe2, 0xaa, 0xab}}, + "latail": &HTML5Entity{Name: "latail", CodePoints: []int{10521}, Characters: []byte{0xe2, 0xa4, 0x99}}, + "late": &HTML5Entity{Name: "late", CodePoints: []int{10925}, Characters: []byte{0xe2, 0xaa, 0xad}}, + "lates": &HTML5Entity{Name: "lates", CodePoints: []int{10925, 65024}, Characters: []byte{0xe2, 0xaa, 0xad, 0xef, 0xb8, 0x80}}, + "lbarr": &HTML5Entity{Name: "lbarr", CodePoints: []int{10508}, Characters: []byte{0xe2, 0xa4, 0x8c}}, + "lbbrk": &HTML5Entity{Name: "lbbrk", CodePoints: []int{10098}, Characters: []byte{0xe2, 0x9d, 0xb2}}, + "lbrace": &HTML5Entity{Name: "lbrace", CodePoints: []int{123}, Characters: []byte{0x7b}}, + "lbrack": &HTML5Entity{Name: "lbrack", CodePoints: []int{91}, Characters: []byte{0x5b}}, + "lbrke": &HTML5Entity{Name: "lbrke", CodePoints: []int{10635}, Characters: []byte{0xe2, 0xa6, 0x8b}}, + "lbrksld": &HTML5Entity{Name: "lbrksld", CodePoints: []int{10639}, Characters: []byte{0xe2, 0xa6, 0x8f}}, + "lbrkslu": &HTML5Entity{Name: "lbrkslu", CodePoints: []int{10637}, Characters: []byte{0xe2, 0xa6, 0x8d}}, + "lcaron": &HTML5Entity{Name: "lcaron", CodePoints: []int{318}, Characters: []byte{0xc4, 0xbe}}, + "lcedil": &HTML5Entity{Name: "lcedil", CodePoints: []int{316}, Characters: []byte{0xc4, 0xbc}}, + "lceil": &HTML5Entity{Name: "lceil", CodePoints: []int{8968}, Characters: []byte{0xe2, 0x8c, 0x88}}, + "lcub": &HTML5Entity{Name: "lcub", CodePoints: []int{123}, Characters: []byte{0x7b}}, + "lcy": &HTML5Entity{Name: "lcy", CodePoints: []int{1083}, Characters: []byte{0xd0, 0xbb}}, + "ldca": &HTML5Entity{Name: "ldca", CodePoints: []int{10550}, Characters: []byte{0xe2, 0xa4, 0xb6}}, + "ldquo": &HTML5Entity{Name: "ldquo", CodePoints: []int{8220}, Characters: []byte{0xe2, 0x80, 0x9c}}, + "ldquor": &HTML5Entity{Name: "ldquor", CodePoints: []int{8222}, Characters: []byte{0xe2, 0x80, 0x9e}}, + "ldrdhar": &HTML5Entity{Name: "ldrdhar", CodePoints: []int{10599}, Characters: []byte{0xe2, 0xa5, 0xa7}}, + "ldrushar": &HTML5Entity{Name: "ldrushar", CodePoints: []int{10571}, Characters: []byte{0xe2, 0xa5, 0x8b}}, + "ldsh": &HTML5Entity{Name: "ldsh", CodePoints: []int{8626}, Characters: []byte{0xe2, 0x86, 0xb2}}, + "le": &HTML5Entity{Name: "le", CodePoints: []int{8804}, Characters: []byte{0xe2, 0x89, 0xa4}}, + "leftarrow": &HTML5Entity{Name: "leftarrow", CodePoints: []int{8592}, Characters: []byte{0xe2, 0x86, 0x90}}, + "leftarrowtail": &HTML5Entity{Name: "leftarrowtail", CodePoints: []int{8610}, Characters: []byte{0xe2, 0x86, 0xa2}}, + "leftharpoondown": &HTML5Entity{Name: "leftharpoondown", CodePoints: []int{8637}, Characters: []byte{0xe2, 0x86, 0xbd}}, + "leftharpoonup": &HTML5Entity{Name: "leftharpoonup", CodePoints: []int{8636}, Characters: []byte{0xe2, 0x86, 0xbc}}, + "leftleftarrows": &HTML5Entity{Name: "leftleftarrows", CodePoints: []int{8647}, Characters: []byte{0xe2, 0x87, 0x87}}, + "leftrightarrow": &HTML5Entity{Name: "leftrightarrow", CodePoints: []int{8596}, Characters: []byte{0xe2, 0x86, 0x94}}, + "leftrightarrows": &HTML5Entity{Name: "leftrightarrows", CodePoints: []int{8646}, Characters: []byte{0xe2, 0x87, 0x86}}, + "leftrightharpoons": &HTML5Entity{Name: "leftrightharpoons", CodePoints: []int{8651}, Characters: []byte{0xe2, 0x87, 0x8b}}, + "leftrightsquigarrow": &HTML5Entity{Name: "leftrightsquigarrow", CodePoints: []int{8621}, Characters: []byte{0xe2, 0x86, 0xad}}, + "leftthreetimes": &HTML5Entity{Name: "leftthreetimes", CodePoints: []int{8907}, Characters: []byte{0xe2, 0x8b, 0x8b}}, + "leg": &HTML5Entity{Name: "leg", CodePoints: []int{8922}, Characters: []byte{0xe2, 0x8b, 0x9a}}, + "leq": &HTML5Entity{Name: "leq", CodePoints: []int{8804}, Characters: []byte{0xe2, 0x89, 0xa4}}, + "leqq": &HTML5Entity{Name: "leqq", CodePoints: []int{8806}, Characters: []byte{0xe2, 0x89, 0xa6}}, + "leqslant": &HTML5Entity{Name: "leqslant", CodePoints: []int{10877}, Characters: []byte{0xe2, 0xa9, 0xbd}}, + "les": &HTML5Entity{Name: "les", CodePoints: []int{10877}, Characters: []byte{0xe2, 0xa9, 0xbd}}, + "lescc": &HTML5Entity{Name: "lescc", CodePoints: []int{10920}, Characters: []byte{0xe2, 0xaa, 0xa8}}, + "lesdot": &HTML5Entity{Name: "lesdot", CodePoints: []int{10879}, Characters: []byte{0xe2, 0xa9, 0xbf}}, + "lesdoto": &HTML5Entity{Name: "lesdoto", CodePoints: []int{10881}, Characters: []byte{0xe2, 0xaa, 0x81}}, + "lesdotor": &HTML5Entity{Name: "lesdotor", CodePoints: []int{10883}, Characters: []byte{0xe2, 0xaa, 0x83}}, + "lesg": &HTML5Entity{Name: "lesg", CodePoints: []int{8922, 65024}, Characters: []byte{0xe2, 0x8b, 0x9a, 0xef, 0xb8, 0x80}}, + "lesges": &HTML5Entity{Name: "lesges", CodePoints: []int{10899}, Characters: []byte{0xe2, 0xaa, 0x93}}, + "lessapprox": &HTML5Entity{Name: "lessapprox", CodePoints: []int{10885}, Characters: []byte{0xe2, 0xaa, 0x85}}, + "lessdot": &HTML5Entity{Name: "lessdot", CodePoints: []int{8918}, Characters: []byte{0xe2, 0x8b, 0x96}}, + "lesseqgtr": &HTML5Entity{Name: "lesseqgtr", CodePoints: []int{8922}, Characters: []byte{0xe2, 0x8b, 0x9a}}, + "lesseqqgtr": &HTML5Entity{Name: "lesseqqgtr", CodePoints: []int{10891}, Characters: []byte{0xe2, 0xaa, 0x8b}}, + "lessgtr": &HTML5Entity{Name: "lessgtr", CodePoints: []int{8822}, Characters: []byte{0xe2, 0x89, 0xb6}}, + "lesssim": &HTML5Entity{Name: "lesssim", CodePoints: []int{8818}, Characters: []byte{0xe2, 0x89, 0xb2}}, + "lfisht": &HTML5Entity{Name: "lfisht", CodePoints: []int{10620}, Characters: []byte{0xe2, 0xa5, 0xbc}}, + "lfloor": &HTML5Entity{Name: "lfloor", CodePoints: []int{8970}, Characters: []byte{0xe2, 0x8c, 0x8a}}, + "lfr": &HTML5Entity{Name: "lfr", CodePoints: []int{120105}, Characters: []byte{0xf0, 0x9d, 0x94, 0xa9}}, + "lg": &HTML5Entity{Name: "lg", CodePoints: []int{8822}, Characters: []byte{0xe2, 0x89, 0xb6}}, + "lgE": &HTML5Entity{Name: "lgE", CodePoints: []int{10897}, Characters: []byte{0xe2, 0xaa, 0x91}}, + "lhard": &HTML5Entity{Name: "lhard", CodePoints: []int{8637}, Characters: []byte{0xe2, 0x86, 0xbd}}, + "lharu": &HTML5Entity{Name: "lharu", CodePoints: []int{8636}, Characters: []byte{0xe2, 0x86, 0xbc}}, + "lharul": &HTML5Entity{Name: "lharul", CodePoints: []int{10602}, Characters: []byte{0xe2, 0xa5, 0xaa}}, + "lhblk": &HTML5Entity{Name: "lhblk", CodePoints: []int{9604}, Characters: []byte{0xe2, 0x96, 0x84}}, + "ljcy": &HTML5Entity{Name: "ljcy", CodePoints: []int{1113}, Characters: []byte{0xd1, 0x99}}, + "ll": &HTML5Entity{Name: "ll", CodePoints: []int{8810}, Characters: []byte{0xe2, 0x89, 0xaa}}, + "llarr": &HTML5Entity{Name: "llarr", CodePoints: []int{8647}, Characters: []byte{0xe2, 0x87, 0x87}}, + "llcorner": &HTML5Entity{Name: "llcorner", CodePoints: []int{8990}, Characters: []byte{0xe2, 0x8c, 0x9e}}, + "llhard": &HTML5Entity{Name: "llhard", CodePoints: []int{10603}, Characters: []byte{0xe2, 0xa5, 0xab}}, + "lltri": &HTML5Entity{Name: "lltri", CodePoints: []int{9722}, Characters: []byte{0xe2, 0x97, 0xba}}, + "lmidot": &HTML5Entity{Name: "lmidot", CodePoints: []int{320}, Characters: []byte{0xc5, 0x80}}, + "lmoust": &HTML5Entity{Name: "lmoust", CodePoints: []int{9136}, Characters: []byte{0xe2, 0x8e, 0xb0}}, + "lmoustache": &HTML5Entity{Name: "lmoustache", CodePoints: []int{9136}, Characters: []byte{0xe2, 0x8e, 0xb0}}, + "lnE": &HTML5Entity{Name: "lnE", CodePoints: []int{8808}, Characters: []byte{0xe2, 0x89, 0xa8}}, + "lnap": &HTML5Entity{Name: "lnap", CodePoints: []int{10889}, Characters: []byte{0xe2, 0xaa, 0x89}}, + "lnapprox": &HTML5Entity{Name: "lnapprox", CodePoints: []int{10889}, Characters: []byte{0xe2, 0xaa, 0x89}}, + "lne": &HTML5Entity{Name: "lne", CodePoints: []int{10887}, Characters: []byte{0xe2, 0xaa, 0x87}}, + "lneq": &HTML5Entity{Name: "lneq", CodePoints: []int{10887}, Characters: []byte{0xe2, 0xaa, 0x87}}, + "lneqq": &HTML5Entity{Name: "lneqq", CodePoints: []int{8808}, Characters: []byte{0xe2, 0x89, 0xa8}}, + "lnsim": &HTML5Entity{Name: "lnsim", CodePoints: []int{8934}, Characters: []byte{0xe2, 0x8b, 0xa6}}, + "loang": &HTML5Entity{Name: "loang", CodePoints: []int{10220}, Characters: []byte{0xe2, 0x9f, 0xac}}, + "loarr": &HTML5Entity{Name: "loarr", CodePoints: []int{8701}, Characters: []byte{0xe2, 0x87, 0xbd}}, + "lobrk": &HTML5Entity{Name: "lobrk", CodePoints: []int{10214}, Characters: []byte{0xe2, 0x9f, 0xa6}}, + "longleftarrow": &HTML5Entity{Name: "longleftarrow", CodePoints: []int{10229}, Characters: []byte{0xe2, 0x9f, 0xb5}}, + "longleftrightarrow": &HTML5Entity{Name: "longleftrightarrow", CodePoints: []int{10231}, Characters: []byte{0xe2, 0x9f, 0xb7}}, + "longmapsto": &HTML5Entity{Name: "longmapsto", CodePoints: []int{10236}, Characters: []byte{0xe2, 0x9f, 0xbc}}, + "longrightarrow": &HTML5Entity{Name: "longrightarrow", CodePoints: []int{10230}, Characters: []byte{0xe2, 0x9f, 0xb6}}, + "looparrowleft": &HTML5Entity{Name: "looparrowleft", CodePoints: []int{8619}, Characters: []byte{0xe2, 0x86, 0xab}}, + "looparrowright": &HTML5Entity{Name: "looparrowright", CodePoints: []int{8620}, Characters: []byte{0xe2, 0x86, 0xac}}, + "lopar": &HTML5Entity{Name: "lopar", CodePoints: []int{10629}, Characters: []byte{0xe2, 0xa6, 0x85}}, + "lopf": &HTML5Entity{Name: "lopf", CodePoints: []int{120157}, Characters: []byte{0xf0, 0x9d, 0x95, 0x9d}}, + "loplus": &HTML5Entity{Name: "loplus", CodePoints: []int{10797}, Characters: []byte{0xe2, 0xa8, 0xad}}, + "lotimes": &HTML5Entity{Name: "lotimes", CodePoints: []int{10804}, Characters: []byte{0xe2, 0xa8, 0xb4}}, + "lowast": &HTML5Entity{Name: "lowast", CodePoints: []int{8727}, Characters: []byte{0xe2, 0x88, 0x97}}, + "lowbar": &HTML5Entity{Name: "lowbar", CodePoints: []int{95}, Characters: []byte{0x5f}}, + "loz": &HTML5Entity{Name: "loz", CodePoints: []int{9674}, Characters: []byte{0xe2, 0x97, 0x8a}}, + "lozenge": &HTML5Entity{Name: "lozenge", CodePoints: []int{9674}, Characters: []byte{0xe2, 0x97, 0x8a}}, + "lozf": &HTML5Entity{Name: "lozf", CodePoints: []int{10731}, Characters: []byte{0xe2, 0xa7, 0xab}}, + "lpar": &HTML5Entity{Name: "lpar", CodePoints: []int{40}, Characters: []byte{0x28}}, + "lparlt": &HTML5Entity{Name: "lparlt", CodePoints: []int{10643}, Characters: []byte{0xe2, 0xa6, 0x93}}, + "lrarr": &HTML5Entity{Name: "lrarr", CodePoints: []int{8646}, Characters: []byte{0xe2, 0x87, 0x86}}, + "lrcorner": &HTML5Entity{Name: "lrcorner", CodePoints: []int{8991}, Characters: []byte{0xe2, 0x8c, 0x9f}}, + "lrhar": &HTML5Entity{Name: "lrhar", CodePoints: []int{8651}, Characters: []byte{0xe2, 0x87, 0x8b}}, + "lrhard": &HTML5Entity{Name: "lrhard", CodePoints: []int{10605}, Characters: []byte{0xe2, 0xa5, 0xad}}, + "lrm": &HTML5Entity{Name: "lrm", CodePoints: []int{8206}, Characters: []byte{0xe2, 0x80, 0x8e}}, + "lrtri": &HTML5Entity{Name: "lrtri", CodePoints: []int{8895}, Characters: []byte{0xe2, 0x8a, 0xbf}}, + "lsaquo": &HTML5Entity{Name: "lsaquo", CodePoints: []int{8249}, Characters: []byte{0xe2, 0x80, 0xb9}}, + "lscr": &HTML5Entity{Name: "lscr", CodePoints: []int{120001}, Characters: []byte{0xf0, 0x9d, 0x93, 0x81}}, + "lsh": &HTML5Entity{Name: "lsh", CodePoints: []int{8624}, Characters: []byte{0xe2, 0x86, 0xb0}}, + "lsim": &HTML5Entity{Name: "lsim", CodePoints: []int{8818}, Characters: []byte{0xe2, 0x89, 0xb2}}, + "lsime": &HTML5Entity{Name: "lsime", CodePoints: []int{10893}, Characters: []byte{0xe2, 0xaa, 0x8d}}, + "lsimg": &HTML5Entity{Name: "lsimg", CodePoints: []int{10895}, Characters: []byte{0xe2, 0xaa, 0x8f}}, + "lsqb": &HTML5Entity{Name: "lsqb", CodePoints: []int{91}, Characters: []byte{0x5b}}, + "lsquo": &HTML5Entity{Name: "lsquo", CodePoints: []int{8216}, Characters: []byte{0xe2, 0x80, 0x98}}, + "lsquor": &HTML5Entity{Name: "lsquor", CodePoints: []int{8218}, Characters: []byte{0xe2, 0x80, 0x9a}}, + "lstrok": &HTML5Entity{Name: "lstrok", CodePoints: []int{322}, Characters: []byte{0xc5, 0x82}}, + "lt": &HTML5Entity{Name: "lt", CodePoints: []int{60}, Characters: []byte{0x3c}}, + "ltcc": &HTML5Entity{Name: "ltcc", CodePoints: []int{10918}, Characters: []byte{0xe2, 0xaa, 0xa6}}, + "ltcir": &HTML5Entity{Name: "ltcir", CodePoints: []int{10873}, Characters: []byte{0xe2, 0xa9, 0xb9}}, + "ltdot": &HTML5Entity{Name: "ltdot", CodePoints: []int{8918}, Characters: []byte{0xe2, 0x8b, 0x96}}, + "lthree": &HTML5Entity{Name: "lthree", CodePoints: []int{8907}, Characters: []byte{0xe2, 0x8b, 0x8b}}, + "ltimes": &HTML5Entity{Name: "ltimes", CodePoints: []int{8905}, Characters: []byte{0xe2, 0x8b, 0x89}}, + "ltlarr": &HTML5Entity{Name: "ltlarr", CodePoints: []int{10614}, Characters: []byte{0xe2, 0xa5, 0xb6}}, + "ltquest": &HTML5Entity{Name: "ltquest", CodePoints: []int{10875}, Characters: []byte{0xe2, 0xa9, 0xbb}}, + "ltrPar": &HTML5Entity{Name: "ltrPar", CodePoints: []int{10646}, Characters: []byte{0xe2, 0xa6, 0x96}}, + "ltri": &HTML5Entity{Name: "ltri", CodePoints: []int{9667}, Characters: []byte{0xe2, 0x97, 0x83}}, + "ltrie": &HTML5Entity{Name: "ltrie", CodePoints: []int{8884}, Characters: []byte{0xe2, 0x8a, 0xb4}}, + "ltrif": &HTML5Entity{Name: "ltrif", CodePoints: []int{9666}, Characters: []byte{0xe2, 0x97, 0x82}}, + "lurdshar": &HTML5Entity{Name: "lurdshar", CodePoints: []int{10570}, Characters: []byte{0xe2, 0xa5, 0x8a}}, + "luruhar": &HTML5Entity{Name: "luruhar", CodePoints: []int{10598}, Characters: []byte{0xe2, 0xa5, 0xa6}}, + "lvertneqq": &HTML5Entity{Name: "lvertneqq", CodePoints: []int{8808, 65024}, Characters: []byte{0xe2, 0x89, 0xa8, 0xef, 0xb8, 0x80}}, + "lvnE": &HTML5Entity{Name: "lvnE", CodePoints: []int{8808, 65024}, Characters: []byte{0xe2, 0x89, 0xa8, 0xef, 0xb8, 0x80}}, + "mDDot": &HTML5Entity{Name: "mDDot", CodePoints: []int{8762}, Characters: []byte{0xe2, 0x88, 0xba}}, + "macr": &HTML5Entity{Name: "macr", CodePoints: []int{175}, Characters: []byte{0xc2, 0xaf}}, + "male": &HTML5Entity{Name: "male", CodePoints: []int{9794}, Characters: []byte{0xe2, 0x99, 0x82}}, + "malt": &HTML5Entity{Name: "malt", CodePoints: []int{10016}, Characters: []byte{0xe2, 0x9c, 0xa0}}, + "maltese": &HTML5Entity{Name: "maltese", CodePoints: []int{10016}, Characters: []byte{0xe2, 0x9c, 0xa0}}, + "map": &HTML5Entity{Name: "map", CodePoints: []int{8614}, Characters: []byte{0xe2, 0x86, 0xa6}}, + "mapsto": &HTML5Entity{Name: "mapsto", CodePoints: []int{8614}, Characters: []byte{0xe2, 0x86, 0xa6}}, + "mapstodown": &HTML5Entity{Name: "mapstodown", CodePoints: []int{8615}, Characters: []byte{0xe2, 0x86, 0xa7}}, + "mapstoleft": &HTML5Entity{Name: "mapstoleft", CodePoints: []int{8612}, Characters: []byte{0xe2, 0x86, 0xa4}}, + "mapstoup": &HTML5Entity{Name: "mapstoup", CodePoints: []int{8613}, Characters: []byte{0xe2, 0x86, 0xa5}}, + "marker": &HTML5Entity{Name: "marker", CodePoints: []int{9646}, Characters: []byte{0xe2, 0x96, 0xae}}, + "mcomma": &HTML5Entity{Name: "mcomma", CodePoints: []int{10793}, Characters: []byte{0xe2, 0xa8, 0xa9}}, + "mcy": &HTML5Entity{Name: "mcy", CodePoints: []int{1084}, Characters: []byte{0xd0, 0xbc}}, + "mdash": &HTML5Entity{Name: "mdash", CodePoints: []int{8212}, Characters: []byte{0xe2, 0x80, 0x94}}, + "measuredangle": &HTML5Entity{Name: "measuredangle", CodePoints: []int{8737}, Characters: []byte{0xe2, 0x88, 0xa1}}, + "mfr": &HTML5Entity{Name: "mfr", CodePoints: []int{120106}, Characters: []byte{0xf0, 0x9d, 0x94, 0xaa}}, + "mho": &HTML5Entity{Name: "mho", CodePoints: []int{8487}, Characters: []byte{0xe2, 0x84, 0xa7}}, + "micro": &HTML5Entity{Name: "micro", CodePoints: []int{181}, Characters: []byte{0xc2, 0xb5}}, + "mid": &HTML5Entity{Name: "mid", CodePoints: []int{8739}, Characters: []byte{0xe2, 0x88, 0xa3}}, + "midast": &HTML5Entity{Name: "midast", CodePoints: []int{42}, Characters: []byte{0x2a}}, + "midcir": &HTML5Entity{Name: "midcir", CodePoints: []int{10992}, Characters: []byte{0xe2, 0xab, 0xb0}}, + "middot": &HTML5Entity{Name: "middot", CodePoints: []int{183}, Characters: []byte{0xc2, 0xb7}}, + "minus": &HTML5Entity{Name: "minus", CodePoints: []int{8722}, Characters: []byte{0xe2, 0x88, 0x92}}, + "minusb": &HTML5Entity{Name: "minusb", CodePoints: []int{8863}, Characters: []byte{0xe2, 0x8a, 0x9f}}, + "minusd": &HTML5Entity{Name: "minusd", CodePoints: []int{8760}, Characters: []byte{0xe2, 0x88, 0xb8}}, + "minusdu": &HTML5Entity{Name: "minusdu", CodePoints: []int{10794}, Characters: []byte{0xe2, 0xa8, 0xaa}}, + "mlcp": &HTML5Entity{Name: "mlcp", CodePoints: []int{10971}, Characters: []byte{0xe2, 0xab, 0x9b}}, + "mldr": &HTML5Entity{Name: "mldr", CodePoints: []int{8230}, Characters: []byte{0xe2, 0x80, 0xa6}}, + "mnplus": &HTML5Entity{Name: "mnplus", CodePoints: []int{8723}, Characters: []byte{0xe2, 0x88, 0x93}}, + "models": &HTML5Entity{Name: "models", CodePoints: []int{8871}, Characters: []byte{0xe2, 0x8a, 0xa7}}, + "mopf": &HTML5Entity{Name: "mopf", CodePoints: []int{120158}, Characters: []byte{0xf0, 0x9d, 0x95, 0x9e}}, + "mp": &HTML5Entity{Name: "mp", CodePoints: []int{8723}, Characters: []byte{0xe2, 0x88, 0x93}}, + "mscr": &HTML5Entity{Name: "mscr", CodePoints: []int{120002}, Characters: []byte{0xf0, 0x9d, 0x93, 0x82}}, + "mstpos": &HTML5Entity{Name: "mstpos", CodePoints: []int{8766}, Characters: []byte{0xe2, 0x88, 0xbe}}, + "mu": &HTML5Entity{Name: "mu", CodePoints: []int{956}, Characters: []byte{0xce, 0xbc}}, + "multimap": &HTML5Entity{Name: "multimap", CodePoints: []int{8888}, Characters: []byte{0xe2, 0x8a, 0xb8}}, + "mumap": &HTML5Entity{Name: "mumap", CodePoints: []int{8888}, Characters: []byte{0xe2, 0x8a, 0xb8}}, + "nGg": &HTML5Entity{Name: "nGg", CodePoints: []int{8921, 824}, Characters: []byte{0xe2, 0x8b, 0x99, 0xcc, 0xb8}}, + "nGt": &HTML5Entity{Name: "nGt", CodePoints: []int{8811, 8402}, Characters: []byte{0xe2, 0x89, 0xab, 0xe2, 0x83, 0x92}}, + "nGtv": &HTML5Entity{Name: "nGtv", CodePoints: []int{8811, 824}, Characters: []byte{0xe2, 0x89, 0xab, 0xcc, 0xb8}}, + "nLeftarrow": &HTML5Entity{Name: "nLeftarrow", CodePoints: []int{8653}, Characters: []byte{0xe2, 0x87, 0x8d}}, + "nLeftrightarrow": &HTML5Entity{Name: "nLeftrightarrow", CodePoints: []int{8654}, Characters: []byte{0xe2, 0x87, 0x8e}}, + "nLl": &HTML5Entity{Name: "nLl", CodePoints: []int{8920, 824}, Characters: []byte{0xe2, 0x8b, 0x98, 0xcc, 0xb8}}, + "nLt": &HTML5Entity{Name: "nLt", CodePoints: []int{8810, 8402}, Characters: []byte{0xe2, 0x89, 0xaa, 0xe2, 0x83, 0x92}}, + "nLtv": &HTML5Entity{Name: "nLtv", CodePoints: []int{8810, 824}, Characters: []byte{0xe2, 0x89, 0xaa, 0xcc, 0xb8}}, + "nRightarrow": &HTML5Entity{Name: "nRightarrow", CodePoints: []int{8655}, Characters: []byte{0xe2, 0x87, 0x8f}}, + "nVDash": &HTML5Entity{Name: "nVDash", CodePoints: []int{8879}, Characters: []byte{0xe2, 0x8a, 0xaf}}, + "nVdash": &HTML5Entity{Name: "nVdash", CodePoints: []int{8878}, Characters: []byte{0xe2, 0x8a, 0xae}}, + "nabla": &HTML5Entity{Name: "nabla", CodePoints: []int{8711}, Characters: []byte{0xe2, 0x88, 0x87}}, + "nacute": &HTML5Entity{Name: "nacute", CodePoints: []int{324}, Characters: []byte{0xc5, 0x84}}, + "nang": &HTML5Entity{Name: "nang", CodePoints: []int{8736, 8402}, Characters: []byte{0xe2, 0x88, 0xa0, 0xe2, 0x83, 0x92}}, + "nap": &HTML5Entity{Name: "nap", CodePoints: []int{8777}, Characters: []byte{0xe2, 0x89, 0x89}}, + "napE": &HTML5Entity{Name: "napE", CodePoints: []int{10864, 824}, Characters: []byte{0xe2, 0xa9, 0xb0, 0xcc, 0xb8}}, + "napid": &HTML5Entity{Name: "napid", CodePoints: []int{8779, 824}, Characters: []byte{0xe2, 0x89, 0x8b, 0xcc, 0xb8}}, + "napos": &HTML5Entity{Name: "napos", CodePoints: []int{329}, Characters: []byte{0xc5, 0x89}}, + "napprox": &HTML5Entity{Name: "napprox", CodePoints: []int{8777}, Characters: []byte{0xe2, 0x89, 0x89}}, + "natur": &HTML5Entity{Name: "natur", CodePoints: []int{9838}, Characters: []byte{0xe2, 0x99, 0xae}}, + "natural": &HTML5Entity{Name: "natural", CodePoints: []int{9838}, Characters: []byte{0xe2, 0x99, 0xae}}, + "naturals": &HTML5Entity{Name: "naturals", CodePoints: []int{8469}, Characters: []byte{0xe2, 0x84, 0x95}}, + "nbsp": &HTML5Entity{Name: "nbsp", CodePoints: []int{160}, Characters: []byte{0xc2, 0xa0}}, + "nbump": &HTML5Entity{Name: "nbump", CodePoints: []int{8782, 824}, Characters: []byte{0xe2, 0x89, 0x8e, 0xcc, 0xb8}}, + "nbumpe": &HTML5Entity{Name: "nbumpe", CodePoints: []int{8783, 824}, Characters: []byte{0xe2, 0x89, 0x8f, 0xcc, 0xb8}}, + "ncap": &HTML5Entity{Name: "ncap", CodePoints: []int{10819}, Characters: []byte{0xe2, 0xa9, 0x83}}, + "ncaron": &HTML5Entity{Name: "ncaron", CodePoints: []int{328}, Characters: []byte{0xc5, 0x88}}, + "ncedil": &HTML5Entity{Name: "ncedil", CodePoints: []int{326}, Characters: []byte{0xc5, 0x86}}, + "ncong": &HTML5Entity{Name: "ncong", CodePoints: []int{8775}, Characters: []byte{0xe2, 0x89, 0x87}}, + "ncongdot": &HTML5Entity{Name: "ncongdot", CodePoints: []int{10861, 824}, Characters: []byte{0xe2, 0xa9, 0xad, 0xcc, 0xb8}}, + "ncup": &HTML5Entity{Name: "ncup", CodePoints: []int{10818}, Characters: []byte{0xe2, 0xa9, 0x82}}, + "ncy": &HTML5Entity{Name: "ncy", CodePoints: []int{1085}, Characters: []byte{0xd0, 0xbd}}, + "ndash": &HTML5Entity{Name: "ndash", CodePoints: []int{8211}, Characters: []byte{0xe2, 0x80, 0x93}}, + "ne": &HTML5Entity{Name: "ne", CodePoints: []int{8800}, Characters: []byte{0xe2, 0x89, 0xa0}}, + "neArr": &HTML5Entity{Name: "neArr", CodePoints: []int{8663}, Characters: []byte{0xe2, 0x87, 0x97}}, + "nearhk": &HTML5Entity{Name: "nearhk", CodePoints: []int{10532}, Characters: []byte{0xe2, 0xa4, 0xa4}}, + "nearr": &HTML5Entity{Name: "nearr", CodePoints: []int{8599}, Characters: []byte{0xe2, 0x86, 0x97}}, + "nearrow": &HTML5Entity{Name: "nearrow", CodePoints: []int{8599}, Characters: []byte{0xe2, 0x86, 0x97}}, + "nedot": &HTML5Entity{Name: "nedot", CodePoints: []int{8784, 824}, Characters: []byte{0xe2, 0x89, 0x90, 0xcc, 0xb8}}, + "nequiv": &HTML5Entity{Name: "nequiv", CodePoints: []int{8802}, Characters: []byte{0xe2, 0x89, 0xa2}}, + "nesear": &HTML5Entity{Name: "nesear", CodePoints: []int{10536}, Characters: []byte{0xe2, 0xa4, 0xa8}}, + "nesim": &HTML5Entity{Name: "nesim", CodePoints: []int{8770, 824}, Characters: []byte{0xe2, 0x89, 0x82, 0xcc, 0xb8}}, + "nexist": &HTML5Entity{Name: "nexist", CodePoints: []int{8708}, Characters: []byte{0xe2, 0x88, 0x84}}, + "nexists": &HTML5Entity{Name: "nexists", CodePoints: []int{8708}, Characters: []byte{0xe2, 0x88, 0x84}}, + "nfr": &HTML5Entity{Name: "nfr", CodePoints: []int{120107}, Characters: []byte{0xf0, 0x9d, 0x94, 0xab}}, + "ngE": &HTML5Entity{Name: "ngE", CodePoints: []int{8807, 824}, Characters: []byte{0xe2, 0x89, 0xa7, 0xcc, 0xb8}}, + "nge": &HTML5Entity{Name: "nge", CodePoints: []int{8817}, Characters: []byte{0xe2, 0x89, 0xb1}}, + "ngeq": &HTML5Entity{Name: "ngeq", CodePoints: []int{8817}, Characters: []byte{0xe2, 0x89, 0xb1}}, + "ngeqq": &HTML5Entity{Name: "ngeqq", CodePoints: []int{8807, 824}, Characters: []byte{0xe2, 0x89, 0xa7, 0xcc, 0xb8}}, + "ngeqslant": &HTML5Entity{Name: "ngeqslant", CodePoints: []int{10878, 824}, Characters: []byte{0xe2, 0xa9, 0xbe, 0xcc, 0xb8}}, + "nges": &HTML5Entity{Name: "nges", CodePoints: []int{10878, 824}, Characters: []byte{0xe2, 0xa9, 0xbe, 0xcc, 0xb8}}, + "ngsim": &HTML5Entity{Name: "ngsim", CodePoints: []int{8821}, Characters: []byte{0xe2, 0x89, 0xb5}}, + "ngt": &HTML5Entity{Name: "ngt", CodePoints: []int{8815}, Characters: []byte{0xe2, 0x89, 0xaf}}, + "ngtr": &HTML5Entity{Name: "ngtr", CodePoints: []int{8815}, Characters: []byte{0xe2, 0x89, 0xaf}}, + "nhArr": &HTML5Entity{Name: "nhArr", CodePoints: []int{8654}, Characters: []byte{0xe2, 0x87, 0x8e}}, + "nharr": &HTML5Entity{Name: "nharr", CodePoints: []int{8622}, Characters: []byte{0xe2, 0x86, 0xae}}, + "nhpar": &HTML5Entity{Name: "nhpar", CodePoints: []int{10994}, Characters: []byte{0xe2, 0xab, 0xb2}}, + "ni": &HTML5Entity{Name: "ni", CodePoints: []int{8715}, Characters: []byte{0xe2, 0x88, 0x8b}}, + "nis": &HTML5Entity{Name: "nis", CodePoints: []int{8956}, Characters: []byte{0xe2, 0x8b, 0xbc}}, + "nisd": &HTML5Entity{Name: "nisd", CodePoints: []int{8954}, Characters: []byte{0xe2, 0x8b, 0xba}}, + "niv": &HTML5Entity{Name: "niv", CodePoints: []int{8715}, Characters: []byte{0xe2, 0x88, 0x8b}}, + "njcy": &HTML5Entity{Name: "njcy", CodePoints: []int{1114}, Characters: []byte{0xd1, 0x9a}}, + "nlArr": &HTML5Entity{Name: "nlArr", CodePoints: []int{8653}, Characters: []byte{0xe2, 0x87, 0x8d}}, + "nlE": &HTML5Entity{Name: "nlE", CodePoints: []int{8806, 824}, Characters: []byte{0xe2, 0x89, 0xa6, 0xcc, 0xb8}}, + "nlarr": &HTML5Entity{Name: "nlarr", CodePoints: []int{8602}, Characters: []byte{0xe2, 0x86, 0x9a}}, + "nldr": &HTML5Entity{Name: "nldr", CodePoints: []int{8229}, Characters: []byte{0xe2, 0x80, 0xa5}}, + "nle": &HTML5Entity{Name: "nle", CodePoints: []int{8816}, Characters: []byte{0xe2, 0x89, 0xb0}}, + "nleftarrow": &HTML5Entity{Name: "nleftarrow", CodePoints: []int{8602}, Characters: []byte{0xe2, 0x86, 0x9a}}, + "nleftrightarrow": &HTML5Entity{Name: "nleftrightarrow", CodePoints: []int{8622}, Characters: []byte{0xe2, 0x86, 0xae}}, + "nleq": &HTML5Entity{Name: "nleq", CodePoints: []int{8816}, Characters: []byte{0xe2, 0x89, 0xb0}}, + "nleqq": &HTML5Entity{Name: "nleqq", CodePoints: []int{8806, 824}, Characters: []byte{0xe2, 0x89, 0xa6, 0xcc, 0xb8}}, + "nleqslant": &HTML5Entity{Name: "nleqslant", CodePoints: []int{10877, 824}, Characters: []byte{0xe2, 0xa9, 0xbd, 0xcc, 0xb8}}, + "nles": &HTML5Entity{Name: "nles", CodePoints: []int{10877, 824}, Characters: []byte{0xe2, 0xa9, 0xbd, 0xcc, 0xb8}}, + "nless": &HTML5Entity{Name: "nless", CodePoints: []int{8814}, Characters: []byte{0xe2, 0x89, 0xae}}, + "nlsim": &HTML5Entity{Name: "nlsim", CodePoints: []int{8820}, Characters: []byte{0xe2, 0x89, 0xb4}}, + "nlt": &HTML5Entity{Name: "nlt", CodePoints: []int{8814}, Characters: []byte{0xe2, 0x89, 0xae}}, + "nltri": &HTML5Entity{Name: "nltri", CodePoints: []int{8938}, Characters: []byte{0xe2, 0x8b, 0xaa}}, + "nltrie": &HTML5Entity{Name: "nltrie", CodePoints: []int{8940}, Characters: []byte{0xe2, 0x8b, 0xac}}, + "nmid": &HTML5Entity{Name: "nmid", CodePoints: []int{8740}, Characters: []byte{0xe2, 0x88, 0xa4}}, + "nopf": &HTML5Entity{Name: "nopf", CodePoints: []int{120159}, Characters: []byte{0xf0, 0x9d, 0x95, 0x9f}}, + "not": &HTML5Entity{Name: "not", CodePoints: []int{172}, Characters: []byte{0xc2, 0xac}}, + "notin": &HTML5Entity{Name: "notin", CodePoints: []int{8713}, Characters: []byte{0xe2, 0x88, 0x89}}, + "notinE": &HTML5Entity{Name: "notinE", CodePoints: []int{8953, 824}, Characters: []byte{0xe2, 0x8b, 0xb9, 0xcc, 0xb8}}, + "notindot": &HTML5Entity{Name: "notindot", CodePoints: []int{8949, 824}, Characters: []byte{0xe2, 0x8b, 0xb5, 0xcc, 0xb8}}, + "notinva": &HTML5Entity{Name: "notinva", CodePoints: []int{8713}, Characters: []byte{0xe2, 0x88, 0x89}}, + "notinvb": &HTML5Entity{Name: "notinvb", CodePoints: []int{8951}, Characters: []byte{0xe2, 0x8b, 0xb7}}, + "notinvc": &HTML5Entity{Name: "notinvc", CodePoints: []int{8950}, Characters: []byte{0xe2, 0x8b, 0xb6}}, + "notni": &HTML5Entity{Name: "notni", CodePoints: []int{8716}, Characters: []byte{0xe2, 0x88, 0x8c}}, + "notniva": &HTML5Entity{Name: "notniva", CodePoints: []int{8716}, Characters: []byte{0xe2, 0x88, 0x8c}}, + "notnivb": &HTML5Entity{Name: "notnivb", CodePoints: []int{8958}, Characters: []byte{0xe2, 0x8b, 0xbe}}, + "notnivc": &HTML5Entity{Name: "notnivc", CodePoints: []int{8957}, Characters: []byte{0xe2, 0x8b, 0xbd}}, + "npar": &HTML5Entity{Name: "npar", CodePoints: []int{8742}, Characters: []byte{0xe2, 0x88, 0xa6}}, + "nparallel": &HTML5Entity{Name: "nparallel", CodePoints: []int{8742}, Characters: []byte{0xe2, 0x88, 0xa6}}, + "nparsl": &HTML5Entity{Name: "nparsl", CodePoints: []int{11005, 8421}, Characters: []byte{0xe2, 0xab, 0xbd, 0xe2, 0x83, 0xa5}}, + "npart": &HTML5Entity{Name: "npart", CodePoints: []int{8706, 824}, Characters: []byte{0xe2, 0x88, 0x82, 0xcc, 0xb8}}, + "npolint": &HTML5Entity{Name: "npolint", CodePoints: []int{10772}, Characters: []byte{0xe2, 0xa8, 0x94}}, + "npr": &HTML5Entity{Name: "npr", CodePoints: []int{8832}, Characters: []byte{0xe2, 0x8a, 0x80}}, + "nprcue": &HTML5Entity{Name: "nprcue", CodePoints: []int{8928}, Characters: []byte{0xe2, 0x8b, 0xa0}}, + "npre": &HTML5Entity{Name: "npre", CodePoints: []int{10927, 824}, Characters: []byte{0xe2, 0xaa, 0xaf, 0xcc, 0xb8}}, + "nprec": &HTML5Entity{Name: "nprec", CodePoints: []int{8832}, Characters: []byte{0xe2, 0x8a, 0x80}}, + "npreceq": &HTML5Entity{Name: "npreceq", CodePoints: []int{10927, 824}, Characters: []byte{0xe2, 0xaa, 0xaf, 0xcc, 0xb8}}, + "nrArr": &HTML5Entity{Name: "nrArr", CodePoints: []int{8655}, Characters: []byte{0xe2, 0x87, 0x8f}}, + "nrarr": &HTML5Entity{Name: "nrarr", CodePoints: []int{8603}, Characters: []byte{0xe2, 0x86, 0x9b}}, + "nrarrc": &HTML5Entity{Name: "nrarrc", CodePoints: []int{10547, 824}, Characters: []byte{0xe2, 0xa4, 0xb3, 0xcc, 0xb8}}, + "nrarrw": &HTML5Entity{Name: "nrarrw", CodePoints: []int{8605, 824}, Characters: []byte{0xe2, 0x86, 0x9d, 0xcc, 0xb8}}, + "nrightarrow": &HTML5Entity{Name: "nrightarrow", CodePoints: []int{8603}, Characters: []byte{0xe2, 0x86, 0x9b}}, + "nrtri": &HTML5Entity{Name: "nrtri", CodePoints: []int{8939}, Characters: []byte{0xe2, 0x8b, 0xab}}, + "nrtrie": &HTML5Entity{Name: "nrtrie", CodePoints: []int{8941}, Characters: []byte{0xe2, 0x8b, 0xad}}, + "nsc": &HTML5Entity{Name: "nsc", CodePoints: []int{8833}, Characters: []byte{0xe2, 0x8a, 0x81}}, + "nsccue": &HTML5Entity{Name: "nsccue", CodePoints: []int{8929}, Characters: []byte{0xe2, 0x8b, 0xa1}}, + "nsce": &HTML5Entity{Name: "nsce", CodePoints: []int{10928, 824}, Characters: []byte{0xe2, 0xaa, 0xb0, 0xcc, 0xb8}}, + "nscr": &HTML5Entity{Name: "nscr", CodePoints: []int{120003}, Characters: []byte{0xf0, 0x9d, 0x93, 0x83}}, + "nshortmid": &HTML5Entity{Name: "nshortmid", CodePoints: []int{8740}, Characters: []byte{0xe2, 0x88, 0xa4}}, + "nshortparallel": &HTML5Entity{Name: "nshortparallel", CodePoints: []int{8742}, Characters: []byte{0xe2, 0x88, 0xa6}}, + "nsim": &HTML5Entity{Name: "nsim", CodePoints: []int{8769}, Characters: []byte{0xe2, 0x89, 0x81}}, + "nsime": &HTML5Entity{Name: "nsime", CodePoints: []int{8772}, Characters: []byte{0xe2, 0x89, 0x84}}, + "nsimeq": &HTML5Entity{Name: "nsimeq", CodePoints: []int{8772}, Characters: []byte{0xe2, 0x89, 0x84}}, + "nsmid": &HTML5Entity{Name: "nsmid", CodePoints: []int{8740}, Characters: []byte{0xe2, 0x88, 0xa4}}, + "nspar": &HTML5Entity{Name: "nspar", CodePoints: []int{8742}, Characters: []byte{0xe2, 0x88, 0xa6}}, + "nsqsube": &HTML5Entity{Name: "nsqsube", CodePoints: []int{8930}, Characters: []byte{0xe2, 0x8b, 0xa2}}, + "nsqsupe": &HTML5Entity{Name: "nsqsupe", CodePoints: []int{8931}, Characters: []byte{0xe2, 0x8b, 0xa3}}, + "nsub": &HTML5Entity{Name: "nsub", CodePoints: []int{8836}, Characters: []byte{0xe2, 0x8a, 0x84}}, + "nsubE": &HTML5Entity{Name: "nsubE", CodePoints: []int{10949, 824}, Characters: []byte{0xe2, 0xab, 0x85, 0xcc, 0xb8}}, + "nsube": &HTML5Entity{Name: "nsube", CodePoints: []int{8840}, Characters: []byte{0xe2, 0x8a, 0x88}}, + "nsubset": &HTML5Entity{Name: "nsubset", CodePoints: []int{8834, 8402}, Characters: []byte{0xe2, 0x8a, 0x82, 0xe2, 0x83, 0x92}}, + "nsubseteq": &HTML5Entity{Name: "nsubseteq", CodePoints: []int{8840}, Characters: []byte{0xe2, 0x8a, 0x88}}, + "nsubseteqq": &HTML5Entity{Name: "nsubseteqq", CodePoints: []int{10949, 824}, Characters: []byte{0xe2, 0xab, 0x85, 0xcc, 0xb8}}, + "nsucc": &HTML5Entity{Name: "nsucc", CodePoints: []int{8833}, Characters: []byte{0xe2, 0x8a, 0x81}}, + "nsucceq": &HTML5Entity{Name: "nsucceq", CodePoints: []int{10928, 824}, Characters: []byte{0xe2, 0xaa, 0xb0, 0xcc, 0xb8}}, + "nsup": &HTML5Entity{Name: "nsup", CodePoints: []int{8837}, Characters: []byte{0xe2, 0x8a, 0x85}}, + "nsupE": &HTML5Entity{Name: "nsupE", CodePoints: []int{10950, 824}, Characters: []byte{0xe2, 0xab, 0x86, 0xcc, 0xb8}}, + "nsupe": &HTML5Entity{Name: "nsupe", CodePoints: []int{8841}, Characters: []byte{0xe2, 0x8a, 0x89}}, + "nsupset": &HTML5Entity{Name: "nsupset", CodePoints: []int{8835, 8402}, Characters: []byte{0xe2, 0x8a, 0x83, 0xe2, 0x83, 0x92}}, + "nsupseteq": &HTML5Entity{Name: "nsupseteq", CodePoints: []int{8841}, Characters: []byte{0xe2, 0x8a, 0x89}}, + "nsupseteqq": &HTML5Entity{Name: "nsupseteqq", CodePoints: []int{10950, 824}, Characters: []byte{0xe2, 0xab, 0x86, 0xcc, 0xb8}}, + "ntgl": &HTML5Entity{Name: "ntgl", CodePoints: []int{8825}, Characters: []byte{0xe2, 0x89, 0xb9}}, + "ntilde": &HTML5Entity{Name: "ntilde", CodePoints: []int{241}, Characters: []byte{0xc3, 0xb1}}, + "ntlg": &HTML5Entity{Name: "ntlg", CodePoints: []int{8824}, Characters: []byte{0xe2, 0x89, 0xb8}}, + "ntriangleleft": &HTML5Entity{Name: "ntriangleleft", CodePoints: []int{8938}, Characters: []byte{0xe2, 0x8b, 0xaa}}, + "ntrianglelefteq": &HTML5Entity{Name: "ntrianglelefteq", CodePoints: []int{8940}, Characters: []byte{0xe2, 0x8b, 0xac}}, + "ntriangleright": &HTML5Entity{Name: "ntriangleright", CodePoints: []int{8939}, Characters: []byte{0xe2, 0x8b, 0xab}}, + "ntrianglerighteq": &HTML5Entity{Name: "ntrianglerighteq", CodePoints: []int{8941}, Characters: []byte{0xe2, 0x8b, 0xad}}, + "nu": &HTML5Entity{Name: "nu", CodePoints: []int{957}, Characters: []byte{0xce, 0xbd}}, + "num": &HTML5Entity{Name: "num", CodePoints: []int{35}, Characters: []byte{0x23}}, + "numero": &HTML5Entity{Name: "numero", CodePoints: []int{8470}, Characters: []byte{0xe2, 0x84, 0x96}}, + "numsp": &HTML5Entity{Name: "numsp", CodePoints: []int{8199}, Characters: []byte{0xe2, 0x80, 0x87}}, + "nvDash": &HTML5Entity{Name: "nvDash", CodePoints: []int{8877}, Characters: []byte{0xe2, 0x8a, 0xad}}, + "nvHarr": &HTML5Entity{Name: "nvHarr", CodePoints: []int{10500}, Characters: []byte{0xe2, 0xa4, 0x84}}, + "nvap": &HTML5Entity{Name: "nvap", CodePoints: []int{8781, 8402}, Characters: []byte{0xe2, 0x89, 0x8d, 0xe2, 0x83, 0x92}}, + "nvdash": &HTML5Entity{Name: "nvdash", CodePoints: []int{8876}, Characters: []byte{0xe2, 0x8a, 0xac}}, + "nvge": &HTML5Entity{Name: "nvge", CodePoints: []int{8805, 8402}, Characters: []byte{0xe2, 0x89, 0xa5, 0xe2, 0x83, 0x92}}, + "nvgt": &HTML5Entity{Name: "nvgt", CodePoints: []int{62, 8402}, Characters: []byte{0x3e, 0xe2, 0x83, 0x92}}, + "nvinfin": &HTML5Entity{Name: "nvinfin", CodePoints: []int{10718}, Characters: []byte{0xe2, 0xa7, 0x9e}}, + "nvlArr": &HTML5Entity{Name: "nvlArr", CodePoints: []int{10498}, Characters: []byte{0xe2, 0xa4, 0x82}}, + "nvle": &HTML5Entity{Name: "nvle", CodePoints: []int{8804, 8402}, Characters: []byte{0xe2, 0x89, 0xa4, 0xe2, 0x83, 0x92}}, + "nvlt": &HTML5Entity{Name: "nvlt", CodePoints: []int{60, 8402}, Characters: []byte{0x3c, 0xe2, 0x83, 0x92}}, + "nvltrie": &HTML5Entity{Name: "nvltrie", CodePoints: []int{8884, 8402}, Characters: []byte{0xe2, 0x8a, 0xb4, 0xe2, 0x83, 0x92}}, + "nvrArr": &HTML5Entity{Name: "nvrArr", CodePoints: []int{10499}, Characters: []byte{0xe2, 0xa4, 0x83}}, + "nvrtrie": &HTML5Entity{Name: "nvrtrie", CodePoints: []int{8885, 8402}, Characters: []byte{0xe2, 0x8a, 0xb5, 0xe2, 0x83, 0x92}}, + "nvsim": &HTML5Entity{Name: "nvsim", CodePoints: []int{8764, 8402}, Characters: []byte{0xe2, 0x88, 0xbc, 0xe2, 0x83, 0x92}}, + "nwArr": &HTML5Entity{Name: "nwArr", CodePoints: []int{8662}, Characters: []byte{0xe2, 0x87, 0x96}}, + "nwarhk": &HTML5Entity{Name: "nwarhk", CodePoints: []int{10531}, Characters: []byte{0xe2, 0xa4, 0xa3}}, + "nwarr": &HTML5Entity{Name: "nwarr", CodePoints: []int{8598}, Characters: []byte{0xe2, 0x86, 0x96}}, + "nwarrow": &HTML5Entity{Name: "nwarrow", CodePoints: []int{8598}, Characters: []byte{0xe2, 0x86, 0x96}}, + "nwnear": &HTML5Entity{Name: "nwnear", CodePoints: []int{10535}, Characters: []byte{0xe2, 0xa4, 0xa7}}, + "oS": &HTML5Entity{Name: "oS", CodePoints: []int{9416}, Characters: []byte{0xe2, 0x93, 0x88}}, + "oacute": &HTML5Entity{Name: "oacute", CodePoints: []int{243}, Characters: []byte{0xc3, 0xb3}}, + "oast": &HTML5Entity{Name: "oast", CodePoints: []int{8859}, Characters: []byte{0xe2, 0x8a, 0x9b}}, + "ocir": &HTML5Entity{Name: "ocir", CodePoints: []int{8858}, Characters: []byte{0xe2, 0x8a, 0x9a}}, + "ocirc": &HTML5Entity{Name: "ocirc", CodePoints: []int{244}, Characters: []byte{0xc3, 0xb4}}, + "ocy": &HTML5Entity{Name: "ocy", CodePoints: []int{1086}, Characters: []byte{0xd0, 0xbe}}, + "odash": &HTML5Entity{Name: "odash", CodePoints: []int{8861}, Characters: []byte{0xe2, 0x8a, 0x9d}}, + "odblac": &HTML5Entity{Name: "odblac", CodePoints: []int{337}, Characters: []byte{0xc5, 0x91}}, + "odiv": &HTML5Entity{Name: "odiv", CodePoints: []int{10808}, Characters: []byte{0xe2, 0xa8, 0xb8}}, + "odot": &HTML5Entity{Name: "odot", CodePoints: []int{8857}, Characters: []byte{0xe2, 0x8a, 0x99}}, + "odsold": &HTML5Entity{Name: "odsold", CodePoints: []int{10684}, Characters: []byte{0xe2, 0xa6, 0xbc}}, + "oelig": &HTML5Entity{Name: "oelig", CodePoints: []int{339}, Characters: []byte{0xc5, 0x93}}, + "ofcir": &HTML5Entity{Name: "ofcir", CodePoints: []int{10687}, Characters: []byte{0xe2, 0xa6, 0xbf}}, + "ofr": &HTML5Entity{Name: "ofr", CodePoints: []int{120108}, Characters: []byte{0xf0, 0x9d, 0x94, 0xac}}, + "ogon": &HTML5Entity{Name: "ogon", CodePoints: []int{731}, Characters: []byte{0xcb, 0x9b}}, + "ograve": &HTML5Entity{Name: "ograve", CodePoints: []int{242}, Characters: []byte{0xc3, 0xb2}}, + "ogt": &HTML5Entity{Name: "ogt", CodePoints: []int{10689}, Characters: []byte{0xe2, 0xa7, 0x81}}, + "ohbar": &HTML5Entity{Name: "ohbar", CodePoints: []int{10677}, Characters: []byte{0xe2, 0xa6, 0xb5}}, + "ohm": &HTML5Entity{Name: "ohm", CodePoints: []int{937}, Characters: []byte{0xce, 0xa9}}, + "oint": &HTML5Entity{Name: "oint", CodePoints: []int{8750}, Characters: []byte{0xe2, 0x88, 0xae}}, + "olarr": &HTML5Entity{Name: "olarr", CodePoints: []int{8634}, Characters: []byte{0xe2, 0x86, 0xba}}, + "olcir": &HTML5Entity{Name: "olcir", CodePoints: []int{10686}, Characters: []byte{0xe2, 0xa6, 0xbe}}, + "olcross": &HTML5Entity{Name: "olcross", CodePoints: []int{10683}, Characters: []byte{0xe2, 0xa6, 0xbb}}, + "oline": &HTML5Entity{Name: "oline", CodePoints: []int{8254}, Characters: []byte{0xe2, 0x80, 0xbe}}, + "olt": &HTML5Entity{Name: "olt", CodePoints: []int{10688}, Characters: []byte{0xe2, 0xa7, 0x80}}, + "omacr": &HTML5Entity{Name: "omacr", CodePoints: []int{333}, Characters: []byte{0xc5, 0x8d}}, + "omega": &HTML5Entity{Name: "omega", CodePoints: []int{969}, Characters: []byte{0xcf, 0x89}}, + "omicron": &HTML5Entity{Name: "omicron", CodePoints: []int{959}, Characters: []byte{0xce, 0xbf}}, + "omid": &HTML5Entity{Name: "omid", CodePoints: []int{10678}, Characters: []byte{0xe2, 0xa6, 0xb6}}, + "ominus": &HTML5Entity{Name: "ominus", CodePoints: []int{8854}, Characters: []byte{0xe2, 0x8a, 0x96}}, + "oopf": &HTML5Entity{Name: "oopf", CodePoints: []int{120160}, Characters: []byte{0xf0, 0x9d, 0x95, 0xa0}}, + "opar": &HTML5Entity{Name: "opar", CodePoints: []int{10679}, Characters: []byte{0xe2, 0xa6, 0xb7}}, + "operp": &HTML5Entity{Name: "operp", CodePoints: []int{10681}, Characters: []byte{0xe2, 0xa6, 0xb9}}, + "oplus": &HTML5Entity{Name: "oplus", CodePoints: []int{8853}, Characters: []byte{0xe2, 0x8a, 0x95}}, + "or": &HTML5Entity{Name: "or", CodePoints: []int{8744}, Characters: []byte{0xe2, 0x88, 0xa8}}, + "orarr": &HTML5Entity{Name: "orarr", CodePoints: []int{8635}, Characters: []byte{0xe2, 0x86, 0xbb}}, + "ord": &HTML5Entity{Name: "ord", CodePoints: []int{10845}, Characters: []byte{0xe2, 0xa9, 0x9d}}, + "order": &HTML5Entity{Name: "order", CodePoints: []int{8500}, Characters: []byte{0xe2, 0x84, 0xb4}}, + "orderof": &HTML5Entity{Name: "orderof", CodePoints: []int{8500}, Characters: []byte{0xe2, 0x84, 0xb4}}, + "ordf": &HTML5Entity{Name: "ordf", CodePoints: []int{170}, Characters: []byte{0xc2, 0xaa}}, + "ordm": &HTML5Entity{Name: "ordm", CodePoints: []int{186}, Characters: []byte{0xc2, 0xba}}, + "origof": &HTML5Entity{Name: "origof", CodePoints: []int{8886}, Characters: []byte{0xe2, 0x8a, 0xb6}}, + "oror": &HTML5Entity{Name: "oror", CodePoints: []int{10838}, Characters: []byte{0xe2, 0xa9, 0x96}}, + "orslope": &HTML5Entity{Name: "orslope", CodePoints: []int{10839}, Characters: []byte{0xe2, 0xa9, 0x97}}, + "orv": &HTML5Entity{Name: "orv", CodePoints: []int{10843}, Characters: []byte{0xe2, 0xa9, 0x9b}}, + "oscr": &HTML5Entity{Name: "oscr", CodePoints: []int{8500}, Characters: []byte{0xe2, 0x84, 0xb4}}, + "oslash": &HTML5Entity{Name: "oslash", CodePoints: []int{248}, Characters: []byte{0xc3, 0xb8}}, + "osol": &HTML5Entity{Name: "osol", CodePoints: []int{8856}, Characters: []byte{0xe2, 0x8a, 0x98}}, + "otilde": &HTML5Entity{Name: "otilde", CodePoints: []int{245}, Characters: []byte{0xc3, 0xb5}}, + "otimes": &HTML5Entity{Name: "otimes", CodePoints: []int{8855}, Characters: []byte{0xe2, 0x8a, 0x97}}, + "otimesas": &HTML5Entity{Name: "otimesas", CodePoints: []int{10806}, Characters: []byte{0xe2, 0xa8, 0xb6}}, + "ouml": &HTML5Entity{Name: "ouml", CodePoints: []int{246}, Characters: []byte{0xc3, 0xb6}}, + "ovbar": &HTML5Entity{Name: "ovbar", CodePoints: []int{9021}, Characters: []byte{0xe2, 0x8c, 0xbd}}, + "par": &HTML5Entity{Name: "par", CodePoints: []int{8741}, Characters: []byte{0xe2, 0x88, 0xa5}}, + "para": &HTML5Entity{Name: "para", CodePoints: []int{182}, Characters: []byte{0xc2, 0xb6}}, + "parallel": &HTML5Entity{Name: "parallel", CodePoints: []int{8741}, Characters: []byte{0xe2, 0x88, 0xa5}}, + "parsim": &HTML5Entity{Name: "parsim", CodePoints: []int{10995}, Characters: []byte{0xe2, 0xab, 0xb3}}, + "parsl": &HTML5Entity{Name: "parsl", CodePoints: []int{11005}, Characters: []byte{0xe2, 0xab, 0xbd}}, + "part": &HTML5Entity{Name: "part", CodePoints: []int{8706}, Characters: []byte{0xe2, 0x88, 0x82}}, + "pcy": &HTML5Entity{Name: "pcy", CodePoints: []int{1087}, Characters: []byte{0xd0, 0xbf}}, + "percnt": &HTML5Entity{Name: "percnt", CodePoints: []int{37}, Characters: []byte{0x25}}, + "period": &HTML5Entity{Name: "period", CodePoints: []int{46}, Characters: []byte{0x2e}}, + "permil": &HTML5Entity{Name: "permil", CodePoints: []int{8240}, Characters: []byte{0xe2, 0x80, 0xb0}}, + "perp": &HTML5Entity{Name: "perp", CodePoints: []int{8869}, Characters: []byte{0xe2, 0x8a, 0xa5}}, + "pertenk": &HTML5Entity{Name: "pertenk", CodePoints: []int{8241}, Characters: []byte{0xe2, 0x80, 0xb1}}, + "pfr": &HTML5Entity{Name: "pfr", CodePoints: []int{120109}, Characters: []byte{0xf0, 0x9d, 0x94, 0xad}}, + "phi": &HTML5Entity{Name: "phi", CodePoints: []int{966}, Characters: []byte{0xcf, 0x86}}, + "phiv": &HTML5Entity{Name: "phiv", CodePoints: []int{981}, Characters: []byte{0xcf, 0x95}}, + "phmmat": &HTML5Entity{Name: "phmmat", CodePoints: []int{8499}, Characters: []byte{0xe2, 0x84, 0xb3}}, + "phone": &HTML5Entity{Name: "phone", CodePoints: []int{9742}, Characters: []byte{0xe2, 0x98, 0x8e}}, + "pi": &HTML5Entity{Name: "pi", CodePoints: []int{960}, Characters: []byte{0xcf, 0x80}}, + "pitchfork": &HTML5Entity{Name: "pitchfork", CodePoints: []int{8916}, Characters: []byte{0xe2, 0x8b, 0x94}}, + "piv": &HTML5Entity{Name: "piv", CodePoints: []int{982}, Characters: []byte{0xcf, 0x96}}, + "planck": &HTML5Entity{Name: "planck", CodePoints: []int{8463}, Characters: []byte{0xe2, 0x84, 0x8f}}, + "planckh": &HTML5Entity{Name: "planckh", CodePoints: []int{8462}, Characters: []byte{0xe2, 0x84, 0x8e}}, + "plankv": &HTML5Entity{Name: "plankv", CodePoints: []int{8463}, Characters: []byte{0xe2, 0x84, 0x8f}}, + "plus": &HTML5Entity{Name: "plus", CodePoints: []int{43}, Characters: []byte{0x2b}}, + "plusacir": &HTML5Entity{Name: "plusacir", CodePoints: []int{10787}, Characters: []byte{0xe2, 0xa8, 0xa3}}, + "plusb": &HTML5Entity{Name: "plusb", CodePoints: []int{8862}, Characters: []byte{0xe2, 0x8a, 0x9e}}, + "pluscir": &HTML5Entity{Name: "pluscir", CodePoints: []int{10786}, Characters: []byte{0xe2, 0xa8, 0xa2}}, + "plusdo": &HTML5Entity{Name: "plusdo", CodePoints: []int{8724}, Characters: []byte{0xe2, 0x88, 0x94}}, + "plusdu": &HTML5Entity{Name: "plusdu", CodePoints: []int{10789}, Characters: []byte{0xe2, 0xa8, 0xa5}}, + "pluse": &HTML5Entity{Name: "pluse", CodePoints: []int{10866}, Characters: []byte{0xe2, 0xa9, 0xb2}}, + "plusmn": &HTML5Entity{Name: "plusmn", CodePoints: []int{177}, Characters: []byte{0xc2, 0xb1}}, + "plussim": &HTML5Entity{Name: "plussim", CodePoints: []int{10790}, Characters: []byte{0xe2, 0xa8, 0xa6}}, + "plustwo": &HTML5Entity{Name: "plustwo", CodePoints: []int{10791}, Characters: []byte{0xe2, 0xa8, 0xa7}}, + "pm": &HTML5Entity{Name: "pm", CodePoints: []int{177}, Characters: []byte{0xc2, 0xb1}}, + "pointint": &HTML5Entity{Name: "pointint", CodePoints: []int{10773}, Characters: []byte{0xe2, 0xa8, 0x95}}, + "popf": &HTML5Entity{Name: "popf", CodePoints: []int{120161}, Characters: []byte{0xf0, 0x9d, 0x95, 0xa1}}, + "pound": &HTML5Entity{Name: "pound", CodePoints: []int{163}, Characters: []byte{0xc2, 0xa3}}, + "pr": &HTML5Entity{Name: "pr", CodePoints: []int{8826}, Characters: []byte{0xe2, 0x89, 0xba}}, + "prE": &HTML5Entity{Name: "prE", CodePoints: []int{10931}, Characters: []byte{0xe2, 0xaa, 0xb3}}, + "prap": &HTML5Entity{Name: "prap", CodePoints: []int{10935}, Characters: []byte{0xe2, 0xaa, 0xb7}}, + "prcue": &HTML5Entity{Name: "prcue", CodePoints: []int{8828}, Characters: []byte{0xe2, 0x89, 0xbc}}, + "pre": &HTML5Entity{Name: "pre", CodePoints: []int{10927}, Characters: []byte{0xe2, 0xaa, 0xaf}}, + "prec": &HTML5Entity{Name: "prec", CodePoints: []int{8826}, Characters: []byte{0xe2, 0x89, 0xba}}, + "precapprox": &HTML5Entity{Name: "precapprox", CodePoints: []int{10935}, Characters: []byte{0xe2, 0xaa, 0xb7}}, + "preccurlyeq": &HTML5Entity{Name: "preccurlyeq", CodePoints: []int{8828}, Characters: []byte{0xe2, 0x89, 0xbc}}, + "preceq": &HTML5Entity{Name: "preceq", CodePoints: []int{10927}, Characters: []byte{0xe2, 0xaa, 0xaf}}, + "precnapprox": &HTML5Entity{Name: "precnapprox", CodePoints: []int{10937}, Characters: []byte{0xe2, 0xaa, 0xb9}}, + "precneqq": &HTML5Entity{Name: "precneqq", CodePoints: []int{10933}, Characters: []byte{0xe2, 0xaa, 0xb5}}, + "precnsim": &HTML5Entity{Name: "precnsim", CodePoints: []int{8936}, Characters: []byte{0xe2, 0x8b, 0xa8}}, + "precsim": &HTML5Entity{Name: "precsim", CodePoints: []int{8830}, Characters: []byte{0xe2, 0x89, 0xbe}}, + "prime": &HTML5Entity{Name: "prime", CodePoints: []int{8242}, Characters: []byte{0xe2, 0x80, 0xb2}}, + "primes": &HTML5Entity{Name: "primes", CodePoints: []int{8473}, Characters: []byte{0xe2, 0x84, 0x99}}, + "prnE": &HTML5Entity{Name: "prnE", CodePoints: []int{10933}, Characters: []byte{0xe2, 0xaa, 0xb5}}, + "prnap": &HTML5Entity{Name: "prnap", CodePoints: []int{10937}, Characters: []byte{0xe2, 0xaa, 0xb9}}, + "prnsim": &HTML5Entity{Name: "prnsim", CodePoints: []int{8936}, Characters: []byte{0xe2, 0x8b, 0xa8}}, + "prod": &HTML5Entity{Name: "prod", CodePoints: []int{8719}, Characters: []byte{0xe2, 0x88, 0x8f}}, + "profalar": &HTML5Entity{Name: "profalar", CodePoints: []int{9006}, Characters: []byte{0xe2, 0x8c, 0xae}}, + "profline": &HTML5Entity{Name: "profline", CodePoints: []int{8978}, Characters: []byte{0xe2, 0x8c, 0x92}}, + "profsurf": &HTML5Entity{Name: "profsurf", CodePoints: []int{8979}, Characters: []byte{0xe2, 0x8c, 0x93}}, + "prop": &HTML5Entity{Name: "prop", CodePoints: []int{8733}, Characters: []byte{0xe2, 0x88, 0x9d}}, + "propto": &HTML5Entity{Name: "propto", CodePoints: []int{8733}, Characters: []byte{0xe2, 0x88, 0x9d}}, + "prsim": &HTML5Entity{Name: "prsim", CodePoints: []int{8830}, Characters: []byte{0xe2, 0x89, 0xbe}}, + "prurel": &HTML5Entity{Name: "prurel", CodePoints: []int{8880}, Characters: []byte{0xe2, 0x8a, 0xb0}}, + "pscr": &HTML5Entity{Name: "pscr", CodePoints: []int{120005}, Characters: []byte{0xf0, 0x9d, 0x93, 0x85}}, + "psi": &HTML5Entity{Name: "psi", CodePoints: []int{968}, Characters: []byte{0xcf, 0x88}}, + "puncsp": &HTML5Entity{Name: "puncsp", CodePoints: []int{8200}, Characters: []byte{0xe2, 0x80, 0x88}}, + "qfr": &HTML5Entity{Name: "qfr", CodePoints: []int{120110}, Characters: []byte{0xf0, 0x9d, 0x94, 0xae}}, + "qint": &HTML5Entity{Name: "qint", CodePoints: []int{10764}, Characters: []byte{0xe2, 0xa8, 0x8c}}, + "qopf": &HTML5Entity{Name: "qopf", CodePoints: []int{120162}, Characters: []byte{0xf0, 0x9d, 0x95, 0xa2}}, + "qprime": &HTML5Entity{Name: "qprime", CodePoints: []int{8279}, Characters: []byte{0xe2, 0x81, 0x97}}, + "qscr": &HTML5Entity{Name: "qscr", CodePoints: []int{120006}, Characters: []byte{0xf0, 0x9d, 0x93, 0x86}}, + "quaternions": &HTML5Entity{Name: "quaternions", CodePoints: []int{8461}, Characters: []byte{0xe2, 0x84, 0x8d}}, + "quatint": &HTML5Entity{Name: "quatint", CodePoints: []int{10774}, Characters: []byte{0xe2, 0xa8, 0x96}}, + "quest": &HTML5Entity{Name: "quest", CodePoints: []int{63}, Characters: []byte{0x3f}}, + "questeq": &HTML5Entity{Name: "questeq", CodePoints: []int{8799}, Characters: []byte{0xe2, 0x89, 0x9f}}, + "quot": &HTML5Entity{Name: "quot", CodePoints: []int{34}, Characters: []byte{0x22}}, + "rAarr": &HTML5Entity{Name: "rAarr", CodePoints: []int{8667}, Characters: []byte{0xe2, 0x87, 0x9b}}, + "rArr": &HTML5Entity{Name: "rArr", CodePoints: []int{8658}, Characters: []byte{0xe2, 0x87, 0x92}}, + "rAtail": &HTML5Entity{Name: "rAtail", CodePoints: []int{10524}, Characters: []byte{0xe2, 0xa4, 0x9c}}, + "rBarr": &HTML5Entity{Name: "rBarr", CodePoints: []int{10511}, Characters: []byte{0xe2, 0xa4, 0x8f}}, + "rHar": &HTML5Entity{Name: "rHar", CodePoints: []int{10596}, Characters: []byte{0xe2, 0xa5, 0xa4}}, + "race": &HTML5Entity{Name: "race", CodePoints: []int{8765, 817}, Characters: []byte{0xe2, 0x88, 0xbd, 0xcc, 0xb1}}, + "racute": &HTML5Entity{Name: "racute", CodePoints: []int{341}, Characters: []byte{0xc5, 0x95}}, + "radic": &HTML5Entity{Name: "radic", CodePoints: []int{8730}, Characters: []byte{0xe2, 0x88, 0x9a}}, + "raemptyv": &HTML5Entity{Name: "raemptyv", CodePoints: []int{10675}, Characters: []byte{0xe2, 0xa6, 0xb3}}, + "rang": &HTML5Entity{Name: "rang", CodePoints: []int{10217}, Characters: []byte{0xe2, 0x9f, 0xa9}}, + "rangd": &HTML5Entity{Name: "rangd", CodePoints: []int{10642}, Characters: []byte{0xe2, 0xa6, 0x92}}, + "range": &HTML5Entity{Name: "range", CodePoints: []int{10661}, Characters: []byte{0xe2, 0xa6, 0xa5}}, + "rangle": &HTML5Entity{Name: "rangle", CodePoints: []int{10217}, Characters: []byte{0xe2, 0x9f, 0xa9}}, + "raquo": &HTML5Entity{Name: "raquo", CodePoints: []int{187}, Characters: []byte{0xc2, 0xbb}}, + "rarr": &HTML5Entity{Name: "rarr", CodePoints: []int{8594}, Characters: []byte{0xe2, 0x86, 0x92}}, + "rarrap": &HTML5Entity{Name: "rarrap", CodePoints: []int{10613}, Characters: []byte{0xe2, 0xa5, 0xb5}}, + "rarrb": &HTML5Entity{Name: "rarrb", CodePoints: []int{8677}, Characters: []byte{0xe2, 0x87, 0xa5}}, + "rarrbfs": &HTML5Entity{Name: "rarrbfs", CodePoints: []int{10528}, Characters: []byte{0xe2, 0xa4, 0xa0}}, + "rarrc": &HTML5Entity{Name: "rarrc", CodePoints: []int{10547}, Characters: []byte{0xe2, 0xa4, 0xb3}}, + "rarrfs": &HTML5Entity{Name: "rarrfs", CodePoints: []int{10526}, Characters: []byte{0xe2, 0xa4, 0x9e}}, + "rarrhk": &HTML5Entity{Name: "rarrhk", CodePoints: []int{8618}, Characters: []byte{0xe2, 0x86, 0xaa}}, + "rarrlp": &HTML5Entity{Name: "rarrlp", CodePoints: []int{8620}, Characters: []byte{0xe2, 0x86, 0xac}}, + "rarrpl": &HTML5Entity{Name: "rarrpl", CodePoints: []int{10565}, Characters: []byte{0xe2, 0xa5, 0x85}}, + "rarrsim": &HTML5Entity{Name: "rarrsim", CodePoints: []int{10612}, Characters: []byte{0xe2, 0xa5, 0xb4}}, + "rarrtl": &HTML5Entity{Name: "rarrtl", CodePoints: []int{8611}, Characters: []byte{0xe2, 0x86, 0xa3}}, + "rarrw": &HTML5Entity{Name: "rarrw", CodePoints: []int{8605}, Characters: []byte{0xe2, 0x86, 0x9d}}, + "ratail": &HTML5Entity{Name: "ratail", CodePoints: []int{10522}, Characters: []byte{0xe2, 0xa4, 0x9a}}, + "ratio": &HTML5Entity{Name: "ratio", CodePoints: []int{8758}, Characters: []byte{0xe2, 0x88, 0xb6}}, + "rationals": &HTML5Entity{Name: "rationals", CodePoints: []int{8474}, Characters: []byte{0xe2, 0x84, 0x9a}}, + "rbarr": &HTML5Entity{Name: "rbarr", CodePoints: []int{10509}, Characters: []byte{0xe2, 0xa4, 0x8d}}, + "rbbrk": &HTML5Entity{Name: "rbbrk", CodePoints: []int{10099}, Characters: []byte{0xe2, 0x9d, 0xb3}}, + "rbrace": &HTML5Entity{Name: "rbrace", CodePoints: []int{125}, Characters: []byte{0x7d}}, + "rbrack": &HTML5Entity{Name: "rbrack", CodePoints: []int{93}, Characters: []byte{0x5d}}, + "rbrke": &HTML5Entity{Name: "rbrke", CodePoints: []int{10636}, Characters: []byte{0xe2, 0xa6, 0x8c}}, + "rbrksld": &HTML5Entity{Name: "rbrksld", CodePoints: []int{10638}, Characters: []byte{0xe2, 0xa6, 0x8e}}, + "rbrkslu": &HTML5Entity{Name: "rbrkslu", CodePoints: []int{10640}, Characters: []byte{0xe2, 0xa6, 0x90}}, + "rcaron": &HTML5Entity{Name: "rcaron", CodePoints: []int{345}, Characters: []byte{0xc5, 0x99}}, + "rcedil": &HTML5Entity{Name: "rcedil", CodePoints: []int{343}, Characters: []byte{0xc5, 0x97}}, + "rceil": &HTML5Entity{Name: "rceil", CodePoints: []int{8969}, Characters: []byte{0xe2, 0x8c, 0x89}}, + "rcub": &HTML5Entity{Name: "rcub", CodePoints: []int{125}, Characters: []byte{0x7d}}, + "rcy": &HTML5Entity{Name: "rcy", CodePoints: []int{1088}, Characters: []byte{0xd1, 0x80}}, + "rdca": &HTML5Entity{Name: "rdca", CodePoints: []int{10551}, Characters: []byte{0xe2, 0xa4, 0xb7}}, + "rdldhar": &HTML5Entity{Name: "rdldhar", CodePoints: []int{10601}, Characters: []byte{0xe2, 0xa5, 0xa9}}, + "rdquo": &HTML5Entity{Name: "rdquo", CodePoints: []int{8221}, Characters: []byte{0xe2, 0x80, 0x9d}}, + "rdquor": &HTML5Entity{Name: "rdquor", CodePoints: []int{8221}, Characters: []byte{0xe2, 0x80, 0x9d}}, + "rdsh": &HTML5Entity{Name: "rdsh", CodePoints: []int{8627}, Characters: []byte{0xe2, 0x86, 0xb3}}, + "real": &HTML5Entity{Name: "real", CodePoints: []int{8476}, Characters: []byte{0xe2, 0x84, 0x9c}}, + "realine": &HTML5Entity{Name: "realine", CodePoints: []int{8475}, Characters: []byte{0xe2, 0x84, 0x9b}}, + "realpart": &HTML5Entity{Name: "realpart", CodePoints: []int{8476}, Characters: []byte{0xe2, 0x84, 0x9c}}, + "reals": &HTML5Entity{Name: "reals", CodePoints: []int{8477}, Characters: []byte{0xe2, 0x84, 0x9d}}, + "rect": &HTML5Entity{Name: "rect", CodePoints: []int{9645}, Characters: []byte{0xe2, 0x96, 0xad}}, + "reg": &HTML5Entity{Name: "reg", CodePoints: []int{174}, Characters: []byte{0xc2, 0xae}}, + "rfisht": &HTML5Entity{Name: "rfisht", CodePoints: []int{10621}, Characters: []byte{0xe2, 0xa5, 0xbd}}, + "rfloor": &HTML5Entity{Name: "rfloor", CodePoints: []int{8971}, Characters: []byte{0xe2, 0x8c, 0x8b}}, + "rfr": &HTML5Entity{Name: "rfr", CodePoints: []int{120111}, Characters: []byte{0xf0, 0x9d, 0x94, 0xaf}}, + "rhard": &HTML5Entity{Name: "rhard", CodePoints: []int{8641}, Characters: []byte{0xe2, 0x87, 0x81}}, + "rharu": &HTML5Entity{Name: "rharu", CodePoints: []int{8640}, Characters: []byte{0xe2, 0x87, 0x80}}, + "rharul": &HTML5Entity{Name: "rharul", CodePoints: []int{10604}, Characters: []byte{0xe2, 0xa5, 0xac}}, + "rho": &HTML5Entity{Name: "rho", CodePoints: []int{961}, Characters: []byte{0xcf, 0x81}}, + "rhov": &HTML5Entity{Name: "rhov", CodePoints: []int{1009}, Characters: []byte{0xcf, 0xb1}}, + "rightarrow": &HTML5Entity{Name: "rightarrow", CodePoints: []int{8594}, Characters: []byte{0xe2, 0x86, 0x92}}, + "rightarrowtail": &HTML5Entity{Name: "rightarrowtail", CodePoints: []int{8611}, Characters: []byte{0xe2, 0x86, 0xa3}}, + "rightharpoondown": &HTML5Entity{Name: "rightharpoondown", CodePoints: []int{8641}, Characters: []byte{0xe2, 0x87, 0x81}}, + "rightharpoonup": &HTML5Entity{Name: "rightharpoonup", CodePoints: []int{8640}, Characters: []byte{0xe2, 0x87, 0x80}}, + "rightleftarrows": &HTML5Entity{Name: "rightleftarrows", CodePoints: []int{8644}, Characters: []byte{0xe2, 0x87, 0x84}}, + "rightleftharpoons": &HTML5Entity{Name: "rightleftharpoons", CodePoints: []int{8652}, Characters: []byte{0xe2, 0x87, 0x8c}}, + "rightrightarrows": &HTML5Entity{Name: "rightrightarrows", CodePoints: []int{8649}, Characters: []byte{0xe2, 0x87, 0x89}}, + "rightsquigarrow": &HTML5Entity{Name: "rightsquigarrow", CodePoints: []int{8605}, Characters: []byte{0xe2, 0x86, 0x9d}}, + "rightthreetimes": &HTML5Entity{Name: "rightthreetimes", CodePoints: []int{8908}, Characters: []byte{0xe2, 0x8b, 0x8c}}, + "ring": &HTML5Entity{Name: "ring", CodePoints: []int{730}, Characters: []byte{0xcb, 0x9a}}, + "risingdotseq": &HTML5Entity{Name: "risingdotseq", CodePoints: []int{8787}, Characters: []byte{0xe2, 0x89, 0x93}}, + "rlarr": &HTML5Entity{Name: "rlarr", CodePoints: []int{8644}, Characters: []byte{0xe2, 0x87, 0x84}}, + "rlhar": &HTML5Entity{Name: "rlhar", CodePoints: []int{8652}, Characters: []byte{0xe2, 0x87, 0x8c}}, + "rlm": &HTML5Entity{Name: "rlm", CodePoints: []int{8207}, Characters: []byte{0xe2, 0x80, 0x8f}}, + "rmoust": &HTML5Entity{Name: "rmoust", CodePoints: []int{9137}, Characters: []byte{0xe2, 0x8e, 0xb1}}, + "rmoustache": &HTML5Entity{Name: "rmoustache", CodePoints: []int{9137}, Characters: []byte{0xe2, 0x8e, 0xb1}}, + "rnmid": &HTML5Entity{Name: "rnmid", CodePoints: []int{10990}, Characters: []byte{0xe2, 0xab, 0xae}}, + "roang": &HTML5Entity{Name: "roang", CodePoints: []int{10221}, Characters: []byte{0xe2, 0x9f, 0xad}}, + "roarr": &HTML5Entity{Name: "roarr", CodePoints: []int{8702}, Characters: []byte{0xe2, 0x87, 0xbe}}, + "robrk": &HTML5Entity{Name: "robrk", CodePoints: []int{10215}, Characters: []byte{0xe2, 0x9f, 0xa7}}, + "ropar": &HTML5Entity{Name: "ropar", CodePoints: []int{10630}, Characters: []byte{0xe2, 0xa6, 0x86}}, + "ropf": &HTML5Entity{Name: "ropf", CodePoints: []int{120163}, Characters: []byte{0xf0, 0x9d, 0x95, 0xa3}}, + "roplus": &HTML5Entity{Name: "roplus", CodePoints: []int{10798}, Characters: []byte{0xe2, 0xa8, 0xae}}, + "rotimes": &HTML5Entity{Name: "rotimes", CodePoints: []int{10805}, Characters: []byte{0xe2, 0xa8, 0xb5}}, + "rpar": &HTML5Entity{Name: "rpar", CodePoints: []int{41}, Characters: []byte{0x29}}, + "rpargt": &HTML5Entity{Name: "rpargt", CodePoints: []int{10644}, Characters: []byte{0xe2, 0xa6, 0x94}}, + "rppolint": &HTML5Entity{Name: "rppolint", CodePoints: []int{10770}, Characters: []byte{0xe2, 0xa8, 0x92}}, + "rrarr": &HTML5Entity{Name: "rrarr", CodePoints: []int{8649}, Characters: []byte{0xe2, 0x87, 0x89}}, + "rsaquo": &HTML5Entity{Name: "rsaquo", CodePoints: []int{8250}, Characters: []byte{0xe2, 0x80, 0xba}}, + "rscr": &HTML5Entity{Name: "rscr", CodePoints: []int{120007}, Characters: []byte{0xf0, 0x9d, 0x93, 0x87}}, + "rsh": &HTML5Entity{Name: "rsh", CodePoints: []int{8625}, Characters: []byte{0xe2, 0x86, 0xb1}}, + "rsqb": &HTML5Entity{Name: "rsqb", CodePoints: []int{93}, Characters: []byte{0x5d}}, + "rsquo": &HTML5Entity{Name: "rsquo", CodePoints: []int{8217}, Characters: []byte{0xe2, 0x80, 0x99}}, + "rsquor": &HTML5Entity{Name: "rsquor", CodePoints: []int{8217}, Characters: []byte{0xe2, 0x80, 0x99}}, + "rthree": &HTML5Entity{Name: "rthree", CodePoints: []int{8908}, Characters: []byte{0xe2, 0x8b, 0x8c}}, + "rtimes": &HTML5Entity{Name: "rtimes", CodePoints: []int{8906}, Characters: []byte{0xe2, 0x8b, 0x8a}}, + "rtri": &HTML5Entity{Name: "rtri", CodePoints: []int{9657}, Characters: []byte{0xe2, 0x96, 0xb9}}, + "rtrie": &HTML5Entity{Name: "rtrie", CodePoints: []int{8885}, Characters: []byte{0xe2, 0x8a, 0xb5}}, + "rtrif": &HTML5Entity{Name: "rtrif", CodePoints: []int{9656}, Characters: []byte{0xe2, 0x96, 0xb8}}, + "rtriltri": &HTML5Entity{Name: "rtriltri", CodePoints: []int{10702}, Characters: []byte{0xe2, 0xa7, 0x8e}}, + "ruluhar": &HTML5Entity{Name: "ruluhar", CodePoints: []int{10600}, Characters: []byte{0xe2, 0xa5, 0xa8}}, + "rx": &HTML5Entity{Name: "rx", CodePoints: []int{8478}, Characters: []byte{0xe2, 0x84, 0x9e}}, + "sacute": &HTML5Entity{Name: "sacute", CodePoints: []int{347}, Characters: []byte{0xc5, 0x9b}}, + "sbquo": &HTML5Entity{Name: "sbquo", CodePoints: []int{8218}, Characters: []byte{0xe2, 0x80, 0x9a}}, + "sc": &HTML5Entity{Name: "sc", CodePoints: []int{8827}, Characters: []byte{0xe2, 0x89, 0xbb}}, + "scE": &HTML5Entity{Name: "scE", CodePoints: []int{10932}, Characters: []byte{0xe2, 0xaa, 0xb4}}, + "scap": &HTML5Entity{Name: "scap", CodePoints: []int{10936}, Characters: []byte{0xe2, 0xaa, 0xb8}}, + "scaron": &HTML5Entity{Name: "scaron", CodePoints: []int{353}, Characters: []byte{0xc5, 0xa1}}, + "sccue": &HTML5Entity{Name: "sccue", CodePoints: []int{8829}, Characters: []byte{0xe2, 0x89, 0xbd}}, + "sce": &HTML5Entity{Name: "sce", CodePoints: []int{10928}, Characters: []byte{0xe2, 0xaa, 0xb0}}, + "scedil": &HTML5Entity{Name: "scedil", CodePoints: []int{351}, Characters: []byte{0xc5, 0x9f}}, + "scirc": &HTML5Entity{Name: "scirc", CodePoints: []int{349}, Characters: []byte{0xc5, 0x9d}}, + "scnE": &HTML5Entity{Name: "scnE", CodePoints: []int{10934}, Characters: []byte{0xe2, 0xaa, 0xb6}}, + "scnap": &HTML5Entity{Name: "scnap", CodePoints: []int{10938}, Characters: []byte{0xe2, 0xaa, 0xba}}, + "scnsim": &HTML5Entity{Name: "scnsim", CodePoints: []int{8937}, Characters: []byte{0xe2, 0x8b, 0xa9}}, + "scpolint": &HTML5Entity{Name: "scpolint", CodePoints: []int{10771}, Characters: []byte{0xe2, 0xa8, 0x93}}, + "scsim": &HTML5Entity{Name: "scsim", CodePoints: []int{8831}, Characters: []byte{0xe2, 0x89, 0xbf}}, + "scy": &HTML5Entity{Name: "scy", CodePoints: []int{1089}, Characters: []byte{0xd1, 0x81}}, + "sdot": &HTML5Entity{Name: "sdot", CodePoints: []int{8901}, Characters: []byte{0xe2, 0x8b, 0x85}}, + "sdotb": &HTML5Entity{Name: "sdotb", CodePoints: []int{8865}, Characters: []byte{0xe2, 0x8a, 0xa1}}, + "sdote": &HTML5Entity{Name: "sdote", CodePoints: []int{10854}, Characters: []byte{0xe2, 0xa9, 0xa6}}, + "seArr": &HTML5Entity{Name: "seArr", CodePoints: []int{8664}, Characters: []byte{0xe2, 0x87, 0x98}}, + "searhk": &HTML5Entity{Name: "searhk", CodePoints: []int{10533}, Characters: []byte{0xe2, 0xa4, 0xa5}}, + "searr": &HTML5Entity{Name: "searr", CodePoints: []int{8600}, Characters: []byte{0xe2, 0x86, 0x98}}, + "searrow": &HTML5Entity{Name: "searrow", CodePoints: []int{8600}, Characters: []byte{0xe2, 0x86, 0x98}}, + "sect": &HTML5Entity{Name: "sect", CodePoints: []int{167}, Characters: []byte{0xc2, 0xa7}}, + "semi": &HTML5Entity{Name: "semi", CodePoints: []int{59}, Characters: []byte{0x3b}}, + "seswar": &HTML5Entity{Name: "seswar", CodePoints: []int{10537}, Characters: []byte{0xe2, 0xa4, 0xa9}}, + "setminus": &HTML5Entity{Name: "setminus", CodePoints: []int{8726}, Characters: []byte{0xe2, 0x88, 0x96}}, + "setmn": &HTML5Entity{Name: "setmn", CodePoints: []int{8726}, Characters: []byte{0xe2, 0x88, 0x96}}, + "sext": &HTML5Entity{Name: "sext", CodePoints: []int{10038}, Characters: []byte{0xe2, 0x9c, 0xb6}}, + "sfr": &HTML5Entity{Name: "sfr", CodePoints: []int{120112}, Characters: []byte{0xf0, 0x9d, 0x94, 0xb0}}, + "sfrown": &HTML5Entity{Name: "sfrown", CodePoints: []int{8994}, Characters: []byte{0xe2, 0x8c, 0xa2}}, + "sharp": &HTML5Entity{Name: "sharp", CodePoints: []int{9839}, Characters: []byte{0xe2, 0x99, 0xaf}}, + "shchcy": &HTML5Entity{Name: "shchcy", CodePoints: []int{1097}, Characters: []byte{0xd1, 0x89}}, + "shcy": &HTML5Entity{Name: "shcy", CodePoints: []int{1096}, Characters: []byte{0xd1, 0x88}}, + "shortmid": &HTML5Entity{Name: "shortmid", CodePoints: []int{8739}, Characters: []byte{0xe2, 0x88, 0xa3}}, + "shortparallel": &HTML5Entity{Name: "shortparallel", CodePoints: []int{8741}, Characters: []byte{0xe2, 0x88, 0xa5}}, + "shy": &HTML5Entity{Name: "shy", CodePoints: []int{173}, Characters: []byte{0xc2, 0xad}}, + "sigma": &HTML5Entity{Name: "sigma", CodePoints: []int{963}, Characters: []byte{0xcf, 0x83}}, + "sigmaf": &HTML5Entity{Name: "sigmaf", CodePoints: []int{962}, Characters: []byte{0xcf, 0x82}}, + "sigmav": &HTML5Entity{Name: "sigmav", CodePoints: []int{962}, Characters: []byte{0xcf, 0x82}}, + "sim": &HTML5Entity{Name: "sim", CodePoints: []int{8764}, Characters: []byte{0xe2, 0x88, 0xbc}}, + "simdot": &HTML5Entity{Name: "simdot", CodePoints: []int{10858}, Characters: []byte{0xe2, 0xa9, 0xaa}}, + "sime": &HTML5Entity{Name: "sime", CodePoints: []int{8771}, Characters: []byte{0xe2, 0x89, 0x83}}, + "simeq": &HTML5Entity{Name: "simeq", CodePoints: []int{8771}, Characters: []byte{0xe2, 0x89, 0x83}}, + "simg": &HTML5Entity{Name: "simg", CodePoints: []int{10910}, Characters: []byte{0xe2, 0xaa, 0x9e}}, + "simgE": &HTML5Entity{Name: "simgE", CodePoints: []int{10912}, Characters: []byte{0xe2, 0xaa, 0xa0}}, + "siml": &HTML5Entity{Name: "siml", CodePoints: []int{10909}, Characters: []byte{0xe2, 0xaa, 0x9d}}, + "simlE": &HTML5Entity{Name: "simlE", CodePoints: []int{10911}, Characters: []byte{0xe2, 0xaa, 0x9f}}, + "simne": &HTML5Entity{Name: "simne", CodePoints: []int{8774}, Characters: []byte{0xe2, 0x89, 0x86}}, + "simplus": &HTML5Entity{Name: "simplus", CodePoints: []int{10788}, Characters: []byte{0xe2, 0xa8, 0xa4}}, + "simrarr": &HTML5Entity{Name: "simrarr", CodePoints: []int{10610}, Characters: []byte{0xe2, 0xa5, 0xb2}}, + "slarr": &HTML5Entity{Name: "slarr", CodePoints: []int{8592}, Characters: []byte{0xe2, 0x86, 0x90}}, + "smallsetminus": &HTML5Entity{Name: "smallsetminus", CodePoints: []int{8726}, Characters: []byte{0xe2, 0x88, 0x96}}, + "smashp": &HTML5Entity{Name: "smashp", CodePoints: []int{10803}, Characters: []byte{0xe2, 0xa8, 0xb3}}, + "smeparsl": &HTML5Entity{Name: "smeparsl", CodePoints: []int{10724}, Characters: []byte{0xe2, 0xa7, 0xa4}}, + "smid": &HTML5Entity{Name: "smid", CodePoints: []int{8739}, Characters: []byte{0xe2, 0x88, 0xa3}}, + "smile": &HTML5Entity{Name: "smile", CodePoints: []int{8995}, Characters: []byte{0xe2, 0x8c, 0xa3}}, + "smt": &HTML5Entity{Name: "smt", CodePoints: []int{10922}, Characters: []byte{0xe2, 0xaa, 0xaa}}, + "smte": &HTML5Entity{Name: "smte", CodePoints: []int{10924}, Characters: []byte{0xe2, 0xaa, 0xac}}, + "smtes": &HTML5Entity{Name: "smtes", CodePoints: []int{10924, 65024}, Characters: []byte{0xe2, 0xaa, 0xac, 0xef, 0xb8, 0x80}}, + "softcy": &HTML5Entity{Name: "softcy", CodePoints: []int{1100}, Characters: []byte{0xd1, 0x8c}}, + "sol": &HTML5Entity{Name: "sol", CodePoints: []int{47}, Characters: []byte{0x2f}}, + "solb": &HTML5Entity{Name: "solb", CodePoints: []int{10692}, Characters: []byte{0xe2, 0xa7, 0x84}}, + "solbar": &HTML5Entity{Name: "solbar", CodePoints: []int{9023}, Characters: []byte{0xe2, 0x8c, 0xbf}}, + "sopf": &HTML5Entity{Name: "sopf", CodePoints: []int{120164}, Characters: []byte{0xf0, 0x9d, 0x95, 0xa4}}, + "spades": &HTML5Entity{Name: "spades", CodePoints: []int{9824}, Characters: []byte{0xe2, 0x99, 0xa0}}, + "spadesuit": &HTML5Entity{Name: "spadesuit", CodePoints: []int{9824}, Characters: []byte{0xe2, 0x99, 0xa0}}, + "spar": &HTML5Entity{Name: "spar", CodePoints: []int{8741}, Characters: []byte{0xe2, 0x88, 0xa5}}, + "sqcap": &HTML5Entity{Name: "sqcap", CodePoints: []int{8851}, Characters: []byte{0xe2, 0x8a, 0x93}}, + "sqcaps": &HTML5Entity{Name: "sqcaps", CodePoints: []int{8851, 65024}, Characters: []byte{0xe2, 0x8a, 0x93, 0xef, 0xb8, 0x80}}, + "sqcup": &HTML5Entity{Name: "sqcup", CodePoints: []int{8852}, Characters: []byte{0xe2, 0x8a, 0x94}}, + "sqcups": &HTML5Entity{Name: "sqcups", CodePoints: []int{8852, 65024}, Characters: []byte{0xe2, 0x8a, 0x94, 0xef, 0xb8, 0x80}}, + "sqsub": &HTML5Entity{Name: "sqsub", CodePoints: []int{8847}, Characters: []byte{0xe2, 0x8a, 0x8f}}, + "sqsube": &HTML5Entity{Name: "sqsube", CodePoints: []int{8849}, Characters: []byte{0xe2, 0x8a, 0x91}}, + "sqsubset": &HTML5Entity{Name: "sqsubset", CodePoints: []int{8847}, Characters: []byte{0xe2, 0x8a, 0x8f}}, + "sqsubseteq": &HTML5Entity{Name: "sqsubseteq", CodePoints: []int{8849}, Characters: []byte{0xe2, 0x8a, 0x91}}, + "sqsup": &HTML5Entity{Name: "sqsup", CodePoints: []int{8848}, Characters: []byte{0xe2, 0x8a, 0x90}}, + "sqsupe": &HTML5Entity{Name: "sqsupe", CodePoints: []int{8850}, Characters: []byte{0xe2, 0x8a, 0x92}}, + "sqsupset": &HTML5Entity{Name: "sqsupset", CodePoints: []int{8848}, Characters: []byte{0xe2, 0x8a, 0x90}}, + "sqsupseteq": &HTML5Entity{Name: "sqsupseteq", CodePoints: []int{8850}, Characters: []byte{0xe2, 0x8a, 0x92}}, + "squ": &HTML5Entity{Name: "squ", CodePoints: []int{9633}, Characters: []byte{0xe2, 0x96, 0xa1}}, + "square": &HTML5Entity{Name: "square", CodePoints: []int{9633}, Characters: []byte{0xe2, 0x96, 0xa1}}, + "squarf": &HTML5Entity{Name: "squarf", CodePoints: []int{9642}, Characters: []byte{0xe2, 0x96, 0xaa}}, + "squf": &HTML5Entity{Name: "squf", CodePoints: []int{9642}, Characters: []byte{0xe2, 0x96, 0xaa}}, + "srarr": &HTML5Entity{Name: "srarr", CodePoints: []int{8594}, Characters: []byte{0xe2, 0x86, 0x92}}, + "sscr": &HTML5Entity{Name: "sscr", CodePoints: []int{120008}, Characters: []byte{0xf0, 0x9d, 0x93, 0x88}}, + "ssetmn": &HTML5Entity{Name: "ssetmn", CodePoints: []int{8726}, Characters: []byte{0xe2, 0x88, 0x96}}, + "ssmile": &HTML5Entity{Name: "ssmile", CodePoints: []int{8995}, Characters: []byte{0xe2, 0x8c, 0xa3}}, + "sstarf": &HTML5Entity{Name: "sstarf", CodePoints: []int{8902}, Characters: []byte{0xe2, 0x8b, 0x86}}, + "star": &HTML5Entity{Name: "star", CodePoints: []int{9734}, Characters: []byte{0xe2, 0x98, 0x86}}, + "starf": &HTML5Entity{Name: "starf", CodePoints: []int{9733}, Characters: []byte{0xe2, 0x98, 0x85}}, + "straightepsilon": &HTML5Entity{Name: "straightepsilon", CodePoints: []int{1013}, Characters: []byte{0xcf, 0xb5}}, + "straightphi": &HTML5Entity{Name: "straightphi", CodePoints: []int{981}, Characters: []byte{0xcf, 0x95}}, + "strns": &HTML5Entity{Name: "strns", CodePoints: []int{175}, Characters: []byte{0xc2, 0xaf}}, + "sub": &HTML5Entity{Name: "sub", CodePoints: []int{8834}, Characters: []byte{0xe2, 0x8a, 0x82}}, + "subE": &HTML5Entity{Name: "subE", CodePoints: []int{10949}, Characters: []byte{0xe2, 0xab, 0x85}}, + "subdot": &HTML5Entity{Name: "subdot", CodePoints: []int{10941}, Characters: []byte{0xe2, 0xaa, 0xbd}}, + "sube": &HTML5Entity{Name: "sube", CodePoints: []int{8838}, Characters: []byte{0xe2, 0x8a, 0x86}}, + "subedot": &HTML5Entity{Name: "subedot", CodePoints: []int{10947}, Characters: []byte{0xe2, 0xab, 0x83}}, + "submult": &HTML5Entity{Name: "submult", CodePoints: []int{10945}, Characters: []byte{0xe2, 0xab, 0x81}}, + "subnE": &HTML5Entity{Name: "subnE", CodePoints: []int{10955}, Characters: []byte{0xe2, 0xab, 0x8b}}, + "subne": &HTML5Entity{Name: "subne", CodePoints: []int{8842}, Characters: []byte{0xe2, 0x8a, 0x8a}}, + "subplus": &HTML5Entity{Name: "subplus", CodePoints: []int{10943}, Characters: []byte{0xe2, 0xaa, 0xbf}}, + "subrarr": &HTML5Entity{Name: "subrarr", CodePoints: []int{10617}, Characters: []byte{0xe2, 0xa5, 0xb9}}, + "subset": &HTML5Entity{Name: "subset", CodePoints: []int{8834}, Characters: []byte{0xe2, 0x8a, 0x82}}, + "subseteq": &HTML5Entity{Name: "subseteq", CodePoints: []int{8838}, Characters: []byte{0xe2, 0x8a, 0x86}}, + "subseteqq": &HTML5Entity{Name: "subseteqq", CodePoints: []int{10949}, Characters: []byte{0xe2, 0xab, 0x85}}, + "subsetneq": &HTML5Entity{Name: "subsetneq", CodePoints: []int{8842}, Characters: []byte{0xe2, 0x8a, 0x8a}}, + "subsetneqq": &HTML5Entity{Name: "subsetneqq", CodePoints: []int{10955}, Characters: []byte{0xe2, 0xab, 0x8b}}, + "subsim": &HTML5Entity{Name: "subsim", CodePoints: []int{10951}, Characters: []byte{0xe2, 0xab, 0x87}}, + "subsub": &HTML5Entity{Name: "subsub", CodePoints: []int{10965}, Characters: []byte{0xe2, 0xab, 0x95}}, + "subsup": &HTML5Entity{Name: "subsup", CodePoints: []int{10963}, Characters: []byte{0xe2, 0xab, 0x93}}, + "succ": &HTML5Entity{Name: "succ", CodePoints: []int{8827}, Characters: []byte{0xe2, 0x89, 0xbb}}, + "succapprox": &HTML5Entity{Name: "succapprox", CodePoints: []int{10936}, Characters: []byte{0xe2, 0xaa, 0xb8}}, + "succcurlyeq": &HTML5Entity{Name: "succcurlyeq", CodePoints: []int{8829}, Characters: []byte{0xe2, 0x89, 0xbd}}, + "succeq": &HTML5Entity{Name: "succeq", CodePoints: []int{10928}, Characters: []byte{0xe2, 0xaa, 0xb0}}, + "succnapprox": &HTML5Entity{Name: "succnapprox", CodePoints: []int{10938}, Characters: []byte{0xe2, 0xaa, 0xba}}, + "succneqq": &HTML5Entity{Name: "succneqq", CodePoints: []int{10934}, Characters: []byte{0xe2, 0xaa, 0xb6}}, + "succnsim": &HTML5Entity{Name: "succnsim", CodePoints: []int{8937}, Characters: []byte{0xe2, 0x8b, 0xa9}}, + "succsim": &HTML5Entity{Name: "succsim", CodePoints: []int{8831}, Characters: []byte{0xe2, 0x89, 0xbf}}, + "sum": &HTML5Entity{Name: "sum", CodePoints: []int{8721}, Characters: []byte{0xe2, 0x88, 0x91}}, + "sung": &HTML5Entity{Name: "sung", CodePoints: []int{9834}, Characters: []byte{0xe2, 0x99, 0xaa}}, + "sup": &HTML5Entity{Name: "sup", CodePoints: []int{8835}, Characters: []byte{0xe2, 0x8a, 0x83}}, + "sup1": &HTML5Entity{Name: "sup1", CodePoints: []int{185}, Characters: []byte{0xc2, 0xb9}}, + "sup2": &HTML5Entity{Name: "sup2", CodePoints: []int{178}, Characters: []byte{0xc2, 0xb2}}, + "sup3": &HTML5Entity{Name: "sup3", CodePoints: []int{179}, Characters: []byte{0xc2, 0xb3}}, + "supE": &HTML5Entity{Name: "supE", CodePoints: []int{10950}, Characters: []byte{0xe2, 0xab, 0x86}}, + "supdot": &HTML5Entity{Name: "supdot", CodePoints: []int{10942}, Characters: []byte{0xe2, 0xaa, 0xbe}}, + "supdsub": &HTML5Entity{Name: "supdsub", CodePoints: []int{10968}, Characters: []byte{0xe2, 0xab, 0x98}}, + "supe": &HTML5Entity{Name: "supe", CodePoints: []int{8839}, Characters: []byte{0xe2, 0x8a, 0x87}}, + "supedot": &HTML5Entity{Name: "supedot", CodePoints: []int{10948}, Characters: []byte{0xe2, 0xab, 0x84}}, + "suphsol": &HTML5Entity{Name: "suphsol", CodePoints: []int{10185}, Characters: []byte{0xe2, 0x9f, 0x89}}, + "suphsub": &HTML5Entity{Name: "suphsub", CodePoints: []int{10967}, Characters: []byte{0xe2, 0xab, 0x97}}, + "suplarr": &HTML5Entity{Name: "suplarr", CodePoints: []int{10619}, Characters: []byte{0xe2, 0xa5, 0xbb}}, + "supmult": &HTML5Entity{Name: "supmult", CodePoints: []int{10946}, Characters: []byte{0xe2, 0xab, 0x82}}, + "supnE": &HTML5Entity{Name: "supnE", CodePoints: []int{10956}, Characters: []byte{0xe2, 0xab, 0x8c}}, + "supne": &HTML5Entity{Name: "supne", CodePoints: []int{8843}, Characters: []byte{0xe2, 0x8a, 0x8b}}, + "supplus": &HTML5Entity{Name: "supplus", CodePoints: []int{10944}, Characters: []byte{0xe2, 0xab, 0x80}}, + "supset": &HTML5Entity{Name: "supset", CodePoints: []int{8835}, Characters: []byte{0xe2, 0x8a, 0x83}}, + "supseteq": &HTML5Entity{Name: "supseteq", CodePoints: []int{8839}, Characters: []byte{0xe2, 0x8a, 0x87}}, + "supseteqq": &HTML5Entity{Name: "supseteqq", CodePoints: []int{10950}, Characters: []byte{0xe2, 0xab, 0x86}}, + "supsetneq": &HTML5Entity{Name: "supsetneq", CodePoints: []int{8843}, Characters: []byte{0xe2, 0x8a, 0x8b}}, + "supsetneqq": &HTML5Entity{Name: "supsetneqq", CodePoints: []int{10956}, Characters: []byte{0xe2, 0xab, 0x8c}}, + "supsim": &HTML5Entity{Name: "supsim", CodePoints: []int{10952}, Characters: []byte{0xe2, 0xab, 0x88}}, + "supsub": &HTML5Entity{Name: "supsub", CodePoints: []int{10964}, Characters: []byte{0xe2, 0xab, 0x94}}, + "supsup": &HTML5Entity{Name: "supsup", CodePoints: []int{10966}, Characters: []byte{0xe2, 0xab, 0x96}}, + "swArr": &HTML5Entity{Name: "swArr", CodePoints: []int{8665}, Characters: []byte{0xe2, 0x87, 0x99}}, + "swarhk": &HTML5Entity{Name: "swarhk", CodePoints: []int{10534}, Characters: []byte{0xe2, 0xa4, 0xa6}}, + "swarr": &HTML5Entity{Name: "swarr", CodePoints: []int{8601}, Characters: []byte{0xe2, 0x86, 0x99}}, + "swarrow": &HTML5Entity{Name: "swarrow", CodePoints: []int{8601}, Characters: []byte{0xe2, 0x86, 0x99}}, + "swnwar": &HTML5Entity{Name: "swnwar", CodePoints: []int{10538}, Characters: []byte{0xe2, 0xa4, 0xaa}}, + "szlig": &HTML5Entity{Name: "szlig", CodePoints: []int{223}, Characters: []byte{0xc3, 0x9f}}, + "target": &HTML5Entity{Name: "target", CodePoints: []int{8982}, Characters: []byte{0xe2, 0x8c, 0x96}}, + "tau": &HTML5Entity{Name: "tau", CodePoints: []int{964}, Characters: []byte{0xcf, 0x84}}, + "tbrk": &HTML5Entity{Name: "tbrk", CodePoints: []int{9140}, Characters: []byte{0xe2, 0x8e, 0xb4}}, + "tcaron": &HTML5Entity{Name: "tcaron", CodePoints: []int{357}, Characters: []byte{0xc5, 0xa5}}, + "tcedil": &HTML5Entity{Name: "tcedil", CodePoints: []int{355}, Characters: []byte{0xc5, 0xa3}}, + "tcy": &HTML5Entity{Name: "tcy", CodePoints: []int{1090}, Characters: []byte{0xd1, 0x82}}, + "tdot": &HTML5Entity{Name: "tdot", CodePoints: []int{8411}, Characters: []byte{0xe2, 0x83, 0x9b}}, + "telrec": &HTML5Entity{Name: "telrec", CodePoints: []int{8981}, Characters: []byte{0xe2, 0x8c, 0x95}}, + "tfr": &HTML5Entity{Name: "tfr", CodePoints: []int{120113}, Characters: []byte{0xf0, 0x9d, 0x94, 0xb1}}, + "there4": &HTML5Entity{Name: "there4", CodePoints: []int{8756}, Characters: []byte{0xe2, 0x88, 0xb4}}, + "therefore": &HTML5Entity{Name: "therefore", CodePoints: []int{8756}, Characters: []byte{0xe2, 0x88, 0xb4}}, + "theta": &HTML5Entity{Name: "theta", CodePoints: []int{952}, Characters: []byte{0xce, 0xb8}}, + "thetasym": &HTML5Entity{Name: "thetasym", CodePoints: []int{977}, Characters: []byte{0xcf, 0x91}}, + "thetav": &HTML5Entity{Name: "thetav", CodePoints: []int{977}, Characters: []byte{0xcf, 0x91}}, + "thickapprox": &HTML5Entity{Name: "thickapprox", CodePoints: []int{8776}, Characters: []byte{0xe2, 0x89, 0x88}}, + "thicksim": &HTML5Entity{Name: "thicksim", CodePoints: []int{8764}, Characters: []byte{0xe2, 0x88, 0xbc}}, + "thinsp": &HTML5Entity{Name: "thinsp", CodePoints: []int{8201}, Characters: []byte{0xe2, 0x80, 0x89}}, + "thkap": &HTML5Entity{Name: "thkap", CodePoints: []int{8776}, Characters: []byte{0xe2, 0x89, 0x88}}, + "thksim": &HTML5Entity{Name: "thksim", CodePoints: []int{8764}, Characters: []byte{0xe2, 0x88, 0xbc}}, + "thorn": &HTML5Entity{Name: "thorn", CodePoints: []int{254}, Characters: []byte{0xc3, 0xbe}}, + "tilde": &HTML5Entity{Name: "tilde", CodePoints: []int{732}, Characters: []byte{0xcb, 0x9c}}, + "times": &HTML5Entity{Name: "times", CodePoints: []int{215}, Characters: []byte{0xc3, 0x97}}, + "timesb": &HTML5Entity{Name: "timesb", CodePoints: []int{8864}, Characters: []byte{0xe2, 0x8a, 0xa0}}, + "timesbar": &HTML5Entity{Name: "timesbar", CodePoints: []int{10801}, Characters: []byte{0xe2, 0xa8, 0xb1}}, + "timesd": &HTML5Entity{Name: "timesd", CodePoints: []int{10800}, Characters: []byte{0xe2, 0xa8, 0xb0}}, + "tint": &HTML5Entity{Name: "tint", CodePoints: []int{8749}, Characters: []byte{0xe2, 0x88, 0xad}}, + "toea": &HTML5Entity{Name: "toea", CodePoints: []int{10536}, Characters: []byte{0xe2, 0xa4, 0xa8}}, + "top": &HTML5Entity{Name: "top", CodePoints: []int{8868}, Characters: []byte{0xe2, 0x8a, 0xa4}}, + "topbot": &HTML5Entity{Name: "topbot", CodePoints: []int{9014}, Characters: []byte{0xe2, 0x8c, 0xb6}}, + "topcir": &HTML5Entity{Name: "topcir", CodePoints: []int{10993}, Characters: []byte{0xe2, 0xab, 0xb1}}, + "topf": &HTML5Entity{Name: "topf", CodePoints: []int{120165}, Characters: []byte{0xf0, 0x9d, 0x95, 0xa5}}, + "topfork": &HTML5Entity{Name: "topfork", CodePoints: []int{10970}, Characters: []byte{0xe2, 0xab, 0x9a}}, + "tosa": &HTML5Entity{Name: "tosa", CodePoints: []int{10537}, Characters: []byte{0xe2, 0xa4, 0xa9}}, + "tprime": &HTML5Entity{Name: "tprime", CodePoints: []int{8244}, Characters: []byte{0xe2, 0x80, 0xb4}}, + "trade": &HTML5Entity{Name: "trade", CodePoints: []int{8482}, Characters: []byte{0xe2, 0x84, 0xa2}}, + "triangle": &HTML5Entity{Name: "triangle", CodePoints: []int{9653}, Characters: []byte{0xe2, 0x96, 0xb5}}, + "triangledown": &HTML5Entity{Name: "triangledown", CodePoints: []int{9663}, Characters: []byte{0xe2, 0x96, 0xbf}}, + "triangleleft": &HTML5Entity{Name: "triangleleft", CodePoints: []int{9667}, Characters: []byte{0xe2, 0x97, 0x83}}, + "trianglelefteq": &HTML5Entity{Name: "trianglelefteq", CodePoints: []int{8884}, Characters: []byte{0xe2, 0x8a, 0xb4}}, + "triangleq": &HTML5Entity{Name: "triangleq", CodePoints: []int{8796}, Characters: []byte{0xe2, 0x89, 0x9c}}, + "triangleright": &HTML5Entity{Name: "triangleright", CodePoints: []int{9657}, Characters: []byte{0xe2, 0x96, 0xb9}}, + "trianglerighteq": &HTML5Entity{Name: "trianglerighteq", CodePoints: []int{8885}, Characters: []byte{0xe2, 0x8a, 0xb5}}, + "tridot": &HTML5Entity{Name: "tridot", CodePoints: []int{9708}, Characters: []byte{0xe2, 0x97, 0xac}}, + "trie": &HTML5Entity{Name: "trie", CodePoints: []int{8796}, Characters: []byte{0xe2, 0x89, 0x9c}}, + "triminus": &HTML5Entity{Name: "triminus", CodePoints: []int{10810}, Characters: []byte{0xe2, 0xa8, 0xba}}, + "triplus": &HTML5Entity{Name: "triplus", CodePoints: []int{10809}, Characters: []byte{0xe2, 0xa8, 0xb9}}, + "trisb": &HTML5Entity{Name: "trisb", CodePoints: []int{10701}, Characters: []byte{0xe2, 0xa7, 0x8d}}, + "tritime": &HTML5Entity{Name: "tritime", CodePoints: []int{10811}, Characters: []byte{0xe2, 0xa8, 0xbb}}, + "trpezium": &HTML5Entity{Name: "trpezium", CodePoints: []int{9186}, Characters: []byte{0xe2, 0x8f, 0xa2}}, + "tscr": &HTML5Entity{Name: "tscr", CodePoints: []int{120009}, Characters: []byte{0xf0, 0x9d, 0x93, 0x89}}, + "tscy": &HTML5Entity{Name: "tscy", CodePoints: []int{1094}, Characters: []byte{0xd1, 0x86}}, + "tshcy": &HTML5Entity{Name: "tshcy", CodePoints: []int{1115}, Characters: []byte{0xd1, 0x9b}}, + "tstrok": &HTML5Entity{Name: "tstrok", CodePoints: []int{359}, Characters: []byte{0xc5, 0xa7}}, + "twixt": &HTML5Entity{Name: "twixt", CodePoints: []int{8812}, Characters: []byte{0xe2, 0x89, 0xac}}, + "twoheadleftarrow": &HTML5Entity{Name: "twoheadleftarrow", CodePoints: []int{8606}, Characters: []byte{0xe2, 0x86, 0x9e}}, + "twoheadrightarrow": &HTML5Entity{Name: "twoheadrightarrow", CodePoints: []int{8608}, Characters: []byte{0xe2, 0x86, 0xa0}}, + "uArr": &HTML5Entity{Name: "uArr", CodePoints: []int{8657}, Characters: []byte{0xe2, 0x87, 0x91}}, + "uHar": &HTML5Entity{Name: "uHar", CodePoints: []int{10595}, Characters: []byte{0xe2, 0xa5, 0xa3}}, + "uacute": &HTML5Entity{Name: "uacute", CodePoints: []int{250}, Characters: []byte{0xc3, 0xba}}, + "uarr": &HTML5Entity{Name: "uarr", CodePoints: []int{8593}, Characters: []byte{0xe2, 0x86, 0x91}}, + "ubrcy": &HTML5Entity{Name: "ubrcy", CodePoints: []int{1118}, Characters: []byte{0xd1, 0x9e}}, + "ubreve": &HTML5Entity{Name: "ubreve", CodePoints: []int{365}, Characters: []byte{0xc5, 0xad}}, + "ucirc": &HTML5Entity{Name: "ucirc", CodePoints: []int{251}, Characters: []byte{0xc3, 0xbb}}, + "ucy": &HTML5Entity{Name: "ucy", CodePoints: []int{1091}, Characters: []byte{0xd1, 0x83}}, + "udarr": &HTML5Entity{Name: "udarr", CodePoints: []int{8645}, Characters: []byte{0xe2, 0x87, 0x85}}, + "udblac": &HTML5Entity{Name: "udblac", CodePoints: []int{369}, Characters: []byte{0xc5, 0xb1}}, + "udhar": &HTML5Entity{Name: "udhar", CodePoints: []int{10606}, Characters: []byte{0xe2, 0xa5, 0xae}}, + "ufisht": &HTML5Entity{Name: "ufisht", CodePoints: []int{10622}, Characters: []byte{0xe2, 0xa5, 0xbe}}, + "ufr": &HTML5Entity{Name: "ufr", CodePoints: []int{120114}, Characters: []byte{0xf0, 0x9d, 0x94, 0xb2}}, + "ugrave": &HTML5Entity{Name: "ugrave", CodePoints: []int{249}, Characters: []byte{0xc3, 0xb9}}, + "uharl": &HTML5Entity{Name: "uharl", CodePoints: []int{8639}, Characters: []byte{0xe2, 0x86, 0xbf}}, + "uharr": &HTML5Entity{Name: "uharr", CodePoints: []int{8638}, Characters: []byte{0xe2, 0x86, 0xbe}}, + "uhblk": &HTML5Entity{Name: "uhblk", CodePoints: []int{9600}, Characters: []byte{0xe2, 0x96, 0x80}}, + "ulcorn": &HTML5Entity{Name: "ulcorn", CodePoints: []int{8988}, Characters: []byte{0xe2, 0x8c, 0x9c}}, + "ulcorner": &HTML5Entity{Name: "ulcorner", CodePoints: []int{8988}, Characters: []byte{0xe2, 0x8c, 0x9c}}, + "ulcrop": &HTML5Entity{Name: "ulcrop", CodePoints: []int{8975}, Characters: []byte{0xe2, 0x8c, 0x8f}}, + "ultri": &HTML5Entity{Name: "ultri", CodePoints: []int{9720}, Characters: []byte{0xe2, 0x97, 0xb8}}, + "umacr": &HTML5Entity{Name: "umacr", CodePoints: []int{363}, Characters: []byte{0xc5, 0xab}}, + "uml": &HTML5Entity{Name: "uml", CodePoints: []int{168}, Characters: []byte{0xc2, 0xa8}}, + "uogon": &HTML5Entity{Name: "uogon", CodePoints: []int{371}, Characters: []byte{0xc5, 0xb3}}, + "uopf": &HTML5Entity{Name: "uopf", CodePoints: []int{120166}, Characters: []byte{0xf0, 0x9d, 0x95, 0xa6}}, + "uparrow": &HTML5Entity{Name: "uparrow", CodePoints: []int{8593}, Characters: []byte{0xe2, 0x86, 0x91}}, + "updownarrow": &HTML5Entity{Name: "updownarrow", CodePoints: []int{8597}, Characters: []byte{0xe2, 0x86, 0x95}}, + "upharpoonleft": &HTML5Entity{Name: "upharpoonleft", CodePoints: []int{8639}, Characters: []byte{0xe2, 0x86, 0xbf}}, + "upharpoonright": &HTML5Entity{Name: "upharpoonright", CodePoints: []int{8638}, Characters: []byte{0xe2, 0x86, 0xbe}}, + "uplus": &HTML5Entity{Name: "uplus", CodePoints: []int{8846}, Characters: []byte{0xe2, 0x8a, 0x8e}}, + "upsi": &HTML5Entity{Name: "upsi", CodePoints: []int{965}, Characters: []byte{0xcf, 0x85}}, + "upsih": &HTML5Entity{Name: "upsih", CodePoints: []int{978}, Characters: []byte{0xcf, 0x92}}, + "upsilon": &HTML5Entity{Name: "upsilon", CodePoints: []int{965}, Characters: []byte{0xcf, 0x85}}, + "upuparrows": &HTML5Entity{Name: "upuparrows", CodePoints: []int{8648}, Characters: []byte{0xe2, 0x87, 0x88}}, + "urcorn": &HTML5Entity{Name: "urcorn", CodePoints: []int{8989}, Characters: []byte{0xe2, 0x8c, 0x9d}}, + "urcorner": &HTML5Entity{Name: "urcorner", CodePoints: []int{8989}, Characters: []byte{0xe2, 0x8c, 0x9d}}, + "urcrop": &HTML5Entity{Name: "urcrop", CodePoints: []int{8974}, Characters: []byte{0xe2, 0x8c, 0x8e}}, + "uring": &HTML5Entity{Name: "uring", CodePoints: []int{367}, Characters: []byte{0xc5, 0xaf}}, + "urtri": &HTML5Entity{Name: "urtri", CodePoints: []int{9721}, Characters: []byte{0xe2, 0x97, 0xb9}}, + "uscr": &HTML5Entity{Name: "uscr", CodePoints: []int{120010}, Characters: []byte{0xf0, 0x9d, 0x93, 0x8a}}, + "utdot": &HTML5Entity{Name: "utdot", CodePoints: []int{8944}, Characters: []byte{0xe2, 0x8b, 0xb0}}, + "utilde": &HTML5Entity{Name: "utilde", CodePoints: []int{361}, Characters: []byte{0xc5, 0xa9}}, + "utri": &HTML5Entity{Name: "utri", CodePoints: []int{9653}, Characters: []byte{0xe2, 0x96, 0xb5}}, + "utrif": &HTML5Entity{Name: "utrif", CodePoints: []int{9652}, Characters: []byte{0xe2, 0x96, 0xb4}}, + "uuarr": &HTML5Entity{Name: "uuarr", CodePoints: []int{8648}, Characters: []byte{0xe2, 0x87, 0x88}}, + "uuml": &HTML5Entity{Name: "uuml", CodePoints: []int{252}, Characters: []byte{0xc3, 0xbc}}, + "uwangle": &HTML5Entity{Name: "uwangle", CodePoints: []int{10663}, Characters: []byte{0xe2, 0xa6, 0xa7}}, + "vArr": &HTML5Entity{Name: "vArr", CodePoints: []int{8661}, Characters: []byte{0xe2, 0x87, 0x95}}, + "vBar": &HTML5Entity{Name: "vBar", CodePoints: []int{10984}, Characters: []byte{0xe2, 0xab, 0xa8}}, + "vBarv": &HTML5Entity{Name: "vBarv", CodePoints: []int{10985}, Characters: []byte{0xe2, 0xab, 0xa9}}, + "vDash": &HTML5Entity{Name: "vDash", CodePoints: []int{8872}, Characters: []byte{0xe2, 0x8a, 0xa8}}, + "vangrt": &HTML5Entity{Name: "vangrt", CodePoints: []int{10652}, Characters: []byte{0xe2, 0xa6, 0x9c}}, + "varepsilon": &HTML5Entity{Name: "varepsilon", CodePoints: []int{1013}, Characters: []byte{0xcf, 0xb5}}, + "varkappa": &HTML5Entity{Name: "varkappa", CodePoints: []int{1008}, Characters: []byte{0xcf, 0xb0}}, + "varnothing": &HTML5Entity{Name: "varnothing", CodePoints: []int{8709}, Characters: []byte{0xe2, 0x88, 0x85}}, + "varphi": &HTML5Entity{Name: "varphi", CodePoints: []int{981}, Characters: []byte{0xcf, 0x95}}, + "varpi": &HTML5Entity{Name: "varpi", CodePoints: []int{982}, Characters: []byte{0xcf, 0x96}}, + "varpropto": &HTML5Entity{Name: "varpropto", CodePoints: []int{8733}, Characters: []byte{0xe2, 0x88, 0x9d}}, + "varr": &HTML5Entity{Name: "varr", CodePoints: []int{8597}, Characters: []byte{0xe2, 0x86, 0x95}}, + "varrho": &HTML5Entity{Name: "varrho", CodePoints: []int{1009}, Characters: []byte{0xcf, 0xb1}}, + "varsigma": &HTML5Entity{Name: "varsigma", CodePoints: []int{962}, Characters: []byte{0xcf, 0x82}}, + "varsubsetneq": &HTML5Entity{Name: "varsubsetneq", CodePoints: []int{8842, 65024}, Characters: []byte{0xe2, 0x8a, 0x8a, 0xef, 0xb8, 0x80}}, + "varsubsetneqq": &HTML5Entity{Name: "varsubsetneqq", CodePoints: []int{10955, 65024}, Characters: []byte{0xe2, 0xab, 0x8b, 0xef, 0xb8, 0x80}}, + "varsupsetneq": &HTML5Entity{Name: "varsupsetneq", CodePoints: []int{8843, 65024}, Characters: []byte{0xe2, 0x8a, 0x8b, 0xef, 0xb8, 0x80}}, + "varsupsetneqq": &HTML5Entity{Name: "varsupsetneqq", CodePoints: []int{10956, 65024}, Characters: []byte{0xe2, 0xab, 0x8c, 0xef, 0xb8, 0x80}}, + "vartheta": &HTML5Entity{Name: "vartheta", CodePoints: []int{977}, Characters: []byte{0xcf, 0x91}}, + "vartriangleleft": &HTML5Entity{Name: "vartriangleleft", CodePoints: []int{8882}, Characters: []byte{0xe2, 0x8a, 0xb2}}, + "vartriangleright": &HTML5Entity{Name: "vartriangleright", CodePoints: []int{8883}, Characters: []byte{0xe2, 0x8a, 0xb3}}, + "vcy": &HTML5Entity{Name: "vcy", CodePoints: []int{1074}, Characters: []byte{0xd0, 0xb2}}, + "vdash": &HTML5Entity{Name: "vdash", CodePoints: []int{8866}, Characters: []byte{0xe2, 0x8a, 0xa2}}, + "vee": &HTML5Entity{Name: "vee", CodePoints: []int{8744}, Characters: []byte{0xe2, 0x88, 0xa8}}, + "veebar": &HTML5Entity{Name: "veebar", CodePoints: []int{8891}, Characters: []byte{0xe2, 0x8a, 0xbb}}, + "veeeq": &HTML5Entity{Name: "veeeq", CodePoints: []int{8794}, Characters: []byte{0xe2, 0x89, 0x9a}}, + "vellip": &HTML5Entity{Name: "vellip", CodePoints: []int{8942}, Characters: []byte{0xe2, 0x8b, 0xae}}, + "verbar": &HTML5Entity{Name: "verbar", CodePoints: []int{124}, Characters: []byte{0x7c}}, + "vert": &HTML5Entity{Name: "vert", CodePoints: []int{124}, Characters: []byte{0x7c}}, + "vfr": &HTML5Entity{Name: "vfr", CodePoints: []int{120115}, Characters: []byte{0xf0, 0x9d, 0x94, 0xb3}}, + "vltri": &HTML5Entity{Name: "vltri", CodePoints: []int{8882}, Characters: []byte{0xe2, 0x8a, 0xb2}}, + "vnsub": &HTML5Entity{Name: "vnsub", CodePoints: []int{8834, 8402}, Characters: []byte{0xe2, 0x8a, 0x82, 0xe2, 0x83, 0x92}}, + "vnsup": &HTML5Entity{Name: "vnsup", CodePoints: []int{8835, 8402}, Characters: []byte{0xe2, 0x8a, 0x83, 0xe2, 0x83, 0x92}}, + "vopf": &HTML5Entity{Name: "vopf", CodePoints: []int{120167}, Characters: []byte{0xf0, 0x9d, 0x95, 0xa7}}, + "vprop": &HTML5Entity{Name: "vprop", CodePoints: []int{8733}, Characters: []byte{0xe2, 0x88, 0x9d}}, + "vrtri": &HTML5Entity{Name: "vrtri", CodePoints: []int{8883}, Characters: []byte{0xe2, 0x8a, 0xb3}}, + "vscr": &HTML5Entity{Name: "vscr", CodePoints: []int{120011}, Characters: []byte{0xf0, 0x9d, 0x93, 0x8b}}, + "vsubnE": &HTML5Entity{Name: "vsubnE", CodePoints: []int{10955, 65024}, Characters: []byte{0xe2, 0xab, 0x8b, 0xef, 0xb8, 0x80}}, + "vsubne": &HTML5Entity{Name: "vsubne", CodePoints: []int{8842, 65024}, Characters: []byte{0xe2, 0x8a, 0x8a, 0xef, 0xb8, 0x80}}, + "vsupnE": &HTML5Entity{Name: "vsupnE", CodePoints: []int{10956, 65024}, Characters: []byte{0xe2, 0xab, 0x8c, 0xef, 0xb8, 0x80}}, + "vsupne": &HTML5Entity{Name: "vsupne", CodePoints: []int{8843, 65024}, Characters: []byte{0xe2, 0x8a, 0x8b, 0xef, 0xb8, 0x80}}, + "vzigzag": &HTML5Entity{Name: "vzigzag", CodePoints: []int{10650}, Characters: []byte{0xe2, 0xa6, 0x9a}}, + "wcirc": &HTML5Entity{Name: "wcirc", CodePoints: []int{373}, Characters: []byte{0xc5, 0xb5}}, + "wedbar": &HTML5Entity{Name: "wedbar", CodePoints: []int{10847}, Characters: []byte{0xe2, 0xa9, 0x9f}}, + "wedge": &HTML5Entity{Name: "wedge", CodePoints: []int{8743}, Characters: []byte{0xe2, 0x88, 0xa7}}, + "wedgeq": &HTML5Entity{Name: "wedgeq", CodePoints: []int{8793}, Characters: []byte{0xe2, 0x89, 0x99}}, + "weierp": &HTML5Entity{Name: "weierp", CodePoints: []int{8472}, Characters: []byte{0xe2, 0x84, 0x98}}, + "wfr": &HTML5Entity{Name: "wfr", CodePoints: []int{120116}, Characters: []byte{0xf0, 0x9d, 0x94, 0xb4}}, + "wopf": &HTML5Entity{Name: "wopf", CodePoints: []int{120168}, Characters: []byte{0xf0, 0x9d, 0x95, 0xa8}}, + "wp": &HTML5Entity{Name: "wp", CodePoints: []int{8472}, Characters: []byte{0xe2, 0x84, 0x98}}, + "wr": &HTML5Entity{Name: "wr", CodePoints: []int{8768}, Characters: []byte{0xe2, 0x89, 0x80}}, + "wreath": &HTML5Entity{Name: "wreath", CodePoints: []int{8768}, Characters: []byte{0xe2, 0x89, 0x80}}, + "wscr": &HTML5Entity{Name: "wscr", CodePoints: []int{120012}, Characters: []byte{0xf0, 0x9d, 0x93, 0x8c}}, + "xcap": &HTML5Entity{Name: "xcap", CodePoints: []int{8898}, Characters: []byte{0xe2, 0x8b, 0x82}}, + "xcirc": &HTML5Entity{Name: "xcirc", CodePoints: []int{9711}, Characters: []byte{0xe2, 0x97, 0xaf}}, + "xcup": &HTML5Entity{Name: "xcup", CodePoints: []int{8899}, Characters: []byte{0xe2, 0x8b, 0x83}}, + "xdtri": &HTML5Entity{Name: "xdtri", CodePoints: []int{9661}, Characters: []byte{0xe2, 0x96, 0xbd}}, + "xfr": &HTML5Entity{Name: "xfr", CodePoints: []int{120117}, Characters: []byte{0xf0, 0x9d, 0x94, 0xb5}}, + "xhArr": &HTML5Entity{Name: "xhArr", CodePoints: []int{10234}, Characters: []byte{0xe2, 0x9f, 0xba}}, + "xharr": &HTML5Entity{Name: "xharr", CodePoints: []int{10231}, Characters: []byte{0xe2, 0x9f, 0xb7}}, + "xi": &HTML5Entity{Name: "xi", CodePoints: []int{958}, Characters: []byte{0xce, 0xbe}}, + "xlArr": &HTML5Entity{Name: "xlArr", CodePoints: []int{10232}, Characters: []byte{0xe2, 0x9f, 0xb8}}, + "xlarr": &HTML5Entity{Name: "xlarr", CodePoints: []int{10229}, Characters: []byte{0xe2, 0x9f, 0xb5}}, + "xmap": &HTML5Entity{Name: "xmap", CodePoints: []int{10236}, Characters: []byte{0xe2, 0x9f, 0xbc}}, + "xnis": &HTML5Entity{Name: "xnis", CodePoints: []int{8955}, Characters: []byte{0xe2, 0x8b, 0xbb}}, + "xodot": &HTML5Entity{Name: "xodot", CodePoints: []int{10752}, Characters: []byte{0xe2, 0xa8, 0x80}}, + "xopf": &HTML5Entity{Name: "xopf", CodePoints: []int{120169}, Characters: []byte{0xf0, 0x9d, 0x95, 0xa9}}, + "xoplus": &HTML5Entity{Name: "xoplus", CodePoints: []int{10753}, Characters: []byte{0xe2, 0xa8, 0x81}}, + "xotime": &HTML5Entity{Name: "xotime", CodePoints: []int{10754}, Characters: []byte{0xe2, 0xa8, 0x82}}, + "xrArr": &HTML5Entity{Name: "xrArr", CodePoints: []int{10233}, Characters: []byte{0xe2, 0x9f, 0xb9}}, + "xrarr": &HTML5Entity{Name: "xrarr", CodePoints: []int{10230}, Characters: []byte{0xe2, 0x9f, 0xb6}}, + "xscr": &HTML5Entity{Name: "xscr", CodePoints: []int{120013}, Characters: []byte{0xf0, 0x9d, 0x93, 0x8d}}, + "xsqcup": &HTML5Entity{Name: "xsqcup", CodePoints: []int{10758}, Characters: []byte{0xe2, 0xa8, 0x86}}, + "xuplus": &HTML5Entity{Name: "xuplus", CodePoints: []int{10756}, Characters: []byte{0xe2, 0xa8, 0x84}}, + "xutri": &HTML5Entity{Name: "xutri", CodePoints: []int{9651}, Characters: []byte{0xe2, 0x96, 0xb3}}, + "xvee": &HTML5Entity{Name: "xvee", CodePoints: []int{8897}, Characters: []byte{0xe2, 0x8b, 0x81}}, + "xwedge": &HTML5Entity{Name: "xwedge", CodePoints: []int{8896}, Characters: []byte{0xe2, 0x8b, 0x80}}, + "yacute": &HTML5Entity{Name: "yacute", CodePoints: []int{253}, Characters: []byte{0xc3, 0xbd}}, + "yacy": &HTML5Entity{Name: "yacy", CodePoints: []int{1103}, Characters: []byte{0xd1, 0x8f}}, + "ycirc": &HTML5Entity{Name: "ycirc", CodePoints: []int{375}, Characters: []byte{0xc5, 0xb7}}, + "ycy": &HTML5Entity{Name: "ycy", CodePoints: []int{1099}, Characters: []byte{0xd1, 0x8b}}, + "yen": &HTML5Entity{Name: "yen", CodePoints: []int{165}, Characters: []byte{0xc2, 0xa5}}, + "yfr": &HTML5Entity{Name: "yfr", CodePoints: []int{120118}, Characters: []byte{0xf0, 0x9d, 0x94, 0xb6}}, + "yicy": &HTML5Entity{Name: "yicy", CodePoints: []int{1111}, Characters: []byte{0xd1, 0x97}}, + "yopf": &HTML5Entity{Name: "yopf", CodePoints: []int{120170}, Characters: []byte{0xf0, 0x9d, 0x95, 0xaa}}, + "yscr": &HTML5Entity{Name: "yscr", CodePoints: []int{120014}, Characters: []byte{0xf0, 0x9d, 0x93, 0x8e}}, + "yucy": &HTML5Entity{Name: "yucy", CodePoints: []int{1102}, Characters: []byte{0xd1, 0x8e}}, + "yuml": &HTML5Entity{Name: "yuml", CodePoints: []int{255}, Characters: []byte{0xc3, 0xbf}}, + "zacute": &HTML5Entity{Name: "zacute", CodePoints: []int{378}, Characters: []byte{0xc5, 0xba}}, + "zcaron": &HTML5Entity{Name: "zcaron", CodePoints: []int{382}, Characters: []byte{0xc5, 0xbe}}, + "zcy": &HTML5Entity{Name: "zcy", CodePoints: []int{1079}, Characters: []byte{0xd0, 0xb7}}, + "zdot": &HTML5Entity{Name: "zdot", CodePoints: []int{380}, Characters: []byte{0xc5, 0xbc}}, + "zeetrf": &HTML5Entity{Name: "zeetrf", CodePoints: []int{8488}, Characters: []byte{0xe2, 0x84, 0xa8}}, + "zeta": &HTML5Entity{Name: "zeta", CodePoints: []int{950}, Characters: []byte{0xce, 0xb6}}, + "zfr": &HTML5Entity{Name: "zfr", CodePoints: []int{120119}, Characters: []byte{0xf0, 0x9d, 0x94, 0xb7}}, + "zhcy": &HTML5Entity{Name: "zhcy", CodePoints: []int{1078}, Characters: []byte{0xd0, 0xb6}}, + "zigrarr": &HTML5Entity{Name: "zigrarr", CodePoints: []int{8669}, Characters: []byte{0xe2, 0x87, 0x9d}}, + "zopf": &HTML5Entity{Name: "zopf", CodePoints: []int{120171}, Characters: []byte{0xf0, 0x9d, 0x95, 0xab}}, + "zscr": &HTML5Entity{Name: "zscr", CodePoints: []int{120015}, Characters: []byte{0xf0, 0x9d, 0x93, 0x8f}}, + "zwj": &HTML5Entity{Name: "zwj", CodePoints: []int{8205}, Characters: []byte{0xe2, 0x80, 0x8d}}, + "zwnj": &HTML5Entity{Name: "zwnj", CodePoints: []int{8204}, Characters: []byte{0xe2, 0x80, 0x8c}}, +} diff --git a/util/util.go b/util/util.go new file mode 100644 index 0000000..9247d1f --- /dev/null +++ b/util/util.go @@ -0,0 +1,538 @@ +// Package util provides utility functions for the goldmark. +package util + +import ( + "bytes" + "fmt" + "io" + "net/url" + "regexp" + "sort" + "strconv" + "strings" + "unicode/utf8" +) + +// IsBlank returns true if given string is all space characters. +func IsBlank(bs []byte) bool { + for _, b := range bs { + if IsSpace(b) { + continue + } + return false + } + return true +} + +// DedentPosition dedents lines by given width. +func DedentPosition(bs []byte, width int) (pos, padding int) { + i := 0 + l := len(bs) + w := 0 + for ; i < l && w < width; i++ { + b := bs[i] + if b == ' ' { + w++ + } else if b == '\t' { + w += 4 + } else { + break + } + } + padding = w - width + if padding < 0 { + padding = 0 + } + return i, padding +} + +// VisualizeSpaces visualize invisible space characters. +func VisualizeSpaces(bs []byte) []byte { + bs = bytes.Replace(bs, []byte(" "), []byte("[SPACE]"), -1) + bs = bytes.Replace(bs, []byte("\t"), []byte("[TAB]"), -1) + bs = bytes.Replace(bs, []byte("\n"), []byte("[NEWLINE]\n"), -1) + return bs +} + +// TabWidth calculates actual width of a tab at given position. +func TabWidth(currentPos int) int { + return 4 - currentPos%4 +} + +// IndentPosition searches an indent position with given width for given line. +// If the line contains tab characters, paddings may be not zero. +// currentPos==0 and width==2: +// +// position: 0 1 +// [TAB]aaaa +// width: 1234 5678 +// +// width=2 is in the tab character. In this case, IndentPosition returns +// (pos=1, padding=2) +func IndentPosition(bs []byte, currentPos, width int) (pos, padding int) { + w := 0 + l := len(bs) + for i := 0; i < l; i++ { + b := bs[i] + if b == ' ' { + w++ + } else if b == '\t' { + w += TabWidth(currentPos + w) + } else { + break + } + if w >= width { + return i + 1, w - width + } + } + return -1, -1 +} + +// IndentWidth calculate an indent width for given line. +func IndentWidth(bs []byte, currentPos int) (width, pos int) { + l := len(bs) + for i := 0; i < l; i++ { + b := bs[i] + if b == ' ' { + width++ + pos++ + } else if b == '\t' { + width += TabWidth(currentPos + width) + pos++ + } else { + break + } + } + return +} + +// FirstNonSpacePosition returns a potisoin line that is a first nonspace +// character. +func FirstNonSpacePosition(bs []byte) int { + i := 0 + for ; i < len(bs); i++ { + c := bs[i] + if c == ' ' || c == '\t' { + continue + } + if c == '\n' { + return -1 + } + return i + } + return -1 +} + +// FindClosure returns a position that closes given opener. +// If codeSpan is set true, it ignores characters in code spans. +// If allowNesting is set true, closures correspond to nested opener will be +// ignored. +func FindClosure(bs []byte, opener, closure byte, codeSpan, allowNesting bool) int { + i := 0 + opened := 1 + codeSpanOpener := 0 + for i < len(bs) { + c := bs[i] + if codeSpan && codeSpanOpener != 0 && c == '`' { + codeSpanCloser := 0 + for ; i < len(bs); i++ { + if bs[i] == '`' { + codeSpanCloser++ + } else { + break + } + } + if codeSpanCloser == codeSpanOpener { + codeSpanOpener = 0 + } + } else if c == '\\' && i < len(bs)-1 && IsPunct(bs[i+1]) { + i += 2 + continue + } else if codeSpan && codeSpanOpener == 0 && c == '`' { + for ; i < len(bs); i++ { + if bs[i] == '`' { + codeSpanOpener++ + } else { + break + } + } + } else if (codeSpan && codeSpanOpener == 0) || !codeSpan { + if c == closure { + opened-- + if opened == 0 { + return i + } + } else if c == opener { + if !allowNesting { + return -1 + } + opened++ + } + } + i++ + } + return -1 +} + +// TrimLeft trims characters in given s from head of the source. +// bytes.TrimLeft offers same functionalities, but bytes.TrimLeft +// allocates new buffer for the result. +func TrimLeft(source, b []byte) []byte { + i := 0 + for ; i < len(source); i++ { + c := source[i] + found := false + for j := 0; j < len(b); j++ { + if c == b[j] { + found = true + break + } + } + if !found { + break + } + } + return source[i:] +} + +// TrimRight trims characters in given s from tail of the source. +func TrimRight(source, b []byte) []byte { + i := len(source) - 1 + for ; i >= 0; i-- { + c := source[i] + found := false + for j := 0; j < len(b); j++ { + if c == b[j] { + found = true + break + } + } + if !found { + break + } + } + return source[:i+1] +} + +// TrimLeftLength returns a length of leading specified characters. +func TrimLeftLength(source, s []byte) int { + return len(source) - len(TrimLeft(source, s)) +} + +// TrimRightLength returns a length of trailing specified characters. +func TrimRightLength(source, s []byte) int { + return len(source) - len(TrimRight(source, s)) +} + +// TrimLeftSpaceLength returns a length of leading space characters. +func TrimLeftSpaceLength(source []byte) int { + return TrimLeftLength(source, spaces) +} + +// TrimRightSpaceLength returns a length of trailing space characters. +func TrimRightSpaceLength(source []byte) int { + return TrimRightLength(source, spaces) +} + +// TrimLeftSpace returns a subslice of given string by slicing off all leading +// space characters. +func TrimLeftSpace(source []byte) []byte { + return TrimLeft(source, spaces) +} + +// TrimRightSpace returns a subslice of given string by slicing off all trailing +// space characters. +func TrimRightSpace(source []byte) []byte { + return TrimRight(source, spaces) +} + +// ReplaceSpaces replaces sequence of spaces with given repl. +func ReplaceSpaces(source []byte, repl byte) []byte { + var ret []byte + start := -1 + for i, c := range source { + iss := IsSpace(c) + if start < 0 && iss { + start = i + continue + } else if start >= 0 && iss { + continue + } else if start >= 0 { + if ret == nil { + ret = make([]byte, 0, len(source)) + ret = append(ret, source[:start]...) + } + ret = append(ret, repl) + start = -1 + } + if ret != nil { + ret = append(ret, c) + } + } + if start >= 0 && ret != nil { + ret = append(ret, repl) + } + if ret == nil { + return source + } + return ret +} + +// ToRune decode given bytes start at pos and returns a rune. +func ToRune(source []byte, pos int) rune { + i := pos + for ; i >= 0; i-- { + if utf8.RuneStart(source[i]) { + break + } + } + r, _ := utf8.DecodeRune(source[i:]) + return r +} + +// ToValidRune returns 0xFFFD if given rune is invalid, otherwise v. +func ToValidRune(v rune) rune { + if v == 0 || !utf8.ValidRune(v) { + return rune(0xFFFD) + } + return v +} + +// ToLinkReference convert given bytes into a valid link reference string. +// ToLinkReference trims leading and trailing spaces and convert into lower +// case and replace spaces with a single space character. +func ToLinkReference(v []byte) string { + v = TrimLeftSpace(v) + v = TrimRightSpace(v) + return strings.ToLower(string(ReplaceSpaces(v, ' '))) +} + +var escapeRegex = regexp.MustCompile(`\\.`) +var hexRefRegex = regexp.MustCompile(`#[xX][\da-fA-F]+;`) +var numRefRegex = regexp.MustCompile(`#\d{1,7};`) +var entityRefRegex = regexp.MustCompile(`&([a-zA-Z\d]+);`) + +var entityLt = []byte("<") +var entityGt = []byte(">") +var entityAmp = []byte("&") +var entityQuot = []byte(""") + +// EscapeHTML escapes characters that should be escaped in HTML text. +func EscapeHTML(v []byte) []byte { + result := make([]byte, 0, len(v)+10) + for _, c := range v { + switch c { + case '<': + result = append(result, entityLt...) + case '>': + result = append(result, entityGt...) + case '&': + result = append(result, entityAmp...) + case '"': + result = append(result, entityQuot...) + default: + result = append(result, c) + } + } + return result +} + +// UnescapePunctuations unescapes blackslash escaped punctuations. +func UnescapePunctuations(v []byte) []byte { + return escapeRegex.ReplaceAllFunc(v, func(match []byte) []byte { + if IsPunct(match[1]) { + return []byte{match[1]} + } + return match + }) +} + +// ResolveNumericReferences resolve numeric references like 'Ӓ" . +func ResolveNumericReferences(v []byte) []byte { + buf := make([]byte, 6, 6) + v = hexRefRegex.ReplaceAllFunc(v, func(match []byte) []byte { + v, _ := strconv.ParseUint(string(match[2:len(match)-1]), 16, 32) + n := utf8.EncodeRune(buf, ToValidRune(rune(v))) + return buf[:n] + }) + return numRefRegex.ReplaceAllFunc(v, func(match []byte) []byte { + v, _ := strconv.ParseUint(string(match[1:len(match)-1]), 0, 32) + n := utf8.EncodeRune(buf, ToValidRune(rune(v))) + return buf[:n] + }) +} + +// ResolveEntityNames resolve entity references like 'ö" . +func ResolveEntityNames(v []byte) []byte { + return entityRefRegex.ReplaceAllFunc(v, func(match []byte) []byte { + entity, ok := LookUpHTML5EntityByName(string(match[1 : len(match)-1])) + if ok { + return entity.Characters + } + return match + }) +} + +// URLEscape escape given URL. +// If resolveReference is set true: +// 1. unescape punctuations +// 2. resolve numeric references +// 3. resolve entity references +// +// URL encoded values (%xx) are keeped as is. +func URLEscape(v []byte, resolveReference bool) []byte { + if resolveReference { + v = UnescapePunctuations(v) + v = ResolveNumericReferences(v) + v = ResolveEntityNames(v) + } + result := make([]byte, 0, len(v)+10) + for i := 0; i < len(v); { + c := v[i] + if urlEscapeTable[c] == 1 { + result = append(result, c) + i++ + continue + } + if c == '%' && i+2 < len(v) && IsHexDecimal(v[i+1]) && IsHexDecimal(v[i+1]) { + result = append(result, c, v[i+1], v[i+2]) + i += 3 + continue + } + u8len := utf8lenTable[c] + if u8len == 99 { // invalid utf8 leading byte, skip it + result = append(result, c) + i++ + continue + } + if c == ' ' { + result = append(result, '%', '2', '0') + i++ + continue + } + result = append(result, []byte(url.QueryEscape(string(v[i:i+int(u8len)])))...) + i += int(u8len) + } + return result +} + +// GenerateLinkID generates an ID for links. +func GenerateLinkID(value []byte, exists map[string]bool) []byte { + value = TrimLeftSpace(value) + value = TrimRightSpace(value) + result := []byte{} + for i := 0; i < len(value); { + v := value[i] + l := utf8lenTable[v] + i += int(l) + if l != 1 { + continue + } + if IsAlphaNumeric(v) { + result = append(result, v) + } else if v == ' ' { + result = append(result, '-') + } + } + if len(result) == 0 { + result = []byte("id") + } + if _, ok := exists[string(result)]; !ok { + exists[string(result)] = true + return result + } + for i := 1; ; i++ { + newResult := fmt.Sprintf("%s%d", result, i) + if _, ok := exists[newResult]; !ok { + exists[newResult] = true + return []byte(newResult) + } + + } +} + +var spaces = []byte(" \t\n\x0b\x0c\x0d") + +var spaceTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + +var punctTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + +// a-zA-Z0-9, ;/?:@&=+$,-_.!~*'()# +var urlEscapeTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + +var utf8lenTable = [256]int8{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 99, 99, 99, 99, 99, 99, 99, 99} + +// IsPunct returns true if given character is a punctuation, otherwise false. +func IsPunct(c byte) bool { + return punctTable[c] == 1 +} + +// IsSpace returns true if given character is a space, otherwise false. +func IsSpace(c byte) bool { + return spaceTable[c] == 1 +} + +// IsNumeric returns true if given character is a numeric, otherwise false. +func IsNumeric(c byte) bool { + return c >= '0' && c <= '9' +} + +// IsHexDecimal returns true if given character is a hexdecimal, otherwise false. +func IsHexDecimal(c byte) bool { + return c >= '0' && c <= '9' || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F' +} + +// IsAlphaNumeric returns true if given character is a alphabet or a numeric, otherwise false. +func IsAlphaNumeric(c byte) bool { + return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' +} + +// A BufWriter is a subset of the bufio.Writer . +type BufWriter interface { + io.Writer + Available() int + Buffered() int + Flush() error + WriteByte(c byte) error + WriteRune(r rune) (size int, err error) + WriteString(s string) (int, error) +} + +// A PrioritizedValue struct holds pair of an arbitary value and a priority. +type PrioritizedValue struct { + // Value is an arbitary value that you want to prioritize. + Value interface{} + // Priority is a priority of the value. + Priority int +} + +// PrioritizedSlice is a slice of the PrioritizedValues +type PrioritizedSlice []PrioritizedValue + +// Sort sorts the PrioritizedSlice in ascending order. +func (s PrioritizedSlice) Sort() { + sort.Slice(s, func(i, j int) bool { + return s[i].Priority < s[j].Priority + }) +} + +// Remove removes given value from this slice. +func (s PrioritizedSlice) Remove(v interface{}) PrioritizedSlice { + i := 0 + found := false + for ; i < len(s); i++ { + if s[i].Value == v { + found = true + break + } + } + if !found { + return s + } + return append(s[:i], s[i+1:]...) +} + +// Prioritized returns a new PrioritizedValue. +func Prioritized(v interface{}, priority int) PrioritizedValue { + return PrioritizedValue{v, priority} +} diff --git a/util/util_safe.go b/util/util_safe.go new file mode 100644 index 0000000..d640585 --- /dev/null +++ b/util/util_safe.go @@ -0,0 +1,13 @@ +// +build appengine,js + +package util + +// BytesToReadOnlyString returns a string converted from given bytes. +func BytesToReadOnlyString(b []byte) string { + return string(b) +} + +// StringToReadOnlyBytes returns bytes converted from given string. +func StringToReadOnlyBytes(s string) []byte { + return []byte(s) +} diff --git a/util/util_unsafe.go b/util/util_unsafe.go new file mode 100644 index 0000000..beeae29 --- /dev/null +++ b/util/util_unsafe.go @@ -0,0 +1,20 @@ +// +build !appengine,!js + +package util + +import ( + "reflect" + "unsafe" +) + +// BytesToReadOnlyString returns a string converted from given bytes. +func BytesToReadOnlyString(b []byte) string { + return *(*string)(unsafe.Pointer(&b)) +} + +// StringToReadOnlyBytes returns bytes converted from given string. +func StringToReadOnlyBytes(s string) []byte { + sh := (*reflect.StringHeader)(unsafe.Pointer(&s)) + bh := reflect.SliceHeader{Data: sh.Data, Len: sh.Len, Cap: sh.Len} + return *(*[]byte)(unsafe.Pointer(&bh)) +}
    ')
    +		r.writeLines(w, source, n)
    +	} else {
    +		w.WriteString("