diff --git a/README.md b/README.md index 0a2bd36..c07efe2 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ Customize a parser and a renderer: md := goldmark.NewMarkdown( goldmark.WithExtensions(extension.GFM), goldmark.WithParserOptions( - parser.WithHeadingID(), + parser.WithAutoHeadingID(), ), goldmark.WithRendererOptions( html.WithHardWraps(), @@ -90,7 +90,7 @@ Parser and Renderer options | `parser.WithBlockParsers` | A `util.PrioritizedSlice` whose elements are `parser.BlockParser` | Parsers for parsing block level elements. | | `parser.WithInlineParsers` | A `util.PrioritizedSlice` whose elements are `parser.InlineParser` | Parsers for parsing inline level elements. | | `parser.WithParagraphTransformers` | A `util.PrioritizedSlice` whose elements are `parser.ParagraphTransformer` | Transformers for transforming paragraph nodes. | -| `parser.WithHeadingID` | `-` | Enables custom heading ids( `{#custom-id}` ) and auto heading ids. | +| `parser.WithAutoHeadingID` | `-` | Enables auto heading ids. | | `parser.WithFilterTags` | `...string` | HTML tag names forbidden in HTML blocks and Raw HTMLs. | ### HTML Renderer options diff --git a/ast/ast.go b/ast/ast.go index ab830a2..3d9097a 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -320,13 +320,21 @@ func (n *BaseNode) Text(source []byte) []byte { func (n *BaseNode) SetAttribute(name, value []byte) { if n.attributes == nil { n.attributes = make([]Attribute, 0, 10) - n.attributes = append(n.attributes, Attribute{name, value}) - return + } else { + for i, a := range n.attributes { + if bytes.Equal(a.Name, name) { + n.attributes[i].Name = name + n.attributes[i].Value = value + return + } + } } - for i, a := range n.attributes { - if bytes.Equal(a.Name, name) { - n.attributes[i].Name = name - n.attributes[i].Value = value + if len(name) == 1 { + if name[0] == '#' { + n.attributes = append(n.attributes, Attribute{attrNameID, value}) + return + } else if name[0] == '.' 
{ + n.attributes = append(n.attributes, Attribute{attrNameClass, value}) return } } diff --git a/parser/atx_heading.go b/parser/atx_heading.go index 04f9003..4e1ca7d 100644 --- a/parser/atx_heading.go +++ b/parser/atx_heading.go @@ -9,14 +9,14 @@ import ( // A HeadingConfig struct is a data structure that holds configuration of the renderers related to headings. type HeadingConfig struct { - HeadingID bool + AutoHeadingID bool } // SetOption implements SetOptioner. func (b *HeadingConfig) SetOption(name OptionName, value interface{}) { switch name { - case HeadingID: - b.HeadingID = true + case AutoHeadingID: + b.AutoHeadingID = true } } @@ -25,27 +25,27 @@ type HeadingOption interface { SetHeadingOption(*HeadingConfig) } -// HeadingID is an option name that enables custom and auto IDs for headings. -var HeadingID OptionName = "HeadingID" +// AutoHeadingID is an option name that enables auto IDs for headings. +var AutoHeadingID OptionName = "AutoHeadingID" -type withHeadingID struct { +type withAutoHeadingID struct { } -func (o *withHeadingID) SetConfig(c *Config) { - c.Options[HeadingID] = true +func (o *withAutoHeadingID) SetConfig(c *Config) { + c.Options[AutoHeadingID] = true } -func (o *withHeadingID) SetHeadingOption(p *HeadingConfig) { - p.HeadingID = true +func (o *withAutoHeadingID) SetHeadingOption(p *HeadingConfig) { + p.AutoHeadingID = true } -// WithHeadingID is a functional option that enables custom heading ids and +// WithAutoHeadingID is a functional option that enables // auto generated heading ids. 
-func WithHeadingID() interface { +func WithAutoHeadingID() interface { Option HeadingOption } { - return &withHeadingID{} + return &withAutoHeadingID{} } var atxHeadingRegexp = regexp.MustCompile(`^[ ]{0,3}(#{1,6})(?:\s+(.*?)\s*([\s]#+\s*)?)?\n?$`) @@ -104,10 +104,10 @@ func (b *atxHeadingParser) Continue(node ast.Node, reader text.Reader, pc Contex } func (b *atxHeadingParser) Close(node ast.Node, reader text.Reader, pc Context) { - if !b.HeadingID { + if !b.AutoHeadingID { return } - parseOrGenerateHeadingID(node.(*ast.Heading), reader, pc) + generateAutoHeadingID(node.(*ast.Heading), reader, pc) } func (b *atxHeadingParser) CanInterruptParagraph() bool { @@ -118,30 +118,13 @@ func (b *atxHeadingParser) CanAcceptIndentedLine() bool { return false } -var headingIDRegexp = regexp.MustCompile(`^(.*[^\\])({#([^}]+)}\s*)\n?$`) -var headingIDMap = NewContextKey() -var attrNameID = []byte("id") +var attrAutoHeadingIDPrefix = []byte("heading") +var attrNameID = []byte("#") -func parseOrGenerateHeadingID(node *ast.Heading, reader text.Reader, pc Context) { - existsv := pc.Get(headingIDMap) - var exists map[string]bool - if existsv == nil { - exists = map[string]bool{} - pc.Set(headingIDMap, exists) - } else { - exists = existsv.(map[string]bool) - } +func generateAutoHeadingID(node *ast.Heading, reader text.Reader, pc Context) { lastIndex := node.Lines().Len() - 1 lastLine := node.Lines().At(lastIndex) line := lastLine.Value(reader.Source()) - m := headingIDRegexp.FindSubmatchIndex(line) - var headingID []byte - if m != nil { - headingID = line[m[6]:m[7]] - lastLine.Stop -= m[5] - m[4] - node.Lines().Set(lastIndex, lastLine) - } else { - headingID = util.GenerateLinkID(line, exists) - } + headingID := pc.IDs().Generate(line, attrAutoHeadingIDPrefix) node.SetAttribute(attrNameID, headingID) } diff --git a/parser/parser.go b/parser/parser.go index 3c9f21a..bab8c89 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -53,6 +53,67 @@ func (r *reference) String() string { 
return fmt.Sprintf("Reference{Label:%s, Destination:%s, Title:%s}", r.label, r.destination, r.title) } +// An IDs interface is a collection of the element ids. +type IDs interface { + // Generate generates a new element id. + Generate(value, prefix []byte) []byte + + // Put puts a given element id to the used ids table. + Put(value []byte) +} + +type ids struct { + values map[string]bool +} + +func newIDs() IDs { + return &ids{ + values: map[string]bool{}, + } +} + +func (s *ids) Generate(value, prefix []byte) []byte { + value = util.TrimLeftSpace(value) + value = util.TrimRightSpace(value) + result := []byte{} + for i := 0; i < len(value); { + v := value[i] + l := util.UTF8Len(v) + i += int(l) + if l != 1 { + continue + } + if util.IsAlphaNumeric(v) { + result = append(result, v) + } else if util.IsSpace(v) { + result = append(result, '-') + } + } + if len(result) == 0 { + if prefix != nil { + result = append(make([]byte, 0, len(prefix)), prefix...) + } else { + result = []byte("id") + } + } + if _, ok := s.values[util.BytesToReadOnlyString(result)]; !ok { + s.values[util.BytesToReadOnlyString(result)] = true + return result + } + for i := 1; ; i++ { + newResult := fmt.Sprintf("%s%d", result, i) + if _, ok := s.values[newResult]; !ok { + s.values[newResult] = true + return []byte(newResult) + } + + } +} + +func (s *ids) Put(value []byte) { + s.values[util.BytesToReadOnlyString(value)] = true +} + // ContextKey is a key that is used to set arbitary values to the context. type ContextKey int @@ -87,6 +148,9 @@ type Context interface { // References returns a list of references. References() []Reference + // IDs returns a collection of the element ids. + IDs() IDs + // BlockOffset returns a first non-space character position on current line. // This value is valid only for BlockParser.Open. 
BlockOffset() int @@ -123,6 +187,7 @@ type Context interface { type parseContext struct { store []interface{} + ids IDs refs map[string]Reference blockOffset int delimiters *Delimiter @@ -135,6 +200,7 @@ func NewContext() Context { return &parseContext{ store: make([]interface{}, ContextKeyMax+1), refs: map[string]Reference{}, + ids: newIDs(), blockOffset: 0, delimiters: nil, lastDelimiter: nil, @@ -150,6 +216,10 @@ func (p *parseContext) Set(key ContextKey, value interface{}) { p.store[key] = value } +func (p *parseContext) IDs() IDs { + return p.ids +} + func (p *parseContext) BlockOffset() int { return p.blockOffset } diff --git a/parser/setext_headings.go b/parser/setext_headings.go index 2552465..220403f 100644 --- a/parser/setext_headings.go +++ b/parser/setext_headings.go @@ -94,10 +94,10 @@ func (b *setextHeadingParser) Close(node ast.Node, reader text.Reader, pc Contex tmp.Parent().RemoveChild(tmp.Parent(), tmp) } - if !b.HeadingID { + if !b.AutoHeadingID { return } - parseOrGenerateHeadingID(heading, reader, pc) + generateAutoHeadingID(heading, reader, pc) } func (b *setextHeadingParser) CanInterruptParagraph() bool { diff --git a/util/util.go b/util/util.go index 2f6268b..4b084aa 100644 --- a/util/util.go +++ b/util/util.go @@ -3,7 +3,6 @@ package util import ( "bytes" - "fmt" "io" "net/url" "sort" @@ -574,7 +573,7 @@ func FindAttributeIndex(b []byte, canEscapeQuotes bool) [4]int { i++ result[1] = i result[2] = i - for ; i < l && !IsSpace(b[i]); i++ { + for ; i < l && !IsSpace(b[i]) && (!IsPunct(b[i]) || b[i] == '_' || b[i] == '-'); i++ { } result[3] = i return result @@ -677,41 +676,6 @@ func FindHTMLAttributeIndex(b []byte, canEscapeQuotes bool) [4]int { return result } -// GenerateLinkID generates an ID for links. 
-func GenerateLinkID(value []byte, exists map[string]bool) []byte { - value = TrimLeftSpace(value) - value = TrimRightSpace(value) - result := []byte{} - for i := 0; i < len(value); { - v := value[i] - l := utf8lenTable[v] - i += int(l) - if l != 1 { - continue - } - if IsAlphaNumeric(v) { - result = append(result, v) - } else if v == ' ' { - result = append(result, '-') - } - } - if len(result) == 0 { - result = []byte("id") - } - if _, ok := exists[string(result)]; !ok { - exists[string(result)] = true - return result - } - for i := 1; ; i++ { - newResult := fmt.Sprintf("%s%d", result, i) - if _, ok := exists[newResult]; !ok { - exists[newResult] = true - return []byte(newResult) - } - - } -} - var spaces = []byte(" \t\n\x0b\x0c\x0d") var spaceTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} @@ -723,6 +687,11 @@ var urlEscapeTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 var utf8lenTable = [256]int8{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 99, 99, 99, 99, 99, 99, 99, 99} +// UTF8Len returns a byte length of the utf-8 character. +func UTF8Len(b byte) int8 { + return utf8lenTable[b] +} + // IsPunct returns true if the given character is a punctuation, otherwise false. func IsPunct(c byte) bool { return punctTable[c] == 1