From 7cdc0fb06f6953972ceaa7176c6bf25e706303e9 Mon Sep 17 00:00:00 2001
From: Karel Bilek
Date: Fri, 29 Jan 2021 13:08:04 +0700
Subject: [PATCH] Fix leading tabs with codeblocks
Note that this is a breaking change and will require a new goldmark major version.
I have tried to fix the problem with leading tabs in fenced code blocks (and probably normal code blocks too).
Important note: tabs do not behave like "just 4 spaces". They advance to the next 4-column tab stop, so a tab can be anywhere from 1 to 4 spaces wide, depending on its position.
If you have Markdown like this (`.` represents a space; `[tb]`, `[t]` and `[]` each represent a single tab, drawn as wide as the columns that tab occupies)
```
*.some.text
..```
..foo
..[]foo
..```
```
you expect the tab to be kept in the code. This did not work properly in goldmark, and this patch fixes that.
However, if you have code like this
```
*.some.text
..```
..foo
.[t]foo
..```
```
what should happen? I decided that it should be two spaces, as the tab is not "completely" inside the code block. Similarly, what should happen in this case?
```
*.some.text
..```
..foo
.[t][tb]foo
..```
```
I decided that it should be three spaces first and then a tab. I am not sure what the correct solution even is here...
The crux of the fix is this: text segments no longer carry just a padding width, but also remember which characters make up that padding, and print those characters when asked to do so in code blocks. In all other cases, the padding characters (`PaddingChars`) are ignored.
This should fix #177.
---
_test/extra.txt | 57 ++++++++++++++++
extension/definition_list.go | 8 +--
extension/footnote.go | 6 +-
parser/blockquote.go | 2 +-
parser/code_block.go | 15 ++--
parser/fcode_block.go | 8 ++-
parser/list_item.go | 8 +--
text/reader.go | 27 +++++---
text/segment.go | 81 +++++++++++++++++++---
util/util.go | 128 +++++++++++++----------------------
10 files changed, 218 insertions(+), 122 deletions(-)
diff --git a/_test/extra.txt b/_test/extra.txt
index 4f9499c..eefec94 100644
--- a/_test/extra.txt
+++ b/_test/extra.txt
@@ -159,3 +159,60 @@ bbb

//= = = = = = = = = = = = = = = = = = = = = = = =//
+
+13: fenced code block starting with tab inside list
+//- - - - - - - - -//
+* foo
+ ```Makefile
+ foo
+ foo
+ ```
+//- - - - - - - - -//
+
+//= = = = = = = = = = = = = = = = = = = = = = = =//
+
+14: fenced code block inside list, mismatched tab start
+//- - - - - - - - -//
+* foo
+ ```Makefile
+ foo
+ foo
+ ```
+//- - - - - - - - -//
+
+//= = = = = = = = = = = = = = = = = = = = = = = =//
+
+
+15: fenced code block inside nested list
+//- - - - - - - - -//
+* foo
+ - bar
+ ```Makefile
+ foo
+ foo
+ ```
+//- - - - - - - - -//
+
+//= = = = = = = = = = = = = = = = = = = = = = = =//
diff --git a/extension/definition_list.go b/extension/definition_list.go
index eb16dd0..35adeee 100644
--- a/extension/definition_list.go
+++ b/extension/definition_list.go
@@ -81,8 +81,8 @@ func (b *definitionListParser) Continue(node gast.Node, reader text.Reader, pc p
if w < list.Offset {
return parser.Close
}
- pos, padding := util.IndentPosition(line, reader.LineOffset(), list.Offset)
- reader.AdvanceAndSetPadding(pos, padding)
+ pos, padding, chars := util.IndentPosition(line, reader.LineOffset(), list.Offset)
+ reader.AdvanceAndSetPadding(pos, padding, chars)
return parser.Continue | parser.HasChildren
}
@@ -137,8 +137,8 @@ func (b *definitionDescriptionParser) Open(parent gast.Node, reader text.Reader,
}
para.Parent().RemoveChild(para.Parent(), para)
}
- cpos, padding := util.IndentPosition(line[pos+1:], pos+1, list.Offset-pos-1)
- reader.AdvanceAndSetPadding(cpos, padding)
+ cpos, padding, chars := util.IndentPosition(line[pos+1:], pos+1, list.Offset-pos-1)
+ reader.AdvanceAndSetPadding(cpos, padding, chars)
return ast.NewDefinitionDescription(), parser.HasChildren
}
diff --git a/extension/footnote.go b/extension/footnote.go
index 62f5ee6..2ec9dbd 100644
--- a/extension/footnote.go
+++ b/extension/footnote.go
@@ -66,7 +66,7 @@ func (b *footnoteBlockParser) Open(parent gast.Node, reader text.Reader, pc pars
reader.Advance(pos)
return item, parser.NoChildren
}
- reader.AdvanceAndSetPadding(pos, padding)
+ reader.AdvanceAndSetPadding(pos, padding, segment.PaddingChars)
return item, parser.HasChildren
}
@@ -75,11 +75,11 @@ func (b *footnoteBlockParser) Continue(node gast.Node, reader text.Reader, pc pa
if util.IsBlank(line) {
return parser.Continue | parser.HasChildren
}
- childpos, padding := util.IndentPosition(line, reader.LineOffset(), 4)
+ childpos, padding, paddingChars := util.IndentPosition(line, reader.LineOffset(), 4)
if childpos < 0 {
return parser.Close
}
- reader.AdvanceAndSetPadding(childpos, padding)
+ reader.AdvanceAndSetPadding(childpos, padding, paddingChars)
return parser.Continue | parser.HasChildren
}
diff --git a/parser/blockquote.go b/parser/blockquote.go
index e7778dc..36b0a14 100644
--- a/parser/blockquote.go
+++ b/parser/blockquote.go
@@ -33,7 +33,7 @@ func (b *blockquoteParser) process(reader text.Reader) bool {
}
reader.Advance(pos)
if line[pos-1] == '\t' {
- reader.SetPadding(2)
+ reader.SetPadding(2, []byte(" "))
}
return true
}
diff --git a/parser/code_block.go b/parser/code_block.go
index d02c21f..13f230e 100644
--- a/parser/code_block.go
+++ b/parser/code_block.go
@@ -24,17 +24,18 @@ func (b *codeBlockParser) Trigger() []byte {
func (b *codeBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
line, segment := reader.PeekLine()
- pos, padding := util.IndentPosition(line, reader.LineOffset(), 4)
+ pos, padding, chars := util.IndentPosition(line, reader.LineOffset(), 4)
if pos < 0 || util.IsBlank(line) {
return nil, NoChildren
}
node := ast.NewCodeBlock()
- reader.AdvanceAndSetPadding(pos, padding)
+
+
+ reader.AdvanceAndSetPadding(pos, padding, chars)
_, segment = reader.PeekLine()
- node.Lines().Append(segment)
+ node.Lines().Append(segment.WithRenderPaddingTabs())
reader.Advance(segment.Len() - 1)
return node, NoChildren
-
}
func (b *codeBlockParser) Continue(node ast.Node, reader text.Reader, pc Context) State {
@@ -43,13 +44,13 @@ func (b *codeBlockParser) Continue(node ast.Node, reader text.Reader, pc Context
node.Lines().Append(segment.TrimLeftSpaceWidth(4, reader.Source()))
return Continue | NoChildren
}
- pos, padding := util.IndentPosition(line, reader.LineOffset(), 4)
+ pos, padding, chars := util.IndentPosition(line, reader.LineOffset(), 4)
if pos < 0 {
return Close
}
- reader.AdvanceAndSetPadding(pos, padding)
+ reader.AdvanceAndSetPadding(pos, padding, chars)
_, segment = reader.PeekLine()
- node.Lines().Append(segment)
+ node.Lines().Append(segment.WithRenderPaddingTabs())
reader.Advance(segment.Len() - 1)
return Continue | NoChildren
}
diff --git a/parser/fcode_block.go b/parser/fcode_block.go
index f5b83ee..dd20486 100644
--- a/parser/fcode_block.go
+++ b/parser/fcode_block.go
@@ -70,6 +70,7 @@ func (b *fencedCodeBlockParser) Open(parent ast.Node, reader text.Reader, pc Con
func (b *fencedCodeBlockParser) Continue(node ast.Node, reader text.Reader, pc Context) State {
line, segment := reader.PeekLine()
+
fdata := pc.Get(fencedCodeBlockInfoKey).(*fenceData)
w, pos := util.IndentWidth(line, reader.LineOffset())
if w < 4 {
@@ -86,11 +87,12 @@ func (b *fencedCodeBlockParser) Continue(node ast.Node, reader text.Reader, pc C
return Close
}
}
- pos, padding := util.DedentPositionPadding(line, reader.LineOffset(), segment.Padding, fdata.indent)
- seg := text.NewSegmentPadding(segment.Start+pos, segment.Stop, padding)
+ pos, padding, chars := util.DedentPositionPadding(line, reader.LineOffset(), segment.Padding, fdata.indent, segment.PaddingChars)
+ seg := text.NewSegmentPadding(segment.Start+pos, segment.Stop, padding, chars)
+ seg = seg.WithRenderPaddingTabs()
node.Lines().Append(seg)
- reader.AdvanceAndSetPadding(segment.Stop-segment.Start-pos-1, padding)
+ reader.AdvanceAndSetPadding(segment.Stop-segment.Start-pos-1, padding, chars)
return Continue | NoChildren
}
diff --git a/parser/list_item.go b/parser/list_item.go
index 4a698d8..4c2b097 100644
--- a/parser/list_item.go
+++ b/parser/list_item.go
@@ -44,9 +44,9 @@ func (b *listItemParser) Open(parent ast.Node, reader text.Reader, pc Context) (
return node, NoChildren
}
- pos, padding := util.IndentPosition(line[match[4]:], match[4], itemOffset)
+ pos, padding, chars := util.IndentPosition(line[match[4]:], match[4], itemOffset)
child := match[3] + pos
- reader.AdvanceAndSetPadding(child, padding)
+ reader.AdvanceAndSetPadding(child, padding, chars)
return node, HasChildren
}
@@ -66,8 +66,8 @@ func (b *listItemParser) Continue(node ast.Node, reader text.Reader, pc Context)
}
return Close
}
- pos, padding := util.IndentPosition(line, reader.LineOffset(), offset)
- reader.AdvanceAndSetPadding(pos, padding)
+ pos, padding, paddingChars := util.IndentPosition(line, reader.LineOffset(), offset)
+ reader.AdvanceAndSetPadding(pos, padding, paddingChars)
return Continue | HasChildren
}
diff --git a/text/reader.go b/text/reader.go
index df25e54..1659907 100644
--- a/text/reader.go
+++ b/text/reader.go
@@ -45,14 +45,14 @@ type Reader interface {
SetPosition(int, Segment)
// SetPadding sets padding to the reader.
- SetPadding(int)
+ SetPadding(int, []byte)
// Advance advances the internal pointer.
Advance(int)
// AdvanceAndSetPadding advances the internal pointer and add padding to the
// reader.
- AdvanceAndSetPadding(int, int)
+ AdvanceAndSetPadding(int, int, []byte)
// AdvanceLine advances the internal pointer to the next line head.
AdvanceLine()
@@ -120,7 +120,7 @@ func (r *reader) Peek() byte {
func (r *reader) PeekLine() ([]byte, Segment) {
if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
if r.peekedLine == nil {
- r.peekedLine = r.pos.Value(r.Source())
+ r.peekedLine = r.pos.ValueKeepTabs(r.Source())
}
return r.peekedLine, r.pos
}
@@ -169,9 +169,11 @@ func (r *reader) Advance(n int) {
if n < len(r.peekedLine) && r.pos.Padding == 0 {
r.pos.Start += n
r.peekedLine = nil
+
return
}
r.peekedLine = nil
+
l := r.sourceLength
for ; n > 0 && r.pos.Start < l; n-- {
if r.pos.Padding != 0 {
@@ -186,16 +188,19 @@ func (r *reader) Advance(n int) {
}
}
-func (r *reader) AdvanceAndSetPadding(n, padding int) {
+func (r *reader) AdvanceAndSetPadding(n, padding int, chars []byte) {
r.Advance(n)
if padding > r.pos.Padding {
- r.SetPadding(padding)
+ r.SetPadding(padding, chars)
}
+ // always set the chars
+ r.pos.PaddingChars = chars
}
func (r *reader) AdvanceLine() {
r.lineOffset = -1
r.peekedLine = nil
+
r.pos.Start = r.pos.Stop
r.head = r.pos.Start
if r.pos.Start < 0 {
@@ -223,8 +228,9 @@ func (r *reader) SetPosition(line int, pos Segment) {
r.pos = pos
}
-func (r *reader) SetPadding(v int) {
+func (r *reader) SetPadding(v int, chars []byte) {
r.pos.Padding = v
+ r.pos.PaddingChars = chars
}
func (r *reader) SkipSpaces() (Segment, int, bool) {
@@ -380,7 +386,7 @@ func (r *blockReader) Peek() byte {
func (r *blockReader) PeekLine() ([]byte, Segment) {
if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
- return r.pos.Value(r.source), r.pos
+ return r.pos.ValueKeepTabs(r.source), r.pos
}
return nil, r.pos
}
@@ -406,10 +412,10 @@ func (r *blockReader) Advance(n int) {
}
}
-func (r *blockReader) AdvanceAndSetPadding(n, padding int) {
+func (r *blockReader) AdvanceAndSetPadding(n, padding int, chars []byte) {
r.Advance(n)
if padding > r.pos.Padding {
- r.SetPadding(padding)
+ r.SetPadding(padding, chars)
}
}
@@ -440,9 +446,10 @@ func (r *blockReader) SetPosition(line int, pos Segment) {
}
}
-func (r *blockReader) SetPadding(v int) {
+func (r *blockReader) SetPadding(v int, chars []byte) {
r.lineOffset = -1
r.pos.Padding = v
+ r.pos.PaddingChars = chars
}
func (r *blockReader) SkipSpaces() (Segment, int, bool) {
diff --git a/text/segment.go b/text/segment.go
index badd4bc..ce4cad4 100644
--- a/text/segment.go
+++ b/text/segment.go
@@ -18,6 +18,10 @@ type Segment struct {
// Padding is a padding length of the segment.
Padding int
+
+ PaddingChars []byte
+
+ RenderPaddingTabs bool
}
// NewSegment return a new Segment.
@@ -30,16 +34,25 @@ func NewSegment(start, stop int) Segment {
}
// NewSegmentPadding returns a new Segment with the given padding.
-func NewSegmentPadding(start, stop, n int) Segment {
+func NewSegmentPadding(start, stop, n int, chars []byte) Segment {
return Segment{
- Start: start,
- Stop: stop,
- Padding: n,
+ Start: start,
+ Stop: stop,
+ Padding: n,
+ PaddingChars: chars,
}
}
+func (t Segment) WithRenderPaddingTabs() Segment {
+ t.RenderPaddingTabs = true
+ return t
+}
+
// Value returns a value of the segment.
func (t *Segment) Value(buffer []byte) []byte {
+ if t.RenderPaddingTabs {
+ return t.ValueKeepTabs(buffer)
+ }
if t.Padding == 0 {
return buffer[t.Start:t.Stop]
}
@@ -48,6 +61,15 @@ func (t *Segment) Value(buffer []byte) []byte {
return append(result, buffer[t.Start:t.Stop]...)
}
+func (t *Segment) ValueKeepTabs(buffer []byte) []byte {
+ if t.Padding == 0 {
+ return buffer[t.Start:t.Stop]
+ }
+ result := make([]byte, 0, t.Padding+t.Stop-t.Start+1)
+ result = append(result, t.PaddingChars...)
+ return append(result, buffer[t.Start:t.Stop]...)
+}
+
// Len returns a length of the segment.
func (t *Segment) Len() int {
return t.Stop - t.Start + t.Padding
@@ -62,6 +84,8 @@ func (t *Segment) Between(other Segment) Segment {
t.Start,
other.Start,
t.Padding-other.Padding,
+ // ???? no idea what here, just put spaces there
+ bytes.Repeat([]byte{' '}, t.Padding-other.Padding),
)
}
@@ -78,7 +102,7 @@ func (t *Segment) TrimRightSpace(buffer []byte) Segment {
if l == len(v) {
return NewSegment(t.Start, t.Start)
}
- return NewSegmentPadding(t.Start, t.Stop-l, t.Padding)
+ return NewSegmentPadding(t.Start, t.Stop-l, t.Padding, t.PaddingChars)
}
// TrimLeftSpace returns a new segment by slicing off all leading
@@ -89,10 +113,39 @@ func (t *Segment) TrimLeftSpace(buffer []byte) Segment {
return NewSegment(t.Start+l, t.Stop)
}
+func trimWidthPaddingChars(origStartPos int, cut, goal int, chars []byte) []byte {
+ bytesPos := origStartPos - len(chars)
+ var i = 0
+ for i < cut {
+ if len(chars) == 0 {
+ // ???
+ return nil
+ }
+ b := chars[0]
+ if b == ' ' {
+ chars = chars[1:]
+ i++
+ bytesPos++
+ } else {
+ tw := util.TabWidth(bytesPos)
+ chars = chars[1:]
+ i += tw
+ bytesPos++
+ }
+ }
+ // if I can cut exactly, return the cut chars, otherwise just give up and put spaces
+ if i == cut {
+ return chars
+ } else {
+ return bytes.Repeat([]byte{' '}, goal)
+ }
+}
+
// TrimLeftSpaceWidth returns a new segment by slicing off leading space
// characters until the given width.
func (t *Segment) TrimLeftSpaceWidth(width int, buffer []byte) Segment {
padding := t.Padding
+ origWidth := width
for ; width > 0; width-- {
if padding == 0 {
break
@@ -100,8 +153,10 @@ func (t *Segment) TrimLeftSpaceWidth(width int, buffer []byte) Segment {
padding--
}
if width == 0 {
- return NewSegmentPadding(t.Start, t.Stop, padding)
+ paddingChars := trimWidthPaddingChars(t.Start, origWidth, padding, t.PaddingChars)
+ return NewSegmentPadding(t.Start, t.Stop, padding, paddingChars)
}
+ newPaddingChars := []byte{}
text := buffer[t.Start:t.Stop]
start := t.Start
for _, c := range text {
@@ -110,8 +165,14 @@ func (t *Segment) TrimLeftSpaceWidth(width int, buffer []byte) Segment {
}
if c == ' ' {
width--
+ if width < 0 {
+ newPaddingChars = append(newPaddingChars, ' ')
+ }
} else if c == '\t' {
width -= 4
+ if width < 0 {
+ newPaddingChars = append(newPaddingChars, '\t')
+ }
} else {
break
}
@@ -119,18 +180,20 @@ func (t *Segment) TrimLeftSpaceWidth(width int, buffer []byte) Segment {
}
if width < 0 {
padding = width * -1
+ return NewSegmentPadding(start, t.Stop, padding, newPaddingChars)
}
- return NewSegmentPadding(start, t.Stop, padding)
+ paddingChars := trimWidthPaddingChars(t.Start, origWidth, padding, t.PaddingChars)
+ return NewSegmentPadding(start, t.Stop, padding, paddingChars)
}
// WithStart returns a new Segment with same value except Start.
func (t *Segment) WithStart(v int) Segment {
- return NewSegmentPadding(v, t.Stop, t.Padding)
+ return NewSegmentPadding(v, t.Stop, t.Padding, t.PaddingChars)
}
// WithStop returns a new Segment with same value except Stop.
func (t *Segment) WithStop(v int) Segment {
- return NewSegmentPadding(t.Start, v, t.Padding)
+ return NewSegmentPadding(t.Start, v, t.Padding, t.PaddingChars)
}
// ConcatPadding concats the padding to the given slice.
diff --git a/util/util.go b/util/util.go
index 3ec73f5..df7b440 100644
--- a/util/util.go
+++ b/util/util.go
@@ -148,19 +148,36 @@ func TabWidth(currentPos int) int {
//
// width=2 is in the tab character. In this case, IndentPosition returns
// (pos=1, padding=2)
-func IndentPosition(bs []byte, currentPos, width int) (pos, padding int) {
+func IndentPosition(bs []byte, currentPos, width int) (pos, padding int, paddingChars []byte) {
if width == 0 {
- return 0, 0
+ return 0, 0, nil
}
w := 0
l := len(bs)
i := 0
hasTab := false
+
+ firstOver := true
+
for ; i < l; i++ {
+ if w > width && firstOver {
+ firstOver = false
+ for j := 0; j < w-width; j++ {
+ paddingChars = append(paddingChars, ' ')
+ }
+ }
if bs[i] == '\t' {
+ if w >= width {
+ firstOver = false
+ paddingChars = append(paddingChars, '\t')
+ }
w += TabWidth(currentPos + w)
hasTab = true
} else if bs[i] == ' ' {
+ if w >= width {
+ firstOver = false
+ paddingChars = append(paddingChars, ' ')
+ }
w++
} else {
break
@@ -168,85 +185,56 @@ func IndentPosition(bs []byte, currentPos, width int) (pos, padding int) {
}
if w >= width {
if !hasTab {
- return width, 0
+ return width, 0, nil
}
- return i, w - width
+ return i, w - width, paddingChars
}
- return -1, -1
-}
-
-// IndentPositionPadding searches an indent position with the given width for the given line.
-// This function is mostly same as IndentPosition except this function
-// takes account into additional paddings.
-func IndentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) {
- if width == 0 {
- return 0, paddingv
- }
- w := 0
- i := 0
- l := len(bs)
- for ; i < l; i++ {
- if bs[i] == '\t' {
- w += TabWidth(currentPos + w)
- } else if bs[i] == ' ' {
- w++
- } else {
- break
- }
- }
- if w >= width {
- return i - paddingv, w - width
- }
- return -1, -1
-}
-
-// DedentPosition dedents lines by the given width.
-func DedentPosition(bs []byte, currentPos, width int) (pos, padding int) {
- if width == 0 {
- return 0, 0
- }
- w := 0
- l := len(bs)
- i := 0
- for ; i < l; i++ {
- if bs[i] == '\t' {
- w += TabWidth(currentPos + w)
- } else if bs[i] == ' ' {
- w++
- } else {
- break
- }
- }
- if w >= width {
- return i, w - width
- }
- return i, 0
+ return -1, -1, nil
}
// DedentPositionPadding dedents lines by the given width.
-// This function is mostly same as DedentPosition except this function
-// takes account into additional paddings.
-func DedentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) {
+// It takes account into additional paddings.
+func DedentPositionPadding(bs []byte, currentPos, paddingv, width int, origChars []byte) (pos, padding int, paddingChars []byte) {
if width == 0 {
- return 0, paddingv
+ return 0, paddingv, origChars
}
w := 0
i := 0
l := len(bs)
+
+ firstOver := true
+
for ; i < l; i++ {
+ if w > width && firstOver {
+ firstOver = false
+ for j := 0; j < w-width; j++ {
+ paddingChars = append(paddingChars, ' ')
+ }
+ }
+
if bs[i] == '\t' {
+ if w >= width {
+ firstOver = false
+ paddingChars = append(paddingChars, '\t')
+ }
+
w += TabWidth(currentPos + w)
} else if bs[i] == ' ' {
+ if w >= width {
+ firstOver = false
+ paddingChars = append(paddingChars, ' ')
+ }
+
w++
} else {
break
}
}
if w >= width {
- return i - paddingv, w - width
+ return i - paddingv, w - width, paddingChars
}
- return i - paddingv, 0
+ return i - paddingv, 0, nil
}
// IndentWidth calculate an indent width for the given line.
@@ -267,23 +255,6 @@ func IndentWidth(bs []byte, currentPos int) (width, pos int) {
return
}
-// FirstNonSpacePosition returns a position line that is a first nonspace
-// character.
-func FirstNonSpacePosition(bs []byte) int {
- i := 0
- for ; i < len(bs); i++ {
- c := bs[i]
- if c == ' ' || c == '\t' {
- continue
- }
- if c == '\n' {
- return -1
- }
- return i
- }
- return -1
-}
-
// FindClosure returns a position that closes the given opener.
// If codeSpan is set true, it ignores characters in code spans.
// If allowNesting is set true, closures correspond to nested opener will be
@@ -382,11 +353,6 @@ func TrimLeftLength(source, s []byte) int {
return len(source) - len(TrimLeft(source, s))
}
-// TrimRightLength returns a length of trailing specified characters.
-func TrimRightLength(source, s []byte) int {
- return len(source) - len(TrimRight(source, s))
-}
-
// TrimLeftSpaceLength returns a length of leading space characters.
func TrimLeftSpaceLength(source []byte) int {
i := 0