Fix leading tabs with codeblocks

Note that this is a breaking change and will require a new goldmark major version.

I have tried to fix the problem with leading tabs in fenced code blocks (and probably in normal code blocks too).

Important note: tabs do not behave like "just 4 spaces". They advance to the end of the current 4-column block, so a tab can be as wide as anything from 1 to 4 spaces, depending on its position.

If you have Markdown like this (`.` represents a space; `[tb]`, `[t]` and `[]` each represent a single tab, drawn as wide as the columns it occupies)

```
*.some.text
..```
..foo
..[]foo
..```
```

you expect the tab to be kept in the code. This did not work properly in goldmark and I fixed that.

However, if you have code like this

```
*.some.text
..```
..foo
.[t]foo
..```
```

what should happen? I decided that it should be two spaces, as the tab is not "completely" in the code block. Similarly, what should happen in this case

```
*.some.text
..```
..foo
.[t][tb]foo
..```
```

I decided that it should be three spaces first and then a tab. I am not sure what the correct solution even is here...

The crux of the fix is that text segments no longer store just a padding width; they also remember which characters make up that padding and print those characters when asked to do so in code blocks. In other cases, the paddingChars are ignored.

This should fix #177.
This commit is contained in:
Karel Bilek 2021-01-29 13:08:04 +07:00
parent 6c741ae251
commit 7cdc0fb06f
10 changed files with 218 additions and 122 deletions

View file

@ -159,3 +159,60 @@ bbb
<img src="gt.jpg" alt="&gt;" />
<img src="amp.jpg" alt="&amp;" /></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//
13: fenced code block starting with tab inside list
//- - - - - - - - -//
* foo
```Makefile
foo
foo
```
//- - - - - - - - -//
<ul>
<li>foo
<pre><code class="language-Makefile">foo
foo
</code></pre>
</li>
</ul>
//= = = = = = = = = = = = = = = = = = = = = = = =//
14: fenced code block inside list, mismatched tab start
//- - - - - - - - -//
* foo
```Makefile
foo
foo
```
//- - - - - - - - -//
<ul>
<li>foo
<pre><code class="language-Makefile">foo
foo
</code></pre>
</li>
</ul>
//= = = = = = = = = = = = = = = = = = = = = = = =//
15: fenced code block inside nested list
//- - - - - - - - -//
* foo
- bar
```Makefile
foo
foo
```
//- - - - - - - - -//
<ul>
<li>foo
<ul>
<li>bar
<pre><code class="language-Makefile">foo
foo
</code></pre>
</li>
</ul>
</li>
</ul>
//= = = = = = = = = = = = = = = = = = = = = = = =//

View file

@ -81,8 +81,8 @@ func (b *definitionListParser) Continue(node gast.Node, reader text.Reader, pc p
if w < list.Offset {
return parser.Close
}
pos, padding := util.IndentPosition(line, reader.LineOffset(), list.Offset)
reader.AdvanceAndSetPadding(pos, padding)
pos, padding, chars := util.IndentPosition(line, reader.LineOffset(), list.Offset)
reader.AdvanceAndSetPadding(pos, padding, chars)
return parser.Continue | parser.HasChildren
}
@ -137,8 +137,8 @@ func (b *definitionDescriptionParser) Open(parent gast.Node, reader text.Reader,
}
para.Parent().RemoveChild(para.Parent(), para)
}
cpos, padding := util.IndentPosition(line[pos+1:], pos+1, list.Offset-pos-1)
reader.AdvanceAndSetPadding(cpos, padding)
cpos, padding, chars := util.IndentPosition(line[pos+1:], pos+1, list.Offset-pos-1)
reader.AdvanceAndSetPadding(cpos, padding, chars)
return ast.NewDefinitionDescription(), parser.HasChildren
}

View file

@ -66,7 +66,7 @@ func (b *footnoteBlockParser) Open(parent gast.Node, reader text.Reader, pc pars
reader.Advance(pos)
return item, parser.NoChildren
}
reader.AdvanceAndSetPadding(pos, padding)
reader.AdvanceAndSetPadding(pos, padding, segment.PaddingChars)
return item, parser.HasChildren
}
@ -75,11 +75,11 @@ func (b *footnoteBlockParser) Continue(node gast.Node, reader text.Reader, pc pa
if util.IsBlank(line) {
return parser.Continue | parser.HasChildren
}
childpos, padding := util.IndentPosition(line, reader.LineOffset(), 4)
childpos, padding, paddingChars := util.IndentPosition(line, reader.LineOffset(), 4)
if childpos < 0 {
return parser.Close
}
reader.AdvanceAndSetPadding(childpos, padding)
reader.AdvanceAndSetPadding(childpos, padding, paddingChars)
return parser.Continue | parser.HasChildren
}

View file

@ -33,7 +33,7 @@ func (b *blockquoteParser) process(reader text.Reader) bool {
}
reader.Advance(pos)
if line[pos-1] == '\t' {
reader.SetPadding(2)
reader.SetPadding(2, []byte(" "))
}
return true
}

View file

@ -24,17 +24,18 @@ func (b *codeBlockParser) Trigger() []byte {
func (b *codeBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
line, segment := reader.PeekLine()
pos, padding := util.IndentPosition(line, reader.LineOffset(), 4)
pos, padding, chars := util.IndentPosition(line, reader.LineOffset(), 4)
if pos < 0 || util.IsBlank(line) {
return nil, NoChildren
}
node := ast.NewCodeBlock()
reader.AdvanceAndSetPadding(pos, padding)
reader.AdvanceAndSetPadding(pos, padding, chars)
_, segment = reader.PeekLine()
node.Lines().Append(segment)
node.Lines().Append(segment.WithRenderPaddingTabs())
reader.Advance(segment.Len() - 1)
return node, NoChildren
}
func (b *codeBlockParser) Continue(node ast.Node, reader text.Reader, pc Context) State {
@ -43,13 +44,13 @@ func (b *codeBlockParser) Continue(node ast.Node, reader text.Reader, pc Context
node.Lines().Append(segment.TrimLeftSpaceWidth(4, reader.Source()))
return Continue | NoChildren
}
pos, padding := util.IndentPosition(line, reader.LineOffset(), 4)
pos, padding, chars := util.IndentPosition(line, reader.LineOffset(), 4)
if pos < 0 {
return Close
}
reader.AdvanceAndSetPadding(pos, padding)
reader.AdvanceAndSetPadding(pos, padding, chars)
_, segment = reader.PeekLine()
node.Lines().Append(segment)
node.Lines().Append(segment.WithRenderPaddingTabs())
reader.Advance(segment.Len() - 1)
return Continue | NoChildren
}

View file

@ -70,6 +70,7 @@ func (b *fencedCodeBlockParser) Open(parent ast.Node, reader text.Reader, pc Con
func (b *fencedCodeBlockParser) Continue(node ast.Node, reader text.Reader, pc Context) State {
line, segment := reader.PeekLine()
fdata := pc.Get(fencedCodeBlockInfoKey).(*fenceData)
w, pos := util.IndentWidth(line, reader.LineOffset())
if w < 4 {
@ -86,11 +87,12 @@ func (b *fencedCodeBlockParser) Continue(node ast.Node, reader text.Reader, pc C
return Close
}
}
pos, padding := util.DedentPositionPadding(line, reader.LineOffset(), segment.Padding, fdata.indent)
seg := text.NewSegmentPadding(segment.Start+pos, segment.Stop, padding)
pos, padding, chars := util.DedentPositionPadding(line, reader.LineOffset(), segment.Padding, fdata.indent, segment.PaddingChars)
seg := text.NewSegmentPadding(segment.Start+pos, segment.Stop, padding, chars)
seg = seg.WithRenderPaddingTabs()
node.Lines().Append(seg)
reader.AdvanceAndSetPadding(segment.Stop-segment.Start-pos-1, padding)
reader.AdvanceAndSetPadding(segment.Stop-segment.Start-pos-1, padding, chars)
return Continue | NoChildren
}

View file

@ -44,9 +44,9 @@ func (b *listItemParser) Open(parent ast.Node, reader text.Reader, pc Context) (
return node, NoChildren
}
pos, padding := util.IndentPosition(line[match[4]:], match[4], itemOffset)
pos, padding, chars := util.IndentPosition(line[match[4]:], match[4], itemOffset)
child := match[3] + pos
reader.AdvanceAndSetPadding(child, padding)
reader.AdvanceAndSetPadding(child, padding, chars)
return node, HasChildren
}
@ -66,8 +66,8 @@ func (b *listItemParser) Continue(node ast.Node, reader text.Reader, pc Context)
}
return Close
}
pos, padding := util.IndentPosition(line, reader.LineOffset(), offset)
reader.AdvanceAndSetPadding(pos, padding)
pos, padding, paddingChars := util.IndentPosition(line, reader.LineOffset(), offset)
reader.AdvanceAndSetPadding(pos, padding, paddingChars)
return Continue | HasChildren
}

View file

@ -45,14 +45,14 @@ type Reader interface {
SetPosition(int, Segment)
// SetPadding sets padding to the reader.
SetPadding(int)
SetPadding(int, []byte)
// Advance advances the internal pointer.
Advance(int)
// AdvanceAndSetPadding advances the internal pointer and add padding to the
// reader.
AdvanceAndSetPadding(int, int)
AdvanceAndSetPadding(int, int, []byte)
// AdvanceLine advances the internal pointer to the next line head.
AdvanceLine()
@ -120,7 +120,7 @@ func (r *reader) Peek() byte {
func (r *reader) PeekLine() ([]byte, Segment) {
if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
if r.peekedLine == nil {
r.peekedLine = r.pos.Value(r.Source())
r.peekedLine = r.pos.ValueKeepTabs(r.Source())
}
return r.peekedLine, r.pos
}
@ -169,9 +169,11 @@ func (r *reader) Advance(n int) {
if n < len(r.peekedLine) && r.pos.Padding == 0 {
r.pos.Start += n
r.peekedLine = nil
return
}
r.peekedLine = nil
l := r.sourceLength
for ; n > 0 && r.pos.Start < l; n-- {
if r.pos.Padding != 0 {
@ -186,16 +188,19 @@ func (r *reader) Advance(n int) {
}
}
func (r *reader) AdvanceAndSetPadding(n, padding int) {
func (r *reader) AdvanceAndSetPadding(n, padding int, chars []byte) {
r.Advance(n)
if padding > r.pos.Padding {
r.SetPadding(padding)
r.SetPadding(padding, chars)
}
// always set the chars
r.pos.PaddingChars = chars
}
func (r *reader) AdvanceLine() {
r.lineOffset = -1
r.peekedLine = nil
r.pos.Start = r.pos.Stop
r.head = r.pos.Start
if r.pos.Start < 0 {
@ -223,8 +228,9 @@ func (r *reader) SetPosition(line int, pos Segment) {
r.pos = pos
}
func (r *reader) SetPadding(v int) {
func (r *reader) SetPadding(v int, chars []byte) {
r.pos.Padding = v
r.pos.PaddingChars = chars
}
func (r *reader) SkipSpaces() (Segment, int, bool) {
@ -380,7 +386,7 @@ func (r *blockReader) Peek() byte {
func (r *blockReader) PeekLine() ([]byte, Segment) {
if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
return r.pos.Value(r.source), r.pos
return r.pos.ValueKeepTabs(r.source), r.pos
}
return nil, r.pos
}
@ -406,10 +412,10 @@ func (r *blockReader) Advance(n int) {
}
}
func (r *blockReader) AdvanceAndSetPadding(n, padding int) {
func (r *blockReader) AdvanceAndSetPadding(n, padding int, chars []byte) {
r.Advance(n)
if padding > r.pos.Padding {
r.SetPadding(padding)
r.SetPadding(padding, chars)
}
}
@ -440,9 +446,10 @@ func (r *blockReader) SetPosition(line int, pos Segment) {
}
}
func (r *blockReader) SetPadding(v int) {
func (r *blockReader) SetPadding(v int, chars []byte) {
r.lineOffset = -1
r.pos.Padding = v
r.pos.PaddingChars = chars
}
func (r *blockReader) SkipSpaces() (Segment, int, bool) {

View file

@ -18,6 +18,10 @@ type Segment struct {
// Padding is a padding length of the segment.
Padding int
PaddingChars []byte
RenderPaddingTabs bool
}
// NewSegment return a new Segment.
@ -30,16 +34,25 @@ func NewSegment(start, stop int) Segment {
}
// NewSegmentPadding returns a new Segment with the given padding.
func NewSegmentPadding(start, stop, n int) Segment {
func NewSegmentPadding(start, stop, n int, chars []byte) Segment {
return Segment{
Start: start,
Stop: stop,
Padding: n,
Start: start,
Stop: stop,
Padding: n,
PaddingChars: chars,
}
}
// WithRenderPaddingTabs returns a copy of the segment with its
// RenderPaddingTabs flag set to true. The receiver is passed by value,
// so the segment it is called on is not modified.
func (t Segment) WithRenderPaddingTabs() Segment {
	marked := t
	marked.RenderPaddingTabs = true
	return marked
}
// Value returns a value of the segment.
func (t *Segment) Value(buffer []byte) []byte {
if t.RenderPaddingTabs {
return t.ValueKeepTabs(buffer)
}
if t.Padding == 0 {
return buffer[t.Start:t.Stop]
}
@ -48,6 +61,15 @@ func (t *Segment) Value(buffer []byte) []byte {
return append(result, buffer[t.Start:t.Stop]...)
}
// ValueKeepTabs returns the bytes of the segment with the recorded
// PaddingChars prepended verbatim. When Padding is 0 it returns
// buffer[Start:Stop] directly, without copying.
func (t *Segment) ValueKeepTabs(buffer []byte) []byte {
	body := buffer[t.Start:t.Stop]
	if t.Padding == 0 {
		return body
	}
	// Capacity is sized from the padding width; PaddingChars itself may be
	// shorter or longer than that.
	out := make([]byte, 0, t.Padding+t.Stop-t.Start+1)
	out = append(out, t.PaddingChars...)
	out = append(out, body...)
	return out
}
// Len returns a length of the segment.
func (t *Segment) Len() int {
return t.Stop - t.Start + t.Padding
@ -62,6 +84,8 @@ func (t *Segment) Between(other Segment) Segment {
t.Start,
other.Start,
t.Padding-other.Padding,
// ???? no idea what here, just put spaces there
bytes.Repeat([]byte{' '}, t.Padding-other.Padding),
)
}
@ -78,7 +102,7 @@ func (t *Segment) TrimRightSpace(buffer []byte) Segment {
if l == len(v) {
return NewSegment(t.Start, t.Start)
}
return NewSegmentPadding(t.Start, t.Stop-l, t.Padding)
return NewSegmentPadding(t.Start, t.Stop-l, t.Padding, t.PaddingChars)
}
// TrimLeftSpace returns a new segment by slicing off all leading
@ -89,10 +113,39 @@ func (t *Segment) TrimLeftSpace(buffer []byte) Segment {
return NewSegment(t.Start+l, t.Stop)
}
// trimWidthPaddingChars removes cut columns from the front of chars and
// returns the padding characters that remain.
//
// A space consumes one column; any other byte is treated as a tab and
// consumes util.TabWidth(offset) columns, where offset starts at
// origStartPos-len(chars) and grows by one per consumed character.
// NOTE(review): offset looks like a byte position in the source while
// TabWidth's parameter is named currentPos — confirm they agree.
//
// If chars is exhausted before cut columns are consumed, nil is returned.
// If the cut would end inside a tab, the original characters cannot be kept,
// so goal spaces are returned instead.
func trimWidthPaddingChars(origStartPos int, cut, goal int, chars []byte) []byte {
	offset := origStartPos - len(chars)
	consumed := 0
	for consumed < cut {
		if len(chars) == 0 {
			// Nothing left to trim; the caller gets no padding characters.
			return nil
		}
		step := 1
		if chars[0] != ' ' {
			step = util.TabWidth(offset)
		}
		chars = chars[1:]
		consumed += step
		offset++
	}
	if consumed != cut {
		// Overshot: the last tab straddles the cut point.
		return bytes.Repeat([]byte{' '}, goal)
	}
	return chars
}
// TrimLeftSpaceWidth returns a new segment by slicing off leading space
// characters until the given width.
func (t *Segment) TrimLeftSpaceWidth(width int, buffer []byte) Segment {
padding := t.Padding
origWidth := width
for ; width > 0; width-- {
if padding == 0 {
break
@ -100,8 +153,10 @@ func (t *Segment) TrimLeftSpaceWidth(width int, buffer []byte) Segment {
padding--
}
if width == 0 {
return NewSegmentPadding(t.Start, t.Stop, padding)
paddingChars := trimWidthPaddingChars(t.Start, origWidth, padding, t.PaddingChars)
return NewSegmentPadding(t.Start, t.Stop, padding, paddingChars)
}
newPaddingChars := []byte{}
text := buffer[t.Start:t.Stop]
start := t.Start
for _, c := range text {
@ -110,8 +165,14 @@ func (t *Segment) TrimLeftSpaceWidth(width int, buffer []byte) Segment {
}
if c == ' ' {
width--
if width < 0 {
newPaddingChars = append(newPaddingChars, ' ')
}
} else if c == '\t' {
width -= 4
if width < 0 {
newPaddingChars = append(newPaddingChars, '\t')
}
} else {
break
}
@ -119,18 +180,20 @@ func (t *Segment) TrimLeftSpaceWidth(width int, buffer []byte) Segment {
}
if width < 0 {
padding = width * -1
return NewSegmentPadding(start, t.Stop, padding, newPaddingChars)
}
return NewSegmentPadding(start, t.Stop, padding)
paddingChars := trimWidthPaddingChars(t.Start, origWidth, padding, t.PaddingChars)
return NewSegmentPadding(start, t.Stop, padding, paddingChars)
}
// WithStart returns a new Segment with same value except Start.
func (t *Segment) WithStart(v int) Segment {
return NewSegmentPadding(v, t.Stop, t.Padding)
return NewSegmentPadding(v, t.Stop, t.Padding, t.PaddingChars)
}
// WithStop returns a new Segment with same value except Stop.
func (t *Segment) WithStop(v int) Segment {
return NewSegmentPadding(t.Start, v, t.Padding)
return NewSegmentPadding(t.Start, v, t.Padding, t.PaddingChars)
}
// ConcatPadding concats the padding to the given slice.

View file

@ -148,19 +148,36 @@ func TabWidth(currentPos int) int {
//
// width=2 is in the tab character. In this case, IndentPosition returns
// (pos=1, padding=2)
func IndentPosition(bs []byte, currentPos, width int) (pos, padding int) {
func IndentPosition(bs []byte, currentPos, width int) (pos, padding int, paddingChars []byte) {
if width == 0 {
return 0, 0
return 0, 0, nil
}
w := 0
l := len(bs)
i := 0
hasTab := false
firstOver := true
for ; i < l; i++ {
if w > width && firstOver {
firstOver = false
for j := 0; j < w-width; j++ {
paddingChars = append(paddingChars, ' ')
}
}
if bs[i] == '\t' {
if w >= width {
firstOver = false
paddingChars = append(paddingChars, '\t')
}
w += TabWidth(currentPos + w)
hasTab = true
} else if bs[i] == ' ' {
if w >= width {
firstOver = false
paddingChars = append(paddingChars, ' ')
}
w++
} else {
break
@ -168,85 +185,56 @@ func IndentPosition(bs []byte, currentPos, width int) (pos, padding int) {
}
if w >= width {
if !hasTab {
return width, 0
return width, 0, nil
}
return i, w - width
return i, w - width, paddingChars
}
return -1, -1
}
// IndentPositionPadding searches the given line for an indent position of the
// given width, taking an additional padding (paddingv) into account.
//
// If width is 0 it returns (0, paddingv). Otherwise it measures the leading
// run of spaces and tabs in bs, with each tab expanded by TabWidth relative
// to currentPos. If that run is at least width columns wide, it returns the
// number of bytes scanned minus paddingv and the number of columns beyond
// width. Otherwise it returns (-1, -1).
func IndentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) {
	if width == 0 {
		return 0, paddingv
	}
	cols, idx := 0, 0
scan:
	for idx < len(bs) {
		switch bs[idx] {
		case '\t':
			cols += TabWidth(currentPos + cols)
		case ' ':
			cols++
		default:
			break scan
		}
		idx++
	}
	if cols < width {
		return -1, -1
	}
	return idx - paddingv, cols - width
}
// DedentPosition dedents lines by the given width.
func DedentPosition(bs []byte, currentPos, width int) (pos, padding int) {
if width == 0 {
return 0, 0
}
w := 0
l := len(bs)
i := 0
for ; i < l; i++ {
if bs[i] == '\t' {
w += TabWidth(currentPos + w)
} else if bs[i] == ' ' {
w++
} else {
break
}
}
if w >= width {
return i, w - width
}
return i, 0
return -1, -1, nil
}
// DedentPositionPadding dedents lines by the given width.
// This function is mostly same as DedentPosition except this function
// takes account into additional paddings.
func DedentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) {
// It takes account into additional paddings.
func DedentPositionPadding(bs []byte, currentPos, paddingv, width int, origChars []byte) (pos, padding int, paddingChars []byte) {
if width == 0 {
return 0, paddingv
return 0, paddingv, origChars
}
w := 0
i := 0
l := len(bs)
firstOver := true
for ; i < l; i++ {
if w > width && firstOver {
firstOver = false
for j := 0; j < w-width; j++ {
paddingChars = append(paddingChars, ' ')
}
}
if bs[i] == '\t' {
if w >= width {
firstOver = false
paddingChars = append(paddingChars, '\t')
}
w += TabWidth(currentPos + w)
} else if bs[i] == ' ' {
if w >= width {
firstOver = false
paddingChars = append(paddingChars, ' ')
}
w++
} else {
break
}
}
if w >= width {
return i - paddingv, w - width
return i - paddingv, w - width, paddingChars
}
return i - paddingv, 0
return i - paddingv, 0, nil
}
// IndentWidth calculate an indent width for the given line.
@ -267,23 +255,6 @@ func IndentWidth(bs []byte, currentPos int) (width, pos int) {
return
}
// FirstNonSpacePosition returns the index of the first byte in bs that is
// neither a space nor a tab. It returns -1 if a newline is reached first or
// if bs holds nothing but spaces and tabs.
func FirstNonSpacePosition(bs []byte) int {
	for idx, ch := range bs {
		switch ch {
		case ' ', '\t':
			// Leading whitespace: keep scanning.
		case '\n':
			return -1
		default:
			return idx
		}
	}
	return -1
}
// FindClosure returns a position that closes the given opener.
// If codeSpan is set true, it ignores characters in code spans.
// If allowNesting is set true, closures correspond to nested opener will be
@ -382,11 +353,6 @@ func TrimLeftLength(source, s []byte) int {
return len(source) - len(TrimLeft(source, s))
}
// TrimRightLength returns how many bytes TrimRight removes from the end of
// source for the given set of characters s.
func TrimRightLength(source, s []byte) int {
	trimmed := TrimRight(source, s)
	return len(source) - len(trimmed)
}
// TrimLeftSpaceLength returns a length of leading space characters.
func TrimLeftSpaceLength(source []byte) int {
i := 0