mirror of
https://github.com/yuin/goldmark
synced 2025-03-04 23:04:52 +00:00
Note that this is a breaking change and will require a new goldmark major version. I have tried to fix the problem with leading tabs in fenced code blocks (and probably normal code blocks too). Important note: tabs do not behave like "just 4 spaces". They "finish" 4-space columns, so a tab can behave like anything from 1 to 4 spaces, depending on its position. If you have Markdown like this (. represents a space; [tb], [t] or [] represent tabs) ``` *.some.text ..``` ..foo ..[]foo ..``` ``` you expect the tab to be kept in the code. This did not work properly in goldmark and I fixed that. However, if you have code like this ``` *.some.text ..``` ..foo .[t]foo ..``` ``` what should happen? I decided that it should be two spaces, as the tab is not "completely" inside the code block. Similarly, what should happen in this case ``` *.some.text ..``` ..foo .[t][tb]foo ..``` ``` I decided that it should be three spaces followed by a tab. I am not sure what the correct solution even is here... The crux of the fix is that text segments no longer carry just a padding width; they also remember which characters make up the padding and print them when asked to do so in code blocks. In all other cases, the paddingChars are ignored. This should fix #177.
550 lines
11 KiB
Go
550 lines
11 KiB
Go
package text
|
|
|
|
import (
|
|
"io"
|
|
"regexp"
|
|
"unicode/utf8"
|
|
|
|
"github.com/yuin/goldmark/util"
|
|
)
|
|
|
|
const (
	// invalidValue is the placeholder used for segment bounds that carry no
	// explicit position.
	invalidValue = -1

	// EOF indicates the end of file. Peek returns it when there is nothing
	// left to read.
	EOF = byte(0xff)
)
|
|
|
|
// A Reader interface provides abstracted method for reading text.
type Reader interface {
	io.RuneReader

	// Source returns a source of the reader.
	Source() []byte

	// ResetPosition resets positions.
	ResetPosition()

	// Peek returns a byte at current position without advancing the internal pointer.
	// It returns EOF when there is nothing left to read.
	Peek() byte

	// PeekLine returns the current line without advancing the internal pointer.
	PeekLine() ([]byte, Segment)

	// PrecendingCharacter returns a character just before current internal pointer.
	PrecendingCharacter() rune

	// Value returns a value of the given segment.
	Value(Segment) []byte

	// LineOffset returns a distance from the line head to current position.
	LineOffset() int

	// Position returns current line number and position.
	Position() (int, Segment)

	// SetPosition sets current line number and position.
	SetPosition(int, Segment)

	// SetPadding sets padding to the reader. The int is the padding width and
	// the byte slice holds the characters that make up the padding.
	SetPadding(int, []byte)

	// Advance advances the internal pointer.
	Advance(int)

	// AdvanceAndSetPadding advances the internal pointer and add padding to the
	// reader. The arguments are the number of positions to advance, the padding
	// width, and the padding characters.
	AdvanceAndSetPadding(int, int, []byte)

	// AdvanceLine advances the internal pointer to the next line head.
	AdvanceLine()

	// SkipSpaces skips space characters and returns a non-blank line.
	// If it reaches EOF, returns false.
	SkipSpaces() (Segment, int, bool)

	// SkipBlankLines skips blank lines and returns a non-blank line.
	// If it reaches EOF, returns false.
	SkipBlankLines() (Segment, int, bool)

	// Match performs regular expression matching to current line.
	Match(reg *regexp.Regexp) bool

	// FindSubMatch performs regular expression searching to current line.
	FindSubMatch(reg *regexp.Regexp) [][]byte
}
|
|
|
|
// reader is the Reader implementation that walks a whole source buffer line
// by line.
type reader struct {
	source       []byte  // the complete input
	sourceLength int     // len(source), captured at construction
	line         int     // current line number; -1 before the first AdvanceLine
	peekedLine   []byte  // cached PeekLine result; nil when not cached
	pos          Segment // current position; Start is the pointer, Stop the end of the current line
	head         int     // offset at which the current line starts
	lineOffset   int     // cached LineOffset result; negative when not computed
}
|
|
|
|
// NewReader return a new Reader that can read UTF-8 bytes .
|
|
func NewReader(source []byte) Reader {
|
|
r := &reader{
|
|
source: source,
|
|
sourceLength: len(source),
|
|
}
|
|
r.ResetPosition()
|
|
return r
|
|
}
|
|
|
|
func (r *reader) ResetPosition() {
|
|
r.line = -1
|
|
r.head = 0
|
|
r.lineOffset = -1
|
|
r.AdvanceLine()
|
|
}
|
|
|
|
// Source returns the source bytes this reader was created with.
func (r *reader) Source() []byte {
	return r.source
}
|
|
|
|
// Value returns the value of seg, resolved against the reader's source.
func (r *reader) Value(seg Segment) []byte {
	return seg.Value(r.source)
}
|
|
|
|
func (r *reader) Peek() byte {
|
|
if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
|
|
if r.pos.Padding != 0 {
|
|
return space[0]
|
|
}
|
|
return r.source[r.pos.Start]
|
|
}
|
|
return EOF
|
|
}
|
|
|
|
func (r *reader) PeekLine() ([]byte, Segment) {
|
|
if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
|
|
if r.peekedLine == nil {
|
|
r.peekedLine = r.pos.ValueKeepTabs(r.Source())
|
|
}
|
|
return r.peekedLine, r.pos
|
|
}
|
|
return nil, r.pos
|
|
}
|
|
|
|
// ReadRune implements io.RuneReader by delegating to readRuneReader.
func (r *reader) ReadRune() (rune, int, error) {
	return readRuneReader(r)
}
|
|
|
|
func (r *reader) LineOffset() int {
|
|
if r.lineOffset < 0 {
|
|
v := 0
|
|
for i := r.head; i < r.pos.Start; i++ {
|
|
if r.source[i] == '\t' {
|
|
v += util.TabWidth(v)
|
|
} else {
|
|
v++
|
|
}
|
|
}
|
|
r.lineOffset = v - r.pos.Padding
|
|
}
|
|
return r.lineOffset
|
|
}
|
|
|
|
func (r *reader) PrecendingCharacter() rune {
|
|
if r.pos.Start <= 0 {
|
|
if r.pos.Padding != 0 {
|
|
return rune(' ')
|
|
}
|
|
return rune('\n')
|
|
}
|
|
i := r.pos.Start - 1
|
|
for ; i >= 0; i-- {
|
|
if utf8.RuneStart(r.source[i]) {
|
|
break
|
|
}
|
|
}
|
|
rn, _ := utf8.DecodeRune(r.source[i:])
|
|
return rn
|
|
}
|
|
|
|
// Advance moves the internal pointer forward by n positions.
// The cached line offset and peeked line are always discarded.
func (r *reader) Advance(n int) {
	r.lineOffset = -1
	// Fast path: the step is shorter than the cached peeked line and there is
	// no padding to consume, so the start offset can simply be bumped.
	if n < len(r.peekedLine) && r.pos.Padding == 0 {
		r.pos.Start += n
		r.peekedLine = nil

		return
	}
	r.peekedLine = nil

	// Slow path: one position per iteration. Pending padding is consumed
	// first; a newline moves the reader to the next line via AdvanceLine.
	l := r.sourceLength
	for ; n > 0 && r.pos.Start < l; n-- {
		if r.pos.Padding != 0 {
			r.pos.Padding--
			continue
		}
		if r.source[r.pos.Start] == '\n' {
			r.AdvanceLine()
			continue
		}
		r.pos.Start++
	}
}
|
|
|
|
// AdvanceAndSetPadding advances the pointer by n and then raises the padding
// to the given width if that is larger than the padding left after advancing.
// The padding characters are overwritten with chars in either case.
func (r *reader) AdvanceAndSetPadding(n, padding int, chars []byte) {
	r.Advance(n)
	if padding > r.pos.Padding {
		r.SetPadding(padding, chars)
	}
	// always set the chars
	// NOTE(review): blockReader.AdvanceAndSetPadding does not do this
	// unconditional assignment; confirm the difference is intended.
	r.pos.PaddingChars = chars
}
|
|
|
|
func (r *reader) AdvanceLine() {
|
|
r.lineOffset = -1
|
|
r.peekedLine = nil
|
|
|
|
r.pos.Start = r.pos.Stop
|
|
r.head = r.pos.Start
|
|
if r.pos.Start < 0 {
|
|
return
|
|
}
|
|
r.pos.Stop = r.sourceLength
|
|
for i := r.pos.Start; i < r.sourceLength; i++ {
|
|
c := r.source[i]
|
|
if c == '\n' {
|
|
r.pos.Stop = i + 1
|
|
break
|
|
}
|
|
}
|
|
r.line++
|
|
r.pos.Padding = 0
|
|
}
|
|
|
|
// Position returns the current line number and the current segment.
func (r *reader) Position() (int, Segment) {
	return r.line, r.pos
}
|
|
|
|
func (r *reader) SetPosition(line int, pos Segment) {
|
|
r.lineOffset = -1
|
|
r.line = line
|
|
r.pos = pos
|
|
}
|
|
|
|
func (r *reader) SetPadding(v int, chars []byte) {
|
|
r.pos.Padding = v
|
|
r.pos.PaddingChars = chars
|
|
}
|
|
|
|
// SkipSpaces delegates to skipSpacesReader.
func (r *reader) SkipSpaces() (Segment, int, bool) {
	return skipSpacesReader(r)
}
|
|
|
|
// SkipBlankLines delegates to skipBlankLinesReader.
func (r *reader) SkipBlankLines() (Segment, int, bool) {
	return skipBlankLinesReader(r)
}
|
|
|
|
// Match delegates to matchReader.
func (r *reader) Match(reg *regexp.Regexp) bool {
	return matchReader(r, reg)
}
|
|
|
|
// FindSubMatch delegates to findSubMatchReader.
func (r *reader) FindSubMatch(reg *regexp.Regexp) [][]byte {
	return findSubMatchReader(r, reg)
}
|
|
|
|
// A BlockReader interface is a reader that is optimized for Blocks.
// It reads from a list of segments of the source rather than from the whole
// source.
type BlockReader interface {
	Reader
	// Reset resets current state and sets new segments to the reader.
	Reset(segment *Segments)
}
|
|
|
|
// blockReader is the BlockReader implementation; each segment is treated as
// one line.
type blockReader struct {
	source         []byte    // the complete input the segments point into
	segments       *Segments // segments being read; nil until Reset is called
	segmentsLength int       // segments.Len(), captured in Reset
	line           int       // index of the current segment; -1 before the first AdvanceLine
	pos            Segment   // current position within the current segment
	head           int       // start offset of the current segment
	last           int       // Stop of the last segment; 0 when there are no segments
	lineOffset     int       // cached LineOffset result; negative when not computed
}
|
|
|
|
// NewBlockReader returns a new BlockReader.
|
|
func NewBlockReader(source []byte, segments *Segments) BlockReader {
|
|
r := &blockReader{
|
|
source: source,
|
|
}
|
|
if segments != nil {
|
|
r.Reset(segments)
|
|
}
|
|
return r
|
|
}
|
|
|
|
func (r *blockReader) ResetPosition() {
|
|
r.line = -1
|
|
r.head = 0
|
|
r.last = 0
|
|
r.lineOffset = -1
|
|
r.pos.Start = -1
|
|
r.pos.Stop = -1
|
|
r.pos.Padding = 0
|
|
if r.segmentsLength > 0 {
|
|
last := r.segments.At(r.segmentsLength - 1)
|
|
r.last = last.Stop
|
|
}
|
|
r.AdvanceLine()
|
|
}
|
|
|
|
// Reset replaces the segments the reader works on, records their count and
// rewinds the reader via ResetPosition.
func (r *blockReader) Reset(segments *Segments) {
	r.segments = segments
	r.segmentsLength = segments.Len()
	r.ResetPosition()
}
|
|
|
|
// Source returns the source bytes this reader was created with.
func (r *blockReader) Source() []byte {
	return r.source
}
|
|
|
|
func (r *blockReader) Value(seg Segment) []byte {
|
|
line := r.segmentsLength - 1
|
|
ret := make([]byte, 0, seg.Stop-seg.Start+1)
|
|
for ; line >= 0; line-- {
|
|
if seg.Start >= r.segments.At(line).Start {
|
|
break
|
|
}
|
|
}
|
|
i := seg.Start
|
|
for ; line < r.segmentsLength; line++ {
|
|
s := r.segments.At(line)
|
|
if i < 0 {
|
|
i = s.Start
|
|
}
|
|
ret = s.ConcatPadding(ret)
|
|
for ; i < seg.Stop && i < s.Stop; i++ {
|
|
ret = append(ret, r.source[i])
|
|
}
|
|
i = -1
|
|
if s.Stop > seg.Stop {
|
|
break
|
|
}
|
|
}
|
|
return ret
|
|
}
|
|
|
|
// ReadRune implements io.RuneReader by delegating to readRuneReader.
func (r *blockReader) ReadRune() (rune, int, error) {
	return readRuneReader(r)
}
|
|
|
|
func (r *blockReader) PrecendingCharacter() rune {
|
|
if r.pos.Padding != 0 {
|
|
return rune(' ')
|
|
}
|
|
if r.segments.Len() < 1 {
|
|
return rune('\n')
|
|
}
|
|
firstSegment := r.segments.At(0)
|
|
if r.line == 0 && r.pos.Start <= firstSegment.Start {
|
|
return rune('\n')
|
|
}
|
|
l := len(r.source)
|
|
i := r.pos.Start - 1
|
|
for ; i < l && i >= 0; i-- {
|
|
if utf8.RuneStart(r.source[i]) {
|
|
break
|
|
}
|
|
}
|
|
if i < 0 || i >= l {
|
|
return rune('\n')
|
|
}
|
|
rn, _ := utf8.DecodeRune(r.source[i:])
|
|
return rn
|
|
}
|
|
|
|
func (r *blockReader) LineOffset() int {
|
|
if r.lineOffset < 0 {
|
|
v := 0
|
|
for i := r.head; i < r.pos.Start; i++ {
|
|
if r.source[i] == '\t' {
|
|
v += util.TabWidth(v)
|
|
} else {
|
|
v++
|
|
}
|
|
}
|
|
r.lineOffset = v - r.pos.Padding
|
|
}
|
|
return r.lineOffset
|
|
}
|
|
|
|
func (r *blockReader) Peek() byte {
|
|
if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
|
|
if r.pos.Padding != 0 {
|
|
return space[0]
|
|
}
|
|
return r.source[r.pos.Start]
|
|
}
|
|
return EOF
|
|
}
|
|
|
|
func (r *blockReader) PeekLine() ([]byte, Segment) {
|
|
if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
|
|
return r.pos.ValueKeepTabs(r.source), r.pos
|
|
}
|
|
return nil, r.pos
|
|
}
|
|
|
|
// Advance moves the internal pointer forward by n positions.
// The cached line offset is always discarded.
func (r *blockReader) Advance(n int) {
	r.lineOffset = -1

	// Fast path: the step stays inside the current segment and there is no
	// padding to consume.
	if n < r.pos.Stop-r.pos.Start && r.pos.Padding == 0 {
		r.pos.Start += n
		return
	}

	// Slow path: one position per iteration. Pending padding is consumed
	// first; reaching the last byte of a segment that is not the final one
	// moves the reader to the next segment.
	for ; n > 0; n-- {
		if r.pos.Padding != 0 {
			r.pos.Padding--
			continue
		}
		if r.pos.Start >= r.pos.Stop-1 && r.pos.Stop < r.last {
			r.AdvanceLine()
			continue
		}
		r.pos.Start++
	}
}
|
|
|
|
// AdvanceAndSetPadding advances the pointer by n and then, only if the given
// padding is larger than the padding left after advancing, sets the padding
// width and its characters.
func (r *blockReader) AdvanceAndSetPadding(n, padding int, chars []byte) {
	r.Advance(n)
	if padding > r.pos.Padding {
		r.SetPadding(padding, chars)
	}
}
|
|
|
|
// AdvanceLine moves the reader to the next segment. Passing a segment with
// invalid bounds makes SetPosition load the segment stored for that line.
func (r *blockReader) AdvanceLine() {
	r.SetPosition(r.line+1, NewSegment(invalidValue, invalidValue))
	r.head = r.pos.Start
}
|
|
|
|
// Position returns the current line number and the current segment.
func (r *blockReader) Position() (int, Segment) {
	return r.line, r.pos
}
|
|
|
|
func (r *blockReader) SetPosition(line int, pos Segment) {
|
|
r.lineOffset = -1
|
|
r.line = line
|
|
if pos.Start == invalidValue {
|
|
if r.line < r.segmentsLength {
|
|
s := r.segments.At(line)
|
|
r.head = s.Start
|
|
r.pos = s
|
|
}
|
|
} else {
|
|
r.pos = pos
|
|
if r.line < r.segmentsLength {
|
|
s := r.segments.At(line)
|
|
r.head = s.Start
|
|
}
|
|
}
|
|
}
|
|
|
|
// SetPadding sets the padding width and the characters that make it up.
// The cached line offset is discarded because it depends on the padding.
func (r *blockReader) SetPadding(v int, chars []byte) {
	r.lineOffset = -1
	r.pos.Padding = v
	r.pos.PaddingChars = chars
}
|
|
|
|
// SkipSpaces delegates to skipSpacesReader.
func (r *blockReader) SkipSpaces() (Segment, int, bool) {
	return skipSpacesReader(r)
}
|
|
|
|
// SkipBlankLines delegates to skipBlankLinesReader.
func (r *blockReader) SkipBlankLines() (Segment, int, bool) {
	return skipBlankLinesReader(r)
}
|
|
|
|
// Match delegates to matchReader.
func (r *blockReader) Match(reg *regexp.Regexp) bool {
	return matchReader(r, reg)
}
|
|
|
|
// FindSubMatch delegates to findSubMatchReader.
func (r *blockReader) FindSubMatch(reg *regexp.Regexp) [][]byte {
	return findSubMatchReader(r, reg)
}
|
|
|
|
func skipBlankLinesReader(r Reader) (Segment, int, bool) {
|
|
lines := 0
|
|
for {
|
|
line, seg := r.PeekLine()
|
|
if line == nil {
|
|
return seg, lines, false
|
|
}
|
|
if util.IsBlank(line) {
|
|
lines++
|
|
r.AdvanceLine()
|
|
} else {
|
|
return seg, lines, true
|
|
}
|
|
}
|
|
}
|
|
|
|
func skipSpacesReader(r Reader) (Segment, int, bool) {
|
|
chars := 0
|
|
for {
|
|
line, segment := r.PeekLine()
|
|
if line == nil {
|
|
return segment, chars, false
|
|
}
|
|
for i, c := range line {
|
|
if util.IsSpace(c) {
|
|
chars++
|
|
r.Advance(1)
|
|
continue
|
|
}
|
|
return segment.WithStart(segment.Start + i + 1), chars, true
|
|
}
|
|
}
|
|
}
|
|
|
|
func matchReader(r Reader, reg *regexp.Regexp) bool {
|
|
oldline, oldseg := r.Position()
|
|
match := reg.FindReaderSubmatchIndex(r)
|
|
r.SetPosition(oldline, oldseg)
|
|
if match == nil {
|
|
return false
|
|
}
|
|
r.Advance(match[1] - match[0])
|
|
return true
|
|
}
|
|
|
|
func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte {
|
|
oldline, oldseg := r.Position()
|
|
match := reg.FindReaderSubmatchIndex(r)
|
|
r.SetPosition(oldline, oldseg)
|
|
if match == nil {
|
|
return nil
|
|
}
|
|
runes := make([]rune, 0, match[1]-match[0])
|
|
for i := 0; i < match[1]; {
|
|
r, size, _ := readRuneReader(r)
|
|
i += size
|
|
runes = append(runes, r)
|
|
}
|
|
result := [][]byte{}
|
|
for i := 0; i < len(match); i += 2 {
|
|
result = append(result, []byte(string(runes[match[i]:match[i+1]])))
|
|
}
|
|
|
|
r.SetPosition(oldline, oldseg)
|
|
r.Advance(match[1] - match[0])
|
|
return result
|
|
}
|
|
|
|
func readRuneReader(r Reader) (rune, int, error) {
|
|
line, _ := r.PeekLine()
|
|
if line == nil {
|
|
return 0, 0, io.EOF
|
|
}
|
|
rn, size := utf8.DecodeRune(line)
|
|
if rn == utf8.RuneError {
|
|
return 0, 0, io.EOF
|
|
}
|
|
r.Advance(size)
|
|
return rn, size, nil
|
|
}
|