mirror of
https://github.com/yuin/goldmark
synced 2025-03-04 23:04:52 +00:00
Fixes #103
This commit is contained in:
parent
4a200405d7
commit
a727b5adb2
4 changed files with 1626 additions and 5 deletions
|
|
@ -94,3 +94,14 @@ correctly</a></p>
|
|||
//- - - - - - - - -//
|
||||
<p><a href=""></a></p>
|
||||
//= = = = = = = = = = = = = = = = = = = = = = = =//
|
||||
|
||||
|
||||
|
||||
9
|
||||
//- - - - - - - - -//
|
||||
[daß] is the old german spelling of [dass]
|
||||
|
||||
[daß]: www.das-dass.de
|
||||
//- - - - - - - - -//
|
||||
<p><a href="www.das-dass.de">daß</a> is the old german spelling of <a href="www.das-dass.de">dass</a></p>
|
||||
//= = = = = = = = = = = = = = = = = = = = = = = =//
|
||||
|
|
|
|||
73
_tools/gen-unicode-case-folding-map.go
Normal file
73
_tools/gen-unicode-case-folding-map.go
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// outPath is the file the generated case folding table is written to. It is a
// relative path, so it is resolved against the directory the tool is run from.
const outPath = "../util/unicode_case_folding.go"
|
||||
|
||||
// caseFolding is one mapping parsed from a line of CaseFolding.txt.
type caseFolding struct {
	// Class is the first byte of the line's second field (the status
	// letter); main only emits entries whose Class is 'C' or 'F'.
	Class byte
	// From is the code point parsed from the line's first (hexadecimal) field.
	From rune
	// To holds the code points parsed from the line's third field, in order.
	To []rune
}
|
||||
|
||||
func main() {
|
||||
url := "http://www.unicode.org/Public/12.1.0/ucd/CaseFolding.txt"
|
||||
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
fmt.Printf("Failed to get CaseFolding.txt: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
bs, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
fmt.Printf("Failed to get CaseFolding.txt: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
buf := bytes.NewBuffer(bs)
|
||||
scanner := bufio.NewScanner(buf)
|
||||
f, err := os.Create(outPath)
|
||||
if err != nil {
|
||||
fmt.Printf("Failed to open %s: %v\n", outPath, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
defer f.Close()
|
||||
_, _ = f.WriteString("package util\n\n")
|
||||
_, _ = f.WriteString("var unicodeCaseFoldings = map[rune][]rune {\n")
|
||||
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if strings.HasPrefix(line, "#") || len(strings.TrimSpace(line)) == 0 {
|
||||
continue
|
||||
}
|
||||
line = strings.Split(line, "#")[0]
|
||||
parts := strings.Split(line, ";")
|
||||
for i, p := range parts {
|
||||
parts[i] = strings.TrimSpace(p)
|
||||
}
|
||||
cf := caseFolding{}
|
||||
v, _ := strconv.ParseInt(parts[0], 16, 32)
|
||||
cf.From = rune(int32(v))
|
||||
cf.Class = parts[1][0]
|
||||
for _, v := range strings.Split(parts[2], " ") {
|
||||
c, _ := strconv.ParseInt(v, 16, 32)
|
||||
cf.To = append(cf.To, rune(int32(c)))
|
||||
}
|
||||
if cf.Class != 'C' && cf.Class != 'F' {
|
||||
continue
|
||||
}
|
||||
fmt.Fprintf(f, " %#x : %#v,\n", cf.From, cf.To)
|
||||
}
|
||||
fmt.Fprintf(f, "}\n")
|
||||
}
|
||||
1491
util/unicode_case_folding.go
Normal file
1491
util/unicode_case_folding.go
Normal file
File diff suppressed because it is too large
Load diff
54
util/util.go
54
util/util.go
|
|
@ -8,7 +8,6 @@ import (
|
|||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
|
|
@ -387,6 +386,52 @@ func TrimRightSpace(source []byte) []byte {
|
|||
return TrimRight(source, spaces)
|
||||
}
|
||||
|
||||
// DoFullUnicodeCaseFolding performs full unicode case folding to given bytes.
|
||||
func DoFullUnicodeCaseFolding(v []byte) []byte {
|
||||
var rbuf []byte
|
||||
cob := NewCopyOnWriteBuffer(v)
|
||||
n := 0
|
||||
for i := 0; i < len(v); i++ {
|
||||
c := v[i]
|
||||
if c < 0xb5 {
|
||||
if c >= 0x41 && c <= 0x5a {
|
||||
// A-Z to a-z
|
||||
cob.Write(v[n:i])
|
||||
cob.WriteByte(c + 32)
|
||||
n = i + 1
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if !utf8.RuneStart(c) {
|
||||
continue
|
||||
}
|
||||
r, length := utf8.DecodeRune(v[i:])
|
||||
if r == utf8.RuneError {
|
||||
continue
|
||||
}
|
||||
folded, ok := unicodeCaseFoldings[r]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
cob.Write(v[n:i])
|
||||
if rbuf == nil {
|
||||
rbuf = make([]byte, 4)
|
||||
}
|
||||
for _, f := range folded {
|
||||
l := utf8.EncodeRune(rbuf, f)
|
||||
cob.Write(rbuf[:l])
|
||||
}
|
||||
i += length - 1
|
||||
n = i + 1
|
||||
}
|
||||
if cob.IsCopied() {
|
||||
cob.Write(v[n:])
|
||||
}
|
||||
return cob.Bytes()
|
||||
}
|
||||
|
||||
// ReplaceSpaces replaces sequence of spaces with the given repl.
|
||||
func ReplaceSpaces(source []byte, repl byte) []byte {
|
||||
var ret []byte
|
||||
|
|
@ -439,13 +484,14 @@ func ToValidRune(v rune) rune {
|
|||
return v
|
||||
}
|
||||
|
||||
// ToLinkReference convert given bytes into a valid link reference string.
|
||||
// ToLinkReference trims leading and trailing spaces and convert into lower
|
||||
// ToLinkReference converts given bytes into a valid link reference string.
|
||||
// ToLinkReference performs unicode case folding, trims leading and trailing spaces, converts into lower
|
||||
// case and replace spaces with a single space character.
|
||||
func ToLinkReference(v []byte) string {
|
||||
v = TrimLeftSpace(v)
|
||||
v = TrimRightSpace(v)
|
||||
return strings.ToLower(string(ReplaceSpaces(v, ' ')))
|
||||
v = DoFullUnicodeCaseFolding(v)
|
||||
return string(ReplaceSpaces(v, ' '))
|
||||
}
|
||||
|
||||
// htmlEscapeTable maps a byte value to its HTML entity replacement. Entries
// are nil for every byte that has no replacement; only '"', '&', '<' and '>'
// have one.
var htmlEscapeTable = [256][]byte{
	'"': []byte("&quot;"),
	'&': []byte("&amp;"),
	'<': []byte("&lt;"),
	'>': []byte("&gt;"),
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue