This commit is contained in:
yuin 2020-02-18 21:18:57 +09:00
parent 4a200405d7
commit a727b5adb2
4 changed files with 1626 additions and 5 deletions

View file

@ -93,4 +93,15 @@ correctly</a></p>
[]() []()
//- - - - - - - - -// //- - - - - - - - -//
<p><a href=""></a></p> <p><a href=""></a></p>
//= = = = = = = = = = = = = = = = = = = = = = = =// //= = = = = = = = = = = = = = = = = = = = = = = =//
9
//- - - - - - - - -//
[daß] is the old german spelling of [dass]
[daß]: www.das-dass.de
//- - - - - - - - -//
<p><a href="www.das-dass.de">daß</a> is the old german spelling of <a href="www.das-dass.de">dass</a></p>
//= = = = = = = = = = = = = = = = = = = = = = = =//

View file

@ -0,0 +1,73 @@
package main
import (
"bufio"
"bytes"
"fmt"
"io/ioutil"
"net/http"
"os"
"strconv"
"strings"
)
const outPath = "../util/unicode_case_folding.go"
type caseFolding struct {
Class byte
From rune
To []rune
}
func main() {
url := "http://www.unicode.org/Public/12.1.0/ucd/CaseFolding.txt"
resp, err := http.Get(url)
if err != nil {
fmt.Printf("Failed to get CaseFolding.txt: %v\n", err)
os.Exit(1)
}
defer resp.Body.Close()
bs, err := ioutil.ReadAll(resp.Body)
if err != nil {
fmt.Printf("Failed to get CaseFolding.txt: %v\n", err)
os.Exit(1)
}
buf := bytes.NewBuffer(bs)
scanner := bufio.NewScanner(buf)
f, err := os.Create(outPath)
if err != nil {
fmt.Printf("Failed to open %s: %v\n", outPath, err)
os.Exit(1)
}
defer f.Close()
_, _ = f.WriteString("package util\n\n")
_, _ = f.WriteString("var unicodeCaseFoldings = map[rune][]rune {\n")
for scanner.Scan() {
line := scanner.Text()
if strings.HasPrefix(line, "#") || len(strings.TrimSpace(line)) == 0 {
continue
}
line = strings.Split(line, "#")[0]
parts := strings.Split(line, ";")
for i, p := range parts {
parts[i] = strings.TrimSpace(p)
}
cf := caseFolding{}
v, _ := strconv.ParseInt(parts[0], 16, 32)
cf.From = rune(int32(v))
cf.Class = parts[1][0]
for _, v := range strings.Split(parts[2], " ") {
c, _ := strconv.ParseInt(v, 16, 32)
cf.To = append(cf.To, rune(int32(c)))
}
if cf.Class != 'C' && cf.Class != 'F' {
continue
}
fmt.Fprintf(f, " %#x : %#v,\n", cf.From, cf.To)
}
fmt.Fprintf(f, "}\n")
}

1491
util/unicode_case_folding.go Normal file

File diff suppressed because it is too large Load diff

View file

@ -8,7 +8,6 @@ import (
"regexp" "regexp"
"sort" "sort"
"strconv" "strconv"
"strings"
"unicode/utf8" "unicode/utf8"
) )
@ -387,6 +386,52 @@ func TrimRightSpace(source []byte) []byte {
return TrimRight(source, spaces) return TrimRight(source, spaces)
} }
// DoFullUnicodeCaseFolding performs full unicode case folding to given bytes.
func DoFullUnicodeCaseFolding(v []byte) []byte {
var rbuf []byte
cob := NewCopyOnWriteBuffer(v)
n := 0
for i := 0; i < len(v); i++ {
c := v[i]
if c < 0xb5 {
if c >= 0x41 && c <= 0x5a {
// A-Z to a-z
cob.Write(v[n:i])
cob.WriteByte(c + 32)
n = i + 1
}
continue
}
if !utf8.RuneStart(c) {
continue
}
r, length := utf8.DecodeRune(v[i:])
if r == utf8.RuneError {
continue
}
folded, ok := unicodeCaseFoldings[r]
if !ok {
continue
}
cob.Write(v[n:i])
if rbuf == nil {
rbuf = make([]byte, 4)
}
for _, f := range folded {
l := utf8.EncodeRune(rbuf, f)
cob.Write(rbuf[:l])
}
i += length - 1
n = i + 1
}
if cob.IsCopied() {
cob.Write(v[n:])
}
return cob.Bytes()
}
// ReplaceSpaces replaces sequence of spaces with the given repl. // ReplaceSpaces replaces sequence of spaces with the given repl.
func ReplaceSpaces(source []byte, repl byte) []byte { func ReplaceSpaces(source []byte, repl byte) []byte {
var ret []byte var ret []byte
@ -439,13 +484,14 @@ func ToValidRune(v rune) rune {
return v return v
} }
// ToLinkReference convert given bytes into a valid link reference string. // ToLinkReference converts given bytes into a valid link reference string.
// ToLinkReference trims leading and trailing spaces and convert into lower // ToLinkReference performs unicode case folding, trims leading and trailing spaces, converts into lower
// case and replace spaces with a single space character. // case and replace spaces with a single space character.
func ToLinkReference(v []byte) string { func ToLinkReference(v []byte) string {
v = TrimLeftSpace(v) v = TrimLeftSpace(v)
v = TrimRightSpace(v) v = TrimRightSpace(v)
return strings.ToLower(string(ReplaceSpaces(v, ' '))) v = DoFullUnicodeCaseFolding(v)
return string(ReplaceSpaces(v, ' '))
} }
var htmlEscapeTable = [256][]byte{nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("&quot;"), nil, nil, nil, []byte("&amp;"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("&lt;"), nil, []byte("&gt;"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil} var htmlEscapeTable = [256][]byte{nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("&quot;"), nil, nil, nil, []byte("&amp;"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("&lt;"), nil, []byte("&gt;"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil}