Merge pull request #409 from henry0312/support_cjk_symbols_and_punctuation

Add support for CJK Symbols and Punctuation
This commit is contained in:
Yusuke Inuzuka 2023-08-13 22:26:44 +09:00 committed by GitHub
commit ac56543632
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 19 additions and 1 deletions

View file

@ -197,4 +197,15 @@ func TestEastAsianLineBreaks(t *testing.T) {
}, },
t, t,
) )
no = 8
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "Soft line breaks between east asian wide characters or punctuations are ignored",
Markdown: "太郎は\\ **「こんにちわ」**\\ と、\r\n言った\r\nんです",
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と、言ったんです</p>",
},
t,
)
} }

View file

@ -836,11 +836,18 @@ func IsAlphaNumeric(c byte) bool {
// IsEastAsianWideRune returns true if the given rune is an east asian wide
// character, otherwise false.
//
// A rune is reported as wide when it lies in the CJK Symbols and Punctuation
// block (U+3000..U+303F) or when it belongs to one of the Hiragana, Katakana,
// Han or Hangul scripts, or to the Lm (modifier letter) category.
func IsEastAsianWideRune(r rune) bool {
	// Bounds of the CJK Symbols and Punctuation block, see
	// https://en.wikipedia.org/wiki/CJK_Symbols_and_Punctuation
	//
	// The block is one contiguous range, so a plain comparison is used
	// instead of building a unicode.RangeTable on every call.
	const (
		cjkSymbolsAndPunctuationLo = 0x3000
		cjkSymbolsAndPunctuationHi = 0x303F
	)
	if r >= cjkSymbolsAndPunctuationLo && r <= cjkSymbolsAndPunctuationHi {
		return true
	}
	return unicode.Is(unicode.Hiragana, r) ||
		unicode.Is(unicode.Katakana, r) ||
		unicode.Is(unicode.Han, r) ||
		unicode.Is(unicode.Lm, r) ||
		unicode.Is(unicode.Hangul, r)
}
// A BufWriter is a subset of the bufio.Writer . // A BufWriter is a subset of the bufio.Writer .