From 2f1b40d881de49689b3ba34f095c3df405572eb5 Mon Sep 17 00:00:00 2001 From: OMOTO Tsukasa Date: Sun, 13 Aug 2023 13:04:14 +0900 Subject: [PATCH] Support CJK Symbols and Punctuation This commit adds support for CJK Symbols and Punctuation to `func IsEastAsianWideRune` --- extension/cjk_test.go | 11 +++++++++++ util/util.go | 9 ++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/extension/cjk_test.go b/extension/cjk_test.go index fe12c00..e97bb72 100644 --- a/extension/cjk_test.go +++ b/extension/cjk_test.go @@ -197,4 +197,15 @@ func TestEastAsianLineBreaks(t *testing.T) { }, t, ) + no = 8 + testutil.DoTestCase( + markdown, + testutil.MarkdownTestCase{ + No: no, + Description: "Soft line breaks between east asian wide characters or punctuations are ignored", + Markdown: "太郎は\\ **「こんにちわ」**\\ と、\r\n言った\r\nんです", + Expected: "

<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と、言ったんです</p>

", + }, + t, + ) } diff --git a/util/util.go b/util/util.go index 88d2538..22a0caf 100644 --- a/util/util.go +++ b/util/util.go @@ -836,11 +836,18 @@ func IsAlphaNumeric(c byte) bool { // IsEastAsianWideRune returns trhe if the given rune is an east asian wide character, otherwise false. func IsEastAsianWideRune(r rune) bool { + // https://en.wikipedia.org/wiki/CJK_Symbols_and_Punctuation + var CJKSymbolsAndPunctuation = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x3000, 0x303F, 1}, + }, + } return unicode.Is(unicode.Hiragana, r) || unicode.Is(unicode.Katakana, r) || unicode.Is(unicode.Han, r) || unicode.Is(unicode.Lm, r) || - unicode.Is(unicode.Hangul, r) + unicode.Is(unicode.Hangul, r) || + unicode.Is(CJKSymbolsAndPunctuation, r) } // A BufWriter is a subset of the bufio.Writer .