diff --git a/extension/cjk_test.go b/extension/cjk_test.go index fe12c00..e97bb72 100644 --- a/extension/cjk_test.go +++ b/extension/cjk_test.go @@ -197,4 +197,15 @@ func TestEastAsianLineBreaks(t *testing.T) { }, t, ) + no = 8 + testutil.DoTestCase( + markdown, + testutil.MarkdownTestCase{ + No: no, + Description: "Soft line breaks between east asian wide characters or punctuations are ignored", + Markdown: "太郎は\\ **「こんにちわ」**\\ と、\r\n言った\r\nんです", + Expected: "
太郎は\\ 「こんにちわ」\\ と、言ったんです
", + }, + t, + ) } diff --git a/util/util.go b/util/util.go index 88d2538..22a0caf 100644 --- a/util/util.go +++ b/util/util.go @@ -836,11 +836,18 @@ func IsAlphaNumeric(c byte) bool { // IsEastAsianWideRune returns true if the given rune is an east asian wide character, otherwise false. func IsEastAsianWideRune(r rune) bool { + // https://en.wikipedia.org/wiki/CJK_Symbols_and_Punctuation + var CJKSymbolsAndPunctuation = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x3000, 0x303F, 1}, + }, + } return unicode.Is(unicode.Hiragana, r) || unicode.Is(unicode.Katakana, r) || unicode.Is(unicode.Han, r) || unicode.Is(unicode.Lm, r) || - unicode.Is(unicode.Hangul, r) + unicode.Is(unicode.Hangul, r) || + unicode.Is(CJKSymbolsAndPunctuation, r) } // A BufWriter is a subset of the bufio.Writer .