Add a WorksEvenWithOneSide option to EastAsianLineBreak

This commit is contained in:
OMOTO Tsukasa 2023-09-10 01:52:36 +09:00
parent 6ef9b10a3a
commit 6cbcfebb71
4 changed files with 114 additions and 16 deletions

View file

@ -380,6 +380,7 @@ This extension provides additional options for CJK users.
| Functional option | Type | Description | | Functional option | Type | Description |
| ----------------- | ---- | ----------- | | ----------------- | ---- | ----------- |
| `extension.WithEastAsianLineBreaks` | `-` | Soft line breaks are rendered as a newline. Some asian users will see it as an unnecessary space. With this option, soft line breaks between east asian wide characters will be ignored. | | `extension.WithEastAsianLineBreaks` | `-` | Soft line breaks are rendered as a newline. Some asian users will see it as an unnecessary space. With this option, soft line breaks between east asian wide characters will be ignored. |
| `extension.WithWorksEvenWithOneSide` | `-` | A functional option for `WithEastAsianLineBreaks` that indicates that a soft line break is ignored even if only one side of the break is an east asian wide character. |
| `extension.WithEscapedSpace` | `-` | Without spaces around an emphasis started with east asian punctuations, it is not interpreted as an emphasis(as defined in CommonMark spec). With this option, you can avoid this inconvenient behavior by putting 'not rendered' spaces around an emphasis like `太郎は\ **「こんにちわ」**\ といった`. | | `extension.WithEscapedSpace` | `-` | Without spaces around an emphasis started with east asian punctuations, it is not interpreted as an emphasis(as defined in CommonMark spec). With this option, you can avoid this inconvenient behavior by putting 'not rendered' spaces around an emphasis like `太郎は\ **「こんにちわ」**\ といった`. |

View file

@ -9,11 +9,28 @@ import (
// A CJKOption sets options for CJK support mostly for HTML based renderers. // A CJKOption sets options for CJK support mostly for HTML based renderers.
type CJKOption func(*cjk) type CJKOption func(*cjk)
// An EastAsianLineBreaksOption sets options for east asian line breaks.
type EastAsianLineBreaksOption func(*eastAsianLineBreaks)
// WithEastAsianLineBreaks is a functional option that indicates whether softline breaks // WithEastAsianLineBreaks is a functional option that indicates whether softline breaks
// between east asian wide characters should be ignored. // between east asian wide characters should be ignored.
func WithEastAsianLineBreaks() CJKOption { func WithEastAsianLineBreaks(opts ...EastAsianLineBreaksOption) CJKOption {
return func(c *cjk) { return func(c *cjk) {
c.EastAsianLineBreaks = true e := &eastAsianLineBreaks{
Enabled: true,
}
for _, opt := range opts {
opt(e)
}
c.EastAsianLineBreaks = e
}
}
// WithWorksEvenWithOneSide is a functional option that indicates that a softline break
// is ignored even if only one side of the break is an east asian wide character.
func WithWorksEvenWithOneSide() EastAsianLineBreaksOption {
return func(e *eastAsianLineBreaks) {
e.WorksEvenWithOneSide = true
} }
} }
@ -25,10 +42,15 @@ func WithEscapedSpace() CJKOption {
} }
type cjk struct { type cjk struct {
EastAsianLineBreaks bool EastAsianLineBreaks *eastAsianLineBreaks
EscapedSpace bool EscapedSpace bool
} }
// eastAsianLineBreaks holds the settings collected by WithEastAsianLineBreaks
// and the EastAsianLineBreaksOption values passed to it.
type eastAsianLineBreaks struct {
// Enabled is set to true by WithEastAsianLineBreaks.
Enabled bool
// WorksEvenWithOneSide is set to true by WithWorksEvenWithOneSide; Extend
// forwards it to the HTML renderer as html.WithWorksEvenWithOneSide.
WorksEvenWithOneSide bool
}
// CJK is a goldmark extension that provides functionalities for CJK languages. // CJK is a goldmark extension that provides functionalities for CJK languages.
var CJK = NewCJK(WithEastAsianLineBreaks(), WithEscapedSpace()) var CJK = NewCJK(WithEastAsianLineBreaks(), WithEscapedSpace())
@ -42,8 +64,15 @@ func NewCJK(opts ...CJKOption) goldmark.Extender {
} }
func (e *cjk) Extend(m goldmark.Markdown) { func (e *cjk) Extend(m goldmark.Markdown) {
if e.EastAsianLineBreaks { if e.EastAsianLineBreaks != nil {
m.Renderer().AddOptions(html.WithEastAsianLineBreaks()) if e.EastAsianLineBreaks.Enabled {
opts := []html.EastAsianLineBreaksOption{}
if e.EastAsianLineBreaks.WorksEvenWithOneSide {
opts = append(opts, html.WithWorksEvenWithOneSide())
}
m.Renderer().AddOptions(html.WithEastAsianLineBreaks(opts...))
}
} }
if e.EscapedSpace { if e.EscapedSpace {
m.Renderer().AddOptions(html.WithWriter(html.NewWriter(html.WithEscapedSpace()))) m.Renderer().AddOptions(html.WithWriter(html.NewWriter(html.WithEscapedSpace())))

View file

@ -208,4 +208,37 @@ func TestEastAsianLineBreaks(t *testing.T) {
}, },
t, t,
) )
// WithWorksEvenWithOneSide option
markdown = goldmark.New(goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithUnsafe(),
),
goldmark.WithExtensions(
NewCJK(WithEastAsianLineBreaks(WithWorksEvenWithOneSide())),
),
)
no = 9
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "Soft line breaks between a western character and an east asian wide character are ignored",
Markdown: "太郎は\\ **「こんにちわ」**\\ と言ったa\nんです",
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と言ったaんです</p>",
},
t,
)
no = 10
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "Soft line breaks between an east asian wide character and a western character are ignored",
Markdown: "太郎は\\ **「こんにちわ」**\\ と言った\nbんです",
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と言ったbんです</p>",
},
t,
)
} }

View file

@ -16,7 +16,7 @@ import (
type Config struct { type Config struct {
Writer Writer Writer Writer
HardWraps bool HardWraps bool
EastAsianLineBreaks bool EastAsianLineBreaks eastAsianLineBreaks
XHTML bool XHTML bool
Unsafe bool Unsafe bool
} }
@ -26,7 +26,7 @@ func NewConfig() Config {
return Config{ return Config{
Writer: DefaultWriter, Writer: DefaultWriter,
HardWraps: false, HardWraps: false,
EastAsianLineBreaks: false, EastAsianLineBreaks: eastAsianLineBreaks{},
XHTML: false, XHTML: false,
Unsafe: false, Unsafe: false,
} }
@ -38,7 +38,7 @@ func (c *Config) SetOption(name renderer.OptionName, value interface{}) {
case optHardWraps: case optHardWraps:
c.HardWraps = value.(bool) c.HardWraps = value.(bool)
case optEastAsianLineBreaks: case optEastAsianLineBreaks:
c.EastAsianLineBreaks = value.(bool) c.EastAsianLineBreaks = value.(eastAsianLineBreaks)
case optXHTML: case optXHTML:
c.XHTML = value.(bool) c.XHTML = value.(bool)
case optUnsafe: case optUnsafe:
@ -103,24 +103,51 @@ func WithHardWraps() interface {
// EastAsianLineBreaks is an option name used in WithEastAsianLineBreaks. // EastAsianLineBreaks is an option name used in WithEastAsianLineBreaks.
const optEastAsianLineBreaks renderer.OptionName = "EastAsianLineBreaks" const optEastAsianLineBreaks renderer.OptionName = "EastAsianLineBreaks"
type withEastAsianLineBreaks struct { type eastAsianLineBreaks struct {
Enabled bool
WorksEvenWithOneSide bool
} }
// withEastAsianLineBreaks is the option value returned by WithEastAsianLineBreaks.
// Its SetConfig and SetHTMLOption methods store an enabled eastAsianLineBreaks
// value that carries worksEvenWithOneSide.
type withEastAsianLineBreaks struct {
// worksEvenWithOneSide is set to true by WithWorksEvenWithOneSide.
worksEvenWithOneSide bool
}
// An EastAsianLineBreaksOption sets options for east asian line breaks
// on the option value built by WithEastAsianLineBreaks.
type EastAsianLineBreaksOption func(*withEastAsianLineBreaks)
func (o *withEastAsianLineBreaks) SetConfig(c *renderer.Config) { func (o *withEastAsianLineBreaks) SetConfig(c *renderer.Config) {
c.Options[optEastAsianLineBreaks] = true c.Options[optEastAsianLineBreaks] = eastAsianLineBreaks{
Enabled: true,
WorksEvenWithOneSide: o.worksEvenWithOneSide,
}
} }
func (o *withEastAsianLineBreaks) SetHTMLOption(c *Config) { func (o *withEastAsianLineBreaks) SetHTMLOption(c *Config) {
c.EastAsianLineBreaks = true c.EastAsianLineBreaks = eastAsianLineBreaks{
Enabled: true,
WorksEvenWithOneSide: o.worksEvenWithOneSide,
}
} }
// WithEastAsianLineBreaks is a functional option that indicates whether softline breaks // WithEastAsianLineBreaks is a functional option that indicates whether softline breaks
// between east asian wide characters should be ignored. // between east asian wide characters should be ignored.
func WithEastAsianLineBreaks() interface { func WithEastAsianLineBreaks(opts ...EastAsianLineBreaksOption) interface {
renderer.Option renderer.Option
Option Option
} { } {
return &withEastAsianLineBreaks{} w := &withEastAsianLineBreaks{}
for _, opt := range opts {
opt(w)
}
return w
}
// WithWorksEvenWithOneSide is a functional option that indicates that a softline break
// is ignored even if only one side of the break is an east asian wide character.
func WithWorksEvenWithOneSide() EastAsianLineBreaksOption {
return func(o *withEastAsianLineBreaks) {
o.worksEvenWithOneSide = true
}
} }
// XHTML is an option name used in WithXHTML. // XHTML is an option name used in WithXHTML.
@ -663,14 +690,22 @@ func (r *Renderer) renderText(w util.BufWriter, source []byte, node ast.Node, en
_, _ = w.WriteString("<br>\n") _, _ = w.WriteString("<br>\n")
} }
} else if n.SoftLineBreak() { } else if n.SoftLineBreak() {
if r.EastAsianLineBreaks && len(value) != 0 { if r.EastAsianLineBreaks.Enabled && len(value) != 0 {
sibling := node.NextSibling() sibling := node.NextSibling()
if sibling != nil && sibling.Kind() == ast.KindText { if sibling != nil && sibling.Kind() == ast.KindText {
if siblingText := sibling.(*ast.Text).Text(source); len(siblingText) != 0 { if siblingText := sibling.(*ast.Text).Text(source); len(siblingText) != 0 {
thisLastRune := util.ToRune(value, len(value)-1) thisLastRune := util.ToRune(value, len(value)-1)
siblingFirstRune, _ := utf8.DecodeRune(siblingText) siblingFirstRune, _ := utf8.DecodeRune(siblingText)
if !util.IsEastAsianWideRune(thisLastRune) && !util.IsEastAsianWideRune(siblingFirstRune) { if r.EastAsianLineBreaks.WorksEvenWithOneSide {
_ = w.WriteByte('\n') if !(util.IsEastAsianWideRune(thisLastRune) ||
util.IsEastAsianWideRune(siblingFirstRune)) {
_ = w.WriteByte('\n')
}
} else {
if !(util.IsEastAsianWideRune(thisLastRune) &&
util.IsEastAsianWideRune(siblingFirstRune)) {
_ = w.WriteByte('\n')
}
} }
} }
} }