-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(term): ansi: implement wrap and wordwrap (#51)
* feat(term): ansi: implement wrap and wordwrap. This is based on @muesli's awesome [reflow](https://github.com/muesli/reflow) library. It uses the ANSI parser state machine to find escape codes and runes. Since it uses the ANSI parser state machine, it supports OSC, DCS, and other sequences.
* fix(term): ansi: mike's feedback
- Loading branch information
1 parent
2f4b840
commit 7faadd0
Showing
2 changed files
with
353 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,243 @@ | ||
package ansi | ||
|
||
import ( | ||
"bytes" | ||
"unicode" | ||
"unicode/utf8" | ||
|
||
"github.com/charmbracelet/x/exp/term/ansi/parser" | ||
"github.com/rivo/uniseg" | ||
) | ||
|
||
// Wrap wraps a string or a block of text to a given line length, breaking word | ||
// boundaries. This will preserve ANSI escape codes and will account for | ||
// wide-characters in the string. | ||
// When preserveSpace is true, spaces at the beginning of a line will be | ||
// preserved. | ||
func Wrap(s string, limit int, preserveSpace bool) string { | ||
if limit < 1 { | ||
return s | ||
} | ||
|
||
var ( | ||
cluster []byte | ||
buf bytes.Buffer | ||
curWidth int | ||
forceNewline bool | ||
gstate = -1 | ||
pstate = parser.GroundState // initial state | ||
b = []byte(s) | ||
) | ||
|
||
addNewline := func() { | ||
buf.WriteByte('\n') | ||
curWidth = 0 | ||
} | ||
|
||
i := 0 | ||
for i < len(b) { | ||
state, action := parser.Table.Transition(pstate, b[i]) | ||
|
||
switch action { | ||
case parser.CollectAction: | ||
if w := utf8ByteLen(b[i]); w <= 1 { | ||
// Collect sequence intermediate bytes | ||
buf.WriteByte(b[i]) | ||
break | ||
} | ||
|
||
var width int | ||
cluster, _, width, gstate = uniseg.FirstGraphemeCluster(b[i:], gstate) | ||
i += len(cluster) | ||
|
||
if curWidth+width > limit { | ||
addNewline() | ||
} | ||
if !preserveSpace && curWidth == 0 && len(cluster) <= 4 { | ||
// Skip spaces at the beginning of a line | ||
if r, _ := utf8.DecodeRune(cluster); r != utf8.RuneError && unicode.IsSpace(r) { | ||
pstate = parser.GroundState | ||
continue | ||
} | ||
} | ||
|
||
buf.Write(cluster) | ||
curWidth += width | ||
gstate = -1 // reset grapheme state otherwise, width calculation might be off | ||
pstate = parser.GroundState | ||
continue | ||
case parser.PrintAction, parser.ExecuteAction: | ||
if b[i] == '\n' { | ||
addNewline() | ||
forceNewline = false | ||
break | ||
} | ||
|
||
if curWidth+1 > limit { | ||
addNewline() | ||
forceNewline = true | ||
} | ||
|
||
// Skip spaces at the beginning of a line | ||
if curWidth == 0 { | ||
if !preserveSpace && forceNewline && unicode.IsSpace(rune(b[i])) { | ||
break | ||
} | ||
forceNewline = false | ||
} | ||
|
||
buf.WriteByte(b[i]) | ||
curWidth++ | ||
default: | ||
buf.WriteByte(b[i]) | ||
} | ||
|
||
// We manage the UTF8 state separately manually above. | ||
if pstate != parser.Utf8State { | ||
pstate = state | ||
} | ||
i++ | ||
} | ||
|
||
return buf.String() | ||
} | ||
|
||
// Wordwrap wraps a string or a block of text to a given line length, not | ||
// breaking word boundaries. This will preserve ANSI escape codes and will | ||
// account for wide-characters in the string. | ||
// The breakpoints string is a list of characters that are considered | ||
// breakpoints for word wrapping. A hyphen (-) is always considered a | ||
// breakpoint. | ||
func Wordwrap(s string, limit int, breakpoints string) string { | ||
if limit < 1 { | ||
return s | ||
} | ||
|
||
// Add a hyphen to the breakpoints | ||
breakpoints += "-" | ||
|
||
var ( | ||
cluster []byte | ||
buf bytes.Buffer | ||
word bytes.Buffer | ||
space bytes.Buffer | ||
curWidth int | ||
wordLen int | ||
gstate = -1 | ||
pstate = parser.GroundState // initial state | ||
b = []byte(s) | ||
) | ||
|
||
addSpace := func() { | ||
curWidth += space.Len() | ||
buf.Write(space.Bytes()) | ||
space.Reset() | ||
} | ||
|
||
addWord := func() { | ||
if word.Len() == 0 { | ||
return | ||
} | ||
addSpace() | ||
curWidth += wordLen | ||
buf.Write(word.Bytes()) | ||
word.Reset() | ||
wordLen = 0 | ||
} | ||
|
||
addNewline := func() { | ||
buf.WriteByte('\n') | ||
curWidth = 0 | ||
space.Reset() | ||
} | ||
|
||
i := 0 | ||
for i < len(b) { | ||
state, action := parser.Table.Transition(pstate, b[i]) | ||
|
||
switch action { | ||
case parser.CollectAction: | ||
if w := utf8ByteLen(b[i]); w <= 1 { | ||
// Collect sequence intermediate bytes | ||
word.WriteByte(b[i]) | ||
break | ||
} | ||
|
||
var width int | ||
cluster, _, width, gstate = uniseg.FirstGraphemeCluster(b[i:], gstate) | ||
i += len(cluster) | ||
|
||
r, _ := utf8.DecodeRune(cluster) | ||
if r != utf8.RuneError && unicode.IsSpace(r) { | ||
addWord() | ||
space.WriteRune(r) | ||
} else if bytes.ContainsAny(cluster, breakpoints) { | ||
addSpace() | ||
addWord() | ||
buf.Write(cluster) | ||
} else { | ||
word.Write(cluster) | ||
wordLen += width | ||
if curWidth+space.Len()+wordLen > limit && | ||
wordLen < limit { | ||
addNewline() | ||
} | ||
} | ||
|
||
pstate = parser.GroundState | ||
continue | ||
case parser.PrintAction, parser.ExecuteAction: | ||
r := rune(b[i]) | ||
switch { | ||
case r == '\n': | ||
if wordLen == 0 { | ||
if curWidth+space.Len() > limit { | ||
curWidth = 0 | ||
} else { | ||
buf.Write(space.Bytes()) | ||
} | ||
space.Reset() | ||
} | ||
|
||
addWord() | ||
addNewline() | ||
case unicode.IsSpace(r): | ||
addWord() | ||
space.WriteByte(b[i]) | ||
case runeContainsAny(r, breakpoints): | ||
addSpace() | ||
addWord() | ||
buf.WriteByte(b[i]) | ||
default: | ||
word.WriteByte(b[i]) | ||
wordLen++ | ||
if curWidth+space.Len()+wordLen > limit && | ||
wordLen < limit { | ||
addNewline() | ||
} | ||
} | ||
|
||
default: | ||
word.WriteByte(b[i]) | ||
} | ||
|
||
// We manage the UTF8 state separately manually above. | ||
if pstate != parser.Utf8State { | ||
pstate = state | ||
} | ||
i++ | ||
} | ||
|
||
addWord() | ||
|
||
return buf.String() | ||
} | ||
|
||
// runeContainsAny reports whether r is one of the runes in s.
func runeContainsAny(r rune, s string) bool {
	found := false
	for _, candidate := range s {
		if candidate == r {
			found = true
			break
		}
	}
	return found
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
package ansi_test | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/charmbracelet/x/exp/term/ansi" | ||
) | ||
|
||
// cases is the table driving TestWrap: each entry is passed to ansi.Wrap as
// (input, limit, preserveSpace) and the result is compared with expected.
var cases = []struct {
	name          string
	input         string
	limit         int
	expected      string
	preserveSpace bool
}{
	{name: "empty string", input: "", limit: 0, expected: "", preserveSpace: true},
	{name: "passthrough", input: "foobar\n ", limit: 0, expected: "foobar\n ", preserveSpace: true},
	{name: "pass", input: "foo", limit: 4, expected: "foo", preserveSpace: true},
	{name: "simple", input: "foobarfoo", limit: 4, expected: "foob\narfo\no", preserveSpace: true},
	{name: "lf", input: "f\no\nobar", limit: 3, expected: "f\no\noba\nr", preserveSpace: true},
	{name: "lf_space", input: "foo bar\n baz", limit: 3, expected: "foo\n ba\nr\n b\naz", preserveSpace: true},
	{name: "tab", input: "foo\tbar", limit: 3, expected: "foo\n\tba\nr", preserveSpace: true},
	{name: "unicode_space", input: "foo\xc2\xa0bar", limit: 3, expected: "foo\nbar", preserveSpace: false},
	{name: "style_nochange", input: "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", limit: 7, expected: "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", preserveSpace: true},
	{name: "style", input: "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust another test\x1B[38;2;249;38;114m)\x1B[0m", limit: 3, expected: "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mju\nst \nano\nthe\nr t\nest\x1B[38;2;249;38;114m\n)\x1B[0m", preserveSpace: true},
	{name: "style_lf", input: "I really \x1B[38;2;249;38;114mlove\x1B[0m Go!", limit: 8, expected: "I really\n\x1b[38;2;249;38;114mlove\x1b[0m Go!", preserveSpace: false},
	{name: "style_emoji", input: "I really \x1B[38;2;249;38;114mlove u🫧\x1B[0m", limit: 8, expected: "I really\n\x1b[38;2;249;38;114mlove u🫧\x1b[0m", preserveSpace: false},
	{name: "hyperlink", input: "I really \x1B]8;;https://example.com/\x1B\\love\x1B]8;;\x1B\\ Go!", limit: 10, expected: "I really \x1b]8;;https://example.com/\x1b\\l\nove\x1b]8;;\x1b\\ Go!", preserveSpace: false},
	{name: "dcs", input: "\x1BPq#0;2;0;0;0#1;2;100;100;0#2;2;0;100;0#1~~@@vv@@~~@@~~$#2??}}GG}}??}}??-#1!14@\x1B\\foobar", limit: 3, expected: "\x1BPq#0;2;0;0;0#1;2;100;100;0#2;2;0;100;0#1~~@@vv@@~~@@~~$#2??}}GG}}??}}??-#1!14@\x1B\\foo\nbar", preserveSpace: false},
	{name: "begin_with_space", input: " foo", limit: 4, expected: " foo", preserveSpace: false},
	{name: "style_dont_affect_wrap", input: "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", limit: 7, expected: "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", preserveSpace: false},
	{name: "preserve_style", input: "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust another test\x1B[38;2;249;38;114m)\x1B[0m", limit: 3, expected: "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mju\nst \nano\nthe\nr t\nest\x1B[38;2;249;38;114m\n)\x1B[0m", preserveSpace: false},
	{name: "emoji", input: "foo🫧foobar", limit: 4, expected: "foo\n🫧fo\nobar", preserveSpace: false},
	{name: "osc8_wrap", input: "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\สวัสดีสวัสดี\x1b]8;;\x1b\\", limit: 8, expected: "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nสวัสดีสวัสดี\x1b]8;;\x1b\\", preserveSpace: false},
}
|
||
func TestWrap(t *testing.T) { | ||
for i, tt := range cases { | ||
t.Run(tt.name, func(t *testing.T) { | ||
if got := ansi.Wrap(tt.input, tt.limit, tt.preserveSpace); got != tt.expected { | ||
t.Errorf("case %d, expected %q, got %q", i+1, tt.expected, got) | ||
} | ||
}) | ||
} | ||
} | ||
|
||
// wwCases is the table driving TestWordwrap: each entry is passed to
// ansi.Wordwrap as (input, limit, breakPoints) and the result is compared
// with expected.
var wwCases = []struct {
	name        string
	input       string
	limit       int
	breakPoints string
	expected    string
}{
	{name: "empty string", input: "", limit: 0, breakPoints: "", expected: ""},
	{name: "passthrough", input: "foobar\n ", limit: 0, breakPoints: "", expected: "foobar\n "},
	{name: "pass", input: "foo", limit: 3, breakPoints: "", expected: "foo"},
	{name: "toolong", input: "foobarfoo", limit: 4, breakPoints: "", expected: "foobarfoo"},
	{name: "white space", input: "foo bar foo", limit: 4, breakPoints: "", expected: "foo\nbar\nfoo"},
	{name: "broken_at_spaces", input: "foo bars foobars", limit: 4, breakPoints: "", expected: "foo\nbars\nfoobars"},
	{name: "hyphen", input: "foo-foobar", limit: 4, breakPoints: "-", expected: "foo-\nfoobar"},
	{name: "emoji_breakpoint", input: "foo😃 foobar", limit: 4, breakPoints: "😃", expected: "foo😃\nfoobar"},
	{name: "wide_emoji_breakpoint", input: "foo🫧 foobar", limit: 4, breakPoints: "🫧", expected: "foo🫧\nfoobar"},
	{name: "space_breakpoint", input: "foo --bar", limit: 9, breakPoints: "-", expected: "foo --bar"},
	{name: "simple", input: "foo bars foobars", limit: 4, breakPoints: "", expected: "foo\nbars\nfoobars"},
	{name: "limit", input: "foo bar", limit: 5, breakPoints: "", expected: "foo\nbar"},
	{name: "remove white spaces", input: "foo \nb ar ", limit: 4, breakPoints: "", expected: "foo\nb\nar"},
	{name: "white space trail width", input: "foo\nb\t a\n bar", limit: 4, breakPoints: "", expected: "foo\nb\t a\n bar"},
	{name: "explicit_line_break", input: "foo bar foo\n", limit: 4, breakPoints: "", expected: "foo\nbar\nfoo\n"},
	{name: "explicit_breaks", input: "\nfoo bar\n\n\nfoo\n", limit: 4, breakPoints: "", expected: "\nfoo\nbar\n\n\nfoo\n"},
	{name: "example", input: " This is a list: \n\n\t* foo\n\t* bar\n\n\n\t* foo \nbar ", limit: 6, breakPoints: "", expected: " This\nis a\nlist: \n\n\t* foo\n\t* bar\n\n\n\t* foo\nbar"},
	{name: "style_code_dont_affect_length", input: "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", limit: 7, breakPoints: "", expected: "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m"},
	{name: "style_code_dont_get_wrapped", input: "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust another test\x1B[38;2;249;38;114m)\x1B[0m", limit: 3, breakPoints: "", expected: "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust\nanother\ntest\x1B[38;2;249;38;114m)\x1B[0m"},
	{name: "osc8_wrap", input: "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\ สวัสดีสวัสดี\x1b]8;;\x1b\\", limit: 8, breakPoints: "", expected: "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nสวัสดีสวัสดี\x1b]8;;\x1b\\"},
}
|
||
func TestWordwrap(t *testing.T) { | ||
for i, tt := range wwCases { | ||
t.Run(tt.name, func(t *testing.T) { | ||
if got := ansi.Wordwrap(tt.input, tt.limit, tt.breakPoints); got != tt.expected { | ||
t.Errorf("case %d, expected %q, got %q", i+1, tt.expected, got) | ||
} | ||
}) | ||
} | ||
} | ||
|
||
func TestWrapWordwrap(t *testing.T) { | ||
t.Skip("WIP") | ||
input := "the quick brown foxxxxxxxxxxxxxxxx jumped over the lazy dog." | ||
limit := 16 | ||
output := ansi.Wordwrap(input, limit, "") | ||
t.Logf("output: %q", output) | ||
output = ansi.Wrap(output, limit, false) | ||
if output != "the quick brown\nfoxxxxxxxxxxxxx\nxxxx jumped over\nthe lazy dog." { | ||
t.Errorf("expected %q, got %q", "the quick brown\nfoxxxxxxxxxxxxxx\nxx jumped over\nthe lazy dog.", output) | ||
} | ||
} | ||
|
||
// Unused reference text (assigned to the blank identifier): the sample
// sentence from TestWrapWordwrap laid out over four lines, with the long word
// kept whole on a line of its own.
// NOTE(review): this looks like the intended Wordwrap result at a limit of
// 16 — confirm.
const _ = ` | ||
the quick brown | ||
foxxxxxxxxxxxxxxxx | ||
jumped over the | ||
lazy dog. | ||
` | ||
|
||
// Unused reference text (assigned to the blank identifier): the same sample
// sentence laid out over four lines, with both the long word and "the" split
// across line ends.
// NOTE(review): this looks like the intended Wrap (hard wrap) result at a
// limit of 16 — confirm.
const _ = ` | ||
the quick brown | ||
foxxxxxxxxxxxxxx | ||
xx jumped over t | ||
he lazy dog. | ||
` |