Skip to content

Commit

Permalink
feat(term): ansi: implement wrap and wordwrap (#51)
Browse files Browse the repository at this point in the history
* feat(term): ansi: implement wrap and wordwrap

This is based on @muesli's awesome
[reflow](https://github.com/muesli/reflow) library. It uses the ANSI
parser state machine to find escape codes and runes. Since it uses the
ANSI parser state machine, it supports OSC, DCS, and other sequences.

* fix(term): ansi: mike's feedback
  • Loading branch information
aymanbagabas authored Mar 21, 2024
1 parent 2f4b840 commit 7faadd0
Show file tree
Hide file tree
Showing 2 changed files with 353 additions and 0 deletions.
243 changes: 243 additions & 0 deletions exp/term/ansi/wrap.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
package ansi

import (
"bytes"
"unicode"
"unicode/utf8"

"github.com/charmbracelet/x/exp/term/ansi/parser"
"github.com/rivo/uniseg"
)

// Wrap wraps a string or a block of text to a given line length, breaking word
// boundaries. This will preserve ANSI escape codes and will account for
// wide-characters in the string.
// When preserveSpace is true, spaces at the beginning of a line will be
// preserved.
//
// A limit smaller than 1 disables wrapping and s is returned unchanged.
func Wrap(s string, limit int, preserveSpace bool) string {
	if limit < 1 {
		return s
	}

	var (
		cluster      []byte
		buf          bytes.Buffer
		curWidth     int  // display width written on the current line
		forceNewline bool // true when the last newline was inserted by wrapping
		pstate       = parser.GroundState // initial state
		b            = []byte(s)
	)

	addNewline := func() {
		buf.WriteByte('\n')
		curWidth = 0
	}

	i := 0
	for i < len(b) {
		state, action := parser.Table.Transition(pstate, b[i])

		switch action {
		case parser.CollectAction:
			if w := utf8ByteLen(b[i]); w <= 1 {
				// Collect sequence intermediate bytes
				buf.WriteByte(b[i])
				break
			}

			// Always start from a fresh grapheme state (-1): the bytes handed
			// to uniseg here are not guaranteed to directly follow the previous
			// cluster (ASCII and escape sequence bytes are consumed by the
			// parser in between), so a carried-over state could describe the
			// wrong rune and skew the width calculation.
			var width int
			cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
			i += len(cluster)

			if curWidth+width > limit {
				addNewline()
			}
			if !preserveSpace && curWidth == 0 && len(cluster) <= 4 {
				// Skip spaces at the beginning of a line
				if r, _ := utf8.DecodeRune(cluster); r != utf8.RuneError && unicode.IsSpace(r) {
					pstate = parser.GroundState
					continue
				}
			}

			buf.Write(cluster)
			curWidth += width
			// The whole cluster has been consumed, so the parser goes back to
			// the ground state and the index is not advanced again below.
			pstate = parser.GroundState
			continue
		case parser.PrintAction, parser.ExecuteAction:
			if b[i] == '\n' {
				// An explicit newline in the input starts a fresh line.
				addNewline()
				forceNewline = false
				break
			}

			if curWidth+1 > limit {
				addNewline()
				forceNewline = true
			}

			// Skip spaces at the beginning of a line
			if curWidth == 0 {
				// Only spaces that follow a newline inserted by wrapping are
				// dropped; spaces after an explicit newline or at the very
				// start of the input are kept.
				if !preserveSpace && forceNewline && unicode.IsSpace(rune(b[i])) {
					break
				}
				forceNewline = false
			}

			buf.WriteByte(b[i])
			curWidth++
		default:
			// Every other action copies the byte through without counting it
			// towards the line width.
			buf.WriteByte(b[i])
		}

		// We manage the UTF8 state separately manually above.
		if pstate != parser.Utf8State {
			pstate = state
		}
		i++
	}

	return buf.String()
}

// Wordwrap wraps a string or a block of text to a given line length, not
// breaking word boundaries. This will preserve ANSI escape codes and will
// account for wide-characters in the string.
// The breakpoints string is a list of characters that are considered
// breakpoints for word wrapping. A hyphen (-) is always considered a
// breakpoint.
//
// A limit smaller than 1 disables wrapping and s is returned unchanged. Words
// that are at least limit wide are never split; they are emitted on a line
// of their own and may exceed the limit.
func Wordwrap(s string, limit int, breakpoints string) string {
	if limit < 1 {
		return s
	}

	// Add a hyphen to the breakpoints
	breakpoints += "-"

	var (
		cluster  []byte
		buf      bytes.Buffer // finished output
		word     bytes.Buffer // pending word, including any escape sequence bytes
		space    bytes.Buffer // pending whitespace preceding the word
		curWidth int          // display width written on the current line
		wordLen  int          // display width of the pending word
		pstate   = parser.GroundState // initial state
		b        = []byte(s)
	)

	// addSpace flushes the pending whitespace to the output.
	// NOTE(review): space.Len() is a byte count, so a multi-byte space is
	// counted wider than it is displayed.
	addSpace := func() {
		curWidth += space.Len()
		buf.Write(space.Bytes())
		space.Reset()
	}

	// addWord flushes the pending whitespace followed by the pending word.
	addWord := func() {
		if word.Len() == 0 {
			return
		}
		addSpace()
		curWidth += wordLen
		buf.Write(word.Bytes())
		word.Reset()
		wordLen = 0
	}

	// addNewline starts a new output line and drops any pending whitespace.
	addNewline := func() {
		buf.WriteByte('\n')
		curWidth = 0
		space.Reset()
	}

	i := 0
	for i < len(b) {
		state, action := parser.Table.Transition(pstate, b[i])

		switch action {
		case parser.CollectAction:
			if w := utf8ByteLen(b[i]); w <= 1 {
				// Collect sequence intermediate bytes
				word.WriteByte(b[i])
				break
			}

			// Always start from a fresh grapheme state (-1): the bytes handed
			// to uniseg here are not guaranteed to directly follow the previous
			// cluster (ASCII and escape sequence bytes are consumed by the
			// parser in between), so a carried-over state could describe the
			// wrong rune and skew the width calculation.
			var width int
			cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
			i += len(cluster)

			r, _ := utf8.DecodeRune(cluster)
			if r != utf8.RuneError && unicode.IsSpace(r) {
				addWord()
				space.WriteRune(r)
			} else if bytes.ContainsAny(cluster, breakpoints) {
				addSpace()
				addWord()
				buf.Write(cluster)
				// The breakpoint occupies cells on the current line too.
				curWidth += width
			} else {
				word.Write(cluster)
				wordLen += width
				// Break before the pending word once it no longer fits, unless
				// the word alone is already as wide as a full line.
				if curWidth+space.Len()+wordLen > limit &&
					wordLen < limit {
					addNewline()
				}
			}

			// The whole cluster has been consumed, so the parser goes back to
			// the ground state and the index is not advanced again below.
			pstate = parser.GroundState
			continue
		case parser.PrintAction, parser.ExecuteAction:
			r := rune(b[i])
			switch {
			case r == '\n':
				if wordLen == 0 {
					// Trailing whitespace before an explicit newline is kept
					// only when it still fits on the current line.
					if curWidth+space.Len() > limit {
						curWidth = 0
					} else {
						buf.Write(space.Bytes())
					}
					space.Reset()
				}

				addWord()
				addNewline()
			case unicode.IsSpace(r):
				addWord()
				space.WriteByte(b[i])
			case runeContainsAny(r, breakpoints):
				addSpace()
				addWord()
				buf.WriteByte(b[i])
				// The breakpoint occupies a cell on the current line too.
				curWidth++
			default:
				word.WriteByte(b[i])
				wordLen++
				// Break before the pending word once it no longer fits, unless
				// the word alone is already as wide as a full line.
				if curWidth+space.Len()+wordLen > limit &&
					wordLen < limit {
					addNewline()
				}
			}

		default:
			// Every other action appends the byte to the pending word without
			// counting it towards the word width.
			word.WriteByte(b[i])
		}

		// We manage the UTF8 state separately manually above.
		if pstate != parser.Utf8State {
			pstate = state
		}
		i++
	}

	// Flush whatever word is still pending at the end of the input.
	addWord()

	return buf.String()
}

// runeContainsAny reports whether r is one of the runes that make up s.
// An empty s never matches.
func runeContainsAny(r rune, s string) bool {
	found := false
	for _, candidate := range s {
		if candidate != r {
			continue
		}
		found = true
		break
	}
	return found
}
110 changes: 110 additions & 0 deletions exp/term/ansi/wrap_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
package ansi_test

import (
"testing"

"github.com/charmbracelet/x/exp/term/ansi"
)

// cases is the table driving TestWrap. Each entry is passed to ansi.Wrap as
// (input, limit, preserveSpace) and the result is compared against expected.
var cases = []struct {
// name is the subtest name.
name string
// input is the text handed to ansi.Wrap.
input string
// limit is the line length handed to ansi.Wrap.
limit int
// expected is the exact output ansi.Wrap must produce.
expected string
// preserveSpace is forwarded as ansi.Wrap's preserveSpace argument.
preserveSpace bool
}{
{"empty string", "", 0, "", true},
{"passthrough", "foobar\n ", 0, "foobar\n ", true},
{"pass", "foo", 4, "foo", true},
{"simple", "foobarfoo", 4, "foob\narfo\no", true},
{"lf", "f\no\nobar", 3, "f\no\noba\nr", true},
{"lf_space", "foo bar\n baz", 3, "foo\n ba\nr\n b\naz", true},
{"tab", "foo\tbar", 3, "foo\n\tba\nr", true},
{"unicode_space", "foo\xc2\xa0bar", 3, "foo\nbar", false},
{"style_nochange", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", 7, "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", true},
{"style", "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust another test\x1B[38;2;249;38;114m)\x1B[0m", 3, "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mju\nst \nano\nthe\nr t\nest\x1B[38;2;249;38;114m\n)\x1B[0m", true},
{"style_lf", "I really \x1B[38;2;249;38;114mlove\x1B[0m Go!", 8, "I really\n\x1b[38;2;249;38;114mlove\x1b[0m Go!", false},
{"style_emoji", "I really \x1B[38;2;249;38;114mlove u🫧\x1B[0m", 8, "I really\n\x1b[38;2;249;38;114mlove u🫧\x1b[0m", false},
{"hyperlink", "I really \x1B]8;;https://example.com/\x1B\\love\x1B]8;;\x1B\\ Go!", 10, "I really \x1b]8;;https://example.com/\x1b\\l\nove\x1b]8;;\x1b\\ Go!", false},
{"dcs", "\x1BPq#0;2;0;0;0#1;2;100;100;0#2;2;0;100;0#1~~@@vv@@~~@@~~$#2??}}GG}}??}}??-#1!14@\x1B\\foobar", 3, "\x1BPq#0;2;0;0;0#1;2;100;100;0#2;2;0;100;0#1~~@@vv@@~~@@~~$#2??}}GG}}??}}??-#1!14@\x1B\\foo\nbar", false},
{"begin_with_space", " foo", 4, " foo", false},
{"style_dont_affect_wrap", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", 7, "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", false},
{"preserve_style", "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust another test\x1B[38;2;249;38;114m)\x1B[0m", 3, "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mju\nst \nano\nthe\nr t\nest\x1B[38;2;249;38;114m\n)\x1B[0m", false},
{"emoji", "foo🫧foobar", 4, "foo\n🫧fo\nobar", false},
{"osc8_wrap", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\สวัสดีสวัสดี\x1b]8;;\x1b\\", 8, "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nสวัสดีสวัสดี\x1b]8;;\x1b\\", false},
}

// TestWrap runs every entry of the cases table through ansi.Wrap as its own
// subtest and reports any output that differs from the expected string.
func TestWrap(t *testing.T) {
	for idx := range cases {
		tc := cases[idx]
		t.Run(tc.name, func(t *testing.T) {
			got := ansi.Wrap(tc.input, tc.limit, tc.preserveSpace)
			if got == tc.expected {
				return
			}
			// Case numbers in the message are 1-based.
			t.Errorf("case %d, expected %q, got %q", idx+1, tc.expected, got)
		})
	}
}

// wwCases is the table driving TestWordwrap. Each entry is passed to
// ansi.Wordwrap as (input, limit, breakPoints) and the result is compared
// against expected.
var wwCases = []struct {
// name is the subtest name.
name string
// input is the text handed to ansi.Wordwrap.
input string
// limit is the line length handed to ansi.Wordwrap.
limit int
// breakPoints is forwarded as ansi.Wordwrap's breakpoints argument.
breakPoints string
// expected is the exact output ansi.Wordwrap must produce.
expected string
}{
{"empty string", "", 0, "", ""},
{"passthrough", "foobar\n ", 0, "", "foobar\n "},
{"pass", "foo", 3, "", "foo"},
{"toolong", "foobarfoo", 4, "", "foobarfoo"},
{"white space", "foo bar foo", 4, "", "foo\nbar\nfoo"},
{"broken_at_spaces", "foo bars foobars", 4, "", "foo\nbars\nfoobars"},
{"hyphen", "foo-foobar", 4, "-", "foo-\nfoobar"},
{"emoji_breakpoint", "foo😃 foobar", 4, "😃", "foo😃\nfoobar"},
{"wide_emoji_breakpoint", "foo🫧 foobar", 4, "🫧", "foo🫧\nfoobar"},
{"space_breakpoint", "foo --bar", 9, "-", "foo --bar"},
{"simple", "foo bars foobars", 4, "", "foo\nbars\nfoobars"},
{"limit", "foo bar", 5, "", "foo\nbar"},
{"remove white spaces", "foo \nb ar ", 4, "", "foo\nb\nar"},
{"white space trail width", "foo\nb\t a\n bar", 4, "", "foo\nb\t a\n bar"},
{"explicit_line_break", "foo bar foo\n", 4, "", "foo\nbar\nfoo\n"},
{"explicit_breaks", "\nfoo bar\n\n\nfoo\n", 4, "", "\nfoo\nbar\n\n\nfoo\n"},
{"example", " This is a list: \n\n\t* foo\n\t* bar\n\n\n\t* foo \nbar ", 6, "", " This\nis a\nlist: \n\n\t* foo\n\t* bar\n\n\n\t* foo\nbar"},
{"style_code_dont_affect_length", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", 7, "", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m"},
{"style_code_dont_get_wrapped", "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust another test\x1B[38;2;249;38;114m)\x1B[0m", 3, "", "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust\nanother\ntest\x1B[38;2;249;38;114m)\x1B[0m"},
{"osc8_wrap", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\ สวัสดีสวัสดี\x1b]8;;\x1b\\", 8, "", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nสวัสดีสวัสดี\x1b]8;;\x1b\\"},
}

// TestWordwrap runs every entry of the wwCases table through ansi.Wordwrap as
// its own subtest and reports any output that differs from the expected
// string.
func TestWordwrap(t *testing.T) {
	for idx := range wwCases {
		tc := wwCases[idx]
		t.Run(tc.name, func(t *testing.T) {
			got := ansi.Wordwrap(tc.input, tc.limit, tc.breakPoints)
			if got == tc.expected {
				return
			}
			// Case numbers in the message are 1-based.
			t.Errorf("case %d, expected %q, got %q", idx+1, tc.expected, got)
		})
	}
}

// TestWrapWordwrap feeds the output of ansi.Wordwrap into ansi.Wrap with the
// same limit and checks the combined result. It is currently skipped as work
// in progress.
func TestWrapWordwrap(t *testing.T) {
	t.Skip("WIP")

	const (
		input = "the quick brown foxxxxxxxxxxxxxxxx jumped over the lazy dog."
		limit = 16
		// expected is used for both the comparison and the failure message so
		// the reported value is always the one that was actually compared.
		// NOTE(review): the test is WIP; confirm this is the intended output.
		expected = "the quick brown\nfoxxxxxxxxxxxxx\nxxxx jumped over\nthe lazy dog."
	)

	output := ansi.Wordwrap(input, limit, "")
	t.Logf("output: %q", output)
	output = ansi.Wrap(output, limit, false)
	if output != expected {
		t.Errorf("expected %q, got %q", expected, output)
	}
}

// Unused reference text kept next to TestWrapWordwrap.
// NOTE(review): looks like the test input word-wrapped at 16 columns with the
// over-long word left intact on its own line — confirm.
const _ = `
the quick brown
foxxxxxxxxxxxxxxxx
jumped over the
lazy dog.
`

// Unused reference text kept next to TestWrapWordwrap.
// NOTE(review): looks like the test input hard-wrapped at 16 columns, splitting
// words wherever the limit falls — confirm.
const _ = `
the quick brown
foxxxxxxxxxxxxxx
xx jumped over t
he lazy dog.
`

0 comments on commit 7faadd0

Please sign in to comment.