-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(term): ansi: implement ANSI aware truncation
This implements an ANSI and wide-characters aware truncation algorithm that uses the newly merged [ANSI parser state machine][statemachine] and the fantastic uniseg library. Since this is using the ANSI state machine, it's compatible with `CSI m` (SGR) style sequences, `OSC 8` (hyperlinks), and basically any other escape sequence supported in the state machine (DCS, ESC, SOS, PM, APC). Related: muesli/reflow#71 [statemachine]: https://github.com/charmbracelet/x/blob/main/exp/term/ansi/parser/transition_table.go
- Loading branch information
1 parent
8cc69f8
commit 471d31b
Showing
2 changed files
with
173 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
package ansi | ||
|
||
import ( | ||
"bytes" | ||
|
||
. "github.com/charmbracelet/x/exp/term/ansi/parser" | ||
"github.com/rivo/uniseg" | ||
) | ||
|
||
// Truncate truncates a string to a given length, adding a tail to the | ||
// end if the string is longer than the given length. | ||
// This function is aware of ANSI escape codes and will not break them, and | ||
// accounts for wide-characters (such as East Asians and emojis). | ||
func Truncate(s string, length int, tail string) string { | ||
tw := StringWidth(tail) | ||
length -= tw | ||
if length < 0 { | ||
return "" | ||
} | ||
|
||
var cluster []byte | ||
var buf bytes.Buffer | ||
curWidth := 0 | ||
ignoring := false | ||
gstate := -1 | ||
pstate := GroundState // initial state | ||
b := []byte(s) | ||
i := 0 | ||
|
||
// Here we iterate over the bytes of the string and collect printable | ||
// characters and runes. We also keep track of the width of the string | ||
// in cells. | ||
// Once we reach the given length, we start ignoring characters and only | ||
// collect ANSI escape codes until we reach the end of the next escape | ||
// code, or end of string. | ||
for i < len(b) { | ||
state, action := Table.Transition(pstate, b[i]) | ||
// log.Printf("pstate: %s, state: %s, action: %s, code: %q", StateNames[pstate], StateNames[state], ActionNames[action], s[i]) | ||
|
||
switch action { | ||
case CollectAction: | ||
// This action happens when we transition to the Utf8State. | ||
if w := utf8ByteLen(b[i]); w > 1 { | ||
var width int | ||
cluster, _, width, gstate = uniseg.FirstGraphemeCluster(b[i:], gstate) | ||
|
||
// log.Printf("cluster: %q, width: %d, curWidth: %d", string(cluster), width, curWidth) | ||
|
||
// increment the index by the length of the cluster | ||
i += len(cluster) | ||
|
||
// Are we ignoring? Skip to the next byte | ||
if ignoring { | ||
continue | ||
} | ||
|
||
// Is this gonna be too wide? | ||
// If so write the tail and stop collecting. | ||
if curWidth+width >= length && !ignoring { | ||
ignoring = true | ||
buf.WriteString(tail) | ||
} | ||
|
||
if curWidth+width > length { | ||
continue | ||
} | ||
|
||
curWidth += width | ||
for _, r := range cluster { | ||
buf.WriteByte(r) | ||
} | ||
|
||
// Done collecting, now we're back in the ground state. | ||
pstate = GroundState | ||
continue | ||
} else { | ||
// Collecting sequence intermediate bytes | ||
buf.WriteByte(b[i]) | ||
} | ||
case PrintAction: | ||
// Is this gonna be too wide? | ||
// If so write the tail and stop collecting. | ||
if curWidth >= length && !ignoring { | ||
ignoring = true | ||
buf.WriteString(tail) | ||
} | ||
|
||
// Skip to the next byte if we're ignoring | ||
if ignoring { | ||
i++ | ||
continue | ||
} | ||
|
||
// collects printable ASCII | ||
curWidth++ | ||
fallthrough | ||
default: | ||
buf.WriteByte(b[i]) | ||
i++ | ||
} | ||
|
||
// Transition to the next state. | ||
pstate = state | ||
|
||
// log.Printf("buf: %q, curWidth: %d, ignoring: %v", buf.String(), curWidth, ignoring) | ||
|
||
// Once we reach the given length, we start ignoring runes and write | ||
// the tail to the buffer. | ||
if curWidth > length && !ignoring { | ||
ignoring = true | ||
buf.WriteString(tail) | ||
} | ||
} | ||
|
||
return buf.String() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
package ansi | ||
|
||
import ( | ||
"testing" | ||
) | ||
|
||
// tcases is the table of truncation scenarios used by TestTruncate.
// Each entry gives the input string, the tail to append on truncation, the
// maximum width in cells, and the exact string Truncate is expected to return.
var tcases = []struct {
	name   string
	input  string
	tail   string
	width  int
	expect string
}{
	{"empty", "", "", 0, ""},
	{"simple", "foobar", "", 3, "foo"},
	{"passthrough", "foobar", "", 10, "foobar"},
	{"ascii", "hello", "", 3, "hel"},
	{"emoji", "👋", "", 2, "👋"},
	{"wideemoji", "🫧", "", 2, "🫧"},
	{"controlemoji", "\x1b[31mhello 👋abc\x1b[0m", "", 8, "\x1b[31mhello 👋\x1b[0m"},
	{"osc8", "\x1b]8;;https://charm.sh\x1b\\Charmbracelet 🫧\x1b]8;;\x1b\\", "", 5, "\x1b]8;;https://charm.sh\x1b\\Charm\x1b]8;;\x1b\\"},
	{"osc8_8bit", "\x9d8;;https://charm.sh\x9cCharmbracelet 🫧\x9d8;;\x9c", "", 5, "\x9d8;;https://charm.sh\x9cCharm\x9d8;;\x9c"},
	{"style_tail", "\x1B[38;5;219mHiya!", "…", 3, "\x1B[38;5;219mHi…"},
	{"double_style_tail", "\x1B[38;5;219mHiya!\x1B[38;5;219mHello", "…", 7, "\x1B[38;5;219mHiya!\x1B[38;5;219mH…"},
	{"noop", "\x1B[7m--", "", 2, "\x1B[7m--"},
	{"double_width", "\x1B[38;2;249;38;114m你好\x1B[0m", "", 3, "\x1B[38;2;249;38;114m你\x1B[0m"},
	{"double_width_rune", "你", "", 1, ""},
	{"double_width_runes", "你好", "", 2, "你"},
	// The input must be wider than the requested width for the tail to
	// appear; a single space could never produce the expected " …".
	{"spaces_only", "    ", "…", 2, " …"},
	{"longer_tail", "foo", "...", 2, ""},
	{"same_tail_width", "foo", "...", 3, "..."},
	{"same_tail_width_control", "\x1b[31mfoo\x1b[0m", "...", 3, "\x1b[31m...\x1b[0m"},
	{"same_width", "foo", "", 3, "foo"},
	{"truncate_with_tail", "foobar", ".", 4, "foo."},
	{"style", "I really \x1B[38;2;249;38;114mlove\x1B[0m Go!", "", 8, "I really\x1B[38;2;249;38;114m\x1B[0m"},
	{"dcs", "\x1BPq#0;2;0;0;0#1;2;100;100;0#2;2;0;100;0#1~~@@vv@@~~@@~~$#2??}}GG}}??}}??-#1!14@\x1B\\foobar", "…", 4, "\x1BPq#0;2;0;0;0#1;2;100;100;0#2;2;0;100;0#1~~@@vv@@~~@@~~$#2??}}GG}}??}}??-#1!14@\x1B\\foo…"},
}
|
||
func TestTruncate(t *testing.T) { | ||
for i, c := range tcases { | ||
t.Run(c.name, func(t *testing.T) { | ||
if result := Truncate(c.input, c.width, c.tail); result != c.expect { | ||
t.Errorf("test case %d failed: expected %q, got %q", i+1, c.expect, result) | ||
} | ||
}) | ||
} | ||
} | ||
|
||
func BenchmarkTruncateString(b *testing.B) { | ||
b.RunParallel(func(pb *testing.PB) { | ||
b.ReportAllocs() | ||
b.ResetTimer() | ||
for pb.Next() { | ||
Truncate("foo", 2, "") | ||
} | ||
}) | ||
} |