Skip to content

Commit 550abdb

Browse files
authored
Improve "ellipsis string" (#32989)
1 parent 9bfa9f4 commit 550abdb

File tree

2 files changed

+60
-8
lines changed

2 files changed

+60
-8
lines changed

modules/util/truncate.go

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package util
55

66
import (
77
"strings"
8+
"unicode"
89
"unicode/utf8"
910
)
1011

@@ -18,6 +19,30 @@ func IsLikelyEllipsisLeftPart(s string) bool {
1819
return strings.HasSuffix(s, utf8Ellipsis) || strings.HasSuffix(s, asciiEllipsis)
1920
}
2021

22+
// ellipsisGuessDisplayWidth estimates how many ASCII-width columns the rune r
// takes up when displayed.
//
// The goal is to keep a truncated string as long as possible, so wide runes
// (CJK, emoji, ...) are counted as 2 columns instead of their 3-4 byte length.
// This is only a best guess: the real width of a rune can't be known without
// an actual font and renderer.
//
// ATTENTION: the result is always 1 or 2 and must never be zero,
// see ellipsisDisplayString's comment for details.
func ellipsisGuessDisplayWidth(r rune) int {
	// Everything up to U+00FF (ASCII and Latin-1) is treated as narrow.
	if r <= 0xFF {
		return 1
	}

	// The ideographic space is in the Space category, but it is as wide as a
	// CJK character, so it has to be handled before the narrow categories.
	if r == '\u3000' {
		return 2
	}

	// Mark (M), format (Cf), surrogate (Cs) and space (Z) runes count as narrow.
	if unicode.In(r, unicode.M, unicode.Cf, unicode.Cs, unicode.Z) {
		return 1
	}

	// Anything else is assumed to be wide.
	return 2
}
45+
2146
// EllipsisDisplayString returns a truncated short string for display purpose.
2247
// The length is the approximate number of ASCII-width columns in the string (CJK/emoji count as 2 ASCII widths).
2348
// It appends "…" or "..." at the end of the truncated string.
@@ -56,10 +81,7 @@ func ellipsisDisplayString(str string, limit int) (res string, offset int, trunc
5681
for i, r := range str {
5782
encounterInvalid = encounterInvalid || r == utf8.RuneError
5883
pos = i
59-
runeWidth := 1
60-
if r >= 128 {
61-
runeWidth = 2 // CJK/emoji chars are considered as 2-ASCII width
62-
}
84+
runeWidth := ellipsisGuessDisplayWidth(r)
6385
if used+runeWidth+3 > limit {
6486
break
6587
}
@@ -74,10 +96,7 @@ func ellipsisDisplayString(str string, limit int) (res string, offset int, trunc
7496
if nextCnt >= 4 {
7597
break
7698
}
77-
nextWidth++
78-
if r >= 128 {
79-
nextWidth++ // CJK/emoji chars are considered as 2-ASCII width
80-
}
99+
nextWidth += ellipsisGuessDisplayWidth(r)
81100
nextCnt++
82101
}
83102
if nextCnt <= 3 && used+nextWidth <= limit {

modules/util/truncate_test.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,30 @@ import (
1111
"github.com/stretchr/testify/assert"
1212
)
1313

14+
func TestEllipsisGuessDisplayWidth(t *testing.T) {
15+
cases := []struct {
16+
r string
17+
want int
18+
}{
19+
{r: "a", want: 1},
20+
{r: "é", want: 1},
21+
{r: "测", want: 2},
22+
{r: "⚽", want: 2},
23+
{r: "☁️", want: 3}, // 2 runes, it has a mark
24+
{r: "\u200B", want: 1}, // ZWSP
25+
{r: "\u3000", want: 2}, // ideographic space
26+
}
27+
for _, c := range cases {
28+
t.Run(c.r, func(t *testing.T) {
29+
w := 0
30+
for _, r := range c.r {
31+
w += ellipsisGuessDisplayWidth(r)
32+
}
33+
assert.Equal(t, c.want, w, "hex=% x", []byte(c.r))
34+
})
35+
}
36+
}
37+
1438
func TestEllipsisString(t *testing.T) {
1539
cases := []struct {
1640
limit int
@@ -37,6 +61,15 @@ func TestEllipsisString(t *testing.T) {
3761
{limit: 7, input: "测试文本", left: "测试…", right: "…文本"},
3862
{limit: 8, input: "测试文本", left: "测试文本", right: ""},
3963
{limit: 9, input: "测试文本", left: "测试文本", right: ""},
64+
65+
{limit: 6, input: "测试abc", left: "测…", right: "…试abc"},
66+
{limit: 7, input: "测试abc", left: "测试abc", right: ""}, // exactly 7-width
67+
{limit: 8, input: "测试abc", left: "测试abc", right: ""},
68+
69+
{limit: 7, input: "测abc试啊", left: "测ab…", right: "…c试啊"},
70+
{limit: 8, input: "测abc试啊", left: "测abc…", right: "…试啊"},
71+
{limit: 9, input: "测abc试啊", left: "测abc试啊", right: ""}, // exactly 9-width
72+
{limit: 10, input: "测abc试啊", left: "测abc试啊", right: ""},
4073
}
4174
for _, c := range cases {
4275
t.Run(fmt.Sprintf("%s(%d)", c.input, c.limit), func(t *testing.T) {

0 commit comments

Comments
 (0)