Skip to content

Commit 7338cb9

Browse files
authored
Merge pull request #4006 from Textualize/fix-grapheme-stuck
fix for infinite loop in split_graphemes
2 parents 2770102 + 905b397 commit 7338cb9

File tree

3 files changed

+77
-12
lines changed

3 files changed

+77
-12
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## Unreleased
9+
10+
### Fixed
11+
12+
- Fixed infinite loop with `cells.split_graphemes`
13+
814
## [14.3.2] - 2026-02-01
915

1016
### Fixed

rich/cells.py

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -161,14 +161,19 @@ def _cell_len(text: str, unicode_version: str) -> int:
161161
def split_graphemes(
162162
text: str, unicode_version: str = "auto"
163163
) -> "tuple[list[CellSpan], int]":
164-
"""Divide text into spans that define a single grapheme.
164+
"""Divide text into spans that define a single grapheme, and additionally return the cell length of the whole string.
165+
166+
The returned spans will cover every index in the string, with no gaps. It is possible for some graphemes to have a cell length of zero.
167+
This can occur for nonsense strings like two zero width joiners, or for control codes that don't contribute to the grapheme size.
165168
166169
Args:
167170
text: String to split.
168171
unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.
169172
170173
Returns:
171-
List of spans.
174+
A tuple of a list of *spans* and the cell length of the entire string. Each span is a tuple
175+
of three values consisting of (<START>, <END>, <CELL LENGTH>), where START and END are string indices,
176+
and CELL LENGTH is the cell length of the single grapheme.
172177
"""
173178

174179
cell_table = load_cell_table(unicode_version)
@@ -181,32 +186,48 @@ def split_graphemes(
181186
SPECIAL = {"\u200d", "\ufe0f"}
182187
while index < codepoint_count:
183188
if (character := text[index]) in SPECIAL:
189+
if not spans:
190+
# ZWJ or variation selector at the beginning of the string doesn't really make sense.
191+
# But handle it, we must.
192+
spans.append((index, index := index + 1, 0))
193+
continue
184194
if character == "\u200d":
185195
# zero width joiner
186-
index += 2
187-
if spans:
188-
start, _end, cell_length = spans[-1]
189-
spans[-1] = (start, index, cell_length)
190-
elif last_measured_character:
196+
# The condition handles the case where a ZWJ is at the end of the string, and has nothing to join
197+
index += 2 if index < (codepoint_count - 1) else 1
198+
start, _end, cell_length = spans[-1]
199+
spans[-1] = (start, index, cell_length)
200+
else:
191201
# variation selector 16
192202
index += 1
193-
if spans:
203+
if last_measured_character:
194204
start, _end, cell_length = spans[-1]
195205
if last_measured_character in cell_table.narrow_to_wide:
196206
last_measured_character = None
197207
cell_length += 1
198208
total_width += 1
199209
spans[-1] = (start, index, cell_length)
210+
else:
211+
# No previous character to change the size of.
212+
# Shouldn't occur in practice.
213+
# But handle it, we must.
214+
start, _end, cell_length = spans[-1]
215+
spans[-1] = (start, index, cell_length)
200216
continue
201217

202218
if character_width := get_character_cell_size(character, unicode_version):
203219
last_measured_character = character
204220
spans.append((index, index := index + 1, character_width))
205221
total_width += character_width
206-
elif spans:
207-
# zero width characters are associated with the previous character
208-
start, _end, cell_length = spans[-1]
209-
spans[-1] = (start, index := index + 1, cell_length)
222+
else:
223+
# Character has zero width
224+
if spans:
225+
# zero width characters are associated with the previous character
226+
start, _end, cell_length = spans[-1]
227+
spans[-1] = (start, index := index + 1, cell_length)
228+
else:
229+
# A zero width character with no prior spans
230+
spans.append((index, index := index + 1, 0))
210231

211232
return (spans, total_width)
212233

tests/test_cells.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,21 @@ def test_chop_cells_mixed_width():
134134
assert chop_cells(text, 3) == ["あ1", "り2", "34", "が5", "と6", "う7", "8"]
135135

136136

137+
@pytest.mark.parametrize(
138+
"text,expected",
139+
[
140+
("", []),
141+
("\x1b", []),
142+
("\x1b\x1b", []),
143+
("\x1b\x1b\x1b", []),
144+
("\x1b\x1b\x1b\x1b", []),
145+
],
146+
)
147+
def test_chop_cells_zero_width(text: str, expected: list[str]) -> None:
148+
"""Test zer width characters being chopped."""
149+
assert chop_cells(text, 3) == expected
150+
151+
137152
def test_is_single_cell_widths() -> None:
138153
# Check _is_single_cell_widths reports correctly
139154
for character in string.printable:
@@ -172,12 +187,35 @@ def test_is_single_cell_widths() -> None:
172187
("♻", [(0, 1, 1)], 1),
173188
("♻️", [(0, 2, 2)], 2),
174189
("♻♻️", [(0, 1, 1), (1, 3, 2)], 3),
190+
("\x1b", [(0, 1, 0)], 0), # One escape should have zero width
191+
("\x1b\x1b", [(0, 2, 0)], 0), # Two escapes should have zero width
192+
(
193+
"\ufe0f",
194+
[(0, 1, 0)],
195+
0,
196+
), # Variation selector 16, without anything to change should have zero width
197+
(
198+
"\ufe0f\ufe0f",
199+
[(0, 2, 0)],
200+
0,
201+
), # 2 X variation selector 16, without anything to change should have zero width
202+
(
203+
"\u200d",
204+
[(0, 1, 0)],
205+
0,
206+
), # A zero width joiner with nothing prior should have zero width
207+
(
208+
"\u200d\u200d",
209+
[(0, 2, 0)],
210+
0,
211+
), # Two ZWJs should have zero width
175212
],
176213
)
177214
def test_split_graphemes(
178215
text: str, expected_spans: list[CellSpan], expected_cell_length: int
179216
):
180217
spans, cell_length = split_graphemes(text)
218+
print(spans)
181219
assert cell_len(text) == expected_cell_length
182220
assert spans == expected_spans
183221
assert cell_length == expected_cell_length

0 commit comments

Comments
 (0)