Skip to content

Commit a5018f1

Browse files
isidentical authored and pablogsal committed
[3.12] bpo-43950: handle wide unicode characters in tracebacks (GH-28150)
(cherry picked from commit 78e6d72) Co-authored-by: Batuhan Taskaya <[email protected]>
1 parent 7cce26b commit a5018f1

File tree

2 files changed

+98
-14
lines changed

2 files changed

+98
-14
lines changed

Lib/test/test_traceback.py

Lines changed: 57 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -922,8 +922,63 @@ def f():
922922
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
923923
" callable()",
924924
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 4}, in f",
925-
" print(1, www(",
926-
" ^^^^",
925+
f" print(1, www(",
926+
f" ^^^^^^^",
927+
]
928+
self.assertEqual(actual, expected)
929+
930+
def test_byte_offset_with_wide_characters_term_highlight(self):
931+
def f():
932+
说明说明 = 1
933+
şçöğıĤellö = 0 # not wide but still non-ascii
934+
return 说明说明 / şçöğıĤellö
935+
936+
actual = self.get_exception(f)
937+
expected = [
938+
f"Traceback (most recent call last):",
939+
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
940+
f" callable()",
941+
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 3}, in f",
942+
f" return 说明说明 / şçöğıĤellö",
943+
f" ~~~~~~~~~^~~~~~~~~~~~",
944+
]
945+
self.assertEqual(actual, expected)
946+
947+
def test_byte_offset_with_emojis_term_highlight(self):
948+
def f():
949+
return "✨🐍" + func_说明说明("📗🚛",
950+
"📗🚛") + "🐍"
951+
952+
actual = self.get_exception(f)
953+
expected = [
954+
f"Traceback (most recent call last):",
955+
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
956+
f" callable()",
957+
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 1}, in f",
958+
f' return "✨🐍" + func_说明说明("📗🚛",',
959+
f" ^^^^^^^^^^^^^",
960+
]
961+
self.assertEqual(actual, expected)
962+
963+
def test_byte_offset_wide_chars_subscript(self):
964+
def f():
965+
my_dct = {
966+
"✨🚛✨": {
967+
"说明": {
968+
"🐍🐍🐍": None
969+
}
970+
}
971+
}
972+
return my_dct["✨🚛✨"]["说明"]["🐍"]["说明"]["🐍🐍"]
973+
974+
actual = self.get_exception(f)
975+
expected = [
976+
f"Traceback (most recent call last):",
977+
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
978+
f" callable()",
979+
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 8}, in f",
980+
f' return my_dct["✨🚛✨"]["说明"]["🐍"]["说明"]["🐍🐍"]',
981+
f" ~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^",
927982
]
928983
self.assertEqual(actual, expected)
929984

Lib/traceback.py

Lines changed: 41 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -470,39 +470,49 @@ def format_frame_summary(self, frame_summary):
470470
stripped_line = frame_summary.line.strip()
471471
row.append(' {}\n'.format(stripped_line))
472472

473-
orig_line_len = len(frame_summary._original_line)
473+
line = frame_summary._original_line
474+
orig_line_len = len(line)
474475
frame_line_len = len(frame_summary.line.lstrip())
475476
stripped_characters = orig_line_len - frame_line_len
476477
if (
477478
frame_summary.colno is not None
478479
and frame_summary.end_colno is not None
479480
):
480481
start_offset = _byte_offset_to_character_offset(
481-
frame_summary._original_line, frame_summary.colno) + 1
482+
line, frame_summary.colno)
482483
end_offset = _byte_offset_to_character_offset(
483-
frame_summary._original_line, frame_summary.end_colno) + 1
484+
line, frame_summary.end_colno)
485+
code_segment = line[start_offset:end_offset]
484486

485487
anchors = None
486488
if frame_summary.lineno == frame_summary.end_lineno:
487489
with suppress(Exception):
488-
anchors = _extract_caret_anchors_from_line_segment(
489-
frame_summary._original_line[start_offset - 1:end_offset - 1]
490-
)
490+
anchors = _extract_caret_anchors_from_line_segment(code_segment)
491491
else:
492-
end_offset = stripped_characters + len(stripped_line)
492+
# Don't count the newline since the anchors only need to
493+
# go up until the last character of the line.
494+
end_offset = len(line.rstrip())
493495

494496
# show indicators if primary char doesn't span the frame line
495497
if end_offset - start_offset < len(stripped_line) or (
496498
anchors and anchors.right_start_offset - anchors.left_end_offset > 0):
499+
# When showing this on a terminal, some of the non-ASCII characters
500+
# might be rendered as double-width characters, so we need to take
501+
# that into account when calculating the length of the line.
502+
dp_start_offset = _display_width(line, start_offset) + 1
503+
dp_end_offset = _display_width(line, end_offset) + 1
504+
497505
row.append(' ')
498-
row.append(' ' * (start_offset - stripped_characters))
506+
row.append(' ' * (dp_start_offset - stripped_characters))
499507

500508
if anchors:
501-
row.append(anchors.primary_char * (anchors.left_end_offset))
502-
row.append(anchors.secondary_char * (anchors.right_start_offset - anchors.left_end_offset))
503-
row.append(anchors.primary_char * (end_offset - start_offset - anchors.right_start_offset))
509+
dp_left_end_offset = _display_width(code_segment, anchors.left_end_offset)
510+
dp_right_start_offset = _display_width(code_segment, anchors.right_start_offset)
511+
row.append(anchors.primary_char * dp_left_end_offset)
512+
row.append(anchors.secondary_char * (dp_right_start_offset - dp_left_end_offset))
513+
row.append(anchors.primary_char * (dp_end_offset - dp_start_offset - dp_right_start_offset))
504514
else:
505-
row.append('^' * (end_offset - start_offset))
515+
row.append('^' * (dp_end_offset - dp_start_offset))
506516

507517
row.append('\n')
508518

@@ -623,6 +633,25 @@ def _extract_caret_anchors_from_line_segment(segment):
623633

624634
return None
625635

636+
_WIDE_CHAR_SPECIFIERS = "WF"
637+
638+
def _display_width(line, offset):
639+
"""Calculate the extra amount of width space the given source
640+
code segment might take if it were to be displayed on a fixed
641+
width output device. Supports wide unicode characters and emojis."""
642+
643+
# Fast track for ASCII-only strings
644+
if line.isascii():
645+
return offset
646+
647+
import unicodedata
648+
649+
return sum(
650+
2 if unicodedata.east_asian_width(char) in _WIDE_CHAR_SPECIFIERS else 1
651+
for char in line[:offset]
652+
)
653+
654+
626655

627656
class _ExceptionPrintContext:
628657
def __init__(self):

0 commit comments

Comments
 (0)