Skip to content

Commit dd48f67

Browse files
isidentical authored and aisk committed
bpo-43950: handle wide unicode characters in tracebacks (python#28150)
1 parent 035681b commit dd48f67

File tree

2 files changed

+98
-14
lines changed

2 files changed

+98
-14
lines changed

Lib/test/test_traceback.py

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -924,8 +924,63 @@ def f():
924924
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
925925
" callable()",
926926
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 4}, in f",
927-
" print(1, www(",
928-
" ^^^^",
927+
f" print(1, www(",
928+
f" ^^^^^^^",
929+
]
930+
self.assertEqual(actual, expected)
931+
932+
def test_byte_offset_with_wide_characters_term_highlight(self):
933+
def f():
934+
说明说明 = 1
935+
şçöğıĤellö = 0 # not wide but still non-ascii
936+
return 说明说明 / şçöğıĤellö
937+
938+
actual = self.get_exception(f)
939+
expected = [
940+
f"Traceback (most recent call last):",
941+
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
942+
f" callable()",
943+
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 3}, in f",
944+
f" return 说明说明 / şçöğıĤellö",
945+
f" ~~~~~~~~~^~~~~~~~~~~~",
946+
]
947+
self.assertEqual(actual, expected)
948+
949+
def test_byte_offset_with_emojis_term_highlight(self):
950+
def f():
951+
return "✨🐍" + func_说明说明("📗🚛",
952+
"📗🚛") + "🐍"
953+
954+
actual = self.get_exception(f)
955+
expected = [
956+
f"Traceback (most recent call last):",
957+
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
958+
f" callable()",
959+
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 1}, in f",
960+
f' return "✨🐍" + func_说明说明("📗🚛",',
961+
f" ^^^^^^^^^^^^^",
962+
]
963+
self.assertEqual(actual, expected)
964+
965+
def test_byte_offset_wide_chars_subscript(self):
966+
def f():
967+
my_dct = {
968+
"✨🚛✨": {
969+
"说明": {
970+
"🐍🐍🐍": None
971+
}
972+
}
973+
}
974+
return my_dct["✨🚛✨"]["说明"]["🐍"]["说明"]["🐍🐍"]
975+
976+
actual = self.get_exception(f)
977+
expected = [
978+
f"Traceback (most recent call last):",
979+
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
980+
f" callable()",
981+
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 8}, in f",
982+
f' return my_dct["✨🚛✨"]["说明"]["🐍"]["说明"]["🐍🐍"]',
983+
f" ~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^",
929984
]
930985
self.assertEqual(actual, expected)
931986

Lib/traceback.py

Lines changed: 41 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -485,39 +485,49 @@ def format_frame_summary(self, frame_summary):
485485
stripped_line = frame_summary.line.strip()
486486
row.append(' {}\n'.format(stripped_line))
487487

488-
orig_line_len = len(frame_summary._original_line)
488+
line = frame_summary._original_line
489+
orig_line_len = len(line)
489490
frame_line_len = len(frame_summary.line.lstrip())
490491
stripped_characters = orig_line_len - frame_line_len
491492
if (
492493
frame_summary.colno is not None
493494
and frame_summary.end_colno is not None
494495
):
495496
start_offset = _byte_offset_to_character_offset(
496-
frame_summary._original_line, frame_summary.colno) + 1
497+
line, frame_summary.colno)
497498
end_offset = _byte_offset_to_character_offset(
498-
frame_summary._original_line, frame_summary.end_colno) + 1
499+
line, frame_summary.end_colno)
500+
code_segment = line[start_offset:end_offset]
499501

500502
anchors = None
501503
if frame_summary.lineno == frame_summary.end_lineno:
502504
with suppress(Exception):
503-
anchors = _extract_caret_anchors_from_line_segment(
504-
frame_summary._original_line[start_offset - 1:end_offset - 1]
505-
)
505+
anchors = _extract_caret_anchors_from_line_segment(code_segment)
506506
else:
507-
end_offset = stripped_characters + len(stripped_line)
507+
# Don't count the newline since the anchors only need to
508+
# go up until the last character of the line.
509+
end_offset = len(line.rstrip())
508510

509511
# show indicators if primary char doesn't span the frame line
510512
if end_offset - start_offset < len(stripped_line) or (
511513
anchors and anchors.right_start_offset - anchors.left_end_offset > 0):
514+
# When showing this on a terminal, some of the non-ASCII characters
515+
# might be rendered as double-width characters, so we need to take
516+
# that into account when calculating the length of the line.
517+
dp_start_offset = _display_width(line, start_offset) + 1
518+
dp_end_offset = _display_width(line, end_offset) + 1
519+
512520
row.append(' ')
513-
row.append(' ' * (start_offset - stripped_characters))
521+
row.append(' ' * (dp_start_offset - stripped_characters))
514522

515523
if anchors:
516-
row.append(anchors.primary_char * (anchors.left_end_offset))
517-
row.append(anchors.secondary_char * (anchors.right_start_offset - anchors.left_end_offset))
518-
row.append(anchors.primary_char * (end_offset - start_offset - anchors.right_start_offset))
524+
dp_left_end_offset = _display_width(code_segment, anchors.left_end_offset)
525+
dp_right_start_offset = _display_width(code_segment, anchors.right_start_offset)
526+
row.append(anchors.primary_char * dp_left_end_offset)
527+
row.append(anchors.secondary_char * (dp_right_start_offset - dp_left_end_offset))
528+
row.append(anchors.primary_char * (dp_end_offset - dp_start_offset - dp_right_start_offset))
519529
else:
520-
row.append('^' * (end_offset - start_offset))
530+
row.append('^' * (dp_end_offset - dp_start_offset))
521531

522532
row.append('\n')
523533

@@ -638,6 +648,25 @@ def _extract_caret_anchors_from_line_segment(segment):
638648

639649
return None
640650

651+
# East Asian Width classes that fixed-width output devices render using two
# columns: "W" (Wide) and "F" (Fullwidth).
_WIDE_CHAR_SPECIFIERS = "WF"


def _display_width(line, offset=None):
    """Return the number of display columns occupied by ``line[:offset]``.

    The result is the width the prefix would take on a fixed-width output
    device.  Characters whose East Asian Width class is listed in
    ``_WIDE_CHAR_SPECIFIERS`` (wide CJK characters, most emojis) count as
    two columns; every other character counts as one.

    *line* is the source line (a ``str``).  *offset* is a character offset
    into *line* and follows slice semantics, so a value past the end of the
    line is clamped to the line length; ``None`` (the default) measures the
    whole line.
    """
    # Measure the same slice in both paths so that the ASCII fast path and
    # the general path always agree, even for out-of-range offsets.
    prefix = line if offset is None else line[:offset]

    # Fast track: every ASCII character occupies exactly one column.  Only
    # the prefix matters, so non-ASCII text after *offset* does not force
    # the slow path.
    if prefix.isascii():
        return len(prefix)

    # Imported lazily so the cost is paid only when a non-ASCII line is
    # actually being measured.
    import unicodedata

    east_asian_width = unicodedata.east_asian_width
    return sum(
        2 if east_asian_width(char) in _WIDE_CHAR_SPECIFIERS else 1
        for char in prefix
    )
668+
669+
641670

642671
class _ExceptionPrintContext:
643672
def __init__(self):

0 commit comments

Comments
 (0)