diff --git a/Lib/test/test_contextlib.py b/Lib/test/test_contextlib.py index dbc7dfcc24bf07..c9b47f733c1e06 100644 --- a/Lib/test/test_contextlib.py +++ b/Lib/test/test_contextlib.py @@ -802,11 +802,17 @@ def raise_exc(exc): self.assertIsInstance(exc, ValueError) ve_frames = traceback.extract_tb(exc.__traceback__) expected = \ - [('test_exit_exception_traceback', 'with self.exit_stack() as stack:')] + \ + [( + 'test_exit_exception_traceback', + 'with self.exit_stack() as stack:\n' + ' stack.callback(raise_exc, ValueError)\n' + ' 1/0' + )] + \ self.callback_error_internal_frames + \ [('_exit_wrapper', 'callback(*args, **kwds)'), ('raise_exc', 'raise exc')] + # breakpoint() self.assertEqual( [(f.name, f.line) for f in ve_frames], expected) diff --git a/Lib/test/test_doctest.py b/Lib/test/test_doctest.py index 6e12e82a7a0084..838b2764a8cf4a 100644 --- a/Lib/test/test_doctest.py +++ b/Lib/test/test_doctest.py @@ -2918,6 +2918,9 @@ def test_unicode(): """ Traceback (most recent call last): File ... 
exec(compile(example.source, filename, "single", + ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + compileflags, True), test.globs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "", line 1, in raise Exception('clé') Exception: clé diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index 106baf959a6898..737c9d1afb5bf8 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -2045,6 +2045,7 @@ def test_multiline_not_highlighted(self): """, [ ' 1 < 2 and', + ' 3 > 4', 'AssertionError', ], ), @@ -2052,7 +2053,7 @@ def test_multiline_not_highlighted(self): for source, expected in cases: with self.subTest(source): result = self.write_source(source) - self.assertEqual(result[-2:], expected) + self.assertEqual(result[-len(expected):], expected) class SyntaxErrorTests(unittest.TestCase): diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index aa8405bd25d120..416d2f33e350e0 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -427,6 +427,7 @@ def f(): 'Traceback (most recent call last):\n' f' File "{__file__}", line {self.callable_line}, in get_exception\n' ' callable()\n' + ' ~~~~~~~~^^\n' f' File "{__file__}", line {lineno_f+1}, in f\n' ' if True: raise ValueError("basic caret tests")\n' ' ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n' @@ -445,6 +446,7 @@ def f_with_unicode(): 'Traceback (most recent call last):\n' f' File "{__file__}", line {self.callable_line}, in get_exception\n' ' callable()\n' + ' ~~~~~~~~^^\n' f' File "{__file__}", line {lineno_f+1}, in f_with_unicode\n' ' if True: raise ValueError("Ĥellö Wörld")\n' ' ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n' @@ -462,6 +464,7 @@ def foo(a: THIS_DOES_NOT_EXIST ) -> int: 'Traceback (most recent call last):\n' f' File "{__file__}", line {self.callable_line}, in get_exception\n' ' callable()\n' + ' ~~~~~~~~^^\n' f' File "{__file__}", line {lineno_f+1}, in f_with_type\n' ' def foo(a: THIS_DOES_NOT_EXIST ) -> int:\n' ' ^^^^^^^^^^^^^^^^^^^\n' @@ -470,8 
+473,6 @@ def foo(a: THIS_DOES_NOT_EXIST ) -> int: self.assertEqual(result_lines, expected_f.splitlines()) def test_caret_multiline_expression(self): - # Make sure no carets are printed for expressions spanning multiple - # lines. def f_with_multiline(): if True: raise ValueError( "error over multiple lines" @@ -482,9 +483,14 @@ def f_with_multiline(): 'Traceback (most recent call last):\n' f' File "{__file__}", line {self.callable_line}, in get_exception\n' ' callable()\n' + ' ~~~~~~~~^^\n' f' File "{__file__}", line {lineno_f+1}, in f_with_multiline\n' ' if True: raise ValueError(\n' - ' ^^^^^^^^^^^^^^^^^' + ' ^^^^^^^^^^^^^^^^^\n' + ' "error over multiple lines"\n' + ' ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n' + ' )\n' + ' ^' ) result_lines = self.get_exception(f_with_multiline) self.assertEqual(result_lines, expected_f.splitlines()) @@ -513,20 +519,19 @@ def f_with_multiline(): 'Traceback (most recent call last):\n' f' File "{__file__}", line {self.callable_line}, in get_exception\n' ' callable()\n' + ' ~~~~~~~~^^\n' f' File "{__file__}", line {lineno_f+2}, in f_with_multiline\n' ' return compile(code, "?", "exec")\n' - ' ^^^^^^^^^^^^^^^^^^^^^^^^^^\n' + ' ~~~~~~~^^^^^^^^^^^^^^^^^^^\n' ' File "?", line 7\n' ' foo(a, z\n' ' ^' - ) + ) result_lines = self.get_exception(f_with_multiline) self.assertEqual(result_lines, expected_f.splitlines()) def test_caret_multiline_expression_bin_op(self): - # Make sure no carets are printed for expressions spanning multiple - # lines. 
def f_with_multiline(): return ( 2 + 1 / @@ -538,9 +543,12 @@ def f_with_multiline(): 'Traceback (most recent call last):\n' f' File "{__file__}", line {self.callable_line}, in get_exception\n' ' callable()\n' + ' ~~~~~~~~^^\n' f' File "{__file__}", line {lineno_f+2}, in f_with_multiline\n' ' 2 + 1 /\n' - ' ^^^' + ' ~~^\n' + ' 0\n' + ' ~' ) result_lines = self.get_exception(f_with_multiline) self.assertEqual(result_lines, expected_f.splitlines()) @@ -555,6 +563,7 @@ def f_with_binary_operator(): 'Traceback (most recent call last):\n' f' File "{__file__}", line {self.callable_line}, in get_exception\n' ' callable()\n' + ' ~~~~~~~~^^\n' f' File "{__file__}", line {lineno_f+2}, in f_with_binary_operator\n' ' return 10 + divisor / 0 + 30\n' ' ~~~~~~~~^~~\n' @@ -572,6 +581,7 @@ def f_with_binary_operator(): 'Traceback (most recent call last):\n' f' File "{__file__}", line {self.callable_line}, in get_exception\n' ' callable()\n' + ' ~~~~~~~~^^\n' f' File "{__file__}", line {lineno_f+2}, in f_with_binary_operator\n' ' return 10 + áóí / 0 + 30\n' ' ~~~~^~~\n' @@ -589,6 +599,7 @@ def f_with_binary_operator(): 'Traceback (most recent call last):\n' f' File "{__file__}", line {self.callable_line}, in get_exception\n' ' callable()\n' + ' ~~~~~~~~^^\n' f' File "{__file__}", line {lineno_f+2}, in f_with_binary_operator\n' ' return 10 + divisor // 0 + 30\n' ' ~~~~~~~~^^~~\n' @@ -600,16 +611,102 @@ def test_caret_for_binary_operators_with_spaces_and_parenthesis(self): def f_with_binary_operator(): a = 1 b = "" - return ( a ) + b + return ( a ) +b + + lineno_f = f_with_binary_operator.__code__.co_firstlineno + expected_error = ( + 'Traceback (most recent call last):\n' + f' File "{__file__}", line {self.callable_line}, in get_exception\n' + ' callable()\n' + ' ~~~~~~~~^^\n' + f' File "{__file__}", line {lineno_f+3}, in f_with_binary_operator\n' + ' return ( a ) +b\n' + ' ~~~~~~~~~~^~\n' + ) + result_lines = self.get_exception(f_with_binary_operator) + 
self.assertEqual(result_lines, expected_error.splitlines()) + + def test_caret_for_binary_operators_multiline(self): + def f_with_binary_operator(): + b = 1 + c = "" + a = b \ + +\ + c # test + return a lineno_f = f_with_binary_operator.__code__.co_firstlineno expected_error = ( 'Traceback (most recent call last):\n' f' File "{__file__}", line {self.callable_line}, in get_exception\n' ' callable()\n' + ' ~~~~~~~~^^\n' f' File "{__file__}", line {lineno_f+3}, in f_with_binary_operator\n' - ' return ( a ) + b\n' - ' ~~~~~~~~~~^~~\n' + ' a = b \\\n' + ' ~~~~~~\n' + ' +\\\n' + ' ^~\n' + ' c # test\n' + ' ~\n' + ) + result_lines = self.get_exception(f_with_binary_operator) + self.assertEqual(result_lines, expected_error.splitlines()) + + def test_caret_for_binary_operators_multiline_two_char(self): + def f_with_binary_operator(): + b = 1 + c = "" + a = ( + (b # test + + ) \ + # + + << (c # test + \ + ) # test + ) + return a + + lineno_f = f_with_binary_operator.__code__.co_firstlineno + expected_error = ( + 'Traceback (most recent call last):\n' + f' File "{__file__}", line {self.callable_line}, in get_exception\n' + ' callable()\n' + ' ~~~~~~~~^^\n' + f' File "{__file__}", line {lineno_f+4}, in f_with_binary_operator\n' + ' (b # test +\n' + ' ~~~~~~~~~~~~\n' + ' ) \\\n' + ' ~~~~\n' + ' # +\n' + ' ~~~\n' + ' << (c # test\n' + ' ^^~~~~~~~~~~~\n' + ' \\\n' + ' ~\n' + ' ) # test\n' + ' ~\n' + ) + result_lines = self.get_exception(f_with_binary_operator) + self.assertEqual(result_lines, expected_error.splitlines()) + + def test_caret_for_binary_operators_multiline_with_unicode(self): + def f_with_binary_operator(): + b = 1 + a = ("ááá" + + "áá") + b + return a + + lineno_f = f_with_binary_operator.__code__.co_firstlineno + expected_error = ( + 'Traceback (most recent call last):\n' + f' File "{__file__}", line {self.callable_line}, in get_exception\n' + ' callable()\n' + ' ~~~~~~~~^^\n' + f' File "{__file__}", line {lineno_f+2}, in f_with_binary_operator\n' + ' a = ("ááá" 
+\n' + ' ~~~~~~~~\n' + ' "áá") + b\n' + ' ~~~~~~^~~\n' ) result_lines = self.get_exception(f_with_binary_operator) self.assertEqual(result_lines, expected_error.splitlines()) @@ -624,6 +721,7 @@ def f_with_subscript(): 'Traceback (most recent call last):\n' f' File "{__file__}", line {self.callable_line}, in get_exception\n' ' callable()\n' + ' ~~~~~~~~^^\n' f' File "{__file__}", line {lineno_f+2}, in f_with_subscript\n' " return some_dict['x']['y']['z']\n" ' ~~~~~~~~~~~~~~~~~~~^^^^^\n' @@ -641,6 +739,7 @@ def f_with_subscript(): 'Traceback (most recent call last):\n' f' File "{__file__}", line {self.callable_line}, in get_exception\n' ' callable()\n' + ' ~~~~~~~~^^\n' f' File "{__file__}", line {lineno_f+2}, in f_with_subscript\n' " return some_dict['ó']['á']['í']['beta']\n" ' ~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^\n' @@ -659,6 +758,7 @@ def f_with_binary_operator(): 'Traceback (most recent call last):\n' f' File "{__file__}", line {self.callable_line}, in get_exception\n' ' callable()\n' + ' ~~~~~~~~^^\n' f' File "{__file__}", line {lineno_f+3}, in f_with_binary_operator\n' ' return b [ a ] + c\n' ' ~~~~~~^^^^^^^^^\n' @@ -666,6 +766,226 @@ def f_with_binary_operator(): result_lines = self.get_exception(f_with_binary_operator) self.assertEqual(result_lines, expected_error.splitlines()) + def test_caret_for_subscript_multiline(self): + def f_with_subscript(): + bbbbb = {} + ccc = 1 + ddd = 2 + b = bbbbb \ + [ ccc # test + + + ddd \ + + ] # test + return b + + lineno_f = f_with_subscript.__code__.co_firstlineno + expected_error = ( + 'Traceback (most recent call last):\n' + f' File "{__file__}", line {self.callable_line}, in get_exception\n' + ' callable()\n' + ' ~~~~~~~~^^\n' + f' File "{__file__}", line {lineno_f+4}, in f_with_subscript\n' + ' b = bbbbb \\\n' + ' ~~~~~~~\n' + ' [ ccc # test\n' + ' ^^^^^^^^^^^^^\n' + ' \n' + ' \n' + ' + ddd \\\n' + ' ^^^^^^^^\n' + ' \n' + ' \n' + ' ] # test\n' + ' ^\n' + ) + result_lines = self.get_exception(f_with_subscript) + 
self.assertEqual(result_lines, expected_error.splitlines()) + + def test_caret_for_call(self): + def f_with_call(): + def f1(a): + def f2(b): + raise RuntimeError("fail") + return f2 + return f1("x")("y") + + lineno_f = f_with_call.__code__.co_firstlineno + expected_error = ( + 'Traceback (most recent call last):\n' + f' File "{__file__}", line {self.callable_line}, in get_exception\n' + ' callable()\n' + ' ~~~~~~~~^^\n' + f' File "{__file__}", line {lineno_f+5}, in f_with_call\n' + ' return f1("x")("y")\n' + ' ~~~~~~~^^^^^\n' + f' File "{__file__}", line {lineno_f+3}, in f2\n' + ' raise RuntimeError("fail")\n' + ) + result_lines = self.get_exception(f_with_call) + self.assertEqual(result_lines, expected_error.splitlines()) + + def test_caret_for_call_unicode(self): + def f_with_call(): + def f1(a): + def f2(b): + raise RuntimeError("fail") + return f2 + return f1("ó")("á") + + lineno_f = f_with_call.__code__.co_firstlineno + expected_error = ( + 'Traceback (most recent call last):\n' + f' File "{__file__}", line {self.callable_line}, in get_exception\n' + ' callable()\n' + ' ~~~~~~~~^^\n' + f' File "{__file__}", line {lineno_f+5}, in f_with_call\n' + ' return f1("ó")("á")\n' + ' ~~~~~~~^^^^^\n' + f' File "{__file__}", line {lineno_f+3}, in f2\n' + ' raise RuntimeError("fail")\n' + ) + result_lines = self.get_exception(f_with_call) + self.assertEqual(result_lines, expected_error.splitlines()) + + def test_caret_for_call_with_spaces_and_parenthesis(self): + def f_with_binary_operator(): + def f(a): + raise RuntimeError("fail") + return f ( "x" ) + 2 + + lineno_f = f_with_binary_operator.__code__.co_firstlineno + expected_error = ( + 'Traceback (most recent call last):\n' + f' File "{__file__}", line {self.callable_line}, in get_exception\n' + ' callable()\n' + ' ~~~~~~~~^^\n' + f' File "{__file__}", line {lineno_f+3}, in f_with_binary_operator\n' + ' return f ( "x" ) + 2\n' + ' ~~~~~~^^^^^^^^^^^\n' + f' File "{__file__}", line {lineno_f+2}, in f\n' + ' raise 
RuntimeError("fail")\n' + ) + result_lines = self.get_exception(f_with_binary_operator) + self.assertEqual(result_lines, expected_error.splitlines()) + + def test_caret_for_call_multiline(self): + def f_with_call(): + class C: + def y(self, a): + def f(b): + raise RuntimeError("fail") + return f + def g(x): + return C() + a = (g(1).y)( + 2 + )(3)(4) + return a + + lineno_f = f_with_call.__code__.co_firstlineno + expected_error = ( + 'Traceback (most recent call last):\n' + f' File "{__file__}", line {self.callable_line}, in get_exception\n' + ' callable()\n' + ' ~~~~~~~~^^\n' + f' File "{__file__}", line {lineno_f+8}, in f_with_call\n' + ' a = (g(1).y)(\n' + ' ~~~~~~~~~\n' + ' 2\n' + ' ~\n' + ' )(3)(4)\n' + ' ~^^^\n' + f' File "{__file__}", line {lineno_f+4}, in f\n' + ' raise RuntimeError("fail")\n' + ) + result_lines = self.get_exception(f_with_call) + self.assertEqual(result_lines, expected_error.splitlines()) + + def test_many_lines(self): + def f(): + x = 1 + if True: x += ( + "a" + + "a" + ) # test + + lineno_f = f.__code__.co_firstlineno + expected_error = ( + 'Traceback (most recent call last):\n' + f' File "{__file__}", line {self.callable_line}, in get_exception\n' + ' callable()\n' + ' ~~~~~~~~^^\n' + f' File "{__file__}", line {lineno_f+2}, in f\n' + ' if True: x += (\n' + ' ^^^^^^\n' + ' ...<2 lines>...\n' + ' ) # test\n' + ' ^\n' + ) + result_lines = self.get_exception(f) + self.assertEqual(result_lines, expected_error.splitlines()) + + def test_many_lines_no_caret(self): + def f(): + x = 1 + x += ( + "a" + + "a" + ) + + lineno_f = f.__code__.co_firstlineno + expected_error = ( + 'Traceback (most recent call last):\n' + f' File "{__file__}", line {self.callable_line}, in get_exception\n' + ' callable()\n' + ' ~~~~~~~~^^\n' + f' File "{__file__}", line {lineno_f+2}, in f\n' + ' x += (\n' + ' ...<2 lines>...\n' + ' )\n' + ) + result_lines = self.get_exception(f) + self.assertEqual(result_lines, expected_error.splitlines()) + + def 
test_many_lines_binary_op(self): + def f_with_binary_operator(): + b = 1 + c = "a" + a = ( + b + + b + ) + ( + c + + c + + c + ) + return a + + lineno_f = f_with_binary_operator.__code__.co_firstlineno + expected_error = ( + 'Traceback (most recent call last):\n' + f' File "{__file__}", line {self.callable_line}, in get_exception\n' + ' callable()\n' + ' ~~~~~~~~^^\n' + f' File "{__file__}", line {lineno_f+3}, in f_with_binary_operator\n' + ' a = (\n' + ' ~\n' + ' b +\n' + ' ~~~\n' + ' b\n' + ' ~\n' + ' ) + (\n' + ' ~~^~~\n' + ' c +\n' + ' ~~~\n' + ' ...<2 lines>...\n' + ' )\n' + ' ~\n' + ) + result_lines = self.get_exception(f_with_binary_operator) + self.assertEqual(result_lines, expected_error.splitlines()) + def test_traceback_specialization_with_syntax_error(self): bytecode = compile("1 / 0 / 1 / 2\n", TESTFN, "exec") @@ -682,6 +1002,7 @@ def test_traceback_specialization_with_syntax_error(self): 'Traceback (most recent call last):\n' f' File "{__file__}", line {self.callable_line}, in get_exception\n' ' callable()\n' + ' ~~~~~~~~^^\n' f' File "{TESTFN}", line {lineno_f}, in \n' " 1 $ 0 / 1 / 2\n" ' ^^^^^\n' @@ -704,6 +1025,7 @@ def test_traceback_very_long_line(self): 'Traceback (most recent call last):\n' f' File "{__file__}", line {self.callable_line}, in get_exception\n' ' callable()\n' + ' ~~~~~~~~^^\n' f' File "{TESTFN}", line {lineno_f}, in \n' f' {source}\n' f' {" "*len("if True: ") + "^"*256}\n' @@ -721,6 +1043,7 @@ def f_with_subscript(): 'Traceback (most recent call last):\n' f' File "{__file__}", line {self.callable_line}, in get_exception\n' ' callable()\n' + ' ~~~~~~~~^^\n' f' File "{__file__}", line {lineno_f+2}, in f_with_subscript\n' " some_dict['x']['y']['z']\n" ' ~~~~~~~~~~~~~~~~~~~^^^^^\n' @@ -740,6 +1063,7 @@ def exc(): f' + Exception Group Traceback (most recent call last):\n' f' | File "{__file__}", line {self.callable_line}, in get_exception\n' f' | callable()\n' + f' | ~~~~~~~~^^\n' f' | File "{__file__}", line 
{exc.__code__.co_firstlineno + 1}, in exc\n' f' | if True: raise ExceptionGroup("eg", [ValueError(1), TypeError(2)])\n' f' | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n' @@ -805,6 +1129,7 @@ def g(): pass 'Traceback (most recent call last):\n' f' File "{__file__}", line {self.callable_line}, in get_exception\n' ' callable()\n' + ' ~~~~~~~~^^\n' f' File "{__file__}", line {lineno_applydescs + 1}, in applydecs\n' ' @dec_error\n' ' ^^^^^^^^^\n' @@ -823,6 +1148,7 @@ class A: pass 'Traceback (most recent call last):\n' f' File "{__file__}", line {self.callable_line}, in get_exception\n' ' callable()\n' + ' ~~~~~~~~^^\n' f' File "{__file__}", line {lineno_applydescs_class + 1}, in applydecs_class\n' ' @dec_error\n' ' ^^^^^^^^^\n' @@ -841,6 +1167,7 @@ def f(): "Traceback (most recent call last):", f" File \"{__file__}\", line {self.callable_line}, in get_exception", " callable()", + " ~~~~~~~~^^", f" File \"{__file__}\", line {f.__code__.co_firstlineno + 2}, in f", " .method", " ^^^^^^", @@ -857,6 +1184,7 @@ def f(): "Traceback (most recent call last):", f" File \"{__file__}\", line {self.callable_line}, in get_exception", " callable()", + " ~~~~~~~~^^", f" File \"{__file__}\", line {f.__code__.co_firstlineno + 2}, in f", " method", ] @@ -872,6 +1200,7 @@ def f(): "Traceback (most recent call last):", f" File \"{__file__}\", line {self.callable_line}, in get_exception", " callable()", + " ~~~~~~~~^^", f" File \"{__file__}\", line {f.__code__.co_firstlineno + 2}, in f", " . 
method", " ^^^^^^", @@ -887,6 +1216,7 @@ def f(): "Traceback (most recent call last):", f" File \"{__file__}\", line {self.callable_line}, in get_exception", " callable()", + " ~~~~~~~~^^", f" File \"{__file__}\", line {f.__code__.co_firstlineno + 1}, in f", " width", ] @@ -903,6 +1233,7 @@ def f(): "Traceback (most recent call last):", f" File \"{__file__}\", line {self.callable_line}, in get_exception", " callable()", + " ~~~~~~~~^^", f" File \"{__file__}\", line {f.__code__.co_firstlineno + 2}, in f", " raise ValueError(width)", ] @@ -921,9 +1252,12 @@ def f(): "Traceback (most recent call last):", f" File \"{__file__}\", line {self.callable_line}, in get_exception", " callable()", + " ~~~~~~~~^^", f" File \"{__file__}\", line {f.__code__.co_firstlineno + 4}, in f", " print(1, www(", - " ^^^^", + " ~~~^", + " th))", + " ^^^" ] self.assertEqual(actual, expected) @@ -997,6 +1331,8 @@ def check_traceback_format(self, cleanup_func=None): raise Error("unable to create test traceback string") # Make sure that Python and the traceback module format the same thing + print(traceback_fmt) + print(python_fmt) self.assertEqual(traceback_fmt, python_fmt) # Now verify the _tb func output self.assertEqual(tbstderr.getvalue(), tbfile.getvalue()) @@ -1007,7 +1343,7 @@ def check_traceback_format(self, cleanup_func=None): # Make sure that the traceback is properly indented. 
tb_lines = python_fmt.splitlines() banner = tb_lines[0] - self.assertEqual(len(tb_lines), 5) + self.assertEqual(len(tb_lines), 6) location, source_line = tb_lines[-2], tb_lines[-1] self.assertTrue(banner.startswith('Traceback')) self.assertTrue(location.startswith(' File')) @@ -1072,12 +1408,16 @@ def f(): 'Traceback (most recent call last):\n' f' File "{__file__}", line {lineno_f+5}, in _check_recursive_traceback_display\n' ' f()\n' + ' ~^^\n' f' File "{__file__}", line {lineno_f+1}, in f\n' ' f()\n' + ' ~^^\n' f' File "{__file__}", line {lineno_f+1}, in f\n' ' f()\n' + ' ~^^\n' f' File "{__file__}", line {lineno_f+1}, in f\n' ' f()\n' + ' ~^^\n' # XXX: The following line changes depending on whether the tests # are run through the interactive interpreter or with -m # It also varies depending on the platform (stack size) @@ -1118,13 +1458,13 @@ def g(count=10): result_g = ( f' File "{__file__}", line {lineno_g+2}, in g\n' ' return g(count-1)\n' - ' ^^^^^^^^^^\n' + ' ~^^^^^^^^^\n' f' File "{__file__}", line {lineno_g+2}, in g\n' ' return g(count-1)\n' - ' ^^^^^^^^^^\n' + ' ~^^^^^^^^^\n' f' File "{__file__}", line {lineno_g+2}, in g\n' ' return g(count-1)\n' - ' ^^^^^^^^^^\n' + ' ~^^^^^^^^^\n' ' [Previous line repeated 7 more times]\n' f' File "{__file__}", line {lineno_g+3}, in g\n' ' raise ValueError\n' @@ -1134,6 +1474,7 @@ def g(count=10): 'Traceback (most recent call last):\n' f' File "{__file__}", line {lineno_g+7}, in _check_recursive_traceback_display\n' ' g()\n' + ' ~^^\n' ) expected = (tb_line + result_g).splitlines() actual = stderr_g.getvalue().splitlines() @@ -1158,18 +1499,20 @@ def h(count=10): 'Traceback (most recent call last):\n' f' File "{__file__}", line {lineno_h+7}, in _check_recursive_traceback_display\n' ' h()\n' + ' ~^^\n' f' File "{__file__}", line {lineno_h+2}, in h\n' ' return h(count-1)\n' - ' ^^^^^^^^^^\n' + ' ~^^^^^^^^^\n' f' File "{__file__}", line {lineno_h+2}, in h\n' ' return h(count-1)\n' - ' ^^^^^^^^^^\n' + ' ~^^^^^^^^^\n' f' 
File "{__file__}", line {lineno_h+2}, in h\n' ' return h(count-1)\n' - ' ^^^^^^^^^^\n' + ' ~^^^^^^^^^\n' ' [Previous line repeated 7 more times]\n' f' File "{__file__}", line {lineno_h+3}, in h\n' ' g()\n' + ' ~^^\n' ) expected = (result_h + result_g).splitlines() actual = stderr_h.getvalue().splitlines() @@ -1186,21 +1529,22 @@ def h(count=10): result_g = ( f' File "{__file__}", line {lineno_g+2}, in g\n' ' return g(count-1)\n' - ' ^^^^^^^^^^\n' + ' ~^^^^^^^^^\n' f' File "{__file__}", line {lineno_g+2}, in g\n' ' return g(count-1)\n' - ' ^^^^^^^^^^\n' + ' ~^^^^^^^^^\n' f' File "{__file__}", line {lineno_g+2}, in g\n' ' return g(count-1)\n' - ' ^^^^^^^^^^\n' + ' ~^^^^^^^^^\n' f' File "{__file__}", line {lineno_g+3}, in g\n' ' raise ValueError\n' 'ValueError\n' ) tb_line = ( 'Traceback (most recent call last):\n' - f' File "{__file__}", line {lineno_g+77}, in _check_recursive_traceback_display\n' + f' File "{__file__}", line {lineno_g+80}, in _check_recursive_traceback_display\n' ' g(traceback._RECURSIVE_CUTOFF)\n' + ' ~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n' ) expected = (tb_line + result_g).splitlines() actual = stderr_g.getvalue().splitlines() @@ -1217,13 +1561,13 @@ def h(count=10): result_g = ( f' File "{__file__}", line {lineno_g+2}, in g\n' ' return g(count-1)\n' - ' ^^^^^^^^^^\n' + ' ~^^^^^^^^^\n' f' File "{__file__}", line {lineno_g+2}, in g\n' ' return g(count-1)\n' - ' ^^^^^^^^^^\n' + ' ~^^^^^^^^^\n' f' File "{__file__}", line {lineno_g+2}, in g\n' ' return g(count-1)\n' - ' ^^^^^^^^^^\n' + ' ~^^^^^^^^^\n' ' [Previous line repeated 1 more time]\n' f' File "{__file__}", line {lineno_g+3}, in g\n' ' raise ValueError\n' @@ -1231,8 +1575,9 @@ def h(count=10): ) tb_line = ( 'Traceback (most recent call last):\n' - f' File "{__file__}", line {lineno_g+108}, in _check_recursive_traceback_display\n' + f' File "{__file__}", line {lineno_g+112}, in _check_recursive_traceback_display\n' ' g(traceback._RECURSIVE_CUTOFF + 1)\n' + ' ~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n' ) 
expected = (tb_line + result_g).splitlines() actual = stderr_g.getvalue().splitlines() @@ -1698,6 +2043,7 @@ def exc(): f' + Exception Group Traceback (most recent call last):\n' f' | File "{__file__}", line {self.callable_line}, in get_exception\n' f' | exception_or_callable()\n' + f' | ~~~~~~~~~~~~~~~~~~~~~^^\n' f' | File "{__file__}", line {exc.__code__.co_firstlineno + 1}, in exc\n' f' | raise ExceptionGroup("eg", [ValueError(1), TypeError(2)])\n' f' | ExceptionGroup: eg (2 sub-exceptions)\n' @@ -1733,6 +2079,7 @@ def exc(): f' + Exception Group Traceback (most recent call last):\n' f' | File "{__file__}", line {self.callable_line}, in get_exception\n' f' | exception_or_callable()\n' + f' | ~~~~~~~~~~~~~~~~~~~~~^^\n' f' | File "{__file__}", line {exc.__code__.co_firstlineno + 5}, in exc\n' f' | raise EG("eg2", [ValueError(3), TypeError(4)]) from e\n' f' | ExceptionGroup: eg2 (2 sub-exceptions)\n' @@ -1784,6 +2131,7 @@ def exc(): f'Traceback (most recent call last):\n' f' File "{__file__}", line {self.callable_line}, in get_exception\n' f' exception_or_callable()\n' + f' ~~~~~~~~~~~~~~~~~~~~~^^\n' f' File "{__file__}", line {exc.__code__.co_firstlineno + 8}, in exc\n' f' raise ImportError(5)\n' f'ImportError: 5\n') @@ -1830,6 +2178,7 @@ def exc(): f' + Exception Group Traceback (most recent call last):\n' f' | File "{__file__}", line {self.callable_line}, in get_exception\n' f' | exception_or_callable()\n' + f' | ~~~~~~~~~~~~~~~~~~~~~^^\n' f' | File "{__file__}", line {exc.__code__.co_firstlineno + 11}, in exc\n' f' | raise EG("top", [VE(5)])\n' f' | ExceptionGroup: top (1 sub-exception)\n' @@ -1989,6 +2338,7 @@ def exc(): expected = (f' + Exception Group Traceback (most recent call last):\n' f' | File "{__file__}", line {self.callable_line}, in get_exception\n' f' | exception_or_callable()\n' + f' | ~~~~~~~~~~~~~~~~~~~~~^^\n' f' | File "{__file__}", line {exc.__code__.co_firstlineno + 9}, in exc\n' f' | raise ExceptionGroup("nested", excs)\n' f' | 
ExceptionGroup: nested (2 sub-exceptions)\n' @@ -2040,6 +2390,7 @@ def exc(): expected = (f' + Exception Group Traceback (most recent call last):\n' f' | File "{__file__}", line {self.callable_line}, in get_exception\n' f' | exception_or_callable()\n' + f' | ~~~~~~~~~~~~~~~~~~~~~^^\n' f' | File "{__file__}", line {exc.__code__.co_firstlineno + 10}, in exc\n' f' | raise ExceptionGroup("nested", excs)\n' f' | ExceptionGroup: nested (2 sub-exceptions)\n' @@ -2864,6 +3215,7 @@ def test_exception_group_format(self): f' | Traceback (most recent call last):', f' | File "{__file__}", line {lno_g+9}, in _get_exception_group', f' | f()', + f' | ~^^', f' | File "{__file__}", line {lno_f+1}, in f', f' | 1/0', f' | ~^~', @@ -2872,6 +3224,7 @@ def test_exception_group_format(self): f' | Traceback (most recent call last):', f' | File "{__file__}", line {lno_g+13}, in _get_exception_group', f' | g(42)', + f' | ~^^^^', f' | File "{__file__}", line {lno_g+1}, in g', f' | raise ValueError(v)', f' | ValueError: 42', @@ -2880,6 +3233,7 @@ def test_exception_group_format(self): f' | Traceback (most recent call last):', f' | File "{__file__}", line {lno_g+20}, in _get_exception_group', f' | g(24)', + f' | ~^^^^', f' | File "{__file__}", line {lno_g+1}, in g', f' | raise ValueError(v)', f' | ValueError: 24', diff --git a/Lib/traceback.py b/Lib/traceback.py index 67941ff45988c2..ec0187abbd89f6 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -263,7 +263,7 @@ class FrameSummary: """ __slots__ = ('filename', 'lineno', 'end_lineno', 'colno', 'end_colno', - 'name', '_line', 'locals') + 'name', '_line', '_line_dedented', 'locals') def __init__(self, filename, lineno, name, *, lookup_line=True, locals=None, line=None, @@ -279,15 +279,16 @@ def __init__(self, filename, lineno, name, *, lookup_line=True, """ self.filename = filename self.lineno = lineno + self.end_lineno = lineno if end_lineno is None else end_lineno + self.colno = colno + self.end_colno = end_colno self.name = name 
self._line = line + self._line_dedented = None if lookup_line: self.line self.locals = {k: _safe_string(v, 'local', func=repr) for k, v in locals.items()} if locals else None - self.end_lineno = end_lineno - self.colno = colno - self.end_colno = end_colno def __eq__(self, other): if isinstance(other, FrameSummary): @@ -318,14 +319,27 @@ def _original_line(self): self.line return self._line + @property + def _dedented_lines(self): + # Returns _original_line, but dedented + self.line + if self._line_dedented is None: + if self._line is not None: + self._line_dedented = textwrap.dedent(self._line).rstrip() + return self._line_dedented + @property def line(self): if self._line is None: if self.lineno is None: return None - self._line = linecache.getline(self.filename, self.lineno) - return self._line.strip() - + end_lineno = self.lineno if self.end_lineno is None else self.end_lineno + self._line = "" + for lineno in range(self.lineno, end_lineno + 1): + # treat errors and empty lines as the same + self._line += linecache.getline(self.filename, lineno).rstrip() + "\n" + # return only the first line + return self._line.partition("\n")[0].strip() def walk_stack(f): """Walk a stack yielding the frame and line number for each frame. 
@@ -469,45 +483,123 @@ def format_frame_summary(self, frame_summary): row = [] row.append(' File "{}", line {}, in {}\n'.format( frame_summary.filename, frame_summary.lineno, frame_summary.name)) - if frame_summary.line: - stripped_line = frame_summary.line.strip() - row.append(' {}\n'.format(stripped_line)) - - orig_line_len = len(frame_summary._original_line) - frame_line_len = len(frame_summary.line.lstrip()) - stripped_characters = orig_line_len - frame_line_len + if frame_summary._dedented_lines: if ( - frame_summary.colno is not None - and frame_summary.end_colno is not None + frame_summary.colno is None or + frame_summary.end_colno is None ): - start_offset = _byte_offset_to_character_offset( - frame_summary._original_line, frame_summary.colno) + 1 - end_offset = _byte_offset_to_character_offset( - frame_summary._original_line, frame_summary.end_colno) + 1 - - anchors = None - if frame_summary.lineno == frame_summary.end_lineno: - with suppress(Exception): - anchors = _extract_caret_anchors_from_line_segment( - frame_summary._original_line[start_offset - 1:end_offset - 1] - ) - else: - end_offset = stripped_characters + len(stripped_line) - - # show indicators if primary char doesn't span the frame line - if end_offset - start_offset < len(stripped_line) or ( - anchors and anchors.right_start_offset - anchors.left_end_offset > 0): - row.append(' ') - row.append(' ' * (start_offset - stripped_characters)) - - if anchors: - row.append(anchors.primary_char * (anchors.left_end_offset)) - row.append(anchors.secondary_char * (anchors.right_start_offset - anchors.left_end_offset)) - row.append(anchors.primary_char * (end_offset - start_offset - anchors.right_start_offset)) - else: - row.append('^' * (end_offset - start_offset)) + # only output first line if column information is missing + row.append(textwrap.indent(frame_summary.line, ' ') + "\n") + else: + # get first and last line + all_lines_original = frame_summary._original_line.splitlines() + first_line = 
all_lines_original[0] + last_line = all_lines_original[frame_summary.end_lineno - frame_summary.lineno] + + # character index of the start/end of the instruction + start_offset = _byte_offset_to_character_offset(first_line, frame_summary.colno) + end_offset = _byte_offset_to_character_offset(last_line, frame_summary.end_colno) + + all_lines = frame_summary._dedented_lines.splitlines()[ + :frame_summary.end_lineno - frame_summary.lineno + 1 + ] + + # adjust start/end offset based on dedent + dedent_characters = len(first_line) - len(all_lines[0]) + start_offset -= dedent_characters + end_offset -= dedent_characters + start_offset = max(0, start_offset) + end_offset = max(0, end_offset) + + # get exact code segment corresponding to the instruction + segment = "\n".join(all_lines) + segment = segment[start_offset:len(segment) - (len(all_lines[-1]) - end_offset)] + + # attempt to parse for anchors + anchors: Optional[_Anchors] = None + try: + anchors = _extract_caret_anchors_from_line_segment(segment) + except AssertionError: + pass + + # only use carets if there are anchors or the carets do not span all lines + show_carets = False + if anchors or all_lines[0][:start_offset].lstrip() or all_lines[-1][end_offset:].rstrip(): + show_carets = True + + result = [] + + # only display first line, last line, and lines around anchor start/end + significant_lines = {0, len(all_lines) - 1} + + anchors_left_end_offset = 0 + anchors_right_start_offset = 0 + primary_char = "^" + secondary_char = "^" + if anchors: + anchors_left_end_offset = anchors.left_end_offset + anchors_right_start_offset = anchors.right_start_offset + # computed anchor positions do not take start_offset into account, + # so account for it here + if anchors.left_end_lineno == 0: + anchors_left_end_offset += start_offset + if anchors.right_start_lineno == 0: + anchors_right_start_offset += start_offset + primary_char = anchors.primary_char + secondary_char = anchors.secondary_char + significant_lines.update( + 
range(anchors.left_end_lineno - 1, anchors.left_end_lineno + 2) + ) + significant_lines.update( + range(anchors.right_start_lineno - 1, anchors.right_start_lineno + 2) + ) - row.append('\n') + # remove bad line numbers + significant_lines.discard(-1) + significant_lines.discard(len(all_lines)) + + # output all_lines[lineno] along with carets + def output_line(lineno): + result.append(all_lines[lineno] + "\n") + if not show_carets: + return + num_spaces = len(all_lines[lineno]) - len(all_lines[lineno].lstrip()) + carets = [] + num_carets = end_offset if lineno == len(all_lines) - 1 else len(all_lines[lineno]) + # compute caret character for each position + for col in range(num_carets): + if col < num_spaces or (lineno == 0 and col < start_offset): + # before first non-ws char of the line, or before start of instruction + carets.append(' ') + elif anchors and ( + lineno > anchors.left_end_lineno or + (lineno == anchors.left_end_lineno and col >= anchors_left_end_offset) + ) and ( + lineno < anchors.right_start_lineno or + (lineno == anchors.right_start_lineno and col < anchors_right_start_offset) + ): + # within anchors + carets.append(secondary_char) + else: + carets.append(primary_char) + result.append("".join(carets) + "\n") + + # display significant lines + sig_lines_list = sorted(significant_lines) + for i, lineno in enumerate(sig_lines_list): + if i: + linediff = lineno - sig_lines_list[i - 1] + if linediff == 2: + # 1 line in between - just output it + output_line(lineno - 1) + elif linediff > 2: + # > 1 line in between - abbreviate + result.append(f"...<{linediff - 1} lines>...\n") + output_line(lineno) + + row.append( + textwrap.indent(textwrap.dedent("".join(result)), ' ', lambda line: True) + ) if frame_summary.locals: for name, value in sorted(frame_summary.locals.items()): @@ -571,7 +663,9 @@ def _byte_offset_to_character_offset(str, offset): _Anchors = collections.namedtuple( "_Anchors", [ + "left_end_lineno", "left_end_offset", + "right_start_lineno", 
"right_start_offset", "primary_char", "secondary_char", @@ -580,49 +674,131 @@ def _byte_offset_to_character_offset(str, offset): ) def _extract_caret_anchors_from_line_segment(segment): + """ + Given source code `segment` corresponding to a FrameSummary, determine: + - for binary ops, the location of the binary op + - for indexing and function calls, the location of the brackets. + `segment` is expected to be a valid Python expression. + """ import ast try: - tree = ast.parse(segment) + # Without parentheses, `segment` is parsed as a statement. + # Binary ops, subscripts, and calls are expressions, so + # we can wrap them with parentheses to parse them as + # (possibly multi-line) expressions. + tree = ast.parse("(\n" + segment + "\n)") except SyntaxError: return None if len(tree.body) != 1: return None - normalize = lambda offset: _byte_offset_to_character_offset(segment, offset) + lines = segment.splitlines() + + # get character index given byte offset + def normalize(lineno, offset): + return _byte_offset_to_character_offset(lines[lineno], offset) + + # Gets the next valid character index in `lines`, if + # the current location is not valid. Handles empty lines. + def next_valid_char(lineno, col): + while lineno < len(lines) and col >= len(lines[lineno]): + col = 0 + lineno += 1 + assert lineno < len(lines) and col < len(lines[lineno]) + return lineno, col + + # Get the next valid character index in `lines`. 
+ def increment(lineno, col): + col += 1 + lineno, col = next_valid_char(lineno, col) + return lineno, col + + # Get the next valid character at least on the next line + def nextline(lineno, col): + col = 0 + lineno += 1 + lineno, col = next_valid_char(lineno, col) + return lineno, col + + # Get the next valid non-"\#" character that satisfies the `stop` predicate + def increment_until(lineno, col, stop): + while True: + ch = lines[lineno][col] + if ch in "\\#": + lineno, col = nextline(lineno, col) + elif not stop(ch): + lineno, col = increment(lineno, col) + else: + break + return lineno, col + + # Get the lineno/col position of the end of `expr`. If `force_valid` is True, + # forces the position to be a valid character (e.g. if the position is beyond the + # end of the line, move to the next line) + def setup_positions(expr, force_valid=True): + # -2 since end_lineno is 1-indexed and because we added an extra + # bracket + newline to `segment` when calling ast.parse + lineno = expr.end_lineno - 2 + col = normalize(lineno, expr.end_col_offset) + return next_valid_char(lineno, col) if force_valid else (lineno, col) + + statement = tree.body[0] match statement: case ast.Expr(expr): match expr: case ast.BinOp(): - operator_start = normalize(expr.left.end_col_offset) - operator_end = normalize(expr.right.col_offset) - operator_str = segment[operator_start:operator_end] - operator_offset = len(operator_str) - len(operator_str.lstrip()) + # ast gives these locations for BinOp subexpressions + # ( left_expr ) + ( right_expr ) + # left^^^^^ right^^^^^ + lineno, col = setup_positions(expr.left) - left_anchor = expr.left.end_col_offset + operator_offset - right_anchor = left_anchor + 1 + # First operator character is the first non-space/')' character + lineno, col = increment_until(lineno, col, lambda x: not x.isspace() and x != ')') + + # binary op is 1 or 2 characters long, on the same line, + # before the right subexpression + right_col = col + 1 if ( - operator_offset 
+ 1 < len(operator_str) - and not operator_str[operator_offset + 1].isspace() + right_col < len(lines[lineno]) + and ( + # operator char should not be in the right subexpression + expr.right.lineno - 2 > lineno or + right_col < normalize(expr.right.lineno - 2, expr.right.col_offset) + ) + and not (ch := lines[lineno][right_col]).isspace() + and ch not in "\\#" ): - right_anchor += 1 + right_col += 1 - while left_anchor < len(segment) and ((ch := segment[left_anchor]).isspace() or ch in ")#"): - left_anchor += 1 - right_anchor += 1 - return _Anchors(normalize(left_anchor), normalize(right_anchor)) + # right_col can be invalid since it is exclusive + return _Anchors(lineno, col, lineno, right_col) case ast.Subscript(): - left_anchor = normalize(expr.value.end_col_offset) - right_anchor = normalize(expr.slice.end_col_offset + 1) - while left_anchor < len(segment) and ((ch := segment[left_anchor]).isspace() or ch != "["): - left_anchor += 1 - while right_anchor < len(segment) and ((ch := segment[right_anchor]).isspace() or ch != "]"): - right_anchor += 1 - if right_anchor < len(segment): - right_anchor += 1 - return _Anchors(left_anchor, right_anchor) + # ast gives these locations for value and slice subexpressions + # ( value_expr ) [ slice_expr ] + # value^^^^^ slice^^^^^ + # subscript^^^^^^^^^^^^^^^^^^^^ + + # find left bracket + left_lineno, left_col = setup_positions(expr.value) + left_lineno, left_col = increment_until(left_lineno, left_col, lambda x: x == '[') + # find right bracket (final character of expression) + right_lineno, right_col = setup_positions(expr, force_valid=False) + return _Anchors(left_lineno, left_col, right_lineno, right_col) + case ast.Call(): + # ast gives these locations for function call expressions + # ( func_expr ) (args, kwargs) + # func^^^^^ + # call^^^^^^^^^^^^^^^^^^^^^^^^ + + # find left bracket + left_lineno, left_col = setup_positions(expr.func) + left_lineno, left_col = increment_until(left_lineno, left_col, lambda x: x == '(') 
+ # find right bracket (final character of expression) + right_lineno, right_col = setup_positions(expr, force_valid=False) + return _Anchors(left_lineno, left_col, right_lineno, right_col) return None diff --git a/Misc/NEWS.d/next/Library/2023-09-21-22-41-45.gh-issue-106922.qslOVH.rst b/Misc/NEWS.d/next/Library/2023-09-21-22-41-45.gh-issue-106922.qslOVH.rst new file mode 100644 index 00000000000000..a6dae826c45598 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-09-21-22-41-45.gh-issue-106922.qslOVH.rst @@ -0,0 +1 @@ +Display multiple lines with `traceback` when errors span multiple lines. diff --git a/Python/traceback.c b/Python/traceback.c index a75b7833af4e05..dffe86cd65875f 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -413,10 +413,20 @@ _Py_WriteIndentedMargin(int indent, const char *margin, PyObject *f) return 0; } +static PyObject* +join_string_list(const char *join, PyObject* seq) +{ + PyObject *separator = PyUnicode_FromString(join); + if (!separator) { + return NULL; + } + PyObject *result = PyUnicode_Join(separator, seq); + Py_DECREF(separator); + return result; +} + static int -display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int indent, - int margin_indent, const char *margin, - int *truncation, PyObject **line) +get_source_lines(PyObject *filename, int lineno, int end_lineno, PyObject **lines) { int fd; int i; @@ -428,13 +438,14 @@ display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int PyObject *lineobj = NULL; PyObject *res; char buf[MAXPATHLEN+1]; - int kind; - const void *data; /* open the file */ if (filename == NULL) return 0; + if (lines == NULL) + return 0; + /* Do not attempt to open things like or */ assert(PyUnicode_Check(filename)); if (PyUnicode_READ_CHAR(filename, 0) == '<') { @@ -496,15 +507,27 @@ display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int } Py_DECREF(binary); - /* get the line number lineno */ - for (i = 0; i < lineno; i++) { - 
Py_XDECREF(lineobj); + /* get lines between lineno and end_lineno, inclusive */ + PyObject *lines_accum = PyList_New(end_lineno - lineno + 1); + if (!lines_accum) { + goto cleanup_fob; + } + for (i = 1; i <= end_lineno; i++) { lineobj = PyFile_GetLine(fob, -1); - if (!lineobj) { - PyErr_Clear(); - break; + if (i >= lineno) { + if (!lineobj || !PyUnicode_Check(lineobj)) { + Py_XSETREF(lineobj, PyUnicode_FromString("")); + if (!lineobj) { + goto cleanup_fob; + } + } + PyList_SET_ITEM(lines_accum, i - lineno, lineobj); } } + *lines = join_string_list("\n", lines_accum); +cleanup_fob: + Py_XDECREF(lines_accum); + PyErr_Clear(); res = PyObject_CallMethodNoArgs(fob, &_Py_ID(close)); if (res) { Py_DECREF(res); @@ -513,11 +536,59 @@ display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int PyErr_Clear(); } Py_DECREF(fob); - if (!lineobj || !PyUnicode_Check(lineobj)) { - Py_XDECREF(lineobj); + + return 0; +} + +static int +_write_line_with_margin_and_indent(PyObject *f, PyObject *line, int indent, + int margin_indent, const char *margin) +{ + if (line == NULL) { + return -1; + } + + if (_Py_WriteIndentedMargin(margin_indent, margin, f) < 0) { + return -1; + } + + /* Write some spaces before the line */ + if (_Py_WriteIndent(indent, f) < 0) { + return -1; + } + + /* finally display the line */ + if (PyFile_WriteObject(line, f, Py_PRINT_RAW) < 0) { + return -1; + } + + if (PyFile_WriteString("\n", f) < 0) { return -1; } + return 0; +} + +#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\f')) + +static int +display_source_line_with_margin(PyObject *f, PyObject *filename, + int lineno, int indent, + int margin_indent, const char *margin, + int *truncation, PyObject **line) +{ + PyObject *lineobj = NULL; + int i; + int result; + int kind; + const void *data; + + result = get_source_lines(filename, lineno, lineno, &lineobj); + if (result || lineobj == NULL) { + Py_XDECREF(lineobj); + return result; + } + if (line) { *line = 
Py_NewRef(lineobj); } @@ -527,7 +598,7 @@ display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int data = PyUnicode_DATA(lineobj); for (i=0; i < PyUnicode_GET_LENGTH(lineobj); i++) { Py_UCS4 ch = PyUnicode_READ(kind, data, i); - if (ch != ' ' && ch != '\t' && ch != '\014') + if (!IS_WHITESPACE(ch)) break; } if (i) { @@ -544,28 +615,14 @@ display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int *truncation = i - indent; } - if (_Py_WriteIndentedMargin(margin_indent, margin, f) < 0) { - goto error; - } - - /* Write some spaces before the line */ - if (_Py_WriteIndent(indent, f) < 0) { + if (_write_line_with_margin_and_indent(f, lineobj, indent, margin_indent, margin)) { goto error; } - /* finally display the line */ - if (PyFile_WriteObject(lineobj, f, Py_PRINT_RAW) < 0) { - goto error; - } - - if (PyFile_WriteString("\n", f) < 0) { - goto error; - } - - Py_DECREF(lineobj); + Py_XDECREF(lineobj); return 0; error: - Py_DECREF(lineobj); + Py_XDECREF(lineobj); return -1; } @@ -593,74 +650,220 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent, * TypeError: 'NoneType' object is not subscriptable */ -#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\f')) +// The below functions are helper functions for anchor extraction + +// Get segment_lines[lineno] in C string form +static const char +*_get_segment_str(PyObject *segment_lines, Py_ssize_t lineno, Py_ssize_t *size) +{ + return PyUnicode_AsUTF8AndSize(PyList_GET_ITEM(segment_lines, lineno), size); +} + +// Gets the next valid offset in segment_lines[lineno], if the current offset is not valid +static int +_next_valid_offset(PyObject *segment_lines, Py_ssize_t *lineno, Py_ssize_t *offset) +{ + Py_ssize_t str_len = 0; + const char *segment_str = NULL; + while (*lineno < PyList_GET_SIZE(segment_lines)) { + segment_str = _get_segment_str(segment_lines, *lineno, &str_len); + if (!segment_str) { + return -1; + } + if (*offset < 
str_len) { + break; + } + *offset = 0; + ++*lineno; + } + assert(*lineno < PyList_GET_SIZE(segment_lines)); + assert(segment_str); + assert(*offset < str_len); + return 0; +} + +// Get the next valid offset +static int +_increment_offset(PyObject *segment_lines, Py_ssize_t *lineno, Py_ssize_t *offset) +{ + ++*offset; + return _next_valid_offset(segment_lines, lineno, offset); +} + +// Get the next valid offset at least on the next line +static int +_nextline(PyObject *segment_lines, Py_ssize_t *lineno, Py_ssize_t *offset) +{ + *offset = 0; + ++*lineno; + return _next_valid_offset(segment_lines, lineno, offset); +} + +// Get the next valid non-"\#" character that satisfies the stop predicate +static int +_increment_until(PyObject *segment_lines, Py_ssize_t *lineno, + Py_ssize_t *offset, int (*stop)(char)) +{ + while (1) { + Py_ssize_t str_len; + const char *segment_str = _get_segment_str(segment_lines, *lineno, &str_len); + if (!segment_str || *offset >= str_len) { + return -1; + } + char ch = segment_str[*offset]; + // jump to next line if we encounter line break or comment + if (ch == '\\' || ch == '#') { + if (_nextline(segment_lines, lineno, offset)) { + return -1; + } + } else if (!stop(ch)) { + if (_increment_offset(segment_lines, lineno, offset)) { + return -1; + } + } else { + break; + } + } + return 0; +} + +// is the character a binary op character? 
(not whitespace or closing paren) +static int +_is_op_char(char ch) +{ + if (!IS_WHITESPACE(ch) && ch != ')') { + return 1; + } + return 0; +} + +static int +_is_open_bracket_char(char ch) +{ + return ch == '['; +} + +static int +_is_open_paren_char(char ch) +{ + return ch == '('; +} static int -extract_anchors_from_expr(const char *segment_str, expr_ty expr, Py_ssize_t *left_anchor, Py_ssize_t *right_anchor, +extract_anchors_from_expr(PyObject *segment_lines, expr_ty expr, + Py_ssize_t *left_anchor_lineno, Py_ssize_t *right_anchor_lineno, + Py_ssize_t *left_anchor_col, Py_ssize_t *right_anchor_col, char** primary_error_char, char** secondary_error_char) { switch (expr->kind) { case BinOp_kind: { + // anchor begin: first binary op char after left subexpression + // anchor end: 1 or 2 characters after anchor begin expr_ty left = expr->v.BinOp.left; expr_ty right = expr->v.BinOp.right; - for (int i = left->end_col_offset; i < right->col_offset; i++) { - if (IS_WHITESPACE(segment_str[i])) { - continue; - } - - *left_anchor = i; - *right_anchor = i + 1; - - // Check whether if this a two-character operator (e.g //) - if (i + 1 < right->col_offset && !IS_WHITESPACE(segment_str[i + 1])) { - ++*right_anchor; - } + *left_anchor_lineno = left->end_lineno - 2; + *left_anchor_col = left->end_col_offset; + if (_next_valid_offset( + segment_lines, left_anchor_lineno, left_anchor_col + )) { + return 0; + } + // keep going until the current char is not whitespace or ')' + if (_increment_until( + segment_lines, left_anchor_lineno, left_anchor_col, _is_op_char + )) { + return 0; + } + *right_anchor_lineno = *left_anchor_lineno; + *right_anchor_col = *left_anchor_col + 1; + + Py_ssize_t str_len = 0; + const char *segment_str = _get_segment_str( + segment_lines, *left_anchor_lineno, &str_len + ); + if (!segment_str) { + return 0; + } - // Keep going if the current char is not ')' - if (i+1 < right->col_offset && (segment_str[i] == ')')) { - continue; + // Check whether if this is a 
two-character operator (e.g. //) + if ( + *right_anchor_col < str_len && + ( + // operator char should not be in the right subexpression + right->lineno - 2 > *right_anchor_lineno || + *right_anchor_col < right->col_offset + ) + ) { + char ch = segment_str[*right_anchor_col]; + if (_is_op_char(ch) && ch != '\\' && ch != '#') { + ++*right_anchor_col; } - - // Set the error characters - *primary_error_char = "~"; - *secondary_error_char = "^"; - break; } + // Set the error characters + *primary_error_char = "~"; + *secondary_error_char = "^"; return 1; } case Subscript_kind: { - *left_anchor = expr->v.Subscript.value->end_col_offset; - *right_anchor = expr->v.Subscript.slice->end_col_offset + 1; - Py_ssize_t str_len = strlen(segment_str); - - // Move right_anchor and left_anchor forward to the first non-whitespace character that is not ']' and '[' - while (*left_anchor < str_len && (IS_WHITESPACE(segment_str[*left_anchor]) || segment_str[*left_anchor] != '[')) { - ++*left_anchor; - } - while (*right_anchor < str_len && (IS_WHITESPACE(segment_str[*right_anchor]) || segment_str[*right_anchor] != ']')) { - ++*right_anchor; + // anchor begin: first "[" after the value subexpression + // anchor end: end of the entire subscript expression + *left_anchor_lineno = expr->v.Subscript.value->end_lineno - 2; + *left_anchor_col = expr->v.Subscript.value->end_col_offset; + if (_next_valid_offset( + segment_lines, left_anchor_lineno, left_anchor_col + )) { + return 0; } - if (*right_anchor < str_len){ - *right_anchor += 1; + if (_increment_until( + segment_lines, left_anchor_lineno, left_anchor_col, _is_open_bracket_char + )) { + return 0; } + *right_anchor_lineno = expr->end_lineno - 2; + *right_anchor_col = expr->end_col_offset; // Set the error characters *primary_error_char = "~"; *secondary_error_char = "^"; return 1; } + case Call_kind: + // anchor positions determined similarly to Subscript + *left_anchor_lineno = expr->v.Call.func->end_lineno - 2; + *left_anchor_col = 
expr->v.Call.func->end_col_offset; + if (_next_valid_offset( + segment_lines, left_anchor_lineno, left_anchor_col + )) { + return 0; + } + if (_increment_until( + segment_lines, left_anchor_lineno, left_anchor_col, _is_open_paren_char + )) { + return 0; + } + *right_anchor_lineno = expr->end_lineno - 2; + *right_anchor_col = expr->end_col_offset; + + // Set the error characters + *primary_error_char = "~"; + *secondary_error_char = "^"; + return 1; default: return 0; } } static int -extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, Py_ssize_t *left_anchor, Py_ssize_t *right_anchor, +extract_anchors_from_stmt(PyObject *segment_lines, stmt_ty statement, + Py_ssize_t *left_anchor_lineno, Py_ssize_t *right_anchor_lineno, + Py_ssize_t *left_anchor_col, Py_ssize_t *right_anchor_col, char** primary_error_char, char** secondary_error_char) { switch (statement->kind) { case Expr_kind: { - return extract_anchors_from_expr(segment_str, statement->v.Expr.value, left_anchor, right_anchor, + return extract_anchors_from_expr(segment_lines, statement->v.Expr.value, + left_anchor_lineno, right_anchor_lineno, + left_anchor_col, right_anchor_col, primary_error_char, secondary_error_char); } default: @@ -668,19 +871,70 @@ extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, Py_ssize_t } } +// Returns: +// 1 if anchors were found +// 0 if anchors could not be computed +// -1 on error static int -extract_anchors_from_line(PyObject *filename, PyObject *line, +extract_anchors_from_line(PyObject *filename, PyObject *lines, Py_ssize_t start_offset, Py_ssize_t end_offset, - Py_ssize_t *left_anchor, Py_ssize_t *right_anchor, + Py_ssize_t *left_anchor_lineno, Py_ssize_t *right_anchor_lineno, + Py_ssize_t *left_anchor_col, Py_ssize_t *right_anchor_col, char** primary_error_char, char** secondary_error_char) { int res = -1; PyArena *arena = NULL; - PyObject *segment = PyUnicode_Substring(line, start_offset, end_offset); + PyObject *segment = NULL; + PyObject 
*segment_lines = NULL; + PyObject *tmp; + + segment = join_string_list("\n", lines); if (!segment) { goto done; } + // truncate segment + Py_ssize_t num_lines = PyList_Size(lines); + PyObject *last_string = PyList_GET_ITEM(lines, num_lines - 1); + Py_ssize_t offset_from_right = PyUnicode_GET_LENGTH(last_string) - end_offset; + Py_ssize_t join_end_offset = PyUnicode_GET_LENGTH(segment) - offset_from_right; + tmp = PyUnicode_Substring( + segment, start_offset, join_end_offset + ); + if (!tmp) { + goto done; + } + Py_SETREF(segment, tmp); + + // same as `lines`, but first/last strings are truncated + segment_lines = PyUnicode_Splitlines(segment, 0); + if (!segment_lines) { + goto done; + } + + // segment = "(\n" + segment + "\n)" + PyObject *paren_str = PyUnicode_FromString("(\n"); + if (!paren_str) { + goto done; + } + tmp = PyUnicode_Concat(paren_str, segment); + Py_DECREF(paren_str); + if (!tmp) { + goto done; + } + Py_SETREF(segment, tmp); + + paren_str = PyUnicode_FromString("\n)"); + if (!paren_str) { + goto done; + } + tmp = PyUnicode_Concat(segment, paren_str); + Py_DECREF(paren_str); + if (!tmp) { + goto done; + } + Py_SETREF(segment, tmp); + const char *segment_str = PyUnicode_AsUTF8(segment); if (!segment_str) { goto done; @@ -696,6 +950,11 @@ extract_anchors_from_line(PyObject *filename, PyObject *line, mod_ty module = _PyParser_ASTFromString(segment_str, filename, Py_file_input, &flags, arena); if (!module) { + if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_SyntaxError)) { + // AST parsing failed due to SyntaxError - ignore it + PyErr_Clear(); + res = 0; + } goto done; } if (!_PyAST_Optimize(module, arena, _Py_GetConfig()->optimization_level, 0)) { @@ -705,7 +964,9 @@ extract_anchors_from_line(PyObject *filename, PyObject *line, assert(module->kind == Module_kind); if (asdl_seq_LEN(module->v.Module.body) == 1) { stmt_ty statement = asdl_seq_GET(module->v.Module.body, 0); - res = extract_anchors_from_stmt(segment_str, statement, left_anchor, 
right_anchor, + res = extract_anchors_from_stmt(segment_lines, statement, + left_anchor_lineno, right_anchor_lineno, + left_anchor_col, right_anchor_col, primary_error_char, secondary_error_char); } else { res = 0; @@ -715,13 +976,26 @@ extract_anchors_from_line(PyObject *filename, PyObject *line, if (res > 0) { // Normalize the AST offsets to byte offsets and adjust them with the // start of the actual line (instead of the source code segment). - assert(segment != NULL); - assert(*left_anchor >= 0); - assert(*right_anchor >= 0); - *left_anchor = _PyPegen_byte_offset_to_character_offset(segment, *left_anchor) + start_offset; - *right_anchor = _PyPegen_byte_offset_to_character_offset(segment, *right_anchor) + start_offset; + assert(segment_lines != NULL); + assert(*left_anchor_lineno >= 0); + assert(*left_anchor_col >= 0); + assert(*right_anchor_lineno >= 0); + assert(*right_anchor_col >= 0); + *left_anchor_col = _PyPegen_byte_offset_to_character_offset( + PyList_GET_ITEM(segment_lines, *left_anchor_lineno), *left_anchor_col + ); + *right_anchor_col = _PyPegen_byte_offset_to_character_offset( + PyList_GET_ITEM(segment_lines, *right_anchor_lineno), *right_anchor_col + ); + if (*left_anchor_lineno == 0) { + *left_anchor_col += start_offset; + } + if (*right_anchor_lineno == 0) { + *right_anchor_col += start_offset; + } } Py_XDECREF(segment); + Py_XDECREF(segment_lines); if (arena) { _PyArena_Free(arena); } @@ -741,28 +1015,215 @@ ignore_source_errors(void) { return 0; } -static inline int -print_error_location_carets(PyObject *f, int offset, Py_ssize_t start_offset, Py_ssize_t end_offset, - Py_ssize_t right_start_offset, Py_ssize_t left_end_offset, - const char *primary, const char *secondary) { - int special_chars = (left_end_offset != -1 || right_start_offset != -1); - const char *str; - while (++offset <= end_offset) { - if (offset <= start_offset) { - str = " "; - } else if (special_chars && left_end_offset < offset && offset <= right_start_offset) { - str = 
secondary; - } else { - str = primary; +// helper data structure to keep track of which lines to output +typedef struct SignificantLines { + // we only add a maximum of 8 lines + Py_ssize_t lines[8]; + size_t size; +} SignificantLines; + +static void significant_lines_init(SignificantLines *sl) { + sl->size = 0; +} + +static void significant_lines_append(SignificantLines* sl, Py_ssize_t line, Py_ssize_t max_line) +{ + if (line < 0 || line > max_line) { + return; + } + assert(sl->size < 8); + sl->lines[sl->size++] = line; +} + +static int significant_lines_compare(const void *a, const void *b) +{ + return (int)(*(Py_ssize_t *)a - *(Py_ssize_t *)b); +} + +// sort lines and remove duplicate lines +static void significant_lines_process(SignificantLines *sl) +{ + qsort(sl->lines, sl->size, sizeof(Py_ssize_t), significant_lines_compare); + Py_ssize_t lines[8]; + size_t idx = 0; + for (size_t i = 0; i < sl->size; i++) { + if (i && sl->lines[i] == sl->lines[i - 1]) { + continue; } - if (PyFile_WriteString(str, f) < 0) { - return -1; + lines[idx++] = sl->lines[i]; + } + memcpy(sl->lines, lines, idx * sizeof(Py_ssize_t)); + sl->size = idx; +} + +// output lines[lineno] along with carets +static int +print_error_location_carets(PyObject *lines, Py_ssize_t lineno, + Py_ssize_t start_offset, Py_ssize_t end_offset, + Py_ssize_t left_end_lineno, Py_ssize_t right_start_lineno, + Py_ssize_t left_end_offset, Py_ssize_t right_start_offset, + const char *primary, const char *secondary, + PyObject *f, int indent, int margin_indent, const char *margin) +{ + Py_ssize_t num_lines = PyList_Size(lines); + PyObject *line = PyList_GET_ITEM(lines, lineno); + int special_chars = ( + left_end_lineno != -1 && left_end_offset != -1 && + right_start_lineno != -1 && right_start_offset != -1 + ); + Py_ssize_t len = (lineno == num_lines - 1) ? 
end_offset : PyUnicode_GET_LENGTH(line); + PyObject *carets = PyList_New(len); + if (!carets) { + goto error; + } + int kind = PyUnicode_KIND(line); + const void *data = PyUnicode_DATA(line); + bool has_non_ws = 0; + for (Py_ssize_t col = 0; col < len; col++) { + const char *ch = primary; + if (!has_non_ws) { + Py_UCS4 ch = PyUnicode_READ(kind, data, col); + if (!IS_WHITESPACE(ch)) { + has_non_ws = 1; + } } + if (!has_non_ws || (lineno == 0 && col < start_offset)) { + // before first non-ws char of the line, or before start of instruction + ch = " "; + } else if ( + special_chars && + (lineno > left_end_lineno || (lineno == left_end_lineno && col >= left_end_offset)) && + (lineno < right_start_lineno || (lineno == right_start_lineno && col < right_start_offset)) + ) { + // within anchors + ch = secondary; + } // else ch = primary + + PyObject *str = PyUnicode_FromString(ch); + if (!str) { + goto error; + } + PyList_SET_ITEM(carets, col, str); } - if (PyFile_WriteString("\n", f) < 0) { - return -1; + PyObject *caret_line_str = join_string_list("", carets); + if (!caret_line_str) { + goto error; } + int res = _write_line_with_margin_and_indent(f, caret_line_str, indent, margin_indent, margin); + Py_DECREF(caret_line_str); + if (res) { + goto error; + } + Py_DECREF(carets); return 0; +error: + Py_XDECREF(carets); + return -1; +} + +static int +_is_all_whitespace(PyObject *line) +{ + int kind = PyUnicode_KIND(line); + const void *data = PyUnicode_DATA(line); + for (Py_ssize_t i = 0; i < PyUnicode_GET_LENGTH(line); i++) { + Py_UCS4 ch = PyUnicode_READ(kind, data, i); + if (!IS_WHITESPACE(ch)) + return 0; + } + return 1; +} + +// C implementation of textwrap.dedent. +// Returns a new reference to a list of dedented lines, NULL on failure. +// Sets `truncation` to the number of characters truncated. +// In abnormal cases (errors, whitespace-only input), `truncation` is set to 0. 
+static PyObject* +dedent(PyObject *lines, Py_ssize_t *truncation) { + *truncation = 0; + PyObject *split = PyUnicode_Splitlines(lines, 0); + if (!split) { + return NULL; + } + // Replace whitespace only lines with empty lines + Py_ssize_t num_lines = PyList_Size(split); + assert(num_lines > 0); + for (Py_ssize_t i = 0; i < num_lines; i++) { + if (_is_all_whitespace(PyList_GET_ITEM(split, i))) { + PyObject *empty = PyUnicode_FromString(""); + if (!empty) { + goto error; + } + PyList_SetItem(split, i, empty); + } + } + + // Find a reference line - the first non-empty line. + // It is guaranteed to have a non-whitespace character. + Py_ssize_t ref_lineno = 0; + for (; ref_lineno < num_lines; ref_lineno++) { + if (PyUnicode_GET_LENGTH(PyList_GET_ITEM(split, ref_lineno)) > 0) { + break; + } + } + if (ref_lineno == num_lines) { + // empty input + goto done; + } + + // Compute the number of characters to dedent by. + // Increment `col` until either lines[ref_line][col] is non-ws, + // or there is another line i with lines[i][col] != lines[ref_line][col]. + Py_ssize_t col = 0; + PyObject *ref_line = PyList_GET_ITEM(split, ref_lineno); + Py_ssize_t ref_line_len = PyUnicode_GET_LENGTH(ref_line); + for (; col < ref_line_len; col++) { + Py_UCS4 ref_ch = PyUnicode_READ_CHAR(ref_line, col); + if (!IS_WHITESPACE(ref_ch)) { + goto dedent_compute_end; + } + // every line before ref_line is empty + for (Py_ssize_t i = ref_lineno + 1; i < num_lines; i++) { + PyObject* line = PyList_GET_ITEM(split, i); + if (PyUnicode_GET_LENGTH(line) == 0) { + continue; + } + // col >= len(line) implies the line is whitespace, + // which cannot happen since we replaced whitespace lines + // with empty strings. 
+ assert(col < PyUnicode_GET_LENGTH(line)); + Py_UCS4 ch = PyUnicode_READ_CHAR(line, col); + if (ch != ref_ch) { + goto dedent_compute_end; + } + } + } +dedent_compute_end: + + *truncation = col; + // truncate strings + if (col == 0) { + goto done; + } + for (Py_ssize_t i = 0; i < num_lines; i++) { + PyObject* line = PyList_GET_ITEM(split, i); + Py_ssize_t line_len = PyUnicode_GET_LENGTH(line); + if (line_len == 0) { + continue; + } + assert(col < line_len); + PyObject* truncated_line = PyUnicode_Substring(line, col, line_len); + if (!truncated_line) { + goto error; + } + PyList_SetItem(split, i, truncated_line); + } + +done: + return split; +error: + Py_XDECREF(split); + return NULL; } static int @@ -791,20 +1252,8 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen int err = 0; - int truncation = _TRACEBACK_SOURCE_LINE_INDENT; - PyObject* source_line = NULL; - int rc = display_source_line_with_margin( - f, filename, lineno, _TRACEBACK_SOURCE_LINE_INDENT, - margin_indent, margin, &truncation, &source_line); - if (rc != 0 || !source_line) { - /* ignore errors since we can't report them, can we? 
*/ - err = ignore_source_errors(); - goto done; - } - int code_offset = tb->tb_lasti; PyCodeObject* code = _PyFrame_GetCode(frame->f_frame); - const Py_ssize_t source_line_len = PyUnicode_GET_LENGTH(source_line); int start_line; int end_line; @@ -812,20 +1261,50 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen int end_col_byte_offset; if (!PyCode_Addr2Location(code, code_offset, &start_line, &start_col_byte_offset, &end_line, &end_col_byte_offset)) { - goto done; + start_line = end_line = lineno; + start_col_byte_offset = end_col_byte_offset = -1; } - if (start_line < 0 || end_line < 0 - || start_col_byte_offset < 0 - || end_col_byte_offset < 0) - { + if (start_line < 0) { + // in case something went wrong + start_line = lineno; + } + // only fetch first line if location information is missing + if (end_line < 0 || start_col_byte_offset < 0 || end_col_byte_offset < 0) { + end_line = lineno; + } + + PyObject* lines_original = NULL; + PyObject* lines = NULL; + Py_ssize_t num_lines = 0; + int rc = get_source_lines(filename, start_line, end_line, &lines_original); + if (rc || !lines_original) { + /* ignore errors since we can't report them, can we? 
*/ + err = ignore_source_errors(); + goto error; + } + + Py_ssize_t truncation = 0; + lines = dedent(lines_original, &truncation); + if (!lines) { + goto error; + } + num_lines = PyList_Size(lines); + + // only output first line if no column location is given + if (start_col_byte_offset < 0 || end_col_byte_offset < 0) { + if (_write_line_with_margin_and_indent( + f, PyList_GET_ITEM(lines, 0), _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin + )) { + goto error; + } goto done; } // When displaying errors, we will use the following generic structure: // // ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE - // ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^~~~~~~~~~~~~~~~~~~~ + // ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^~~~~~~~~~~~~~~~~~~~ // | |-> left_end_offset | |-> end_offset // |-> start_offset |-> right_start_offset // @@ -836,76 +1315,149 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen // AST information or we cannot identify special ranges within it, then left_end_offset and // right_end_offset will be set to -1. // + // To support displaying errors that span multiple lines, *left_end_lineno* and + // *right_start_lineno* contain the line numbers of the special ranges. + // // To keep the column indicators pertinent, they are not shown when the primary character - // spans the whole line. + // spans all of the error lines. + + PyObject *lines_original_split = PyUnicode_Splitlines(lines_original, 0); + assert(PyList_Size(lines_original_split) == num_lines); + if (!lines_original_split) { + goto error; + } // Convert the utf-8 byte offset to the actual character offset so we print the right number of carets. 
- assert(source_line); - Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset); + Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset( + PyList_GET_ITEM(lines_original_split, 0), start_col_byte_offset + ); if (start_offset < 0) { err = ignore_source_errors() < 0; - goto done; + Py_DECREF(lines_original_split); + goto error; } - Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset); + Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset( + PyList_GET_ITEM(lines_original_split, num_lines - 1), end_col_byte_offset + ); + Py_DECREF(lines_original_split); if (end_offset < 0) { err = ignore_source_errors() < 0; - goto done; + goto error; } + // adjust start/end offset based on dedent + start_offset = (start_offset < truncation) ? 0 : start_offset - truncation; + end_offset = (end_offset < truncation) ? 0 : end_offset - truncation; + + Py_ssize_t left_end_lineno = -1; Py_ssize_t left_end_offset = -1; + Py_ssize_t right_start_lineno = -1; Py_ssize_t right_start_offset = -1; char *primary_error_char = "^"; char *secondary_error_char = primary_error_char; - if (start_line == end_line) { - int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset, - &left_end_offset, &right_start_offset, - &primary_error_char, &secondary_error_char); - if (res < 0 && ignore_source_errors() < 0) { - goto done; - } + res = extract_anchors_from_line(filename, lines, start_offset, end_offset, + &left_end_lineno, &right_start_lineno, + &left_end_offset, &right_start_offset, + &primary_error_char, &secondary_error_char); + if (res < 0 && ignore_source_errors() < 0) { + goto error; } - else { - // If this is a multi-line expression, then we will highlight until - // the last non-whitespace character. 
- const char *source_line_str = PyUnicode_AsUTF8(source_line); - if (!source_line_str) { - goto done; - } - Py_ssize_t i = source_line_len; - while (--i >= 0) { - if (!IS_WHITESPACE(source_line_str[i])) { - break; - } + int show_carets = 1; + + // only display significant lines: first line, last line, lines around anchor start/end + SignificantLines sl; + significant_lines_init(&sl); + significant_lines_append(&sl, 0, num_lines - 1); + significant_lines_append(&sl, num_lines - 1, num_lines - 1); + + if (res == 0) { + // Elide indicators if primary char spans the frame line + PyObject *tmp = PyUnicode_Substring(PyList_GET_ITEM(lines, 0), 0, start_offset); + int before_start_empty = tmp && _is_all_whitespace(tmp); + Py_XDECREF(tmp); + PyObject *last_line = PyList_GET_ITEM(lines, num_lines - 1); + tmp = PyUnicode_Substring(last_line, end_offset, PyUnicode_GET_LENGTH(last_line)); + int after_end_empty = tmp && _is_all_whitespace(tmp); + Py_XDECREF(tmp); + if (before_start_empty && after_end_empty) { + show_carets = 0; + } + // clear anchor fields + left_end_lineno = left_end_offset = right_start_lineno = right_start_offset = -1; + } else { + for (int i = -1; i <= 1; ++i) { + significant_lines_append(&sl, i + left_end_lineno, num_lines - 1); + significant_lines_append(&sl, i + right_start_lineno, num_lines - 1); } - - end_offset = i + 1; - } - - // Elide indicators if primary char spans the frame line - Py_ssize_t stripped_line_len = source_line_len - truncation - _TRACEBACK_SOURCE_LINE_INDENT; - bool has_secondary_ranges = (left_end_offset != -1 || right_start_offset != -1); - if (end_offset - start_offset == stripped_line_len && !has_secondary_ranges) { - goto done; } - if (_Py_WriteIndentedMargin(margin_indent, margin, f) < 0) { - err = -1; - goto done; - } + // sort and dedupe significant lines + significant_lines_process(&sl); - if (print_error_location_carets(f, truncation, start_offset, end_offset, - right_start_offset, left_end_offset, - primary_error_char, 
secondary_error_char) < 0) { - err = -1; - goto done; + for (size_t i = 0; i < sl.size; i++) { + if (i > 0) { + Py_ssize_t linediff = sl.lines[i] - sl.lines[i - 1]; + if (linediff == 2) { + // only 1 line in between - just print it out + if (_write_line_with_margin_and_indent( + f, PyList_GET_ITEM(lines, sl.lines[i] - 1), _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin + )) { + goto error; + } + if (show_carets && print_error_location_carets( + lines, sl.lines[i] - 1, + start_offset, end_offset, + left_end_lineno, right_start_lineno, + left_end_offset, right_start_offset, + primary_error_char, secondary_error_char, + f, _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin + )) { + goto error; + } + } else if (linediff > 2) { + // more than 1 line in between - abbreviate + PyObject *abbrv_str = PyUnicode_FromFormat("...<%d lines>...", (int)linediff - 1); + if (!abbrv_str) { + goto error; + } + int write_res = _write_line_with_margin_and_indent( + f, abbrv_str, _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin + ); + Py_DECREF(abbrv_str); + if (write_res) { + goto error; + } + } + } + // print the current line + if (_write_line_with_margin_and_indent( + f, PyList_GET_ITEM(lines, sl.lines[i]), _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin + )) { + goto error; + } + if (show_carets && print_error_location_carets( + lines, sl.lines[i], + start_offset, end_offset, + left_end_lineno, right_start_lineno, + left_end_offset, right_start_offset, + primary_error_char, secondary_error_char, + f, _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin + )) { + goto error; + } } done: - Py_XDECREF(source_line); + Py_DECREF(lines_original); + Py_DECREF(lines); + return 0; +error: + Py_XDECREF(lines_original); + Py_XDECREF(lines); return err; } @@ -1356,4 +1908,3 @@ _Py_DumpTracebackThreads(int fd, PyInterpreterState *interp, return NULL; } -