Skip to content

Commit e921a80

Browse files
committed
pythongh-131507: Add support for syntax highlighting in PyREPL
1 parent acb222c commit e921a80

File tree

3 files changed

+199
-15
lines changed

3 files changed

+199
-15
lines changed

Lib/_pyrepl/reader.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,8 @@
2727
from dataclasses import dataclass, field, fields
2828
from _colorize import can_colorize, ANSIColors
2929

30-
3130
from . import commands, console, input
32-
from .utils import wlen, unbracket, disp_str
31+
from .utils import wlen, unbracket, disp_str, gen_colors
3332
from .trace import trace
3433

3534

@@ -38,8 +37,7 @@
3837
from .types import Callback, SimpleContextManager, KeySpec, CommandName
3938

4039

41-
# syntax classes:
42-
40+
# syntax classes
4341
SYNTAX_WHITESPACE, SYNTAX_WORD, SYNTAX_SYMBOL = range(3)
4442

4543

@@ -144,16 +142,17 @@ class Reader:
144142
Instance variables of note include:
145143
146144
* buffer:
147-
A *list* (*not* a string at the moment :-) containing all the
148-
characters that have been entered.
145+
A per-character list containing all the characters that have been
146+
entered. Does not include color information.
149147
* console:
150148
Hopefully encapsulates the OS dependent stuff.
151149
* pos:
152150
A 0-based index into 'buffer' for where the insertion point
153151
is.
154152
* screeninfo:
155-
Ahem. This list contains some info needed to move the
156-
insertion point around reasonably efficiently.
153+
A list of screen position tuples. Each list element is a tuple
154+
representing information on visible line length for a given line.
155+
Allows for efficient skipping of color escape sequences.
157156
* cxy, lxy:
158157
the position of the insertion point in screen ...
159158
* syntax_table:
@@ -316,6 +315,11 @@ def calc_screen(self) -> list[str]:
316315
pos -= offset
317316

318317
prompt_from_cache = (offset and self.buffer[offset - 1] != "\n")
318+
319+
if self.can_colorize:
320+
colors = list(gen_colors(self.get_unicode()))
321+
else:
322+
colors = None
319323
lines = "".join(self.buffer[offset:]).split("\n")
320324
cursor_found = False
321325
lines_beyond_cursor = 0
@@ -343,7 +347,7 @@ def calc_screen(self) -> list[str]:
343347
screeninfo.append((0, []))
344348
pos -= line_len + 1
345349
prompt, prompt_len = self.process_prompt(prompt)
346-
chars, char_widths = disp_str(line)
350+
chars, char_widths = disp_str(line, colors, offset)
347351
wrapcount = (sum(char_widths) + prompt_len) // self.console.width
348352
trace("wrapcount = {wrapcount}", wrapcount=wrapcount)
349353
if wrapcount == 0 or not char_widths:
@@ -567,6 +571,7 @@ def insert(self, text: str | list[str]) -> None:
567571
def update_cursor(self) -> None:
568572
"""Move the cursor to reflect changes in self.pos"""
569573
self.cxy = self.pos2xy()
574+
trace("update_cursor({pos}) = {cxy}", pos=self.pos, cxy=self.cxy)
570575
self.console.move_cursor(*self.cxy)
571576

572577
def after_command(self, cmd: Command) -> None:

Lib/_pyrepl/utils.py

Lines changed: 119 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,56 @@
22
import unicodedata
33
import functools
44

5+
from idlelib import colorizer
6+
from typing import cast, Iterator, Literal, Match, NamedTuple, Pattern, Self
7+
from _colorize import ANSIColors
8+
59
from .types import CharBuffer, CharWidths
610
from .trace import trace
711

812
ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
913
ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
1014
ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})
15+
COLORIZE_RE: Pattern[str] = colorizer.prog
16+
IDENTIFIER_RE: Pattern[str] = colorizer.idprog
17+
IDENTIFIERS_AFTER = {"def", "class"}
18+
COLORIZE_GROUP_NAME_MAP: dict[str, str] = colorizer.prog_group_name_to_tag
19+
20+
type ColorTag = (
21+
Literal["KEYWORD"]
22+
| Literal["BUILTIN"]
23+
| Literal["COMMENT"]
24+
| Literal["STRING"]
25+
| Literal["DEFINITION"]
26+
| Literal["SYNC"]
27+
)
28+
29+
30+
class Span(NamedTuple):
31+
"""Span indexing that's inclusive on both ends."""
32+
33+
start: int
34+
end: int
35+
36+
@classmethod
37+
def from_re(cls, m: Match[str], group: int | str) -> Self:
38+
re_span = m.span(group)
39+
return cls(re_span[0], re_span[1] - 1)
40+
41+
42+
class ColorSpan(NamedTuple):
    """A color tag paired with the inclusive index span it applies to."""

    span: Span  # inclusive range of character indexes to colorize
    tag: ColorTag  # looked up in TAG_TO_ANSI to obtain the escape sequence
45+
46+
47+
# Escape sequence emitted for each color tag.  "SYNC" maps to the reset
# sequence; disp_str() also appends it explicitly whenever a span ends.
TAG_TO_ANSI: dict[ColorTag, str] = {
    "KEYWORD": ANSIColors.BOLD_BLUE,
    "BUILTIN": ANSIColors.CYAN,
    "COMMENT": ANSIColors.RED,
    "STRING": ANSIColors.GREEN,
    "DEFINITION": ANSIColors.BOLD_WHITE,
    "SYNC": ANSIColors.RESET,
}
1155

1256

1357
@functools.cache
@@ -41,25 +85,82 @@ def unbracket(s: str, including_content: bool = False) -> str:
4185
return s.translate(ZERO_WIDTH_TRANS)
4286

4387

44-
def disp_str(buffer: str) -> tuple[CharBuffer, CharWidths]:
45-
r"""Decompose the input buffer into a printable variant.
88+
def gen_colors(buffer: str) -> Iterator[ColorSpan]:
    """Yield the color spans found in `buffer`, in match order.

    This is a generator, not a list: every successive match of COLORIZE_RE
    in `buffer` is expanded by gen_color_spans() into one or more ColorSpan
    items.  Callers that need to consume spans across several calls (as
    disp_str() does) must materialize the result with ``list()`` first.

    The input `buffer` should be a valid start of a Python code block, i.e.
    it cannot be a block starting in the middle of a multiline string.
    """
    for match in COLORIZE_RE.finditer(buffer):
        yield from gen_color_spans(match)
96+
97+
98+
def gen_color_spans(re_match: Match[str]) -> Iterator[ColorSpan]:
    """Generate non-empty color spans for a single COLORIZE_RE match.

    Every named group of `re_match` that captured text yields one ColorSpan.
    The group name is translated through COLORIZE_GROUP_NAME_MAP; names
    missing from that map are used as the tag unchanged.  When the captured
    text is one of IDENTIFIERS_AFTER ("def", "class"), the identifier that
    follows it is yielded as an additional "DEFINITION" span.
    """
    for group_name, text in re_match.groupdict().items():
        if not text:
            # The group either did not take part in the match (None) or
            # matched the empty string; there is nothing to color.
            continue
        span = Span.from_re(re_match, group_name)
        tag = COLORIZE_GROUP_NAME_MAP.get(group_name, group_name)
        yield ColorSpan(span, cast(ColorTag, tag))
        if text in IDENTIFIERS_AFTER:
            # Look for the name being defined, starting right after the
            # keyword.  Group 1 of IDENTIFIER_RE is taken to be that name.
            if name_match := IDENTIFIER_RE.match(re_match.string, span.end + 1):
                yield ColorSpan(Span.from_re(name_match, 1), "DEFINITION")
110+
111+
112+
def disp_str(
113+
buffer: str, colors: list[ColorSpan] | None = None, start_index: int = 0
114+
) -> tuple[CharBuffer, CharWidths]:
115+
r"""Decompose the input buffer into a printable variant with applied colors.
46116
47117
Returns a tuple of two lists:
48-
- the first list is the input buffer, character by character;
118+
- the first list is the input buffer, character by character, with color
119+
escape codes added (while those codes contain multiple ASCII characters,
120+
each code is considered atomic *and is attached for the corresponding
121+
visible character*);
49122
- the second list is the visible width of each character in the input
50123
buffer.
51124
125+
Note on colors:
126+
- The `colors` list, if provided, is partially consumed within. We're using
127+
a list and not a generator since we need to hold onto the current
128+
unfinished span between calls to disp_str in case of multiline strings.
129+
- The `colors` list is computed from the start of the input block. `buffer`
130+
is only a subset of that input block, a single line within. This is why
131+
we need `start_index` to inform us which position is the start of `buffer`
132+
actually within user input. This allows us to match color spans correctly.
133+
52134
Examples:
53135
>>> utils.disp_str("a = 9")
54136
(['a', ' ', '=', ' ', '9'], [1, 1, 1, 1, 1])
137+
138+
>>> line = "while 1:"
139+
>>> colors = list(utils.gen_colors(line))
140+
>>> utils.disp_str(line, colors=colors)
141+
(['\x1b[1;34mw', 'h', 'i', 'l', 'e\x1b[0m', ' ', '1', ':'], [1, 1, 1, 1, 1, 1, 1, 1])
142+
55143
"""
56144
chars: CharBuffer = []
57145
char_widths: CharWidths = []
58146

59147
if not buffer:
60148
return chars, char_widths
61149

62-
for c in buffer:
150+
while colors and colors[0].span.end < start_index:
151+
# move past irrelevant spans
152+
colors.pop(0)
153+
154+
pre_color = ""
155+
post_color = ""
156+
if colors and colors[0].span.start < start_index:
157+
# looks like we're continuing a previous color (e.g. a multiline str)
158+
pre_color = TAG_TO_ANSI[colors[0].tag]
159+
160+
for i, c in enumerate(buffer, start_index):
161+
if colors and colors[0].span.start == i: # new color starts now
162+
pre_color = TAG_TO_ANSI[colors[0].tag]
163+
63164
if c == "\x1a": # CTRL-Z on Windows
64165
chars.append(c)
65166
char_widths.append(2)
@@ -73,5 +174,19 @@ def disp_str(buffer: str) -> tuple[CharBuffer, CharWidths]:
73174
else:
74175
chars.append(c)
75176
char_widths.append(str_width(c))
177+
178+
if colors and colors[0].span.end == i: # current color ends now
179+
post_color = TAG_TO_ANSI["SYNC"]
180+
colors.pop(0)
181+
182+
chars[-1] = pre_color + chars[-1] + post_color
183+
pre_color = ""
184+
post_color = ""
185+
186+
if colors and colors[0].span.start < i and colors[0].span.end > i:
187+
# even though the current color should be continued, reset it for now.
188+
# the next call to `disp_str()` will revive it.
189+
chars[-1] += TAG_TO_ANSI["SYNC"]
190+
76191
trace("disp_str({buffer}) = {s}, {b}", buffer=repr(buffer), s=chars, b=char_widths)
77192
return chars, char_widths

Lib/test/test_pyrepl/test_reader.py

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,20 @@
11
import itertools
22
import functools
33
import rlcompleter
4+
from textwrap import dedent
45
from unittest import TestCase
56
from unittest.mock import MagicMock
67

78
from .support import handle_all_events, handle_events_narrow_console
89
from .support import ScreenEqualMixin, code_to_events
9-
from .support import prepare_reader, prepare_console
10+
from .support import prepare_reader, prepare_console, reader_force_colors
1011
from _pyrepl.console import Event
1112
from _pyrepl.reader import Reader
13+
from _pyrepl.utils import TAG_TO_ANSI
14+
15+
16+
colors = {k[0].lower(): v for k, v in TAG_TO_ANSI.items() if k != "SYNC"}
17+
colors["z"] = TAG_TO_ANSI["SYNC"]
1218

1319

1420
class TestReader(ScreenEqualMixin, TestCase):
@@ -123,8 +129,9 @@ def test_setpos_for_xy_simple(self):
123129
def test_control_characters(self):
124130
code = 'flag = "🏳️‍🌈"'
125131
events = code_to_events(code)
126-
reader, _ = handle_all_events(events)
132+
reader, _ = handle_all_events(events, prepare_reader=reader_force_colors)
127133
self.assert_screen_equal(reader, 'flag = "🏳️\\u200d🌈"', clean=True)
134+
self.assert_screen_equal(reader, 'flag = {s}"🏳️\\u200d🌈"{z}'.format(**colors))
128135

129136
def test_setpos_from_xy_multiple_lines(self):
130137
# fmt: off
@@ -355,3 +362,60 @@ def test_setpos_from_xy_for_non_printing_char(self):
355362
reader, _ = handle_all_events(events)
356363
reader.setpos_from_xy(8, 0)
357364
self.assertEqual(reader.pos, 7)
365+
366+
def test_syntax_highlighting_basic(self):
367+
code = dedent(
368+
"""\
369+
import re, sys
370+
def funct(case: str = sys.platform) -> None:
371+
match = re.search(
372+
"(me)",
373+
'''
374+
Come on
375+
Come on now
376+
You know that it's time to emerge
377+
''',
378+
)
379+
match case:
380+
case "emscripten": print("on the web")
381+
case "ios" | "android": print("on the phone")
382+
case _: print('arms around', match.group(1))
383+
"""
384+
)
385+
expected = dedent(
386+
"""\
387+
{k}import{z} re, sys
388+
{a}{k}def{z} {d}funct{z}(case: {b}str{z} = sys.platform) -> {k}None{z}:
389+
match = re.search(
390+
{s}"(me)"{z},
391+
{s}'''{z}
392+
{s} Come on{z}
393+
{s} Come on now{z}
394+
{s} You know that it's time to emerge{z}
395+
{s} '''{z},
396+
)
397+
{k}match{z} case:
398+
{k}case{z} {s}"emscripten"{z}: {b}print{z}({s}"on the web"{z})
399+
{k}case{z} {s}"ios"{z} | {s}"android"{z}: {b}print{z}({s}"on the phone"{z})
400+
{k}case{z} {k}_{z}: {b}print{z}({s}'arms around'{z}, match.group(1))
401+
"""
402+
)
403+
expected_sync = expected.format(a="", **colors)
404+
events = code_to_events(code)
405+
reader, _ = handle_all_events(events, prepare_reader=reader_force_colors)
406+
self.assert_screen_equal(reader, code, clean=True)
407+
self.assert_screen_equal(reader, expected_sync)
408+
self.assertEqual(reader.pos, 2**7 + 2**8)
409+
self.assertEqual(reader.cxy, (0, 14))
410+
411+
async_msg = "{k}async{z} ".format(**colors)
412+
expected_async = expected.format(a=async_msg, **colors)
413+
more_events = itertools.chain(
414+
code_to_events(code),
415+
[Event(evt="key", data="up", raw=bytearray(b"\x1bOA"))] * 13,
416+
code_to_events("async "),
417+
)
418+
reader, _ = handle_all_events(more_events, prepare_reader=reader_force_colors)
419+
self.assert_screen_equal(reader, expected_async)
420+
self.assertEqual(reader.pos, 21)
421+
self.assertEqual(reader.cxy, (6, 1))

0 commit comments

Comments
 (0)