2
2
import unicodedata
3
3
import functools
4
4
5
+ from idlelib import colorizer
6
+ from typing import cast , Iterator , Literal , Match , NamedTuple , Pattern , Self
7
+ from _colorize import ANSIColors
8
+
5
9
from .types import CharBuffer , CharWidths
6
10
from .trace import trace
7
11
8
12
# Matches one ANSI CSI escape sequence: ESC, "[", any number of bytes in the
# range " ".."@", and a final byte in the range "A".."~".
ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
# Matches a zero-width bracket: "\x01", the shortest run of characters that
# follows, and the closing "\x02".
ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
# Translation table that deletes only the "\x01" / "\x02" markers themselves.
# str.maketrans() requires string keys of length exactly 1, so each key must
# be the bare control character.
ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})
15
# Keywords after which gen_color_spans() looks for a following identifier and
# tags it as a DEFINITION.
IDENTIFIERS_AFTER = {"def", "class"}

# The patterns below are taken from idlelib's colorizer module rather than
# being defined here.
COLORIZE_RE: Pattern[str] = colorizer.prog
IDENTIFIER_RE: Pattern[str] = colorizer.idprog
# Regex group name -> color tag. gen_color_spans() uses a group name that is
# absent from this mapping as the tag itself.
COLORIZE_GROUP_NAME_MAP: dict[str, str] = colorizer.prog_group_name_to_tag
19
+
20
+ type ColorTag = (
21
+ Literal ["KEYWORD" ]
22
+ | Literal ["BUILTIN" ]
23
+ | Literal ["COMMENT" ]
24
+ | Literal ["STRING" ]
25
+ | Literal ["DEFINITION" ]
26
+ | Literal ["SYNC" ]
27
+ )
28
+
29
+
30
+ class Span (NamedTuple ):
31
+ """Span indexing that's inclusive on both ends."""
32
+
33
+ start : int
34
+ end : int
35
+
36
+ @classmethod
37
+ def from_re (cls , m : Match [str ], group : int | str ) -> Self :
38
+ re_span = m .span (group )
39
+ return cls (re_span [0 ], re_span [1 ] - 1 )
40
+
41
+
42
class ColorSpan(NamedTuple):
    """A span of the input paired with the color tag to apply to it."""

    # Inclusive index range that should be colored.
    span: Span
    # Tag naming the color; TAG_TO_ANSI maps it to an escape sequence.
    tag: ColorTag
45
+
46
+
47
# Escape sequence emitted in front of the first character of a span carrying
# the given tag. "SYNC" maps to the reset sequence, which disp_str() also
# appends after the last character of every colored span.
TAG_TO_ANSI: dict[ColorTag, str] = {
    "KEYWORD": ANSIColors.BOLD_BLUE,
    "BUILTIN": ANSIColors.CYAN,
    "COMMENT": ANSIColors.RED,
    "STRING": ANSIColors.GREEN,
    "DEFINITION": ANSIColors.BOLD_WHITE,
    "SYNC": ANSIColors.RESET,
}
11
55
12
56
13
57
@functools .cache
@@ -41,25 +85,82 @@ def unbracket(s: str, including_content: bool = False) -> str:
41
85
return s .translate (ZERO_WIDTH_TRANS )
42
86
43
87
44
- def disp_str (buffer : str ) -> tuple [CharBuffer , CharWidths ]:
45
- r"""Decompose the input buffer into a printable variant.
88
def gen_colors(buffer: str) -> Iterator[ColorSpan]:
    """Yield the color spans found in `buffer`, in non-overlapping order.

    The input `buffer` should be a valid start of a Python code block, i.e.
    it cannot be a block starting in the middle of a multiline string.

    Spans are yielded lazily, one `ColorSpan` per colored region, by running
    `COLORIZE_RE` over the buffer and expanding every match with
    `gen_color_spans()`. A span that starts at or before the end of the
    previously yielded span is dropped.
    """
    last_end = -1
    for match in COLORIZE_RE.finditer(buffer):
        for color in gen_color_spans(match):
            if color.span.start <= last_end:
                # gen_color_spans() looks past a `def`/`class` keyword to tag
                # the following name as DEFINITION. That name lies outside
                # the current regex match, so a later match may cover it
                # again (e.g. when the defined name is also a builtin).
                # disp_str() only inspects the head of the list and compares
                # span boundaries by equality, so a span lying behind the
                # current position would block all later spans on that line.
                continue
            last_end = color.span.end
            yield color
96
+
97
+
98
def gen_color_spans(re_match: Match[str]) -> Iterator[ColorSpan]:
    """Yield a ColorSpan for every named group of `re_match` that matched text.

    Groups that did not participate or matched the empty string are skipped.
    The group name is translated through COLORIZE_GROUP_NAME_MAP (falling
    back to the name itself) to obtain the tag. When the matched text is one
    of IDENTIFIERS_AFTER, the identifier following it, if any, is yielded as
    an additional "DEFINITION" span.
    """
    for group_name, text in re_match.groupdict().items():
        if not text:
            continue

        where = Span.from_re(re_match, group_name)
        color_tag = COLORIZE_GROUP_NAME_MAP.get(group_name, group_name)
        yield ColorSpan(where, cast(ColorTag, color_tag))

        if text not in IDENTIFIERS_AFTER:
            continue

        # Look for the defined name right after the keyword's last character.
        name_match = IDENTIFIER_RE.match(re_match.string, where.end + 1)
        if name_match:
            yield ColorSpan(Span.from_re(name_match, 1), "DEFINITION")
110
+
111
+
112
+ def disp_str (
113
+ buffer : str , colors : list [ColorSpan ] | None = None , start_index : int = 0
114
+ ) -> tuple [CharBuffer , CharWidths ]:
115
+ r"""Decompose the input buffer into a printable variant with applied colors.
46
116
47
117
Returns a tuple of two lists:
48
- - the first list is the input buffer, character by character;
118
+ - the first list is the input buffer, character by character, with color
119
+ escape codes added (while those codes contain multiple ASCII characters,
120
+ each code is considered atomic *and is attached for the corresponding
121
+ visible character*);
49
122
- the second list is the visible width of each character in the input
50
123
buffer.
51
124
125
+ Note on colors:
126
+ - The `colors` list, if provided, is partially consumed within. We're using
127
+ a list and not a generator since we need to hold onto the current
128
+ unfinished span between calls to disp_str in case of multiline strings.
129
+ - The `colors` list is computed from the start of the input block. `buffer`
130
+ is only a subset of that input block, a single line within. This is why
131
+ we need `start_index` to inform us which position is the start of `buffer`
132
+ actually within user input. This allows us to match color spans correctly.
133
+
52
134
Examples:
53
135
>>> utils.disp_str("a = 9")
54
136
(['a', ' ', '=', ' ', '9'], [1, 1, 1, 1, 1])
137
+
138
+ >>> line = "while 1:"
139
+ >>> colors = list(utils.gen_colors(line))
140
+ >>> utils.disp_str(line, colors=colors)
141
+ (['\x1b[1;34mw', 'h', 'i', 'l', 'e\x1b[0m', ' ', '1', ':'], [1, 1, 1, 1, 1, 1, 1, 1])
142
+
55
143
"""
56
144
chars : CharBuffer = []
57
145
char_widths : CharWidths = []
58
146
59
147
if not buffer :
60
148
return chars , char_widths
61
149
62
- for c in buffer :
150
+ while colors and colors [0 ].span .end < start_index :
151
+ # move past irrelevant spans
152
+ colors .pop (0 )
153
+
154
+ pre_color = ""
155
+ post_color = ""
156
+ if colors and colors [0 ].span .start < start_index :
157
+ # looks like we're continuing a previous color (e.g. a multiline str)
158
+ pre_color = TAG_TO_ANSI [colors [0 ].tag ]
159
+
160
+ for i , c in enumerate (buffer , start_index ):
161
+ if colors and colors [0 ].span .start == i : # new color starts now
162
+ pre_color = TAG_TO_ANSI [colors [0 ].tag ]
163
+
63
164
if c == "\x1a " : # CTRL-Z on Windows
64
165
chars .append (c )
65
166
char_widths .append (2 )
@@ -73,5 +174,19 @@ def disp_str(buffer: str) -> tuple[CharBuffer, CharWidths]:
73
174
else :
74
175
chars .append (c )
75
176
char_widths .append (str_width (c ))
177
+
178
+ if colors and colors [0 ].span .end == i : # current color ends now
179
+ post_color = TAG_TO_ANSI ["SYNC" ]
180
+ colors .pop (0 )
181
+
182
+ chars [- 1 ] = pre_color + chars [- 1 ] + post_color
183
+ pre_color = ""
184
+ post_color = ""
185
+
186
+ if colors and colors [0 ].span .start < i and colors [0 ].span .end > i :
187
+ # even though the current color should be continued, reset it for now.
188
+ # the next call to `disp_str()` will revive it.
189
+ chars [- 1 ] += TAG_TO_ANSI ["SYNC" ]
190
+
76
191
trace ("disp_str({buffer}) = {s}, {b}" , buffer = repr (buffer ), s = chars , b = char_widths )
77
192
return chars , char_widths
0 commit comments