@@ -77,83 +77,13 @@ def translate(pat):
77
77
There is no way to quote meta-characters.
78
78
"""
79
79
80
- STAR = object ()
81
- res = []
82
- add = res .append
83
- i , n = 0 , len (pat )
84
- while i < n :
85
- c = pat [i ]
86
- i = i + 1
87
- if c == '*' :
88
- # compress consecutive `*` into one
89
- if (not res ) or res [- 1 ] is not STAR :
90
- add (STAR )
91
- elif c == '?' :
92
- add ('.' )
93
- elif c == '[' :
94
- j = i
95
- if j < n and pat [j ] == '!' :
96
- j = j + 1
97
- if j < n and pat [j ] == ']' :
98
- j = j + 1
99
- while j < n and pat [j ] != ']' :
100
- j = j + 1
101
- if j >= n :
102
- add ('\\ [' )
103
- else :
104
- stuff = pat [i :j ]
105
- if '-' not in stuff :
106
- stuff = stuff .replace ('\\ ' , r'\\' )
107
- else :
108
- chunks = []
109
- k = i + 2 if pat [i ] == '!' else i + 1
110
- while True :
111
- k = pat .find ('-' , k , j )
112
- if k < 0 :
113
- break
114
- chunks .append (pat [i :k ])
115
- i = k + 1
116
- k = k + 3
117
- chunk = pat [i :j ]
118
- if chunk :
119
- chunks .append (chunk )
120
- else :
121
- chunks [- 1 ] += '-'
122
- # Remove empty ranges -- invalid in RE.
123
- for k in range (len (chunks )- 1 , 0 , - 1 ):
124
- if chunks [k - 1 ][- 1 ] > chunks [k ][0 ]:
125
- chunks [k - 1 ] = chunks [k - 1 ][:- 1 ] + chunks [k ][1 :]
126
- del chunks [k ]
127
- # Escape backslashes and hyphens for set difference (--).
128
- # Hyphens that create ranges shouldn't be escaped.
129
- stuff = '-' .join (s .replace ('\\ ' , r'\\' ).replace ('-' , r'\-' )
130
- for s in chunks )
131
- # Escape set operations (&&, ~~ and ||).
132
- stuff = re .sub (r'([&~|])' , r'\\\1' , stuff )
133
- i = j + 1
134
- if not stuff :
135
- # Empty range: never match.
136
- add ('(?!)' )
137
- elif stuff == '!' :
138
- # Negated empty range: match any character.
139
- add ('.' )
140
- else :
141
- if stuff [0 ] == '!' :
142
- stuff = '^' + stuff [1 :]
143
- elif stuff [0 ] in ('^' , '[' ):
144
- stuff = '\\ ' + stuff
145
- add (f'[{ stuff } ]' )
146
- else :
147
- add (re .escape (c ))
148
- assert i == n
149
-
150
80
# Deal with STARs.
151
- inp = res
81
+ inp = _scanner . scan ( pat )[ 0 ]
152
82
res = []
153
83
add = res .append
154
84
i , n = 0 , len (inp )
155
85
# Fixed pieces at the start?
156
- while i < n and inp [i ] is not STAR :
86
+ while i < n and inp [i ] is not _STAR :
157
87
add (inp [i ])
158
88
i += 1
159
89
# Now deal with STAR fixed STAR fixed ...
@@ -164,14 +94,14 @@ def translate(pat):
164
94
# translate() results together via "|" to build large regexps matching
165
95
# "one of many" shell patterns.
166
96
while i < n :
167
- assert inp [i ] is STAR
97
+ assert inp [i ] is _STAR
168
98
i += 1
169
99
if i == n :
170
100
add (".*" )
171
101
break
172
- assert inp [i ] is not STAR
102
+ assert inp [i ] is not _STAR
173
103
fixed = []
174
- while i < n and inp [i ] is not STAR :
104
+ while i < n and inp [i ] is not _STAR :
175
105
fixed .append (inp [i ])
176
106
i += 1
177
107
fixed = "" .join (fixed )
@@ -183,3 +113,43 @@ def translate(pat):
183
113
assert i == n
184
114
res = "" .join (res )
185
115
return fr'(?s:{ res } )\Z'
116
+
117
+
118
+ def _translate_literal (scanner , token ):
119
+ """Translate a literal token to a regular expression."""
120
+ return re .escape (token )
121
+
122
+
123
+ def _translate_range (scanner , token ):
124
+ """Translate a character range, like 'a-z', to a regular expression."""
125
+ start , end = token [0 ], token [2 ]
126
+ if start > end :
127
+ # Remove empty ranges -- invalid in RE.
128
+ return ''
129
+ return f'{ re .escape (start )} -{ re .escape (end )} '
130
+
131
+
132
def _translate_set(scanner, token):
    """Convert a bracketed set such as '[a-z]' or '[!ij]' into a regex.

    *token* is the whole bracket expression, including the surrounding
    '[' and ']'.  A '!' directly after the opening bracket negates the
    set.  The contents are re-tokenized with ``_set_scanner`` so that
    ranges and literal characters are escaped individually.

    If nothing is left after translation (for example every range was
    empty), a character class cannot be emitted: the plain form becomes
    '(?!)', which never matches, and the negated form becomes '.',
    which matches any single character.

    The *scanner* argument is supplied by ``re.Scanner`` and is not used.
    """
    is_negated = token[1] == '!'
    # Skip '[' (and '!' when present) and drop the closing ']'.
    content_start = 2 if is_negated else 1
    pieces, _ = _set_scanner.scan(token[content_start:-1])
    body = ''.join(pieces)
    if not body:
        return '.' if is_negated else '(?!)'
    if is_negated:
        return f'[^{body}]'
    return f'[{body}]'
141
+
142
+
143
# Unique marker emitted for every run of '*'.  translate() compares
# against it by identity to tell wildcards from translated fragments.
_STAR = object()

# Tokenizer for a complete shell pattern.  The rules are alternatives
# tried in the order listed, so the catch-all literal rule stays last.
_scanner = re.Scanner(
    [
        # Any number of consecutive '*' collapses into a single marker.
        (r'\*+', _STAR),
        # '?' stands for exactly one character.
        (r'\?', '.'),
        # A bracketed set: optional '!', optional leading ']', then
        # everything up to the closing ']'.  The possessive '?+'
        # quantifiers stop the regex from backtracking out of the
        # optional '!' and ']' once they have been consumed.
        (r'\[!?+\]?+[^\]]*\]', _translate_set),
        # Every other single character is matched literally.
        (r'.', _translate_literal),
    ],
    flags=re.DOTALL,
)

# Tokenizer for the inside of a bracketed set (brackets and any leading
# '!' already removed): three-character ranges first, then literals.
_set_scanner = re.Scanner(
    [
        (r'.-.', _translate_range),
        (r'.', _translate_literal),
    ],
    flags=re.DOTALL,
)
0 commit comments