Skip to content

Commit 83accca

Browse files
committed
GH-72904: Simplify implementation of fnmatch.translate()
Use `re.Scanner` to scan shell-style patterns, rather than parsing them by hand in a fat loop. This makes the code slower (!) but more obvious, and lays some groundwork for a future `glob.translate()` function.
1 parent d73c12b commit 83accca

File tree

2 files changed

+47
-77
lines changed

2 files changed

+47
-77
lines changed

Lib/fnmatch.py

Lines changed: 45 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -77,83 +77,13 @@ def translate(pat):
7777
There is no way to quote meta-characters.
7878
"""
7979

80-
STAR = object()
81-
res = []
82-
add = res.append
83-
i, n = 0, len(pat)
84-
while i < n:
85-
c = pat[i]
86-
i = i+1
87-
if c == '*':
88-
# compress consecutive `*` into one
89-
if (not res) or res[-1] is not STAR:
90-
add(STAR)
91-
elif c == '?':
92-
add('.')
93-
elif c == '[':
94-
j = i
95-
if j < n and pat[j] == '!':
96-
j = j+1
97-
if j < n and pat[j] == ']':
98-
j = j+1
99-
while j < n and pat[j] != ']':
100-
j = j+1
101-
if j >= n:
102-
add('\\[')
103-
else:
104-
stuff = pat[i:j]
105-
if '-' not in stuff:
106-
stuff = stuff.replace('\\', r'\\')
107-
else:
108-
chunks = []
109-
k = i+2 if pat[i] == '!' else i+1
110-
while True:
111-
k = pat.find('-', k, j)
112-
if k < 0:
113-
break
114-
chunks.append(pat[i:k])
115-
i = k+1
116-
k = k+3
117-
chunk = pat[i:j]
118-
if chunk:
119-
chunks.append(chunk)
120-
else:
121-
chunks[-1] += '-'
122-
# Remove empty ranges -- invalid in RE.
123-
for k in range(len(chunks)-1, 0, -1):
124-
if chunks[k-1][-1] > chunks[k][0]:
125-
chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
126-
del chunks[k]
127-
# Escape backslashes and hyphens for set difference (--).
128-
# Hyphens that create ranges shouldn't be escaped.
129-
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
130-
for s in chunks)
131-
# Escape set operations (&&, ~~ and ||).
132-
stuff = re.sub(r'([&~|])', r'\\\1', stuff)
133-
i = j+1
134-
if not stuff:
135-
# Empty range: never match.
136-
add('(?!)')
137-
elif stuff == '!':
138-
# Negated empty range: match any character.
139-
add('.')
140-
else:
141-
if stuff[0] == '!':
142-
stuff = '^' + stuff[1:]
143-
elif stuff[0] in ('^', '['):
144-
stuff = '\\' + stuff
145-
add(f'[{stuff}]')
146-
else:
147-
add(re.escape(c))
148-
assert i == n
149-
15080
# Deal with STARs.
151-
inp = res
81+
inp = _scanner.scan(pat)[0]
15282
res = []
15383
add = res.append
15484
i, n = 0, len(inp)
15585
# Fixed pieces at the start?
156-
while i < n and inp[i] is not STAR:
86+
while i < n and inp[i] is not _STAR:
15787
add(inp[i])
15888
i += 1
15989
# Now deal with STAR fixed STAR fixed ...
@@ -164,14 +94,14 @@ def translate(pat):
16494
# translate() results together via "|" to build large regexps matching
16595
# "one of many" shell patterns.
16696
while i < n:
167-
assert inp[i] is STAR
97+
assert inp[i] is _STAR
16898
i += 1
16999
if i == n:
170100
add(".*")
171101
break
172-
assert inp[i] is not STAR
102+
assert inp[i] is not _STAR
173103
fixed = []
174-
while i < n and inp[i] is not STAR:
104+
while i < n and inp[i] is not _STAR:
175105
fixed.append(inp[i])
176106
i += 1
177107
fixed = "".join(fixed)
@@ -183,3 +113,43 @@ def translate(pat):
183113
assert i == n
184114
res = "".join(res)
185115
return fr'(?s:{res})\Z'
116+
117+
118+
def _translate_literal(scanner, token):
119+
"""Translate a literal token to a regular expression."""
120+
return re.escape(token)
121+
122+
123+
def _translate_range(scanner, token):
124+
"""Translate a character range, like 'a-z', to a regular expression."""
125+
start, end = token[0], token[2]
126+
if start > end:
127+
# Remove empty ranges -- invalid in RE.
128+
return ''
129+
return f'{re.escape(start)}-{re.escape(end)}'
130+
131+
132+
def _translate_set(scanner, token):
    """Convert a bracketed set token such as '[a-z]' or '[!ij]' to a regex.

    *token* includes the surrounding brackets.  A '!' directly after the
    opening bracket negates the set.  The characters between the brackets
    are tokenized with ``_set_scanner`` and the translated pieces joined.

    If nothing remains after translation, a plain set yields '(?!)'
    (never matches) and a negated set yields '.' (matches any character).

    The *scanner* argument is not used.
    """
    is_negated = token[1] == '!'
    # Skip '[' and, when present, the '!' marker; drop the closing ']'.
    body_start = 2 if is_negated else 1
    members = token[body_start:-1]
    pieces, _remainder = _set_scanner.scan(members)
    translated = ''.join(pieces)
    if not translated:
        return '.' if is_negated else '(?!)'
    if is_negated:
        return f'[^{translated}]'
    return f'[{translated}]'
141+
142+
143+
# Unique marker for a run of '*' wildcards; translate() compares scanner
# output against it by identity.
_STAR = object()


# Tokenizer for a whole shell-style pattern.
_scanner = re.Scanner(
    [
        # One or more consecutive '*' collapse into a single marker.
        (r'\*+', _STAR),
        # '?' becomes the regex '.'.
        (r'\?', '.'),
        # A bracketed set: '[', optional '!', optional leading ']',
        # any run of non-']' characters, then the closing ']'.
        (r'\[!?+\]?+[^\]]*\]', _translate_set),
        # Any other single character is matched literally.
        (r'.', _translate_literal),
    ],
    flags=re.DOTALL,
)


# Tokenizer for the text between the brackets of a set.
_set_scanner = re.Scanner(
    [
        # Three characters with '-' in the middle form a range.
        (r'.-.', _translate_range),
        # Any other single character is matched literally.
        (r'.', _translate_literal),
    ],
    flags=re.DOTALL,
)

Lib/test/test_fnmatch.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ def test_translate(self):
225225
self.assertEqual(translate('?'), r'(?s:.)\Z')
226226
self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z')
227227
self.assertEqual(translate('[abc]'), r'(?s:[abc])\Z')
228-
self.assertEqual(translate('[]]'), r'(?s:[]])\Z')
228+
self.assertEqual(translate('[]]'), r'(?s:[\]])\Z')
229229
self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z')
230230
self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z')
231231
self.assertEqual(translate('[x'), r'(?s:\[x)\Z')
@@ -235,7 +235,7 @@ def test_translate(self):
235235
self.assertEqual(translate('*********'), r'(?s:.*)\Z')
236236
self.assertEqual(translate('A*********'), r'(?s:A.*)\Z')
237237
self.assertEqual(translate('*********A'), r'(?s:.*A)\Z')
238-
self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z')
238+
self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[\?].)\Z')
239239
# fancy translation to prevent exponential-time match failure
240240
t = translate('**a*a****a')
241241
self.assertEqual(t, r'(?s:(?>.*?a)(?>.*?a).*a)\Z')

0 commit comments

Comments
 (0)