diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index d5e296f7748c1c..96b2cd8bbc8cbe 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -71,6 +71,58 @@ def fnmatchcase(name, pat):
     return match(name) is not None
 
 
+# The possessive quantifiers (`?+`) never give back a leading '!' or ']'
+# on backtracking, so '[]' and '[!]' are tokenized as literal characters.
+_TRANSLATE_RE = re.compile(
+    r'(?P<star>\*+)|'
+    r'(?P<question_mark>\?)|'
+    r'(?P<set>\[(?P<negated>!?+)(?P<set_inner>\]?+[^\]]*)\])|'
+    r'(?P<literal>.)',
+    flags=re.DOTALL)
+
+
+def _translate_iter(pat, star, question_mark):
+    """Yield a regex fragment for each token of the shell pattern *pat*.
+
+    A run of '*' yields *star* once and '?' yields *question_mark*.  A set
+    yields a character class, or '(?!)' (*question_mark* if negated) when
+    nothing is left inside it.  Any other character is escaped.
+    """
+    for match in _TRANSLATE_RE.finditer(pat):
+        if match.group('star'):
+            yield star
+        elif match.group('question_mark'):
+            yield question_mark
+        elif match.group('set'):
+            inner = ''.join(_translate_set_iter(match.group('set_inner')))
+            if match.group('negated'):
+                yield f'[^{inner}]' if inner else question_mark
+            else:
+                yield f'[{inner}]' if inner else '(?!)'
+        else:
+            yield re.escape(match.group())
+
+
+_TRANSLATE_SET_RE = re.compile(
+    r'(?P<range>(?P<start>.)-(?P<end>.))|'
+    r'(?P<literal>.)',
+    flags=re.DOTALL)
+
+
+def _translate_set_iter(token):
+    """Yield escaped regex fragments for the inside of a bracket set.
+
+    A range whose start sorts after its end yields nothing.
+    """
+    for match in _TRANSLATE_SET_RE.finditer(token):
+        if match.group('range'):
+            start, end = match.group('start'), match.group('end')
+            if start <= end:
+                yield f'{re.escape(start)}-{re.escape(end)}'
+        else:
+            yield re.escape(match.group())
+
+
 def translate(pat):
     """Translate a shell PATTERN to a regular expression.
 
@@ -78,77 +130,9 @@ def translate(pat):
     """
 
     STAR = object()
-    res = []
-    add = res.append
-    i, n = 0, len(pat)
-    while i < n:
-        c = pat[i]
-        i = i+1
-        if c == '*':
-            # compress consecutive `*` into one
-            if (not res) or res[-1] is not STAR:
-                add(STAR)
-        elif c == '?':
-            add('.')
-        elif c == '[':
-            j = i
-            if j < n and pat[j] == '!':
-                j = j+1
-            if j < n and pat[j] == ']':
-                j = j+1
-            while j < n and pat[j] != ']':
-                j = j+1
-            if j >= n:
-                add('\\[')
-            else:
-                stuff = pat[i:j]
-                if '-' not in stuff:
-                    stuff = stuff.replace('\\', r'\\')
-                else:
-                    chunks = []
-                    k = i+2 if pat[i] == '!' else i+1
-                    while True:
-                        k = pat.find('-', k, j)
-                        if k < 0:
-                            break
-                        chunks.append(pat[i:k])
-                        i = k+1
-                        k = k+3
-                    chunk = pat[i:j]
-                    if chunk:
-                        chunks.append(chunk)
-                    else:
-                        chunks[-1] += '-'
-                    # Remove empty ranges -- invalid in RE.
-                    for k in range(len(chunks)-1, 0, -1):
-                        if chunks[k-1][-1] > chunks[k][0]:
-                            chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
-                            del chunks[k]
-                    # Escape backslashes and hyphens for set difference (--).
-                    # Hyphens that create ranges shouldn't be escaped.
-                    stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
-                                     for s in chunks)
-                # Escape set operations (&&, ~~ and ||).
-                stuff = re.sub(r'([&~|])', r'\\\1', stuff)
-                i = j+1
-                if not stuff:
-                    # Empty range: never match.
-                    add('(?!)')
-                elif stuff == '!':
-                    # Negated empty range: match any character.
-                    add('.')
-                else:
-                    if stuff[0] == '!':
-                        stuff = '^' + stuff[1:]
-                    elif stuff[0] in ('^', '['):
-                        stuff = '\\' + stuff
-                    add(f'[{stuff}]')
-        else:
-            add(re.escape(c))
-    assert i == n
+    inp = list(_translate_iter(pat, STAR, '.'))
 
     # Deal with STARs.
-    inp = res
     res = []
     add = res.append
     i, n = 0, len(inp)
diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index 10ed496d4e2f37..eb25de2d9b07ed 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -225,7 +225,7 @@ def test_translate(self):
         self.assertEqual(translate('?'), r'(?s:.)\Z')
         self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z')
         self.assertEqual(translate('[abc]'), r'(?s:[abc])\Z')
-        self.assertEqual(translate('[]]'), r'(?s:[]])\Z')
+        self.assertEqual(translate('[]]'), r'(?s:[\]])\Z')
         self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z')
         self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z')
         self.assertEqual(translate('[x'), r'(?s:\[x)\Z')
@@ -235,7 +235,7 @@ def test_translate(self):
         self.assertEqual(translate('*********'), r'(?s:.*)\Z')
         self.assertEqual(translate('A*********'), r'(?s:A.*)\Z')
         self.assertEqual(translate('*********A'), r'(?s:.*A)\Z')
-        self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z')
+        self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[\?].)\Z')
         # fancy translation to prevent exponential-time match failure
         t = translate('**a*a****a')
         self.assertEqual(t, r'(?s:(?>.*?a)(?>.*?a).*a)\Z')