From ab4b759a6f24c95548f4f37238fba93f342fa6d7 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 22 May 2022 16:08:20 +0300 Subject: [PATCH 1/4] gh-89973: Fix re.error in the fnmatch module. Character ranges with upper bound less than lower bound are now interpreted as empty ranges, for compatibility with other glob pattern implementations. Previously such patterns raised re.error. --- Lib/fnmatch.py | 23 ++++-- Lib/test/test_fnmatch.py | 71 +++++++++++++++++++ ...2-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst | 3 + 3 files changed, 91 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index 0f5a41ac06f3e6..9bed7447f74492 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -102,7 +102,7 @@ def translate(pat): add('\\[') else: stuff = pat[i:j] - if '--' not in stuff: + if '-' not in stuff: stuff = stuff.replace('\\', r'\\') else: chunks = [] @@ -115,6 +115,12 @@ def translate(pat): i = k+1 k = k+3 chunks.append(pat[i:j]) + if not chunks[-1]: + del chunks[-1] + chunks[-1] += '-' + for k in range(len(chunks)-1, 0, -1): + if chunks[k-1][-1] > chunks[k][0]: + chunks[k-1:k+1] = [chunks[k-1][:-1] + chunks[k][1:]] # Escape backslashes and hyphens for set difference (--). # Hyphens that create ranges shouldn't be escaped. stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-') @@ -122,11 +128,16 @@ def translate(pat): # Escape set operations (&&, ~~ and ||). 
stuff = re.sub(r'([&~|])', r'\\\1', stuff) i = j+1 - if stuff[0] == '!': - stuff = '^' + stuff[1:] - elif stuff[0] in ('^', '['): - stuff = '\\' + stuff - add(f'[{stuff}]') + if not stuff: + add(f'(?!)') # never match + elif stuff == '!': + add(f'.') # match any character + else: + if stuff[0] == '!': + stuff = '^' + stuff[1:] + elif stuff[0] in ('^', '['): + stuff = '\\' + stuff + add(f'[{stuff}]') else: add(re.escape(c)) assert i == n diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index ca695d6f3f019b..c37dc89986b948 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -2,6 +2,7 @@ import unittest import os +import string import warnings from fnmatch import fnmatch, fnmatchcase, translate, filter @@ -91,6 +92,76 @@ def test_sep(self): check('usr/bin', 'usr\\bin', normsep) check('usr\\bin', 'usr\\bin') + def test_char_set(self): + ignorecase = os.path.normcase('ABC') == os.path.normcase('abc') + check = self.check_match + tescases = string.ascii_lowercase + string.digits + string.punctuation + for c in tescases: + check(c, '[az]', c in 'az') + check(c, '[!az]', c not in 'az') + # Case insensitive. + for c in tescases: + check(c, '[AZ]', (c in 'az') and ignorecase) + check(c, '[!AZ]', (c not in 'az') or not ignorecase) + for c in string.ascii_uppercase: + check(c, '[az]', (c in 'AZ') and ignorecase) + check(c, '[!az]', (c not in 'AZ') or not ignorecase) + # Repeated same character. + for c in tescases: + check(c, '[aa]', c == 'a') + # Special cases. 
+ for c in tescases: + check(c, '[^az]', c in '^az') + check(c, '[[az]', c in '[az') + check(c, r'[\]', c == '\\') + check(c, r'[\az]', c in r'\az') + check(c, r'[!]]', c != ']') + check('[', '[') + check('[]', '[]') + check('[!', '[!') + check('[!]', '[!]') + + def test_range(self): + ignorecase = os.path.normcase('ABC') == os.path.normcase('abc') + check = self.check_match + tescases = string.ascii_lowercase + string.digits + string.punctuation + for c in tescases: + check(c, '[b-d]', c in 'bcd') + check(c, '[!b-d]', c not in 'bcd') + check(c, '[b-dx-z]', c in 'bcdxyz') + check(c, '[!b-dx-z]', c not in 'bcdxyz') + # Case insensitive. + for c in tescases: + check(c, '[B-D]', (c in 'bcd') and ignorecase) + check(c, '[!B-D]', (c not in 'bcd') or not ignorecase) + for c in string.ascii_uppercase: + check(c, '[b-d]', (c in 'BCD') and ignorecase) + check(c, '[!b-d]', (c not in 'BCD') or not ignorecase) + # Upper bound == lower bound. + for c in tescases: + check(c, '[b-b]', c == 'b') + # Special cases. + for c in tescases: + check(c, '[!-#]', c not in '-#') + check(c, '[!--/]', c not in '-./') + check(c, '[^-`]', c in '^_`') + check(c, '[[-^]', c in r'[\]^') + check(c, r'[\-^]', c in r'\]^') + check(c, '[b-]', c in '-b') + check(c, '[!b-]', c not in '-b') + check(c, '[-b]', c in '-b') + check(c, '[!-b]', c not in '-b') + check(c, '[-]', c in '-') + check(c, '[!-]', c not in '-') + # Upper bound is less that lower bound: error in RE. 
+ for c in tescases: + check(c, '[d-b]', False) + check(c, '[!d-b]', True) + check(c, '[d-bx-z]', c in 'xyz') + check(c, '[!d-bx-z]', c not in 'xyz') + check(c, '[d-b^-`]', c in '^_`') + check(c, '[d-b[-^]', c in '[\\]^') + def test_warnings(self): with warnings.catch_warnings(): warnings.simplefilter('error', Warning) diff --git a/Misc/NEWS.d/next/Library/2022-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst b/Misc/NEWS.d/next/Library/2022-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst new file mode 100644 index 00000000000000..d1fc7ad003b457 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst @@ -0,0 +1,3 @@ +Fix :exc:`re.error` raised in :mod:`fnmatch` if the patterna contains +character range with upeer bound lower than lower bound (e.g. ``[c-a]``). +Now such ranges are interpreted as empty ranges. From 7be4e0bbebce211c44ae2d1a1f80ed3b88ce998e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 22 May 2022 19:33:11 +0300 Subject: [PATCH 2/4] Fix tests on Windows. --- Lib/test/test_fnmatch.py | 55 +++++++++++++++++++++++++++++++++++----- 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index c37dc89986b948..10ed496d4e2f37 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -113,8 +113,6 @@ def test_char_set(self): for c in tescases: check(c, '[^az]', c in '^az') check(c, '[[az]', c in '[az') - check(c, r'[\]', c == '\\') - check(c, r'[\az]', c in r'\az') check(c, r'[!]]', c != ']') check('[', '[') check('[]', '[]') @@ -123,6 +121,7 @@ def test_char_set(self): def test_range(self): ignorecase = os.path.normcase('ABC') == os.path.normcase('abc') + normsep = os.path.normcase('\\') == os.path.normcase('/') check = self.check_match tescases = string.ascii_lowercase + string.digits + string.punctuation for c in tescases: @@ -143,10 +142,11 @@ def test_range(self): # Special cases. 
for c in tescases: check(c, '[!-#]', c not in '-#') - check(c, '[!--/]', c not in '-./') + check(c, '[!--.]', c not in '-.') check(c, '[^-`]', c in '^_`') - check(c, '[[-^]', c in r'[\]^') - check(c, r'[\-^]', c in r'\]^') + if not (normsep and c == '/'): + check(c, '[[-^]', c in r'[\]^') + check(c, r'[\-^]', c in r'\]^') check(c, '[b-]', c in '-b') check(c, '[!b-]', c not in '-b') check(c, '[-b]', c in '-b') @@ -160,7 +160,50 @@ def test_range(self): check(c, '[d-bx-z]', c in 'xyz') check(c, '[!d-bx-z]', c not in 'xyz') check(c, '[d-b^-`]', c in '^_`') - check(c, '[d-b[-^]', c in '[\\]^') + if not (normsep and c == '/'): + check(c, '[d-b[-^]', c in r'[\]^') + + def test_sep_in_char_set(self): + normsep = os.path.normcase('\\') == os.path.normcase('/') + check = self.check_match + check('/', r'[/]') + check('\\', r'[\]') + check('/', r'[\]', normsep) + check('\\', r'[/]', normsep) + check('[/]', r'[/]', False) + check(r'[\\]', r'[/]', False) + check('\\', r'[\t]') + check('/', r'[\t]', normsep) + check('t', r'[\t]') + check('\t', r'[\t]', False) + + def test_sep_in_range(self): + normsep = os.path.normcase('\\') == os.path.normcase('/') + check = self.check_match + check('a/b', 'a[.-0]b', not normsep) + check('a\\b', 'a[.-0]b', False) + check('a\\b', 'a[Z-^]b', not normsep) + check('a/b', 'a[Z-^]b', False) + + check('a/b', 'a[/-0]b', not normsep) + check(r'a\b', 'a[/-0]b', False) + check('a[/-0]b', 'a[/-0]b', False) + check(r'a[\-0]b', 'a[/-0]b', False) + + check('a/b', 'a[.-/]b') + check(r'a\b', 'a[.-/]b', normsep) + check('a[.-/]b', 'a[.-/]b', False) + check(r'a[.-\]b', 'a[.-/]b', False) + + check(r'a\b', r'a[\-^]b') + check('a/b', r'a[\-^]b', normsep) + check(r'a[\-^]b', r'a[\-^]b', False) + check('a[/-^]b', r'a[\-^]b', False) + + check(r'a\b', r'a[Z-\]b', not normsep) + check('a/b', r'a[Z-\]b', False) + check(r'a[Z-\]b', r'a[Z-\]b', False) + check('a[Z-/]b', r'a[Z-\]b', False) def test_warnings(self): with warnings.catch_warnings(): From 
9b65639bd397cf717f575db3dc20be6193707ade Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 25 May 2022 11:11:25 +0300 Subject: [PATCH 3/4] Update Misc/NEWS.d/next/Library/2022-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst Co-authored-by: Shantanu <12621235+hauntsaninja@users.noreply.github.com> --- .../Library/2022-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2022-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst b/Misc/NEWS.d/next/Library/2022-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst index d1fc7ad003b457..7e61fd7d46a0bb 100644 --- a/Misc/NEWS.d/next/Library/2022-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst +++ b/Misc/NEWS.d/next/Library/2022-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst @@ -1,3 +1,3 @@ -Fix :exc:`re.error` raised in :mod:`fnmatch` if the patterna contains -character range with upeer bound lower than lower bound (e.g. ``[c-a]``). +Fix :exc:`re.error` raised in :mod:`fnmatch` if the pattern contains a +character range with upper bound lower than lower bound (e.g. ``[c-a]``). Now such ranges are interpreted as empty ranges. From d29d8af96d948ed7ab35404732316d228942c8f3 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 26 May 2022 17:29:47 +0300 Subject: [PATCH 4/4] Refactor. --- Lib/fnmatch.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index 9bed7447f74492..d5e296f7748c1c 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -114,13 +114,16 @@ def translate(pat): chunks.append(pat[i:k]) i = k+1 k = k+3 - chunks.append(pat[i:j]) - if not chunks[-1]: - del chunks[-1] + chunk = pat[i:j] + if chunk: + chunks.append(chunk) + else: chunks[-1] += '-' + # Remove empty ranges -- invalid in RE. 
for k in range(len(chunks)-1, 0, -1): if chunks[k-1][-1] > chunks[k][0]: - chunks[k-1:k+1] = [chunks[k-1][:-1] + chunks[k][1:]] + chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:] + del chunks[k] # Escape backslashes and hyphens for set difference (--). # Hyphens that create ranges shouldn't be escaped. stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-') @@ -129,9 +132,11 @@ def translate(pat): stuff = re.sub(r'([&~|])', r'\\\1', stuff) i = j+1 if not stuff: - add(f'(?!)') # never match + # Empty range: never match. + add('(?!)') elif stuff == '!': - add(f'.') # match any character + # Negated empty range: match any character. + add('.') else: if stuff[0] == '!': stuff = '^' + stuff[1:]