Skip to content

Commit a91f0a7

Browse files
committed
Simplify outputs, don't capture
1 parent e44e44f commit a91f0a7

File tree

2 files changed

+49
-22
lines changed

2 files changed

+49
-22
lines changed

range_ex/range_regex.py

Lines changed: 40 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,34 @@
1-
import re
21
from typing import Optional
32

43

5-
def __compute_numerical_range(str_a, str_b, any_digit="[0-9]", start_appender_str=""):
4+
def __digit_range(start: int, end: int) -> str:
    """Return the shortest regex fragment matching one digit in [start, end].

    Both bounds are inclusive. Equal bounds give the literal digit, the full
    0-9 span gives the "any digit" shorthand, and every other pair gives a
    bracketed character class.
    """
    # The full span can never have equal bounds, so checking it first is safe.
    if (start, end) == (0, 9):
        return r"\d"
    return str(start) if start == end else f"[{start}-{end}]"
10+
11+
12+
def __tokenize_numeric_pattern(pattern: str) -> list[str]:
    """Split a generated numeric pattern into one token per digit position.

    A token is a bracketed class (everything from "[" up to and including the
    next "]"), the two-character "any digit" shorthand, or a single character.

    Raises:
        ValueError: if a "[" has no closing "]".
    """
    pieces = []
    pos = 0
    total = len(pattern)
    while pos < total:
        # Work out how many characters the token at `pos` spans, then slice once.
        if pattern[pos] == "[":
            close = pattern.find("]", pos)
            if close == -1:
                raise ValueError(f"Malformed range expression: {pattern}")
            width = close - pos + 1
        elif pattern.startswith("\\d", pos):
            width = 2
        else:
            width = 1
        pieces.append(pattern[pos : pos + width])
        pos += width
    return pieces
29+
30+
31+
def __compute_numerical_range(str_a, str_b, start_appender_str=""):
632
"""
733
Helps generating regex for numerical range.
834
Assumptions are int(str_a) <= int(str_b) and should have equal number of digits.
@@ -19,7 +45,7 @@ def __compute_numerical_range(str_a, str_b, any_digit="[0-9]", start_appender_st
1945
if str_a == str_b:
2046
return start_appender_str + str_a
2147
if str_len == 1:
22-
return f"{start_appender_str}[{str_a}-{str_b}]"
48+
return f"{start_appender_str}{__digit_range(int(str_a), int(str_b))}"
2349
# Counting index position till the characteres are equal
2450
check_equal = -1
2551
for i in range(str_len):
@@ -31,7 +57,6 @@ def __compute_numerical_range(str_a, str_b, any_digit="[0-9]", start_appender_st
3157
return __compute_numerical_range(
3258
str_a[check_equal + 1 :],
3359
str_b[check_equal + 1 :],
34-
any_digit=any_digit,
3560
start_appender_str=start_appender_str + str_a[: check_equal + 1],
3661
)
3762

@@ -41,37 +66,37 @@ def __compute_numerical_range(str_a, str_b, any_digit="[0-9]", start_appender_st
4166
patterns = []
4267
if intermediate_range:
4368
patterns.append(
44-
f"{start_appender_str}[{intermediate_range[0]}-{intermediate_range[-1]}]{''.join([any_digit]*(str_len-1))}"
69+
f"{start_appender_str}{__digit_range(intermediate_range[0], intermediate_range[-1])}{''.join([r'\d']*(str_len-1))}"
4570
)
4671
# patterns for the above part ['[2-4][0-9][0-9]']
4772

4873
# Case for str_a
4974
for loop_counter in range(str_len - 1): # no_of_digits-1 units
5075
if loop_counter == str_len - 2: # Find the last loop
5176
patterns.append(
52-
f"{start_appender_str}{str_a[:loop_counter+1]}[{str_a[-1]}-9]"
77+
f"{start_appender_str}{str_a[:loop_counter+1]}{__digit_range(int(str_a[-1]), 9)}"
5378
)
5479
else:
5580
if (
5681
str_a[loop_counter + 1] != "9"
5782
): # if 599 then avoid 10 in '[6-8]...|5[10-9]..|59[9-9].|598[9-9]'
5883
patterns.append(
59-
f"{start_appender_str}{str_a[:loop_counter+1]}[{int(str_a[loop_counter+1])+1}-9]{''.join([any_digit]*(str_len-2-loop_counter))}"
84+
f"{start_appender_str}{str_a[:loop_counter+1]}{__digit_range(int(str_a[loop_counter+1]) + 1, 9)}{''.join([r'\d']*(str_len-2-loop_counter))}"
6085
)
6186
# patterns for the above part ['1[7-9][0-9]','16[9-9]']
6287

6388
# Case for str_b
6489
for loop_counter in range(str_len - 1): # no_of_digits-1 units
6590
if loop_counter == str_len - 2: # Find the last loop
6691
patterns.append(
67-
f"{start_appender_str}{str_b[:loop_counter+1]}[0-{str_b[-1]}]"
92+
f"{start_appender_str}{str_b[:loop_counter+1]}{__digit_range(0, int(str_b[-1]))}"
6893
)
6994
else:
7095
if (
7196
str_b[loop_counter + 1] != "0"
7297
): # if 1102 then avoid -1 in '11[0--1].|110[0-2]'
7398
patterns.append(
74-
f"{start_appender_str}{str_b[:loop_counter+1]}[0-{int(str_b[loop_counter+1])-1}]{''.join([any_digit]*(str_len-2-loop_counter))}"
99+
f"{start_appender_str}{str_b[:loop_counter+1]}{__digit_range(0, int(str_b[loop_counter+1]) - 1)}{''.join([r'\d']*(str_len-2-loop_counter))}"
75100
)
76101
# patterns for the above part ['5[0-3][0-9]','54[0-3]']
77102

@@ -169,29 +194,22 @@ def _range_regex(a, b):
169194
ranges = __range_splitter(a, b)
170195
intermediate_regex = "|".join(
171196
[
172-
__compute_numerical_range(
173-
str(r[0]), str(r[1]), any_digit="[0-9]", start_appender_str=r[2]
174-
)
197+
__compute_numerical_range(str(r[0]), str(r[1]), start_appender_str=r[2])
175198
for r in ranges
176199
]
177200
)
178201

179202
# Modifying the integer supported regex to support float
180203
new_regex = []
181204
for p in intermediate_regex.split("|"):
182-
if p.find("[") == -1:
183-
x = [c for c in p if c != "-"]
184-
else:
185-
x = [
186-
c for d in re.findall(r"-{0,1}(\d+)\[\d-\d\]*", p) for c in d
187-
] + re.findall(r"-{0,1}[\d]*(\[\d-\d\]*)", p)
205+
x = __tokenize_numeric_pattern(p[1:] if p.startswith("-") else p)
188206

189207
# If x = ['[0-9]'] and max_num_decimal = 2, We need x = ['0','[0-9]']
190208
if len(x) < max_num_decimal:
191209
x = (["0"] * (max_num_decimal - len(x))) + x
192210

193211
# Example x = ['3', '2', '[0-1]', '[0-9]'] for p=32[0-1][0-9]
194-
start_appender_str = "-" if re.findall("^-", p) else ""
212+
start_appender_str = "-" if p.startswith("-") else ""
195213
# Add a decimal point inbetween, keep the next digit mandatory and others optional (32.[0-1][0-9]?[0-9]*)
196214
fractional_part = (
197215
[x[-max_num_decimal]] + [z + "?" for z in x[-max_num_decimal + 1 :]]
@@ -202,7 +220,7 @@ def _range_regex(a, b):
202220
"".join(x[:-max_num_decimal]) if "".join(x[:-max_num_decimal]) else "0?"
203221
)
204222
new_regex.append(
205-
rf"{start_appender_str}{non_fractional_part}\.{''.join(fractional_part)}[0-9]*"
223+
rf"{start_appender_str}{non_fractional_part}\.{''.join(fractional_part)}\d*"
206224
)
207225
regex = f"(?:{'|'.join(new_regex)})"
208226
return regex
@@ -211,7 +229,7 @@ def _range_regex(a, b):
211229
elif isinstance(a, (int)) and isinstance(b, (int)):
212230
a, b = (a, b) if a < b else (b, a)
213231
ranges = __range_splitter(a, b)
214-
regex = f"(?:{'|'.join([__compute_numerical_range(str(r[0]),str(r[1]),any_digit='[0-9]',start_appender_str=r[2]) for r in ranges])})"
232+
regex = f"(?:{'|'.join([__compute_numerical_range(str(r[0]),str(r[1]),start_appender_str=r[2]) for r in ranges])})"
215233
return regex
216234

217235
# Neither integer nor float
@@ -234,7 +252,7 @@ def range_regex(minimum: Optional[int] = None, maximum: Optional[int] = None):
234252
If you omit both, all numbers will be matched.
235253
"""
236254
if minimum is None and maximum is None:
237-
return r"-?(?:[1-9][0-9]*|0)"
255+
return r"-?(?:[1-9]\d*|0)"
238256
if minimum is None:
239257
if maximum == 0:
240258
return r"(?:-[1-9]\d*|0)"

tests/test_range_regex.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,12 @@ def test_range_upper_bounded(upper_bound, value):
8080
def test_range_no_bound(value):
8181
generated_regex = range_regex()
8282
assert re.compile(generated_regex).fullmatch(str(value)) is not None
83+
84+
85+
def test_single_digit_class_uses_shorthand():
    """The 0-9 range is emitted as the digit shorthand, not a bracketed class."""
    expected = r"(?:\d)"
    assert range_regex(0, 9) == expected
87+
88+
89+
def test_redundant_single_value_ranges_are_collapsed():
    """No degenerate class such as ``[9-9]`` appears in the generated pattern."""
    # Matches a class whose lower and upper bound are the same digit.
    degenerate_class = r"\[([0-9])-\1\]"
    pattern = range_regex(169, 543)
    assert re.search(degenerate_class, pattern) is None

0 commit comments

Comments
 (0)