Skip to content

Commit 9b55657

Browse files
committed
Bunch of small fixes
1 parent a91f0a7 commit 9b55657

File tree

3 files changed

+114
-74
lines changed

3 files changed

+114
-74
lines changed

range_ex/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
__version__ = "1.1.0"
2+
from .range_regex import float_range_regex as float_range_regex
23
from .range_regex import range_regex as range_regex

range_ex/range_regex.py

Lines changed: 77 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
from typing import Optional
22

3+
ANY_DIGIT = r"\d"
4+
35

46
def __digit_range(start: int, end: int) -> str:
57
if start == end:
68
return str(start)
79
if start == 0 and end == 9:
8-
return r"\d"
10+
return ANY_DIGIT
911
return f"[{start}-{end}]"
1012

1113

@@ -66,7 +68,7 @@ def __compute_numerical_range(str_a, str_b, start_appender_str=""):
6668
patterns = []
6769
if intermediate_range:
6870
patterns.append(
69-
f"{start_appender_str}{__digit_range(intermediate_range[0], intermediate_range[-1])}{''.join([r'\d']*(str_len-1))}"
71+
f"{start_appender_str}{__digit_range(intermediate_range[0], intermediate_range[-1])}{ANY_DIGIT * (str_len-1)}"
7072
)
7173
# patterns for the above part ['[2-4][0-9][0-9]']
7274

@@ -81,7 +83,7 @@ def __compute_numerical_range(str_a, str_b, start_appender_str=""):
8183
str_a[loop_counter + 1] != "9"
8284
): # if 599 then avoid 10 in '[6-8]...|5[10-9]..|59[9-9].|598[9-9]'
8385
patterns.append(
84-
f"{start_appender_str}{str_a[:loop_counter+1]}{__digit_range(int(str_a[loop_counter+1]) + 1, 9)}{''.join([r'\d']*(str_len-2-loop_counter))}"
86+
f"{start_appender_str}{str_a[:loop_counter+1]}{__digit_range(int(str_a[loop_counter+1]) + 1, 9)}{ANY_DIGIT * (str_len-2-loop_counter)}"
8587
)
8688
# patterns for the above part ['1[7-9][0-9]','16[9-9]']
8789

@@ -96,7 +98,7 @@ def __compute_numerical_range(str_a, str_b, start_appender_str=""):
9698
str_b[loop_counter + 1] != "0"
9799
): # if 1102 then avoid -1 in '11[0--1].|110[0-2]'
98100
patterns.append(
99-
f"{start_appender_str}{str_b[:loop_counter+1]}{__digit_range(0, int(str_b[loop_counter+1]) - 1)}{''.join([r'\d']*(str_len-2-loop_counter))}"
101+
f"{start_appender_str}{str_b[:loop_counter+1]}{__digit_range(0, int(str_b[loop_counter+1]) - 1)}{ANY_DIGIT * (str_len-2-loop_counter)}"
100102
)
101103
# patterns for the above part ['5[0-3][0-9]','54[0-3]']
102104

@@ -158,7 +160,7 @@ def __range_splitter(a, b):
158160
return ranges
159161

160162

161-
def _range_regex(a, b):
163+
def _float_range_regex(a, b):
162164
"""
163165
Generate regex for matching a number between a range.
164166
The regex might not be optimal but it serves the purpose.
@@ -167,78 +169,64 @@ def _range_regex(a, b):
167169
i.e., if you pass two floating-point numbers, the regex can only match floating-point numbers,
168170
else if you pass two integers, you can only match integers.
169171
"""
170-
if a == 0 and b == 0:
171-
return "-?0"
172-
# Handling floating point numbers
173-
if (isinstance(a, (float)) and isinstance(b, (float, int))) or (
174-
isinstance(a, (float, int)) and isinstance(b, (float))
175-
):
176-
a, b = (a, b) if a < b else (b, a)
177-
num_of_decimal_in_a = len(str(float(a))) - (str(float(a)).find(".") + 1)
178-
num_of_decimal_in_b = len(str(float(b))) - (str(float(b)).find(".") + 1)
179-
max_num_decimal = max(num_of_decimal_in_a, num_of_decimal_in_b)
180-
181-
# Properly removing floating point and converting to integer
182-
a, b = (
183-
"".join([c for c in str(float(a)) if c != "."]),
184-
"".join([c for c in str(float(b)) if c != "."]),
172+
a, b = (a, b) if a < b else (b, a)
173+
num_of_decimal_in_a = len(str(float(a))) - (str(float(a)).find(".") + 1)
174+
num_of_decimal_in_b = len(str(float(b))) - (str(float(b)).find(".") + 1)
175+
max_num_decimal = max(num_of_decimal_in_a, num_of_decimal_in_b)
176+
177+
# Properly removing floating point and converting to integer
178+
a, b = (
179+
"".join([c for c in str(float(a)) if c != "."]),
180+
"".join([c for c in str(float(b)) if c != "."]),
181+
)
182+
if len(str(a)) < len(str(b)):
183+
a = a + f"{'0'*(max_num_decimal-num_of_decimal_in_a)}"
184+
else:
185+
b = b + f"{'0'*(max_num_decimal-num_of_decimal_in_b)}"
186+
a, b = int(a), int(b)
187+
a, b = (a, b) if a < b else (b, a)
188+
189+
# Generate regex by treating float as integer
190+
ranges = __range_splitter(a, b)
191+
intermediate_regex = "|".join(
192+
[
193+
__compute_numerical_range(str(r[0]), str(r[1]), start_appender_str=r[2])
194+
for r in ranges
195+
]
196+
)
197+
198+
# Modifying the integer supported regex to support float
199+
new_regex = []
200+
for p in intermediate_regex.split("|"):
201+
x = __tokenize_numeric_pattern(p[1:] if p.startswith("-") else p)
202+
203+
# If x = ['[0-9]'] and max_num_decimal = 2, We need x = ['0','[0-9]']
204+
if len(x) < max_num_decimal:
205+
x = (["0"] * (max_num_decimal - len(x))) + x
206+
207+
# Example x = ['3', '2', '[0-1]', '[0-9]'] for p=32[0-1][0-9]
208+
start_appender_str = "-" if p.startswith("-") else ""
209+
# Add a decimal point in between, keep the next digit mandatory and the others optional (32.[0-1][0-9]?[0-9]*)
210+
fractional_part = (
211+
[x[-max_num_decimal]] + [z + "?" for z in x[-max_num_decimal + 1 :]]
212+
if max_num_decimal > 1
213+
else [z for z in x[-max_num_decimal:]]
185214
)
186-
if len(str(a)) < len(str(b)):
187-
a = a + f"{'0'*(max_num_decimal-num_of_decimal_in_a)}"
188-
else:
189-
b = b + f"{'0'*(max_num_decimal-num_of_decimal_in_b)}"
190-
a, b = int(a), int(b)
191-
a, b = (a, b) if a < b else (b, a)
192-
193-
# Generate regex by treating float as integer
194-
ranges = __range_splitter(a, b)
195-
intermediate_regex = "|".join(
196-
[
197-
__compute_numerical_range(str(r[0]), str(r[1]), start_appender_str=r[2])
198-
for r in ranges
199-
]
215+
non_fractional_part = (
216+
"".join(x[:-max_num_decimal]) if "".join(x[:-max_num_decimal]) else "0?"
200217
)
218+
new_regex.append(
219+
rf"{start_appender_str}{non_fractional_part}\.{''.join(fractional_part)}\d*"
220+
)
221+
regex = f"(?:{'|'.join(new_regex)})"
222+
return regex
201223

202-
# Modifying the integer supported regex to support float
203-
new_regex = []
204-
for p in intermediate_regex.split("|"):
205-
x = __tokenize_numeric_pattern(p[1:] if p.startswith("-") else p)
206-
207-
# If x = ['[0-9]'] and max_num_decimal = 2, We need x = ['0','[0-9]']
208-
if len(x) < max_num_decimal:
209-
x = (["0"] * (max_num_decimal - len(x))) + x
210-
211-
# Example x = ['3', '2', '[0-1]', '[0-9]'] for p=32[0-1][0-9]
212-
start_appender_str = "-" if p.startswith("-") else ""
213-
# Add a decimal point in between, keep the next digit mandatory and the others optional (32.[0-1][0-9]?[0-9]*)
214-
fractional_part = (
215-
[x[-max_num_decimal]] + [z + "?" for z in x[-max_num_decimal + 1 :]]
216-
if max_num_decimal > 1
217-
else [z for z in x[-max_num_decimal:]]
218-
)
219-
non_fractional_part = (
220-
"".join(x[:-max_num_decimal]) if "".join(x[:-max_num_decimal]) else "0?"
221-
)
222-
new_regex.append(
223-
rf"{start_appender_str}{non_fractional_part}\.{''.join(fractional_part)}\d*"
224-
)
225-
regex = f"(?:{'|'.join(new_regex)})"
226-
return regex
227224

228-
# Handling integer numbers
229-
elif isinstance(a, (int)) and isinstance(b, (int)):
230-
a, b = (a, b) if a < b else (b, a)
231-
ranges = __range_splitter(a, b)
232-
regex = f"(?:{'|'.join([__compute_numerical_range(str(r[0]),str(r[1]),start_appender_str=r[2]) for r in ranges])})"
233-
return regex
234-
235-
# Neither integer nor float
236-
else:
237-
raise (
238-
ValueError(
239-
f"Unsupported data types for {a}:{type(a)} or {b}:{type(a)}, Only supported float/int"
240-
)
241-
)
225+
def _range_regex(a: int, b: int):
226+
a, b = (a, b) if a < b else (b, a)
227+
ranges = __range_splitter(a, b)
228+
regex = f"(?:{'|'.join([__compute_numerical_range(str(r[0]),str(r[1]),start_appender_str=r[2]) for r in ranges])})"
229+
return regex
242230

243231

244232
def range_regex(minimum: Optional[int] = None, maximum: Optional[int] = None):
@@ -251,6 +239,11 @@ def range_regex(minimum: Optional[int] = None, maximum: Optional[int] = None):
251239
If you omit maximum, the regex will match all numbers larger than minimum (minimum must be < 0).
252240
If you omit both, all numbers will be matched.
253241
"""
242+
if minimum is not None and not isinstance(minimum, int):
243+
raise TypeError(f"minimum must be int or None, got {type(minimum).__name__}")
244+
if maximum is not None and not isinstance(maximum, int):
245+
raise TypeError(f"maximum must be int or None, got {type(maximum).__name__}")
246+
254247
if minimum is None and maximum is None:
255248
return r"-?(?:[1-9]\d*|0)"
256249
if minimum is None:
@@ -280,3 +273,14 @@ def range_regex(minimum: Optional[int] = None, maximum: Optional[int] = None):
280273
# now match any number with at least one more digit
281274
return rf"(?:{lowerbound_regex}|[1-9]\d{{{num_digits}}}\d*)"
282275
return _range_regex(minimum, maximum)
276+
277+
278+
def float_range_regex(minimum: float, maximum: float):
279+
"""
280+
Generate regex for matching a floating-point number between a range, inclusive on both ends.
281+
"""
282+
if not isinstance(minimum, (int, float)):
283+
raise TypeError(f"minimum must be int or float, got {type(minimum).__name__}")
284+
if not isinstance(maximum, (int, float)):
285+
raise TypeError(f"maximum must be int or float, got {type(maximum).__name__}")
286+
return _float_range_regex(minimum, maximum)

tests/test_range_regex.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22

33
from hypothesis import given, strategies as st, settings
44
from hypothesis.strategies import one_of
5+
import pytest
56

6-
from range_ex import range_regex
7+
from range_ex import float_range_regex, range_regex
78

89
NUM_EXAMPLES = 5000
910

@@ -36,6 +37,18 @@ def ranges_samples_above(draw):
3637
return (lower_bound, upper_bound, outside)
3738

3839

40+
@st.composite
41+
def float_ranges_and_values(draw):
42+
lower_bound = draw(st.integers(min_value=-10000, max_value=10000))
43+
upper_bound = draw(st.integers(min_value=lower_bound, max_value=10000))
44+
value = draw(st.integers(min_value=-10000, max_value=10000))
45+
return (lower_bound / 10, upper_bound / 10, value / 10)
46+
47+
48+
def _one_decimal_str(value: float) -> str:
49+
return "0.0" if value == 0 else f"{value:.1f}"
50+
51+
3952
@given(ranges_samples_inside())
4053
@settings(max_examples=NUM_EXAMPLES)
4154
def test_numerical_range(pair):
@@ -89,3 +102,25 @@ def test_single_digit_class_uses_shorthand():
89102
def test_redundant_single_value_ranges_are_collapsed():
90103
generated_regex = range_regex(169, 543)
91104
assert re.search(r"\[([0-9])-\1\]", generated_regex) is None
105+
106+
107+
@given(float_ranges_and_values())
108+
@settings(max_examples=NUM_EXAMPLES)
109+
def test_float_range(pair):
110+
(start_range, end_range, value) = pair
111+
generated_regex = float_range_regex(start_range, end_range)
112+
matched = re.compile(generated_regex).fullmatch(_one_decimal_str(value)) is not None
113+
assert matched == (start_range <= value <= end_range)
114+
115+
116+
def test_range_regex_rejects_float_bounds():
117+
with pytest.raises(TypeError):
118+
range_regex(0.0, 10)
119+
with pytest.raises(TypeError):
120+
range_regex(0, 10.0)
121+
122+
123+
def test_range_regex_does_not_match_decimal_strings():
124+
assert re.compile(range_regex()).fullmatch("0.0") is None
125+
assert re.compile(range_regex(-10, 10)).fullmatch("0.0") is None
126+
assert re.compile(range_regex(0, 0)).fullmatch("0.0") is None

0 commit comments

Comments
 (0)