11from typing import Optional
22
# Regex token matching any single decimal digit; shared by the pattern builders.
ANY_DIGIT = r"\d"


def __digit_range(start: int, end: int) -> str:
    """Return the regex fragment matching one digit between start and end.

    Args:
        start: Lowest digit to match.
        end: Highest digit to match.

    Returns:
        The digit itself when both bounds are equal, ``ANY_DIGIT`` when the
        bounds are 0 and 9, otherwise a character class such as ``[2-4]``.
    """
    if start == end:
        # A single digit needs no character class.
        return str(start)
    if start == 0 and end == 9:
        # The full 0-9 span is written with the shorter \d token.
        return ANY_DIGIT
    return f"[{start}-{end}]"
1012
1113
@@ -66,7 +68,7 @@ def __compute_numerical_range(str_a, str_b, start_appender_str=""):
6668 patterns = []
6769 if intermediate_range :
6870 patterns .append (
69- f"{ start_appender_str } { __digit_range (intermediate_range [0 ], intermediate_range [- 1 ])} { '' . join ([ r'\d' ] * (str_len - 1 ) )} "
71+ f"{ start_appender_str } { __digit_range (intermediate_range [0 ], intermediate_range [- 1 ])} { ANY_DIGIT * (str_len - 1 )} "
7072 )
7173 # patterns for the above part ['[2-4][0-9][0-9]']
7274
@@ -81,7 +83,7 @@ def __compute_numerical_range(str_a, str_b, start_appender_str=""):
8183 str_a [loop_counter + 1 ] != "9"
8284 ): # if 599 then avoid 10 in '[6-8]...|5[10-9]..|59[9-9].|598[9-9]'
8385 patterns .append (
84- f"{ start_appender_str } { str_a [:loop_counter + 1 ]} { __digit_range (int (str_a [loop_counter + 1 ]) + 1 , 9 )} { '' . join ([ r'\d' ] * (str_len - 2 - loop_counter ) )} "
86+ f"{ start_appender_str } { str_a [:loop_counter + 1 ]} { __digit_range (int (str_a [loop_counter + 1 ]) + 1 , 9 )} { ANY_DIGIT * (str_len - 2 - loop_counter )} "
8587 )
8688 # patterns for the above part ['1[7-9][0-9]','16[9-9]']
8789
@@ -96,7 +98,7 @@ def __compute_numerical_range(str_a, str_b, start_appender_str=""):
9698 str_b [loop_counter + 1 ] != "0"
9799 ): # if 1102 then avoid -1 in '11[0--1].|110[0-2]'
98100 patterns .append (
99- f"{ start_appender_str } { str_b [:loop_counter + 1 ]} { __digit_range (0 , int (str_b [loop_counter + 1 ]) - 1 )} { '' . join ([ r'\d' ] * (str_len - 2 - loop_counter ) )} "
101+ f"{ start_appender_str } { str_b [:loop_counter + 1 ]} { __digit_range (0 , int (str_b [loop_counter + 1 ]) - 1 )} { ANY_DIGIT * (str_len - 2 - loop_counter )} "
100102 )
101103 # patterns for the above part ['5[0-3][0-9]','54[0-3]']
102104
@@ -158,7 +160,7 @@ def __range_splitter(a, b):
158160 return ranges
159161
160162
161- def _range_regex (a , b ):
163+ def _float_range_regex (a , b ):
162164 """
163165 Generate regex for matching a number between a range.
164166 The regex might not be optimal but it serves the purpose.
@@ -167,78 +169,64 @@ def _range_regex(a, b):
167169 i.e., if you pass two floating-point numbers the regex can only match floating-point numbers,
168170 whereas if you pass two integers it can only match integers.
169171 """
170- if a == 0 and b == 0 :
171- return "-?0"
172- # Handling floating point numbers
173- if (isinstance (a , (float )) and isinstance (b , (float , int ))) or (
174- isinstance (a , (float , int )) and isinstance (b , (float ))
175- ):
176- a , b = (a , b ) if a < b else (b , a )
177- num_of_decimal_in_a = len (str (float (a ))) - (str (float (a )).find ("." ) + 1 )
178- num_of_decimal_in_b = len (str (float (b ))) - (str (float (b )).find ("." ) + 1 )
179- max_num_decimal = max (num_of_decimal_in_a , num_of_decimal_in_b )
180-
181- # Properly removing floating point and converting to integer
182- a , b = (
183- "" .join ([c for c in str (float (a )) if c != "." ]),
184- "" .join ([c for c in str (float (b )) if c != "." ]),
172+ a , b = (a , b ) if a < b else (b , a )
173+ num_of_decimal_in_a = len (str (float (a ))) - (str (float (a )).find ("." ) + 1 )
174+ num_of_decimal_in_b = len (str (float (b ))) - (str (float (b )).find ("." ) + 1 )
175+ max_num_decimal = max (num_of_decimal_in_a , num_of_decimal_in_b )
176+
177+ # Properly removing floating point and converting to integer
178+ a , b = (
179+ "" .join ([c for c in str (float (a )) if c != "." ]),
180+ "" .join ([c for c in str (float (b )) if c != "." ]),
181+ )
182+ if len (str (a )) < len (str (b )):
183+ a = a + f"{ '0' * (max_num_decimal - num_of_decimal_in_a )} "
184+ else :
185+ b = b + f"{ '0' * (max_num_decimal - num_of_decimal_in_b )} "
186+ a , b = int (a ), int (b )
187+ a , b = (a , b ) if a < b else (b , a )
188+
189+ # Generate regex by treating float as integer
190+ ranges = __range_splitter (a , b )
191+ intermediate_regex = "|" .join (
192+ [
193+ __compute_numerical_range (str (r [0 ]), str (r [1 ]), start_appender_str = r [2 ])
194+ for r in ranges
195+ ]
196+ )
197+
198+ # Modifying the integer supported regex to support float
199+ new_regex = []
200+ for p in intermediate_regex .split ("|" ):
201+ x = __tokenize_numeric_pattern (p [1 :] if p .startswith ("-" ) else p )
202+
203+ # If x = ['[0-9]'] and max_num_decimal = 2, We need x = ['0','[0-9]']
204+ if len (x ) < max_num_decimal :
205+ x = (["0" ] * (max_num_decimal - len (x ))) + x
206+
207+ # Example x = ['3', '2', '[0-1]', '[0-9]'] for p=32[0-1][0-9]
208+ start_appender_str = "-" if p .startswith ("-" ) else ""
209+ # Add a decimal point inbetween, keep the next digit mandatory and others optional (32.[0-1][0-9]?[0-9]*)
210+ fractional_part = (
211+ [x [- max_num_decimal ]] + [z + "?" for z in x [- max_num_decimal + 1 :]]
212+ if max_num_decimal > 1
213+ else [z for z in x [- max_num_decimal :]]
185214 )
186- if len (str (a )) < len (str (b )):
187- a = a + f"{ '0' * (max_num_decimal - num_of_decimal_in_a )} "
188- else :
189- b = b + f"{ '0' * (max_num_decimal - num_of_decimal_in_b )} "
190- a , b = int (a ), int (b )
191- a , b = (a , b ) if a < b else (b , a )
192-
193- # Generate regex by treating float as integer
194- ranges = __range_splitter (a , b )
195- intermediate_regex = "|" .join (
196- [
197- __compute_numerical_range (str (r [0 ]), str (r [1 ]), start_appender_str = r [2 ])
198- for r in ranges
199- ]
215+ non_fractional_part = (
216+ "" .join (x [:- max_num_decimal ]) if "" .join (x [:- max_num_decimal ]) else "0?"
200217 )
218+ new_regex .append (
219+ rf"{ start_appender_str } { non_fractional_part } \.{ '' .join (fractional_part )} \d*"
220+ )
221+ regex = f"(?:{ '|' .join (new_regex )} )"
222+ return regex
201223
202- # Modifying the integer supported regex to support float
203- new_regex = []
204- for p in intermediate_regex .split ("|" ):
205- x = __tokenize_numeric_pattern (p [1 :] if p .startswith ("-" ) else p )
206-
207- # If x = ['[0-9]'] and max_num_decimal = 2, We need x = ['0','[0-9]']
208- if len (x ) < max_num_decimal :
209- x = (["0" ] * (max_num_decimal - len (x ))) + x
210-
211- # Example x = ['3', '2', '[0-1]', '[0-9]'] for p=32[0-1][0-9]
212- start_appender_str = "-" if p .startswith ("-" ) else ""
213- # Add a decimal point inbetween, keep the next digit mandatory and others optional (32.[0-1][0-9]?[0-9]*)
214- fractional_part = (
215- [x [- max_num_decimal ]] + [z + "?" for z in x [- max_num_decimal + 1 :]]
216- if max_num_decimal > 1
217- else [z for z in x [- max_num_decimal :]]
218- )
219- non_fractional_part = (
220- "" .join (x [:- max_num_decimal ]) if "" .join (x [:- max_num_decimal ]) else "0?"
221- )
222- new_regex .append (
223- rf"{ start_appender_str } { non_fractional_part } \.{ '' .join (fractional_part )} \d*"
224- )
225- regex = f"(?:{ '|' .join (new_regex )} )"
226- return regex
227224
228- # Handling integer numbers
229- elif isinstance (a , (int )) and isinstance (b , (int )):
230- a , b = (a , b ) if a < b else (b , a )
231- ranges = __range_splitter (a , b )
232- regex = f"(?:{ '|' .join ([__compute_numerical_range (str (r [0 ]),str (r [1 ]),start_appender_str = r [2 ]) for r in ranges ])} )"
233- return regex
234-
235- # Neither integer nor float
236- else :
237- raise (
238- ValueError (
239- f"Unsupported data types for { a } :{ type (a )} or { b } :{ type (a )} , Only supported float/int"
240- )
241- )
def _range_regex(a: int, b: int) -> str:
    """Build a regex alternation for the integer range bounded by a and b.

    The bounds may be given in either order; they are swapped so that the
    smaller one comes first before the range is split.

    Args:
        a: One bound of the range.
        b: The other bound of the range.

    Returns:
        A non-capturing group ``(?:...)`` whose alternatives are the patterns
        produced for each sub-range returned by ``__range_splitter``.
    """
    low, high = (a, b) if a < b else (b, a)
    # Each entry is indexed as (start, end, prefix); the prefix is passed on
    # as start_appender_str (presumably the sign for negative sub-ranges —
    # confirm against __range_splitter).
    sub_ranges = __range_splitter(low, high)
    alternatives = [
        __compute_numerical_range(str(r[0]), str(r[1]), start_appender_str=r[2])
        for r in sub_ranges
    ]
    return f"(?:{'|'.join(alternatives)})"
242230
243231
244232def range_regex (minimum : Optional [int ] = None , maximum : Optional [int ] = None ):
@@ -251,6 +239,11 @@ def range_regex(minimum: Optional[int] = None, maximum: Optional[int] = None):
251239 If you omit maximum, the regex will match all numbers larger than minimum (minimum must be < 0).
252240 If you omit both, all numbers will be matched.
253241 """
242+ if minimum is not None and not isinstance (minimum , int ):
243+ raise TypeError (f"minimum must be int or None, got { type (minimum ).__name__ } " )
244+ if maximum is not None and not isinstance (maximum , int ):
245+ raise TypeError (f"maximum must be int or None, got { type (maximum ).__name__ } " )
246+
254247 if minimum is None and maximum is None :
255248 return r"-?(?:[1-9]\d*|0)"
256249 if minimum is None :
@@ -280,3 +273,14 @@ def range_regex(minimum: Optional[int] = None, maximum: Optional[int] = None):
280273 # now match any number with at least one more digit
281274 return rf"(?:{ lowerbound_regex } |[1-9]\d{{{ num_digits } }}\d*)"
282275 return _range_regex (minimum , maximum )
276+
277+
def float_range_regex(minimum: float, maximum: float) -> str:
    """
    Generate regex for matching a floating-point number between a range, inclusive on both ends.

    Args:
        minimum: One bound of the range (int or float).
        maximum: The other bound of the range (int or float).

    Returns:
        The regex string built by ``_float_range_regex``.

    Raises:
        TypeError: If either bound is neither int nor float.
        ValueError: If either bound is NaN or infinite; such values have no
            decimal representation the pattern builder can work from.
    """
    import math  # local import keeps this block independent of the file header

    if not isinstance(minimum, (int, float)):
        raise TypeError(f"minimum must be int or float, got {type(minimum).__name__}")
    if not isinstance(maximum, (int, float)):
        raise TypeError(f"maximum must be int or float, got {type(maximum).__name__}")

    # NaN/inf used to slip through and fail later inside int() with an
    # unhelpful "invalid literal" message; reject them here with a clear one.
    for label, bound in (("minimum", minimum), ("maximum", maximum)):
        if isinstance(bound, float) and not math.isfinite(bound):
            raise ValueError(f"{label} must be a finite number, got {bound}")

    return _float_range_regex(minimum, maximum)
0 commit comments