From 273946af6e5b0a8a091fd855144c9d885538bd6a Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Wed, 5 Jun 2013 17:02:11 -0400 Subject: [PATCH 1/5] TST: Add numexpr arithmetic tests. Only add 'div' if not python 3 Also checks dtype in test cases for series --- RELEASE.rst | 3 ++ pandas/tests/test_expressions.py | 50 +++++++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/RELEASE.rst b/RELEASE.rst index 57cb53c1096f6..ebd88091050f1 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -250,6 +250,8 @@ pandas 0.11.1 not converting dtypes (GH3911_) - Fixed a bug where ``DataFrame.replace`` with a compiled regular expression in the ``to_replace`` argument wasn't working (GH3907_) + - Fixed ``__truediv__`` in Python 2.7 with ``numexpr`` installed to actually do true division when dividing + two integer arrays with at least 10000 cells total (GH3764_) .. _GH3164: https://github.com/pydata/pandas/issues/3164 .. _GH2786: https://github.com/pydata/pandas/issues/2786 @@ -351,6 +353,7 @@ pandas 0.11.1 .. _GH3907: https://github.com/pydata/pandas/issues/3907 .. _GH3911: https://github.com/pydata/pandas/issues/3911 .. _GH3912: https://github.com/pydata/pandas/issues/3912 +.. 
_GH3764: https://github.com/pydata/pandas/issues/3764 pandas 0.11.0 ============= diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index af7f20a65fa7c..7490051f2f6c8 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -30,6 +30,7 @@ _frame2 = DataFrame(np.random.randn(100, 4), columns = list('ABCD'), dtype='float64') _mixed = DataFrame({ 'A' : _frame['A'].copy(), 'B' : _frame['B'].astype('float32'), 'C' : _frame['C'].astype('int64'), 'D' : _frame['D'].astype('int32') }) _mixed2 = DataFrame({ 'A' : _frame2['A'].copy(), 'B' : _frame2['B'].astype('float32'), 'C' : _frame2['C'].astype('int64'), 'D' : _frame2['D'].astype('int32') }) +_integer = DataFrame(np.random.randint(1, 100, size=(10001, 4)), columns = list('ABCD'), dtype='int64') class TestExpressions(unittest.TestCase): @@ -41,7 +42,54 @@ def setUp(self): self.frame2 = _frame2.copy() self.mixed = _mixed.copy() self.mixed2 = _mixed2.copy() - + self.integer = _integer.copy() + self._MIN_ELEMENTS = expr._MIN_ELEMENTS + + def tearDown(self): + expr._MIN_ELEMENTS = self._MIN_ELEMENTS + + @nose.tools.nottest + def run_arithmetic_test(self, df, assert_func, check_dtype=False): + expr._MIN_ELEMENTS = 0 + operations = ['add', 'sub', 'mul', 'truediv'] + if not py3compat.PY3: + operations.append('div') + for arith in operations: + op = getattr(operator, arith) + expr.set_use_numexpr(False) + expected = op(df, df) + expr.set_use_numexpr(True) + result = op(df, df) + try: + if check_dtype: + if arith == 'div': + assert expected.dtype.kind == df.dtype.kind + if arith == 'truediv': + assert expected.dtype.kind == 'f' + assert_func(expected, result) + except Exception: + print("Failed test with operator %r" % op.__name__) + raise + + def test_integer_arithmetic(self): + self.run_arithmetic_test(self.integer, assert_frame_equal) + self.run_arithmetic_test(self.integer.icol(0), assert_series_equal, + check_dtype=True) + + def test_float_arithemtic(self): + 
self.run_arithmetic_test(self.frame, assert_frame_equal) + self.run_arithmetic_test(self.frame.icol(0), assert_series_equal, + check_dtype=True) + + def test_mixed_arithmetic(self): + self.run_arithmetic_test(self.mixed, assert_frame_equal) + for col in self.mixed.columns: + self.run_arithmetic_test(self.mixed[col], assert_series_equal) + + def test_integer_with_zeros(self): + self.integer *= np.random.randint(0, 2, size=np.shape(self.integer)) + self.run_arithmetic_test(self.integer, assert_frame_equal) + self.run_arithmetic_test(self.integer.icol(0), assert_series_equal) def test_invalid(self): From ae8b7abbd6c360858934083c6813742909ac811a Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Wed, 5 Jun 2013 17:09:54 -0400 Subject: [PATCH 2/5] ENH: Allow evaluate to pass kwargs to numexpr --- pandas/core/expressions.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/expressions.py b/pandas/core/expressions.py index 34e56fe576a07..abe891b82410c 100644 --- a/pandas/core/expressions.py +++ b/pandas/core/expressions.py @@ -51,7 +51,7 @@ def set_numexpr_threads(n = None): pass -def _evaluate_standard(op, op_str, a, b, raise_on_error=True): +def _evaluate_standard(op, op_str, a, b, raise_on_error=True, **eval_kwargs): """ standard evaluation """ return op(a,b) @@ -79,7 +79,7 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check): return False -def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False): +def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False, **eval_kwargs): result = None if _can_use_numexpr(op, op_str, a, b, 'evaluate'): @@ -92,7 +92,7 @@ def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False): result = ne.evaluate('a_value %s b_value' % op_str, local_dict={ 'a_value' : a_value, 'b_value' : b_value }, - casting='safe') + casting='safe', **eval_kwargs) except (ValueError), detail: if 'unknown type object' in str(detail): pass @@ -142,7 +142,7 @@ def _where_numexpr(cond, a, b, raise_on_error = 
False): # turn myself on set_use_numexpr(True) -def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True): +def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True, **eval_kwargs): """ evaluate and return the expression of the op on a and b Parameters @@ -158,7 +158,7 @@ def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True): """ if use_numexpr: - return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error) + return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error, **eval_kwargs) return _evaluate_standard(op, op_str, a, b, raise_on_error=raise_on_error) def where(cond, a, b, raise_on_error=False, use_numexpr=True): From 5d69a4db42cb93ed9c3a422523b00df8636ecddf Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Wed, 5 Jun 2013 17:10:46 -0400 Subject: [PATCH 3/5] BUG: Fix __truediv__ numexpr error by passing truediv=True to evaluate for __truediv__ and truediv=False for __div__. --- pandas/core/frame.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f0145364363ac..7c8dc3d6185b1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -190,10 +190,10 @@ class DataConflictError(Exception): # Factory helper methods -def _arith_method(op, name, str_rep = None, default_axis='columns', fill_zeros=None): +def _arith_method(op, name, str_rep = None, default_axis='columns', fill_zeros=None, **eval_kwargs): def na_op(x, y): try: - result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True) + result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True, **eval_kwargs) result = com._fill_zeros(result,y,fill_zeros) except TypeError: @@ -853,7 +853,7 @@ def __contains__(self, key): __sub__ = _arith_method(operator.sub, '__sub__', '-', default_axis=None) __mul__ = _arith_method(operator.mul, '__mul__', '*', default_axis=None) __truediv__ = _arith_method(operator.truediv, '__truediv__', '/', - default_axis=None, 
fill_zeros=np.inf) + default_axis=None, fill_zeros=np.inf, truediv=True) __floordiv__ = _arith_method(operator.floordiv, '__floordiv__', default_axis=None, fill_zeros=np.inf) __pow__ = _arith_method(operator.pow, '__pow__', '**', default_axis=None) @@ -879,7 +879,7 @@ def __contains__(self, key): # Python 2 division methods if not py3compat.PY3: __div__ = _arith_method(operator.div, '__div__', '/', - default_axis=None, fill_zeros=np.inf) + default_axis=None, fill_zeros=np.inf, truediv=False) __rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__', default_axis=None, fill_zeros=np.inf) From f927fed36323d94afa1f77cf19063c114e43bbfc Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Sat, 8 Jun 2013 21:42:06 -0400 Subject: [PATCH 4/5] BUG: Remove broken frame mod and floordiv str_rep `%` (modulus) intermittently causes floating point exceptions when used with numexpr. `//` (floordiv) gives inconsistent results between the unaccelerated (python/numpy) and numexpr-accelerated code paths when dividing by zero (unaccelerated produces 0.0000, accelerated produces `inf`) --- pandas/core/frame.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7c8dc3d6185b1..47142daa8b20b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -854,11 +854,16 @@ def __contains__(self, key): __mul__ = _arith_method(operator.mul, '__mul__', '*', default_axis=None) __truediv__ = _arith_method(operator.truediv, '__truediv__', '/', default_axis=None, fill_zeros=np.inf, truediv=True) + # numexpr produces a different value (python/numpy: 0.000, numexpr: inf) + # when dividing by zero, so can't use floordiv speed up (yet) + # __floordiv__ = _arith_method(operator.floordiv, '__floordiv__', '//', __floordiv__ = _arith_method(operator.floordiv, '__floordiv__', default_axis=None, fill_zeros=np.inf) __pow__ = _arith_method(operator.pow, '__pow__', '**', default_axis=None) - __mod__ = _arith_method(operator.mod, '__mod__', '*', default_axis=None, fill_zeros=np.nan) + # 
currently causes a floating point exception to occur - so sticking with unaccelerated for now + # __mod__ = _arith_method(operator.mod, '__mod__', '%', default_axis=None, fill_zeros=np.nan) + __mod__ = _arith_method(operator.mod, '__mod__', default_axis=None, fill_zeros=np.nan) __radd__ = _arith_method(_radd_compat, '__radd__', default_axis=None) __rmul__ = _arith_method(operator.mul, '__rmul__', default_axis=None) From 0e7781c705d4fceea97108e867e2e515579d8219 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Sat, 8 Jun 2013 21:40:28 -0400 Subject: [PATCH 5/5] TST: Make test_expressions cover more arithmetic operators --- pandas/tests/test_expressions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 7490051f2f6c8..ba0a9926dfa78 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -48,10 +48,12 @@ def setUp(self): def tearDown(self): expr._MIN_ELEMENTS = self._MIN_ELEMENTS + #TODO: add test for Panel + #TODO: add tests for binary operations @nose.tools.nottest def run_arithmetic_test(self, df, assert_func, check_dtype=False): expr._MIN_ELEMENTS = 0 - operations = ['add', 'sub', 'mul', 'truediv'] + operations = ['add', 'sub', 'mul','mod','truediv','floordiv','pow'] if not py3compat.PY3: operations.append('div') for arith in operations: