diff --git a/doc/source/release.rst b/doc/source/release.rst index a23936ae154c0..6838eb9c90581 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -171,6 +171,7 @@ API Changes - default sorting algorithm for ``Series.order`` is not ``quicksort``, to conform with ``Series.sort`` (and numpy defaults) - add ``inplace`` keyword to ``Series.order/sort`` to make them inverses (:issue:`6859`) +- Series comparison operations (e.g. ``x == y``) now align on the index before comparing (:issue:`1134`) Deprecations ~~~~~~~~~~~~ diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index ded10fd75e8d4..9d54043085b4f 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -213,6 +213,25 @@ API changes - default sorting algorithm for ``Series.order`` is not ``quicksort``, to conform with ``Series.sort`` (and numpy defaults) - add ``inplace`` keyword to ``Series.order/sort`` to make them inverses (:issue:`6859`) +- Series comparison operations (e.g. ``x == y``) now align on the index before comparing (:issue:`1134`) + + Comparing two Series that hold the same labels in a different order matches values by label: + + .. ipython:: python + + s1 = Series(index=["A", "B", "C"], data=[1,2,3]) + s1 + s2 = Series(index=["C", "B", "A"], data=[3,2,1]) + s2 + s1 == s2 + + In the following example, label ``'A'`` is missing from ``s3``, so after alignment it is ``NaN`` there and ``s1 == s3`` is ``False`` at ``'A'`` + + .. ipython:: python + + s3 = Series(index=["C", "B"], data=[3,2]) + s3 + s1 == s3 .. 
_whatsnew_0140.sql: diff --git a/pandas/core/ops.py b/pandas/core/ops.py index b8e92fb25cec5..8762d5edddd82 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -537,13 +537,18 @@ def na_op(x, y): def wrapper(self, other): if isinstance(other, pd.Series): name = _maybe_match_name(self, other) - if len(self) != len(other): - raise ValueError('Series lengths must match to compare') - return self._constructor(na_op(self.values, other.values), - index=self.index, name=name) + if self.index.equals(other): + s1, s2 = self, other + index = self.index + else: + index = self.index + other.index + s1 = self.reindex(index) + s2 = other.reindex(index) + return self._constructor(na_op(s1.values, s2.values), + index=index, name=name) elif isinstance(other, pd.DataFrame): # pragma: no cover return NotImplemented - elif isinstance(other, (pa.Array, pd.Series)): + elif isinstance(other, pa.Array): if len(self) != len(other): raise ValueError('Lengths must match to compare') return self._constructor(na_op(self.values, np.asarray(other)), diff --git a/pandas/io/tests/test_json/test_ujson.py b/pandas/io/tests/test_json/test_ujson.py index 36963d193e5ae..6af45a81fe3eb 100644 --- a/pandas/io/tests/test_json/test_ujson.py +++ b/pandas/io/tests/test_json/test_ujson.py @@ -1241,48 +1241,51 @@ def testDataFrameNumpyLabelled(self): assert_array_equal(df.index, outp.index) def testSeries(self): + s = Series([10, 20, 30, 40, 50, 60], name="series", index=[6,7,8,9,10,15]) s.sort() + def check(x,y): + tm.assert_series_equal(x,y,check_index_type=False) + y.index = Index(outp.astype('int64')) + tm.assert_series_equal(x,y) + # column indexed outp = Series(ujson.decode(ujson.encode(s))) outp.sort() - self.assertTrue((s == outp).values.all()) outp = Series(ujson.decode(ujson.encode(s), numpy=True)) outp.sort() - self.assertTrue((s == outp).values.all()) + check(s,outp) dec = _clean_dict(ujson.decode(ujson.encode(s, orient="split"))) outp = Series(**dec) - self.assertTrue((s == 
outp).values.all()) - self.assertTrue(s.name == outp.name) + check(s,outp) dec = _clean_dict(ujson.decode(ujson.encode(s, orient="split"), numpy=True)) outp = Series(**dec) - self.assertTrue((s == outp).values.all()) - self.assertTrue(s.name == outp.name) + check(s,outp) outp = Series(ujson.decode(ujson.encode(s, orient="records"), numpy=True)) - self.assertTrue((s == outp).values.all()) + check(s,outp) outp = Series(ujson.decode(ujson.encode(s, orient="records"))) - self.assertTrue((s == outp).values.all()) + check(s,outp) outp = Series(ujson.decode(ujson.encode(s, orient="values"), numpy=True)) - self.assertTrue((s == outp).values.all()) + check(s,outp) outp = Series(ujson.decode(ujson.encode(s, orient="values"))) - self.assertTrue((s == outp).values.all()) + check(s,outp) outp = Series(ujson.decode(ujson.encode(s, orient="index"))) outp.sort() - self.assertTrue((s == outp).values.all()) + check(s,outp) outp = Series(ujson.decode(ujson.encode(s, orient="index"), numpy=True)) outp.sort() - self.assertTrue((s == outp).values.all()) + check(s,outp) def testSeriesNested(self): s = Series([10, 20, 30, 40, 50, 60], name="series", index=[6,7,8,9,10,15]) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 744a020347af9..b54c832d56943 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2354,6 +2354,37 @@ def check_comparators(series, other): check_comparators(self.ts, 5) check_comparators(self.ts, self.ts + 1) + def test_align_eq(self): + + # GH 1134 + # eq should align! 
+ + # needs alignment + s1 = Series([1,2], ['a','b']) + s2 = Series([2,3], ['b','c']) + result1 = s1 == s2 + result2 = s2 == s1 + index = s1.index+s2.index + expected = s1.reindex(index) == s2.reindex(index) + assert_series_equal(result1,expected) + assert_series_equal(result2,expected) + + # differs in order + s1 = Series(index=["A", "B", "C"], data=[1,2,3]) + s2 = Series(index=["C", "B", "A"], data=[3,2,1]) + result1 = s1 == s2 + result2 = s2 == s1 + index = s1.index+s2.index + expected = s1.reindex(index) == s2.reindex(index) + assert_series_equal(result1,expected) + assert_series_equal(result2,expected) + + s1 = Series([10,20,30,40,50,60],index=[6,7,8,9,10,15],name='series') + s2 = Series([10,20,30,40,50,60],index=[6,7,8,9,10,15]) + result = s1 == s2 + expected = Series(True,index=[6,7,8,9,10,15]) + assert_series_equal(result,expected) + def test_operators_empty_int_corner(self): s1 = Series([], [], dtype=np.int32) s2 = Series({'x': 0.}) @@ -3214,15 +3245,6 @@ def test_more_na_comparisons(self): expected = Series([True, True, True]) assert_series_equal(result, expected) - def test_comparison_different_length(self): - a = Series(['a', 'b', 'c']) - b = Series(['b', 'a']) - self.assertRaises(ValueError, a.__lt__, b) - - a = Series([1, 2]) - b = Series([2, 3, 4]) - self.assertRaises(ValueError, a.__eq__, b) - def test_comparison_label_based(self): # GH 4947 diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 8abbb37646b49..07484c60d54a8 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -511,15 +511,15 @@ def assert_series_equal(left, right, check_dtype=True, right.values)) else: assert_almost_equal(left.values, right.values, check_less_precise) - if check_less_precise: - assert_almost_equal( - left.index.values, right.index.values, check_less_precise) - else: - assert_index_equal(left.index, right.index) if check_index_type: - assert_isinstance(left.index, type(right.index)) - assert_attr_equal('dtype', left.index, right.index) - 
assert_attr_equal('inferred_type', left.index, right.index) + if check_less_precise: + assert_almost_equal( + left.index.values, right.index.values, check_less_precise) + else: + assert_index_equal(left.index, right.index) + assert_isinstance(left.index, type(right.index)) + assert_attr_equal('dtype', left.index, right.index) + assert_attr_equal('inferred_type', left.index, right.index) # This could be refactored to use the NDFrame.equals method def assert_frame_equal(left, right, check_dtype=True,