Skip to content

Commit 5784380

Browse files
committed
remove .hash() method
1 parent 0d7d8c0 commit 5784380

File tree

4 files changed

+7
-111
lines changed

4 files changed

+7
-111
lines changed

doc/source/whatsnew/v0.19.2.txt

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,6 @@ Highlights include:
1616
:backlinks: none
1717

1818

19-
.. _whatsnew_0192.enhancements:
20-
21-
Enhancements
22-
~~~~~~~~~~~~
23-
24-
- ``Series/DataFrame/Index`` gain a ``.hash()`` method to provide a row-wise unique data hash, see :meth:`pandas.DataFrame.hash` (:issue:`14729`)
25-
26-
2719
.. _whatsnew_0192.performance:
2820

2921
Performance Improvements

pandas/core/base.py

Lines changed: 1 addition & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -795,99 +795,7 @@ def __unicode__(self):
795795
return "%s(%s, dtype='%s')" % (type(self).__name__, prepr, self.dtype)
796796

797797

798-
class HashableMixin(object):
799-
""" provide methods for hashable pandas objects """
800-
801-
def hash(self, index=True, encoding='utf8', hash_key=None):
802-
"""
803-
Return a data hash of the Index/Series/DataFrame
804-
This is a 1-d Series of unique hashes of all of the elements in that
805-
row, including the index if desired.
806-
807-
Parameters
808-
----------
809-
index : boolean, default True
810-
include the index in the hash (if Series/DataFrame)
811-
encoding : string, default 'utf8'
812-
encoding for data & key when strings
813-
hash_key : string, must be 16 bytes length if passed
814-
815-
Returns
816-
-------
817-
Series of uint64 hash values. The index of the Series
818-
will be the original index (Series/DataFrame),
819-
or the original object if Index
820-
821-
Examples
822-
--------
823-
>>> pd.Index([1, 2, 3]).hash()
824-
1 6238072747940578789
825-
2 15839785061582574730
826-
3 2185194620014831856
827-
dtype: uint64
828-
829-
>>> pd.Series([1, 2, 3], index=[2, 3, 4]).hash()
830-
2 16107259231694759481
831-
3 12811061657343452814
832-
4 1341665827200607204
833-
dtype: uint64
834-
835-
>>> pd.Series([1, 2, 3]).hash(index=False)
836-
0 6238072747940578789
837-
1 15839785061582574730
838-
2 2185194620014831856
839-
dtype: uint64
840-
841-
>>> pd.DataFrame({'A': [1, 2, 3]}).hash()
842-
0 267474170112184751
843-
1 16863939785269199747
844-
2 3948624847917518682
845-
dtype: uint64
846-
847-
>>> pd.DataFrame({'A': [1, 2, 3], 'B': ['foo', 'bar', 'baz']}).hash()
848-
0 11603696091789712533
849-
1 5345384428795788641
850-
2 46691607209239364
851-
dtype: uint64
852-
853-
In [10]: pd.DataFrame({'A': [1, 1, 2], 'B': ['foo', 'bar', 'foo'],
854-
'C': [1, 2, 3]}).set_index(['A', 'B']).hash()
855-
A B
856-
1 foo 553964757138028680
857-
bar 13757638258637221887
858-
2 foo 4843577173406411690
859-
dtype: uint64
860-
861-
Notes
862-
-----
863-
These functions do not hash attributes attached to the object
864-
e.g. name for Index/Series. Nor do they hash the columns of
865-
a DataFrame.
866-
867-
Mixed dtypes within a Series (or a column of a DataFrame) will
868-
be stringified, for example.
869-
870-
>>> Series(['1', 2, 3]).hash()
871-
array([ 8973981985592347666, 16940873351292606887,
872-
10100427194775696709], dtype=uint64)
873-
874-
>>> Series(['1', '2', '3']).hash()
875-
array([ 8973981985592347666, 16940873351292606887,
876-
10100427194775696709], dtype=uint64)
877-
878-
These have the same data hash, while a pure dtype is different.
879-
880-
>>> Series([1, 2, 3]).hash()
881-
array([ 267474170112184751, 16863939785269199747,
882-
3948624847917518682], dtype=uint64)
883-
884-
"""
885-
from pandas.tools.hashing import hash_pandas_object
886-
return hash_pandas_object(self, index=index, encoding=encoding,
887-
hash_key=hash_key)
888-
889-
890-
class IndexOpsMixin(HashableMixin):
798+
class IndexOpsMixin(object):
891799
""" common ops mixin to support a unified interface / docs for Series /
892800
Index
893801
"""

pandas/core/frame.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@
201201
# DataFrame class
202202

203203

204-
class DataFrame(NDFrame, base.HashableMixin):
204+
class DataFrame(NDFrame):
205205
""" Two-dimensional size-mutable, potentially heterogeneous tabular data
206206
structure with labeled axes (rows and columns). Arithmetic operations
207207
align on both row and column labels. Can be thought of as a dict-like

pandas/tools/tests/test_hashing.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def setUp(self):
2525
def test_consistency(self):
2626
# check that our hash doesn't change because of a mistake
2727
# in the actual code; this is the ground truth
28-
result = Index(['foo', 'bar', 'baz']).hash()
28+
result = hash_pandas_object(Index(['foo', 'bar', 'baz']))
2929
expected = Series(np.array([3600424527151052760, 1374399572096150070,
3030
477881037637427054], dtype='uint64'),
3131
index=['foo', 'bar', 'baz'])
@@ -41,22 +41,18 @@ def check_equal(self, obj, **kwargs):
4141
b = hash_pandas_object(obj, **kwargs)
4242
tm.assert_series_equal(a, b)
4343

44-
a = obj.hash(**kwargs)
45-
b = obj.hash(**kwargs)
46-
tm.assert_series_equal(a, b)
47-
4844
kwargs.pop('index', None)
49-
a = obj.hash(index=False, **kwargs)
50-
b = obj.hash(index=False, **kwargs)
45+
a = hash_pandas_object(obj, **kwargs)
46+
b = hash_pandas_object(obj, **kwargs)
5147
tm.assert_series_equal(a, b)
5248

5349
def check_not_equal_with_index(self, obj):
5450

5551
# check that we are not hashing the same if
5652
# we include the index
5753
if not isinstance(obj, Index):
58-
a = obj.hash(index=True)
59-
b = obj.hash(index=False)
54+
a = hash_pandas_object(obj, index=True)
55+
b = hash_pandas_object(obj, index=False)
6056
self.assertFalse((a == b).all())
6157

6258
def test_hash_pandas_object(self):

0 commit comments

Comments
 (0)