diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py
index 9807639143ddb..53b7d55368f6a 100644
--- a/asv_bench/benchmarks/algorithms.py
+++ b/asv_bench/benchmarks/algorithms.py
@@ -1,5 +1,6 @@
 import numpy as np
 import pandas as pd
+from pandas.util import testing as tm
 
 
 class algorithm(object):
@@ -55,3 +56,35 @@ def time_add_overflow_neg_arr(self):
 
     def time_add_overflow_mixed_arr(self):
         self.checked_add(self.arr, self.arrmixed)
+
+
+class hashing(object):
+    goal_time = 0.2
+
+    def setup(self):
+        N = 100000
+
+        self.df = pd.DataFrame(
+            {'A': pd.Series(tm.makeStringIndex(100).take(
+                np.random.randint(0, 100, size=N))),
+             'B': pd.Series(tm.makeStringIndex(10000).take(
+                 np.random.randint(0, 10000, size=N))),
+             'D': np.random.randn(N),
+             'E': np.arange(N),
+             'F': pd.date_range('20110101', freq='s', periods=N),
+             'G': pd.timedelta_range('1 day', freq='s', periods=N),
+             })
+        self.df['C'] = self.df['B'].astype('category')
+        self.df.iloc[10:20] = np.nan
+
+    def time_frame(self):
+        self.df.hash()
+
+    def time_series_int(self):
+        self.df.E.hash()
+
+    def time_series_string(self):
+        self.df.B.hash()
+
+    def time_series_categorical(self):
+        self.df.C.hash()
diff --git a/pandas/src/hash.pyx b/pandas/src/hash.pyx
new file mode 100644
index 0000000000000..b8c309f1f7a13
--- /dev/null
+++ b/pandas/src/hash.pyx
@@ -0,0 +1,181 @@
+# cython: profile=False
+# Translated from the reference implementation
+# at https://github.com/veorq/SipHash
+
+import cython
+cimport numpy as cnp
+import numpy as np
+from numpy cimport ndarray, uint8_t, uint32_t, uint64_t
+
+from cpython cimport (PyString_Check,
+                      PyBytes_Check,
+                      PyUnicode_Check)
+from libc.stdlib cimport malloc, free
+
+DEF cROUNDS = 2
+DEF dROUNDS = 4
+
+
+@cython.boundscheck(False)
+def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'):
+    """
+    Parameters
+    ----------
+    arr : 1-d ndarray of objects
+    key : hash key, must encode to 16 bytes
+    encoding : encoding for key & arr, defaults to 'utf8'
+
+    Returns
+    -------
+    1-d uint64 ndarray of hashes
+
+    """
+    cdef:
+        Py_ssize_t i, l, n
+        ndarray[uint64_t] result
+        bytes data, k
+        uint8_t *kb
+        uint64_t *lens
+        char **vecs, *cdata
+        object val
+
+    k = key.encode(encoding)
+    kb = <uint8_t *>k
+    if len(k) != 16:
+        raise ValueError(
+            'key should be a 16-byte string when encoded, '
+            'got {!r} (len {})'.format(k, len(k)))
+
+    n = len(arr)
+
+    # create an array of bytes
+    vecs = <char **>malloc(n * sizeof(char *))
+    lens = <uint64_t *>malloc(n * sizeof(uint64_t))
+
+    cdef list datas = []
+    for i in range(n):
+        val = arr[i]
+        if PyString_Check(val):
+            data = val.encode(encoding)
+        elif PyBytes_Check(val):
+            data = val
+        elif PyUnicode_Check(val):
+            data = val.encode(encoding)
+        else:
+            # non-strings
+            data = str(val).encode(encoding)
+
+        l = len(data)
+        lens[i] = l
+        cdata = data
+
+        # keep the reference alive through the end of the
+        # function
+        datas.append(data)
+        vecs[i] = cdata
+
+    result = np.empty(n, dtype=np.uint64)
+    with nogil:
+        for i in range(n):
+            result[i] = low_level_siphash(<uint8_t *>vecs[i], lens[i], kb)
+
+    free(vecs)
+    free(lens)
+    return result
+
+cdef inline uint64_t _rotl(uint64_t x, uint64_t b) nogil:
+    return (x << b) | (x >> (64 - b))
+
+cdef inline void u32to8_le(uint8_t* p, uint32_t v) nogil:
+    p[0] = <uint8_t>(v)
+    p[1] = <uint8_t>(v >> 8)
+    p[2] = <uint8_t>(v >> 16)
+    p[3] = <uint8_t>(v >> 24)
+
+cdef inline void u64to8_le(uint8_t* p, uint64_t v) nogil:
+    u32to8_le(p, v)
+    u32to8_le(p + 4, <uint32_t>(v >> 32))
+
+cdef inline uint64_t u8to64_le(uint8_t* p) nogil:
+    return (<uint64_t>p[0] |
+            <uint64_t>p[1] << 8 |
+            <uint64_t>p[2] << 16 |
+            <uint64_t>p[3] << 24 |
+            <uint64_t>p[4] << 32 |
+            <uint64_t>p[5] << 40 |
+            <uint64_t>p[6] << 48 |
+            <uint64_t>p[7] << 56)
+
+cdef inline void _sipround(uint64_t* v0, uint64_t* v1,
+                           uint64_t* v2, uint64_t* v3) nogil:
+    v0[0] += v1[0]
+    v1[0] = _rotl(v1[0], 13)
+    v1[0] ^= v0[0]
+    v0[0] = _rotl(v0[0], 32)
+    v2[0] += v3[0]
+    v3[0] = _rotl(v3[0], 16)
+    v3[0] ^= v2[0]
+    v0[0] += v3[0]
+    v3[0] = _rotl(v3[0], 21)
+    v3[0] ^= v0[0]
+    v2[0] += v1[0]
+    v1[0] = _rotl(v1[0], 17)
+    v1[0] ^= v2[0]
+    v2[0] = _rotl(v2[0], 32)
+
+cpdef uint64_t siphash(bytes data, bytes key) except? 0:
+    if len(key) != 16:
+        raise ValueError(
+            'key should be a 16-byte bytestring, got {!r} (len {})'.format(
+                key, len(key)))
+    return low_level_siphash(data, len(data), key)
+
+
+@cython.cdivision(True)
+cdef uint64_t low_level_siphash(uint8_t* data, size_t datalen,
+                                uint8_t* key) nogil:
+    cdef uint64_t v0 = 0x736f6d6570736575ULL
+    cdef uint64_t v1 = 0x646f72616e646f6dULL
+    cdef uint64_t v2 = 0x6c7967656e657261ULL
+    cdef uint64_t v3 = 0x7465646279746573ULL
+    cdef uint64_t b
+    cdef uint64_t k0 = u8to64_le(key)
+    cdef uint64_t k1 = u8to64_le(key + 8)
+    cdef uint64_t m
+    cdef int i
+    cdef uint8_t* end = data + datalen - (datalen % sizeof(uint64_t))
+    cdef int left = datalen & 7
+    cdef int left_byte
+
+    b = (<uint64_t>datalen) << 56
+    v3 ^= k1
+    v2 ^= k0
+    v1 ^= k1
+    v0 ^= k0
+
+    while (data != end):
+        m = u8to64_le(data)
+        v3 ^= m
+        for i in range(cROUNDS):
+            _sipround(&v0, &v1, &v2, &v3)
+        v0 ^= m
+
+        data += sizeof(uint64_t)
+
+    for i in range(left-1, -1, -1):
+        b |= (<uint64_t>data[i]) << (i * 8)
+
+    v3 ^= b
+
+    for i in range(cROUNDS):
+        _sipround(&v0, &v1, &v2, &v3)
+
+    v0 ^= b
+    v2 ^= 0xff
+
+    for i in range(dROUNDS):
+        _sipround(&v0, &v1, &v2, &v3)
+
+    b = v0 ^ v1 ^ v2 ^ v3
+
+    return b
diff --git a/pandas/tools/hashing.py b/pandas/tools/hashing.py
new file mode 100644
index 0000000000000..aa18b8bc70c37
--- /dev/null
+++ b/pandas/tools/hashing.py
@@ -0,0 +1,138 @@
+"""
+data hashing for pandas / numpy objects
+"""
+
+import numpy as np
+from pandas import _hash, Series, factorize, Categorical, Index
+from pandas.lib import infer_dtype
+from pandas.types.generic import ABCIndexClass, ABCSeries, ABCDataFrame
+from pandas.types.common import is_categorical_dtype
+
+# 16-byte hashing key
+_default_hash_key = '0123456789123456'
+
+
+def hash_pandas_object(obj, index=True, encoding='utf8', hash_key=None):
+    """
+    Return a data hash of the Index/Series/DataFrame
+
+    .. versionadded:: 0.19.2
+
+    Parameters
+    ----------
+    obj : Index, Series, or DataFrame
+    index : boolean, default True
+        include the index in the hash (if Series/DataFrame)
+    encoding : string, default 'utf8'
+        encoding for data & key when strings
+    hash_key : string key to encode, defaults to _default_hash_key
+
+    Returns
+    -------
+    Series of uint64, same length as the object
+
+    """
+    if hash_key is None:
+        hash_key = _default_hash_key
+
+    def adder(h, hashed_to_add):
+        h = np.multiply(h, np.uint(3), h)
+        return np.add(h, hashed_to_add, h)
+
+    if isinstance(obj, ABCIndexClass):
+        h = hash_array(obj.values, encoding, hash_key).astype('uint64')
+        h = Series(h, index=obj, dtype='uint64')
+    elif isinstance(obj, ABCSeries):
+        h = hash_array(obj.values, encoding, hash_key).astype('uint64')
+        if index:
+            h = adder(h, hash_pandas_object(obj.index,
+                                            index=False,
+                                            encoding=encoding,
+                                            hash_key=hash_key).values)
+        h = Series(h, index=obj.index, dtype='uint64')
+    elif isinstance(obj, ABCDataFrame):
+        cols = obj.iteritems()
+        first_series = next(cols)[1]
+        h = hash_array(first_series.values, encoding,
+                       hash_key).astype('uint64')
+        for _, col in cols:
+            h = adder(h, hash_array(col.values, encoding, hash_key))
+        if index:
+            h = adder(h, hash_pandas_object(obj.index,
+                                            index=False,
+                                            encoding=encoding,
+                                            hash_key=hash_key).values)
+
+        h = Series(h, index=obj.index, dtype='uint64')
+    else:
+        raise TypeError("Unexpected type for hashing %s" % type(obj))
+    return h
+
+
+def hash_array(vals, encoding='utf8', hash_key=None):
+    """
+    Given a 1d array, return an array of deterministic integers.
+
+    .. versionadded:: 0.19.2
+
+    Parameters
+    ----------
+    vals : ndarray
+    encoding : string, default 'utf8'
+        encoding for data & key when strings
+    hash_key : string key to encode, defaults to _default_hash_key
+
+    Returns
+    -------
+    1d uint64 numpy array of hash values, same length as vals
+
+    """
+
+    if hash_key is None:
+        hash_key = _default_hash_key
+
+    # work with categoricals as ints. (This check is above the complex
+    # check so that we don't ask numpy if categorical is a subdtype of
+    # complex, as it will choke.)
+    if is_categorical_dtype(vals.dtype):
+        vals = vals.codes
+
+    # we'll be working with everything as 64-bit values, so handle this
+    # 128-bit value early
+    if np.issubdtype(vals.dtype, np.complex128):
+        return hash_array(vals.real) + 23 * hash_array(vals.imag)
+
+    # MAIN LOGIC:
+    inferred = infer_dtype(vals)
+
+    # First, turn whatever array this is into unsigned 64-bit ints, if we can
+    # manage it.
+    if inferred == 'boolean':
+        vals = vals.astype('u8')
+
+    if (np.issubdtype(vals.dtype, np.datetime64) or
+            np.issubdtype(vals.dtype, np.timedelta64) or
+            np.issubdtype(vals.dtype, np.number)) and vals.dtype.itemsize <= 8:
+
+        vals = vals.view('u{}'.format(vals.dtype.itemsize)).astype('u8')
+    else:
+
+        # it's MUCH faster to categorize object dtypes, then hash and rename
+        codes, categories = factorize(vals, sort=False)
+        categories = Index(categories)
+        c = Series(Categorical(codes, categories,
+                               ordered=False, fastpath=True))
+        vals = _hash.hash_object_array(categories.values,
+                                       hash_key,
+                                       encoding)
+
+        # rename & extract
+        vals = c.cat.rename_categories(Index(vals)).astype(np.uint64).values
+
+    # Then, redistribute these 64-bit ints within the space of 64-bit ints
+    vals ^= vals >> 30
+    vals *= np.uint64(0xbf58476d1ce4e5b9)
+    vals ^= vals >> 27
+    vals *= np.uint64(0x94d049bb133111eb)
+    vals ^= vals >> 31
+    return vals
diff --git a/pandas/tools/tests/test_hashing.py b/pandas/tools/tests/test_hashing.py
new file mode 100644
index 0000000000000..3e4c77244d2f7
--- /dev/null
+++ b/pandas/tools/tests/test_hashing.py
@@ -0,0 +1,143 @@
+import numpy as np
+import pandas as pd
+
+from pandas import DataFrame, Series, Index
+from pandas.tools.hashing import hash_array, hash_pandas_object
+import pandas.util.testing as tm
+
+
+class TestHashing(tm.TestCase):
+
+    _multiprocess_can_split_ = True
+
+    def setUp(self):
+        self.df = DataFrame(
+            {'i32': np.array([1, 2, 3] * 3, dtype='int32'),
+             'f32': np.array([None, 2.5, 3.5] * 3, dtype='float32'),
+             'cat': Series(['a', 'b', 'c'] * 3).astype('category'),
+             'obj': Series(['d', 'e', 'f'] * 3),
+             'bool': np.array([True, False, True] * 3),
+             'dt': Series(pd.date_range('20130101', periods=9)),
+             'dt_tz': Series(pd.date_range('20130101', periods=9,
+                                           tz='US/Eastern')),
+             'td': Series(pd.timedelta_range('2000', periods=9))})
+
+    def test_consistency(self):
+        # check that our hash doesn't change because of a mistake
+        # in the actual code; this is the ground truth
+        result = hash_pandas_object(Index(['foo', 'bar', 'baz']))
+        expected = Series(np.array([3600424527151052760, 1374399572096150070,
+                                    477881037637427054], dtype='uint64'),
+                          index=['foo', 'bar', 'baz'])
+        tm.assert_series_equal(result, expected)
+
+    def test_hash_array(self):
+        for name, s in self.df.iteritems():
+            a = s.values
+            tm.assert_numpy_array_equal(hash_array(a), hash_array(a))
+
+    def check_equal(self, obj, **kwargs):
+        a = hash_pandas_object(obj, **kwargs)
+        b = hash_pandas_object(obj, **kwargs)
+        tm.assert_series_equal(a, b)
+
+        kwargs.pop('index', None)
+        a = hash_pandas_object(obj, **kwargs)
+        b = hash_pandas_object(obj, **kwargs)
+        tm.assert_series_equal(a, b)
+
+    def check_not_equal_with_index(self, obj):
+
+        # check that we are not hashing the same if
+        # we include the index
+        if not isinstance(obj, Index):
+            a = hash_pandas_object(obj, index=True)
+            b = hash_pandas_object(obj, index=False)
+            self.assertFalse((a == b).all())
+
+    def test_hash_pandas_object(self):
+
+        for obj in [Series([1, 2, 3]),
+                    Series([1.0, 1.5, 3.2]),
+                    Series([1.0, 1.5, np.nan]),
+                    Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]),
+                    Series(['a', 'b', 'c']),
+                    Series(['a', np.nan, 'c']),
+                    Series([True, False, True]),
+                    Index([1, 2, 3]),
+                    Index([True, False, True]),
+                    DataFrame({'x': ['a', 'b', 'c'], 'y': [1, 2, 3]}),
+                    tm.makeMissingDataframe(),
+                    tm.makeMixedDataFrame(),
+                    tm.makeTimeDataFrame(),
+                    tm.makeTimeSeries(),
+                    tm.makeTimedeltaIndex(),
+                    Series([1, 2, 3], index=pd.MultiIndex.from_tuples(
+                        [('a', 1), ('a', 2), ('b', 1)]))]:
+            self.check_equal(obj)
+            self.check_not_equal_with_index(obj)
+
+    def test_hash_pandas_object2(self):
+        for name, s in self.df.iteritems():
+            self.check_equal(s)
+            self.check_not_equal_with_index(s)
+
+    def test_hash_pandas_empty_object(self):
+        for obj in [Series([], dtype='float64'),
+                    Series([], dtype='object'),
+                    Index([])]:
+            self.check_equal(obj)
+
+            # these are by definition the same with
+            # or without the index as the data is empty
+
+    def test_errors(self):
+
+        for obj in [pd.Timestamp('20130101'), tm.makePanel()]:
+            def f():
+                hash_pandas_object(obj)
+
+            self.assertRaises(TypeError, f)
+
+    def test_hash_keys(self):
+        # using different hash keys, should have different hashes
+        # for the same data
+
+        # this only matters for object dtypes
+        obj = Series(list('abc'))
+        a = hash_pandas_object(obj, hash_key='9876543210123456')
+        b = hash_pandas_object(obj, hash_key='9876543210123465')
+        self.assertTrue((a != b).all())
+
+    def test_invalid_key(self):
+        # this only matters for object dtypes
+        def f():
+            hash_pandas_object(Series(list('abc')), hash_key='foo')
+        self.assertRaises(ValueError, f)
+
+    def test_mixed(self):
+        # mixed objects
+        obj = Series(['1', 2, 3])
+        self.check_equal(obj)
+        self.check_not_equal_with_index(obj)
+
+        # mixed are actually equal when stringified
+        a = hash_pandas_object(obj)
+        b = hash_pandas_object(Series(list('123')))
+        self.assert_series_equal(a, b)
+
+    def test_already_encoded(self):
+        # if already encoded then ok
+
+        obj = Series(list('abc')).str.encode('utf8')
+        self.check_equal(obj)
+
+    def test_alternate_encoding(self):
+
+        obj = Series(list('abc'))
+        self.check_equal(obj, encoding='ascii')
+
+    def test_long_strings(self):
+
+        obj = Index(tm.rands_array(nchars=10000, size=100))
+        self.check_equal(obj)
diff --git a/setup.py b/setup.py
index 2dd3fec150781..8d2e2669852ea 100755
--- a/setup.py
+++ b/setup.py
@@ -331,6 +331,7 @@ class CheckSDist(sdist_class):
                  'pandas/src/period.pyx',
                  'pandas/src/sparse.pyx',
                  'pandas/src/testing.pyx',
+                 'pandas/src/hash.pyx',
                  'pandas/io/sas/saslib.pyx']
 
     def initialize_options(self):
@@ -501,10 +502,12 @@ def pxd(name):
               'sources': ['pandas/src/parser/tokenizer.c',
                           'pandas/src/parser/io.c']},
    _sparse={'pyxfile': 'src/sparse',
-            'depends': ([srcpath('sparse', suffix='.pyx')]
-                        + _pxi_dep['_sparse'])},
+            'depends': ([srcpath('sparse', suffix='.pyx')] +
+                        _pxi_dep['_sparse'])},
    _testing={'pyxfile': 'src/testing',
              'depends': [srcpath('testing', suffix='.pyx')]},
+   _hash={'pyxfile': 'src/hash',
+          'depends': [srcpath('hash', suffix='.pyx')]},
 )
 
 ext_data["io.sas.saslib"] = {'pyxfile': 'io/sas/saslib'}
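
Usage sketch (not part of the patch): the public entry points this diff adds are
pandas.tools.hashing.hash_pandas_object and hash_array. A minimal example of the
intended behaviour, assuming the patch has been built and installed:

    import numpy as np
    import pandas as pd
    from pandas.tools.hashing import hash_array, hash_pandas_object

    s = pd.Series(['a', 'b', 'c'])

    # hashing is deterministic: the same data always yields the same uint64s
    assert hash_pandas_object(s).equals(hash_pandas_object(s))

    # by default the index participates in the hash; it can be excluded
    with_index = hash_pandas_object(s, index=True)
    without_index = hash_pandas_object(s, index=False)
    assert not (with_index == without_index).all()

    # hash_array works on a bare ndarray and returns an ndarray of uint64
    print(hash_array(np.array([1, 2, 3], dtype='int64')))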
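
For review, here is a pure-Python transliteration of low_level_siphash above
(SipHash-2-4, following the reference at https://github.com/veorq/SipHash). It is
a sketch for sanity-checking the Cython version, not part of the patch; the names
siphash24, sipround, and MASK are illustrative, and Python 3 bytes semantics
(indexing yields ints) are assumed:

    import struct

    MASK = (1 << 64) - 1  # emulate uint64 wraparound

    def _rotl(x, b):
        return ((x << b) | (x >> (64 - b))) & MASK

    def sipround(v0, v1, v2, v3):
        # one SipRound, mirroring _sipround in hash.pyx
        v0 = (v0 + v1) & MASK
        v1 = _rotl(v1, 13)
        v1 ^= v0
        v0 = _rotl(v0, 32)
        v2 = (v2 + v3) & MASK
        v3 = _rotl(v3, 16)
        v3 ^= v2
        v0 = (v0 + v3) & MASK
        v3 = _rotl(v3, 21)
        v3 ^= v0
        v2 = (v2 + v1) & MASK
        v1 = _rotl(v1, 17)
        v1 ^= v2
        v2 = _rotl(v2, 32)
        return v0, v1, v2, v3

    def siphash24(data, key):
        assert isinstance(data, bytes) and isinstance(key, bytes)
        assert len(key) == 16
        k0, k1 = struct.unpack('<QQ', key)
        v0 = 0x736f6d6570736575 ^ k0
        v1 = 0x646f72616e646f6d ^ k1
        v2 = 0x6c7967656e657261 ^ k0
        v3 = 0x7465646279746573 ^ k1
        b = (len(data) & 0xff) << 56
        end = len(data) - (len(data) % 8)
        # compress each full 8-byte little-endian word with cROUNDS = 2
        for off in range(0, end, 8):
            m = struct.unpack_from('<Q', data, off)[0]
            v3 ^= m
            for _ in range(2):
                v0, v1, v2, v3 = sipround(v0, v1, v2, v3)
            v0 ^= m
        # fold the trailing 0-7 bytes into b below the length byte
        for i in range(len(data) - end):
            b |= data[end + i] << (i * 8)
        v3 ^= b
        for _ in range(2):
            v0, v1, v2, v3 = sipround(v0, v1, v2, v3)
        v0 ^= b
        v2 ^= 0xff
        # dROUNDS = 4 finalization rounds
        for _ in range(4):
            v0, v1, v2, v3 = sipround(v0, v1, v2, v3)
        return v0 ^ v1 ^ v2 ^ v3

    # should agree with the compiled version exposed by this patch:
    #   from pandas import _hash
    #   assert _hash.siphash(b'pandas', b'0123456789123456') == \
    #       siphash24(b'pandas', b'0123456789123456')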
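
The last five statements of hash_array form a standalone 64-bit finalizer: an
invertible xor-shift/multiply mix whose constants and shift amounts are the ones
used in the splitmix64 mix step, so that nearby inputs (e.g. consecutive
integers) land far apart in uint64 space. A self-contained sketch (mix64 is an
illustrative name, not part of the patch):

    import numpy as np

    def mix64(vals):
        # same xor-shift/multiply steps as the tail of hash_array;
        # uint64 multiplication wraps mod 2**64, as intended
        vals = vals.astype('uint64', copy=True)
        vals ^= vals >> np.uint64(30)
        vals *= np.uint64(0xbf58476d1ce4e5b9)
        vals ^= vals >> np.uint64(27)
        vals *= np.uint64(0x94d049bb133111eb)
        vals ^= vals >> np.uint64(31)
        return vals

    # consecutive small ints spread across the whole 64-bit range
    print(mix64(np.arange(1, 5)))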
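
Finally, hash_pandas_object combines per-column (and index) hashes with the adder
closure: the running hash is multiplied by 3 and the next hash added, in-place
and wrapping mod 2**64, which makes the combined hash sensitive to column order.
A two-element sketch of that step, mirroring the patch's arithmetic:

    import numpy as np

    h = np.array([7, 8], dtype='uint64')        # running hash
    col = np.array([100, 200], dtype='uint64')  # next column's hash

    # mirror of adder: h = h * 3 + col, computed in place
    h = np.multiply(h, np.uint(3), h)
    h = np.add(h, col, h)
    print(h)  # [121 224]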