pandas-dev
diff --git a/‎.travis.yml
Lines changed: 1 addition & 1 deletion b/‎.travis.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎asv_bench/benchmarks/indexing.py
Lines changed: 19 additions & 4 deletions b/‎asv_bench/benchmarks/indexing.py
Lines changed: 19 additions & 4 deletions
diff --git a/‎ci/script_multi.sh
Lines changed: 1 addition & 1 deletion b/‎ci/script_multi.sh
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/install.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/install.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/style.ipynb
Lines changed: 1 addition & 1 deletion b/‎doc/source/style.ipynb
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/whatsnew/v0.20.2.txt
Lines changed: 10 additions & 6 deletions b/‎doc/source/whatsnew/v0.20.2.txt
Lines changed: 10 additions & 6 deletions
diff --git a/‎pandas/_libs/hashtable.pxd
Lines changed: 1 addition & 0 deletions b/‎pandas/_libs/hashtable.pxd
Lines changed: 1 addition & 0 deletions
diff --git a/‎pandas/_libs/hashtable.pyx
Lines changed: 13 additions & 0 deletions b/‎pandas/_libs/hashtable.pyx
Lines changed: 13 additions & 0 deletions
diff --git a/‎pandas/_libs/hashtable_class_helper.pxi.in
Lines changed: 39 additions & 7 deletions b/‎pandas/_libs/hashtable_class_helper.pxi.in
Lines changed: 39 additions & 7 deletions
diff --git a/‎pandas/_libs/index.pyx
Lines changed: 32 additions & 1 deletion b/‎pandas/_libs/index.pyx
Lines changed: 32 additions & 1 deletion
@@ -123,7 +123,7 @@ after_success:
 
 after_script:
   - echo "after_script start"
-  - source activate pandas && cd /tmp && python -c "import pandas; pandas.show_versions();"
+  - source activate pandas && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
   - if [ -e /tmp/single.xml ]; then
     ci/print_skipped.py /tmp/single.xml;
     fi
 
@@ -19,6 +19,9 @@ def time_getitem_list_like(self):
     def time_getitem_array(self):
         self.s[np.arange(10000)]
 
+    def time_getitem_lists(self):
+        self.s[np.arange(10000).tolist()]
+
     def time_iloc_array(self):
         self.s.iloc[np.arange(10000)]
 
@@ -190,9 +193,15 @@ def setup(self):
              np.arange(1000)], names=['one', 'two'])
 
         import string
-        self.mistring = MultiIndex.from_product(
-            [np.arange(1000),
-             np.arange(20), list(string.ascii_letters)],
+
+        self.mi_large = MultiIndex.from_product(
+            [np.arange(1000), np.arange(20), list(string.ascii_letters)],
+            names=['one', 'two', 'three'])
+        self.mi_med = MultiIndex.from_product(
+            [np.arange(1000), np.arange(10), list('A')],
+            names=['one', 'two', 'three'])
+        self.mi_small = MultiIndex.from_product(
+            [np.arange(100), list('A'), list('A')],
             names=['one', 'two', 'three'])
 
     def time_series_xs_mi_ix(self):
@@ -215,8 +224,14 @@ def time_multiindex_get_indexer(self):
                       (0, 16), (0, 17), (0, 18),
                       (0, 19)], dtype=object))
 
+    def time_multiindex_large_get_loc(self):
+        self.mi_large.get_loc((999, 19, 'Z'))
+
+    def time_multiindex_med_get_loc(self):
+        self.mi_med.get_loc((999, 9, 'A'))
+
     def time_multiindex_string_get_loc(self):
-        self.mistring.get_loc((999, 19, 'Z'))
+        self.mi_small.get_loc((99, 'A', 'A'))
 
     def time_is_monotonic(self):
         self.miint.is_monotonic
 
@@ -27,7 +27,7 @@ if [ "$BUILD_TEST" ]; then
     echo "[running]"
     cd /tmp
     unset PYTHONPATH
-    python -c "import pandas; pandas.test(['-n 2', '--skip-slow', '--skip-network', '-r xX'])"
+    python -c 'import pandas; pandas.test(["-n 2", "--skip-slow", "--skip-network", "-r xX", "-m not single"])'
 
 elif [ "$DOC" ]; then
     echo "We are not running pytest as this is a doc-build"
 
@@ -202,7 +202,7 @@ installed), make sure you have `pytest
 Dependencies
 ------------
 
-* `setuptools <http://pythonhosted.org/setuptools>`__
+* `setuptools <https://setuptools.readthedocs.io/en/latest/>`__
 * `NumPy <http://www.numpy.org>`__: 1.7.1 or higher
 * `python-dateutil <http://labix.org/python-dateutil>`__: 1.5 or higher
 * `pytz <http://pytz.sourceforge.net/>`__: Needed for time zone support
 
@@ -12,7 +12,7 @@
     "\n",
     "<span style=\"color: red\">*Provisional: This is a new feature and still under development. We'll be adding features and possibly making breaking changes in future releases. We'd love to hear your feedback.*</span>\n",
     "\n",
-    "This document is written as a Jupyter Notebook, and can be viewed or downloaded [here](http://nbviewer.ipython.org/github/pandas-dev/pandas/blob/master/doc/source/html-styling.ipynb).\n",
+    "This document is written as a Jupyter Notebook, and can be viewed or downloaded [here](http://nbviewer.ipython.org/github/pandas-dev/pandas/blob/master/doc/source/style.ipynb).\n",
     "\n",
     "You can apply **conditional formatting**, the visual styling of a DataFrame\n",
     "depending on the data within, by using the ``DataFrame.style`` property.\n",
 
@@ -19,14 +19,15 @@ Highlights include:
 Enhancements
 ~~~~~~~~~~~~
 
-
+- Unblocked access to additional compression types supported in pytables: 'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 'blosc:zlib', 'blosc:zstd' (:issue:`14478`)
 
 .. _whatsnew_0202.performance:
 
 Performance Improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
-
+- Performance regression fix when indexing with a list-like (:issue:`16285`)
+- Performance regression fix for small MultiIndexes (:issue:`16319`)
 
 .. _whatsnew_0202.bug_fixes:
 
@@ -36,7 +37,7 @@ Bug Fixes
 Conversion
 ^^^^^^^^^^
 
-
+- Bug in ``pd.to_numeric()`` in which empty data inputs were causing Python to crash (:issue:`16302`)
 
 
 Indexing
@@ -50,11 +51,15 @@ I/O
 
 - :class:`pandas.io.formats.style.Styler` now has ``index`` parameter and corresponding method ``hide_index()`` to determine whether the index will be rendered in ouptut (:issue:`14194`)
 - :class:`pandas.io.formats.style.Styler` now has ``hidden_cols`` parameter and corresponding method ``hide_columns()`` to determine whether columns will be hidden in output (:issue:`14194`)
+- Bug that would force importing of the clipboard routines unnecessarily, potentially causing an import error on startup (:issue:`16288`)
+
 
 
 Plotting
 ^^^^^^^^
 
+- Bug in ``DataFrame.plot`` with a single column and a list-like ``color`` (:issue:`3486`)
+
 
 
 
@@ -67,13 +72,12 @@ Groupby/Resample/Rolling
 Sparse
 ^^^^^^
 
-
-
+- Bug in construction of SparseDataFrame from ``scipy.sparse.dok_matrix`` (:issue:`16179`)
 
 Reshaping
 ^^^^^^^^^
 
-
+- Bug in ``DataFrame.stack`` with unsorted levels in MultiIndex columns (:issue:`16323`)
 
 
 Numeric
 
@@ -52,6 +52,7 @@ cdef struct Int64VectorData:
 cdef class Int64Vector:
     cdef Int64VectorData *data
     cdef ndarray ao
+    cdef bint external_view_exists
 
     cdef resize(self)
     cpdef to_array(self)
 
@@ -64,6 +64,10 @@ cdef class Factorizer:
         >>> factorize(np.array([1,2,np.nan], dtype='O'), na_sentinel=20)
         array([ 0,  1, 20])
         """
+        if self.uniques.external_view_exists:
+            uniques = ObjectVector()
+            uniques.extend(self.uniques.to_array())
+            self.uniques = uniques
         labels = self.table.get_labels(values, self.uniques,
                                        self.count, na_sentinel, check_null)
         mask = (labels == na_sentinel)
@@ -99,6 +103,15 @@ cdef class Int64Factorizer:
 
     def factorize(self, int64_t[:] values, sort=False,
                   na_sentinel=-1, check_null=True):
+        """
+        Factorize values with nans replaced by na_sentinel
+        >>> factorize(np.array([1, 2, 1], dtype='int64'), na_sentinel=20)
+        array([0, 1, 0])
+        """
+        if self.uniques.external_view_exists:
+            uniques = Int64Vector()
+            uniques.extend(self.uniques.to_array())
+            self.uniques = uniques
         labels = self.table.get_labels(values, self.uniques,
                                        self.count, na_sentinel,
                                        check_null)
 
@@ -71,6 +71,7 @@ cdef class {{name}}Vector:
 
     {{if dtype != 'int64'}}
     cdef:
+        bint external_view_exists
         {{name}}VectorData *data
         ndarray ao
     {{endif}}
@@ -80,14 +81,15 @@ cdef class {{name}}Vector:
             sizeof({{name}}VectorData))
         if not self.data:
             raise MemoryError()
+        self.external_view_exists = False
         self.data.n = 0
         self.data.m = _INIT_VEC_CAP
         self.ao = np.empty(self.data.m, dtype={{idtype}})
         self.data.data = <{{arg}}*> self.ao.data
 
     cdef resize(self):
         self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
-        self.ao.resize(self.data.m)
+        self.ao.resize(self.data.m, refcheck=False)
         self.data.data = <{{arg}}*> self.ao.data
 
     def __dealloc__(self):
@@ -99,13 +101,20 @@ cdef class {{name}}Vector:
         return self.data.n
 
     cpdef to_array(self):
-        self.ao.resize(self.data.n)
-        self.data.m = self.data.n
+        if self.data.m != self.data.n:
+            if self.external_view_exists:
+                # should never happen
+                raise ValueError("should have raised on append()")
+            self.ao.resize(self.data.n, refcheck=False)
+            self.data.m = self.data.n
+        self.external_view_exists = True
         return self.ao
 
     cdef inline void append(self, {{arg}} x):
 
         if needs_resize(self.data):
+            if self.external_view_exists:
+                raise ValueError("external reference but Vector.resize() needed")
             self.resize()
 
         append_data_{{dtype}}(self.data, x)
@@ -120,15 +129,19 @@ cdef class StringVector:
 
     cdef:
         StringVectorData *data
+        bint external_view_exists
 
     def __cinit__(self):
         self.data = <StringVectorData *>PyMem_Malloc(
             sizeof(StringVectorData))
         if not self.data:
             raise MemoryError()
+        self.external_view_exists = False
         self.data.n = 0
         self.data.m = _INIT_VEC_CAP
         self.data.data = <char **> malloc(self.data.m * sizeof(char *))
+        if not self.data.data:
+            raise MemoryError()
 
     cdef resize(self):
         cdef:
@@ -138,9 +151,10 @@ cdef class StringVector:
         m = self.data.m
         self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
 
-        # TODO: can resize?
         orig_data = self.data.data
         self.data.data = <char **> malloc(self.data.m * sizeof(char *))
+        if not self.data.data:
+            raise MemoryError()
         for i in range(m):
             self.data.data[i] = orig_data[i]
 
@@ -164,6 +178,7 @@ cdef class StringVector:
         for i in range(self.data.n):
             val = self.data.data[i]
             ao[i] = val
+        self.external_view_exists = True
         self.data.m = self.data.n
         return ao
 
@@ -174,15 +189,20 @@ cdef class StringVector:
 
         append_data_string(self.data, x)
 
+    cdef extend(self, ndarray[:] x):
+        for i in range(len(x)):
+            self.append(x[i])
 
 cdef class ObjectVector:
 
     cdef:
         PyObject **data
         size_t n, m
         ndarray ao
+        bint external_view_exists
 
     def __cinit__(self):
+        self.external_view_exists = False
         self.n = 0
         self.m = _INIT_VEC_CAP
         self.ao = np.empty(_INIT_VEC_CAP, dtype=object)
@@ -193,19 +213,28 @@ cdef class ObjectVector:
 
     cdef inline append(self, object o):
         if self.n == self.m:
+            if self.external_view_exists:
+                raise ValueError("external reference but Vector.resize() needed")
             self.m = max(self.m * 2, _INIT_VEC_CAP)
-            self.ao.resize(self.m)
+            self.ao.resize(self.m, refcheck=False)
             self.data = <PyObject**> self.ao.data
 
         Py_INCREF(o)
         self.data[self.n] = <PyObject*> o
         self.n += 1
 
     def to_array(self):
-        self.ao.resize(self.n)
-        self.m = self.n
+        if self.m != self.n:
+            if self.external_view_exists:
+                raise ValueError("should have raised on append()")
+            self.ao.resize(self.n, refcheck=False)
+            self.m = self.n
+        self.external_view_exists = True
         return self.ao
 
+    cdef extend(self, ndarray[:] x):
+        for i in range(len(x)):
+            self.append(x[i])
 
 #----------------------------------------------------------------------
 # HashTable
@@ -362,6 +391,9 @@ cdef class {{name}}HashTable(HashTable):
 
                     if needs_resize(ud):
                         with gil:
+                            if uniques.external_view_exists:
+                                raise ValueError("external reference to uniques held, "
+                                        "but Vector.resize() needed")
                             uniques.resize()
                     append_data_{{dtype}}(ud, val)
                     labels[i] = count
 
@@ -553,7 +553,34 @@ cdef inline bint _is_utc(object tz):
     return tz is UTC or isinstance(tz, _du_utc)
 
 
-cdef class MultiIndexEngine(IndexEngine):
+cdef class MultiIndexObjectEngine(ObjectEngine):
+    """
+    provide the same interface as the MultiIndexEngine
+    but use the IndexEngine for computation
+
+    This provides good performance with smaller MI's
+    """
+    def get_indexer(self, values):
+        # convert a MI to an ndarray
+        if hasattr(values, 'values'):
+            values = values.values
+        return super(MultiIndexObjectEngine, self).get_indexer(values)
+
+    cpdef get_loc(self, object val):
+
+        # convert a MI to an ndarray
+        if hasattr(val, 'values'):
+            val = val.values
+        return super(MultiIndexObjectEngine, self).get_loc(val)
+
+
+cdef class MultiIndexHashEngine(ObjectEngine):
+    """
+    Use a hashing based MultiIndex impl
+    but use the IndexEngine for computation
+
+    This provides good performance with larger MI's
+    """
 
     def _call_monotonic(self, object mi):
         # defer these back to the mi iteself
@@ -584,6 +611,10 @@ cdef class MultiIndexEngine(IndexEngine):
         except TypeError:
             raise KeyError(val)
 
+    def get_indexer(self, values):
+        self._ensure_mapping_populated()
+        return self.mapping.lookup(values)
+
     cdef _make_hash_table(self, n):
         return _hash.MultiIndexHashTable(n)