From 9024a67d62aba27d58a173220dcea6cb8ff5456d Mon Sep 17 00:00:00 2001
From: Stephan Hoyer <shoyer@google.com>
Date: Wed, 23 Sep 2020 13:09:26 -0700
Subject: [PATCH 1/2] Simplify and restore old behavior for deep-copies

Intended to fix https://github.com/pydata/xarray/issues/4449

Needs tests!
---
 xarray/core/indexing.py | 14 ++++---
 xarray/core/variable.py | 83 +++++++++++++----------------------------
 2 files changed, 34 insertions(+), 63 deletions(-)

diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py
index 9627f431cb6..a3c5971cc46 100644
--- a/xarray/core/indexing.py
+++ b/xarray/core/indexing.py
@@ -1,3 +1,4 @@
+import copy
 import enum
 import functools
 import operator
@@ -1466,13 +1467,14 @@ def __repr__(self) -> str:
             type(self).__name__, self.array, self.dtype
         )
 
-    def copy(self, deep: bool = True) -> "PandasIndexAdapter":
-        # Not the same as just writing `self.array.copy(deep=deep)`, as
-        # shallow copies of the underlying numpy.ndarrays become deep ones
-        # upon pickling
+    def __copy__(self):
+        # We don't use `copy.copy(self.array)`, as shallow copies of the
+        # underlying numpy.ndarrays become deep ones upon pickling
         # >>> len(pickle.dumps((self.array, self.array)))
         # 4000281
         # >>> len(pickle.dumps((self.array, self.array.copy(deep=False))))
         # 8000341
-        array = self.array.copy(deep=True) if deep else self.array
-        return PandasIndexAdapter(array, self._dtype)
+        return PandasIndexAdapter(self.array, self._dtype)
+
+    def __deepcopy__(self, memo):
+        return PandasIndexAdapter(copy.deepcopy(self.array, memo), self._dtype)
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index c55e61cb816..ca4c04cab5c 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -927,50 +927,55 @@ def copy(self, deep=True, data=None):
         pandas.DataFrame.copy
         """
         if data is None:
-            data = self._data
-
-            if isinstance(data, indexing.MemoryCachedArray):
-                # don't share caching between copies
-                data = indexing.MemoryCachedArray(data.array)
-
-            if deep:
-                data = copy.deepcopy(data)
-
+            return copy.deepcopy(self) if deep else copy.copy(self)
         else:
             data = as_compatible_data(data)
             if self.shape != data.shape:
                 raise ValueError(
-                    "Data shape {} must match shape of object {}".format(
-                        data.shape, self.shape
-                    )
+                    f"Data shape {data.shape} must match shape of object"
+                    f"{self.shape}"
                 )
+            return self._replace(data=data)
 
+    def __copy__(self):
+        data = self._data
+        if isinstance(data, indexing.MemoryCachedArray):
+            # don't share mutable caches between copies
+            # TODO: Can we remove this special case? It seems unnecessary and
+            # inconsistent with how shallow copies work with NumPy array data.
+            data = indexing.MemoryCachedArray(data.array)
         # note:
         # dims is already an immutable tuple
         # attributes and encoding will be copied when the new Array is created
         return self._replace(data=data)
 
+    def __deepcopy__(self, memo):
+        data = self._data
+        if is_duck_array(data) or isinstance(data, PandasIndexAdapter):
+            data = copy.deepcopy(data, memo)
+        else:
+            # TODO: remove this legacy code path? It exists to ensure that
+            # xarray's lazy backend array objects get loaded into memory by a
+            # copy, but that purpose is better served by load() or compute().
+            # See https://github.com/pydata/xarray/issues/4449 for discussion.
+            data = np.asarray(data)
+        copied = self._replace(data=data)
+        memo[id(self)] = copied
+        return copied
+
     def _replace(
         self, dims=_default, data=_default, attrs=_default, encoding=_default
     ) -> "Variable":
         if dims is _default:
             dims = copy.copy(self._dims)
         if data is _default:
-            data = copy.copy(self.data)
+            data = self.data
         if attrs is _default:
             attrs = copy.copy(self._attrs)
         if encoding is _default:
             encoding = copy.copy(self._encoding)
         return type(self)(dims, data, attrs, encoding, fastpath=True)
 
-    def __copy__(self):
-        return self.copy(deep=False)
-
-    def __deepcopy__(self, memo=None):
-        # memo does nothing but is required for compatibility with
-        # copy.deepcopy
-        return self.copy(deep=True)
-
     # mutable objects should not be hashable
     # https://github.com/python/mypy/issues/4266
     __hash__ = None  # type: ignore
@@ -2432,42 +2437,6 @@ def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False):
 
         return cls(first_var.dims, data, attrs)
 
-    def copy(self, deep=True, data=None):
-        """Returns a copy of this object.
-
-        `deep` is ignored since data is stored in the form of
-        pandas.Index, which is already immutable. Dimensions, attributes
-        and encodings are always copied.
-
-        Use `data` to create a new object with the same structure as
-        original but entirely new data.
-
-        Parameters
-        ----------
-        deep : bool, optional
-            Deep is ignored when data is given. Whether the data array is
-            loaded into memory and copied onto the new object. Default is True.
-        data : array_like, optional
-            Data to use in the new object. Must have same shape as original.
-
-        Returns
-        -------
-        object : Variable
-            New object with dimensions, attributes, encodings, and optionally
-            data copied from original.
-        """
-        if data is None:
-            data = self._data.copy(deep=deep)
-        else:
-            data = as_compatible_data(data)
-            if self.shape != data.shape:
-                raise ValueError(
-                    "Data shape {} must match shape of object {}".format(
-                        data.shape, self.shape
-                    )
-                )
-        return type(self)(self.dims, data, self._attrs, self._encoding, fastpath=True)
-
     def equals(self, other, equiv=None):
         # if equiv is specified, super up
         if equiv is not None:

From b503e8ef0bedca1765fe34490902651541120b51 Mon Sep 17 00:00:00 2001
From: Stephan Hoyer <shoyer@google.com>
Date: Wed, 23 Sep 2020 13:46:16 -0700
Subject: [PATCH 2/2] simplify copy methods on PandasIndexAdapter

---
 xarray/core/indexing.py | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py
index a3c5971cc46..767374f9f93 100644
--- a/xarray/core/indexing.py
+++ b/xarray/core/indexing.py
@@ -1467,14 +1467,6 @@ def __repr__(self) -> str:
             type(self).__name__, self.array, self.dtype
         )
 
-    def __copy__(self):
-        # We don't use `copy.copy(self.array)`, as shallow copies of the
-        # underlying numpy.ndarrays become deep ones upon pickling
-        # >>> len(pickle.dumps((self.array, self.array)))
-        # 4000281
-        # >>> len(pickle.dumps((self.array, self.array.copy(deep=False))))
-        # 8000341
-        return PandasIndexAdapter(self.array, self._dtype)
-
     def __deepcopy__(self, memo):
-        return PandasIndexAdapter(copy.deepcopy(self.array, memo), self._dtype)
+        # pandas.Index is (mostly) immutable
+        return PandasIndexAdapter(self.array, self._dtype)