Simplify and restore old behavior for deep-copies #4453

Closed
wants to merge 2 commits

14 changes: 4 additions & 10 deletions xarray/core/indexing.py
@@ -1,3 +1,4 @@
+import copy
 import enum
 import functools
 import operator
@@ -1466,13 +1467,6 @@ def __repr__(self) -> str:
             type(self).__name__, self.array, self.dtype
         )
 
-    def copy(self, deep: bool = True) -> "PandasIndexAdapter":
-        # Not the same as just writing `self.array.copy(deep=deep)`, as
-        # shallow copies of the underlying numpy.ndarrays become deep ones
-        # upon pickling
-        # >>> len(pickle.dumps((self.array, self.array)))
-        # 4000281
-        # >>> len(pickle.dumps((self.array, self.array.copy(deep=False))))
-        # 8000341
-        array = self.array.copy(deep=True) if deep else self.array
-        return PandasIndexAdapter(array, self._dtype)
+    def __deepcopy__(self, memo):
+        # pandas.Index is (mostly) immutable
+        return PandasIndexAdapter(self.array, self._dtype)
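
As background, the pickle behavior described in the deleted comment can be reproduced with a standalone sketch (an illustration, not part of the diff; it assumes an older pandas where Index.copy(deep=...) is still accepted, and the exact byte counts will vary):

    import pickle

    import numpy as np
    import pandas as pd

    index = pd.Index(np.arange(500_000))  # ~4 MB of int64 values

    # Two references to the same Index: pickle memoizes the object, so
    # the underlying buffer is serialized only once.
    print(len(pickle.dumps((index, index))))

    # A shallow copy is a distinct Index over the same memory, but pickle
    # does not treat it as identical to the original, so the buffer is
    # serialized twice -- roughly doubling the payload, as in the numbers
    # quoted in the deleted comment.
    print(len(pickle.dumps((index, index.copy(deep=False)))))
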
83 changes: 26 additions & 57 deletions xarray/core/variable.py
@@ -927,50 +927,55 @@ def copy(self, deep=True, data=None):
         pandas.DataFrame.copy
         """
         if data is None:
-            data = self._data
-
-            if isinstance(data, indexing.MemoryCachedArray):
-                # don't share caching between copies
-                data = indexing.MemoryCachedArray(data.array)
-
-            if deep:
-                data = copy.deepcopy(data)
-
+            return copy.deepcopy(self) if deep else copy.copy(self)
         else:
             data = as_compatible_data(data)
             if self.shape != data.shape:
                 raise ValueError(
-                    "Data shape {} must match shape of object {}".format(
-                        data.shape, self.shape
-                    )
+                    f"Data shape {data.shape} must match shape of object "
+                    f"{self.shape}"
                 )
+        return self._replace(data=data)
 
+    def __copy__(self):
+        data = self._data
+        if isinstance(data, indexing.MemoryCachedArray):
+            # don't share mutable caches between copies
+            # TODO: Can we remove this special case? It seems unnecessary and
+            # inconsistent with how shallow copies work with NumPy array data.
+            data = indexing.MemoryCachedArray(data.array)
         # note:
         # dims is already an immutable tuple
         # attributes and encoding will be copied when the new Array is created
         return self._replace(data=data)
+
+    def __deepcopy__(self, memo):
+        data = self._data
+        if is_duck_array(data) or isinstance(data, PandasIndexAdapter):
+            data = copy.deepcopy(data, memo)
+        else:
+            # TODO: remove this legacy code path? It exists to ensure that
+            # xarray's lazy backend array objects get loaded into memory by a
+            # copy, but that purpose is better served by load() or compute().
+            # See https://github.com/pydata/xarray/issues/4449 for discussion.
+            data = np.asarray(data)
+        copied = self._replace(data=data)
+        memo[id(self)] = copied
+        return copied
 
     def _replace(
         self, dims=_default, data=_default, attrs=_default, encoding=_default
     ) -> "Variable":
         if dims is _default:
             dims = copy.copy(self._dims)
         if data is _default:
-            data = copy.copy(self.data)
+            data = self.data
         if attrs is _default:
             attrs = copy.copy(self._attrs)
         if encoding is _default:
             encoding = copy.copy(self._encoding)
         return type(self)(dims, data, attrs, encoding, fastpath=True)
-
-    def __copy__(self):
-        return self.copy(deep=False)
-
-    def __deepcopy__(self, memo=None):
-        # memo does nothing but is required for compatibility with
-        # copy.deepcopy
-        return self.copy(deep=True)
 
     # mutable objects should not be hashable
     # https://github.com/python/mypy/issues/4266
     __hash__ = None  # type: ignore
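
Passing the memo dict through the new Variable.__deepcopy__ lets copy.deepcopy preserve shared identity across an object graph. A minimal sketch of the effect (an illustration using only public xarray and stdlib APIs, not code from the diff):

    import copy

    import numpy as np
    import xarray as xr

    var = xr.Variable(("x",), np.arange(3))
    refs = {"a": var, "b": var}  # the same Variable referenced twice

    copied = copy.deepcopy(refs)
    assert copied["a"] is copied["b"]  # shared identity survives the deep copy
    assert copied["a"] is not var      # but a new object was created
    copied["a"].values[0] = 99
    assert var.values[0] == 0          # and the underlying data was copied
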
@@ -2432,42 +2437,6 @@ def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False):
 
         return cls(first_var.dims, data, attrs)
 
-    def copy(self, deep=True, data=None):
-        """Returns a copy of this object.
-
-        `deep` is ignored since data is stored in the form of
-        pandas.Index, which is already immutable. Dimensions, attributes
-        and encodings are always copied.
-
-        Use `data` to create a new object with the same structure as
-        original but entirely new data.
-
-        Parameters
-        ----------
-        deep : bool, optional
-            Deep is ignored when data is given. Whether the data array is
-            loaded into memory and copied onto the new object. Default is True.
-        data : array_like, optional
-            Data to use in the new object. Must have same shape as original.
-
-        Returns
-        -------
-        object : Variable
-            New object with dimensions, attributes, encodings, and optionally
-            data copied from original.
-        """
-        if data is None:
-            data = self._data.copy(deep=deep)
-        else:
-            data = as_compatible_data(data)
-            if self.shape != data.shape:
-                raise ValueError(
-                    "Data shape {} must match shape of object {}".format(
-                        data.shape, self.shape
-                    )
-                )
-        return type(self)(self.dims, data, self._attrs, self._encoding, fastpath=True)
-
     def equals(self, other, equiv=None):
         # if equiv is specified, super up
         if equiv is not None:

A Contributor left a review comment on the removed IndexVariable.copy method:

    shall we keep this but use self.__copy__ under the hood?

    It lines up well with Dataset.copy() and DataArray.copy()
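
That suggestion could look roughly like the toy sketch below (hypothetical code on a stand-in class, not from the PR): keep a public copy() for symmetry with Dataset.copy() and DataArray.copy(), but implement it via the dunder methods so the copy semantics live in one place.

    import copy


    class IndexVariableSketch:
        """Hypothetical stand-in for xarray's IndexVariable."""

        def __init__(self, dims, data):
            self.dims = dims
            self.data = data  # stands in for an immutable pandas.Index

        def __copy__(self):
            return type(self)(self.dims, self.data)

        def __deepcopy__(self, memo):
            # the wrapped index is immutable, so sharing it is safe
            return type(self)(self.dims, self.data)

        def copy(self, deep=True):
            # the public method just dispatches to the dunders
            return copy.deepcopy(self) if deep else copy.copy(self)


    v = IndexVariableSketch(("x",), (1, 2, 3))
    assert v.copy(deep=True) is not v        # a new wrapper object
    assert v.copy(deep=True).data is v.data  # immutable data stays shared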