Description
What happened:
When creating a Dataset from two variables that share a dimension, a TypeError is raised if that dimension has an unknown size (i.e. the dask array's shape along it is NaN).
What you expected to happen:
A dataset should be created. I believe dask has an allow_unknown_chunksizes
parameter for cases like this -- would that be something that could work here? (Assuming I'm not making a mistake myself.)
Minimal Complete Verifiable Example:
import dask
import dask.array as da
import xarray as xr
import numpy as np
def foo():
    """Return a NumPy array of three zeros."""
    return np.zeros(3)
# Build a dask array from a delayed call to foo(), declaring its length as NaN.
arr0 = da.from_delayed(dask.delayed(foo)(), shape=(np.nan,), dtype=float)
# Wrap it in a DataArray with a single dimension 'z'; no coordinates are passed.
arr0_xr = xr.DataArray(arr0, dims=('z',))
# Second array, built the same way as the first.
arr1 = da.from_delayed(dask.delayed(foo)(), shape=(np.nan,), dtype=float)
arr1_xr = xr.DataArray(arr1, dims=('z',))
# This is the call that raises the TypeError shown in the stack trace.
# NOTE(review): 'arr1' is mapped to arr0_xr, so arr1_xr is never used --
# possibly a typo for arr1_xr; the traceback shows the same line, so it is left as-is.
ds = xr.Dataset({'arr0': arr0_xr, 'arr1': arr0_xr})
stack trace
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/kitchen_sync/xarray/xarray/core/dataarray.py in _getitem_coord(self, key)
692 try:
--> 693 var = self._coords[key]
694 except KeyError:
KeyError: 'z'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-4-06b01b94eab3> in <module>
8 arr1_xr = xr.DataArray(arr1, dims=('z',))
9
---> 10 ds = xr.Dataset({'arr0': arr0_xr, 'arr1': arr0_xr})
~/kitchen_sync/xarray/xarray/core/dataset.py in __init__(self, data_vars, coords, attrs)
739 coords = coords.variables
740
--> 741 variables, coord_names, dims, indexes, _ = merge_data_and_coords(
742 data_vars, coords, compat="broadcast_equals"
743 )
~/kitchen_sync/xarray/xarray/core/merge.py in merge_data_and_coords(data, coords, compat, join)
465 explicit_coords = coords.keys()
466 indexes = dict(_extract_indexes_from_coords(coords))
--> 467 return merge_core(
468 objects, compat, join, explicit_coords=explicit_coords, indexes=indexes
469 )
~/kitchen_sync/xarray/xarray/core/merge.py in merge_core(objects, compat, join, combine_attrs, priority_arg, explicit_coords, indexes, fill_value)
608
609 coerced = coerce_pandas_values(objects)
--> 610 aligned = deep_align(
611 coerced, join=join, copy=False, indexes=indexes, fill_value=fill_value
612 )
~/kitchen_sync/xarray/xarray/core/alignment.py in deep_align(objects, join, copy, indexes, exclude, raise_on_invalid, fill_value)
422 out.append(variables)
423
--> 424 aligned = align(
425 *targets,
426 join=join,
~/kitchen_sync/xarray/xarray/core/alignment.py in align(join, copy, indexes, exclude, fill_value, *objects)
283 for dim in obj.dims:
284 if dim not in exclude:
--> 285 all_coords[dim].append(obj.coords[dim])
286 try:
287 index = obj.indexes[dim]
~/kitchen_sync/xarray/xarray/core/coordinates.py in __getitem__(self, key)
326
327 def __getitem__(self, key: Hashable) -> "DataArray":
--> 328 return self._data._getitem_coord(key)
329
330 def _update_coords(
~/kitchen_sync/xarray/xarray/core/dataarray.py in _getitem_coord(self, key)
694 except KeyError:
695 dim_sizes = dict(zip(self.dims, self.shape))
--> 696 _, key, var = _get_virtual_variable(
697 self._coords, key, self._level_coords, dim_sizes
698 )
~/kitchen_sync/xarray/xarray/core/dataset.py in _get_virtual_variable(variables, key, level_vars, dim_sizes)
146
147 if key in dim_sizes:
--> 148 data = pd.Index(range(dim_sizes[key]), name=key)
149 variable = IndexVariable((key,), data)
150 return key, key, variable
TypeError: 'float' object cannot be interpreted as an integer
Anything else we need to know?:
Environment:
Output of xr.show_versions()
INSTALLED VERSIONS
commit: None
python: 3.8.8 | packaged by conda-forge | (default, Feb 20 2021, 16:12:38)
[Clang 11.0.1 ]
python-bits: 64
OS: Darwin
OS-release: 20.3.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: None
LANG: None
LOCALE: None.UTF-8
libhdf5: 1.10.6
libnetcdf: 4.7.4
xarray: 0.17.1.dev66+g18ed29e4
pandas: 1.2.4
numpy: 1.20.2
scipy: 1.6.2
netCDF4: 1.5.6
pydap: installed
h5netcdf: 0.10.0
h5py: 3.1.0
Nio: None
zarr: 2.7.0
cftime: 1.4.1
nc_time_axis: 1.2.0
PseudoNetCDF: installed
rasterio: None
cfgrib: 0.9.9.0
iris: 2.4.0
bottleneck: 1.3.2
dask: 2021.04.0
distributed: 2021.04.0
matplotlib: 3.4.1
cartopy: 0.18.0
seaborn: 0.11.1
numbagg: installed
pint: 0.17
setuptools: 49.6.0.post20210108
pip: 20.2.4
conda: None
pytest: 6.2.3
IPython: 7.22.0
sphinx: None