
Commit ec7fced

mfdataset, concat now support the 'join' kwarg.
Closes pydata#1354
1 parent 8f0d9e5 commit ec7fced

3 files changed: +45 -27 lines changed


xarray/backends/api.py

Lines changed: 7 additions & 4 deletions
@@ -608,7 +608,7 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied',
                    compat='no_conflicts', preprocess=None, engine=None,
                    lock=None, data_vars='all', coords='different',
                    combine='_old_auto', autoclose=None, parallel=False,
-                   **kwargs):
+                   join='outer', **kwargs):
     """Open multiple files as a single dataset.

     If combine='by_coords' then the function ``combine_by_coords`` is used to
@@ -703,6 +703,8 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied',
     parallel : bool, optional
         If True, the open and preprocess steps of this function will be
         performed in parallel using ``dask.delayed``. Default is False.
+    join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
+        Passed on to align.
     **kwargs : optional
         Additional arguments passed on to :py:func:`xarray.open_dataset`.

@@ -788,18 +790,19 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied',
             # Remove this after deprecation cycle from #2616 is complete
             combined = auto_combine(datasets, concat_dim=concat_dim,
                                     compat=compat, data_vars=data_vars,
-                                    coords=coords)
+                                    coords=coords, join=join)
         elif combine == 'nested':
             # Combined nested list by successive concat and merge operations
             # along each dimension, using structure given by "ids"
             combined = _nested_combine(datasets, concat_dims=concat_dim,
                                        compat=compat, data_vars=data_vars,
-                                       coords=coords, ids=ids)
+                                       coords=coords, ids=ids, join=join)
         elif combine == 'by_coords':
             # Redo ordering from coordinates, ignoring how they were ordered
             # previously
             combined = combine_by_coords(datasets, compat=compat,
-                                         data_vars=data_vars, coords=coords)
+                                         data_vars=data_vars, coords=coords,
+                                         join=join)
         else:
             raise ValueError("{} is an invalid option for the keyword argument"
                              " ``combine``".format(combine))

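For illustration only (not part of the commit; the file names and call shown are hypothetical), a minimal sketch of how the new kwarg is used from open_mfdataset. 'outer' (the default) keeps the union of any non-concatenated indexes and fills gaps with NaN, 'inner' keeps only their intersection, and 'exact' raises if the indexes differ between files:

    import xarray as xr

    # Concatenate two files along 'time', keeping only index labels that are
    # shared by both files on every other dimension.
    ds = xr.open_mfdataset(['part_a.nc', 'part_b.nc'], combine='nested',
                           concat_dim='time', join='inner')
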
xarray/core/combine.py

Lines changed: 30 additions & 17 deletions
@@ -136,7 +136,7 @@ def _check_shape_tile_ids(combined_tile_ids):

 def _combine_nd(combined_ids, concat_dims, data_vars='all',
                 coords='different', compat='no_conflicts',
-                fill_value=dtypes.NA):
+                fill_value=dtypes.NA, join='outer'):
     """
     Combines an N-dimensional structure of datasets into one by applying a
     series of either concat and merge operations along each dimension.
@@ -177,13 +177,14 @@ def _combine_nd(combined_ids, concat_dims, data_vars='all',
                                                     data_vars=data_vars,
                                                     coords=coords,
                                                     compat=compat,
-                                                    fill_value=fill_value)
+                                                    fill_value=fill_value,
+                                                    join=join)
     (combined_ds,) = combined_ids.values()
     return combined_ds


 def _combine_all_along_first_dim(combined_ids, dim, data_vars, coords, compat,
-                                 fill_value=dtypes.NA):
+                                 fill_value=dtypes.NA, join='outer'):

     # Group into lines of datasets which must be combined along dim
     # need to sort by _new_tile_id first for groupby to work
@@ -197,12 +198,13 @@ def _combine_all_along_first_dim(combined_ids, dim, data_vars, coords, compat,
         combined_ids = OrderedDict(sorted(group))
         datasets = combined_ids.values()
         new_combined_ids[new_id] = _combine_1d(datasets, dim, compat,
-                                               data_vars, coords, fill_value)
+                                               data_vars, coords, fill_value,
+                                               join)
     return new_combined_ids


 def _combine_1d(datasets, concat_dim, compat='no_conflicts', data_vars='all',
-                coords='different', fill_value=dtypes.NA):
+                coords='different', fill_value=dtypes.NA, join='outer'):
     """
     Applies either concat or merge to 1D list of datasets depending on value
     of concat_dim
@@ -222,7 +224,8 @@ def _combine_1d(datasets, concat_dim, compat='no_conflicts', data_vars='all',
             else:
                 raise
     else:
-        combined = merge(datasets, compat=compat, fill_value=fill_value)
+        combined = merge(datasets, compat=compat, fill_value=fill_value,
+                         join=join)

     return combined

@@ -233,7 +236,7 @@ def _new_tile_id(single_id_ds_pair):


 def _nested_combine(datasets, concat_dims, compat, data_vars, coords, ids,
-                    fill_value=dtypes.NA):
+                    fill_value=dtypes.NA, join='outer'):

     if len(datasets) == 0:
         return Dataset()
@@ -254,12 +257,13 @@ def _nested_combine(datasets, concat_dims, compat, data_vars, coords, ids,
     # Apply series of concatenate or merge operations along each dimension
     combined = _combine_nd(combined_ids, concat_dims, compat=compat,
                            data_vars=data_vars, coords=coords,
-                           fill_value=fill_value)
+                           fill_value=fill_value, join=join)
     return combined


 def combine_nested(datasets, concat_dim, compat='no_conflicts',
-                   data_vars='all', coords='different', fill_value=dtypes.NA):
+                   data_vars='all', coords='different', fill_value=dtypes.NA,
+                   join='outer'):
     """
     Explicitly combine an N-dimensional grid of datasets into one by using a
     succession of concat and merge operations along each dimension of the grid.
@@ -312,6 +316,8 @@ def combine_nested(datasets, concat_dim, compat='no_conflicts',
         Details are in the documentation of concat
     fill_value : scalar, optional
         Value to use for newly missing values
+    join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
+        How to combine objects with different indexes.

     Returns
     -------
@@ -383,15 +389,15 @@ def combine_nested(datasets, concat_dim, compat='no_conflicts',
     # The IDs argument tells _manual_combine that datasets aren't yet sorted
     return _nested_combine(datasets, concat_dims=concat_dim, compat=compat,
                            data_vars=data_vars, coords=coords, ids=False,
-                           fill_value=fill_value)
+                           fill_value=fill_value, join=join)


 def vars_as_keys(ds):
     return tuple(sorted(ds))


 def combine_by_coords(datasets, compat='no_conflicts', data_vars='all',
-                      coords='different', fill_value=dtypes.NA):
+                      coords='different', fill_value=dtypes.NA, join='outer'):
     """
     Attempt to auto-magically combine the given datasets into one by using
     dimension coordinates.
@@ -439,6 +445,8 @@ def combine_by_coords(datasets, compat='no_conflicts', data_vars='all',
         Details are in the documentation of concat
     fill_value : scalar, optional
         Value to use for newly missing values
+    join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
+        How to combine objects with different indexes.

     Returns
     -------
@@ -523,7 +531,8 @@ def combine_by_coords(datasets, compat='no_conflicts', data_vars='all',


 def auto_combine(datasets, concat_dim='_not_supplied', compat='no_conflicts',
-                 data_vars='all', coords='different', fill_value=dtypes.NA):
+                 data_vars='all', coords='different', fill_value=dtypes.NA,
+                 join='outer'):
     """
     Attempt to auto-magically combine the given datasets into one.

@@ -571,6 +580,8 @@ def auto_combine(datasets, concat_dim='_not_supplied', compat='no_conflicts',
         Details are in the documentation of concat
     fill_value : scalar, optional
         Value to use for newly missing values
+    join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
+        How to combine objects with different indexes.

     Returns
     -------
@@ -626,7 +637,8 @@ def auto_combine(datasets, concat_dim='_not_supplied', compat='no_conflicts',

     return _old_auto_combine(datasets, concat_dim=concat_dim,
                              compat=compat, data_vars=data_vars,
-                             coords=coords, fill_value=fill_value)
+                             coords=coords, fill_value=fill_value,
+                             join=join)


 def _dimension_coords_exist(datasets):
@@ -667,7 +679,7 @@ def _requires_concat_and_merge(datasets):
 def _old_auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT,
                       compat='no_conflicts',
                       data_vars='all', coords='different',
-                      fill_value=dtypes.NA):
+                      fill_value=dtypes.NA, join='outer'):
     if concat_dim is not None:
         dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim

@@ -676,16 +688,17 @@ def _old_auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT,

         concatenated = [_auto_concat(list(datasets), dim=dim,
                                      data_vars=data_vars, coords=coords,
-                                     fill_value=fill_value)
+                                     fill_value=fill_value, join=join)
                         for vars, datasets in grouped]
     else:
         concatenated = datasets
-    merged = merge(concatenated, compat=compat, fill_value=fill_value)
+    merged = merge(concatenated, compat=compat, fill_value=fill_value,
+                   join=join)
     return merged


 def _auto_concat(datasets, dim=None, data_vars='all', coords='different',
-                 fill_value=dtypes.NA):
+                 fill_value=dtypes.NA, join='outer'):
     if len(datasets) == 1 and dim is None:
         # There is nothing more to combine, so kick out early.
         return datasets[0]
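
Again for illustration (not taken from the commit), a sketch of what join controls in combine_nested, using two toy datasets whose non-concatenated 'y' coordinate only partially overlaps:

    import numpy as np
    import xarray as xr

    ds0 = xr.Dataset({'a': (('x', 'y'), np.zeros((1, 3)))},
                     coords={'x': [0], 'y': [10, 20, 30]})
    ds1 = xr.Dataset({'a': (('x', 'y'), np.ones((1, 3)))},
                     coords={'x': [1], 'y': [20, 30, 40]})

    # Default join='outer': 'y' becomes [10, 20, 30, 40] with NaN padding.
    outer = xr.combine_nested([ds0, ds1], concat_dim='x')
    # join='inner': only the shared labels [20, 30] survive.
    inner = xr.combine_nested([ds0, ds1], concat_dim='x', join='inner')
    # join='exact' would instead raise, because the 'y' indexes are not equal.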

xarray/core/concat.py

Lines changed: 8 additions & 6 deletions
@@ -11,7 +11,7 @@

 def concat(objs, dim=None, data_vars='all', coords='different',
            compat='equals', positions=None, indexers=None, mode=None,
-           concat_over=None, fill_value=dtypes.NA):
+           concat_over=None, fill_value=dtypes.NA, join='outer'):
     """Concatenate xarray objects along a new or existing dimension.

     Parameters
@@ -65,6 +65,8 @@ def concat(objs, dim=None, data_vars='all', coords='different',
         supplied, objects are concatenated in the provided order.
     fill_value : scalar, optional
         Value to use for newly missing values
+    join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
+        How to combine objects with different indexes.
     indexers, mode, concat_over : deprecated

     Returns
@@ -116,7 +118,7 @@ def concat(objs, dim=None, data_vars='all', coords='different',
     else:
         raise TypeError('can only concatenate xarray Dataset and DataArray '
                         'objects, got %s' % type(first_obj))
-    return f(objs, dim, data_vars, coords, compat, positions, fill_value)
+    return f(objs, dim, data_vars, coords, compat, positions, fill_value, join)


 def _calc_concat_dim_coord(dim):
@@ -212,7 +214,7 @@ def process_subset_opt(opt, subset):


 def _dataset_concat(datasets, dim, data_vars, coords, compat, positions,
-                    fill_value=dtypes.NA):
+                    fill_value=dtypes.NA, join='outer'):
     """
     Concatenate a sequence of datasets along a new or existing dimension
     """
@@ -225,7 +227,7 @@ def _dataset_concat(datasets, dim, data_vars, coords, compat, positions,
     dim, coord = _calc_concat_dim_coord(dim)
     # Make sure we're working on a copy (we'll be loading variables)
     datasets = [ds.copy() for ds in datasets]
-    datasets = align(*datasets, join='outer', copy=False, exclude=[dim],
+    datasets = align(*datasets, join=join, copy=False, exclude=[dim],
                      fill_value=fill_value)

     concat_over, equals = _calc_concat_over(datasets, dim, data_vars, coords)
@@ -318,7 +320,7 @@ def ensure_common_dims(vars):


 def _dataarray_concat(arrays, dim, data_vars, coords, compat,
-                      positions, fill_value=dtypes.NA):
+                      positions, fill_value=dtypes.NA, join='outer'):
     arrays = list(arrays)

     if data_vars != 'all':
@@ -337,5 +339,5 @@ def _dataarray_concat(arrays, dim, data_vars, coords, compat,
         datasets.append(arr._to_temp_dataset())

     ds = _dataset_concat(datasets, dim, data_vars, coords, compat,
-                         positions, fill_value=fill_value)
+                         positions, fill_value=fill_value, join=join)
     return arrays[0]._from_temp_dataset(ds, name)
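
A matching sketch for concat itself (illustrative, not from the commit):

    import numpy as np
    import xarray as xr

    a = xr.DataArray(np.zeros((1, 2)), dims=('x', 'y'),
                     coords={'x': [0], 'y': [0, 1]})
    b = xr.DataArray(np.ones((1, 2)), dims=('x', 'y'),
                     coords={'x': [1], 'y': [1, 2]})

    xr.concat([a, b], dim='x')                 # join='outer': 'y' == [0, 1, 2], NaN-filled
    xr.concat([a, b], dim='x', join='inner')   # 'y' == [1] only
    # xr.concat([a, b], dim='x', join='exact') # raises because the 'y' indexes differ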
