@@ -148,68 +148,85 @@ def _calc_concat_over(datasets, dim, data_vars, coords):
148
148
Determine which dataset variables need to be concatenated in the result,
149
149
and which can simply be taken from the first dataset.
150
150
"""
151
- def process_subset_opt (opt , subset ):
152
- if subset == 'coords' :
153
- subset_long_name = 'coordinates'
154
- else :
155
- subset_long_name = 'data variables'
151
+ # Return values
152
+ concat_over = set ()
153
+ equals = {}
154
+
155
+ if dim in datasets [0 ]:
156
+ concat_over .add (dim )
157
+ for ds in datasets :
158
+ concat_over .update (k for k , v in ds .variables .items ()
159
+ if dim in v .dims )
156
160
161
+ def process_subset_opt (opt , subset ):
157
162
if isinstance (opt , basestring ):
158
163
if opt == 'different' :
159
- def differs (vname ):
160
- # simple helper function which compares a variable
161
- # across all datasets and indicates whether that
162
- # variable differs or not.
163
- v = datasets [0 ].variables [vname ]
164
- return any (not ds .variables [vname ].equals (v )
165
- for ds in datasets [1 :])
166
164
# all nonindexes that are not the same in each dataset
167
- concat_new = set (k for k in getattr (datasets [0 ], subset )
168
- if k not in concat_over and differs (k ))
165
+ for k in getattr (datasets [0 ], subset ):
166
+ if k not in concat_over :
167
+ # Compare the variable of all datasets vs. the one
168
+ # of the first dataset. Perform the minimum amount of
169
+ # loads in order to avoid multiple loads from disk while
170
+ # keeping the RAM footprint low.
171
+ v_lhs = datasets [0 ].variables [k ].load ()
172
+ # We'll need to know later on if variables are equal.
173
+ computed = []
174
+ for ds_rhs in datasets [1 :]:
175
+ v_rhs = ds_rhs .variables [k ].compute ()
176
+ computed .append (v_rhs )
177
+ if not v_lhs .equals (v_rhs ):
178
+ concat_over .add (k )
179
+ equals [k ] = False
180
+ # computed variables are not to be re-computed
181
+ # again in the future
182
+ for ds , v in zip (datasets [1 :], computed ):
183
+ ds .variables [k ].data = v .data
184
+ break
185
+ else :
186
+ equals [k ] = True
187
+
169
188
elif opt == 'all' :
170
- concat_new = (set (getattr (datasets [0 ], subset )) -
171
- set (datasets [0 ].dims ))
189
+ concat_over . update (set (getattr (datasets [0 ], subset )) -
190
+ set (datasets [0 ].dims ))
172
191
elif opt == 'minimal' :
173
- concat_new = set ()
192
+ pass
174
193
else :
175
- raise ValueError ("unexpected value for concat_%s: %s"
176
- % (subset , opt ))
194
+ raise ValueError ("unexpected value for %s: %s" % (subset , opt ))
177
195
else :
178
196
invalid_vars = [k for k in opt
179
197
if k not in getattr (datasets [0 ], subset )]
180
198
if invalid_vars :
181
- raise ValueError ('some variables in %s are not '
182
- '%s on the first dataset: %s'
183
- % (subset , subset_long_name , invalid_vars ))
184
- concat_new = set (opt )
185
- return concat_new
199
+ if subset == 'coords' :
200
+ raise ValueError (
201
+ 'some variables in coords are not coordinates on '
202
+ 'the first dataset: %s' % invalid_vars )
203
+ else :
204
+ raise ValueError (
205
+ 'some variables in data_vars are not data variables on '
206
+ 'the first dataset: %s' % invalid_vars )
207
+ concat_over .update (opt )
186
208
187
- concat_over = set ()
188
- for ds in datasets :
189
- concat_over .update (k for k , v in ds .variables .items ()
190
- if dim in v .dims )
191
- concat_over .update (process_subset_opt (data_vars , 'data_vars' ))
192
- concat_over .update (process_subset_opt (coords , 'coords' ))
193
- if dim in datasets [0 ]:
194
- concat_over .add (dim )
195
- return concat_over
209
+ process_subset_opt (data_vars , 'data_vars' )
210
+ process_subset_opt (coords , 'coords' )
211
+ return concat_over , equals
196
212
197
213
198
214
def _dataset_concat (datasets , dim , data_vars , coords , compat , positions ):
199
215
"""
200
216
Concatenate a sequence of datasets along a new or existing dimension
201
217
"""
202
- from .dataset import Dataset , as_dataset
218
+ from .dataset import Dataset
203
219
204
220
if compat not in ['equals' , 'identical' ]:
205
221
raise ValueError ("compat=%r invalid: must be 'equals' "
206
222
"or 'identical'" % compat )
207
223
208
224
dim , coord = _calc_concat_dim_coord (dim )
209
- datasets = [as_dataset (ds ) for ds in datasets ]
225
+ # Make sure we're working on a copy (we'll be loading variables)
226
+ datasets = [ds .copy () for ds in datasets ]
210
227
datasets = align (* datasets , join = 'outer' , copy = False , exclude = [dim ])
211
228
212
- concat_over = _calc_concat_over (datasets , dim , data_vars , coords )
229
+ concat_over , equals = _calc_concat_over (datasets , dim , data_vars , coords )
213
230
214
231
def insert_result_variable (k , v ):
215
232
assert isinstance (v , Variable )
@@ -239,11 +256,25 @@ def insert_result_variable(k, v):
239
256
elif (k in result_coord_names ) != (k in ds .coords ):
240
257
raise ValueError ('%r is a coordinate in some datasets but not '
241
258
'others' % k )
242
- elif (k in result_vars and k != dim and
243
- not getattr (v , compat )(result_vars [k ])):
244
- verb = 'equal' if compat == 'equals' else compat
245
- raise ValueError (
246
- 'variable %r not %s across datasets' % (k , verb ))
259
+ elif k in result_vars and k != dim :
260
+ # Don't use Variable.identical as it internally invokes
261
+ # Variable.equals, and we may already know the answer
262
+ if compat == 'identical' and not utils .dict_equiv (
263
+ v .attrs , result_vars [k ].attrs ):
264
+ raise ValueError (
265
+ 'variable %s not identical across datasets' % k )
266
+
267
+ # Proceed with equals()
268
+ try :
269
+ # May be populated when using the "different" method
270
+ is_equal = equals [k ]
271
+ except KeyError :
272
+ result_vars [k ].load ()
273
+ is_equal = v .equals (result_vars [k ])
274
+ if not is_equal :
275
+ raise ValueError (
276
+ 'variable %s not equal across datasets' % k )
277
+
247
278
248
279
# we've already verified everything is consistent; now, calculate
249
280
# shared dimension sizes so we can expand the necessary variables
0 commit comments