Skip to content

Commit 5411f3d

Browse files
committed
Now exclude "anchor" from open_dataset and open_mfdataset calls
According to xarray issues pydata/xarray#3286 and pydata/xarray#1378, the open_mfdataset function has problems creating a merged dataset from multiple files in which variables have repeated dimension names. The easiest thing to do in this case is to prevent such variables from being read in. We have now added the drop_variables keyword to avoid reading in the "anchor" variable in all calls to open_dataset and open_mfdataset in both benchmark.py and core.py. This variable is only present in GCHP-created netCDF files using MAPL v1.0.0, which is in GCHP 12.5.0 and later. This commit should resolve GCPy issue #26. Signed-off-by: Bob Yantosca <[email protected]>
1 parent b3a8311 commit 5411f3d

File tree

2 files changed

+32
-22
lines changed

2 files changed

+32
-22
lines changed

gcpy/benchmark.py

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@
3232
emission_spc = 'emission_species.json'
3333
emission_inv = 'emission_inventories.json'
3434

35+
# List of variables that should not be read by xarray
36+
dropvars = drop_varlist = ['anchor']  # 'dropvars' is the name passed as drop_variables= in this module
37+
3538

3639
def compare_single_level(refdata, refstr, devdata, devstr, varlist=None,
3740
ilev=0, itime=0, weightsdir=None, pdfname='',
@@ -2972,15 +2975,15 @@ def make_benchmark_conc_plots(ref, refstr, dev, devstr, dst='./1mo_benchmark',
29722975

29732976
# Ref dataset
29742977
try:
2975-
refds = xr.open_dataset(ref)
2978+
refds = xr.open_dataset(ref, drop_variables=dropvars)
29762979
except FileNotFoundError:
29772980
print('Could not find Ref file: {}'.format(ref))
29782981
raise
29792982
refds = core.add_lumped_species_to_dataset(refds, verbose=verbose)
29802983

29812984
# Dev dataset
29822985
try:
2983-
devds = xr.open_dataset(dev)
2986+
devds = xr.open_dataset(dev, drop_variables=dropvars)
29842987
except FileNotFoundError:
29852988
print('Could not find Dev file: {}!'.format(dev))
29862989
raise
@@ -3213,14 +3216,14 @@ def make_benchmark_emis_plots(ref, refstr, dev, devstr,
32133216

32143217
# Ref dataset
32153218
try:
3216-
refds = xr.open_dataset(ref)
3219+
refds = xr.open_dataset(ref, drop_variables=dropvars)
32173220
except FileNotFoundError:
32183221
print('Could not find Ref file: {}'.format(ref))
32193222
raise
32203223

32213224
# Dev dataset
32223225
try:
3223-
devds = xr.open_dataset(dev)
3226+
devds = xr.open_dataset(dev, drop_variables=dropvars)
32243227
except FileNotFoundError:
32253228
print('Could not find Dev file: {}'.format(dev))
32263229
raise
@@ -3441,23 +3444,23 @@ def make_benchmark_emis_tables(reflist, refstr, devlist, devstr,
34413444

34423445
# Ref
34433446
if len(reflist) == 1:
3444-
refds = xr.open_dataset(reflist[0])
3447+
refds = xr.open_dataset(reflist[0], drop_variables=dropvars)
34453448
assert gcc_area_name in list(refds.keys()),'Ref file {} does not contain area variable {}'.format(reflist[0], gcc_area_name)
34463449

34473450
elif len(reflist) == 2:
3448-
refds = xr.open_dataset(reflist[0])
3449-
metrefds = xr.open_dataset(reflist[1])
3451+
refds = xr.open_dataset(reflist[0], drop_variables=dropvars)
3452+
metrefds = xr.open_dataset(reflist[1], drop_variables=dropvars)
34503453
assert gchp_area_name in list(metrefds.keys()),'Ref met file {} does not contain area variable {}'.format(reflist[1], gchp_area_name)
34513454
refds[gcc_area_name] = metrefds[gchp_area_name]
34523455

34533456
# Dev
34543457
if len(devlist) == 1:
3455-
devds = xr.open_dataset(devlist[0])
3458+
devds = xr.open_dataset(devlist[0], drop_variables=dropvars)
34563459
assert gcc_area_name in list(refds.keys()),'Dev file {} does not contain area variable {}'.format(devlist[0], gcc_area_name)
34573460

34583461
elif len(devlist) == 2:
3459-
devds = xr.open_dataset(devlist[0])
3460-
metdevds = xr.open_dataset(devlist[1])
3462+
devds = xr.open_dataset(devlist[0], drop_variables=dropvars)
3463+
metdevds = xr.open_dataset(devlist[1], drop_variables=dropvars)
34613464
assert gchp_area_name in list(metdevds.keys()),'Dev met file {} does not contain area variable {}'.format(devlist[1], gchp_area_name)
34623465
devds[gcc_area_name] = metdevds[gchp_area_name]
34633466

@@ -3604,14 +3607,14 @@ def make_benchmark_jvalue_plots(ref, refstr, dev, devstr,
36043607

36053608
# Ref dataset
36063609
try:
3607-
refds = xr.open_dataset(ref)
3610+
refds = xr.open_dataset(ref, drop_variables=dropvars)
36083611
except FileNotFoundError:
36093612
print('Could not find Ref file: {}'.format(ref))
36103613
raise
36113614

36123615
# Dev dataset
36133616
try:
3614-
devds = xr.open_dataset(dev)
3617+
devds = xr.open_dataset(dev, drop_variables=dropvars)
36153618
except FileNotFoundError:
36163619
print('Could not find Dev file: {}'.format(dev))
36173620
raise
@@ -3832,14 +3835,14 @@ def make_benchmark_aod_plots(ref, refstr, dev, devstr,
38323835

38333836
# Read the Ref dataset
38343837
try:
3835-
refds = xr.open_dataset(ref)
3838+
refds = xr.open_dataset(ref, drop_variables=dropvars)
38363839
except FileNotFoundError:
38373840
print('Could not find Ref file: {}'.format(ref))
38383841
raise
38393842

38403843
# Read the Dev dataset
38413844
try:
3842-
devds = xr.open_dataset(dev)
3845+
devds = xr.open_dataset(dev, drop_variables=dropvars)
38433846
except FileNotFoundError:
38443847
print('Could not find Dev file: {}'.format(dev))
38453848
raise
@@ -4054,14 +4057,14 @@ def make_benchmark_mass_tables(reflist, refstr, devlist, devstr,
40544057

40554058
# Ref
40564059
try:
4057-
refds = xr.open_mfdataset(reflist)
4060+
refds = xr.open_mfdataset(reflist, drop_variables=dropvars)
40584061
except FileNotFoundError:
40594062
print('Error opening Ref files: {}'.format(reflist))
40604063
raise
40614064

40624065
# Dev dataset
40634066
try:
4064-
devds = xr.open_mfdataset(devlist)
4067+
devds = xr.open_mfdataset(devlist, drop_variables=dropvars)
40654068
except FileNotFoundError:
40664069
print('Error opening Dev files: {}!'.format(devlist))
40674070
raise
@@ -4214,7 +4217,7 @@ def make_benchmark_budget_tables(devlist, devstr, dst='./1mo_benchmark',
42144217

42154218
# Dev
42164219
try:
4217-
devds = xr.open_mfdataset(devlist)
4220+
devds = xr.open_mfdataset(devlist, drop_variables=dropvars)
42184221
except FileNotFoundError:
42194222
print('Could not find one of the Dev files: {}'.format(devlist))
42204223
raise

gcpy/core.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
lumped_spc = 'lumped_species.json'
1818
bpch_to_nc_names = 'bpch_to_nc_names.json'
1919

20+
# List of variables that should not be read by xarray
21+
dropvars = ['anchor']
22+
2023

2124
def open_dataset(filename, **kwargs):
2225
'''
@@ -74,7 +77,7 @@ def open_dataset(filename, **kwargs):
7477
'pass a BPCH or netCDF file with extension '
7578
'"bpch" or "nc"!'.format(file_extension))
7679

77-
return _opener(filename, **kwargs)
80+
return _opener(filename, **kwargs, drop_variables=dropvars)
7881

7982

8083
def open_mfdataset(filenames, concat_dim='time', compat='no_conflicts',
@@ -156,19 +159,23 @@ def open_mfdataset(filenames, concat_dim='time', compat='no_conflicts',
156159
'"bpch" or "nc" or "nc4"'.format(file_extension))
157160

158161
return _opener(filenames, concat_dim=concat_dim, compat=compat,
159-
preprocess=preprocess, lock=lock, **kwargs)
162+
preprocess=preprocess, lock=lock,
163+
drop_variables=dropvars, **kwargs)
160164

161165

162166
def get_gcc_filepath(outputdir, collection, day, time):
    '''
    Builds the path of a diagnostic file located in outputdir.

    Args:
        outputdir : Directory that is joined in front of the file name.
        collection : Collection name.  The value 'Emissions' selects
            the HEMCO diagnostics file name; any other value is
            inserted into the GEOSChem file name.
        day : Value inserted into the date field of the file name.
        time : Value inserted into the time field of the file name.

    Returns:
        filepath : outputdir joined with either
            'HEMCO_diagnostics.{day}{time}.nc' or
            'GEOSChem.{collection}.{day}_{time}z.nc4'.
    '''
    if collection == 'Emissions':
        filepath = os.path.join(outputdir,
                                'HEMCO_diagnostics.{}{}.nc'.format(day,time))
    else:
        filepath = os.path.join(outputdir,
                                'GEOSChem.{}.{}_{}z.nc4'.format(collection,day,time))
    return filepath
168174

169175

170176
def get_gchp_filepath(outputdir, collection, day, time):
    '''
    Builds the path of a GCHP diagnostic file located in outputdir.

    Args:
        outputdir : Directory that is joined in front of the file name.
        collection : Collection name inserted into the file name.
        day : Value inserted into the date field of the file name.
        time : Value inserted into the time field of the file name.

    Returns:
        filepath : outputdir joined with
            'GCHP.{collection}.{day}_{time}z.nc4'.
    '''
    filepath = os.path.join(outputdir,
                            'GCHP.{}.{}_{}z.nc4'.format(collection,day,time))
    return filepath
173180

174181

0 commit comments

Comments
 (0)