35 changes: 21 additions & 14 deletions intake_esm/cat.py
@@ -129,6 +129,7 @@ def save(
catalog_type: str = 'dict',
to_csv_kwargs: dict = None,
json_dump_kwargs: dict = None,
+ storage_options: typing.Dict[str, typing.Any] = None,
) -> None:
"""
Save the catalog to a file.
@@ -138,14 +139,18 @@ def save(
name: str
The name of the file to save the catalog to.
directory: str
- The directory to save the catalog to. If None, use the current directory
+ The directory or cloud storage bucket to save the catalog to.
+ If None, use the current directory
catalog_type: str
The type of catalog to save. Whether to save the catalog table as a dictionary
in the JSON file or as a separate CSV file. Valid options are 'dict' and 'file'.
to_csv_kwargs : dict, optional
Additional keyword arguments passed through to the :py:meth:`~pandas.DataFrame.to_csv` method.
json_dump_kwargs : dict, optional
Additional keyword arguments passed through to the :py:func:`~json.dump` function.
+ storage_options: dict
+ fsspec parameters passed to the backend file-system such as Google Cloud Storage,
+ Amazon Web Service S3.

Notes
-----
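As a sketch of what the new storage_options mapping might carry: the keys are simply whatever the target fsspec backend accepts, and fsspec would normally consume them as keyword arguments to the filesystem constructor. This is an illustration assuming gcsfs/s3fs are installed; the profile name is a placeholder, not a value from this PR.

import fsspec

# Anonymous access to Google Cloud Storage (gcsfs understands token='anon').
gcs_options = {'token': 'anon'}
gcs_fs = fsspec.filesystem('gs', **gcs_options)

# A named AWS credentials profile for S3 (s3fs understands profile=...).
s3_options = {'profile': 'my-aws-profile'}
s3_fs = fsspec.filesystem('s3', **s3_options)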
@@ -158,13 +163,12 @@ def save(
raise ValueError(
f'catalog_type must be either "dict" or "file". Received catalog_type={catalog_type}'
)
- csv_file_name = pathlib.Path(f'{name}.csv')
- json_file_name = pathlib.Path(f'{name}.json')
- if directory:
- directory = pathlib.Path(directory)
- directory.mkdir(parents=True, exist_ok=True)
- csv_file_name = directory / csv_file_name
- json_file_name = directory / json_file_name
+ if isinstance(directory, pathlib.Path):
+ directory = str(directory)
+ mapper = fsspec.get_mapper(directory or '.', storage_options=storage_options)
+ fs = mapper.fs
+ csv_file_name = f'{mapper.fs.protocol}://{mapper.root}/{name}.csv'
+ json_file_name = f'{mapper.fs.protocol}://{mapper.root}/{name}.json'

data = self.dict().copy()
for key in {'catalog_dict', 'catalog_file'}:
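For context on the replacement above: fsspec.get_mapper resolves the target directory into a concrete filesystem plus a root path, which is what save() now uses to compose the CSV and JSON destinations. A rough, standalone illustration (not code from this PR):

import fsspec

for target in ['.', 'memory://catalogs']:
    mapper = fsspec.get_mapper(target)
    # mapper.fs is the concrete filesystem (LocalFileSystem, MemoryFileSystem,
    # S3FileSystem, ...) and mapper.root is the resolved root path inside it.
    # Note that fs.protocol can be a plain string or a tuple of aliases,
    # depending on the backend.
    print(type(mapper.fs).__name__, mapper.fs.protocol, mapper.root)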
@@ -179,11 +183,13 @@ def save(
extensions = {'gzip': '.gz', 'bz2': '.bz2', 'zip': '.zip', 'xz': '.xz', None: ''}
csv_file_name = f'{csv_file_name}{extensions[compression]}'
data['catalog_file'] = str(csv_file_name)
- self.df.to_csv(csv_file_name, **csv_kwargs)
+
+ with fs.open(csv_file_name, 'wb') as csv_outfile:
+ self.df.to_csv(csv_outfile, **csv_kwargs)
else:
data['catalog_dict'] = self.df.to_dict(orient='records')

- with open(json_file_name, 'w') as outfile:
+ with fs.open(json_file_name, 'w') as outfile:
json_kwargs = {'indent': 2}
json_kwargs.update(json_dump_kwargs or {})
json.dump(data, outfile, **json_kwargs)
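Opening the output through the filesystem object, rather than handing pandas and json a plain path, is what makes these writes backend-agnostic. A small round-trip against fsspec's in-memory filesystem sketches the idea; the paths are illustrative, and text-mode 'w' is used here for simplicity where the PR opens the CSV in 'wb'.

import json

import fsspec
import pandas as pd

fs = fsspec.filesystem('memory')
df = pd.DataFrame({'source_id': ['MRI-ESM2-0'], 'variable_id': ['tas']})

# Write the CSV and JSON through the filesystem, much as save() does for
# whichever backend the mapper resolved.
with fs.open('memory://demo/catalog.csv', 'w') as f:
    df.to_csv(f, index=False)
with fs.open('memory://demo/catalog.json', 'w') as f:
    json.dump({'catalog_file': 'memory://demo/catalog.csv'}, f, indent=2)

# pandas can read fsspec URLs directly, so the round-trip is easy to check.
print(pd.read_csv('memory://demo/catalog.csv'))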
@@ -350,12 +356,13 @@ def search(

"""

- if not isinstance(query, QueryModel):
- _query = QueryModel(
+ _query = (
+ query
+ if isinstance(query, QueryModel)
+ else QueryModel(
query=query, require_all_on=require_all_on, columns=self.df.columns.tolist()
)
- else:
- _query = query
+ )

results = search(
df=self.df, query=_query.query, columns_with_iterables=self.columns_with_iterables
8 changes: 6 additions & 2 deletions intake_esm/core.py
@@ -182,7 +182,7 @@ def __getitem__(self, key: str) -> ESMDataSource:
# The canonical unique key is the key of a compatible group of assets
try:
return self._entries[key]
- except KeyError:
+ except KeyError as e:
if key in self.keys():
keys_dict = self.esmcat._construct_group_keys(sep=self.sep)
grouped = self.esmcat.grouped
@@ -210,7 +210,7 @@ def __getitem__(self, key: str) -> ESMDataSource:
return self._entries[key]
raise KeyError(
f'key={key} not found in catalog. You can access the list of valid keys via the .keys() method.'
- )
+ ) from e

def __contains__(self, key) -> bool:
# Python falls back to iterating over the entire catalog
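The 'as e' and 'from e' additions above enable exception chaining: the original KeyError stays attached as the __cause__ of the friendlier error instead of being swallowed. A minimal standalone illustration, with a made-up key for the demo:

catalog_keys = {'CMIP.MRI.MRI-ESM2-0.historical': object()}

def lookup(key):
    try:
        return catalog_keys[key]
    except KeyError as e:
        # Chaining keeps the original KeyError visible in the traceback as the
        # __cause__ of the more descriptive error raised here.
        raise KeyError(
            f'key={key} not found in catalog. You can access the list of valid '
            'keys via the .keys() method.'
        ) from e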
@@ -381,6 +381,7 @@ def serialize(
catalog_type: str = 'dict',
to_csv_kwargs: typing.Dict[typing.Any, typing.Any] = None,
json_dump_kwargs: typing.Dict[typing.Any, typing.Any] = None,
+ storage_options: typing.Dict[str, typing.Any] = None,
) -> None:
"""Serialize catalog to corresponding json and csv files.

@@ -396,6 +397,9 @@ def serialize(
Additional keyword arguments passed through to the :py:meth:`~pandas.DataFrame.to_csv` method.
json_dump_kwargs : dict, optional
Additional keyword arguments passed through to the :py:func:`~json.dump` function.
+ storage_options: dict
+ fsspec parameters passed to the backend file-system such as Google Cloud Storage,
+ Amazon Web Service S3.

Notes
-----
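With the new parameter in place, serialize() can target a cloud bucket directly. A hypothetical usage sketch, assuming this branch of intake-esm plus s3fs is installed; the catalog URL, bucket, search column, and profile name are placeholders rather than values taken from this PR.

import intake

cat = intake.open_esm_datastore('https://example.com/cmip6-catalog.json')
cat_subset = cat.search(source_id='MRI-ESM2-0')

# Write <name>.json plus <name>.csv into an S3 bucket; storage_options is
# forwarded to fsspec (and from there to s3fs).
cat_subset.serialize(
    name='CMIP6-MRI-ESM2-0',
    directory='s3://my-bucket/catalogs',
    catalog_type='file',
    storage_options={'profile': 'my-aws-profile'},
)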
2 changes: 1 addition & 1 deletion tests/test_core.py
@@ -180,7 +180,7 @@ def test_catalog_serialize(tmp_path, catalog_type, to_csv_kwargs, json_dump_kwar
name = 'CMIP6-MRI-ESM2-0'
cat_subset.serialize(
name=name,
- directory=local_store,
+ directory=str(local_store),
catalog_type=catalog_type,
to_csv_kwargs=to_csv_kwargs,
json_dump_kwargs=json_dump_kwargs,