intake
diff --git a/‎.github/ISSUE_TEMPLATE/bug_report.md‎
Lines changed: 1 addition & 0 deletions b/‎.github/ISSUE_TEMPLATE/bug_report.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 1 deletion b/‎.gitignore‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 8 additions & 8 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 8 additions & 8 deletions
diff --git a/‎README.md‎
Lines changed: 1 addition & 1 deletion b/‎README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎ci/environment.yml‎
Lines changed: 1 addition & 1 deletion b/‎ci/environment.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/environment.yml‎
Lines changed: 2 additions & 2 deletions b/‎docs/environment.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎docs/source/api.md‎
Lines changed: 0 additions & 11 deletions b/‎docs/source/api.md‎
Lines changed: 0 additions & 11 deletions
diff --git a/‎docs/source/changelog.md‎
Lines changed: 0 additions & 3 deletions b/‎docs/source/changelog.md‎
Lines changed: 0 additions & 3 deletions
diff --git a/‎docs/source/conf.py‎
Lines changed: 6 additions & 105 deletions b/‎docs/source/conf.py‎
Lines changed: 6 additions & 105 deletions
diff --git a/‎docs/source/explanation/esm-collection-spec.md‎
Lines changed: 123 additions & 0 deletions b/‎docs/source/explanation/esm-collection-spec.md‎
Lines changed: 123 additions & 0 deletions
@@ -46,6 +46,7 @@ Paste the output of `intake_esm.show_versions()` here:
 
 ```python
 import intake_esm
+
 intake_esm.show_versions()
 ```
 
 
@@ -106,9 +106,9 @@ venv.bak/
 # mypy
 .mypy_cache/
 
-
 # Sphinx
 docs/_build
+_build/
 .vscode/
 notes/
 docs/source/collections/*
 
@@ -9,6 +9,14 @@ repos:
       - id: check-yaml
       - id: double-quote-string-fixer
 
+  # - repo: https://github.com/mwouts/jupytext
+  #   rev: v1.13.3
+  #   hooks:
+  #     - id: jupytext
+  #       args: [--pipe, black, --warn-only]
+  #       additional_dependencies:
+  #         - black==21.12b0 # Matches hook
+
   - repo: https://github.com/psf/black
     rev: 21.12b0
     hooks:
@@ -33,11 +41,3 @@ repos:
     rev: v2.5.1
     hooks:
       - id: prettier
-
-  - repo: https://github.com/nbQA-dev/nbQA
-    rev: 1.2.2
-    hooks:
-      - id: nbqa-pyupgrade
-        additional_dependencies: [pyupgrade==2.7.3]
-      - id: nbqa-isort
-        additional_dependencies: [isort==v5.9.2]
@@ -43,7 +43,7 @@ providing necessary functionality for searching, discovering, data access/loadin
 
   In [1]: import intake
 
-  In [2]: col_url = "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"
+  In [2]: col_url = "https://gist.githubusercontent.com/andersy005/7f416e57acd8319b20fc2b88d129d2b8/raw/987b4b336d1a8a4f9abec95c23eed3bd7c63c80e/pangeo-gcp-subset.json"
 
   In [3]: col = intake.open_esm_datastore(col_url)
 
 
@@ -19,7 +19,7 @@ dependencies:
   - pydantic
   - pytest
   - pytest-cov
-  - pytest-icdiff
+  # - pytest-icdiff
   - pytest-sugar
   - pytest-xdist
   - python=*=*cp*
 
@@ -11,13 +11,13 @@ dependencies:
   - myst-nb
   - pip
   - python-graphviz
-  - python=3.8
+  - python=3.9
   - s3fs
   - sphinx-copybutton
   - watermark
   - zarr
   - pip:
       - sphinxext-opengraph
-      - sphinx-comments
+      - autodoc_pydantic
       - -r ../requirements.txt
       - -e ..
@@ -1,64 +1,39 @@
 # -*- coding: utf-8 -*-
 
-# import inspect
 import datetime
-import os
-import sys
 
 import yaml
 
 import intake_esm
 
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-# sys.path.insert(0, os.path.abspath('.'))
-
-cwd = os.getcwd()
-parent = os.path.dirname(cwd)
-sys.path.insert(0, parent)
-
-
-# -- General configuration -----------------------------------------------------
-
-# If your documentation needs a minimal Sphinx version, state it here.
-# needs_sphinx = '1.0'
-
-# Add any Sphinx extension module names here, as strings. They can be extensions
-# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 extensions = [
     'sphinx.ext.autodoc',
     'sphinx.ext.viewcode',
     'sphinx.ext.autosummary',
     'sphinx.ext.doctest',
     'sphinx.ext.intersphinx',
     'sphinx.ext.extlinks',
-    # 'sphinx.ext.linkcode',
     'sphinx.ext.intersphinx',
-    'IPython.sphinxext.ipython_console_highlighting',
-    'IPython.sphinxext.ipython_directive',
     'sphinx.ext.napoleon',
     'myst_nb',
     'sphinxext.opengraph',
     'sphinx_copybutton',
-    'sphinx_comments',
+    'sphinxcontrib.autodoc_pydantic',
 ]
 
 
 # MyST config
 myst_enable_extensions = ['amsmath', 'colon_fence', 'deflist', 'html_image']
-myst_url_schemes = ('http', 'https', 'mailto')
+myst_url_schemes = ['http', 'https', 'mailto']
 
 # sphinx-copybutton configurations
 copybutton_prompt_text = r'>>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: '
 copybutton_prompt_is_regexp = True
 
-comments_config = {
-    'utterances': {'repo': 'intake/intake-esm', 'optional': 'config', 'label': '💬 comment'},
-    'hypothesis': False,
-}
-
+autodoc_pydantic_model_show_json = True
+autodoc_pydantic_model_show_config = False
 
+jupyter_execute_notebooks = 'cache'
 execution_timeout = 600
 
 extlinks = {
@@ -134,11 +109,6 @@
 html_theme_options = {}
 
 
-# The name of an image file (within the static path) to use as favicon of the
-# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
-# pixels large.
-# html_favicon = None
-
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
@@ -147,14 +117,7 @@
 # Sometimes the savefig directory doesn't exist and needs to be created
 # https://github.com/ipython/ipython/issues/8733
 # becomes obsolete when we can pin ipython>=5.2; see ci/requirements/doc.yml
-ipython_savefig_dir = os.path.join(
-    os.path.dirname(os.path.abspath(__file__)), '_build', 'html', '_static'
-)
-
-savefig_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'source', '_static')
 
-os.makedirs(ipython_savefig_dir, exist_ok=True)
-os.makedirs(savefig_dir, exist_ok=True)
 
 # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
 # using the given strftime format.
@@ -207,65 +170,10 @@
     'python': ('https://docs.python.org/3/', None),
     'xarray': ('http://xarray.pydata.org/en/stable/', None),
     'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None),
-    'intake': ('https://intake.readthedocs.io/en/latest/', None),
+    'intake': ('https://intake.readthedocs.io/en/stable/', None),
 }
 
 
-# based on numpy doc/source/conf.py
-
-
-# def linkcode_resolve(domain, info):
-#     """
-#     Determine the URL corresponding to Python object
-#     """
-#     if domain != 'py':
-#         return None
-
-#     modname = info['module']
-#     fullname = info['fullname']
-
-#     submod = sys.modules.get(modname)
-#     if submod is None:
-#         return None
-
-#     obj = submod
-#     for part in fullname.split('.'):
-#         try:
-#             obj = getattr(obj, part)
-#         except AttributeError:
-#             return None
-
-#     try:
-#         fn = inspect.getsourcefile(inspect.unwrap(obj))
-#     except TypeError:
-#         fn = None
-#     if not fn:
-#         return None
-
-#     try:
-#         source, lineno = inspect.getsourcelines(obj)
-#     except OSError:
-#         lineno = None
-
-#     if lineno:
-#         linespec = f'#L{lineno}-L{lineno + len(source) - 1}'
-#     else:
-#         linespec = ''
-
-#     fn = os.path.relpath(fn, start=os.path.dirname(intake_esm.__file__))
-
-#     if '+' in intake_esm.__version__:
-#         return f'https://github.com/intake/intake-esm/blob/master/intake_esm/{fn}{linespec}'
-#     else:
-#         return (
-#             f'https://github.com/intake/intake-esm/blob/'
-#             f'v{intake_esm.__version__}/intake_esm/{fn}{linespec}'
-#         )
-
-
-# https://www.ericholscher.com/blog/2016/jul/25/integrating-jinja-rst-sphinx/
-
-
 def rstjinja(app, docname, source):
     """
     Render our pages as a jinja template for fancy templating goodness.
@@ -278,15 +186,8 @@ def rstjinja(app, docname, source):
     source[0] = rendered
 
 
-def html_page_context(app, pagename, templatename, context, doctree):
-    # Disable edit button for docstring generated pages
-    if 'generated' in pagename:
-        context['theme_use_edit_page_button'] = False
-
-
 def setup(app):
     app.connect('source-read', rstjinja)
-    app.connect('html-page-context', html_page_context)
 
 
 with open('catalogs.yaml') as f:
 
@@ -0,0 +1,123 @@
+# ESM Collection Specification
+
+```{note}
+This document mirrors the [ESM Collection Specification](https://github.com/NCAR/esm-collection-spec/blob/master/collection-spec/collection-spec.md) and is updated as the specification evolves.
+```
+
+- [ESM Collection Specification](#esm-collection-specification)
+  - [Overview](#overview)
+    - [Collection Specification](#collection-specification)
+    - [Catalog](#catalog)
+    - [Assets (Data Files)](#assets-data-files)
+  - [Catalog fields](#catalog-fields)
+    - [Attribute Object](#attribute-object)
+    - [Assets Object](#assets-object)
+    - [Aggregation Control Object](#aggregation-control-object)
+    - [Aggregation Object](#aggregation-object)
+
+## Overview
+
+This document explains the structure and content of an ESM Collection.
+A collection provides metadata about the catalog, telling us what we expect to find inside and how to open it.
+The collection is described in a single JSON file, inspired by the STAC spec.
+
+The ESM Collection specification consists of three parts:
+
+### Collection Specification
+
+The _collection_ specification provides metadata about the catalog, telling us what we expect to find inside and how to open it.
+The descriptor is a single json file, inspired by the [STAC spec](https://github.com/radiantearth/stac-spec).
+
+```json
+{
+  "esmcat_version": "0.1.0",
+  "id": "sample",
+  "description": "This is a very basic sample ESM collection.",
+  "catalog_file": "sample_catalog.csv",
+  "attributes": [
+    {
+      "column_name": "activity_id",
+      "vocabulary": "https://raw.githubusercontent.com/WCRP-CMIP/CMIP6_CVs/master/CMIP6_activity_id.json"
+    },
+    {
+      "column_name": "source_id",
+      "vocabulary": "https://raw.githubusercontent.com/WCRP-CMIP/CMIP6_CVs/master/CMIP6_source_id.json"
+    }
+  ],
+  "assets": {
+    "column_name": "path",
+    "format": "zarr"
+  }
+}
+```
+
+### Catalog
+
+The collection points to a single catalog.
+A catalog is a CSV file.
+The meaning of the columns in the csv file is defined by the parent collection.
+
+```
+activity_id,source_id,path
+CMIP,ACCESS-CM2,gs://pangeo-data/store1.zarr
+CMIP,GISS-E2-1-G,gs://pangeo-data/store1.zarr
+```
+
+### Assets (Data Files)
+
+The data assets can be either netCDF or Zarr.
+They should be either [URIs](https://en.wikipedia.org/wiki/Uniform_Resource_Identifier) or full filesystem paths.
+
+## Catalog fields
+
+| Element             | Type                                                      | Description                                                                                                                                                               |
+| ------------------- | --------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| esmcat_version      | string                                                    | **REQUIRED.** The ESM Catalog version the collection implements.                                                                                                          |
+| id                  | string                                                    | **REQUIRED.** Identifier for the collection.                                                                                                                              |
+| title               | string                                                    | A short descriptive one-line title for the collection.                                                                                                                    |
+| description         | string                                                    | **REQUIRED.** Detailed multi-line description to fully explain the collection. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. |
+| catalog_file        | string                                                    | **REQUIRED.** Path to the CSV file with the catalog contents.                                                                                                             |
+| catalog_dict        | array                                                     | If specified, it is mutually exclusive with `catalog_file`. An array of dictionaries that represents the data that would otherwise be in the csv.                         |
+| attributes          | [[Attribute Object](#attribute-object)]                   | **REQUIRED.** A list of attribute columns in the data set.                                                                                                                |
+| assets              | [Assets Object](#assets-object)                           | **REQUIRED.** Description of how the assets (data files) are referenced in the CSV catalog file.                                                                          |
+| aggregation_control | [Aggregation Control Object](#aggregation-control-object) | **OPTIONAL.** Description of how to support aggregation of multiple assets into a single xarray data set.                                                                 |
+
+### Attribute Object
+
+An attribute object describes a column in the catalog CSV file.
+The column names can optionally be associated with a controlled vocabulary, such as the [CMIP6 CVs](https://github.com/WCRP-CMIP/CMIP6_CVs), which explain how to interpret the attribute values.
+
+| Element     | Type   | Description                                                                            |
+| ----------- | ------ | -------------------------------------------------------------------------------------- |
+| column_name | string | **REQUIRED.** The name of the attribute column. Must be in the header of the CSV file. |
+| vocabulary  | string | Link to the controlled vocabulary for the attribute in the format of a URL.            |
+
+### Assets Object
+
+An assets object describes the columns in the CSV file relevant for opening the actual data files.
+
+| Element            | Type   | Description                                                                                                                        |
+| ------------------ | ------ | ---------------------------------------------------------------------------------------------------------------------------------- |
+| column_name        | string | **REQUIRED.** The name of the column containing the path to the asset. Must be in the header of the CSV file.                      |
+| format             | string | The data format. Valid values are `netcdf` and `zarr`. If specified, it means that all data in the catalog is the same type.       |
+| format_column_name | string | The column name which contains the data format, allowing for variable data types in one catalog. Mutually exclusive with `format`. |
+
+### Aggregation Control Object
+
+An aggregation control object defines necessary information to use when aggregating multiple assets into a single xarray data set.
+
+| Element              | Type                                        | Description                                                                             |
+| -------------------- | ------------------------------------------- | --------------------------------------------------------------------------------------- |
+| variable_column_name | string                                      | **REQUIRED.** Name of the attribute column in csv file that contains the variable name. |
+| groupby_attrs        | array                                       | Column names (attributes) that define data sets that can be aggregated.                 |
+| aggregations         | [[Aggregation Object](#aggregation-object)] | **OPTIONAL.** List of aggregations to apply to query results                            |
+
+### Aggregation Object
+
+An aggregation object describes types of operations done during the aggregation of multiple assets into a single xarray data set.
+
+| Element        | Type   | Description                                                                                                                                                                                                                                                                                                                                                                                          |
+| -------------- | ------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| type           | string | **REQUIRED.** Type of aggregation operation to apply. Valid values include: `join_new`, `join_existing`, `union`                                                                                                                                                                                                                                                                                     |
+| attribute_name | string | Name of attribute (column) across which to aggregate.                                                                                                                                                                                                                                                                                                                                                |
+| options        | object | **OPTIONAL.** Aggregation settings that are passed as keyword arguments to [`xarray.concat()`](https://xarray.pydata.org/en/stable/generated/xarray.concat.html) or [`xarray.merge()`](https://xarray.pydata.org/en/stable/generated/xarray.merge.html#xarray.merge). For `join_existing`, it must contain the name of the existing dimension to use (e.g., something like `{'dim': 'time'}`).       |