Skip to content

Commit 82ff48d

Browse files
authored
Merge pull request #114 from henriaidasso/rdps
2 parents 8ac35c8 + 07696c4 commit 82ff48d

File tree

19 files changed

+960
-25
lines changed

19 files changed

+960
-25
lines changed

CHANGES.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,15 @@
44

55
<!-- insert list items of new changes here -->
66

7+
* Add `RDPS_CRIM` and `HRDPS_CRIM` implementations.
8+
* Add `cf` extension adding CF Parameter metadata to (H)RDPS STAC assets and items.
9+
* Add `cf` and `file` helpers.
10+
* Add `providers` and `contacts` extensions metadata to (H)RDPS STAC collection.
11+
* Fix deprecated access to `model_fields` in `BaseSTAC` data model class.
12+
* Fix bug in service type check in extensions' `get_assets` methods.
13+
* Fix return type of `from_data` in `THREDDSCatalogDataModel`.
14+
* Update RDPS and HRDPS tests.
15+
716
## [0.11.0](https://github.com/crim-ca/stac-populator/tree/0.11.0) (2025-11-17)
817

918
* Add option to automatically update collection extents and summaries based on ingested items.

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,14 @@ Provided implementations of `STACpopulatorBase`:
2828

2929
| Implementation | Description |
3030
|----------------------------------------------|-------------------------------------------------------------------------------------------------------------------------|
31+
| [RDPS_CRIM][RDPS_CRIM] | Crawls a THREDDS Catalog for RDPS NCML-annotated NetCDF references to publish corresponding STAC Collection and Items. |
32+
| [HRDPS_CRIM][HRDPS_CRIM] | Crawls a THREDDS Catalog for HRDPS NCML-annotated NetCDF references to publish corresponding STAC Collection and Items. |
3133
| [CMIP6_UofT][CMIP6_UofT] | Crawls a THREDDS Catalog for CMIP6 NCML-annotated NetCDF references to publish corresponding STAC Collection and Items. |
3234
| [DirectoryLoader][DirLoader] | Crawls a subdirectory hierarchy of pre-generated STAC Collections and Items to publish to a STAC API endpoint. |
3335
| [CORDEX-CMIP6_Ouranos][CORDEX-CMIP6_Ouranos] | Crawls a THREDDS Catalog for CORDEX-CMIP6 NetCDF references to publish corresponding STAC Collection and Items. |
3436

37+
[RDPS_CRIM]: STACpopulator/implementations/RDPS_CRIM/add_RDPS.py
38+
[HRDPS_CRIM]: STACpopulator/implementations/HRDPS_CRIM/add_HRDPS.py
3539
[CMIP6_UofT]: STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py
3640
[DirLoader]: STACpopulator/implementations/DirectoryLoader/crawl_directory.py
3741
[CORDEX-CMIP6_Ouranos]: STACpopulator/implementations/CORDEX-CMIP6_Ouranos/add_CORDEX-CMIP6.py

STACpopulator/extensions/base.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,16 @@
6464
class Helper:
    """Class to be subclassed by extension helpers."""

    @classmethod
    @abstractmethod
    def from_data(cls, data: dict[str, Any], **kwargs) -> "Helper":
        """Create a Helper instance from raw data.

        Subclasses are expected to override this alternate constructor; the
        base implementation does nothing and returns ``None``.

        NOTE(review): ``Helper`` does not visibly derive from ``abc.ABC``, so
        ``@abstractmethod`` presumably is not enforced at instantiation time
        here -- confirm whether that is intended.
        """
76+
6777

6878
class ExtensionHelper(BaseModel, Helper):
6979
"""Base class for dataset properties going into the catalog.
@@ -190,7 +200,8 @@ def create_uid(self) -> str:
190200
@model_validator(mode="after")
191201
def find_helpers(self) -> "BaseSTAC":
192202
"""Populate the list of extensions."""
193-
for key, field in self.model_fields.items():
203+
# Access model fields from class. From obj will be removed in pydantic v3
204+
for key, field in type(self).model_fields.items():
194205
if isinstance(field.annotation, type) and issubclass(field.annotation, Helper):
195206
self._helpers.append(key)
196207
return self
@@ -328,8 +339,8 @@ def get_assets(
328339
return {
329340
key: asset
330341
for key, asset in self.item.get_assets().items()
331-
if (service_type is ServiceType and service_type.value in asset.extra_fields)
332-
or any(ServiceType.from_value(field, default=None) is ServiceType for field in asset.extra_fields)
342+
if (isinstance(service_type, ServiceType) and service_type.value in asset.extra_fields)
343+
or any(ServiceType.from_value(field, default=False) for field in asset.extra_fields)
333344
}
334345

335346
def __repr__(self) -> str:

STACpopulator/extensions/cf.py

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
"""CF Extension Module."""
2+
3+
from __future__ import annotations
4+
5+
import functools
6+
from typing import (
7+
Any,
8+
Dict,
9+
Generic,
10+
Iterable,
11+
List,
12+
Literal,
13+
Optional,
14+
TypeVar,
15+
Union,
16+
cast,
17+
get_args,
18+
)
19+
20+
import pystac
21+
from pydantic import BaseModel
22+
from pystac.extensions import item_assets
23+
from pystac.extensions.base import ExtensionManagementMixin, PropertiesExtension
24+
25+
from STACpopulator.extensions.base import ExtensionHelper
26+
from STACpopulator.stac_utils import ServiceType
27+
28+
T = TypeVar("T", pystac.Collection, pystac.Item, pystac.Asset)
29+
SchemaName = Literal["cf"]
30+
SCHEMA_URI = "https://stac-extensions.github.io/cf/v0.2.0/schema.json"
31+
PREFIX = f"{get_args(SchemaName)[0]}:"
32+
PARAMETER_PROP = PREFIX + "parameter"
33+
34+
35+
class CFParameter(BaseModel):
    """Name/unit pair describing one CF parameter."""

    name: str
    unit: str

    def __repr__(self) -> str:
        """Return a compact representation showing the name and the unit."""
        name, unit = self.name, self.unit
        return f"<CFParameter name={name}, unit={unit}>"
44+
45+
46+
class CFHelper(ExtensionHelper):
    """Helper that builds CF parameters from variable attributes and applies them to STAC objects."""

    _prefix: str = "cf"
    variables: Dict[str, Any]

    @functools.cached_property
    def parameters(self) -> List[CFParameter]:
        """Extract ``cf:parameter``-like information from ``variables``.

        Each variable's ``attributes`` mapping is inspected: variables without
        a non-empty ``standard_name`` are skipped, and a missing ``units``
        attribute results in an empty unit string.

        Returns
        -------
        List[CFParameter]: One entry per variable that has a standard name
        (possibly an empty list).
        """
        parameters = []

        for var in self.variables.values():
            attrs = var.get("attributes", {})
            name = attrs.get("standard_name")  # Required to build a parameter
            if not name:
                continue  # No usable standard name: nothing to report
            parameters.append(CFParameter(name=name, unit=attrs.get("units", "")))

        return parameters

    @classmethod
    def from_data(
        cls,
        data: dict[str, Any],
        **kwargs,
    ) -> "CFHelper":
        """Create a CFHelper instance from raw data.

        The variables are read from ``data["data"]["variables"]``; any extra
        keyword arguments are forwarded to the constructor.
        """
        return cls(variables=data["data"]["variables"], **kwargs)

    def apply(self, item: T, add_if_missing: bool = True) -> T:
        """Apply the CF extension to an item.

        Args:
            item: The STAC object to extend.
            add_if_missing: Whether the extension schema URI is added to the
                object when it is not declared yet.

        Returns
        -------
        The same ``item`` object, with the CF parameters applied.
        """
        ext = CFExtension.ext(item, add_if_missing=add_if_missing)
        ext.apply(parameters=self.parameters)

        # FIXME: This temporary workaround has been added to comply with the (most certainly buggy) validation schema
        # for the CF extension. It should be removed once the PR is integrated, since applying on the item should be enough.
        # The asset is looked up defensively so that objects without an "HTTPServer" asset
        # (or without assets at all) do not fail here.
        assets = getattr(item, "assets", None) or {}
        asset = assets.get("HTTPServer")
        if asset is not None:
            cf_asset_ext = CFExtension.ext(asset, add_if_missing=True)
            cf_asset_ext.apply(parameters=self.parameters)
        return item
87+
88+
89+
class CFExtension(
    Generic[T],
    PropertiesExtension,
    ExtensionManagementMixin[Union[pystac.Asset, pystac.Item, pystac.Collection]],
):
    """CF Metadata Extension."""

    @property
    def name(self) -> SchemaName:
        """Return the schema name."""
        return get_args(SchemaName)[0]

    @property
    def parameter(self) -> List[dict[str, Any]] | None:
        """Get or set the CF parameter(s)."""
        # The property holds a list of parameter dictionaries, not an integer.
        return self._get_property(PARAMETER_PROP, list)

    @parameter.setter
    def parameter(self, v: List[dict[str, Any]] | None) -> None:
        self._set_property(PARAMETER_PROP, v)

    def apply(
        self,
        parameters: Union[List[CFParameter], List[dict[str, Any]]],
    ) -> None:
        """Apply CF Extension properties to the extended :class:`~pystac.Item` or :class:`~pystac.Asset`.

        Args:
            parameters: CF parameters, given either as :class:`CFParameter`
                models or as already-serialized dictionaries. An empty list is
                accepted.
        """
        # Convert element by element so that an empty list does not raise and
        # a list mixing models and dictionaries is handled consistently.
        self.parameter = [p if isinstance(p, dict) else p.model_dump() for p in parameters]

    @classmethod
    def get_schema_uri(cls) -> str:
        """Return this extension's schema URI."""
        return SCHEMA_URI

    @classmethod
    def ext(cls, obj: T, add_if_missing: bool = False) -> CFExtension[T]:
        """Extend the given STAC Object with properties from the :stac-ext:`CF Extension <cf>`.

        This extension can be applied to instances of :class:`~pystac.Item`, :class:`~pystac.Asset`,
        :class:`~pystac.Collection`, or an item-assets ``AssetDefinition``.

        Raises
        ------
        pystac.ExtensionTypeError : If an invalid object type is passed.
        """
        if isinstance(obj, pystac.Collection):
            cls.ensure_has_extension(obj, add_if_missing)
            return cast(CFExtension[T], CollectionCFExtension(obj))
        elif isinstance(obj, pystac.Item):
            cls.ensure_has_extension(obj, add_if_missing)
            return cast(CFExtension[T], ItemCFExtension(obj))
        elif isinstance(obj, pystac.Asset):
            # Assets declare the extension on their owning object.
            cls.ensure_owner_has_extension(obj, add_if_missing)
            return cast(CFExtension[T], AssetCFExtension(obj))
        elif isinstance(obj, item_assets.AssetDefinition):
            cls.ensure_owner_has_extension(obj, add_if_missing)
            return cast(CFExtension[T], ItemAssetsCFExtension(obj))
        else:
            raise pystac.ExtensionTypeError(cls._ext_error_message(obj))
148+
149+
150+
class ItemCFExtension(CFExtension[pystac.Item]):
    """
    Concrete :class:`CFExtension` bound to a :class:`~pystac.Item`.

    The extension properties are read from and written to the Item's
    ``properties`` mapping.

    Prefer calling :meth:`CFExtension.ext` on an :class:`~pystac.Item` over
    instantiating this class directly.
    """

    def __init__(self, item: pystac.Item) -> None:
        self.properties = item.properties
        self.item = item

    def get_assets(
        self,
        service_type: Optional[ServiceType] = None,
    ) -> dict[str, pystac.Asset]:
        """Get the item's assets for which a service type is defined.

        Args:
            service_type: Optional :class:`~STACpopulator.stac_utils.ServiceType`
                whose value is looked up in each asset's extra fields.

        Returns
        -------
        Dict[str, Asset]: The assets whose extra fields contain
        ``service_type.value`` or any field for which
        ``ServiceType.from_value(field, default=False)`` is truthy.

        NOTE(review): the second condition is evaluated even when
        ``service_type`` is given, so it presumably also admits assets of
        other service types -- confirm against the intended filtering.
        """
        selected: dict[str, pystac.Asset] = {}
        for key, asset in self.item.get_assets().items():
            requested = isinstance(service_type, ServiceType) and service_type.value in asset.extra_fields
            if requested or any(ServiceType.from_value(field, default=False) for field in asset.extra_fields):
                selected[key] = asset
        return selected

    def __repr__(self) -> str:
        """Return repr."""
        item_id = self.item.id
        return f"<ItemCFExtension Item id={item_id}>"
190+
191+
192+
class ItemAssetsCFExtension(CFExtension[item_assets.AssetDefinition]):
    """Extension for CF item assets."""

    asset_defn: item_assets.AssetDefinition
    properties: dict[str, Any]

    def __init__(self, item_asset: item_assets.AssetDefinition) -> None:
        # Extension properties live in the asset definition's own properties.
        self.properties = item_asset.properties
        self.asset_defn = item_asset
201+
202+
203+
class AssetCFExtension(CFExtension[pystac.Asset]):
    """
    Concrete :class:`CFExtension` bound to a :class:`~pystac.Asset`.

    The extension properties are stored in the Asset's extra fields.

    Prefer calling :meth:`CFExtension.ext` on an :class:`~pystac.Asset` over
    instantiating this class directly.
    """

    asset_href: str
    """The ``href`` of the extended :class:`~pystac.Asset`."""

    properties: dict[str, Any]
    """The extra fields of the :class:`~pystac.Asset`, including extension properties."""

    additional_read_properties: Optional[Iterable[dict[str, Any]]] = None
    """When the asset is owned by a :class:`~pystac.Item`, a one-element list
    holding that Item's properties."""

    def __init__(self, asset: pystac.Asset) -> None:
        owner = asset.owner
        self.asset_href = asset.href
        self.properties = asset.extra_fields
        # Only Item owners contribute additional read-only properties.
        if owner and isinstance(owner, pystac.Item):
            self.additional_read_properties = [owner.properties]

    def __repr__(self) -> str:
        """Return repr."""
        href = self.asset_href
        return f"<AssetCFExtension Asset href={href}>"
233+
234+
235+
class CollectionCFExtension(CFExtension[pystac.Collection]):
    """
    A concrete implementation of :class:`CFExtension` on a :class:`~pystac.Collection`.

    This class should generally not be instantiated directly. Instead, call
    :meth:`CFExtension.ext` on a :class:`~pystac.Collection` to extend it.
    """

    collection: pystac.Collection
    properties: dict[str, Any]

    def __init__(self, collection: pystac.Collection) -> None:
        self.collection = collection
        # Like the other concrete extensions, expose a ``properties`` mapping so the
        # inherited property accessors have somewhere to read from and write to.
        self.properties = collection.extra_fields

    def __repr__(self) -> str:
        """Return repr."""
        return f"<CollectionCFExtension Collection id={self.collection.id}>"

STACpopulator/extensions/cmip6.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -308,8 +308,8 @@ def get_assets(
308308
return {
309309
key: asset
310310
for key, asset in self.item.get_assets().items()
311-
if (service_type is ServiceType and service_type.value in asset.extra_fields)
312-
or any(ServiceType.from_value(field, default=None) is ServiceType for field in asset.extra_fields)
311+
if (isinstance(service_type, ServiceType) and service_type.value in asset.extra_fields)
312+
or any(ServiceType.from_value(field, default=False) for field in asset.extra_fields)
313313
}
314314

315315
def __repr__(self) -> str:

STACpopulator/extensions/datacube.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,15 @@ def __init__(self, attrs: MutableMapping[str, Any]) -> None:
141141
},
142142
}
143143

144+
@classmethod
def from_data(cls, data: dict[str, Any], **kwargs) -> "DataCubeHelper":
    """Create a DataCubeHelper instance from raw data.

    The helper is built from ``data["data"]``; extra keyword arguments are
    accepted but not forwarded to the constructor.
    """
    attrs = data["data"]
    return cls(attrs=attrs)
152+
144153
@property
145154
@functools.cache
146155
def dimensions(self) -> dict[str, Dimension]:
@@ -213,9 +222,11 @@ def variables(self) -> dict[str, Variable]:
213222
else:
214223
dtype = VariableType.DATA.value
215224

225+
dimensions = meta.get("shape", [])
226+
216227
variables[name] = Variable(
217228
properties=dict(
218-
dimensions=meta["shape"],
229+
dimensions=[] if dimensions == [""] else dimensions,
219230
type=dtype,
220231
description=attrs.get("description", attrs.get("long_name", "")),
221232
unit=attrs.get("units", ""),

0 commit comments

Comments
 (0)