diff --git a/CHANGELOG.md b/CHANGELOG.md index 48faf3592..3745e3ed1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - Added Table-extension ([#646](https://github.com/stac-utils/pystac/pull/646)) - Stable support for Python 3.10 ([#656](https://github.com/stac-utils/pystac/pull/656)) - `.python-version` files are now ignored by Git ([#647](https://github.com/stac-utils/pystac/pull/647)) +- Added a flag to allow users to skip transforming hierarchical link HREFs based on root catalog type ([#663](https://github.com/stac-utils/pystac/pull/663)) ### Removed @@ -20,6 +21,7 @@ - Links will get their `title` from their target if no `title` is provided ([#607](https://github.com/stac-utils/pystac/pull/607)) - Relax typing on `LabelClasses` from `List` to `Sequence` ([#627](https://github.com/stac-utils/pystac/pull/627)) - Upgraded datacube-extension to version 2.0.0 ([#645](https://github.com/stac-utils/pystac/pull/645)) +- By default, ItemCollections will not modify Item HREFs based on root catalog type to avoid performance costs of root link reads ([#663](https://github.com/stac-utils/pystac/pull/663)) ### Fixed diff --git a/pystac/catalog.py b/pystac/catalog.py index 80abf825a..3d00f3370 100644 --- a/pystac/catalog.py +++ b/pystac/catalog.py @@ -473,7 +473,9 @@ def get_item_links(self) -> List[Link]: """ return self.get_links(pystac.RelType.ITEM) - def to_dict(self, include_self_link: bool = True) -> Dict[str, Any]: + def to_dict( + self, include_self_link: bool = True, transform_hrefs: bool = True + ) -> Dict[str, Any]: links = self.links if not include_self_link: links = [x for x in links if x.rel != pystac.RelType.SELF] @@ -483,7 +485,7 @@ def to_dict(self, include_self_link: bool = True) -> Dict[str, Any]: "id": self.id, "stac_version": pystac.get_stac_version(), "description": self.description, - "links": [link.to_dict() for link in links], + "links": [link.to_dict(transform_href=transform_hrefs) for link in links], } if self.stac_extensions is not None: diff --git a/pystac/collection.py b/pystac/collection.py index f6f44b621..29fea8453 100644 --- a/pystac/collection.py +++ b/pystac/collection.py @@ -515,8 +515,12 @@ def add_item( super().add_item(item, title, strategy) item.set_collection(self) - def to_dict(self, include_self_link: bool = True) -> Dict[str, Any]: - d = super().to_dict(include_self_link) + def to_dict( + self, include_self_link: bool = True, transform_hrefs: bool = True + ) -> Dict[str, Any]: + d = super().to_dict( + include_self_link=include_self_link, transform_hrefs=transform_hrefs + ) d["extent"] = self.extent.to_dict() d["license"] = self.license if self.stac_extensions is not None: diff --git a/pystac/item.py b/pystac/item.py index 745aa3f09..14c660e17 100644 --- a/pystac/item.py +++ b/pystac/item.py @@ -285,7 +285,9 @@ def get_collection(self) -> Optional[Collection]: else: return cast(Collection, collection_link.resolve_stac_object().target) - def to_dict(self, include_self_link: bool = True) -> Dict[str, Any]: + def to_dict( + self, include_self_link: bool = True, transform_hrefs: bool = True + ) -> Dict[str, Any]: links = self.links if not include_self_link: links = [x for x in links if x.rel != pystac.RelType.SELF] @@ -303,7 +305,7 @@ def to_dict(self, include_self_link: bool = True) -> Dict[str, Any]: "id": self.id, "properties": self.properties, "geometry": self.geometry, - "links": [link.to_dict() for link in links], + "links": [link.to_dict(transform_href=transform_hrefs) for link in links], "assets": assets, } diff --git a/pystac/item_collection.py b/pystac/item_collection.py index 206fee01a..de38ac34f 100644 --- a/pystac/item_collection.py +++ b/pystac/item_collection.py @@ -88,7 +88,7 @@ def map_item(item_or_dict: ItemLike) -> pystac.Item: if isinstance(item_or_dict, pystac.Item): return item_or_dict.clone() if clone_items else item_or_dict else: - return pystac.Item.from_dict(item_or_dict) + return pystac.Item.from_dict(item_or_dict, preserve_dict=clone_items) self.items = list(map(map_item, items)) self.extra_fields = extra_fields or {} @@ -116,11 +116,23 @@ def __add__(self, other: object) -> "ItemCollection": return ItemCollection(items=combined, clone_items=False) - def to_dict(self) -> Dict[str, Any]: - """Serializes an :class:`ItemCollection` instance to a JSON-like dictionary.""" + def to_dict(self, transform_hrefs: bool = False) -> Dict[str, Any]: + """Serializes an :class:`ItemCollection` instance to a JSON-like dictionary. + + Args: + transform_hrefs: If True, transform the HREF of hierarchical links + of Items based on the type of catalog the Item belongs to (if any). + I.e. if the item belongs to a root catalog that is + RELATIVE_PUBLISHED or SELF_CONTAINED, + hierarchical link HREFs will be transformed to be relative to the + catalog root. This can be slow if the Items have root links that + have not yet been resolved. Defaults to False. + """ return { "type": "FeatureCollection", - "features": [item.to_dict() for item in self.items], + "features": [ + item.to_dict(transform_hrefs=transform_hrefs) for item in self.items + ], **self.extra_fields, } diff --git a/pystac/link.py b/pystac/link.py index 7de266bbf..a6f8e9431 100644 --- a/pystac/link.py +++ b/pystac/link.py @@ -129,9 +129,16 @@ def href(self) -> str: raise ValueError(f"{self} does not have an HREF set.") return result - def get_href(self) -> Optional[str]: + def get_href(self, transform_href: bool = True) -> Optional[str]: """Gets the HREF for this link. + Args: + transform_href: If True, transform the HREF based on the type of + catalog the owner belongs to (if any). I.e. if the link owner + belongs to a root catalog that is RELATIVE_PUBLISHED or SELF_CONTAINED, + the HREF will be transformed to be relative to the catalog root + if this is a hierarchical link relation. + Returns: str: Returns this link's HREF. If there is an owner of the link and the root catalog (if there is one) is of type RELATIVE_PUBLISHED, @@ -144,7 +151,13 @@ def get_href(self) -> Optional[str]: else: href = self._target_href - if href and is_absolute_href(href) and self.owner and self.owner.get_root(): + if ( + transform_href + and href + and is_absolute_href(href) + and self.owner + and self.owner.get_root() + ): root = self.owner.get_root() rel_links = [ *HIERARCHICAL_LINKS, @@ -304,14 +317,22 @@ def is_resolved(self) -> bool: """ return self._target_object is not None - def to_dict(self) -> Dict[str, Any]: + def to_dict(self, transform_href: bool = True) -> Dict[str, Any]: """Generate a dictionary representing the JSON of this serialized Link. Returns: dict: A serialization of the Link that can be written out as JSON. + transform_href: If True, transform the HREF based on the type of + catalog the owner belongs to (if any). I.e. if the link owner + belongs to a root catalog that is RELATIVE_PUBLISHED or SELF_CONTAINED, + the HREF will be transformed to be relative to the catalog root + if this is a hierarchical link relation. """ - d: Dict[str, Any] = {"rel": self.rel, "href": self.get_href()} + d: Dict[str, Any] = { + "rel": self.rel, + "href": self.get_href(transform_href=transform_href), + } if self.media_type is not None: d["type"] = self.media_type diff --git a/pystac/stac_io.py b/pystac/stac_io.py index a0108566a..f5ddbc252 100644 --- a/pystac/stac_io.py +++ b/pystac/stac_io.py @@ -3,12 +3,12 @@ import json from typing import ( Any, + Callable, Dict, List, Optional, TYPE_CHECKING, Tuple, - Type, Union, ) @@ -37,7 +37,7 @@ class StacIO(ABC): - _default_io: Optional[Type["StacIO"]] = None + _default_io: Optional[Callable[[], "StacIO"]] = None @abstractmethod def read_text( @@ -251,7 +251,7 @@ def save_json( self.write_text(dest, txt) @classmethod - def set_default(cls, stac_io_class: Type["StacIO"]) -> None: + def set_default(cls, stac_io_class: Callable[[], "StacIO"]) -> None: """Set the default StacIO instance to use.""" cls._default_io = stac_io_class diff --git a/pystac/stac_object.py b/pystac/stac_object.py index b1e4fa7d0..d6871d2d5 100644 --- a/pystac/stac_object.py +++ b/pystac/stac_object.py @@ -428,12 +428,20 @@ def _object_links(self) -> List[str]: raise NotImplementedError @abstractmethod - def to_dict(self, include_self_link: bool = True) -> Dict[str, Any]: + def to_dict( + self, include_self_link: bool = True, transform_hrefs: bool = True + ) -> Dict[str, Any]: """Generate a dictionary representing the JSON of this serialized object. Args: include_self_link : If True, the dict will contain a self link to this object. If False, the self link will be omitted. + transform_hrefs: If True, transform the HREF of hierarchical links + based on the type of catalog this object belongs to (if any). + I.e. if this object belongs to a root catalog that is + RELATIVE_PUBLISHED or SELF_CONTAINED, + hierarchical link HREFs will be transformed to be relative to the + catalog root. dict: A serialization of the object that can be written out as JSON. """ diff --git a/tests/test_item_collection.py b/tests/test_item_collection.py index 11603bd8d..3ca79e157 100644 --- a/tests/test_item_collection.py +++ b/tests/test_item_collection.py @@ -7,6 +7,7 @@ import pystac from tests.utils import TestCases +from tests.utils.stac_io_mock import MockDefaultStacIO class TestItemCollection(unittest.TestCase): @@ -112,7 +113,7 @@ def test_from_relative_path(self) -> None: def test_from_list_of_dicts(self) -> None: item_dict = self.stac_io.read_json(self.SIMPLE_ITEM) - item_collection = pystac.ItemCollection(items=[item_dict]) + item_collection = pystac.ItemCollection(items=[item_dict], clone_items=True) self.assertEqual(item_collection[0].id, item_dict.get("id")) @@ -176,3 +177,11 @@ def test_from_dict_sets_root(self) -> None: item_collection = ItemCollection.from_dict(param_dict, root=catalog) for item in item_collection.items: self.assertEqual(item.get_root(), catalog) + + def test_to_dict_does_not_read_root_link_of_items(self) -> None: + with MockDefaultStacIO() as mock_stac_io: + item_collection = pystac.ItemCollection.from_file(self.ITEM_COLLECTION) + + item_collection.to_dict() + + self.assertEqual(mock_stac_io.mock.read_text.call_count, 1) diff --git a/tests/utils/stac_io_mock.py b/tests/utils/stac_io_mock.py index 8feac99b2..2c07d4b94 100644 --- a/tests/utils/stac_io_mock.py +++ b/tests/utils/stac_io_mock.py @@ -1,25 +1,49 @@ -from typing import Any, Union +from typing import Any, Optional, Union from unittest.mock import Mock import pystac +from pystac.stac_io import DefaultStacIO, StacIO class MockStacIO(pystac.StacIO): """Creates a mock that records StacIO calls for testing and allows clients to replace StacIO functionality, all within a context scope. + + Args: + wrapped_stac_io: The StacIO that will be used to perform the calls. + Defaults to an instance of DefaultStacIO. """ - def __init__(self) -> None: + mock: Mock + wrapped_stac_io: StacIO + + def __init__(self, wrapped_stac_io: Optional[StacIO] = None) -> None: self.mock = Mock() + if wrapped_stac_io is None: + self.wrapped_stac_io = DefaultStacIO() + else: + self.wrapped_stac_io = wrapped_stac_io def read_text( self, source: Union[str, pystac.Link], *args: Any, **kwargs: Any ) -> str: self.mock.read_text(source) - return pystac.StacIO.default().read_text(source) + return self.wrapped_stac_io.read_text(source) def write_text( self, dest: Union[str, pystac.Link], txt: str, *args: Any, **kwargs: Any ) -> None: self.mock.write_text(dest, txt) - pystac.StacIO.default().write_text(dest, txt) + self.wrapped_stac_io.write_text(dest, txt) + + +class MockDefaultStacIO(object): + """Context manager for mocking StacIO.""" + + def __enter__(self) -> MockStacIO: + mock = MockStacIO() + pystac.StacIO.set_default(lambda: mock) + return mock + + def __exit__(self, *args: Any) -> None: + pystac.StacIO.set_default(DefaultStacIO)