Skip to content

Allow PySTAC to skip transforming hrefs to relative or absolute based on CatalogType #663

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- Added Table-extension ([#646](https://github.com/stac-utils/pystac/pull/646))
- Stable support for Python 3.10 ([#656](https://github.com/stac-utils/pystac/pull/656))
- `.python-version` files are now ignored by Git ([#647](https://github.com/stac-utils/pystac/pull/647))
- Added a flag to allow users to skip transforming hierarchical link HREFs based on root catalog type ([#663](https://github.com/stac-utils/pystac/pull/663))

### Removed

Expand All @@ -20,6 +21,7 @@
- Links will get their `title` from their target if no `title` is provided ([#607](https://github.com/stac-utils/pystac/pull/607))
- Relax typing on `LabelClasses` from `List` to `Sequence` ([#627](https://github.com/stac-utils/pystac/pull/627))
- Upgraded datacube-extension to version 2.0.0 ([#645](https://github.com/stac-utils/pystac/pull/645))
- By default, ItemCollections will not modify Item HREFs based on root catalog type to avoid performance costs of root link reads ([#663](https://github.com/stac-utils/pystac/pull/663))

### Fixed

Expand Down
6 changes: 4 additions & 2 deletions pystac/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,9 @@ def get_item_links(self) -> List[Link]:
"""
return self.get_links(pystac.RelType.ITEM)

def to_dict(self, include_self_link: bool = True) -> Dict[str, Any]:
def to_dict(
self, include_self_link: bool = True, transform_hrefs: bool = True
) -> Dict[str, Any]:
links = self.links
if not include_self_link:
links = [x for x in links if x.rel != pystac.RelType.SELF]
Expand All @@ -483,7 +485,7 @@ def to_dict(self, include_self_link: bool = True) -> Dict[str, Any]:
"id": self.id,
"stac_version": pystac.get_stac_version(),
"description": self.description,
"links": [link.to_dict() for link in links],
"links": [link.to_dict(transform_href=transform_hrefs) for link in links],
}

if self.stac_extensions is not None:
Expand Down
8 changes: 6 additions & 2 deletions pystac/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,8 +515,12 @@ def add_item(
super().add_item(item, title, strategy)
item.set_collection(self)

def to_dict(self, include_self_link: bool = True) -> Dict[str, Any]:
d = super().to_dict(include_self_link)
def to_dict(
self, include_self_link: bool = True, transform_hrefs: bool = True
) -> Dict[str, Any]:
d = super().to_dict(
include_self_link=include_self_link, transform_hrefs=transform_hrefs
)
d["extent"] = self.extent.to_dict()
d["license"] = self.license
if self.stac_extensions is not None:
Expand Down
6 changes: 4 additions & 2 deletions pystac/item.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,9 @@ def get_collection(self) -> Optional[Collection]:
else:
return cast(Collection, collection_link.resolve_stac_object().target)

def to_dict(self, include_self_link: bool = True) -> Dict[str, Any]:
def to_dict(
self, include_self_link: bool = True, transform_hrefs: bool = True
) -> Dict[str, Any]:
links = self.links
if not include_self_link:
links = [x for x in links if x.rel != pystac.RelType.SELF]
Expand All @@ -303,7 +305,7 @@ def to_dict(self, include_self_link: bool = True) -> Dict[str, Any]:
"id": self.id,
"properties": self.properties,
"geometry": self.geometry,
"links": [link.to_dict() for link in links],
"links": [link.to_dict(transform_href=transform_hrefs) for link in links],
"assets": assets,
}

Expand Down
20 changes: 16 additions & 4 deletions pystac/item_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def map_item(item_or_dict: ItemLike) -> pystac.Item:
if isinstance(item_or_dict, pystac.Item):
return item_or_dict.clone() if clone_items else item_or_dict
else:
return pystac.Item.from_dict(item_or_dict)
return pystac.Item.from_dict(item_or_dict, preserve_dict=clone_items)

self.items = list(map(map_item, items))
self.extra_fields = extra_fields or {}
Expand Down Expand Up @@ -116,11 +116,23 @@ def __add__(self, other: object) -> "ItemCollection":

return ItemCollection(items=combined, clone_items=False)

def to_dict(self) -> Dict[str, Any]:
"""Serializes an :class:`ItemCollection` instance to a JSON-like dictionary."""
def to_dict(self, transform_hrefs: bool = False) -> Dict[str, Any]:
"""Serializes an :class:`ItemCollection` instance to a JSON-like dictionary.

Args:
transform_hrefs: If True, transform the HREF of hierarchical links
of Items based on the type of catalog the Item belongs to (if any).
I.e. if the item belongs to a root catalog that is
RELATIVE_PUBLISHED or SELF_CONTAINED,
hierarchical link HREFs will be transformed to be relative to the
catalog root. This can be slow if the Items have root links that
have not yet been resolved. Defaults to False.
"""
return {
"type": "FeatureCollection",
"features": [item.to_dict() for item in self.items],
"features": [
item.to_dict(transform_hrefs=transform_hrefs) for item in self.items
],
**self.extra_fields,
}

Expand Down
29 changes: 25 additions & 4 deletions pystac/link.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,16 @@ def href(self) -> str:
raise ValueError(f"{self} does not have an HREF set.")
return result

def get_href(self) -> Optional[str]:
def get_href(self, transform_href: bool = True) -> Optional[str]:
"""Gets the HREF for this link.

Args:
transform_href: If True, transform the HREF based on the type of
catalog the owner belongs to (if any). I.e. if the link owner
belongs to a root catalog that is RELATIVE_PUBLISHED or SELF_CONTAINED,
the HREF will be transformed to be relative to the catalog root
if this is a hierarchical link relation.

Returns:
str: Returns this link's HREF. If there is an owner of the link and
the root catalog (if there is one) is of type RELATIVE_PUBLISHED,
Expand All @@ -144,7 +151,13 @@ def get_href(self) -> Optional[str]:
else:
href = self._target_href

if href and is_absolute_href(href) and self.owner and self.owner.get_root():
if (
transform_href
and href
and is_absolute_href(href)
and self.owner
and self.owner.get_root()
):
root = self.owner.get_root()
rel_links = [
*HIERARCHICAL_LINKS,
Expand Down Expand Up @@ -304,14 +317,22 @@ def is_resolved(self) -> bool:
"""
return self._target_object is not None

def to_dict(self) -> Dict[str, Any]:
def to_dict(self, transform_href: bool = True) -> Dict[str, Any]:
"""Generate a dictionary representing the JSON of this serialized Link.

Args:
transform_href: If True, transform the HREF based on the type of
catalog the owner belongs to (if any). I.e. if the link owner
belongs to a root catalog that is RELATIVE_PUBLISHED or SELF_CONTAINED,
the HREF will be transformed to be relative to the catalog root
if this is a hierarchical link relation.

Returns:
dict: A serialization of the Link that can be written out as JSON.
"""

d: Dict[str, Any] = {"rel": self.rel, "href": self.get_href()}
d: Dict[str, Any] = {
"rel": self.rel,
"href": self.get_href(transform_href=transform_href),
}

if self.media_type is not None:
d["type"] = self.media_type
Expand Down
6 changes: 3 additions & 3 deletions pystac/stac_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
import json
from typing import (
Any,
Callable,
Dict,
List,
Optional,
TYPE_CHECKING,
Tuple,
Type,
Union,
)

Expand Down Expand Up @@ -37,7 +37,7 @@


class StacIO(ABC):
_default_io: Optional[Type["StacIO"]] = None
_default_io: Optional[Callable[[], "StacIO"]] = None

@abstractmethod
def read_text(
Expand Down Expand Up @@ -251,7 +251,7 @@ def save_json(
self.write_text(dest, txt)

@classmethod
def set_default(cls, stac_io_class: Type["StacIO"]) -> None:
def set_default(cls, stac_io_class: Callable[[], "StacIO"]) -> None:
"""Set the StacIO class (or zero-argument factory) used to create the default StacIO."""
cls._default_io = stac_io_class

Expand Down
10 changes: 9 additions & 1 deletion pystac/stac_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,12 +428,20 @@ def _object_links(self) -> List[str]:
raise NotImplementedError

@abstractmethod
def to_dict(self, include_self_link: bool = True) -> Dict[str, Any]:
def to_dict(
self, include_self_link: bool = True, transform_hrefs: bool = True
) -> Dict[str, Any]:
"""Generate a dictionary representing the JSON of this serialized object.

Args:
include_self_link : If True, the dict will contain a self link
to this object. If False, the self link will be omitted.
transform_hrefs: If True, transform the HREF of hierarchical links
based on the type of catalog this object belongs to (if any).
I.e. if this object belongs to a root catalog that is
RELATIVE_PUBLISHED or SELF_CONTAINED,
hierarchical link HREFs will be transformed to be relative to the
catalog root.

Returns:
dict: A serialization of the object that can be written out as JSON.
"""
Expand Down
11 changes: 10 additions & 1 deletion tests/test_item_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pystac

from tests.utils import TestCases
from tests.utils.stac_io_mock import MockDefaultStacIO


class TestItemCollection(unittest.TestCase):
Expand Down Expand Up @@ -112,7 +113,7 @@ def test_from_relative_path(self) -> None:

def test_from_list_of_dicts(self) -> None:
item_dict = self.stac_io.read_json(self.SIMPLE_ITEM)
item_collection = pystac.ItemCollection(items=[item_dict])
item_collection = pystac.ItemCollection(items=[item_dict], clone_items=True)

self.assertEqual(item_collection[0].id, item_dict.get("id"))

Expand Down Expand Up @@ -176,3 +177,11 @@ def test_from_dict_sets_root(self) -> None:
item_collection = ItemCollection.from_dict(param_dict, root=catalog)
for item in item_collection.items:
self.assertEqual(item.get_root(), catalog)

def test_to_dict_does_not_read_root_link_of_items(self) -> None:
    """Check that ``ItemCollection.to_dict`` triggers no additional reads.

    With the mocked default StacIO installed, loading the collection from
    file and then serializing it is expected to produce exactly one
    recorded ``read_text`` call in total.
    """
    with MockDefaultStacIO() as recording_io:
        loaded = pystac.ItemCollection.from_file(self.ITEM_COLLECTION)
        loaded.to_dict()
        read_calls = recording_io.mock.read_text.call_count
        self.assertEqual(read_calls, 1)
32 changes: 28 additions & 4 deletions tests/utils/stac_io_mock.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,49 @@
from typing import Any, Union
from typing import Any, Optional, Union
from unittest.mock import Mock

import pystac
from pystac.stac_io import DefaultStacIO, StacIO


class MockStacIO(pystac.StacIO):
"""Creates a mock that records StacIO calls for testing and allows
clients to replace StacIO functionality, all within a context scope.

Args:
wrapped_stac_io: The StacIO that will be used to perform the calls.
Defaults to an instance of DefaultStacIO.
"""

def __init__(self) -> None:
mock: Mock
wrapped_stac_io: StacIO

def __init__(self, wrapped_stac_io: Optional[StacIO] = None) -> None:
self.mock = Mock()
if wrapped_stac_io is None:
self.wrapped_stac_io = DefaultStacIO()
else:
self.wrapped_stac_io = wrapped_stac_io

def read_text(
self, source: Union[str, pystac.Link], *args: Any, **kwargs: Any
) -> str:
self.mock.read_text(source)
return pystac.StacIO.default().read_text(source)
return self.wrapped_stac_io.read_text(source)

def write_text(
self, dest: Union[str, pystac.Link], txt: str, *args: Any, **kwargs: Any
) -> None:
self.mock.write_text(dest, txt)
pystac.StacIO.default().write_text(dest, txt)
self.wrapped_stac_io.write_text(dest, txt)


class MockDefaultStacIO(object):
    """Context manager that installs a :class:`MockStacIO` as the default StacIO.

    On entry, the default StacIO factory is replaced by one that always
    returns a single new ``MockStacIO``; that mock is what the ``with``
    statement binds, so callers can inspect the calls it recorded.

    On exit, the default that was configured before entry is restored. If no
    default had been configured, ``DefaultStacIO`` is installed.
    """

    def __enter__(self) -> MockStacIO:
        # Remember the currently configured default so __exit__ can put it
        # back instead of unconditionally overwriting it with DefaultStacIO.
        self._previous_default = pystac.StacIO._default_io
        mock = MockStacIO()
        pystac.StacIO.set_default(lambda: mock)
        return mock

    def __exit__(self, *args: Any) -> None:
        # getattr guards against __exit__ being invoked without __enter__.
        previous = getattr(self, "_previous_default", None)
        if previous is None:
            previous = DefaultStacIO
        pystac.StacIO.set_default(previous)