Skip to content

Commit e4a17c1

Browse files
jsignell and gadomski authored
Remove references to RefResolver (#1215)
* Remove references to `RefResolver`
* Add dynamic fetching of remote schemas
* Try to parse relative refs
* Replace non-http ids
* Let registry handle relative ref links
* Undo error list changes
* Remove LocalValidator class and populate schema_cache with local schemas instead
* Fix linting
* Rewrite cassettes
* Rewrite cassettes
* Just rewrite cassettes that need a rewrite
* Remove cast
* Importable even if jsonschema not importable
* Add back LocalValidator but make it deprecated
* Update changelog
* Deprecate global variables

---------

Co-authored-by: Pete Gadomski <[email protected]>
1 parent de6cfc2 commit e4a17c1

File tree

6 files changed

+247
-63
lines changed

6 files changed

+247
-63
lines changed

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,15 @@
22

33
## [Unreleased]
44

5+
### Fixed
6+
7+
- Update usage of jsonschema ([#1215](https://github.com/stac-utils/pystac/pull/1215))
8+
9+
### Deprecated
10+
11+
- `pystac.validation.local_validator.LocalValidator` ([#1215](https://github.com/stac-utils/pystac/pull/1215))
12+
13+
514
## [v1.8.3] - 2023-07-12
615

716
### Added

pyproject.toml

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ test = [
4949
"doc8~=1.1",
5050
"html5lib~=1.1",
5151
"jinja2<4.0",
52-
"jsonschema>=4.0.1,<4.18",
52+
"jsonschema~=4.18",
5353
"mypy~=1.2",
5454
"orjson~=3.8",
5555
"pre-commit~=3.2",
@@ -64,8 +64,7 @@ test = [
6464
"types-urllib3~=1.26",
6565
]
6666
urllib3 = ["urllib3>=1.26"]
67-
# jsonschema v4.18.2 breaks validation, and it feels safer to set a ceiling rather than just skip this version. The ceiling should be removed when the v4.18 lineage has settled down and feels safer.
68-
validation = ["jsonschema>=4.0.1,<4.18"]
67+
validation = ["jsonschema~=4.18"]
6968

7069
[project.urls]
7170
homepage = "https://github.com/stac-utils/pystac"
@@ -88,9 +87,6 @@ select = ["E", "F", "I"]
8887
[tool.pytest.ini_options]
8988
filterwarnings = [
9089
"error",
91-
# Allows jsonschema's RefResolver deprecation warning through until we're
92-
# updated to support jsonschema v4.18
93-
"default::DeprecationWarning:pystac.validation.*",
9490
]
9591

9692
[build-system]

pystac/validation/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@
44

55
import pystac
66
from pystac.serialization.identify import STACVersionID, identify_stac_object
7+
from pystac.stac_object import STACObjectType
78
from pystac.utils import make_absolute_href
89
from pystac.validation.schema_uri_map import OldExtensionSchemaUriMap
910

1011
if TYPE_CHECKING:
11-
from pystac.stac_object import STACObject, STACObjectType
12+
from pystac.stac_object import STACObject
1213

1314

1415
# Import after above class definition

pystac/validation/local_validator.py

Lines changed: 76 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import json
22
import sys
3+
import warnings
34
from typing import Any, Dict, List, cast
45

5-
from jsonschema import Draft7Validator, RefResolver, ValidationError
6+
from jsonschema import Draft7Validator, ValidationError
7+
from referencing import Registry, Resource
68

79
from pystac.errors import STACLocalValidationError
810
from pystac.version import STACVersion
@@ -13,27 +15,93 @@
1315
from importlib.resources import files as importlib_resources_files
1416

1517
VERSION = STACVersion.DEFAULT_STAC_VERSION
16-
ITEM_SCHEMA_URI = (
18+
19+
20+
def _read_schema(file_name: str) -> Dict[str, Any]:
    """Load a JSON schema file shipped inside ``pystac.validation.jsonschemas``.

    Args:
        file_name: Path of the schema relative to the package root, e.g.
            ``"geojson/Feature.json"``.

    Returns:
        The parsed JSON document.
    """
    resource = importlib_resources_files("pystac.validation.jsonschemas").joinpath(
        file_name
    )
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # text encoding, which is not UTF-8 everywhere (e.g. cp1252 on Windows).
    return cast(Dict[str, Any], json.loads(resource.read_text(encoding="utf-8")))
25+
26+
27+
def get_local_schema_cache() -> Dict[str, Dict[str, Any]]:
    """Build a fresh mapping of schema URI -> bundled schema document.

    The mapping covers, for the STAC version in ``VERSION``, the core
    item/catalog/collection schemas, the GeoJSON ``Feature`` and ``Geometry``
    schemas, and the common-metadata schemas published under ``item-spec``.
    Every call re-reads the files through ``_read_schema`` and returns a new
    dict.
    """
    cache: Dict[str, Dict[str, Any]] = {}

    # Core STAC object schemas.
    for name in ("item", "catalog", "collection"):
        uri = (
            f"https://schemas.stacspec.org/v{VERSION}/"
            f"{name}-spec/json-schema/{name}.json"
        )
        cache[uri] = _read_schema(f"stac-spec/v{VERSION}/{name}.json")

    # GeoJSON schemas.
    for name in ("Feature", "Geometry"):
        cache[f"https://geojson.org/schema/{name}.json"] = _read_schema(
            f"geojson/{name}.json"
        )

    # Common metadata schemas, all published under item-spec.
    for name in ("basics", "datetime", "instrument", "licensing", "provider"):
        uri = (
            f"https://schemas.stacspec.org/v{VERSION}/"
            f"item-spec/json-schema/{name}.json"
        )
        cache[uri] = _read_schema(f"stac-spec/v{VERSION}/{name}.json")

    return cache
56+
57+
58+
############################### DEPRECATED #################################
59+
60+
_deprecated_ITEM_SCHEMA_URI = (
1761
f"https://schemas.stacspec.org/v{VERSION}/item-spec/json-schema/item.json"
1862
)
19-
COLLECTION_SCHEMA_URI = (
63+
_deprecated_COLLECTION_SCHEMA_URI = (
2064
f"https://schemas.stacspec.org/v{VERSION}/"
2165
"collection-spec/json-schema/collection.json"
2266
)
23-
CATALOG_SCHEMA_URI = (
67+
_deprecated_CATALOG_SCHEMA_URI = (
2468
f"https://schemas.stacspec.org/v{VERSION}/catalog-spec/json-schema/catalog.json"
2569
)
2670

71+
# Public names of the deprecated module-level constants. Each is served from
# the ``_deprecated_<name>`` global of this module by ``__getattr__`` below.
deprecated_names = ["ITEM_SCHEMA_URI", "COLLECTION_SCHEMA_URI", "CATALOG_SCHEMA_URI"]


def __getattr__(name: str) -> Any:
    """Module-level attribute hook that serves the deprecated constants.

    Args:
        name: Attribute that was not found through normal module lookup.

    Returns:
        The value of the ``_deprecated_<name>`` global when ``name`` is listed
        in ``deprecated_names``.

    Raises:
        AttributeError: If ``name`` is not one of the deprecated names.
    """
    if name in deprecated_names:
        # stacklevel=2 attributes the warning to the code that accessed the
        # deprecated name rather than to this hook.
        warnings.warn(
            f"{name} is deprecated and will be removed in v2.",
            FutureWarning,
            stacklevel=2,
        )
        return globals()[f"_deprecated_{name}"]
    raise AttributeError(f"module {__name__} has no attribute {name}")
79+
2780

2881
class LocalValidator:
82+
def __init__(self) -> None:
    """DEPRECATED.

    Emits a ``DeprecationWarning`` and fills ``self.schema_cache`` with the
    result of ``get_local_schema_cache()``.
    """
    # stacklevel=2 points the warning at the code instantiating the class
    # instead of at this constructor.
    warnings.warn(
        "``LocalValidator`` is deprecated and will be removed in v2.",
        DeprecationWarning,
        stacklevel=2,
    )
    self.schema_cache = get_local_schema_cache()
89+
90+
@property
def registry(self) -> Any:
    """Registry holding one resource per entry of ``self.schema_cache``.

    Exposed as a property because ``_validator`` passes ``self.registry``
    straight to ``Draft7Validator(schema, registry=...)``; as a plain method
    that expression handed the validator a bound method instead of a
    ``Registry``. A new registry is built on every access.
    """
    return Registry().with_resources(
        [
            (k, Resource.from_contents(v)) for k, v in self.schema_cache.items()
        ]  # type: ignore
    )
96+
2997
def _validate_from_local(
3098
self, schema_uri: str, stac_dict: Dict[str, Any]
3199
) -> List[ValidationError]:
32-
if schema_uri == ITEM_SCHEMA_URI:
100+
if schema_uri == _deprecated_ITEM_SCHEMA_URI:
33101
validator = self.item_validator(VERSION)
34-
elif schema_uri == COLLECTION_SCHEMA_URI:
102+
elif schema_uri == _deprecated_COLLECTION_SCHEMA_URI:
35103
validator = self.collection_validator(VERSION)
36-
elif schema_uri == CATALOG_SCHEMA_URI:
104+
elif schema_uri == _deprecated_CATALOG_SCHEMA_URI:
37105
validator = self.catalog_validator(VERSION)
38106
else:
39107
raise STACLocalValidationError(
@@ -43,22 +111,7 @@ def _validate_from_local(
43111

44112
def _validator(self, stac_type: str, version: str) -> Draft7Validator:
45113
schema = _read_schema(f"stac-spec/v{version}/{stac_type}.json")
46-
resolver = RefResolver.from_schema(schema)
47-
resolver.store[
48-
f"https://schemas.stacspec.org/v{version}/collection-spec/json-schema/collection.json"
49-
] = _read_schema(f"stac-spec/v{version}/collection.json")
50-
resolver.store[
51-
f"https://schemas.stacspec.org/v{version}/item-spec/json-schema/item.json"
52-
] = _read_schema(f"stac-spec/v{version}/item.json")
53-
for name in ("Feature", "Geometry"):
54-
resolver.store[f"https://geojson.org/schema/{name}.json"] = _read_schema(
55-
f"geojson/{name}.json"
56-
)
57-
for name in ("basics", "datetime", "instrument", "licensing", "provider"):
58-
resolver.store[
59-
f"https://schemas.stacspec.org/v{version}/item-spec/json-schema/{name}.json"
60-
] = _read_schema(f"stac-spec/v{version}/{name}.json")
61-
return Draft7Validator(schema, resolver=resolver)
114+
return Draft7Validator(schema, registry=self.registry)
62115

63116
def catalog_validator(self, version: str = VERSION) -> Draft7Validator:
64117
return self._validator("catalog", version)
@@ -68,10 +121,3 @@ def collection_validator(self, version: str = VERSION) -> Draft7Validator:
68121

69122
def item_validator(self, version: str = VERSION) -> Draft7Validator:
70123
return self._validator("item", version)
71-
72-
73-
def _read_schema(file_name: str) -> Dict[str, Any]:
74-
with importlib_resources_files("pystac.validation.jsonschemas").joinpath(
75-
file_name
76-
).open("r") as f:
77-
return cast(Dict[str, Any], json.load(f))

pystac/validation/stac_validator.py

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,22 @@
11
import json
22
import logging
3+
import warnings
34
from abc import ABC, abstractmethod
45
from typing import Any, Dict, List, Optional, Tuple
56

67
import pystac
78
import pystac.utils
8-
from pystac.errors import STACLocalValidationError, STACValidationError
9+
from pystac.errors import STACValidationError
910
from pystac.stac_object import STACObjectType
1011
from pystac.validation.schema_uri_map import DefaultSchemaUriMap, SchemaUriMap
1112

1213
try:
1314
import jsonschema
1415
import jsonschema.exceptions
1516
import jsonschema.validators
17+
from referencing import Registry, Resource
1618

17-
from pystac.validation.local_validator import LocalValidator
19+
from pystac.validation.local_validator import get_local_schema_cache
1820

1921
HAS_JSONSCHEMA = True
2022
except ImportError:
@@ -149,20 +151,35 @@ def __init__(self, schema_uri_map: Optional[SchemaUriMap] = None) -> None:
149151
else:
150152
self.schema_uri_map = DefaultSchemaUriMap()
151153

152-
self.schema_cache = {}
154+
self.schema_cache = get_local_schema_cache()
153155

154-
def get_schema_from_uri(self, schema_uri: str) -> Tuple[Dict[str, Any], Any]:
156+
def _get_schema(self, schema_uri: str) -> Dict[str, Any]:
    """Return the schema for ``schema_uri``, fetching and caching it if needed.

    On a cache miss the schema is read through ``pystac.StacIO.default()``,
    parsed as JSON and stored in ``self.schema_cache``. If the fetched schema
    declares an identifier (``$id``, falling back to ``id``) that does not
    start with ``http``, the identifier is replaced by ``schema_uri``.

    Args:
        schema_uri: URI the schema is cached under and fetched from.

    Returns:
        The cached schema document.
    """
    if schema_uri not in self.schema_cache:
        s = json.loads(pystac.StacIO.default().read_text(schema_uri))
        self.schema_cache[schema_uri] = s
        id_field = "$id" if "$id" in s else "id"
        declared_id = s.get(id_field)
        # Only rewrite an identifier that is actually present: a schema with
        # neither "$id" nor "id" used to raise KeyError here, and a
        # non-string identifier raised AttributeError.
        if isinstance(declared_id, str) and not declared_id.startswith("http"):
            s[id_field] = schema_uri
    return self.schema_cache[schema_uri]
164+
165+
@property
def registry(self) -> Any:
    """Registry seeded from ``self.schema_cache``.

    Every cached schema is registered under its cache key, and
    ``self._get_schema`` is installed as the registry's ``retrieve``
    callback. A new registry is built on each access.
    """

    def _fetch(uri: str) -> Resource[Dict[str, Any]]:
        # Wrap the schema returned by _get_schema as a registry resource.
        return Resource.from_contents(self._get_schema(uri))

    preloaded = [
        (cached_uri, Resource.from_contents(cached_schema))
        for cached_uri, cached_schema in self.schema_cache.items()
    ]
    return Registry(retrieve=_fetch).with_resources(preloaded)  # type: ignore
164175

165-
return schema, resolver
176+
def get_schema_from_uri(self, schema_uri: str) -> Tuple[Dict[str, Any], Any]:
    """DEPRECATED.

    Args:
        schema_uri: URI of the schema to look up.

    Returns:
        A ``(schema, registry)`` tuple: the result of
        ``self._get_schema(schema_uri)`` and the value of ``self.registry``.
    """
    # stacklevel=2 attributes the warning to the caller of this deprecated
    # method rather than to the method itself.
    warnings.warn(
        "get_schema_from_uri is deprecated and will be removed in v2.",
        DeprecationWarning,
        stacklevel=2,
    )
    return self._get_schema(schema_uri), self.registry
166183

167184
def _validate_from_uri(
168185
self,
@@ -172,17 +189,13 @@ def _validate_from_uri(
172189
href: Optional[str] = None,
173190
) -> None:
174191
try:
175-
resolver = None
176-
try:
177-
errors = LocalValidator()._validate_from_local(schema_uri, stac_dict)
178-
except STACLocalValidationError:
179-
schema, resolver = self.get_schema_from_uri(schema_uri)
180-
# This block is cribbed (w/ change in error handling) from
181-
# jsonschema.validate
182-
cls = jsonschema.validators.validator_for(schema)
183-
cls.check_schema(schema)
184-
validator = cls(schema, resolver=resolver)
185-
errors = list(validator.iter_errors(stac_dict))
192+
schema = self._get_schema(schema_uri)
193+
# This block is cribbed (w/ change in error handling) from
194+
# jsonschema.validate
195+
cls = jsonschema.validators.validator_for(schema)
196+
cls.check_schema(schema)
197+
validator = cls(schema, registry=self.registry)
198+
errors = list(validator.iter_errors(stac_dict))
186199
except Exception as e:
187200
logger.error(f"Exception while validating {stac_object_type} href: {href}")
188201
logger.exception(e)
@@ -199,11 +212,6 @@ def _validate_from_uri(
199212
best = jsonschema.exceptions.best_match(errors)
200213
raise STACValidationError(msg, source=errors) from best
201214

202-
if resolver is not None:
203-
for uri in resolver.store:
204-
if uri not in self.schema_cache:
205-
self.schema_cache[uri] = resolver.store[uri]
206-
207215
def validate_core(
208216
self,
209217
stac_dict: Dict[str, Any],

0 commit comments

Comments
 (0)