diff --git a/CHANGELOG.md b/CHANGELOG.md index f07bad38e..515b1ff68 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,15 @@ ## [Unreleased] +### Fixed + +- Update usage of jsonschema ([#1215](https://github.com/stac-utils/pystac/pull/1215)) + +### Deprecated + +- `pystac.validation.local_validator.LocalValidator` ([#1215](https://github.com/stac-utils/pystac/pull/1215)) + + ## [v1.8.3] - 2023-07-12 ### Added diff --git a/pyproject.toml b/pyproject.toml index b0edf700d..ce6dc76fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,7 @@ test = [ "doc8~=1.1", "html5lib~=1.1", "jinja2<4.0", - "jsonschema>=4.0.1,<4.18", + "jsonschema~=4.18", "mypy~=1.2", "orjson~=3.8", "pre-commit~=3.2", @@ -64,8 +64,7 @@ test = [ "types-urllib3~=1.26", ] urllib3 = ["urllib3>=1.26"] -# jsonschema v4.18.2 breaks validation, and it feels safer to set a ceiling rather than just skip this version. The ceiling should be removed when the v4.18 lineage has settled down and feels safer. -validation = ["jsonschema>=4.0.1,<4.18"] +validation = ["jsonschema~=4.18"] [project.urls] homepage = "https://github.com/stac-utils/pystac" @@ -88,9 +87,6 @@ select = ["E", "F", "I"] [tool.pytest.ini_options] filterwarnings = [ "error", - # Allows jsonschema's RefResolver deprecation warning through until we're - # updated to support jsonschema v4.18 - "default::DeprecationWarning:pystac.validation.*", ] [build-system] diff --git a/pystac/validation/__init__.py b/pystac/validation/__init__.py index 5eb4d4475..1d5ebc37f 100644 --- a/pystac/validation/__init__.py +++ b/pystac/validation/__init__.py @@ -4,11 +4,12 @@ import pystac from pystac.serialization.identify import STACVersionID, identify_stac_object +from pystac.stac_object import STACObjectType from pystac.utils import make_absolute_href from pystac.validation.schema_uri_map import OldExtensionSchemaUriMap if TYPE_CHECKING: - from pystac.stac_object import STACObject, STACObjectType + from pystac.stac_object import STACObject # Import after above class definition diff --git a/pystac/validation/local_validator.py b/pystac/validation/local_validator.py index fc6212fe7..1ac464c90 100644 --- a/pystac/validation/local_validator.py +++ b/pystac/validation/local_validator.py @@ -1,8 +1,10 @@ import json import sys +import warnings from typing import Any, Dict, List, cast -from jsonschema import Draft7Validator, RefResolver, ValidationError +from jsonschema import Draft7Validator, ValidationError +from referencing import Registry, Resource from pystac.errors import STACLocalValidationError from pystac.version import STACVersion @@ -13,27 +15,93 @@ from importlib.resources import files as importlib_resources_files VERSION = STACVersion.DEFAULT_STAC_VERSION -ITEM_SCHEMA_URI = ( + + +def _read_schema(file_name: str) -> Dict[str, Any]: + with importlib_resources_files("pystac.validation.jsonschemas").joinpath( + file_name + ).open("r") as f: + return cast(Dict[str, Any], json.load(f)) + + +def get_local_schema_cache() -> Dict[str, Dict[str, Any]]: + return { + **{ + ( + f"https://schemas.stacspec.org/v{VERSION}/" + f"{name}-spec/json-schema/{name}.json" + ): _read_schema(f"stac-spec/v{VERSION}/{name}.json") + for name in ("item", "catalog", "collection") + }, + **{ + f"https://geojson.org/schema/{name}.json": _read_schema( + f"geojson/{name}.json" + ) + for name in ("Feature", "Geometry") + }, + **{ + ( + f"https://schemas.stacspec.org/v{VERSION}/" + f"item-spec/json-schema/{name}.json" + ): _read_schema(f"stac-spec/v{VERSION}/{name}.json") + for name in ( + "basics", + "datetime", + "instrument", + "licensing", + "provider", + ) + }, + } + + +############################### DEPRECATED ################################# + +_deprecated_ITEM_SCHEMA_URI = ( f"https://schemas.stacspec.org/v{VERSION}/item-spec/json-schema/item.json" ) -COLLECTION_SCHEMA_URI = ( +_deprecated_COLLECTION_SCHEMA_URI = ( f"https://schemas.stacspec.org/v{VERSION}/" "collection-spec/json-schema/collection.json" ) -CATALOG_SCHEMA_URI = ( +_deprecated_CATALOG_SCHEMA_URI = ( f"https://schemas.stacspec.org/v{VERSION}/catalog-spec/json-schema/catalog.json" ) +deprecated_names = ["ITEM_SCHEMA_URI", "COLLECTION_SCHEMA_URI", "CATALOG_SCHEMA_URI"] + + +def __getattr__(name: str) -> Any: + if name in deprecated_names: + warnings.warn(f"{name} is deprecated and will be removed in v2.", FutureWarning) + return globals()[f"_deprecated_{name}"] + raise AttributeError(f"module {__name__} has no attribute {name}") + class LocalValidator: + def __init__(self) -> None: + """DEPRECATED""" + warnings.warn( + "``LocalValidator`` is deprecated and will be removed in v2.", + DeprecationWarning, + ) + self.schema_cache = get_local_schema_cache() + + def registry(self) -> Any: + return Registry().with_resources( + [ + (k, Resource.from_contents(v)) for k, v in self.schema_cache.items() + ] # type: ignore + ) + def _validate_from_local( self, schema_uri: str, stac_dict: Dict[str, Any] ) -> List[ValidationError]: - if schema_uri == ITEM_SCHEMA_URI: + if schema_uri == _deprecated_ITEM_SCHEMA_URI: validator = self.item_validator(VERSION) - elif schema_uri == COLLECTION_SCHEMA_URI: + elif schema_uri == _deprecated_COLLECTION_SCHEMA_URI: validator = self.collection_validator(VERSION) - elif schema_uri == CATALOG_SCHEMA_URI: + elif schema_uri == _deprecated_CATALOG_SCHEMA_URI: validator = self.catalog_validator(VERSION) else: raise STACLocalValidationError( @@ -43,22 +111,7 @@ def _validate_from_local( def _validator(self, stac_type: str, version: str) -> Draft7Validator: schema = _read_schema(f"stac-spec/v{version}/{stac_type}.json") - resolver = RefResolver.from_schema(schema) - resolver.store[ - f"https://schemas.stacspec.org/v{version}/collection-spec/json-schema/collection.json" - ] = _read_schema(f"stac-spec/v{version}/collection.json") - resolver.store[ - f"https://schemas.stacspec.org/v{version}/item-spec/json-schema/item.json" - ] = _read_schema(f"stac-spec/v{version}/item.json") - for name in ("Feature", "Geometry"): - resolver.store[f"https://geojson.org/schema/{name}.json"] = _read_schema( - f"geojson/{name}.json" - ) - for name in ("basics", "datetime", "instrument", "licensing", "provider"): - resolver.store[ - f"https://schemas.stacspec.org/v{version}/item-spec/json-schema/{name}.json" - ] = _read_schema(f"stac-spec/v{version}/{name}.json") - return Draft7Validator(schema, resolver=resolver) + return Draft7Validator(schema, registry=self.registry) def catalog_validator(self, version: str = VERSION) -> Draft7Validator: return self._validator("catalog", version) @@ -68,10 +121,3 @@ def collection_validator(self, version: str = VERSION) -> Draft7Validator: def item_validator(self, version: str = VERSION) -> Draft7Validator: return self._validator("item", version) - - -def _read_schema(file_name: str) -> Dict[str, Any]: - with importlib_resources_files("pystac.validation.jsonschemas").joinpath( - file_name - ).open("r") as f: - return cast(Dict[str, Any], json.load(f)) diff --git a/pystac/validation/stac_validator.py b/pystac/validation/stac_validator.py index 1fbb9924e..35d6d826f 100644 --- a/pystac/validation/stac_validator.py +++ b/pystac/validation/stac_validator.py @@ -1,11 +1,12 @@ import json import logging +import warnings from abc import ABC, abstractmethod from typing import Any, Dict, List, Optional, Tuple import pystac import pystac.utils -from pystac.errors import STACLocalValidationError, STACValidationError +from pystac.errors import STACValidationError from pystac.stac_object import STACObjectType from pystac.validation.schema_uri_map import DefaultSchemaUriMap, SchemaUriMap @@ -13,8 +14,9 @@ import jsonschema import jsonschema.exceptions import jsonschema.validators + from referencing import Registry, Resource - from pystac.validation.local_validator import LocalValidator + from pystac.validation.local_validator import get_local_schema_cache HAS_JSONSCHEMA = True except ImportError: @@ -149,20 +151,35 @@ def __init__(self, schema_uri_map: Optional[SchemaUriMap] = None) -> None: else: self.schema_uri_map = DefaultSchemaUriMap() - self.schema_cache = {} + self.schema_cache = get_local_schema_cache() - def get_schema_from_uri(self, schema_uri: str) -> Tuple[Dict[str, Any], Any]: + def _get_schema(self, schema_uri: str) -> Dict[str, Any]: if schema_uri not in self.schema_cache: s = json.loads(pystac.StacIO.default().read_text(schema_uri)) self.schema_cache[schema_uri] = s - - schema = self.schema_cache[schema_uri] - - resolver = jsonschema.validators.RefResolver( - base_uri=schema_uri, referrer=schema, store=self.schema_cache + id_field = "$id" if "$id" in s else "id" + if not s[id_field].startswith("http"): + s[id_field] = schema_uri + return self.schema_cache[schema_uri] + + @property + def registry(self) -> Any: + def retrieve(schema_uri: str) -> Resource[Dict[str, Any]]: + return Resource.from_contents(self._get_schema(schema_uri)) + + return Registry(retrieve=retrieve).with_resources( # type: ignore + [ + (k, Resource.from_contents(v)) for k, v in self.schema_cache.items() + ] # type: ignore ) - return schema, resolver + def get_schema_from_uri(self, schema_uri: str) -> Tuple[Dict[str, Any], Any]: + """DEPRECATED""" + warnings.warn( + "get_schema_from_uri is deprecated and will be removed in v2.", + DeprecationWarning, + ) + return self._get_schema(schema_uri), self.registry def _validate_from_uri( self, @@ -172,17 +189,13 @@ def _validate_from_uri( href: Optional[str] = None, ) -> None: try: - resolver = None - try: - errors = LocalValidator()._validate_from_local(schema_uri, stac_dict) - except STACLocalValidationError: - schema, resolver = self.get_schema_from_uri(schema_uri) - # This block is cribbed (w/ change in error handling) from - # jsonschema.validate - cls = jsonschema.validators.validator_for(schema) - cls.check_schema(schema) - validator = cls(schema, resolver=resolver) - errors = list(validator.iter_errors(stac_dict)) + schema = self._get_schema(schema_uri) + # This block is cribbed (w/ change in error handling) from + # jsonschema.validate + cls = jsonschema.validators.validator_for(schema) + cls.check_schema(schema) + validator = cls(schema, registry=self.registry) + errors = list(validator.iter_errors(stac_dict)) except Exception as e: logger.error(f"Exception while validating {stac_object_type} href: {href}") logger.exception(e) @@ -199,11 +212,6 @@ def _validate_from_uri( best = jsonschema.exceptions.best_match(errors) raise STACValidationError(msg, source=errors) from best - if resolver is not None: - for uri in resolver.store: - if uri not in self.schema_cache: - self.schema_cache[uri] = resolver.store[uri] - def validate_core( self, stac_dict: Dict[str, Any], diff --git a/tests/validation/cassettes/test_validate/TestValidate.test_validate_examples[example115].yaml b/tests/validation/cassettes/test_validate/TestValidate.test_validate_examples[example115].yaml index 7e72cfdf3..810df31d6 100644 --- a/tests/validation/cassettes/test_validate/TestValidate.test_validate_examples[example115].yaml +++ b/tests/validation/cassettes/test_validate/TestValidate.test_validate_examples[example115].yaml @@ -640,4 +640,128 @@ interactions: status: code: 200 message: OK +- request: + body: null + headers: + Connection: + - close + Host: + - stac-extensions.github.io + User-Agent: + - Python-urllib/3.9 + method: GET + uri: https://stac-extensions.github.io/projection/v1.0.0/schema.json + response: + body: + string: "{\n \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n \"$id\": + \"https://stac-extensions.github.io/projection/v1.0.0/schema.json\",\n \"title\": + \"Projection Extension\",\n \"description\": \"STAC Projection Extension + for STAC Items.\",\n \"oneOf\": [\n {\n \"$comment\": \"This is the + schema for STAC Items.\",\n \"allOf\": [\n {\n \"type\": + \"object\",\n \"required\": [\n \"type\",\n \"properties\",\n + \ \"assets\"\n ],\n \"properties\": {\n \"type\": + {\n \"const\": \"Feature\"\n },\n \"properties\": + {\n \"allOf\": [\n {\n \"$comment\": + \"Require fields here for item properties.\",\n \"required\": + [\n \"proj:epsg\"\n ]\n },\n + \ {\n \"$ref\": \"#/definitions/fields\"\n + \ }\n ]\n },\n \"assets\": + {\n \"type\": \"object\",\n \"additionalProperties\": + {\n \"$ref\": \"#/definitions/fields\"\n }\n }\n + \ }\n },\n {\n \"$ref\": \"#/definitions/stac_extensions\"\n + \ }\n ]\n },\n {\n \"$comment\": \"This is the schema + for STAC Collections.\",\n \"allOf\": [\n {\n \"type\": + \"object\",\n \"required\": [\n \"type\"\n ],\n + \ \"properties\": {\n \"type\": {\n \"const\": + \"Collection\"\n },\n \"assets\": {\n \"type\": + \"object\",\n \"additionalProperties\": {\n \"$ref\": + \"#/definitions/fields\"\n }\n },\n \"item_assets\": + {\n \"type\": \"object\",\n \"additionalProperties\": + {\n \"$ref\": \"#/definitions/fields\"\n }\n }\n + \ }\n },\n {\n \"$ref\": \"#/definitions/stac_extensions\"\n + \ }\n ]\n }\n ],\n \"definitions\": {\n \"stac_extensions\": + {\n \"type\": \"object\",\n \"required\": [\n \"stac_extensions\"\n + \ ],\n \"properties\": {\n \"stac_extensions\": {\n \"type\": + \"array\",\n \"contains\": {\n \"const\": \"https://stac-extensions.github.io/projection/v1.0.0/schema.json\"\n + \ }\n }\n }\n },\n \"fields\": {\n \"$comment\": + \"Add your new fields here. Don't require them here, do that above in the + item schema.\",\n \"type\": \"object\",\n \"properties\": {\n \"proj:epsg\":{\n + \ \"title\":\"EPSG code\",\n \"type\":[\n \"integer\",\n + \ \"null\"\n ]\n },\n \"proj:wkt2\":{\n \"title\":\"Coordinate + Reference System in WKT2 format\",\n \"type\":[\n \"string\",\n + \ \"null\"\n ]\n },\n \"proj:projjson\": + {\n \"title\":\"Coordinate Reference System in PROJJSON format\",\n + \ \"oneOf\": [\n {\n \"$ref\": \"https://proj.org/schemas/v0.2/projjson.schema.json\"\n + \ },\n {\n \"type\": \"null\"\n }\n + \ ]\n },\n \"proj:geometry\":{\n \"$ref\": + \"https://geojson.org/schema/Geometry.json\"\n },\n \"proj:bbox\":{\n + \ \"title\":\"Extent\",\n \"type\":\"array\",\n \"oneOf\": + [\n {\n \"minItems\":4,\n \"maxItems\":4\n + \ },\n {\n \"minItems\":6,\n \"maxItems\":6\n + \ }\n ],\n \"items\":{\n \"type\":\"number\"\n + \ }\n },\n \"proj:centroid\":{\n \"title\":\"Centroid\",\n + \ \"type\":\"object\",\n \"required\": [\n \"lat\",\n + \ \"lon\"\n ],\n \"properties\": {\n \"lat\": + {\n \"type\": \"number\",\n \"minimum\": -90,\n + \ \"maximum\": 90\n },\n \"lon\": {\n \"type\": + \"number\",\n \"minimum\": -180,\n \"maximum\": + 180\n }\n }\n },\n \"proj:shape\":{\n \"title\":\"Shape\",\n + \ \"type\":\"array\",\n \"minItems\":2,\n \"maxItems\":2,\n + \ \"items\":{\n \"type\":\"integer\"\n }\n },\n + \ \"proj:transform\":{\n \"title\":\"Transform\",\n \"type\":\"array\",\n + \ \"oneOf\": [\n {\n \"minItems\":6,\n \"maxItems\":6\n + \ },\n {\n \"minItems\":9,\n \"maxItems\":9\n + \ }\n ],\n \"items\":{\n \"type\":\"number\"\n + \ }\n }\n },\n \"patternProperties\": {\n \"^(?!proj:)\": + {}\n },\n \"additionalProperties\": false\n }\n }\n}" + headers: + Accept-Ranges: + - bytes + Access-Control-Allow-Origin: + - '*' + Age: + - '0' + Cache-Control: + - max-age=600 + Connection: + - close + Content-Length: + - '4646' + Content-Type: + - application/json; charset=utf-8 + Date: + - Wed, 13 Sep 2023 18:50:52 GMT + ETag: + - '"63e6651b-1226"' + Last-Modified: + - Fri, 10 Feb 2023 15:39:07 GMT + Server: + - GitHub.com + Strict-Transport-Security: + - max-age=31556952 + Vary: + - Accept-Encoding + Via: + - 1.1 varnish + X-Cache: + - HIT + X-Cache-Hits: + - '1' + X-Fastly-Request-ID: + - a0eb45653d9d666c06c2a07a8c6ed0eecd22bbe4 + X-GitHub-Request-Id: + - 3F32:105C:90E8A3:BBFF05:6501E745 + X-Served-By: + - cache-bos4626-BOS + X-Timer: + - S1694631052.253405,VS0,VE33 + expires: + - Wed, 13 Sep 2023 16:55:59 GMT + permissions-policy: + - interest-cohort=() + x-proxy-cache: + - MISS + status: + code: 200 + message: OK version: 1