Skip to content

Commit cfe0f40

Browse files
committed
Add 'remove additional props' processor
1 parent d74977e commit cfe0f40

File tree

12 files changed

+299
-104
lines changed

12 files changed

+299
-104
lines changed

README.md

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
1-
`jschon-sort` sorts a JSON or YAML document according to its JSON Schema:
1+
Provides JSON tools:
2+
3+
- `jschon-sort` sorts a JSON or YAML document according to its JSON Schema:
24
object properties are ordered to match the order in which JSON Schema properties (that match them) are declared.
35

6+
- `jschon-remove-additional-props` removes properties not defined in the JSON Schema.
7+
48
The "jschon" name reflects that these tools are built on the [jschon](https://github.com/marksparkza/jschon) library
59
for JSON Schema handling.
610

@@ -28,13 +32,14 @@ jschon-sort --schema ../schema.json file.yaml
2832

2933
```python
3034
import jschon
31-
import jschon_sort
35+
import jschon_tools
3236

3337
jschon.create_catalog('2020-12')
3438
...
35-
sorted_doc_data = jschon_sort.sort_doc_by_schema(
39+
sorted_doc_data = jschon_tools.process_json_doc(
3640
schema_data=schema_data,
3741
doc_data=doc_data,
42+
sort=True,
3843
)
3944
```
4045

jschon_sort/__init__.py

Lines changed: 0 additions & 5 deletions
This file was deleted.

jschon_sort/cli.py

Lines changed: 0 additions & 62 deletions
This file was deleted.

jschon_tools/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from ._main import process_json_doc
2+
3+
__all__ = [
4+
'process_json_doc',
5+
]
Lines changed: 42 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import copy
21
import math
32
from typing import cast
43
from typing import Dict
@@ -11,6 +10,9 @@
1110
from jschon.json import JSONCompatible
1211

1312

13+
_END_SORT_KEY = (math.inf,)
14+
15+
1416
def _get_sort_keys_for_json_nodes(root_node: jschon.JSON) -> Mapping[jschon.JSONPointer, Tuple[int, ...]]:
1517
"""
1618
Gets a mapping from JSON nodes (as JSON pointers) to sort keys (as tuples of integers) that match their position
@@ -39,20 +41,7 @@ def _recurse(node: jschon.JSON, node_sort_key: Tuple[int, ...]) -> None:
3941
return mapping
4042

4143

42-
def sort_doc_by_schema(*, doc_data: JSONCompatible, schema_data: Mapping[str, JSONCompatible]) -> JSONCompatible:
43-
try:
44-
root_schema = jschon.JSONSchema(schema_data)
45-
except jschon.CatalogError:
46-
# jschon only supports newer jsonschema drafts
47-
schema_data = dict(schema_data)
48-
schema_data['$schema'] = "https://json-schema.org/draft/2020-12/schema"
49-
root_schema = jschon.JSONSchema(schema_data)
50-
51-
doc_json = jschon.JSON(doc_data)
52-
res = root_schema.evaluate(doc_json)
53-
if not res.valid:
54-
raise ValueError('Document failed schema validation')
55-
44+
def _get_sort_keys_for_json_doc(*, root_scope: jschon.jsonschema.Scope) -> Mapping[jschon.JSONPointer, Tuple[int, ...]]:
5645
schema_sort_keys_cache: Dict[jschon.URI, Mapping[jschon.JSONPointer, Tuple[int, ...]]] = {}
5746

5847
def _get_sort_keys_for_schema(schema: jschon.JSONSchema) -> Mapping[jschon.JSONPointer, Tuple[int, ...]]:
@@ -73,11 +62,37 @@ def _traverse_scope(scope: jschon.jsonschema.Scope) -> None:
7362
for child in scope.iter_children():
7463
_traverse_scope(child)
7564

76-
_traverse_scope(res)
65+
_traverse_scope(root_scope)
7766

78-
end_sort_key = (math.inf,)
67+
return doc_sort_keys
68+
69+
70+
def _get_root_scope(doc_json: jschon.JSON, schema_data: Mapping[str, JSONCompatible]) -> jschon.jsonschema.Scope:
    """
    Evaluates the document against the schema and returns the resulting root scope.

    @param doc_json: the document to evaluate (jschon's representation)
    @param schema_data: the JSON Schema, as plain data
    @return: the result of evaluating the schema against the document
    @raise ValueError: if the evaluation result is not valid
    """
    try:
        schema = jschon.JSONSchema(schema_data)
    except jschon.CatalogError:
        # jschon only supports newer jsonschema drafts, so retry on a copy of the
        # schema data with "$schema" overridden (the caller's mapping is not mutated)
        overridden = {**schema_data, '$schema': "https://json-schema.org/draft/2020-12/schema"}
        schema = jschon.JSONSchema(overridden)

    scope = schema.evaluate(doc_json)
    if scope.valid:
        return scope
    raise ValueError('Document failed schema validation')
82+
83+
84+
def process_json_doc(
85+
*,
86+
doc_data: JSONCompatible,
87+
schema_data: Mapping[str, JSONCompatible],
88+
sort: bool = False,
89+
remove_additional_props: bool = False,
90+
) -> JSONCompatible:
91+
doc_json = jschon.JSON(doc_data)
92+
root_scope = _get_root_scope(doc_json, schema_data=schema_data)
93+
doc_sort_keys = _get_sort_keys_for_json_doc(root_scope=root_scope)
7994

80-
def _sort_json_node(node: JSONCompatible, json_node: jschon.JSON) -> JSONCompatible:
95+
def _traverse_node(node: JSONCompatible, json_node: jschon.JSON) -> JSONCompatible:
8196
"""
8297
@param node: the node being traversed (the data)
8398
@param json_node: the node being traversed (jschon's representation)
@@ -93,14 +108,18 @@ def _sort_json_node(node: JSONCompatible, json_node: jschon.JSON) -> JSONCompati
93108
v: JSONCompatible
94109
v_json: jschon.JSON
95110
for (k, v), v_json in zip(node.items(), object_data.values()):
96-
properties.append((k, _sort_json_node(v, v_json)))
111+
v = _traverse_node(v, v_json)
97112
# Keys which don't map to the schema (e.g. undefined properties when additionalProperties is missing,
98113
# defaulting to true) are assumed to come last (end_sort_key).
99114
# As a tie breaker for multiple such undefined properties, we use the key's name.
100115
# TODO: update jschon to add additional properties to res.children when appropriate
101-
key_sort_keys[k] = doc_sort_keys.get(v_json.path, end_sort_key), k
116+
sk = doc_sort_keys.get(v_json.path, _END_SORT_KEY)
117+
if sk is not _END_SORT_KEY or not remove_additional_props:
118+
key_sort_keys[k] = sk, k
119+
properties.append((k, v))
102120

103-
properties.sort(key=lambda pair: key_sort_keys[pair[0]])
121+
if sort:
122+
properties.sort(key=lambda pair: key_sort_keys[pair[0]])
104123

105124
# to maintain YAML round-trip data, copy node and re-populate
106125
node_copy = node.copy()
@@ -111,10 +130,10 @@ def _sort_json_node(node: JSONCompatible, json_node: jschon.JSON) -> JSONCompati
111130

112131
elif isinstance(node, list):
113132
list_data = cast(Sequence[jschon.JSON], json_node.data)
114-
return [_sort_json_node(node[idx], v_json) for idx, v_json in enumerate(list_data)]
133+
return [_traverse_node(node[idx], v_json) for idx, v_json in enumerate(list_data)]
115134

116135
return node
117136

118137
# we recurse down both the "JSON" and the actual document, and mutate only the actual document
119138
# which is the primitive type that we can serialize back to JSON/YAML easily
120-
return _sort_json_node(doc_data, doc_json)
139+
return _traverse_node(doc_data, doc_json)
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ class YamlIndent(NamedTuple):
1010
offset: int
1111

1212

13-
def create_yaml(*, indent: YamlIndent) -> ruyaml.main.YAML:
13+
def create_yaml_processor(*, indent: YamlIndent) -> ruyaml.main.YAML:
1414
def _null_representer(self: ruyaml.representer.BaseRepresenter, data: None) -> Any:
1515
return self.represent_scalar('tag:yaml.org,2002:null', 'null')
1616

jschon_tools/cli.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import argparse
2+
import json
3+
from typing import Mapping
4+
from typing import Tuple
5+
6+
import jschon
7+
8+
from ._main import process_json_doc
9+
from ._yaml import create_yaml_processor
10+
from ._yaml import YamlIndent
11+
12+
13+
def _make_parser(*, prog: str, description: str) -> argparse.ArgumentParser:
    """
    Builds the command-line parser shared by the console scripts in this module.

    @param prog: the program name shown in usage and help output
    @param description: the description shown in help output
    @return: a parser accepting ``path``, ``--schema``, ``--dry-run``/``-n``, ``--indent`` and ``--yaml-indent``
    """

    def _parse_yaml_indent(value: str) -> YamlIndent:
        # An anonymous lambda makes argparse report "invalid <lambda> value" on bad input;
        # raising ArgumentTypeError instead tells the user which format is expected.
        try:
            return YamlIndent(*map(int, value.split(',')))
        except (TypeError, ValueError):
            raise argparse.ArgumentTypeError(
                f'expected comma-separated integers in the form MAPPING,SEQUENCE,OFFSET, got {value!r}'
            ) from None

    parser = argparse.ArgumentParser(
        prog=prog,
        description=description,
    )
    parser.add_argument('path', help='path to the JSON / YAML document')
    parser.add_argument(
        '--schema', required=True, metavar='/path/to/schema.json', help='path to the JSON Schema document'
    )
    parser.add_argument(
        '--dry-run',
        '-n',
        help='if set, result is not persisted back to the original file',
        action='store_true',
    )
    parser.add_argument('--indent', type=int, default=4, help='indent size')
    parser.add_argument(
        '--yaml-indent',
        type=_parse_yaml_indent,
        metavar='MAPPING,SEQUENCE,OFFSET',
        default=YamlIndent(2, 4, 2),
        help='YAML indent size',
    )
    return parser
37+
38+
39+
def _is_yaml_path(path: str) -> bool:
40+
return path.endswith('.yaml') or path.endswith('.yml')
41+
42+
43+
def _load_doc_and_schema(
    args: argparse.Namespace,
) -> Tuple[jschon.json.JSONCompatible, Mapping[str, jschon.json.JSONCompatible]]:
    """
    Loads the document and its JSON Schema from the paths given on the command line.

    The document at ``args.path`` is parsed as YAML when the path has a YAML extension and as JSON otherwise;
    the schema at ``args.schema`` is always parsed as JSON.

    NOTE(review): both files are opened without an explicit encoding, so the platform default applies.

    @param args: parsed command-line arguments (reads ``path``, ``schema`` and ``yaml_indent``)
    @return: a ``(doc_data, schema_data)`` pair
    """
    with open(args.path) as doc_file:
        if not _is_yaml_path(args.path):
            doc_data = json.load(doc_file)
        else:
            doc_data = create_yaml_processor(indent=args.yaml_indent).load(doc_file)

    with open(args.schema) as schema_file:
        schema_data = json.load(schema_file)

    return doc_data, schema_data
57+
58+
59+
def _maybe_persist(doc_data: jschon.json.JSONCompatible, args: argparse.Namespace) -> None:
    """
    Writes the processed document back to ``args.path``, unless this is a dry run.

    The output format follows the path's extension: YAML for YAML paths (using ``args.yaml_indent``),
    JSON otherwise (using ``args.indent``).

    @param doc_data: the processed document data to write
    @param args: parsed command-line arguments (reads ``dry_run``, ``path``, ``yaml_indent`` and ``indent``)
    """
    if args.dry_run:
        return

    # decide on the format before the file is opened for writing (and thereby truncated)
    write_yaml = _is_yaml_path(args.path)
    with open(args.path, 'w') as out_file:
        if write_yaml:
            create_yaml_processor(indent=args.yaml_indent).dump(doc_data, out_file)
        else:
            json.dump(doc_data, out_file, indent=args.indent)
70+
71+
72+
def sort_main() -> None:
    """
    Entry point of the ``jschon-sort`` command.

    Parses the command line, loads the document and schema, processes the document with ``sort=True``
    and writes the result back unless a dry run was requested.
    """
    jschon.create_catalog('2020-12')

    args = _make_parser(
        prog='jschon-sort',
        description="Sorts a JSON or YAML document to match a JSON Schema's order of properties",
    ).parse_args()

    loaded_doc, loaded_schema = _load_doc_and_schema(args)
    processed_doc = process_json_doc(doc_data=loaded_doc, schema_data=loaded_schema, sort=True)
    _maybe_persist(processed_doc, args)
84+
85+
86+
def remove_additional_props_main() -> None:
    """
    Entry point of the ``jschon-remove-additional-props`` command.

    Parses the command line, loads the document and schema, processes the document with
    ``remove_additional_props=True`` and writes the result back unless a dry run was requested.
    """
    jschon.create_catalog('2020-12')

    args = _make_parser(
        prog='jschon-remove-additional-props',
        description="Processes a JSON or YAML document to remove additional properties not defined in the schema",
    ).parse_args()

    loaded_doc, loaded_schema = _load_doc_and_schema(args)
    processed_doc = process_json_doc(
        doc_data=loaded_doc,
        schema_data=loaded_schema,
        remove_additional_props=True,
    )
    _maybe_persist(processed_doc, args)

setup.cfg

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = jschon-sort
3-
version = 0.0.5
3+
version = 0.1.0
44
description = Sorts a JSON or YAML document to match a JSON Schema's order of properties
55
long_description = file: README.md
66
long_description_content_type = text/markdown
@@ -26,10 +26,11 @@ exclude = tests*
2626
2727
[options.entry_points]
2828
console_scripts =
29-
jschon-sort = jschon_sort.cli:main
29+
jschon-sort = jschon_tools.cli:sort_main
30+
jschon-remove-additional-props = jschon_tools.cli:remove_additional_props_main
3031
3132
[tool:pytest]
32-
addopts = --cov
33+
addopts = --cov .
3334
3435
[coverage:run]
3536
branch = true

0 commit comments

Comments
 (0)