diff --git a/bench.py b/bench.py
new file mode 100644
index 000000000..e7318ed75
--- /dev/null
+++ b/bench.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+"""
+Benchmark the performance of jsonschema.
+
+Example benchmark:
+
+    wget http://swagger.io/v2/schema.json
+    wget http://petstore.swagger.io/v2/swagger.json
+    python bench.py -r 5 schema.json swagger.json
+
+"""
+from __future__ import print_function
+import argparse
+import cProfile
+import json
+import os
+import time
+
+import jsonschema
+
+
+def positive_int(value):
+    """Parse a command-line value as an integer >= 1 (argparse ``type``)."""
+    number = int(value)
+    if number < 1:
+        raise argparse.ArgumentTypeError(
+            "expected a positive integer, got {0!r}".format(value))
+    return number
+
+
+def parse_args():
+    """Parse and return the command-line arguments of the benchmark."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('schema', help="path to a schema used to benchmark")
+    parser.add_argument('document', help="document to validate with schema")
+    # Default to one iteration so that omitting -r does not pass None on to
+    # range() in run_timeit().
+    parser.add_argument('-r', '--repeat', type=positive_int, default=1,
+                        help="number of iterations (default: 1)")
+    parser.add_argument('--profile',
+                        help="Enable profiling, write profile to this filepath")
+    return parser.parse_args()
+
+
+def run(filename, schema, document):
+    """Validate ``document`` against ``schema`` once.
+
+    ``filename`` is the path the schema was loaded from; it becomes the base
+    URI of the resolver.
+    """
+    # Only pre-seed the store when the schema declares an id; schemas without
+    # one are still reachable through the resolver's base URI.
+    store = {}
+    schema_id = schema.get('id')
+    if schema_id:
+        store[schema_id] = schema
+    resolver = jsonschema.RefResolver(
+        # An absolute path keeps the first path segment from being parsed as
+        # the URI's host ('file://schema.json' has host 'schema.json').
+        'file://{0}'.format(os.path.abspath(filename)),
+        schema,
+        store=store)
+    jsonschema.validate(document, schema, resolver=resolver)
+
+
+def format_time(time_):
+    """Format a duration given in seconds as milliseconds, e.g. '1.500ms'."""
+    return "%.3fms" % (time_ * 1000)
+
+
+def run_timeit(schema_filename, document_filename, repeat, profile):
+    """Time ``repeat`` validations and print each timing and the mean.
+
+    If ``profile`` is a non-empty path, the runs are profiled with cProfile
+    and the stats are written to that path.
+
+    Raises ValueError if ``repeat`` is less than 1.
+    """
+    if repeat is None:
+        repeat = 1
+    if repeat < 1:
+        raise ValueError("repeat must be at least 1, got {0!r}".format(repeat))
+
+    with open(schema_filename) as schema_file:
+        schema = json.load(schema_file)
+
+    with open(document_filename) as fh:
+        document = json.load(fh)
+
+    profiler = cProfile.Profile() if profile else None
+    if profiler is not None:
+        profiler.enable()
+
+    times = []
+    try:
+        for _ in range(repeat):
+            start_time = time.time()
+            run(schema_filename, schema, document)
+            times.append(time.time() - start_time)
+    finally:
+        # Stop profiling and keep the stats even when validation raises.
+        if profiler is not None:
+            profiler.disable()
+            profiler.dump_stats(profile)
+
+    print(", ".join(map(format_time, sorted(times))))
+    print("Mean: {0}".format(format_time(sum(times) / len(times))))
+
+
+def main():
+    """Command-line entry point."""
+    args = parse_args()
+    run_timeit(args.schema, args.document, args.repeat, args.profile)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/jsonschema/compat.py
b/jsonschema/compat.py index 6ca49ab6b..0afd9eaaa 100644 --- a/jsonschema/compat.py +++ b/jsonschema/compat.py @@ -1,6 +1,9 @@ from __future__ import unicode_literals -import sys + +from collections import namedtuple import operator +import sys + try: from collections import MutableMapping, Sequence # noqa @@ -40,6 +43,9 @@ def urlsplit(url): return SplitResult(scheme, netloc, path, query, fragment) +DefragResult = namedtuple('DefragResult', 'url fragment') + + def urldefrag(url): if "#" in url: s, n, p, q, frag = urlsplit(url) @@ -47,7 +53,7 @@ def urldefrag(url): else: defrag = url frag = '' - return defrag, frag + return DefragResult(defrag, frag) # flake8: noqa diff --git a/jsonschema/tests/test_validators.py b/jsonschema/tests/test_validators.py index 2b14372ab..d966ab821 100644 --- a/jsonschema/tests/test_validators.py +++ b/jsonschema/tests/test_validators.py @@ -815,7 +815,7 @@ def test_it_retrieves_unstored_refs_via_urlopen(self): def test_it_can_construct_a_base_uri_from_a_schema(self): schema = {"id" : "foo"} resolver = RefResolver.from_schema(schema) - self.assertEqual(resolver.base_uri, "foo") + self.assertEqual(resolver.resolution_scope.url, "foo") with resolver.resolving("") as resolved: self.assertEqual(resolved, schema) with resolver.resolving("#") as resolved: @@ -828,7 +828,7 @@ def test_it_can_construct_a_base_uri_from_a_schema(self): def test_it_can_construct_a_base_uri_from_a_schema_without_id(self): schema = {} resolver = RefResolver.from_schema(schema) - self.assertEqual(resolver.base_uri, "") + self.assertEqual(resolver.resolution_scope.url, "") with resolver.resolving("") as resolved: self.assertEqual(resolved, schema) with resolver.resolving("#") as resolved: diff --git a/jsonschema/validators.py b/jsonschema/validators.py index c347bf145..54e07622e 100644 --- a/jsonschema/validators.py +++ b/jsonschema/validators.py @@ -11,7 +11,7 @@ from jsonschema import _utils, _validators from jsonschema.compat import ( - Sequence, urljoin, urlsplit, 
urldefrag, unquote, urlopen, + Sequence, urljoin, urlsplit, urldefrag, unquote, urlopen, DefragResult, str_types, int_types, iteritems, ) from jsonschema.exceptions import ErrorTree # Backwards compatibility # noqa @@ -79,7 +79,10 @@ def iter_errors(self, instance, _schema=None): if _schema is None: _schema = self.schema - with self.resolver.in_scope(_schema.get(u"id", u"")): + scope = _schema.get(u"id") + if scope: + self.resolver.push_scope(urldefrag(scope)) + try: ref = _schema.get(u"$ref") if ref is not None: validators = [(u"$ref", ref)] @@ -103,6 +106,9 @@ def iter_errors(self, instance, _schema=None): if k != u"$ref": error.schema_path.appendleft(k) yield error + finally: + if scope: + self.resolver.scopes_stack.pop() def descend(self, instance, schema, path=None, schema_path=None): for error in self.iter_errors(instance, schema): @@ -233,19 +239,19 @@ class RefResolver(object): def __init__( self, base_uri, referrer, store=(), cache_remote=True, handlers=(), ): - self.base_uri = base_uri - self.resolution_scope = base_uri + base_uri = urldefrag(base_uri) # This attribute is not used, it is for backwards compatibility self.referrer = referrer self.cache_remote = cache_remote self.handlers = dict(handlers) + self.scopes_stack = [base_uri] self.store = _utils.URIDict( (id, validator.META_SCHEMA) for id, validator in iteritems(meta_schemas) ) self.store.update(store) - self.store[base_uri] = referrer + self.store[base_uri.url] = referrer @classmethod def from_schema(cls, schema, *args, **kwargs): @@ -259,14 +265,23 @@ def from_schema(cls, schema, *args, **kwargs): return cls(schema.get(u"id", u""), schema, *args, **kwargs) + def push_scope(self, scope): + old_scope = self.resolution_scope + url = (urljoin(old_scope.url, scope.url, allow_fragments=False) + if scope.url else old_scope.url) + self.scopes_stack.append(scope._replace(url=url)) + + @property + def resolution_scope(self): + return self.scopes_stack[-1] + @contextlib.contextmanager def in_scope(self, 
scope): - old_scope = self.resolution_scope - self.resolution_scope = urljoin(old_scope, scope) + self.push_scope(scope) try: yield finally: - self.resolution_scope = old_scope + self.scopes_stack.pop() @contextlib.contextmanager def resolving(self, ref): @@ -277,26 +292,29 @@ def resolving(self, ref): :argument str ref: reference to resolve """ + ref = urldefrag(ref) - full_uri = urljoin(self.resolution_scope, ref) - uri, fragment = urldefrag(full_uri) - if not uri: - uri = self.base_uri - - if uri in self.store: - document = self.store[uri] + if ref.url: + url = urljoin( + self.resolution_scope.url, + ref.url, + allow_fragments=False) else: + url = self.resolution_scope.url + + try: + document = self.store[url] + except KeyError: try: - document = self.resolve_remote(uri) + document = self.resolve_remote(url) except Exception as exc: raise RefResolutionError(exc) - old_base_uri, self.base_uri = self.base_uri, uri + self.push_scope(DefragResult(url, ref.fragment)) try: - with self.in_scope(uri): - yield self.resolve_fragment(document, fragment) + yield self.resolve_fragment(document, ref.fragment) finally: - self.base_uri = old_base_uri + self.scopes_stack.pop() def resolve_fragment(self, document, fragment): """