Skip to content

Performance - reduce urljoin/urldefrag overhead #202

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/usr/bin/env python
"""
Benchmark the performance of jsonschema.

Example benchmark:

wget http://swagger.io/v2/schema.json
wget http://petstore.swagger.io/v2/swagger.json
python bench.py -r 5 schema.json swagger.json

"""
from __future__ import print_function
import argparse
import cProfile
import json
import time

import jsonschema


def parse_args(argv=None):
    """
    Parse the benchmark's command-line options.

    :argument list argv: argument strings to parse; ``None`` (the default)
        lets argparse read them from ``sys.argv``
    :returns: a namespace with the attributes ``schema``, ``document``,
        ``repeat`` and ``profile``

    """
    parser = argparse.ArgumentParser()
    parser.add_argument('schema', help="path to a schema used to benchmark")
    parser.add_argument('document', help="document to validate with schema")
    # Default to a single iteration: without a default, omitting -r leaves
    # ``repeat`` as None and the timing loop's ``range(repeat)`` raises
    # TypeError.
    parser.add_argument(
        '-r', '--repeat', type=int, default=1,
        help="number of iterations (default: %(default)s)")
    parser.add_argument(
        '--profile',
        help="Enable profiling, write profile to this filepath")
    return parser.parse_args(argv)


def run(filename, schema, document):
    """
    Validate ``document`` against ``schema`` once.

    A resolver is built with ``file://<filename>`` as its base URI so that
    the timed work includes resolver construction.

    :argument str filename: path of the schema file, used for the base URI
    :argument dict schema: the loaded schema
    :argument document: the loaded document to validate

    """
    # Seed the store under the schema's own id only when it declares one;
    # indexing ``schema['id']`` unconditionally raised KeyError for schemas
    # without an ``id``.
    store = {schema['id']: schema} if 'id' in schema else {}
    resolver = jsonschema.RefResolver(
        'file://{0}'.format(filename),
        schema,
        store=store)
    jsonschema.validate(document, schema, resolver=resolver)


def format_time(time_):
    """
    Render a duration in seconds as milliseconds with three decimals.

    :argument float time_: duration in seconds
    :returns: a string such as ``"1.500ms"``

    """
    milliseconds = time_ * 1000
    return "%.3fms" % (milliseconds,)


def run_timeit(schema_filename, document_filename, repeat, profile):
    """
    Time repeated validations of a document and print the results.

    Prints the individual timings in ascending order, then their mean.

    :argument str schema_filename: path of the JSON schema file
    :argument str document_filename: path of the JSON document to validate
    :argument int repeat: number of timed validations; ``None`` means one
    :argument str profile: when truthy, a path that receives the cProfile
        stats gathered over the timed runs
    :raises ValueError: if ``repeat`` is less than one

    """
    # Imported locally so the function does not rely on the module's import
    # block; this is the clock the standard library's own benchmarking
    # module uses, whereas ``time.time()`` is a wall clock.
    from timeit import default_timer

    if repeat is None:
        repeat = 1
    if repeat < 1:
        # Fail before any I/O instead of dividing by zero when computing
        # the mean.
        raise ValueError("repeat must be at least 1, got %r" % (repeat,))

    with open(schema_filename) as schema_file:
        schema = json.load(schema_file)

    with open(document_filename) as document_file:
        document = json.load(document_file)

    profiler = cProfile.Profile() if profile else None

    times = []
    if profiler is not None:
        profiler.enable()
    try:
        for _ in range(repeat):
            start_time = default_timer()
            run(schema_filename, schema, document)
            times.append(default_timer() - start_time)
    finally:
        # Always switch the profiler off, even when a run raises.
        if profiler is not None:
            profiler.disable()

    if profiler is not None:
        profiler.dump_stats(profile)

    print(", ".join(map(format_time, sorted(times))))
    print("Mean: {0}".format(format_time(sum(times) / len(times))))


def main():
    """Entry point: read the command-line options and run the benchmark."""
    options = parse_args()
    run_timeit(
        options.schema,
        options.document,
        options.repeat,
        options.profile,
    )


if __name__ == "__main__":
main()
10 changes: 8 additions & 2 deletions jsonschema/compat.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from __future__ import unicode_literals
import sys

from collections import namedtuple
import operator
import sys


try:
from collections import MutableMapping, Sequence # noqa
Expand Down Expand Up @@ -40,14 +43,17 @@ def urlsplit(url):
return SplitResult(scheme, netloc, path, query, fragment)


DefragResult = namedtuple('DefragResult', 'url fragment')


def urldefrag(url):
if "#" in url:
s, n, p, q, frag = urlsplit(url)
defrag = urlunsplit((s, n, p, q, ''))
else:
defrag = url
frag = ''
return defrag, frag
return DefragResult(defrag, frag)


# flake8: noqa
4 changes: 2 additions & 2 deletions jsonschema/tests/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -815,7 +815,7 @@ def test_it_retrieves_unstored_refs_via_urlopen(self):
def test_it_can_construct_a_base_uri_from_a_schema(self):
schema = {"id" : "foo"}
resolver = RefResolver.from_schema(schema)
self.assertEqual(resolver.base_uri, "foo")
self.assertEqual(resolver.resolution_scope.url, "foo")
with resolver.resolving("") as resolved:
self.assertEqual(resolved, schema)
with resolver.resolving("#") as resolved:
Expand All @@ -828,7 +828,7 @@ def test_it_can_construct_a_base_uri_from_a_schema(self):
def test_it_can_construct_a_base_uri_from_a_schema_without_id(self):
schema = {}
resolver = RefResolver.from_schema(schema)
self.assertEqual(resolver.base_uri, "")
self.assertEqual(resolver.resolution_scope.url, "")
with resolver.resolving("") as resolved:
self.assertEqual(resolved, schema)
with resolver.resolving("#") as resolved:
Expand Down
58 changes: 38 additions & 20 deletions jsonschema/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from jsonschema import _utils, _validators
from jsonschema.compat import (
Sequence, urljoin, urlsplit, urldefrag, unquote, urlopen,
Sequence, urljoin, urlsplit, urldefrag, unquote, urlopen, DefragResult,
str_types, int_types, iteritems,
)
from jsonschema.exceptions import ErrorTree # Backwards compatibility # noqa
Expand Down Expand Up @@ -79,7 +79,10 @@ def iter_errors(self, instance, _schema=None):
if _schema is None:
_schema = self.schema

with self.resolver.in_scope(_schema.get(u"id", u"")):
scope = _schema.get(u"id")
if scope:
self.resolver.push_scope(urldefrag(scope))
try:
ref = _schema.get(u"$ref")
if ref is not None:
validators = [(u"$ref", ref)]
Expand All @@ -103,6 +106,9 @@ def iter_errors(self, instance, _schema=None):
if k != u"$ref":
error.schema_path.appendleft(k)
yield error
finally:
if scope:
self.resolver.scopes_stack.pop()

def descend(self, instance, schema, path=None, schema_path=None):
for error in self.iter_errors(instance, schema):
Expand Down Expand Up @@ -233,19 +239,19 @@ class RefResolver(object):
def __init__(
self, base_uri, referrer, store=(), cache_remote=True, handlers=(),
):
self.base_uri = base_uri
self.resolution_scope = base_uri
base_uri = urldefrag(base_uri)
# This attribute is not used, it is for backwards compatibility
self.referrer = referrer
self.cache_remote = cache_remote
self.handlers = dict(handlers)

self.scopes_stack = [base_uri]
self.store = _utils.URIDict(
(id, validator.META_SCHEMA)
for id, validator in iteritems(meta_schemas)
)
self.store.update(store)
self.store[base_uri] = referrer
self.store[base_uri.url] = referrer

@classmethod
def from_schema(cls, schema, *args, **kwargs):
Expand All @@ -259,14 +265,23 @@ def from_schema(cls, schema, *args, **kwargs):

return cls(schema.get(u"id", u""), schema, *args, **kwargs)

def push_scope(self, scope):
    """
    Enter a new resolution scope by pushing it onto ``scopes_stack``.

    The scope's URL is joined onto the current scope's URL; a scope with
    an empty URL keeps the current URL. The fragment is carried over
    unchanged.

    :argument scope: the scope to enter, either an already split
        ``(url, fragment)`` result or a plain URI string

    """
    if not hasattr(scope, "url"):
        # ``in_scope`` forwards its argument unchanged and used to accept a
        # plain URI string, so split strings here rather than failing on
        # ``scope.url``.
        scope = urldefrag(scope)

    current = self.resolution_scope
    if scope.url:
        url = urljoin(current.url, scope.url, allow_fragments=False)
    else:
        # Nothing to join: stay within the current document.
        url = current.url
    self.scopes_stack.append(scope._replace(url=url))

@property
def resolution_scope(self):
    """
    The scope currently in effect: the last entry of ``scopes_stack``.

    """
    return self.scopes_stack[-1]

@contextlib.contextmanager
def in_scope(self, scope):
old_scope = self.resolution_scope
self.resolution_scope = urljoin(old_scope, scope)
self.push_scope(scope)
try:
yield
finally:
self.resolution_scope = old_scope
self.scopes_stack.pop()

@contextlib.contextmanager
def resolving(self, ref):
Expand All @@ -277,26 +292,29 @@ def resolving(self, ref):
:argument str ref: reference to resolve

"""
ref = urldefrag(ref)

full_uri = urljoin(self.resolution_scope, ref)
uri, fragment = urldefrag(full_uri)
if not uri:
uri = self.base_uri

if uri in self.store:
document = self.store[uri]
if ref.url:
url = urljoin(
self.resolution_scope.url,
ref.url,
allow_fragments=False)
else:
url = self.resolution_scope.url

try:
document = self.store[url]
except KeyError:
try:
document = self.resolve_remote(uri)
document = self.resolve_remote(url)
except Exception as exc:
raise RefResolutionError(exc)

old_base_uri, self.base_uri = self.base_uri, uri
self.push_scope(DefragResult(url, ref.fragment))
try:
with self.in_scope(uri):
yield self.resolve_fragment(document, fragment)
yield self.resolve_fragment(document, ref.fragment)
finally:
self.base_uri = old_base_uri
self.scopes_stack.pop()

def resolve_fragment(self, document, fragment):
"""
Expand Down