diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index d5278dd2a..b946ef266 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -158,3 +158,24 @@ jobs: - name: Install dev dependencies run: pip install -r requirements-dev.txt + + check-benchmarks: + # This checks to make sure any API changes haven't broken any of the + # benchmarks. It doesn't do any actual benchmarking, since (IMO) that's not + # appropriate for CI on Github actions. + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.8" + cache: "pip" + cache-dependency-path: requirements-bench.txt + - name: Install pystac + run: pip install . + - name: Install benchmark dependencies + run: pip install -r requirements-bench.txt + - name: Set asv machine + run: asv machine --yes + - name: Check benchmarks + run: asv dev -a repeat=1 -a rounds=1 --strict diff --git a/.gitignore b/.gitignore index e349774d5..fb913e75d 100644 --- a/.gitignore +++ b/.gitignore @@ -157,3 +157,6 @@ dmypy.json # Cython debug symbols cython_debug/ + +# asv environments +.asv \ No newline at end of file diff --git a/asv.conf.json b/asv.conf.json new file mode 100644 index 000000000..56b462ee5 --- /dev/null +++ b/asv.conf.json @@ -0,0 +1,24 @@ +{ + "version": 1, + "project": "pystac", + "project_url": "https://pystac.readthedocs.io/", + "repo": ".", + "branches": [ + "main" + ], + "dvcs": "git", + "environment_type": "virtualenv", + "show_commit_url": "http://github.com/stac-utils/pystac/commit/", + "matrix": { + "req": { + "orjson": [ + null, + "" + ] + } + }, + "benchmark_dir": "benchmarks", + "env_dir": ".asv/env", + "results_dir": ".asv/results", + "html_dir": ".asv/html" +} \ No newline at end of file diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/_base.py 
class Bench:
    """Base class holding the asv timing settings shared by the benchmarks.

    It defines no benchmark methods itself; benchmark classes inherit from it
    to pick up the ``repeat`` and ``rounds`` attributes below.
    """

    # asv reads a ``repeat`` tuple as (min_repeat, max_repeat, max_time):
    # repeat between 10 and 50 times, up to a max time of 2s.
    repeat = (10, 50, 2.0)

    # Bump number of rounds to 4
    rounds = 4
class ReadCatalogBench(Bench):
    """Benchmark reading a large catalog back from disk and walking it."""

    def setup(self) -> None:
        """Save a large generated catalog into a fresh temporary directory."""
        catalog = make_large_catalog()
        self.temporary_directory = TemporaryDirectory()
        self.path = str(Path(self.temporary_directory.name) / "catalog.json")
        catalog.normalize_and_save(self.temporary_directory.name)

    def teardown(self) -> None:
        """Remove the temporary directory created by ``setup``."""
        # Use the object's own cleanup() rather than shutil.rmtree on its
        # name: deleting the tree behind the TemporaryDirectory's back leaves
        # its cleanup-on-finalization to run later against a missing path.
        self.temporary_directory.cleanup()

    def time_read_and_walk(self) -> None:
        """Read the root catalog from file and walk the whole tree."""
        catalog = Catalog.from_file(self.path)
        for _, _, _ in catalog.walk():
            pass


class WriteCatalogBench(Bench):
    """Benchmark normalizing and saving a large in-memory catalog."""

    def setup(self) -> None:
        """Build the catalog and create the directory it will be saved to."""
        self.catalog = make_large_catalog()
        self.temporary_directory = TemporaryDirectory()

    def teardown(self) -> None:
        """Remove the temporary directory created by ``setup``."""
        # See ReadCatalogBench.teardown: cleanup() is the supported way to
        # dispose of a TemporaryDirectory explicitly.
        self.temporary_directory.cleanup()

    def time_normalize_and_save(self) -> None:
        """Normalize hrefs to the temporary directory and save the catalog."""
        self.catalog.normalize_and_save(self.temporary_directory.name)
class CollectionBench(Bench):
    """Time (de)serialization of a single Collection from the test data."""

    def setup(self) -> None:
        """Create a scratch directory and load the example Collection."""
        self.temp_dir = tempfile.mkdtemp()

        self.stac_io = StacIO.default()

        self.collection_path = get_data_path("examples/1.0.0/collection.json")
        # JSON is UTF-8; do not depend on the platform's default encoding.
        with open(self.collection_path, encoding="utf-8") as src:
            self.collection_dict = json.load(src)
        self.collection = Collection.from_file(self.collection_path)

    def teardown(self) -> None:
        """Remove the scratch directory (best effort, errors are ignored)."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def time_collection_from_file(self) -> None:
        """Deserialize a Collection from file."""
        _ = Collection.from_file(self.collection_path)

    def time_collection_from_dict(self) -> None:
        """Deserialize a Collection from dictionary."""
        _ = Collection.from_dict(self.collection_dict)

    def time_collection_to_dict(self) -> None:
        """Serialize a Collection to a dictionary."""
        self.collection.to_dict(include_self_link=True)

    def time_collection_save(self) -> None:
        """Serialize a Collection to a JSON file."""
        self.collection.save_object(
            include_self_link=True,
            dest_href=os.path.join(self.temp_dir, "time_collection_save.json"),
            stac_io=self.stac_io,
        )
class ItemBench(Bench):
    """Time (de)serialization of a single Item from the test data."""

    def setup(self) -> None:
        """Create a scratch directory and load the sample Item."""
        self.temp_dir = tempfile.mkdtemp()

        self.stac_io = StacIO.default()

        self.item_path = get_data_path("item/sample-item-asset-properties.json")
        # JSON is UTF-8; do not depend on the platform's default encoding.
        with open(self.item_path, encoding="utf-8") as src:
            self.item_dict = json.load(src)
        self.item = Item.from_file(self.item_path)

    def teardown(self) -> None:
        """Remove the scratch directory (best effort, errors are ignored)."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def time_item_from_file(self) -> None:
        """Deserialize an Item from file."""
        _ = Item.from_file(self.item_path)

    def time_item_from_dict(self) -> None:
        """Deserialize an Item from dictionary."""
        _ = Item.from_dict(self.item_dict)

    def time_item_to_dict(self) -> None:
        """Serialize an Item to a dictionary."""
        self.item.to_dict(include_self_link=True)

    def time_item_save(self) -> None:
        """Serialize an Item to a JSON file."""
        self.item.save_object(
            include_self_link=True,
            dest_href=os.path.join(self.temp_dir, "time_item_save.json"),
            stac_io=self.stac_io,
        )
commands, as in ``git commit --no-verify``. .. [#] In rare cases changes to one file might invalidate an unchanged file, such as when modifying the return type of a function used in another file. +Benchmarks +^^^^^^^^^^ + +PySTAC uses `asv <https://asv.readthedocs.io/>`_ for benchmarking. Benchmarks are +defined in the ``./benchmarks`` directory. Due to the inherent uncertainty in +the environment of GitHub workflow runners, benchmarks are only smoke-tested, not timed, in CI. +If your changes may affect performance, use the provided script to run the +benchmark suite locally. This script will compare your current ``HEAD`` with +the **main** branch and report any improvements or regressions. + +.. code-block:: bash + + scripts/bench + +The benchmark suite takes a while to run, and will report any significant +changes to standard output. For example, here's a benchmark comparison between +v1.0.0 and v1.6.1 (from `@gadomski's <https://github.com/gadomski>`_ computer):: + + before after ratio + [eee06027] [579c071b] + + - 533±20μs 416±10μs 0.78 collection.CollectionBench.time_collection_from_file [gadomski/virtualenv-py3.10-orjson] + - 329±8μs 235±10μs 0.72 collection.CollectionBench.time_collection_from_dict [gadomski/virtualenv-py3.10-orjson] + - 332±10μs 231±4μs 0.70 collection.CollectionBench.time_collection_from_dict [gadomski/virtualenv-py3.10] + - 174±4μs 106±2μs 0.61 item.ItemBench.time_item_from_dict [gadomski/virtualenv-py3.10] + - 174±4μs 106±2μs 0.61 item.ItemBench.time_item_from_dict [gadomski/virtualenv-py3.10-orjson] + before after ratio + [eee06027] [579c071b] + + + 87.1±3μs 124±5μs 1.42 catalog.CatalogBench.time_catalog_from_dict [gadomski/virtualenv-py3.10] + + 87.1±4μs 122±5μs 1.40 catalog.CatalogBench.time_catalog_from_dict [gadomski/virtualenv-py3.10-orjson] + +When developing new benchmarks, you can run a shortened version of the benchmark suite: + +.. 
#!/bin/bash
#
# Benchmark the current HEAD against the main branch with `asv continuous`
# and report changes whose ratio exceeds the configured factor.
#
# Environment:
#   ASV_FACTOR  Threshold passed to `asv continuous --factor`.
#               Defaults to 1.25 when unset or empty.

set -e

# Same semantics as the previous `-z` test: fall back when unset OR empty.
ASV_FACTOR="${ASV_FACTOR:-1.25}"

# Quote the expansion so a value containing whitespace or glob characters is
# passed to asv as a single argument instead of being word-split by the shell.
asv continuous --split -e --interleave-rounds \
    --factor "${ASV_FACTOR}" \
    main HEAD