stac-utils · gadomski · Jan 11, 2023 · Feb 8, 2022 · Feb 9, 2022 · Feb 10, 2022
diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
@@ -0,0 +1,79 @@
+name: Benchmarks
+
+on:
+  pull_request:
+    types: [opened, reopened, synchronize, labeled]
+  workflow_dispatch:
+
+jobs:
+  benchmark:
+    if: ${{ contains( github.event.pull_request.labels.*.name, 'run-benchmarks') && github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch' }}
+    name: Linux
+    runs-on: ubuntu-20.04
+
+    strategy:
+      # Allow other matrix jobs to complete if 1 fails
+      fail-fast: false
+      matrix:
+        python-version:
+          - "3.8"
+          - "3.9"
+          - "3.10"
+
+    env:
+      ASV_DIR: "./benchmarks"
+      ASV_FACTOR: 1.25
+    steps:
+      # We need the full repo to avoid this issue
+      # https://github.com/actions/checkout/issues/23
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Cache dependencies
+        uses: actions/cache@v2
+        with:
+          path: ~/.cache/pip
+          # Cache based on Python version and dependency hash
+          key: pip-benchmarks-python${{ matrix.python-version }}-${{ hashFiles('requirements-bench.txt') }}
+
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -r requirements-bench.txt
+          pip install -e .
+
+      - name: Run benchmarks
+        id: benchmark
+        run: |
+          set -x
+          # ID this runner
+          asv machine --yes
+          echo "Baseline:  ${{ github.event.pull_request.base.sha }} (${{ github.event.pull_request.base.label }})"
+          echo "Contender: ${GITHUB_SHA} (${{ github.event.pull_request.head.label }})"
+          # Run benchmarks for current commit against base
+          ASV_OPTIONS="--split -e --interleave-rounds --factor $ASV_FACTOR"
+          asv continuous $ASV_OPTIONS ${{ github.event.pull_request.base.sha }} ${GITHUB_SHA} \
+              | tee benchmarks.log
+          # Fail the step if the log shows an error or a performance regression
+          if grep "Traceback \|failed\|PERFORMANCE DECREASED" benchmarks.log > /dev/null ; then
+              exit 1
+          fi
+        working-directory: ${{ env.ASV_DIR }}
+
+      - name: Add instructions to artifact
+        if: always()
+        run: |
+          cp benchmarks.log .asv/results/
+        working-directory: ${{ env.ASV_DIR }}
+
+      - uses: actions/upload-artifact@v2
+        if: always()
+        with:
+          name: asv-benchmark-results-${{ runner.os }}-${{ matrix.python-version }}
+          path: ${{ env.ASV_DIR }}/.asv/results
diff --git a/.gitignore b/.gitignore
@@ -157,3 +157,6 @@ dmypy.json
 
 # Cython debug symbols
 cython_debug/
+
+# asv environments
+.asv
diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
@@ -0,0 +1,163 @@
+{
+    // The version of the config file format.  Do not change, unless
+    // you know what you are doing.
+    "version": 1,
+
+    // The name of the project being benchmarked
+    "project": "pystac",
+
+    // The project's homepage
+    "project_url": "https://pystac.readthedocs.io/",
+
+    // The URL or local path of the source code repository for the
+    // project being benchmarked
+    "repo": "..",
+
+    // Customizable commands for building, installing, and
+    // uninstalling the project. See asv.conf.json documentation.
+    //
+    // "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}"],
+    // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],
+    // "build_command": [
+    //     "python setup.py build",
+    //     "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"
+    // ],
+
+    // List of branches to benchmark. If not provided, defaults to "master"
+    // (for git) or "default" (for mercurial).
+    "branches": ["main"], // for git
+    // "branches": ["default"],    // for mercurial
+
+    // The DVCS being used.  If not set, it will be automatically
+    // determined from "repo" by looking at the protocol in the URL
+    // (if remote), or by looking for special directories, such as
+    // ".git" (if local).
+    "dvcs": "git",
+
+    // The tool to use to create environments.  May be "conda",
+    // "virtualenv" or other value depending on the plugins in use.
+    // If missing or the empty string, the tool will be automatically
+    // determined by looking for tools on the PATH environment
+    // variable.
+    "environment_type": "virtualenv",
+
+    // the base URL to show a commit for the project.
+    "show_commit_url": "http://github.com/stac-utils/pystac/commit/",
+
+    // The Pythons you'd like to test against.  If not provided, defaults
+    // to the current version of Python used to run `asv`.
+    // "pythons": ["3.9"],
+
+    // The matrix of dependencies to test.  Each key of the "req"
+    // requirements dictionary is the name of a package (in PyPI) and
+    // the values are version numbers.  An empty list or empty string
+    // indicates to just test against the default (latest)
+    // version. null indicates that the package is to not be
+    // installed. If the package to be tested is only available from
+    // PyPi, and the 'environment_type' is conda, then you can preface
+    // the package name by 'pip+', and the package will be installed
+    // via pip (with all the conda available packages installed first,
+    // followed by the pip installed packages).
+    //
+    // The ``@env`` and ``@env_nobuild`` keys contain the matrix of
+    // environment variables to pass to build and benchmark commands.
+    // An environment will be created for every combination of the
+    // cartesian product of the "@env" variables in this matrix.
+    // Variables in "@env_nobuild" will be passed to every environment
+    // during the benchmark phase, but will not trigger creation of
+    // new environments.  A value of ``null`` means that the variable
+    // will not be set for the current combination.
+    //
+    "matrix": {
+        "req": {
+            "orjson": [null, ""]
+        }
+    },
+
+    // Combinations of libraries/python versions can be excluded/included
+    // from the set to test. Each entry is a dictionary containing additional
+    // key-value pairs to include/exclude.
+    //
+    // An exclude entry excludes entries where all values match. The
+    // values are regexps that should match the whole string.
+    //
+    // An include entry adds an environment. Only the packages listed
+    // are installed. The 'python' key is required. The exclude rules
+    // do not apply to includes.
+    //
+    // In addition to package names, the following keys are available:
+    //
+    // - python
+    //     Python version, as in the *pythons* variable above.
+    // - environment_type
+    //     Environment type, as above.
+    // - sys_platform
+    //     Platform, as in sys.platform. Possible values for the common
+    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
+    // - req
+    //     Required packages
+    // - env
+    //     Environment variables
+    // - env_nobuild
+    //     Non-build environment variables
+    //
+    // "exclude": [
+    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
+    //     {"environment_type": "conda", "req": {"six": null}}, // don't run without six on conda
+    //     {"env": {"ENV_VAR_1": "val2"}}, // skip val2 for ENV_VAR_1
+    // ],
+    //
+    // "include": [
+    //     // additional env for python2.7
+    //     {"python": "2.7", "req": {"numpy": "1.8"}, "env_nobuild": {"FOO": "123"}},
+    //     // additional env if run on windows+conda
+    //     {"platform": "win32", "environment_type": "conda", "python": "2.7", "req": {"libpython": ""}},
+    // ],
+
+    // The directory (relative to the current directory) that benchmarks are
+    // stored in.  If not provided, defaults to "benchmarks"
+    "benchmark_dir": "benchmarks",
+
+    // The directory (relative to the current directory) to cache the Python
+    // environments in.  If not provided, defaults to "env"
+    "env_dir": ".asv/env",
+
+    // The directory (relative to the current directory) that raw benchmark
+    // results are stored in.  If not provided, defaults to "results".
+    "results_dir": ".asv/results",
+
+    // The directory (relative to the current directory) that the html tree
+    // should be written to.  If not provided, defaults to "html".
+    "html_dir": ".asv/html",
+
+    // The number of characters to retain in the commit hashes.
+    // "hash_length": 8,
+
+    // `asv` will cache results of the recent builds in each
+    // environment, making them faster to install next time.  This is
+    // the number of builds to keep, per environment.
+    // "build_cache_size": 2,
+
+    // The commits after which the regression search in `asv publish`
+    // should start looking for regressions. Dictionary whose keys are
+    // regexps matching to benchmark names, and values corresponding to
+    // the commit (exclusive) after which to start looking for
+    // regressions.  The default is to start from the first commit
+    // with results. If the commit is `null`, regression detection is
+    // skipped for the matching benchmark.
+    //
+    // "regressions_first_commits": {
+    //    "some_benchmark": "352cdf",  // Consider regressions only after this commit
+    //    "another_benchmark": null,   // Skip regression detection altogether
+    // },
+
+    // The thresholds for relative change in results, after which `asv
+    // publish` starts reporting regressions. Dictionary of the same
+    // form as in ``regressions_first_commits``, with values
+    // indicating the thresholds.  If multiple entries match, the
+    // maximum is taken. If no entry matches, the default is 5%.
+    //
+    // "regressions_thresholds": {
+    //    ".*": 0.25     // Change default threshold to 25%
+    // }
+}
diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py
diff --git a/benchmarks/benchmarks/_base.py b/benchmarks/benchmarks/_base.py
@@ -0,0 +1,6 @@
+class Bench:
+    # asv (min_repeat, max_repeat, max_time): 10-50 repeats, max time of 2s
+    repeat = (10, 50, 2.0)
+
+    # Bump number of rounds to 4
+    rounds = 4
diff --git a/benchmarks/benchmarks/_util.py b/benchmarks/benchmarks/_util.py
@@ -0,0 +1,18 @@
+import os
+from typing import Union, TYPE_CHECKING
+
+if TYPE_CHECKING:  # os.PathLike[str] is not subscriptable at runtime before 3.9
+    PathLike = os.PathLike[str]
+else:
+    PathLike = os.PathLike
+
+
+def get_data_path(rel_path: Union[str, PathLike]) -> str:
+    """Gets the absolute path to a file based on a path relative to the
+    tests/data-files directory in this repo."""
+    rel_path = os.fspath(rel_path)  # accept path-like objects as well as str
+    return os.path.abspath(
+        os.path.join(  # the two ".." hops climb out of benchmarks/benchmarks/
+            os.path.dirname(__file__), "..", "..", "tests", "data-files", rel_path
+        )
+    )
diff --git a/benchmarks/benchmarks/catalog.py b/benchmarks/benchmarks/catalog.py
@@ -0,0 +1,43 @@
+import json
+import os
+import shutil
+import tempfile
+import pystac
+
+from ._base import Bench
+from ._util import get_data_path
+
+
+class CatalogBench(Bench):
+    def setup(self) -> None:
+        self.temp_dir = tempfile.mkdtemp()
+
+        self.stac_io = pystac.StacIO.default()
+
+        self.catalog_path = get_data_path("examples/1.0.0/catalog.json")
+        with open(self.catalog_path) as src:
+            self.catalog_dict = json.load(src)
+        self.catalog = pystac.Catalog.from_file(self.catalog_path)
+
+    def teardown(self) -> None:
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def time_catalog_from_file(self) -> None:
+        """Deserialize a Catalog from file."""
+        _ = pystac.Catalog.from_file(self.catalog_path)
+
+    def time_catalog_from_dict(self) -> None:
+        """Deserialize a Catalog from dictionary."""
+        _ = pystac.Catalog.from_dict(self.catalog_dict)
+
+    def time_catalog_to_dict(self) -> None:
+        """Serialize a Catalog to a dictionary."""
+        self.catalog.to_dict(include_self_link=True)
+
+    def time_catalog_save(self) -> None:
+        """Serialize a Catalog to a JSON file."""
+        self.catalog.save_object(
+            include_self_link=True,
+            dest_href=os.path.join(self.temp_dir, "time_catalog_save.json"),
+            stac_io=self.stac_io,
+        )
diff --git a/benchmarks/benchmarks/collection.py b/benchmarks/benchmarks/collection.py
@@ -0,0 +1,43 @@
+import json
+import os
+import shutil
+import tempfile
+import pystac
+
+from ._base import Bench
+from ._util import get_data_path
+
+
+class CollectionBench(Bench):
+    def setup(self) -> None:
+        self.temp_dir = tempfile.mkdtemp()
+
+        self.stac_io = pystac.StacIO.default()
+
+        self.collection_path = get_data_path("examples/1.0.0/collection.json")
+        with open(self.collection_path) as src:
+            self.collection_dict = json.load(src)
+        self.collection = pystac.Collection.from_file(self.collection_path)
+
+    def teardown(self) -> None:
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def time_collection_from_file(self) -> None:
+        """Deserialize a Collection from file."""
+        _ = pystac.Collection.from_file(self.collection_path)
+
+    def time_collection_from_dict(self) -> None:
+        """Deserialize a Collection from dictionary."""
+        _ = pystac.Collection.from_dict(self.collection_dict)
+
+    def time_collection_to_dict(self) -> None:
+        """Serialize a Collection to a dictionary."""
+        self.collection.to_dict(include_self_link=True)
+
+    def time_collection_save(self) -> None:
+        """Serialize a Collection to a JSON file."""
+        self.collection.save_object(
+            include_self_link=True,
+            dest_href=os.path.join(self.temp_dir, "time_collection_save.json"),
+            stac_io=self.stac_io,
+        )
diff --git a/benchmarks/benchmarks/import_pystac.py b/benchmarks/benchmarks/import_pystac.py
@@ -0,0 +1,14 @@
+from typing import Any
+
+
+class ImportPySTACBench:
+    repeat = 10  # presumably asv's per-benchmark repeat count — confirm
+
+    def setup(self, *args: Any, **kwargs: Any) -> None:
+        def import_pystac() -> None:
+            import pystac  # noqa: F401
+
+        self._import_pystac = import_pystac  # invoked by time_import_pystac
+
+    def time_import_pystac(self) -> None:
+        self._import_pystac()  # NOTE(review): later calls may just hit sys.modules