Skip to content

Add performance benchmarks #748

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 25 commits into from
Jan 11, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
ca0a6fa
Configure asv and add import benchmark
duckontheweb Feb 8, 2022
67633b2
Add item (de)serialization benchmarks & use 10 reps
duckontheweb Feb 9, 2022
ed65815
Add catalog, collection benchmarks and tweak settings
duckontheweb Feb 10, 2022
3f5bb3d
Add convenience script for running locally
duckontheweb Feb 10, 2022
78d97f5
Use default Python
duckontheweb Feb 10, 2022
db7e92d
Add benchmark workflow to CI
duckontheweb Feb 10, 2022
c187569
Match label condition to label name in repo
duckontheweb Feb 10, 2022
d6440b0
Fix lint errors
duckontheweb Feb 10, 2022
01798fc
Add virtualenv to benchmark deps
duckontheweb Feb 10, 2022
8648b23
Fix artifact name, increase failure threshold
duckontheweb Feb 10, 2022
c0267bb
Merge branch 'main' into add/729-performance-benchmarks
gadomski Jan 9, 2023
6a00081
rm: benchmarks workflow
gadomski Jan 9, 2023
6a35b1e
refactor: use classes directly
gadomski Jan 9, 2023
0f574d8
refactor: move benchmarks up a level
gadomski Jan 9, 2023
1afea50
feat: add projection benchmarks
gadomski Jan 9, 2023
65155a4
feat: add large catalog benchmarks
gadomski Jan 9, 2023
3767735
fix: benchmark config
gadomski Jan 9, 2023
06ec7b8
feat: add benchmark docs
gadomski Jan 9, 2023
1ed23a8
ci: add benchmark check
gadomski Jan 9, 2023
12a08b5
ci: set the asv machine
gadomski Jan 9, 2023
34c4369
ci: install pystac for benchmarks
gadomski Jan 9, 2023
5b39551
Merge branch 'main' into add/729-performance-benchmarks
gadomski Jan 9, 2023
a170df5
docs: add more text about running benchmarks
gadomski Jan 11, 2023
3b09da4
bench: use timeraw for import
gadomski Jan 11, 2023
e023b4f
Merge branch 'main' into add/729-performance-benchmarks
gadomski Jan 11, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
name: Benchmarks

on:
  pull_request:
    types: [opened, reopened, synchronize, labeled]
  workflow_dispatch:

jobs:
  benchmark:
    # Run on manual dispatch, or on pull requests labeled "run-benchmarks".
    if: ${{ (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'run-benchmarks')) || github.event_name == 'workflow_dispatch' }}
    name: Linux
    runs-on: ubuntu-20.04

    strategy:
      # Allow other matrix jobs to complete if 1 fails
      fail-fast: false
      matrix:
        python-version:
          - "3.8"
          - "3.9"
          - "3.10"

    env:
      ASV_DIR: "./benchmarks"
      # Ratio passed to `asv continuous --factor` below
      ASV_FACTOR: 1.25
    steps:
      # We need the full repo to avoid this issue
      # https://github.com/actions/checkout/issues/23
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}

      - name: Cache dependencies
        uses: actions/cache@v2
        with:
          path: ~/.cache/pip
          # Cache based on OS, Python version, and dependency hash
          key: pip-benchmarks-${{ runner.os }}-python${{ matrix.python-version }}-${{ hashFiles('requirements-bench.txt') }}

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install -r requirements-bench.txt
          pip install -e .

      - name: Run benchmarks
        id: benchmark
        run: |
          set -x
          # ID this runner
          asv machine --yes
          # Manual (workflow_dispatch) runs carry no pull request payload, so
          # the base SHA is empty; fall back to the tip of main in that case.
          BASE_SHA="${{ github.event.pull_request.base.sha }}"
          if [ -z "$BASE_SHA" ]; then
            BASE_SHA="$(git rev-parse origin/main)"
          fi
          echo "Baseline: ${BASE_SHA} (${{ github.event.pull_request.base.label }})"
          echo "Contender: ${GITHUB_SHA} (${{ github.event.pull_request.head.label }})"
          # Run benchmarks for current commit against base
          ASV_OPTIONS="--split -e --interleave-rounds --factor $ASV_FACTOR"
          asv continuous $ASV_OPTIONS "${BASE_SHA}" "${GITHUB_SHA}" \
            | tee benchmarks.log
          # The pipe into tee hides asv's exit status, so fail the step based
          # on what was logged instead.
          if grep "Traceback \|failed\|PERFORMANCE DECREASED" benchmarks.log > /dev/null ; then
            exit 1
          fi
        working-directory: ${{ env.ASV_DIR }}

      - name: Add benchmark log to artifact
        if: always()
        run: |
          # This step runs even when an earlier step failed, so the log and
          # the results directory may not exist yet.
          mkdir -p .asv/results
          if [ -f benchmarks.log ]; then
            cp benchmarks.log .asv/results/
          fi
        working-directory: ${{ env.ASV_DIR }}

      - uses: actions/upload-artifact@v2
        if: always()
        with:
          name: asv-benchmark-results-${{ runner.os }}-${{ matrix.python-version }}
          path: ${{ env.ASV_DIR }}/.asv/results
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,6 @@ dmypy.json

# Cython debug symbols
cython_debug/

# asv environments
.asv
163 changes: 163 additions & 0 deletions benchmarks/asv.conf.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
{
// The version of the config file format. Do not change, unless
// you know what you are doing.
"version": 1,

// The name of the project being benchmarked
"project": "pystac",

// The project's homepage
"project_url": "https://pystac.readthedocs.io/",

// The URL or local path of the source code repository for the
// project being benchmarked
"repo": "..",

// Customizable commands for building, installing, and
// uninstalling the project. See asv.conf.json documentation.
//
// "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}"],
// "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],
// "build_command": [
// "python setup.py build",
// "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"
// ],

// List of branches to benchmark. If not provided, defaults to "master"
// (for git) or "default" (for mercurial).
"branches": ["main"], // for git
// "branches": ["default"], // for mercurial

// The DVCS being used. If not set, it will be automatically
// determined from "repo" by looking at the protocol in the URL
// (if remote), or by looking for special directories, such as
// ".git" (if local).
"dvcs": "git",

// The tool to use to create environments. May be "conda",
// "virtualenv" or other value depending on the plugins in use.
// If missing or the empty string, the tool will be automatically
// determined by looking for tools on the PATH environment
// variable.
"environment_type": "virtualenv",

// the base URL to show a commit for the project.
"show_commit_url": "http://github.com/stac-utils/pystac/commit/",

// The Pythons you'd like to test against. If not provided, defaults
// to the current version of Python used to run `asv`.
// "pythons": ["3.9"],

// The matrix of dependencies to test. Each key of the "req"
// requirements dictionary is the name of a package (in PyPI) and
// the values are version numbers. An empty list or empty string
// indicates to just test against the default (latest)
// version. null indicates that the package is to not be
// installed. If the package to be tested is only available from
// PyPi, and the 'environment_type' is conda, then you can preface
// the package name by 'pip+', and the package will be installed
// via pip (with all the conda available packages installed first,
// followed by the pip installed packages).
//
// The ``@env`` and ``@env_nobuild`` keys contain the matrix of
// environment variables to pass to build and benchmark commands.
// An environment will be created for every combination of the
// cartesian product of the "@env" variables in this matrix.
// Variables in "@env_nobuild" will be passed to every environment
// during the benchmark phase, but will not trigger creation of
// new environments. A value of ``null`` means that the variable
// will not be set for the current combination.
//
"matrix": {
"req": {
"orjson": [null, ""]
}
},

// Combinations of libraries/python versions can be excluded/included
// from the set to test. Each entry is a dictionary containing additional
// key-value pairs to include/exclude.
//
// An exclude entry excludes entries where all values match. The
// values are regexps that should match the whole string.
//
// An include entry adds an environment. Only the packages listed
// are installed. The 'python' key is required. The exclude rules
// do not apply to includes.
//
// In addition to package names, the following keys are available:
//
// - python
// Python version, as in the *pythons* variable above.
// - environment_type
// Environment type, as above.
// - sys_platform
// Platform, as in sys.platform. Possible values for the common
// cases: 'linux2', 'win32', 'cygwin', 'darwin'.
// - req
// Required packages
// - env
// Environment variables
// - env_nobuild
// Non-build environment variables
//
// "exclude": [
// {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
// {"environment_type": "conda", "req": {"six": null}}, // don't run without six on conda
// {"env": {"ENV_VAR_1": "val2"}}, // skip val2 for ENV_VAR_1
// ],
//
// "include": [
// // additional env for python2.7
// {"python": "2.7", "req": {"numpy": "1.8"}, "env_nobuild": {"FOO": "123"}},
// // additional env if run on windows+conda
// {"platform": "win32", "environment_type": "conda", "python": "2.7", "req": {"libpython": ""}},
// ],

// The directory (relative to the current directory) that benchmarks are
// stored in. If not provided, defaults to "benchmarks"
"benchmark_dir": "benchmarks",

// The directory (relative to the current directory) to cache the Python
// environments in. If not provided, defaults to "env"
"env_dir": ".asv/env",

// The directory (relative to the current directory) that raw benchmark
// results are stored in. If not provided, defaults to "results".
"results_dir": ".asv/results",

// The directory (relative to the current directory) that the html tree
// should be written to. If not provided, defaults to "html".
"html_dir": ".asv/html",

// The number of characters to retain in the commit hashes.
// "hash_length": 8,

// `asv` will cache results of the recent builds in each
// environment, making them faster to install next time. This is
// the number of builds to keep, per environment.
// "build_cache_size": 2,

// The commits after which the regression search in `asv publish`
// should start looking for regressions. Dictionary whose keys are
// regexps matching to benchmark names, and values corresponding to
// the commit (exclusive) after which to start looking for
// regressions. The default is to start from the first commit
// with results. If the commit is `null`, regression detection is
// skipped for the matching benchmark.
//
// "regressions_first_commits": {
// "some_benchmark": "352cdf", // Consider regressions only after this commit
// "another_benchmark": null, // Skip regression detection altogether
// },

// The thresholds for relative change in results, after which `asv
// publish` starts reporting regressions. Dictionary of the same
// form as in ``regressions_first_commits``, with values
// indicating the thresholds. If multiple entries match, the
// maximum is taken. If no entry matches, the default is 5%.
//
// "regressions_thresholds": {
// ".*": 0.25 // Change default threshold to 25%
// }
}
Empty file.
6 changes: 6 additions & 0 deletions benchmarks/benchmarks/_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
class Bench:
    """Shared asv timing settings for the benchmark classes that subclass it."""

    # asv ``repeat`` tuple (min_repeat, max_repeat, max_time): collect between
    # 10 and 50 samples, stopping once 2.0 seconds have been spent.
    repeat = (10, 50, 2.0)

    # Bump number of rounds to 4
    rounds = 4
18 changes: 18 additions & 0 deletions benchmarks/benchmarks/_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import os
from typing import Union, TYPE_CHECKING

# os.PathLike is only subscripted for the type checker; at runtime the plain
# class is used.
if TYPE_CHECKING:
    PathLike = os.PathLike[str]
else:
    PathLike = os.PathLike


def get_data_path(rel_path: Union[str, PathLike]) -> str:
    """Return the absolute path of a file under tests/data-files.

    ``rel_path`` is interpreted relative to the tests/data-files directory,
    which is located two levels above the directory containing this module.
    """
    relative = os.fspath(rel_path)
    two_levels_up = os.path.join(os.path.dirname(__file__), "..", "..")
    data_file = os.path.join(two_levels_up, "tests", "data-files", relative)
    return os.path.abspath(data_file)
43 changes: 43 additions & 0 deletions benchmarks/benchmarks/catalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import json
import os
import shutil
import tempfile
import pystac

from ._base import Bench
from ._util import get_data_path


class CatalogBench(Bench):
    """Benchmarks for (de)serializing the example Catalog."""

    def setup(self) -> None:
        """Create a scratch directory and load the example catalog fixtures."""
        self.temp_dir = tempfile.mkdtemp()

        self.stac_io = pystac.StacIO.default()

        self.catalog_path = get_data_path("examples/1.0.0/catalog.json")
        # Read the JSON explicitly as UTF-8 rather than relying on the
        # platform's default encoding.
        with open(self.catalog_path, encoding="utf-8") as src:
            self.catalog_dict = json.load(src)
        self.catalog = pystac.Catalog.from_file(self.catalog_path)

    def teardown(self) -> None:
        """Remove the scratch directory created in setup."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def time_catalog_from_file(self) -> None:
        """Deserialize a Catalog from file."""
        _ = pystac.Catalog.from_file(self.catalog_path)

    def time_catalog_from_dict(self) -> None:
        """Deserialize a Catalog from a dictionary."""
        _ = pystac.Catalog.from_dict(self.catalog_dict)

    def time_catalog_to_dict(self) -> None:
        """Serialize a Catalog to a dictionary."""
        self.catalog.to_dict(include_self_link=True)

    def time_catalog_save(self) -> None:
        """Serialize a Catalog to a JSON file."""
        self.catalog.save_object(
            include_self_link=True,
            dest_href=os.path.join(self.temp_dir, "time_catalog_save.json"),
            stac_io=self.stac_io,
        )
43 changes: 43 additions & 0 deletions benchmarks/benchmarks/collection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import json
import os
import shutil
import tempfile
import pystac

from ._base import Bench
from ._util import get_data_path


class CollectionBench(Bench):
    """Benchmarks for (de)serializing the example Collection."""

    def setup(self) -> None:
        """Create a scratch directory and load the example collection fixtures."""
        self.temp_dir = tempfile.mkdtemp()

        self.stac_io = pystac.StacIO.default()

        self.collection_path = get_data_path("examples/1.0.0/collection.json")
        # Read the JSON explicitly as UTF-8 rather than relying on the
        # platform's default encoding.
        with open(self.collection_path, encoding="utf-8") as src:
            self.collection_dict = json.load(src)
        self.collection = pystac.Collection.from_file(self.collection_path)

    def teardown(self) -> None:
        """Remove the scratch directory created in setup."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def time_collection_from_file(self) -> None:
        """Deserialize a Collection from file."""
        _ = pystac.Collection.from_file(self.collection_path)

    def time_collection_from_dict(self) -> None:
        """Deserialize a Collection from a dictionary."""
        _ = pystac.Collection.from_dict(self.collection_dict)

    def time_collection_to_dict(self) -> None:
        """Serialize a Collection to a dictionary."""
        self.collection.to_dict(include_self_link=True)

    def time_collection_save(self) -> None:
        """Serialize a Collection to a JSON file."""
        self.collection.save_object(
            include_self_link=True,
            dest_href=os.path.join(self.temp_dir, "time_collection_save.json"),
            stac_io=self.stac_io,
        )
14 changes: 14 additions & 0 deletions benchmarks/benchmarks/import_pystac.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from typing import Any


class ImportPySTACBench:
    """Benchmark the time taken to import pystac."""

    repeat = 10

    # Time a single call per sample. Together with the purge in ``setup`` this
    # keeps every timed call a real import instead of a ``sys.modules`` hit.
    # NOTE(review): relies on asv re-running ``setup`` before each sample.
    number = 1

    def setup(self, *args: Any, **kwargs: Any) -> None:
        """Prepare the import callable and drop any cached pystac modules."""
        import sys

        def import_pystac() -> None:
            import pystac  # noqa: F401

        self._import_pystac = import_pystac

        # Once imported, a module is served from sys.modules, so a repeated
        # ``import pystac`` would only time a cache lookup. Remove pystac and
        # its submodules so the next import executes them again. Other
        # packages that are already imported stay cached.
        cached = [
            name
            for name in sys.modules
            if name == "pystac" or name.startswith("pystac.")
        ]
        for name in cached:
            del sys.modules[name]

    def time_import_pystac(self) -> None:
        """Import pystac after its modules were purged from the import cache."""
        self._import_pystac()
Loading