Skip to content

Improve benchmarks #168

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions asv_bench/asv.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@
//
// "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}"],
// "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],
// "build_command": [
// "python setup.py build",
// "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"
// ],
"build_command": [
"python -m pip install build",
"python -m build --wheel -o {build_cache_dir} {build_dir}"
//"PIP_NO_BUILD_ISOLATION=false python -m pip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"
],

// List of branches to benchmark. If not provided, defaults to "master"
// (for git) or "default" (for mercurial).
Expand Down
24 changes: 24 additions & 0 deletions asv_bench/benchmarks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import numpy as np


def parameterized(names, params):
"""
Copied from xarray benchmarks:
Expand All @@ -10,3 +13,24 @@ def decorator(func):
return func

return decorator


def randn(shape, frac_nan=None, chunks=None, seed=0):
    """
    Return seeded standard-normal data, optionally sprinkled with NaNs.

    Copied from xarray benchmarks:
    https://github.com/pydata/xarray/blob/main/asv_bench/benchmarks/__init__.py#L32-L46

    Parameters
    ----------
    shape : int or tuple of int
        Shape of the returned array.
    frac_nan : float, optional
        When given, ``int(x.size * frac_nan)`` flat indices are drawn with
        ``rng.choice`` and set to NaN. The draw uses ``choice``'s defaults, so
        repeated indices are possible and the realised NaN fraction can be
        slightly lower than requested.
    chunks : optional
        When ``None`` a NumPy array is produced. Otherwise the value is
        forwarded to ``dask.array.random.RandomState.standard_normal`` and a
        dask array is produced (dask is imported lazily, only on this path).
    seed : int, default 0
        Seed for the random state, so repeated calls give identical data.

    Returns
    -------
    Array of the requested shape (NumPy or dask, depending on ``chunks``).
    """
    if chunks is None:
        rng = np.random.RandomState(seed)
        x = rng.standard_normal(shape)
    else:
        import dask.array as da

        rng = da.random.RandomState(seed)
        x = rng.standard_normal(shape, chunks=chunks)

    if frac_nan is not None:
        # Passing the population size directly samples from arange(x.size)
        # without first materialising every index as range(x.size) would.
        inds = rng.choice(x.size, int(x.size * frac_nan))
        # NOTE(review): relies on ``.flat`` item assignment; confirm this is
        # supported for dask arrays before combining ``chunks`` with ``frac_nan``.
        x.flat[inds] = np.nan

    return x
39 changes: 39 additions & 0 deletions asv_bench/benchmarks/accessors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import numpy as np
import pandas as pd
import xarray as xr

import xbatcher # noqa: F401

from . import parameterized, randn

# Lengths of the x, y and t dimensions of the benchmark dataset.
nx, ny, nt = 250, 50, 10

# Random (x, y, t) data built once at import time; frac_nan=0.1 asks randn to
# overwrite roughly a tenth of the entries with NaN.
randn_xyt = randn((nx, ny, nt), frac_nan=0.1)


class Accessor:
    """Benchmarks for building batches through the dataset's ``batch`` accessor."""

    def setup(self, *args, **kwargs):
        """
        Create ``self.ds``: one variable ``var1`` over dimensions (x, y, t).

        Any positional or keyword arguments are accepted and ignored.
        """
        coordinates = {
            "x": np.arange(nx),
            "y": np.linspace(0, 1, ny),
            "t": pd.date_range("1970-01-01", periods=nt, freq="D"),
        }
        variables = {"var1": (("x", "y", "t"), randn_xyt)}
        self.ds = xr.Dataset(variables, coords=coordinates)

    @parameterized(
        ["input_dims"],
        ([{"x": 10}, {"x": 10, "y": 5}, {"x": 10, "y": 5, "t": 2}],),
    )
    def time_input_dims(self, input_dims):
        """
        Benchmark the simple batch generation case via the xarray accessor:
        build a generator for ``input_dims`` and index its first batch.
        Equivalent to subset of ``time_batch_input()``.
        """
        generator = self.ds.batch.generator(input_dims=input_dims)
        generator[0]
172 changes: 172 additions & 0 deletions asv_bench/benchmarks/batches.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
import numpy as np
import pandas as pd
import xarray as xr

from xbatcher import BatchGenerator

from . import randn

# Lengths of the x, y and t dimensions of the benchmark dataset.
nx, ny, nt = 250, 50, 10

# Random (x, y, t) data built once at import time; frac_nan=0.1 asks randn to
# overwrite roughly a tenth of the entries with NaN.
randn_xyt = randn((nx, ny, nt), frac_nan=0.1)


class Base:
    """Shared fixture: provides the dataset that subclasses batch over."""

    def setup(self):
        """Create ``self.ds``: one variable ``var1`` over dimensions (x, y, t)."""
        coordinates = {
            "x": np.arange(nx),
            "y": np.linspace(0, 1, ny),
            "t": pd.date_range("1970-01-01", periods=nt, freq="D"),
        }
        variables = {"var1": (("x", "y", "t"), randn_xyt)}
        self.ds = xr.Dataset(variables, coords=coordinates)


class NoPreload(Base):
    """
    Fetch a batch from the generator without computing dask arrays.
    """

    def setup(self):
        """Chunk the base dataset along ``t`` and build a non-preloading generator."""
        super().setup()
        chunked = self.ds.chunk({"t": 2})
        self.bg = BatchGenerator(
            chunked,
            input_dims={"t": 2},
            preload_batch=False,
        )

    def time_next_batch(self):
        """
        Fetch the first batch from a fresh iterator over the generator.
        """
        batches = iter(self.bg)
        next(batches)


class OneInputDim(Base):
    """
    Fetch a batch from a generator configured with a single input dimension.
    """

    def setup(self):
        """Build a generator that windows the base dataset along ``x`` only."""
        super().setup()
        window = {"x": 10}
        self.bg = BatchGenerator(self.ds, input_dims=window)

    def time_next_batch(self):
        """
        Fetch the first batch from a fresh iterator over the generator.
        """
        batches = iter(self.bg)
        next(batches)


class AllInputDim(Base):
    """
    Fetch a batch from a generator whose input_dims name every dimension.
    """

    def setup(self):
        """Build a generator that windows the base dataset along x, y and t."""
        super().setup()
        window = {"x": 10, "y": 10, "t": 5}
        self.bg = BatchGenerator(self.ds, input_dims=window)

    def time_next_batch(self):
        """
        Fetch the first batch from a fresh iterator over the generator.
        """
        batches = iter(self.bg)
        next(batches)


class InputDimInputOverlap(Base):
    """
    Fetch a batch from a generator using both input_dims and input_overlap.
    """

    def setup(self):
        """Build a generator with x/y windows of 10 and an overlap of 5 on each."""
        super().setup()
        window = {"x": 10, "y": 10}
        overlap = {"x": 5, "y": 5}
        self.bg = BatchGenerator(self.ds, input_dims=window, input_overlap=overlap)

    def time_next_batch(self):
        """
        Fetch the first batch from a fresh iterator over the generator.
        """
        batches = iter(self.bg)
        next(batches)


class InputDimConcat(Base):
    """
    Fetch a batch from a generator using input_dims with concat_input_dims.
    """

    def setup(self):
        """Build a generator with x/y windows of 10 and ``concat_input_dims=True``."""
        super().setup()
        window = {"x": 10, "y": 10}
        self.bg = BatchGenerator(self.ds, input_dims=window, concat_input_dims=True)

    def time_next_batch(self):
        """
        Fetch the first batch from a fresh iterator over the generator.
        """
        batches = iter(self.bg)
        next(batches)


class InputDimBatchDim(Base):
    """
    Fetch a batch from a generator using input_dims together with batch_dims.
    """

    def setup(self):
        """Build a generator with x/y windows of 10 and a ``t`` batch dimension of 2."""
        super().setup()
        window = {"x": 10, "y": 10}
        batching = {"t": 2}
        self.bg = BatchGenerator(self.ds, input_dims=window, batch_dims=batching)

    def time_next_batch(self):
        """
        Fetch the first batch from a fresh iterator over the generator.
        """
        batches = iter(self.bg)
        next(batches)


class InputDimBatchDimConcat(Base):
    """
    Fetch a batch from a generator using input_dims, batch_dims and
    concat_input_dims.
    """

    def setup(self):
        """Build a generator with x/y windows of 5, x/y batch dims of 10, concatenated."""
        super().setup()
        options = {
            "input_dims": {"x": 5, "y": 5},
            "batch_dims": {"x": 10, "y": 10},
            "concat_input_dims": True,
        }
        self.bg = BatchGenerator(self.ds, **options)

    def time_next_batch(self):
        """
        Fetch the first batch from a fresh iterator over the generator.
        """
        batches = iter(self.bg)
        next(batches)


class InputDimInputOverlapConcat(Base):
    """
    Fetch a batch from a generator using input_dims, input_overlap and
    concat_input_dims.
    """

    def setup(self):
        """Build a generator with x/y windows of 10, overlap of 5, concatenated."""
        super().setup()
        options = {
            "input_dims": {"x": 10, "y": 10},
            "input_overlap": {"x": 5, "y": 5},
            "concat_input_dims": True,
        }
        self.bg = BatchGenerator(self.ds, **options)

    def time_next_batch(self):
        """
        Fetch the first batch from a fresh iterator over the generator.
        """
        batches = iter(self.bg)
        next(batches)
Loading