Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions python/perspective/bench/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import subprocess
import venv
import tornado
from datetime import datetime
from timeit import timeit
sys.path.insert(1, os.path.join(os.path.dirname(__file__), '..'))
from perspective import Table, PerspectiveManager, PerspectiveTornadoHandler # noqa: E402
Expand Down Expand Up @@ -207,8 +208,9 @@ def host_results(self):
def write_results(self):
    """Write the benchmark results table to a timestamped Arrow file.

    No-op when no results table exists. Side effects: creates
    ``benchmark_<ISO timestamp>_.arrow`` in this module's directory and
    logs the chosen file name.
    """
    if self._table is None:
        # Nothing was benchmarked, so there is nothing to persist.
        return
    # NOTE(review): `datetime.isoformat()` output contains ':' characters,
    # which are invalid in Windows file names — confirm target platforms.
    name = "benchmark_{}_.arrow".format(datetime.now().isoformat())
    logging.info("Writing results to `{}`".format(name))
    arrow_path = os.path.join(os.path.dirname(__file__), name)
    with open(arrow_path, "wb") as file:
        arrow = self._table.view().to_arrow()
        file.write(arrow)
Expand Down
145 changes: 143 additions & 2 deletions python/perspective/bench/perspective_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@
import os
import sys
import subprocess
import random
import pandas as pd
from functools import partial
from bench import Benchmark, Suite, Runner, VirtualEnvHandler
from bench import Benchmark, Suite, Runner
sys.path.insert(1, os.path.join(os.path.dirname(__file__), '..'))
from perspective import Table # noqa: E402
from perspective.tests.common import superstore # noqa: E402
Expand All @@ -33,6 +35,9 @@ def make_meta(group, name):
"name": name
}

def empty_callback():
    """Do nothing.

    Registered via ``on_update`` on benchmark views so the engine must
    deliver every update notification without adding any per-callback
    work of its own.
    """
    return None


class PerspectiveBenchmark(Suite):

Expand All @@ -47,6 +52,13 @@ class PerspectiveBenchmark(Suite):
def __init__(self):
"""Create a benchmark suite for `perspective-python`."""
tbl = Table(SUPERSTORE)
self._schema = tbl.schema()
self._df_schema = tbl.schema()
# mutate schema to have some integer columns, so as to force numpy
# float-to-int demotion
self._df_schema["Sales"] = int
self._df_schema["Profit"] = int
self._df_schema["Quantity"] = int
self._view = tbl.view()
self.dict = self._view.to_dict()
self.records = self._view.to_records()
Expand All @@ -55,6 +67,10 @@ def __init__(self):
self.arrow = self._view.to_arrow()
self._table = tbl

def _get_update_data(self, n=30):
"""Retrieve n rows from self.records to be used as update data."""
return random.sample(self.records, n)

def register_benchmarks(self):
"""Register all the benchmark methods - each method creates a number of
lambdas, and then calls `setattr` on the Suite itself so that the
Expand All @@ -65,6 +81,14 @@ def register_benchmarks(self):
self.benchmark_view_one()
self.benchmark_view_two()
self.benchmark_view_two_column_only()
self.benchmark_view_zero_updates()
self.benchmark_view_one_updates()
self.benchmark_view_two_updates()
self.benchmark_view_two_column_only_updates()
self.benchmark_view_zero_df_updates()
self.benchmark_view_one_df_updates()
self.benchmark_view_two_df_updates()
self.benchmark_view_two_column_only_df_updates()
self.benchmark_to_format_zero()
self.benchmark_to_format_one()
self.benchmark_to_format_two()
Expand Down Expand Up @@ -92,6 +116,38 @@ def benchmark_view_zero(self):
func = Benchmark(lambda: self._table.view(), meta=make_meta("view", "zero"))
setattr(self, "view_zero", func)

def benchmark_view_zero_updates(self):
    """Benchmark how long it takes for each update to resolve fully, using
    the on update callback that forces resolution of updates across
    25 zero-sided views."""
    table = Table(self._schema)
    # Register a no-op callback on every view so the engine must notify
    # each of them for every update.
    views = [table.view() for _ in range(25)]
    for v in views:
        v.on_update(empty_callback)
    update_data = self._get_update_data(1000)

    def resolve_update():
        table.update(update_data)
        # NOTE(review): size() appears to force the pending update to be
        # processed before returning — confirm with engine semantics.
        table.size()

    func = Benchmark(resolve_update, meta=make_meta("update", "zero"))
    setattr(self, "update_zero", func)

def benchmark_view_zero_df_updates(self):
    """Benchmark how long it takes for each update to resolve fully, using
    the on update callback that forces resolution of updates across
    25 zero-sided views. This version updates using dataframes, and is
    designed to compare the overhead of dataframe loading vs. regular
    data structure loading."""
    table = Table(self._df_schema)
    # Register a no-op callback on every view so the engine must notify
    # each of them for every update.
    views = [table.view() for _ in range(25)]
    for v in views:
        v.on_update(empty_callback)
    update_data = pd.DataFrame(self._get_update_data(1000))

    def resolve_update():
        table.update(update_data)
        # NOTE(review): size() appears to force the pending update to be
        # processed before returning — confirm with engine semantics.
        table.size()

    func = Benchmark(resolve_update, meta=make_meta("update", "zero_df"))
    setattr(self, "update_zero_df", func)

def benchmark_view_one(self):
"""Benchmark view creation with different pivots."""
for pivot in PerspectiveBenchmark.ROW_PIVOT_OPTIONS:
Expand All @@ -102,6 +158,34 @@ def benchmark_view_one(self):
func = Benchmark(lambda: view_constructor(), meta=test_meta)
setattr(self, "view_{0}".format(test_meta["name"]), func)

def benchmark_view_one_updates(self):
    """Benchmark how long it takes for each update to resolve fully, using
    the on update callback that forces resolution of updates across
    25 one-sided (row-pivoted) views."""
    tbl = Table(self._schema)
    pivoted_views = []
    for _ in range(25):
        pivoted_views.append(tbl.view(row_pivots=["State", "City"]))
    for view in pivoted_views:
        view.on_update(empty_callback)
    data = self._get_update_data(1000)

    def apply_and_resolve():
        tbl.update(data)
        tbl.size()

    bench = Benchmark(apply_and_resolve, meta=make_meta("update", "one"))
    setattr(self, "update_one", bench)

def benchmark_view_one_df_updates(self):
    """Benchmark dataframe updates for one-sided views."""
    tbl = Table(self._df_schema)
    pivoted_views = []
    for _ in range(25):
        pivoted_views.append(tbl.view(row_pivots=["State", "City"]))
    for view in pivoted_views:
        view.on_update(empty_callback)
    frame = pd.DataFrame(self._get_update_data(1000))

    def apply_and_resolve():
        tbl.update(frame)
        tbl.size()

    bench = Benchmark(apply_and_resolve, meta=make_meta("update", "one_df"))
    setattr(self, "update_one_df", bench)

def benchmark_view_two(self):
"""Benchmark view creation with row and column pivots."""
for i in range(len(PerspectiveBenchmark.ROW_PIVOT_OPTIONS)):
Expand All @@ -116,6 +200,35 @@ def benchmark_view_two(self):
func = Benchmark(lambda: view_constructor(), meta=test_meta)
setattr(self, "view_{0}".format(test_meta["name"]), func)

def benchmark_view_two_updates(self):
    """Benchmark how long it takes for each update to resolve fully, using
    the on update callback that forces resolution of updates across
    25 two-sided (row- and column-pivoted) views."""
    tbl = Table(self._schema)
    pivoted_views = []
    for _ in range(25):
        pivoted_views.append(
            tbl.view(
                row_pivots=["State", "City"],
                column_pivots=["Category", "Sub-Category"],
            )
        )
    for view in pivoted_views:
        view.on_update(empty_callback)
    data = self._get_update_data(1000)

    def apply_and_resolve():
        tbl.update(data)
        tbl.size()

    bench = Benchmark(apply_and_resolve, meta=make_meta("update", "two"))
    setattr(self, "update_two", bench)


def benchmark_view_two_df_updates(self):
    """Benchmark dataframe updates for two-sided views."""
    tbl = Table(self._df_schema)
    pivoted_views = []
    for _ in range(25):
        pivoted_views.append(
            tbl.view(
                row_pivots=["State", "City"],
                column_pivots=["Category", "Sub-Category"],
            )
        )
    for view in pivoted_views:
        view.on_update(empty_callback)
    frame = pd.DataFrame(self._get_update_data(1000))

    def apply_and_resolve():
        tbl.update(frame)
        tbl.size()

    bench = Benchmark(apply_and_resolve, meta=make_meta("update", "two_df"))
    setattr(self, "update_two_df", bench)

def benchmark_view_two_column_only(self):
"""Benchmark column-only view creation."""
for pivot in PerspectiveBenchmark.COLUMN_PIVOT_OPTIONS:
Expand All @@ -127,6 +240,34 @@ def benchmark_view_two_column_only(self):
func = Benchmark(lambda: view_constructor(), meta=test_meta)
setattr(self, "view_{0}".format(test_meta["name"]), func)

def benchmark_view_two_column_only_updates(self):
    """Benchmark how long it takes for each update to resolve fully, using
    the on update callback that forces resolution of updates across
    25 column-only views."""
    tbl = Table(self._schema)
    pivoted_views = []
    for _ in range(25):
        pivoted_views.append(
            tbl.view(column_pivots=["Category", "Sub-Category"])
        )
    for view in pivoted_views:
        view.on_update(empty_callback)
    data = self._get_update_data(1000)

    def apply_and_resolve():
        tbl.update(data)
        tbl.size()

    bench = Benchmark(
        apply_and_resolve, meta=make_meta("update", "two_column_only"))
    setattr(self, "update_two_column_only", bench)

def benchmark_view_two_column_only_df_updates(self):
    """Benchmark dataframe updates for two-sided column only views."""
    tbl = Table(self._df_schema)
    pivoted_views = []
    for _ in range(25):
        pivoted_views.append(
            tbl.view(column_pivots=["Category", "Sub-Category"])
        )
    for view in pivoted_views:
        view.on_update(empty_callback)
    frame = pd.DataFrame(self._get_update_data(1000))

    def apply_and_resolve():
        tbl.update(frame)
        tbl.size()

    bench = Benchmark(
        apply_and_resolve, meta=make_meta("update", "two_column_only_df"))
    setattr(self, "update_two_column_only_df", bench)

def benchmark_to_format_zero(self):
"""Benchmark each `to_format` method."""
for name in ("numpy", "dict", "records", "df", "arrow"):
Expand Down Expand Up @@ -179,7 +320,7 @@ def benchmark_to_format_two_column_only(self):


if __name__ == "__main__":
VERSION = sys.argv[1]
VERSION = "master"

# Initialize a suite and runner, then call `.run()`
suite = PerspectiveBenchmark()
Expand Down
3 changes: 1 addition & 2 deletions python/perspective/bench/run_perspective_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,7 @@
for version in VERSIONS[1:]:
print("Installing perspective-python=={}".format(version))
subprocess.check_output(
"yes | python3 -m pip uninstall perspective-python".format(version),
shell=True)
"yes | python3 -m pip uninstall perspective-python", shell=True)
subprocess.check_output(
"yes | python3 -m pip install perspective-python=={}".format(version),
shell=True)
Expand Down