Skip to content

Performance regression relative to 0.7.4 release #2470

Closed
@amyskov

Description

@amyskov

System information

  • OS Platform and Distribution (e.g., Linux Ubuntu 16.04): Ubuntu 18.04
  • Modin version (modin.__version__): 41d3111
  • Python version: 3.8.6
  • Code we can use to reproduce:
reproducer
import subprocess
from timeit import default_timer as timer
import json
import os

# Path of the CSV file loaded by q_read_csv(); point it at a local copy of the dataset.
data_file = "/dataset/path/yellow_tripdata_2015-01.csv"
# Number of benchmark runs that eval_modin_bench() requests from bench_iterations().
iterations_number = 3
# File to which eval_modin_bench() appends the timings of the evaluated commit.
report_file = "regression_report.txt"
# File to which eval_modin_bench() appends the good/bad ratios computed by metric().
report_metric_file = "regression_metric_report.txt"
# File holding the reference ("good" commit) timings read by the __main__ section.
report_modin_good = "modin_good_results.txt"
# Threshold for the mean good/bad time ratio: metric() reports "good" only when
# the mean ratio is strictly greater than this value.
max_g2b_deviation = 0.8 # good/bad

def get_git_revision_hash(encoding="ascii"):
    """Return the output of ``git rev-parse HEAD`` with whitespace stripped.

    ``encoding`` is forwarded to ``subprocess.check_output`` so the command
    output is returned as ``str``. The command runs in the current working
    directory; ``subprocess.check_output`` raises ``CalledProcessError`` when
    git exits with a non-zero status.
    """
    command = ['git', 'rev-parse', 'HEAD']
    raw_output = subprocess.check_output(command, encoding=encoding)
    return raw_output.strip()

def measure(func, *args, **kw):
    """Call ``func(*args, **kw)`` and time the call.

    Returns a ``(result, elapsed)`` tuple where ``result`` is whatever
    ``func`` returned and ``elapsed`` is the difference between two
    ``timer()`` readings taken right before and right after the call.
    """
    started = timer()
    outcome = func(*args, **kw)
    elapsed = timer() - started
    return outcome, elapsed

def q_read_csv(pd):
    """Read ``data_file`` with ``pd.read_csv`` and return the resulting frame.

    ``pd`` is the pandas-compatible module to benchmark. The two
    pickup/dropoff timestamp columns are parsed as dates and ``quoting=3``
    (the value of ``csv.QUOTE_NONE``) is passed through unchanged.
    """
    date_columns = ["tpep_pickup_datetime", "tpep_dropoff_datetime"]
    frame = pd.read_csv(data_file, parse_dates=date_columns, quoting=3)
    # The repr() value is discarded.
    # NOTE(review): presumably this forces the result to be fully computed so
    # that measure() times the whole operation -- confirm.
    repr(frame)
    return frame

def q_reductions(df):
    """Benchmark query for a reduction: return ``df.count()``."""
    counts = df.count()
    # The repr() value is discarded.
    # NOTE(review): presumably this forces the result to be fully computed so
    # that measure() times the whole operation -- confirm.
    repr(counts)
    return counts

def q_map_operations(df):
    """Benchmark query for a map operation: return ``df.isnull()``."""
    null_mask = df.isnull()
    # The repr() value is discarded.
    # NOTE(review): presumably this forces the result to be fully computed so
    # that measure() times the whole operation -- confirm.
    repr(null_mask)
    return null_mask

def q_appy(df):
    """Benchmark query for ``apply``: round every ``"trip_distance"`` value.

    Returns the result of applying the builtin ``round`` to the
    ``"trip_distance"`` column of ``df``.
    """
    distances = df["trip_distance"]
    rounded = distances.apply(round)
    # The repr() value is discarded.
    # NOTE(review): presumably this forces the result to be fully computed so
    # that measure() times the whole operation -- confirm.
    repr(rounded)
    return rounded

def q_add_column(df, col):
    """Benchmark query for adding a column.

    Stores ``col`` in ``df`` under the ``"rounded_trip_distance"`` key and
    returns the same ``df`` object, i.e. the argument is modified in place.
    """
    df["rounded_trip_distance"] = col
    # The repr() value is discarded.
    # NOTE(review): presumably this forces the frame to be fully computed so
    # that measure() times the whole operation -- confirm.
    repr(df)
    return df

def q_groupby_agg(df):
    """Benchmark query for groupby + aggregation.

    Groups ``df`` by the ``"rounded_trip_distance"`` column and returns the
    per-group ``count()``.
    """
    grouped = df.groupby(by="rounded_trip_distance")
    group_counts = grouped.count()
    # The repr() value is discarded.
    # NOTE(review): presumably this forces the result to be fully computed so
    # that measure() times the whole operation -- confirm.
    repr(group_counts)
    return group_counts

def bench(pd):
    """Run every benchmark query once and return the elapsed time of each.

    ``pd`` is the pandas-compatible module handed to ``q_read_csv``. The
    returned dict maps ``"t_<query>"`` keys to the elapsed time reported by
    ``measure`` for that query.

    The order of the steps matters: the frame produced by ``q_read_csv`` feeds
    all later queries, the ``q_appy`` result is the column that
    ``q_add_column`` stores in the frame, and ``q_groupby_agg`` groups by that
    newly added column.
    """
    df, t_read_csv = measure(q_read_csv, pd)
    _, t_reductions = measure(q_reductions, df)
    _, t_map_operations = measure(q_map_operations, df)
    col_apply, t_apply = measure(q_appy, df)
    _, t_add_column = measure(q_add_column, df, col_apply)
    _, t_groupby_agg = measure(q_groupby_agg, df)

    return {
        "t_read_csv": t_read_csv,
        "t_reductions": t_reductions,
        "t_map_operations": t_map_operations,
        "t_apply": t_apply,
        "t_add_column": t_add_column,
        "t_groupby_agg": t_groupby_agg,
    }

def bench_iterations(iterations=1):
    """Run ``bench`` ``iterations`` times and keep the best time per query.

    Parameters
    ----------
    iterations : int
        Number of complete benchmark runs; must be at least 1.

    Returns
    -------
    dict
        Maps every key produced by ``bench`` to the minimum value observed
        for that key across all runs.

    Raises
    ------
    ValueError
        If ``iterations`` is smaller than 1. Without this check the function
        would fail later with an unhelpful ``KeyError`` because there would
        be no first run to take the query names from.
    """
    if iterations < 1:
        raise ValueError(f"iterations must be >= 1, got {iterations}")

    # Imported lazily so that modin is only loaded when a benchmark is
    # actually run.
    import modin.pandas as pd

    runs = []
    for _ in range(iterations):
        # Re-applied before every run, exactly as the per-iteration setup did.
        # NOTE(review): presumably this limits the number of partitions modin
        # uses -- confirm the attribute is honored by the modin version under test.
        pd.DEFAULT_NPARTITIONS = 4
        runs.append(bench(pd))

    # The first run defines the set (and order) of query names.
    return {query: min(run[query] for run in runs) for query in runs[0]}

def make_report(result, report_file=report_file):
    """Append ``result`` to ``report_file`` as a single JSON-encoded line.

    The file is opened in append mode, so earlier lines are kept and every
    call adds one more line.
    """
    with open(report_file, mode="a") as report:
        serialized = json.dumps(result)
        report.write(serialized + "\n")

def metric(good_results: dict, bad_results: dict):
    """Compare reference timings with the timings of the evaluated commit.

    For every key of ``good_results`` the ratio
    ``good_results[key] / bad_results[key]`` is computed; every such key must
    also be present in ``bad_results``.

    Returns ``(coefficients, good)``: ``coefficients`` holds the per-key
    ratios plus a ``"version"`` entry (current git revision hash) and a
    ``"mean"`` entry (arithmetic mean of the ratios); ``good`` is ``True``
    when that mean is strictly greater than ``max_g2b_deviation``.
    """
    coefficients = {}
    for name, good_time in good_results.items():
        assert name in bad_results
        coefficients[name] = good_time / bad_results[name]

    # The mean is taken before the non-numeric "version" entry is added.
    mean = sum(coefficients.values()) / len(coefficients)
    good = mean > max_g2b_deviation

    coefficients["version"] = get_git_revision_hash()
    coefficients["mean"] = mean
    return coefficients, good

def eval_modin_bench(good_results):
    """Benchmark the current checkout and compare it with ``good_results``.

    Runs ``bench_iterations`` with ``iterations_number`` runs, computes the
    ratios with ``metric``, appends the timings to ``report_file`` and the
    ratios to ``report_metric_file``, prints both, and returns the boolean
    verdict produced by ``metric``.
    """
    current_timings = bench_iterations(iterations=iterations_number)
    ratios, good = metric(good_results, current_timings)

    # Tag the timings with the revision only after metric() has consumed
    # them, so that the "version" entry never takes part in the comparison.
    current_timings["version"] = get_git_revision_hash()

    reports = ((current_timings, report_file), (ratios, report_metric_file))
    for payload, target in reports:
        make_report(payload, report_file=target)

    print("reults: \n", current_timings)
    print("metrics: \n", ratios)

    return good

def remove_old_reports():
    """Delete ``report_file`` and ``report_metric_file`` if they exist."""
    for path in (report_file, report_metric_file):
        if not os.path.exists(path):
            continue
        os.remove(path)

def get_good_results(fname):
    """Load the reference ("good") benchmark results stored in ``fname``.

    The file is first parsed as a single JSON document. Because
    ``make_report`` appends one JSON object per line, a reference file that
    was written more than once contains several documents and cannot be
    parsed as one; in that case the last non-empty line (the most recently
    appended record) is parsed and returned instead.

    Raises
    ------
    json.JSONDecodeError
        If the content is neither a single JSON document nor a multi-line
        file whose last non-empty line is valid JSON.
    """
    with open(fname) as json_file:
        content = json_file.read()

    try:
        return json.loads(content)
    except json.JSONDecodeError:
        records = [line for line in content.splitlines() if line.strip()]
        if len(records) < 2:
            # Nothing to fall back to: re-raise the original parse error.
            raise
        return json.loads(records[-1])
    

if __name__ == "__main__":
    # Uncomment the lines below to measure the reference (good commit)
    # timings and store them in `report_modin_good`:
    # remove_old_reports()

    # reults_modin_good = bench_iterations(iterations=iterations_number)
    # print(reults_modin_good)
    # make_report(reults_modin_good, report_file=report_modin_good)

    # Benchmark the current checkout against the stored reference timings.
    good = eval_modin_bench(get_good_results(report_modin_good))
    print("good", good)

We found that the performance of some operations from the example notebook has degraded relative to the 0.7.4 release. The measurement results are listed below as the ratio of query execution times, t_0.7.4 / t_master; values below 1 indicate a degradation from the 0.7.4 release to the current master.

read_csv 1.23 (reading from local csv)
reductions 0.98
map operations 0.62
apply operation 0.95
add column operation 0.23
groupby.agg operation 0.34

mean value for all queries 0.72

Source code / logs

Metadata

Metadata

Assignees

Labels

bug 🦗Something isn't working

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions