Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 24 additions & 8 deletions cpp/perspective/src/cpp/sparse_tree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1127,19 +1127,13 @@ t_stree::update_agg_table(t_uindex nidx, t_agg_update_info& info,
new_value.set(reduce_from_gstate<
std::function<t_tscalar(std::vector<t_tscalar>&)>>(gstate,
expression_master_table, spec.get_dependencies()[0].name(),
pkeys, [](std::vector<t_tscalar>& values) {
pkeys, [&](std::vector<t_tscalar>& values) {
if (values.size() == 0) {
return t_tscalar();
} else if (values.size() == 1) {
return values[0];
} else {
std::vector<t_tscalar>::iterator middle
= values.begin() + (values.size() / 2);

std::nth_element(
values.begin(), middle, values.end());

return *middle;
return get_aggregate_median(values);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a suggestion - if we're going to factor this logic out into a function called get_aggregate_median(values), maybe we should also move the 0 and 1 cases into that function, to 1) make it complete and 2) reduce the entire closure body to a single method call.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

}
}));

Expand Down Expand Up @@ -2020,6 +2014,28 @@ t_stree::get_aggregate(t_index idx, t_index aggnum) const {
return extract_aggregate(m_aggspecs[aggnum], c, agg_ridx, agg_pridx);
}

/**
 * @brief Compute the median of a list of scalars.
 *
 * Returns a default-constructed `t_tscalar` for empty input and the sole
 * element for single-element input. For an even number of floating-point
 * values the result is the mean of the two middle values. In every other
 * case the result is the element that would sit at index `size / 2` if the
 * values were sorted, i.e. the upper of the two middle values when the
 * count is even.
 *
 * @param values scalars to aggregate; reordered in place by the selection
 * algorithm, so callers must not rely on their order afterwards.
 * @return the median as described above.
 */
t_tscalar
t_stree::get_aggregate_median(std::vector<t_tscalar>& values) const {
    const auto size = values.size();

    if (size == 0) {
        return t_tscalar();
    }

    if (size == 1) {
        return values[0];
    }

    // Decide before partitioning: `nth_element` reorders `values`, so
    // `values[0]` would refer to a different element afterwards. Averaging
    // is limited to floating-point scalars so that every other type yields
    // a value that is actually present in the input.
    const bool average_middles
        = size % 2 == 0 && values[0].is_floating_point();

    const auto upper_middle = values.begin() + (size / 2);
    std::nth_element(values.begin(), upper_middle, values.end());

    if (!average_middles) {
        return *upper_middle;
    }

    // After the partition every element before `upper_middle` compares no
    // greater than it, but that prefix is left in unspecified order: the
    // lower middle is the largest element of the prefix, which is not
    // necessarily the one stored immediately before `upper_middle`.
    const auto lower_middle
        = std::max_element(values.begin(), upper_middle);

    t_tscalar median_average;
    median_average.set(
        (*lower_middle + *upper_middle) / static_cast<t_tscalar>(2));
    return median_average;
}

void
t_stree::get_child_indices(t_index idx, std::vector<t_index>& out_data) const {
t_index num_children = get_num_children(idx);
Expand Down
2 changes: 2 additions & 0 deletions cpp/perspective/src/include/perspective/sparse_tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,8 @@ class PERSPECTIVE_EXPORT t_stree {

t_tscalar get_aggregate(t_index idx, t_index aggnum) const;

t_tscalar get_aggregate_median(std::vector<t_tscalar>& values) const;

void get_child_indices(t_index idx, std::vector<t_index>& out_data) const;

void set_alerts_enabled(bool enabled_state);
Expand Down
35 changes: 27 additions & 8 deletions packages/perspective/test/js/expressions/functionality.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -2491,11 +2491,21 @@ const perspective = require("@finos/perspective");
});

test("Should be able to aggregate a numeric expression column.", async function () {
const table = await perspective.table({

const data = {
x: [1, 2, 3, 4],
y: [100, 200, 300, 400],
z: [1.5, 2.5, 3.5, 4.5],
});
}
const schema = {
x:'float',
y:'float',
z:'float'
}

const table = await perspective.table(schema);
table.update(data)

const view = await table.view({
group_by: ['"x" + "z"'],
aggregates: {
Expand All @@ -2507,8 +2517,8 @@ const perspective = require("@finos/perspective");
const result = await view.to_columns();
expect(result).toEqual({
__ROW_PATH__: [[], [2.5], [4.5], [6.5], [8.5]],
'"x" + "z"': [6.5, 2.5, 4.5, 6.5, 8.5],
x: [3, 1, 2, 3, 4],
'"x" + "z"': [5.5, 2.5, 4.5, 6.5, 8.5],
x: [2.5, 1, 2, 3, 4],
y: [1000, 100, 200, 300, 400],
z: [12, 1.5, 2.5, 3.5, 4.5],
});
Expand Down Expand Up @@ -2540,11 +2550,20 @@ const perspective = require("@finos/perspective");
});

test("Should be able to aggregate a numeric expression column that aliases a real column.", async function () {
const table = await perspective.table({
const data = {
x: [1, 2, 3, 4],
y: [100, 200, 300, 400],
z: [1.5, 2.5, 3.5, 4.5],
});
}
const schema = {
x:'float',
y:'float',
z:'float'
}

const table = await perspective.table(schema);
table.update(data)

const view = await table.view({
group_by: ['"x"'],
aggregates: {
Expand All @@ -2556,8 +2575,8 @@ const perspective = require("@finos/perspective");
const result = await view.to_columns();
expect(result).toEqual({
__ROW_PATH__: [[], [1], [2], [3], [4]],
'"x"': [3, 1, 2, 3, 4],
x: [3, 1, 2, 3, 4],
'"x"': [2.5, 1, 2, 3, 4],
x: [2.5, 1, 2, 3, 4],
y: [1000, 100, 200, 300, 400],
z: [12, 1.5, 2.5, 3.5, 4.5],
});
Expand Down
46 changes: 45 additions & 1 deletion python/perspective/perspective/tests/core/test_aggregates.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from pytest import raises
from perspective import PerspectiveError, PerspectiveViewer,\
PerspectiveWidget, Aggregate
PerspectiveWidget, Aggregate, Table


class TestAggregates:
Expand Down Expand Up @@ -86,3 +86,47 @@ def test_aggregates_viewer_set_all(self):
for agg in Aggregate:
viewer.aggregates = {"a": agg}
assert viewer.aggregates == {"a": agg.value}

def get_median(self, input_data):
    """Return the 'Price' value of the first row of a median view.

    Loads ``input_data`` into a Table, builds a view that shows only the
    'Price' column, aggregates it with 'median' and groups by 'Item', then
    reads 'Price' from the first row of the view's JSON output.
    """
    view_config = {
        'columns': ['Price'],
        'aggregates': {'Price': 'median'},
        'group_by': ['Item'],
    }

    # Keep the table bound to a local for as long as the view is in use.
    source = Table(data=input_data)
    grouped = source.view(**view_config)

    first_row = grouped.to_json()[0]
    return first_row['Price']

def test_aggregate_median(self):
    """Check the 'median' aggregate on odd- and even-sized inputs.

    Float prices are expected to yield the mean of the two middle values
    when the input size is even; string prices are expected to yield the
    upper of the two middle values instead.
    """
    float_rows = [
        {'Item': 'Book', 'Price': price}
        for price in (2.0, 3.0, 5.0, 4.0, 8.0, 9.0, 6.0)
    ]
    string_rows = [
        {'Item': 'Book', 'Price': price}
        for price in ('2', '3', '5', '4', '8', '9', '6')
    ]

    # Windows over the seven rows, shared by both data sets.
    everything = slice(None)
    first_two = slice(None, 2)
    last_two = slice(5, None)
    all_but_first = slice(1, None)
    every_other = slice(None, None, 2)

    # Numeric data: (window, expected median).
    float_cases = [
        (everything, 5.0),     # [2.0, 3.0, 5.0, 4.0, 8.0, 9.0, 6.0]
        (first_two, 2.5),      # [2.0, 3.0]
        (last_two, 7.5),       # [9.0, 6.0]
        (all_but_first, 5.5),  # [3.0, 5.0, 4.0, 8.0, 9.0, 6.0]
        (every_other, 5.5),    # [2.0, 5.0, 8.0, 6.0]
    ]
    for window, expected in float_cases:
        assert self.get_median(float_rows[window]) == expected

    # Non-numeric data: (window, expected median).
    string_cases = [
        (everything, '5'),     # ['2', '3', '5', '4', '8', '9', '6']
        (first_two, '3'),      # ['2', '3']
        (last_two, '9'),       # ['9', '6']
        (all_but_first, '6'),  # ['3', '5', '4', '8', '9', '6']
        (every_other, '6'),    # ['2', '5', '8', '6']
    ]
    for window, expected in string_cases:
        assert self.get_median(string_rows[window]) == expected