-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Fix median miscalculation for even-sized item list #2224
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1127,19 +1127,13 @@ t_stree::update_agg_table(t_uindex nidx, t_agg_update_info& info, | |
| new_value.set(reduce_from_gstate< | ||
| std::function<t_tscalar(std::vector<t_tscalar>&)>>(gstate, | ||
| expression_master_table, spec.get_dependencies()[0].name(), | ||
| pkeys, [](std::vector<t_tscalar>& values) { | ||
| pkeys, [&](std::vector<t_tscalar>& values) { | ||
| if (values.size() == 0) { | ||
| return t_tscalar(); | ||
| } else if (values.size() == 1) { | ||
| return values[0]; | ||
| } else { | ||
| std::vector<t_tscalar>::iterator middle | ||
| = values.begin() + (values.size() / 2); | ||
|
|
||
| std::nth_element( | ||
| values.begin(), middle, values.end()); | ||
|
|
||
| return *middle; | ||
| return get_aggregate_median(values); | ||
| } | ||
| })); | ||
|
|
||
|
|
@@ -2020,6 +2014,28 @@ t_stree::get_aggregate(t_index idx, t_index aggnum) const { | |
| return extract_aggregate(m_aggspecs[aggnum], c, agg_ridx, agg_pridx); | ||
| } | ||
|
|
||
| t_tscalar | ||
| t_stree::get_aggregate_median(std::vector<t_tscalar>& values) const { | ||
| int size = values.size(); | ||
| bool is_even_size = size % 2 == 0; | ||
|
|
||
| if (is_even_size && values[0].is_numeric()){ | ||
|
||
| t_tscalar median_average; | ||
| std::vector<t_tscalar>::iterator first_middle = values.begin() + ((size - 1) / 2); | ||
| std::vector<t_tscalar>::iterator second_middle = values.begin() + (size / 2); | ||
|
|
||
| nth_element(values.begin(), first_middle, values.end()); | ||
|
||
| nth_element(values.begin(), second_middle, values.end()); | ||
|
|
||
| median_average.set((*first_middle + *second_middle) / static_cast<t_tscalar>(2)); | ||
| return median_average; | ||
| }else{ | ||
|
||
| std::vector<t_tscalar>::iterator middle = values.begin() + (size / 2); | ||
| std::nth_element(values.begin(), middle, values.end()); | ||
| return *middle; | ||
| } | ||
| } | ||
|
|
||
| void | ||
| t_stree::get_child_indices(t_index idx, std::vector<t_index>& out_data) const { | ||
| t_index num_children = get_num_children(idx); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just a suggestion - if we're going to factor out this logic into a function called
get_aggregate_median(values), maybe we should also move the0and1cases into this definition to 1) make it complete and 2) move the entire closure body to a single method call.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!