Skip to content

Commit 91b5563

Browse files
authored
Merge pull request #182 from jpmorganchase/merge-cpp
Merge from fork
2 parents bd88a12 + 599c101 commit 91b5563

23 files changed

+290
-97
lines changed

packages/perspective/src/cpp/context_grouped_pkey.cpp

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -947,15 +947,11 @@ t_ctx_grouped_pkey::notify(const t_table& flattened)
947947
// aggregates should be presized to be same size
948948
// as agg_indices
949949
void
950-
t_ctx_grouped_pkey::get_aggregates(t_uindex nidx,
950+
t_ctx_grouped_pkey::get_aggregates_for_sorting(t_uindex nidx,
951951
const t_idxvec& agg_indices,
952952
t_tscalvec& aggregates,
953953
t_ctx2 * ) const
954954
{
955-
956-
const t_str& grouping_label_col =
957-
m_config.get_grouping_label_column();
958-
959955
for (t_uindex idx = 0, loop_end = agg_indices.size();
960956
idx < loop_end;
961957
++idx)
@@ -964,16 +960,7 @@ t_ctx_grouped_pkey::get_aggregates(t_uindex nidx,
964960

965961
if (which_agg < 0)
966962
{
967-
if (m_has_label)
968-
{
969-
t_tscalvec pkeys;
970-
auto iters = m_tree->get_pkeys_for_leaf(nidx);
971-
aggregates[idx].set(m_state->get_value(
972-
iters.first->m_pkey, grouping_label_col));
973-
} else
974-
{
975-
aggregates[idx].set(m_tree->get_value(nidx));
976-
}
963+
aggregates[idx].set(m_tree->get_sortby_value(nidx));
977964
}
978965
else
979966
{

packages/perspective/src/cpp/context_two.cpp

Lines changed: 106 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ t_ctx2::get_ctraversal_indices() const
243243
}
244244

245245
t_tscalvec
246-
t_ctx2::get_data(t_tvidx start_row,
246+
t_ctx2::get_data_old_path(t_tvidx start_row,
247247
t_tvidx end_row,
248248
t_tvidx start_col,
249249
t_tvidx end_col) const
@@ -343,6 +343,111 @@ t_ctx2::get_data(t_tvidx start_row,
343343
return retval;
344344
}
345345

346+
// Returns the values for the requested window of the view as a flat,
// row-major vector with (rows * columns) entries.
//
// The requested extents are first passed through
// sanitize_get_data_extents(). For each row:
//   - if the window starts at column 0, the first entry of the row is the
//     row tree's value for that row's tree node;
//   - every other column is resolved by resolve_cells() to a
//     (tree, aggregate, node) triple and read from that tree's aggregate
//     table via extract_aggregate().
// Cells that resolve to no node (m_idx < 0), or whose extracted aggregate is
// not valid, are returned as the scalar produced by mknone().
//
// start_row/end_row and start_col/end_col are treated as half-open ranges
// after sanitizing (the loops below run while idx < end).
t_tscalvec
t_ctx2::get_data(t_tvidx start_row,
                 t_tvidx end_row,
                 t_tvidx start_col,
                 t_tvidx end_col) const
{
    // Kill switch: set to false to fall back to the previous implementation.
    static bool const enable_getdata_fix = true;

    if (!enable_getdata_fix)
        return get_data_old_path(start_row, end_row, start_col, end_col);

    auto ext = sanitize_get_data_extents(
        *this, start_row, end_row, start_col, end_col);

    t_index nrows = ext.m_erow - ext.m_srow;
    t_index stride = ext.m_ecol - ext.m_scol;

    // Nothing to return for an empty window. Without this guard, a window
    // with rows but no columns (stride == 0, m_scol == 0) would write the
    // column-0 value into a zero-length result vector.
    // NOTE(review): assumes sanitize_get_data_extents() can yield
    // m_scol == m_ecol; confirm against its implementation.
    if (nrows <= 0 || stride <= 0)
        return t_tscalvec();

    // Enumerate every (row, column) pair in the window, row-major, so that
    // cells_info below is indexed exactly like retval.
    t_uidxpvec cells;
    cells.reserve(static_cast<t_uindex>(nrows * stride));
    for (t_index ridx = ext.m_srow; ridx < ext.m_erow; ++ridx)
    {
        for (t_index cidx = ext.m_scol; cidx < ext.m_ecol; ++cidx)
        {
            cells.push_back(t_idxpair(ridx, cidx));
        }
    }

    auto cells_info = resolve_cells(cells);

    t_tscalvec retval(nrows * stride);

    t_tscalar empty = mknone();

    // (tree index, aggregate index) -> aggregate column, looked up once per
    // tree/aggregate here instead of once per cell.
    typedef std::pair<t_uindex, t_uindex> t_aggpair;
    std::map<t_aggpair, const t_column*> aggmap;

    for (t_uindex treeidx = 0, tree_loop_end = m_trees.size();
         treeidx < tree_loop_end;
         ++treeidx)
    {
        auto aggtable = m_trees[treeidx]->get_aggtable();
        // Bind rather than copy; const auto& is correct whether
        // get_schema() returns by value or by reference.
        const auto& aggschema = aggtable->get_schema();

        for (t_uindex aggidx = 0,
                      agg_loop_end = m_config.get_num_aggregates();
             aggidx < agg_loop_end;
             ++aggidx)
        {
            const t_str& aggname = aggschema.m_columns[aggidx];

            aggmap[t_aggpair(treeidx, aggidx)] =
                aggtable->get_const_column(aggname).get();
        }
    }

    const t_aggspecvec& aggspecs = m_config.get_aggregates();

    for (t_index ridx = ext.m_srow; ridx < ext.m_erow; ++ridx)
    {
        const t_index row_base = (ridx - ext.m_srow) * stride;

        // Column 0 is not an aggregate cell: it carries the row tree's value
        // for this row.
        if (ext.m_scol == 0)
        {
            retval[row_base].set(
                rtree()->get_value(m_rtraversal->get_tree_index(ridx)));
        }

        // Aggregate cells start at column 1, or at m_scol if the window
        // starts further right.
        for (t_index cidx = std::max(ext.m_scol, t_tvidx(1));
             cidx < ext.m_ecol;
             ++cidx)
        {
            t_index insert_idx = row_base + (cidx - ext.m_scol);
            const t_cellinfo& cinfo = cells_info[insert_idx];

            if (cinfo.m_idx < 0)
            {
                // The cell did not resolve to a tree node.
                retval[insert_idx].set(empty);
                continue;
            }

            const auto& tree = m_trees[cinfo.m_treenum];

            auto aggcol =
                aggmap[t_aggpair(cinfo.m_treenum, cinfo.m_agg_index)];

            t_ptidx p_idx = tree->get_parent_idx(cinfo.m_idx);

            t_uindex agg_ridx = tree->get_aggidx(cinfo.m_idx);

            // The parent's aggregate row is passed alongside the node's own
            // row; INVALID_INDEX is forwarded when there is no parent.
            t_uindex agg_pridx = p_idx == INVALID_INDEX
                ? INVALID_INDEX
                : tree->get_aggidx(p_idx);

            auto value = extract_aggregate(
                aggspecs[cinfo.m_agg_index], aggcol, agg_ridx, agg_pridx);

            if (!value.is_valid())
                value.set(empty);

            retval[insert_idx].set(value);
        }
    }

    return retval;
}
346451
void
347452
t_ctx2::sort_by(const t_sortsvec& sortby)
348453
{

packages/perspective/src/cpp/gnode.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -495,7 +495,7 @@ t_gnode::_process()
495495

496496
t_colcptrvec fcolumns(flattened->num_columns());
497497
t_uindex ncols = sschema.get_num_columns();
498-
498+
499499
t_colcptrvec scolumns(ncols);
500500
t_colptrvec dcolumns(ncols);
501501
t_colptrvec pcolumns(ncols);
@@ -1261,7 +1261,7 @@ t_gnode::notify_contexts(const t_table& flattened)
12611261
t_sctxhvec ctxhvec(num_ctx);
12621262

12631263
t_index ctxh_count = 0;
1264-
for (t_sctxhmap::iterator iter = m_contexts.begin();
1264+
for (t_sctxhmap::const_iterator iter = m_contexts.begin();
12651265
iter != m_contexts.end();
12661266
++iter)
12671267
{
@@ -1272,7 +1272,7 @@ t_gnode::notify_contexts(const t_table& flattened)
12721272
auto notify_context_helper = [this, &ctxhvec, &flattened](
12731273
t_index ctxidx) {
12741274
const t_ctx_handle& ctxh = ctxhvec[ctxidx];
1275-
switch (ctxh.m_ctx_type)
1275+
switch (ctxh.get_type())
12761276
{
12771277
case TWO_SIDED_CONTEXT:
12781278
{

packages/perspective/src/cpp/gnode_state.cpp

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -155,10 +155,10 @@ t_gstate::update_history(const t_table* tbl)
155155
++idx)
156156
{
157157
const t_str& cname = fschema.m_columns[idx];
158-
col_translation[count] = idx;
159-
fcolumns[idx] = tbl->get_const_column(cname).get();
160-
++count;
161-
}
158+
col_translation[count] = idx;
159+
fcolumns[idx] = tbl->get_const_column(cname).get();
160+
++count;
161+
}
162162

163163
t_colptrvec scolumns(ncols);
164164

@@ -762,10 +762,29 @@ t_gstate::_get_pkeyed_table(const t_schema& schema,
762762
if (get_pkey_dtype() == DTYPE_STR)
763763
{
764764
static const t_tscalar empty = get_interned_tscalar("");
765+
static bool const enable_pkeyed_table_vocab_reserve = true;
765766

766767
t_uindex offset = has_pkey(empty) ? 0 : 1;
767768

768-
pkey_col->set_vocabulary(order);
769+
size_t total_string_size = 0;
770+
771+
if( enable_pkeyed_table_vocab_reserve )
772+
{
773+
total_string_size += offset;
774+
for (t_uindex idx = 0, loop_end = order.size();
775+
idx < loop_end;
776+
++idx)
777+
{
778+
total_string_size += strlen(order[idx].first.get_char_ptr()) + 1;
779+
}
780+
}
781+
782+
// if the m_mapping is empty, get_pkey_dtype() may lie about our pkeys being strings
783+
// don't try to reserve in this case
784+
if( !order.size() )
785+
total_string_size = 0;
786+
787+
pkey_col->set_vocabulary(order, total_string_size);
769788
auto base = pkey_col->get_nth<t_uindex>(0);
770789

771790
for (t_uindex idx = 0, loop_end = order.size();

packages/perspective/src/cpp/sparse_tree.cpp

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,18 +1011,26 @@ t_stree::update_aggs_from_static(const t_dtree_ctx& ctx,
10111011
cols_topo_sorted.clear();
10121012
cols_topo_sorted.reserve(col_cnt);
10131013

1014+
static bool const enable_aggregate_reordering = true;
1015+
static bool const enable_fix_double_calculation = true;
1016+
10141017
std::unordered_set< t_column* > dst_visited;
1015-
auto push_column = [&](size_t idx)
1018+
auto push_column = [&]( size_t idx )
10161019
{
1017-
t_column* dst = agg_update_info.m_dst[idx];
1018-
if (dst_visited.find(dst) != dst_visited.end())
1020+
if ( enable_fix_double_calculation )
10191021
{
1020-
return;
1022+
t_column* dst = agg_update_info.m_dst[ idx ];
1023+
if ( dst_visited.find( dst ) != dst_visited.end() )
1024+
{
1025+
return;
1026+
}
1027+
dst_visited.insert( dst );
10211028
}
1022-
dst_visited.insert(dst);
1023-
cols_topo_sorted.push_back(idx);
1029+
cols_topo_sorted.push_back( idx );
10241030
};
10251031

1032+
if ( enable_aggregate_reordering )
1033+
{
10261034
// Move scaled agg columns to the end
10271035
// This does not handle the case where a scaled aggregate depends on another scaled aggregate
10281036
// ( not sure if that is possible )
@@ -1040,6 +1048,15 @@ t_stree::update_aggs_from_static(const t_dtree_ctx& ctx,
10401048
push_column(i);
10411049
}
10421050
}
1051+
}
1052+
else
1053+
{
1054+
// If backed out, use same column order as before ( not topo sorted )
1055+
for ( size_t i = 0; i < col_cnt; ++i )
1056+
{
1057+
push_column( i );
1058+
}
1059+
}
10431060

10441061
for (const auto& r : m_tree_unification_records)
10451062
{
@@ -1213,6 +1230,7 @@ t_stree::update_agg_table(t_uindex nidx,
12131230
t_index nstrands,
12141231
const t_gstate& gstate)
12151232
{
1233+
static bool const enable_sticky_nan_fix = true;
12161234
for (t_uindex idx : info.m_dst_topo_sorted)
12171235
{
12181236
const t_column* src = info.m_src[idx];
@@ -1231,16 +1249,14 @@ t_stree::update_agg_table(t_uindex nidx,
12311249
t_tscalar dst_scalar = dst->get_scalar(dst_ridx);
12321250
old_value.set(dst_scalar);
12331251
new_value.set(dst_scalar.add(src_scalar));
1234-
1235-
if(old_value.is_nan())
1252+
if( enable_sticky_nan_fix && old_value.is_nan() ) // is_nan returns false for non-float types
12361253
{
12371254
// if we previously had a NaN, add can't make it finite again; recalculate entire sum in case it is now finite
12381255
auto pkeys = get_pkeys(nidx);
12391256
t_f64vec values;
12401257
gstate.read_column(spec.get_dependencies()[0].name(), pkeys, values);
12411258
new_value.set(std::accumulate(values.begin(), values.end(), t_float64(0)));
12421259
}
1243-
12441260
dst->set_scalar(dst_ridx, new_value);
12451261
}
12461262
break;
@@ -2337,7 +2353,7 @@ t_stree::get_aggcols(const t_idxvec& agg_indices) const
23372353
// aggregates should be presized to be same size
23382354
// as agg_indices
23392355
void
2340-
t_stree::get_aggregates(t_uindex nidx,
2356+
t_stree::get_aggregates_for_sorting(t_uindex nidx,
23412357
const t_idxvec& agg_indices,
23422358
t_tscalvec& aggregates,
23432359
t_ctx2 * ctx2) const
@@ -2350,7 +2366,7 @@ t_stree::get_aggregates(t_uindex nidx,
23502366
auto which_agg = agg_indices[idx];
23512367
if(which_agg < 0)
23522368
{
2353-
aggregates[idx] = get_value(nidx);
2369+
aggregates[idx] = get_sortby_value(nidx);
23542370
}
23552371
else if( ctx2 || ( size_t(which_agg) >= m_aggcols.size() ) )
23562372
{

packages/perspective/src/cpp/table.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,7 @@ t_mask
567567
t_table::filter_cpp(t_filter_op combiner,
568568
const t_ftermvec& fterms_) const
569569
{
570+
static bool const enable_interned_filtering = true;
570571

571572
auto self = const_cast<t_table*>(this);
572573
auto fterms = fterms_;
@@ -581,10 +582,9 @@ t_table::filter_cpp(t_filter_op combiner,
581582
indices[idx] = m_schema.get_colidx(fterms[idx].m_colname);
582583
columns[idx] = get_const_column(fterms[idx].m_colname).get();
583584
fterms[idx].coerce_numeric(columns[idx]->get_dtype());
584-
auto op = fterms[idx].m_op;
585-
t_tscalar& thr = fterms[idx].m_threshold;
586-
if (fterms[idx].m_use_interned)
585+
if (fterms[idx].m_use_interned && enable_interned_filtering)
587586
{
587+
t_tscalar& thr = fterms[idx].m_threshold;
588588
auto col = self->get_column(fterms[idx].m_colname);
589589
auto interned = col->get_interned(thr.get_char_ptr());
590590
thr.set(interned);
@@ -611,7 +611,7 @@ t_table::filter_cpp(t_filter_op combiner,
611611
const auto& ft = fterms[cidx];
612612
t_bool tval;
613613

614-
if (ft.m_use_interned)
614+
if (ft.m_use_interned && enable_interned_filtering)
615615
{
616616
cell_val.set(
617617
*(columns[cidx]->get_nth<t_stridx>(ridx)));
@@ -623,7 +623,7 @@ t_table::filter_cpp(t_filter_op combiner,
623623
tval = ft(cell_val);
624624
}
625625

626-
if (!tval)
626+
if (!cell_val.is_valid() || !tval)
627627
{
628628
pass = false;
629629
break;
@@ -875,7 +875,7 @@ t_table::fill_expr_helper(const t_svec& icol_names,
875875
struct cmp_str
876876
{
877877
bool
878-
operator()(const char* a, const char* b)
878+
operator()(const char* a, const char* b) const
879879
{
880880
return std::strcmp(a, b) < 0;
881881
}

packages/perspective/src/cpp/traversal.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ t_traversal::expand_node(const t_sortsvec& sortby, t_tvidx exp_idx, t_ctx2 * ctx
164164
iter != tchildren.end();
165165
++iter)
166166
{
167-
m_tree->get_aggregates(
167+
m_tree->get_aggregates_for_sorting(
168168
iter->m_idx, sortby_agg_indices, aggregates, ctx2);
169169
(*sortelems)[count] = t_mselem(aggregates, child_idx);
170170
++count;

packages/perspective/src/cpp/tree_context_common.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,10 @@ notify_sparse_tree_common(t_table_sptr strands,
137137

138138
if (!leaf_paths.empty() && traversal.get() && traversal->size() == 1)
139139
{
140-
traversal->populate_root_children(tree);
140+
if ( traversal->get_node( 0 ).m_expanded )
141+
{
142+
traversal->populate_root_children( tree );
143+
}
141144
}
142145
else
143146
{

0 commit comments

Comments
 (0)