Skip to content

Commit dbecf43

Browse files
committed
Add index(), col(), and vlookup() to ExprTk
1 parent afa7438 commit dbecf43

File tree

17 files changed

+407
-95
lines changed

17 files changed

+407
-95
lines changed

cpp/perspective/src/cpp/computed_expression.cpp

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ t_computed_expression::t_computed_expression(
109109

110110
void
111111
t_computed_expression::compute(std::shared_ptr<t_data_table> source_table,
112+
const t_gstate::t_mapping& pkey_map,
112113
std::shared_ptr<t_data_table> destination_table, t_expression_vocab& vocab,
113114
t_regex_mapping& regex_mapping) const {
114115
// TODO: share symtables across pre/re/compute
@@ -117,9 +118,12 @@ t_computed_expression::compute(std::shared_ptr<t_data_table> source_table,
117118
// pi, infinity, etc.
118119
sym_table.add_constants();
119120

121+
t_uindex row_idx = 0;
122+
120123
// Create a function store, with is_type_validator set to false as we
121124
// are calculating values, not type-checking.
122-
t_computed_function_store function_store(vocab, regex_mapping, false);
125+
t_computed_function_store function_store(
126+
vocab, regex_mapping, false, source_table, pkey_map, row_idx);
123127
function_store.register_computed_functions(sym_table);
124128

125129
exprtk::expression<t_tscalar> expr_definition;
@@ -166,6 +170,7 @@ t_computed_expression::compute(std::shared_ptr<t_data_table> source_table,
166170
const std::string& column_id = m_column_ids[cidx].first;
167171
values[cidx].second.set(columns[column_id]->get_scalar(ridx));
168172
}
173+
row_idx = ridx;
169174

170175
t_tscalar value = expr_definition.value();
171176

@@ -228,14 +233,18 @@ t_computed_expression_parser::precompute(const std::string& expression_alias,
228233
const std::string& expression_string,
229234
const std::string& parsed_expression_string,
230235
const std::vector<std::pair<std::string, std::string>>& column_ids,
231-
std::shared_ptr<t_schema> schema, t_expression_vocab& vocab,
232-
t_regex_mapping& regex_mapping) {
236+
std::shared_ptr<t_data_table> source_table,
237+
const t_gstate::t_mapping& pkey_map, std::shared_ptr<t_schema> schema,
238+
t_expression_vocab& vocab, t_regex_mapping& regex_mapping) {
233239
exprtk::symbol_table<t_tscalar> sym_table;
234240
sym_table.add_constants();
235241

242+
t_uindex row_idx = 0;
243+
236244
// Create a function store, with is_type_validator set to true as we are
237245
// just getting the output types.
238-
t_computed_function_store function_store(vocab, regex_mapping, true);
246+
t_computed_function_store function_store(
247+
vocab, regex_mapping, true, source_table, pkey_map, row_idx);
239248
function_store.register_computed_functions(sym_table);
240249

241250
std::vector<t_tscalar> values;
@@ -291,16 +300,21 @@ t_computed_expression_parser::get_dtype(const std::string& expression_alias,
291300
const std::string& expression_string,
292301
const std::string& parsed_expression_string,
293302
const std::vector<std::pair<std::string, std::string>>& column_ids,
294-
const t_schema& schema, t_expression_error& error,
295-
t_expression_vocab& vocab, t_regex_mapping& regex_mapping) {
303+
std::shared_ptr<t_data_table> source_table,
304+
const t_gstate::t_mapping& pkey_map, const t_schema& schema,
305+
t_expression_error& error, t_expression_vocab& vocab,
306+
t_regex_mapping& regex_mapping) {
296307
exprtk::symbol_table<t_tscalar> sym_table;
297308
sym_table.add_constants();
298309

299310
std::vector<t_tscalar> values;
300311

312+
t_uindex row_idx = 0;
313+
301314
// Create a function store, with is_type_validator set to true as we are
302315
// just validating the output types.
303-
t_computed_function_store function_store(vocab, regex_mapping, true);
316+
t_computed_function_store function_store(
317+
vocab, regex_mapping, true, source_table, pkey_map, row_idx);
304318
function_store.register_computed_functions(sym_table);
305319

306320
auto num_input_columns = column_ids.size();
@@ -440,7 +454,9 @@ t_validated_expression_map::get_expression_errors() const {
440454
}
441455

442456
t_computed_function_store::t_computed_function_store(t_expression_vocab& vocab,
443-
t_regex_mapping& regex_mapping, bool is_type_validator)
457+
t_regex_mapping& regex_mapping, bool is_type_validator,
458+
std::shared_ptr<t_data_table> source_table,
459+
const t_gstate::t_mapping& pkey_map, t_uindex& row_idx)
444460
: m_day_of_week_fn(computed_function::day_of_week(vocab, is_type_validator))
445461
, m_month_of_year_fn(
446462
computed_function::month_of_year(vocab, is_type_validator))
@@ -459,7 +475,13 @@ t_computed_function_store::t_computed_function_store(t_expression_vocab& vocab,
459475
, m_replace_fn(
460476
computed_function::replace(vocab, regex_mapping, is_type_validator))
461477
, m_replace_all_fn(computed_function::replace_all(
462-
vocab, regex_mapping, is_type_validator)) {}
478+
vocab, regex_mapping, is_type_validator))
479+
, m_add_one_fn(computed_function::add_one())
480+
, m_index_fn(computed_function::index(pkey_map, source_table, row_idx))
481+
, m_col_fn(computed_function::col(
482+
vocab, is_type_validator, source_table, row_idx))
483+
, m_vlookup_fn(computed_function::vlookup(
484+
vocab, is_type_validator, source_table, row_idx)) {}
463485

464486
void
465487
t_computed_function_store::register_computed_functions(
@@ -494,6 +516,7 @@ t_computed_function_store::register_computed_functions(
494516
sym_table.add_function("month_of_year", m_month_of_year_fn);
495517
sym_table.add_function("today", computed_function::today);
496518
sym_table.add_function("now", computed_function::now);
519+
sym_table.add_function("add_one", m_add_one_fn);
497520

498521
// String functions
499522
sym_table.add_function("intern", m_intern_fn);
@@ -522,6 +545,9 @@ t_computed_function_store::register_computed_functions(
522545
sym_table.add_function("substring", m_substring_fn);
523546
sym_table.add_function("replace", m_replace_fn);
524547
sym_table.add_function("replace_all", m_replace_all_fn);
548+
sym_table.add_function("index", m_index_fn);
549+
sym_table.add_function("col", m_col_fn);
550+
sym_table.add_function("vlookup", m_vlookup_fn);
525551

526552
// And scalar constants
527553
sym_table.add_constant("True", t_computed_expression_parser::TRUE_SCALAR);

cpp/perspective/src/cpp/computed_function.cpp

Lines changed: 179 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
1212

1313
#include <perspective/computed_function.h>
14+
#include <perspective/gnode_state.h>
15+
#include <perspective/column.h>
1416
#include <math.h>
1517

1618
namespace perspective {
@@ -46,12 +48,6 @@ namespace computed_function {
4648
std::string temp_str
4749
= std::string(temp_string.begin(), temp_string.end());
4850

49-
if (m_is_type_validator) {
50-
// Return the sentinel value which indicates a valid output from
51-
// type checking, as the output value is not STATUS_CLEAR
52-
return m_sentinel;
53-
}
54-
5551
// Intern the string into the vocabulary.
5652
rval.set(m_expression_vocab.intern(temp_str));
5753
return rval;
@@ -2186,6 +2182,183 @@ namespace computed_function {
21862182
return rval;
21872183
}
21882184

2185+
add_one::add_one()
2186+
: exprtk::igeneric_function<t_tscalar>("T") {}
2187+
add_one::~add_one() {}
2188+
2189+
t_tscalar
2190+
add_one::operator()(t_parameter_list params) {
2191+
t_tscalar rval;
2192+
rval.clear();
2193+
2194+
t_generic_type& gt = params[0];
2195+
t_scalar_view temp(gt);
2196+
t_tscalar temp_scalar;
2197+
2198+
temp_scalar.set(temp());
2199+
t_dtype dtype = temp_scalar.get_dtype();
2200+
2201+
switch (dtype) {
2202+
case DTYPE_INT8:
2203+
case DTYPE_INT16:
2204+
case DTYPE_INT32:
2205+
rval.set(temp_scalar.to_int32() + 1);
2206+
break;
2207+
case DTYPE_INT64:
2208+
rval.set(temp_scalar.to_int64() + 1);
2209+
break;
2210+
case DTYPE_UINT8:
2211+
case DTYPE_UINT16:
2212+
case DTYPE_UINT32:
2213+
case DTYPE_UINT64:
2214+
rval.set(temp_scalar.to_uint64() + 1);
2215+
break;
2216+
case DTYPE_FLOAT32:
2217+
case DTYPE_FLOAT64:
2218+
rval.set(temp_scalar.to_double() + 1);
2219+
break;
2220+
case DTYPE_NONE:
2221+
rval.set(0.0);
2222+
default:
2223+
rval.m_status = STATUS_CLEAR;
2224+
return rval;
2225+
}
2226+
2227+
if (!temp_scalar.is_valid()) {
2228+
return rval;
2229+
}
2230+
2231+
return rval;
2232+
}
2233+
2234+
index::index(const t_gstate::t_mapping& pkey_map,
2235+
std::shared_ptr<t_data_table> source_table, t_uindex& row_idx)
2236+
: exprtk::igeneric_function<t_tscalar>("Z")
2237+
, m_pkey_map(pkey_map)
2238+
, m_source_table(source_table)
2239+
, m_row_idx(row_idx) {}
2240+
2241+
index::~index() {}
2242+
2243+
t_tscalar
2244+
index::operator()(t_parameter_list parameters) {
2245+
t_tscalar rval;
2246+
rval.clear();
2247+
2248+
auto col = m_source_table->get_const_column("psp_pkey");
2249+
auto res = col->get_scalar(m_row_idx);
2250+
rval.set(res);
2251+
2252+
return rval;
2253+
}
2254+
2255+
col::col(t_expression_vocab& expression_vocab, bool is_type_validator,
2256+
std::shared_ptr<t_data_table> source_table, t_uindex& row_idx)
2257+
: exprtk::igeneric_function<t_tscalar>("T")
2258+
, m_expression_vocab(expression_vocab)
2259+
, m_is_type_validator(is_type_validator)
2260+
, m_source_table(source_table)
2261+
, m_row_idx(row_idx) {}
2262+
col::~col() {}
2263+
2264+
t_tscalar
2265+
col::operator()(t_parameter_list parameters) {
2266+
t_tscalar rval;
2267+
rval.clear();
2268+
2269+
t_generic_type& gt = parameters[0];
2270+
t_scalar_view temp(gt);
2271+
t_tscalar temp_scalar;
2272+
2273+
temp_scalar.set(temp());
2274+
t_dtype dtype = temp_scalar.get_dtype();
2275+
2276+
if (dtype != DTYPE_STR) {
2277+
rval.m_status = STATUS_CLEAR;
2278+
return rval;
2279+
}
2280+
2281+
if (!temp_scalar.is_valid()) {
2282+
return rval;
2283+
}
2284+
2285+
std::string temp_str = temp_scalar.to_string();
2286+
2287+
// rval.set(m_expression_vocab.intern(temp_str));
2288+
// return rval;
2289+
2290+
if (!m_source_table->get_schema().has_column(temp_str)) {
2291+
rval.m_status = STATUS_CLEAR;
2292+
return rval;
2293+
}
2294+
auto col = m_source_table->get_const_column(temp_str);
2295+
auto res = col->get_scalar(m_row_idx);
2296+
rval.set(res);
2297+
rval.m_type = col->get_dtype();
2298+
2299+
return rval;
2300+
}
2301+
2302+
vlookup::vlookup(t_expression_vocab& expression_vocab,
2303+
bool is_type_validator, std::shared_ptr<t_data_table> source_table,
2304+
t_uindex& row_idx)
2305+
: exprtk::igeneric_function<t_tscalar>("TT")
2306+
, m_expression_vocab(expression_vocab)
2307+
, m_is_type_validator(is_type_validator)
2308+
, m_source_table(source_table)
2309+
, m_row_idx(row_idx) {}
2310+
vlookup::~vlookup() {}
2311+
2312+
t_tscalar
2313+
vlookup::operator()(t_parameter_list parameters) {
2314+
t_tscalar rval;
2315+
rval.clear();
2316+
2317+
t_generic_type& column_gt = parameters[0];
2318+
t_scalar_view column_gt_view(column_gt);
2319+
t_tscalar column_name;
2320+
2321+
column_name.set(column_gt_view());
2322+
t_dtype column_name_dtype = column_name.get_dtype();
2323+
2324+
t_generic_type& index_gt = parameters[1];
2325+
t_scalar_view index_gt_view(index_gt);
2326+
t_tscalar index;
2327+
2328+
index.set(index_gt_view());
2329+
2330+
if (column_name_dtype != DTYPE_STR || !index.is_numeric()) {
2331+
rval.m_status = STATUS_CLEAR;
2332+
return rval;
2333+
}
2334+
2335+
if (!column_name.is_valid() || !index.is_valid()) {
2336+
return rval;
2337+
}
2338+
2339+
std::string col_name_str = column_name.to_string();
2340+
if (!m_source_table->get_schema().has_column(col_name_str)) {
2341+
rval.m_status = STATUS_CLEAR;
2342+
return rval;
2343+
}
2344+
auto col = m_source_table->get_const_column(col_name_str);
2345+
2346+
if (m_is_type_validator) {
2347+
rval.m_status = STATUS_VALID;
2348+
rval.m_type = col->get_dtype();
2349+
return rval;
2350+
}
2351+
2352+
auto idx = index.to_uint64();
2353+
if (idx < col->size()) {
2354+
auto res = col->get_scalar(idx);
2355+
rval.set(res);
2356+
}
2357+
rval.m_type = col->get_dtype();
2358+
2359+
return rval;
2360+
}
2361+
21892362
// Set up random number generator
21902363
std::default_random_engine random::RANDOM_ENGINE
21912364
= std::default_random_engine();

cpp/perspective/src/cpp/context_grouped_pkey.cpp

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111
// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
1212

1313

14+
#include <perspective/context_grouped_pkey.h>
1415
#include <perspective/first.h>
1516
#include <perspective/get_data_extents.h>
16-
#include <perspective/context_grouped_pkey.h>
1717
#include <perspective/extract_aggregate.h>
1818
#include <perspective/filter.h>
1919
#include <perspective/sparse_tree.h>
@@ -684,7 +684,8 @@ t_ctx_grouped_pkey::get_column_dtype(t_uindex idx) const {
684684

685685
void
686686
t_ctx_grouped_pkey::compute_expressions(std::shared_ptr<t_data_table> master,
687-
t_expression_vocab& expression_vocab, t_regex_mapping& regex_mapping) {
687+
const t_gstate::t_mapping& pkey_map, t_expression_vocab& expression_vocab,
688+
t_regex_mapping& regex_mapping) {
688689
// Clear the transitional expression tables on the context so they are
689690
// ready for the next update.
690691
m_expression_tables->clear_transitional_tables();
@@ -700,13 +701,14 @@ t_ctx_grouped_pkey::compute_expressions(std::shared_ptr<t_data_table> master,
700701
const auto& expressions = m_config.get_expressions();
701702
for (const auto& expr : expressions) {
702703
// Compute the expressions on the master table.
703-
expr->compute(
704-
master, master_expression_table, expression_vocab, regex_mapping);
704+
expr->compute(master, pkey_map, master_expression_table,
705+
expression_vocab, regex_mapping);
705706
}
706707
}
707708

708709
void
709710
t_ctx_grouped_pkey::compute_expressions(std::shared_ptr<t_data_table> master,
711+
const t_gstate::t_mapping& pkey_map,
710712
std::shared_ptr<t_data_table> flattened,
711713
std::shared_ptr<t_data_table> delta, std::shared_ptr<t_data_table> prev,
712714
std::shared_ptr<t_data_table> current,
@@ -729,25 +731,25 @@ t_ctx_grouped_pkey::compute_expressions(std::shared_ptr<t_data_table> master,
729731
const auto& expressions = m_config.get_expressions();
730732
for (const auto& expr : expressions) {
731733
// master: compute based on latest state of the gnode state table
732-
expr->compute(master, m_expression_tables->m_master, expression_vocab,
733-
regex_mapping);
734+
expr->compute(master, pkey_map, m_expression_tables->m_master,
735+
expression_vocab, regex_mapping);
734736

735737
// flattened: compute based on the latest update dataset
736-
expr->compute(flattened, m_expression_tables->m_flattened,
738+
expr->compute(flattened, pkey_map, m_expression_tables->m_flattened,
737739
expression_vocab, regex_mapping);
738740

739741
// delta: for each numerical column, the numerical delta between the
740742
// previous value and the current value in the row.
741-
expr->compute(delta, m_expression_tables->m_delta, expression_vocab,
742-
regex_mapping);
743+
expr->compute(delta, pkey_map, m_expression_tables->m_delta,
744+
expression_vocab, regex_mapping);
743745

744746
// prev: the values of the updated rows before this update was applied
745-
expr->compute(
746-
prev, m_expression_tables->m_prev, expression_vocab, regex_mapping);
747+
expr->compute(prev, pkey_map, m_expression_tables->m_prev,
748+
expression_vocab, regex_mapping);
747749

748750
// current: the current values of the updated rows
749-
expr->compute(current, m_expression_tables->m_current, expression_vocab,
750-
regex_mapping);
751+
expr->compute(current, pkey_map, m_expression_tables->m_current,
752+
expression_vocab, regex_mapping);
751753
}
752754

753755
// Calculate the transitions now that the intermediate tables are computed

0 commit comments

Comments
 (0)