Skip to content

Commit a7c10cd

Browse files
committed
feat(cubesql): Support trivial casts in member pushdown
1 parent 634acc7 commit a7c10cd

File tree

4 files changed

+184
-4
lines changed

4 files changed

+184
-4
lines changed

rust/cubesql/cubesql/src/compile/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -13429,7 +13429,7 @@ ORDER BY "source"."str0" ASC
1342913429
V1LoadResultAnnotation::new(json!([]), json!([]), json!([]), json!([]))
1343013430
}
1343113431

13432-
fn simple_load_response(data: Vec<serde_json::Value>) -> V1LoadResponse {
13432+
pub(crate) fn simple_load_response(data: Vec<serde_json::Value>) -> V1LoadResponse {
1343313433
V1LoadResponse::new(vec![V1LoadResult::new(empty_annotation(), data)])
1343413434
}
1343513435

rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs

+99-3
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ use crate::{
3131
},
3232
},
3333
config::ConfigObj,
34+
sql::ColumnType,
3435
transport::{MetaContext, V1CubeMetaDimensionExt, V1CubeMetaExt, V1CubeMetaMeasureExt},
3536
var, var_iter, var_list_iter, CubeError,
3637
};
@@ -527,6 +528,23 @@ impl MemberRules {
527528
relation,
528529
),
529530
),
531+
// Cast without alias will not generate stable name in schema, so there's no rule like that for now
532+
// TODO implement it anyway, to be able to remove Projection on top of CubeScan completely
533+
transforming_rewrite(
534+
&format!("member-pushdown-replacer-column-{}-cast-alias", name),
535+
member_pushdown_replacer(
536+
alias_expr(cast_expr(column_expr("?column"), "?cast_type"), "?alias"),
537+
member_fn("?old_alias"),
538+
"?member_pushdown_replacer_alias_to_cube",
539+
),
540+
member_fn("?output_column"),
541+
self.transform_alias(
542+
"?member_pushdown_replacer_alias_to_cube",
543+
"?alias",
544+
"?output_column",
545+
relation,
546+
),
547+
),
530548
]
531549
}
532550

@@ -541,7 +559,10 @@ impl MemberRules {
541559
};
542560

543561
let find_matching_old_member =
544-
|name: &str, column_expr: String, column_to_search: ColumnToSearch| {
562+
|name: &str,
563+
column_expr: String,
564+
column_to_search: ColumnToSearch,
565+
cast_type_var: Option<&'static str>| {
545566
transforming_rewrite(
546567
&format!(
547568
"member-pushdown-replacer-column-find-matching-old-member-{}",
@@ -560,6 +581,7 @@ impl MemberRules {
560581
self.transform_find_matching_old_member(
561582
"?member_pushdown_replacer_alias_to_cube",
562583
column_to_search,
584+
cast_type_var,
563585
"?old_members",
564586
"?terminal_member",
565587
"?filtered_member_pushdown_replacer_alias_to_cube",
@@ -610,16 +632,31 @@ impl MemberRules {
610632
"column",
611633
column_expr("?column"),
612634
ColumnToSearch::Var("?column"),
635+
None,
636+
));
637+
rules.push(find_matching_old_member(
638+
"column-cast",
639+
cast_expr(column_expr("?column"), "?cast_type"),
640+
ColumnToSearch::Var("?column"),
641+
Some("?cast_type"),
613642
));
614643
rules.push(find_matching_old_member(
615644
"alias",
616645
alias_expr(column_expr("?column"), "?alias"),
617646
ColumnToSearch::Var("?column"),
647+
None,
648+
));
649+
rules.push(find_matching_old_member(
650+
"alias-cast",
651+
alias_expr(cast_expr(column_expr("?column"), "?cast_type"), "?alias"),
652+
ColumnToSearch::Var("?column"),
653+
Some("?cast_type"),
618654
));
619655
rules.push(find_matching_old_member(
620656
"agg-fun",
621657
agg_fun_expr("?fun_name", vec![column_expr("?column")], "?distinct"),
622658
ColumnToSearch::Var("?column"),
659+
None,
623660
));
624661
rules.push(find_matching_old_member(
625662
"agg-fun-alias",
@@ -628,11 +665,13 @@ impl MemberRules {
628665
"?alias",
629666
),
630667
ColumnToSearch::Var("?column"),
668+
None,
631669
));
632670
rules.push(find_matching_old_member(
633671
"udaf-fun",
634672
udaf_expr(MEASURE_UDAF_NAME, vec![column_expr("?column")]),
635673
ColumnToSearch::Var("?column"),
674+
None,
636675
));
637676
rules.push(find_matching_old_member(
638677
"agg-fun-default-count",
@@ -642,6 +681,7 @@ impl MemberRules {
642681
"AggregateFunctionExprDistinct:false",
643682
),
644683
ColumnToSearch::DefaultCount,
684+
None,
645685
));
646686
rules.push(find_matching_old_member(
647687
"agg-fun-default-count-alias",
@@ -654,6 +694,7 @@ impl MemberRules {
654694
"?alias",
655695
),
656696
ColumnToSearch::DefaultCount,
697+
None,
657698
));
658699
rules.push(find_matching_old_member(
659700
"agg-fun-with-cast",
@@ -664,6 +705,7 @@ impl MemberRules {
664705
"?distinct",
665706
),
666707
ColumnToSearch::Var("?column"),
708+
None,
667709
));
668710
rules.push(find_matching_old_member(
669711
"date-trunc",
@@ -672,6 +714,7 @@ impl MemberRules {
672714
vec![literal_expr("?granularity"), column_expr("?column")],
673715
),
674716
ColumnToSearch::Var("?column"),
717+
None,
675718
));
676719
rules.push(find_matching_old_member(
677720
"date-trunc-with-alias",
@@ -684,6 +727,7 @@ impl MemberRules {
684727
"?original_alias",
685728
),
686729
ColumnToSearch::Var("?column"),
730+
None,
687731
));
688732
Self::measure_rewrites(
689733
&mut rules,
@@ -1955,10 +1999,47 @@ impl MemberRules {
19551999
)
19562000
}
19572001

2002+
fn can_remove_cast(
2003+
meta: &MetaContext,
2004+
member: &Member,
2005+
cast_types: Option<&Vec<DataType>>,
2006+
) -> bool {
2007+
let cube = member.cube();
2008+
match cast_types {
2009+
// No cast, nothing to check
2010+
None => true,
2011+
// Need to check that cast is trivial
2012+
Some(cast_types) => {
2013+
// For now, allow trivial casts only for cube members, not literals
2014+
let Some(cube) = &cube else {
2015+
return false;
2016+
};
2017+
let Some(name) = member.name() else {
2018+
return false;
2019+
};
2020+
let Some(cube) = meta.find_cube_with_name(cube) else {
2021+
return false;
2022+
};
2023+
// For now, allow trivial casts only for dimensions
2024+
let Some(dimension) = cube.lookup_dimension_by_member_name(name) else {
2025+
return false;
2026+
};
2027+
2028+
cast_types
2029+
.iter()
2030+
.any(|dt| match (dimension.get_sql_type(), dt) {
2031+
(ColumnType::String, DataType::Utf8) => true,
2032+
_ => false,
2033+
})
2034+
}
2035+
}
2036+
}
2037+
19582038
fn transform_find_matching_old_member(
19592039
&self,
19602040
member_pushdown_replacer_alias_to_cube_var: &'static str,
19612041
column_to_search: ColumnToSearch,
2042+
cast_type_var: Option<&'static str>,
19622043
old_members_var: &'static str,
19632044
terminal_member: &'static str,
19642045
filtered_member_pushdown_replacer_alias_to_cube_var: &'static str,
@@ -1969,18 +2050,27 @@ impl MemberRules {
19692050
ColumnToSearch::Var(column_var) => Some(var!(column_var)),
19702051
ColumnToSearch::DefaultCount => None,
19712052
};
2053+
let cast_type_var = cast_type_var.map(|cast_type_var| var!(cast_type_var));
19722054
let old_members_var = var!(old_members_var);
19732055
let terminal_member = var!(terminal_member);
19742056
let filtered_member_pushdown_replacer_alias_to_cube_var =
19752057
var!(filtered_member_pushdown_replacer_alias_to_cube_var);
19762058
let flat_list = self.config_obj.push_down_pull_up_split();
2059+
let meta = self.meta_context.clone();
19772060
move |egraph, subst| {
19782061
let alias_to_cubes: Vec<_> = var_iter!(
19792062
egraph[subst[member_pushdown_replacer_alias_to_cube_var]],
19802063
MemberPushdownReplacerAliasToCube
19812064
)
19822065
.cloned()
19832066
.collect();
2067+
2068+
let cast_types = cast_type_var.map(|cast_type_var| {
2069+
var_iter!(egraph[subst[cast_type_var]], CastExprDataType)
2070+
.cloned()
2071+
.collect::<Vec<_>>()
2072+
});
2073+
19842074
for alias_to_cube in alias_to_cubes {
19852075
let column_iter = match column_var {
19862076
Some(column_var) => var_iter!(egraph[subst[column_var]], ColumnExprColumn)
@@ -1996,7 +2086,9 @@ impl MemberRules {
19962086
.data
19972087
.find_member_by_alias(&alias_name)
19982088
{
1999-
let cube_to_filter = if let Some(cube) = member.1.cube() {
2089+
let member = &member.1;
2090+
2091+
let cube_to_filter = if let Some(cube) = member.cube() {
20002092
Some(cube)
20012093
} else {
20022094
alias_to_cube
@@ -2014,8 +2106,12 @@ impl MemberRules {
20142106
alias_to_cube.clone()
20152107
};
20162108

2109+
if !Self::can_remove_cast(&meta, member, cast_types.as_ref()) {
2110+
continue;
2111+
}
2112+
20172113
// TODO remove unwrap
2018-
let old_member = member.1.clone().add_to_egraph(egraph, flat_list).unwrap();
2114+
let old_member = member.clone().add_to_egraph(egraph, flat_list).unwrap();
20192115
subst.insert(terminal_member, old_member);
20202116

20212117
let filtered_member_pushdown_replacer_alias_to_cube =
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
source: cubesql/src/compile/test/test_cube_join.rs
3+
expression: context.execute_query(query).await.unwrap()
4+
---
5+
+-------+--------------+
6+
| notes | content_cast |
7+
+-------+--------------+
8+
| foo | bar |
9+
| baz | quux |
10+
+-------+--------------+

rust/cubesql/cubesql/src/compile/test/test_cube_join.rs

+74
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
use cubeclient::models::{
22
V1LoadRequestQuery, V1LoadRequestQueryFilterItem, V1LoadRequestQueryTimeDimension,
33
};
4+
use datafusion::physical_plan::displayable;
45
use pretty_assertions::assert_eq;
56
use serde_json::json;
67

8+
use crate::compile::test::TestContext;
79
use crate::compile::{
810
rewrite::rewriter::Rewriter,
911
test::{
@@ -566,3 +568,75 @@ async fn test_join_cubes_with_aggr_error() {
566568
. Please check logs for additional information.".to_string()
567569
)
568570
}
571+
572+
/// CAST(dimension AS TEXT) should be pushed into CubeScan as regular dimension,
573+
/// so join could see both CubeScans
574+
#[tokio::test]
575+
async fn test_join_with_trivial_cast() {
576+
init_testing_logger();
577+
578+
let context = TestContext::new(DatabaseProtocol::PostgreSQL).await;
579+
580+
// language=PostgreSQL
581+
let query = r#"
582+
SELECT
583+
KibanaSampleDataEcommerce.notes,
584+
t0.content_cast
585+
FROM
586+
KibanaSampleDataEcommerce
587+
INNER JOIN (
588+
SELECT
589+
__cubeJoinField,
590+
CAST(content AS TEXT) AS content_cast
591+
FROM
592+
Logs
593+
) t0 ON (
594+
KibanaSampleDataEcommerce.__cubeJoinField = t0.__cubeJoinField
595+
)
596+
;
597+
"#;
598+
599+
let expected_cube_scan = V1LoadRequestQuery {
600+
measures: Some(vec![]),
601+
segments: Some(vec![]),
602+
dimensions: Some(vec![
603+
"KibanaSampleDataEcommerce.notes".to_string(),
604+
"Logs.content".to_string(),
605+
]),
606+
order: Some(vec![]),
607+
ungrouped: Some(true),
608+
..Default::default()
609+
};
610+
611+
context
612+
.add_cube_load_mock(
613+
expected_cube_scan.clone(),
614+
crate::compile::tests::simple_load_response(vec![
615+
json!({
616+
"KibanaSampleDataEcommerce.notes": "foo",
617+
"Logs.content": "bar",
618+
}),
619+
json!({
620+
"Logs.content": "quux",
621+
"KibanaSampleDataEcommerce.notes": "baz",
622+
}),
623+
]),
624+
)
625+
.await;
626+
627+
let query_plan = context.convert_sql_to_cube_query(&query).await.unwrap();
628+
629+
let physical_plan = query_plan.as_physical_plan().await.unwrap();
630+
println!(
631+
"Physical plan: {}",
632+
displayable(physical_plan.as_ref()).indent()
633+
);
634+
635+
assert_eq!(
636+
query_plan.as_logical_plan().find_cube_scan().request,
637+
expected_cube_scan
638+
);
639+
640+
// Expect that query is executable, and properly assigns alias for cast
641+
insta::assert_snapshot!(context.execute_query(query).await.unwrap());
642+
}

0 commit comments

Comments
 (0)