-
Notifications
You must be signed in to change notification settings - Fork 13
Upgrade DataFusion 48.0.0 #61
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -455,6 +455,10 @@ impl Predicate { | |
| self.eq_classes[idx].columns.insert(c2.clone()); | ||
| } | ||
| (Some(&i), Some(&j)) => { | ||
| if i == j { | ||
| // The two columns are already in the same equivalence class. | ||
| return Ok(()); | ||
| } | ||
| // We need to merge two existing column eq classes. | ||
|
|
||
| // Delete the eq class with a larger index, | ||
|
|
@@ -593,15 +597,15 @@ impl Predicate { | |
| /// Add a binary expression to our collection of filters. | ||
| fn insert_binary_expr(&mut self, left: &Expr, op: Operator, right: &Expr) -> Result<()> { | ||
| match (left, op, right) { | ||
| (Expr::Column(c), op, Expr::Literal(v)) => { | ||
| (Expr::Column(c), op, Expr::Literal(v, _)) => { | ||
| if let Err(e) = self.add_range(c, &op, v) { | ||
| // Add a range can fail in some cases, so just fallthrough | ||
| log::debug!("failed to add range filter: {e}"); | ||
| } else { | ||
| return Ok(()); | ||
| } | ||
| } | ||
| (Expr::Literal(_), op, Expr::Column(_)) => { | ||
| (Expr::Literal(_, _), op, Expr::Column(_)) => { | ||
| if let Some(swapped) = op.swap() { | ||
| return self.insert_binary_expr(right, swapped, left); | ||
| } | ||
|
|
@@ -714,22 +718,22 @@ impl Predicate { | |
| extra_range_filters.push(Expr::BinaryExpr(BinaryExpr { | ||
| left: Box::new(Expr::Column(other_column.clone())), | ||
| op: Operator::Eq, | ||
| right: Box::new(Expr::Literal(range.lower().clone())), | ||
| right: Box::new(Expr::Literal(range.lower().clone(), None)), | ||
| })) | ||
| } else { | ||
| if !range.lower().is_null() { | ||
| extra_range_filters.push(Expr::BinaryExpr(BinaryExpr { | ||
| left: Box::new(Expr::Column(other_column.clone())), | ||
| op: Operator::GtEq, | ||
| right: Box::new(Expr::Literal(range.lower().clone())), | ||
| right: Box::new(Expr::Literal(range.lower().clone(), None)), | ||
| })) | ||
| } | ||
|
|
||
| if !range.upper().is_null() { | ||
| extra_range_filters.push(Expr::BinaryExpr(BinaryExpr { | ||
| left: Box::new(Expr::Column(other_column.clone())), | ||
| op: Operator::LtEq, | ||
| right: Box::new(Expr::Literal(range.upper().clone())), | ||
| right: Box::new(Expr::Literal(range.upper().clone(), None)), | ||
| })) | ||
| } | ||
| } | ||
|
|
@@ -984,7 +988,8 @@ mod test { | |
| let ctx = SessionContext::new_with_config( | ||
| SessionConfig::new() | ||
| .set_bool("datafusion.execution.parquet.pushdown_filters", true) | ||
| .set_bool("datafusion.explain.logical_plan_only", true), | ||
| .set_bool("datafusion.explain.logical_plan_only", true) | ||
| .set_bool("datafusion.sql_parser.map_varchar_to_utf8view", false), | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. DF48 made the config true by default, so it'll break the -> Currently, the t1 is from here: So I'm wondering if
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Opened an issue in upstream: apache/datafusion#16277
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's unfortunate, it may be possible to generalize the equivalence class logic to handle certain types of casts (particularly invertible ones). It would probably be somewhat complex to implement though.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, we can add this to our backlist. |
||
| ); | ||
|
|
||
| let t1_path = tempdir()?; | ||
|
|
@@ -996,11 +1001,11 @@ mod test { | |
| ctx.sql(&format!( | ||
| " | ||
| CREATE EXTERNAL TABLE t1 ( | ||
| column1 VARCHAR, | ||
| column2 BIGINT, | ||
| column1 VARCHAR, | ||
| column2 BIGINT, | ||
| column3 CHAR | ||
| ) | ||
| STORED AS PARQUET | ||
| STORED AS PARQUET | ||
| LOCATION '{}'", | ||
| t1_path.path().to_string_lossy() | ||
| )) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
DF48 will generate the following plan:
That is,
t1.column3 = t1.column1will be processed twice, and the second time they have the same index -> 0. If we don't add the fast return, it'll make column2's index 0, which will break the test (I guess we didn't consider the i==j case before)