Skip to content

Commit e4166b3

Browse files
authored
Encapsulate metadata for literals on to a FieldMetadata structure (#16317)
* Encapsulate FieldMetadata * Add examples * refactor * impl `Default` for `FieldMetadata`
1 parent 3a312a9 commit e4166b3

File tree

8 files changed

+189
-54
lines changed

8 files changed

+189
-54
lines changed

datafusion/core/tests/user_defined/user_defined_scalar_functions.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ use datafusion_common::{
4040
assert_batches_eq, assert_batches_sorted_eq, assert_contains, exec_err, not_impl_err,
4141
plan_err, DFSchema, DataFusionError, Result, ScalarValue,
4242
};
43+
use datafusion_expr::expr::FieldMetadata;
4344
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
4445
use datafusion_expr::{
4546
lit_with_metadata, Accumulator, ColumnarValue, CreateFunction, CreateFunctionBody,
@@ -1535,7 +1536,7 @@ async fn test_metadata_based_udf_with_literal() -> Result<()> {
15351536
let df = ctx.sql("select 0;").await?.select(vec![
15361537
lit(5u64).alias_with_metadata("lit_with_doubling", Some(input_metadata.clone())),
15371538
lit(5u64).alias("lit_no_doubling"),
1538-
lit_with_metadata(5u64, Some(input_metadata))
1539+
lit_with_metadata(5u64, Some(FieldMetadata::from(input_metadata)))
15391540
.alias("lit_with_double_no_alias_metadata"),
15401541
])?;
15411542

datafusion/expr/src/expr.rs

Lines changed: 165 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ use crate::logical_plan::Subquery;
3030
use crate::Volatility;
3131
use crate::{udaf, ExprSchemable, Operator, Signature, WindowFrame, WindowUDF};
3232

33-
use arrow::datatypes::{DataType, FieldRef};
33+
use arrow::datatypes::{DataType, Field, FieldRef};
3434
use datafusion_common::cse::{HashNode, NormalizeEq, Normalizeable};
3535
use datafusion_common::tree_node::{
3636
Transformed, TransformedResult, TreeNode, TreeNodeContainer, TreeNodeRecursion,
@@ -284,8 +284,8 @@ pub enum Expr {
284284
Column(Column),
285285
/// A named reference to a variable in a registry.
286286
ScalarVariable(DataType, Vec<String>),
287-
/// A constant value along with associated metadata
288-
Literal(ScalarValue, Option<BTreeMap<String, String>>),
287+
/// A constant value along with associated [`FieldMetadata`].
288+
Literal(ScalarValue, Option<FieldMetadata>),
289289
/// A binary expression such as "age > 21"
290290
BinaryExpr(BinaryExpr),
291291
/// LIKE expression
@@ -413,6 +413,168 @@ impl<'a> TreeNodeContainer<'a, Self> for Expr {
413413
}
414414
}
415415

416+
/// Literal metadata
417+
///
418+
/// Stores metadata associated with a literal expression
419+
/// and is designed to be fast to `clone`.
420+
///
421+
/// This structure is used to store metadata associated with a literal expression, and it
422+
/// corresponds to the `metadata` field on [`Field`].
423+
///
424+
/// # Example: Create [`FieldMetadata`] from a [`Field`]
425+
/// ```
426+
/// # use std::collections::HashMap;
427+
/// # use datafusion_expr::expr::FieldMetadata;
428+
/// # use arrow::datatypes::{Field, DataType};
429+
/// # let field = Field::new("c1", DataType::Int32, true)
430+
/// # .with_metadata(HashMap::from([("foo".to_string(), "bar".to_string())]));
431+
/// // Create a new `FieldMetadata` instance from a `Field`
432+
/// let metadata = FieldMetadata::new_from_field(&field);
433+
/// // There is also a `From` impl:
434+
/// let metadata = FieldMetadata::from(&field);
435+
/// ```
436+
///
437+
/// # Example: Update a [`Field`] with [`FieldMetadata`]
438+
/// ```
439+
/// # use datafusion_expr::expr::FieldMetadata;
440+
/// # use arrow::datatypes::{Field, DataType};
441+
/// # let field = Field::new("c1", DataType::Int32, true);
442+
/// # let metadata = FieldMetadata::new_from_field(&field);
443+
/// // Add any metadata from `FieldMetadata` to `Field`
444+
/// let updated_field = metadata.add_to_field(field);
445+
/// ```
446+
///
447+
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
448+
pub struct FieldMetadata {
449+
/// The inner metadata of a literal expression, which is a map of string
450+
/// keys to string values.
451+
///
452+
/// Note this is not a `HashMap` because `HashMap` does not provide
453+
/// implementations for traits like `Hash` and `PartialOrd`.
454+
inner: Arc<BTreeMap<String, String>>,
455+
}
456+
457+
impl Default for FieldMetadata {
458+
fn default() -> Self {
459+
Self::new_empty()
460+
}
461+
}
462+
463+
impl FieldMetadata {
464+
/// Create a new empty metadata instance.
465+
pub fn new_empty() -> Self {
466+
Self {
467+
inner: Arc::new(BTreeMap::new()),
468+
}
469+
}
470+
471+
/// Merges two optional `FieldMetadata` instances, overwriting any existing
472+
/// keys in `m` with keys from `n` if present
473+
pub fn merge_options(
474+
m: Option<&FieldMetadata>,
475+
n: Option<&FieldMetadata>,
476+
) -> Option<FieldMetadata> {
477+
match (m, n) {
478+
(Some(m), Some(n)) => {
479+
let mut merged = m.clone();
480+
merged.extend(n.clone());
481+
Some(merged)
482+
}
483+
(Some(m), None) => Some(m.clone()),
484+
(None, Some(n)) => Some(n.clone()),
485+
(None, None) => None,
486+
}
487+
}
488+
489+
/// Create a new metadata instance from a `Field`'s metadata.
490+
pub fn new_from_field(field: &Field) -> Self {
491+
let inner = field
492+
.metadata()
493+
.iter()
494+
.map(|(k, v)| (k.to_string(), v.to_string()))
495+
.collect();
496+
Self {
497+
inner: Arc::new(inner),
498+
}
499+
}
500+
501+
/// Create a new metadata instance from a map of string keys to string values.
502+
pub fn new(inner: BTreeMap<String, String>) -> Self {
503+
Self {
504+
inner: Arc::new(inner),
505+
}
506+
}
507+
508+
/// Get the inner metadata as a reference to a `BTreeMap`.
509+
pub fn inner(&self) -> &BTreeMap<String, String> {
510+
&self.inner
511+
}
512+
513+
/// Return the inner metadata
514+
pub fn into_inner(self) -> Arc<BTreeMap<String, String>> {
515+
self.inner
516+
}
517+
518+
/// Adds metadata from `other` into `self`, overwriting any existing keys.
519+
pub fn extend(&mut self, other: Self) {
520+
let other = Arc::unwrap_or_clone(other.into_inner());
521+
Arc::make_mut(&mut self.inner).extend(other);
522+
}
523+
524+
/// Returns true if the metadata is empty.
525+
pub fn is_empty(&self) -> bool {
526+
self.inner.is_empty()
527+
}
528+
529+
/// Returns the number of key-value pairs in the metadata.
530+
pub fn len(&self) -> usize {
531+
self.inner.len()
532+
}
533+
534+
/// Updates the metadata on the Field with this metadata, if it is not empty.
535+
pub fn add_to_field(&self, field: Field) -> Field {
536+
if self.inner.is_empty() {
537+
return field;
538+
}
539+
540+
field.with_metadata(
541+
self.inner
542+
.iter()
543+
.map(|(k, v)| (k.clone(), v.clone()))
544+
.collect(),
545+
)
546+
}
547+
}
548+
549+
impl From<&Field> for FieldMetadata {
550+
fn from(field: &Field) -> Self {
551+
Self::new_from_field(field)
552+
}
553+
}
554+
555+
impl From<BTreeMap<String, String>> for FieldMetadata {
556+
fn from(inner: BTreeMap<String, String>) -> Self {
557+
Self::new(inner)
558+
}
559+
}
560+
561+
impl From<std::collections::HashMap<String, String>> for FieldMetadata {
562+
fn from(map: std::collections::HashMap<String, String>) -> Self {
563+
Self::new(map.into_iter().collect())
564+
}
565+
}
566+
567+
/// Create a [`FieldMetadata`] from a borrowed `HashMap`, cloning each key and value.
568+
impl From<&std::collections::HashMap<String, String>> for FieldMetadata {
569+
fn from(map: &std::collections::HashMap<String, String>) -> Self {
570+
let inner = map
571+
.iter()
572+
.map(|(k, v)| (k.to_string(), v.to_string()))
573+
.collect();
574+
Self::new(inner)
575+
}
576+
}
577+
416578
/// UNNEST expression.
417579
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
418580
pub struct Unnest {

datafusion/expr/src/expr_rewriter/mod.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -390,11 +390,7 @@ mod test {
390390
} else {
391391
utf8_val
392392
};
393-
Ok(Transformed::yes(lit_with_metadata(
394-
utf8_val,
395-
metadata
396-
.map(|m| m.into_iter().collect::<HashMap<String, String>>()),
397-
)))
393+
Ok(Transformed::yes(lit_with_metadata(utf8_val, metadata)))
398394
}
399395
// otherwise, return None
400396
_ => Ok(Transformed::no(expr)),

datafusion/expr/src/expr_schema.rs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -423,12 +423,7 @@ impl ExprSchemable for Expr {
423423
Expr::Literal(l, metadata) => {
424424
let mut field = Field::new(&schema_name, l.data_type(), l.is_null());
425425
if let Some(metadata) = metadata {
426-
field = field.with_metadata(
427-
metadata
428-
.iter()
429-
.map(|(k, v)| (k.clone(), v.clone()))
430-
.collect(),
431-
);
426+
field = metadata.add_to_field(field);
432427
}
433428
Ok(Arc::new(field))
434429
}

datafusion/expr/src/literal.rs

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,34 +17,29 @@
1717

1818
//! Literal module contains foundational types that are used to represent literals in DataFusion.
1919
20+
use crate::expr::FieldMetadata;
2021
use crate::Expr;
2122
use datafusion_common::ScalarValue;
22-
use std::collections::HashMap;
2323

2424
/// Create a literal expression
2525
pub fn lit<T: Literal>(n: T) -> Expr {
2626
n.lit()
2727
}
2828

29-
pub fn lit_with_metadata<T: Literal>(
30-
n: T,
31-
metadata: impl Into<Option<HashMap<String, String>>>,
32-
) -> Expr {
33-
let metadata = metadata.into();
29+
pub fn lit_with_metadata<T: Literal>(n: T, metadata: Option<FieldMetadata>) -> Expr {
3430
let Some(metadata) = metadata else {
3531
return n.lit();
3632
};
3733

3834
let Expr::Literal(sv, prior_metadata) = n.lit() else {
3935
unreachable!();
4036
};
41-
4237
let new_metadata = match prior_metadata {
4338
Some(mut prior) => {
4439
prior.extend(metadata);
4540
prior
4641
}
47-
None => metadata.into_iter().collect(),
42+
None => metadata,
4843
};
4944

5045
Expr::Literal(sv, Some(new_metadata))

datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
//! Expression simplification API
1919
2020
use std::borrow::Cow;
21-
use std::collections::{BTreeMap, HashSet};
21+
use std::collections::HashSet;
2222
use std::ops::Not;
2323

2424
use arrow::{
@@ -58,6 +58,7 @@ use crate::{
5858
analyzer::type_coercion::TypeCoercionRewriter,
5959
simplify_expressions::unwrap_cast::try_cast_literal_to_type,
6060
};
61+
use datafusion_expr::expr::FieldMetadata;
6162
use indexmap::IndexSet;
6263
use regex::Regex;
6364

@@ -523,9 +524,9 @@ struct ConstEvaluator<'a> {
523524
#[allow(clippy::large_enum_variant)]
524525
enum ConstSimplifyResult {
525526
// Expr was simplified and contains the new expression
526-
Simplified(ScalarValue, Option<BTreeMap<String, String>>),
527+
Simplified(ScalarValue, Option<FieldMetadata>),
527528
// Expr was not simplified and original value is returned
528-
NotSimplified(ScalarValue, Option<BTreeMap<String, String>>),
529+
NotSimplified(ScalarValue, Option<FieldMetadata>),
529530
// Evaluation encountered an error, contains the original expression
530531
SimplifyRuntimeError(DataFusionError, Expr),
531532
}
@@ -682,9 +683,7 @@ impl<'a> ConstEvaluator<'a> {
682683
let m = f.metadata();
683684
match m.is_empty() {
684685
true => None,
685-
false => {
686-
Some(m.iter().map(|(k, v)| (k.clone(), v.clone())).collect())
687-
}
686+
false => Some(FieldMetadata::from(m)),
688687
}
689688
});
690689
let col_val = match phys_expr.evaluate(&self.input_batch) {

datafusion/physical-expr/src/expressions/literal.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
//! Literal expressions for physical operations
1919
2020
use std::any::Any;
21-
use std::collections::HashMap;
2221
use std::hash::Hash;
2322
use std::sync::Arc;
2423

@@ -30,6 +29,7 @@ use arrow::{
3029
record_batch::RecordBatch,
3130
};
3231
use datafusion_common::{Result, ScalarValue};
32+
use datafusion_expr::expr::FieldMetadata;
3333
use datafusion_expr::Expr;
3434
use datafusion_expr_common::columnar_value::ColumnarValue;
3535
use datafusion_expr_common::interval_arithmetic::Interval;
@@ -64,14 +64,13 @@ impl Literal {
6464
/// Create a literal value expression
6565
pub fn new_with_metadata(
6666
value: ScalarValue,
67-
metadata: impl Into<Option<HashMap<String, String>>>,
67+
metadata: Option<FieldMetadata>,
6868
) -> Self {
69-
let metadata = metadata.into();
7069
let mut field =
7170
Field::new(format!("{value}"), value.data_type(), value.is_null());
7271

7372
if let Some(metadata) = metadata {
74-
field = field.with_metadata(metadata);
73+
field = metadata.add_to_field(field);
7574
}
7675

7776
Self {

0 commit comments

Comments
 (0)