Skip to content

Commit d565b48

Browse files
committed
address cr.
1 parent ce1cdc8 commit d565b48

File tree

1 file changed

+8
-5
lines changed
  • datafusion/physical-plan/src/aggregates/group_values/single_group_by

1 file changed

+8
-5
lines changed

datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,13 @@ hash_float!(f16, f32, f64);
8181
pub struct GroupValuesPrimitive<T: ArrowPrimitiveType> {
8282
/// The data type of the output array
8383
data_type: DataType,
84-
/// Stores the group index based on the hash of its value
85-
///
86-
/// We don't store the hashes as hashing fixed width primitives
87-
/// is fast enough for this not to benefit performance
84+
/// Stores the `group_index` and `hash` based on the hash of its value
85+
///
86+
/// We also store `hash` is for reducing cost of rehashing. Such cost
87+
/// is obvious in high cardinality group by situation.
88+
/// More details can see:
89+
/// <https://github.com/apache/datafusion/issues/15961>
90+
///
8891
map: HashTable<(usize, u64)>,
8992
/// The group index of the null value if any
9093
null_group: Option<usize>,
@@ -148,7 +151,7 @@ where
148151
}
149152

150153
fn size(&self) -> usize {
151-
self.map.capacity() * size_of::<usize>() + self.values.allocated_size()
154+
self.map.capacity() * size_of::<(usize, u64)>() + self.values.allocated_size()
152155
}
153156

154157
fn is_empty(&self) -> bool {

0 commit comments

Comments
 (0)