@@ -98,6 +98,8 @@ pub struct GroupValuesPrimitive<T: ArrowPrimitiveType> {
98
98
values : Vec < T :: Native > ,
99
99
/// The random state used to generate hashes
100
100
random_state : RandomState ,
101
+
102
+ append_row_indices : Vec < u32 > ,
101
103
}
102
104
103
105
impl < T : ArrowPrimitiveType > GroupValuesPrimitive < T > {
@@ -109,6 +111,7 @@ impl<T: ArrowPrimitiveType> GroupValuesPrimitive<T> {
109
111
values : Vec :: with_capacity ( 128 ) ,
110
112
null_group : None ,
111
113
random_state : Default :: default ( ) ,
114
+ append_row_indices : Vec :: new ( ) ,
112
115
}
113
116
}
114
117
}
@@ -119,13 +122,20 @@ where
119
122
{
120
123
fn intern ( & mut self , cols : & [ ArrayRef ] , groups : & mut Vec < usize > ) -> Result < ( ) > {
121
124
assert_eq ! ( cols. len( ) , 1 ) ;
125
+ let col = cols[ 0 ] . as_primitive :: < T > ( ) ;
126
+
122
127
groups. clear ( ) ;
128
+ self . append_row_indices . clear ( ) ;
123
129
124
- for v in cols[ 0 ] . as_primitive :: < T > ( ) {
130
+ let mut num_new_groups = 0 ;
131
+ let mut num_total_groups = self . values . len ( ) ;
132
+ for ( row_index, v) in col. iter ( ) . enumerate ( ) {
125
133
let group_id = match v {
126
134
None => * self . null_group . get_or_insert_with ( || {
127
- let group_id = self . values . len ( ) ;
128
- self . values . push ( Default :: default ( ) ) ;
135
+ let group_id = num_total_groups;
136
+ self . append_row_indices . push ( row_index as u32 ) ;
137
+ num_total_groups += 1 ;
138
+ num_new_groups += 1 ;
129
139
group_id
130
140
} ) ,
131
141
Some ( key) => {
@@ -140,16 +150,29 @@ where
140
150
match insert {
141
151
hashbrown:: hash_table:: Entry :: Occupied ( o) => o. get ( ) . 0 ,
142
152
hashbrown:: hash_table:: Entry :: Vacant ( v) => {
143
- let g = self . values . len ( ) ;
153
+ let g = num_total_groups ;
144
154
v. insert ( ( g, key) ) ;
145
- self . values . push ( key) ;
155
+ self . append_row_indices . push ( row_index as u32 ) ;
156
+ num_new_groups += 1 ;
157
+ num_total_groups += 1 ;
146
158
g
147
159
}
148
160
}
149
161
}
150
162
} ;
151
163
groups. push ( group_id)
152
164
}
165
+
166
+ // If all are new groups, we just extend it
167
+ if num_new_groups == col. len ( ) {
168
+ self . values . extend_from_slice ( col. values ( ) ) ;
169
+ } else {
170
+ let col_values = col. values ( ) ;
171
+ for & row_index in self . append_row_indices . iter ( ) {
172
+ self . values . push ( col_values[ row_index as usize ] ) ;
173
+ }
174
+ }
175
+
153
176
Ok ( ( ) )
154
177
}
155
178
0 commit comments