@@ -98,6 +98,8 @@ pub struct GroupValuesPrimitive<T: ArrowPrimitiveType> {
98
98
values : Vec < T :: Native > ,
99
99
/// The random state used to generate hashes
100
100
random_state : RandomState ,
101
+
102
+ append_row_indices : Vec < u32 > ,
101
103
}
102
104
103
105
impl < T : ArrowPrimitiveType > GroupValuesPrimitive < T > {
@@ -109,6 +111,7 @@ impl<T: ArrowPrimitiveType> GroupValuesPrimitive<T> {
109
111
values : Vec :: with_capacity ( 128 ) ,
110
112
null_group : None ,
111
113
random_state : Default :: default ( ) ,
114
+ append_row_indices : Vec :: new ( ) ,
112
115
}
113
116
}
114
117
}
@@ -119,13 +122,18 @@ where
119
122
{
120
123
fn intern ( & mut self , cols : & [ ArrayRef ] , groups : & mut Vec < usize > ) -> Result < ( ) > {
121
124
assert_eq ! ( cols. len( ) , 1 ) ;
125
+ let col = cols[ 0 ] . as_primitive :: < T > ( ) ;
126
+
122
127
groups. clear ( ) ;
128
+ self . append_row_indices . clear ( ) ;
123
129
124
- for v in cols[ 0 ] . as_primitive :: < T > ( ) {
130
+ let mut num_total_groups = self . values . len ( ) ;
131
+ for ( row_index, v) in col. iter ( ) . enumerate ( ) {
125
132
let group_id = match v {
126
133
None => * self . null_group . get_or_insert_with ( || {
127
- let group_id = self . values . len ( ) ;
128
- self . values . push ( Default :: default ( ) ) ;
134
+ let group_id = num_total_groups;
135
+ self . append_row_indices . push ( row_index as u32 ) ;
136
+ num_total_groups += 1 ;
129
137
group_id
130
138
} ) ,
131
139
Some ( key) => {
@@ -140,16 +148,28 @@ where
140
148
match insert {
141
149
hashbrown:: hash_table:: Entry :: Occupied ( o) => o. get ( ) . 0 ,
142
150
hashbrown:: hash_table:: Entry :: Vacant ( v) => {
143
- let g = self . values . len ( ) ;
151
+ let g = num_total_groups ;
144
152
v. insert ( ( g, key) ) ;
145
- self . values . push ( key) ;
153
+ self . append_row_indices . push ( row_index as u32 ) ;
154
+ num_total_groups += 1 ;
146
155
g
147
156
}
148
157
}
149
158
}
150
159
} ;
151
160
groups. push ( group_id)
152
161
}
162
+
163
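+ // If every row in this batch started a new group, append the whole column to `self.values` at once; otherwise copy only the rows recorded in `append_row_indices`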
164
+ if self . append_row_indices . len ( ) == col. len ( ) {
165
+ self . values . extend_from_slice ( col. values ( ) ) ;
166
+ } else {
167
+ let col_values = col. values ( ) ;
168
+ for & row_index in self . append_row_indices . iter ( ) {
169
+ self . values . push ( col_values[ row_index as usize ] ) ;
170
+ }
171
+ }
172
+
153
173
Ok ( ( ) )
154
174
}
155
175
0 commit comments