4242
4343// https://github.com/twitter/algebird/blob/5fdb079447271a5fe0f1fba068e5f86591ccde36/algebird-core/src/main/scala/com/twitter/algebird/HyperLogLog.scala
4444// https://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.rdd.RDD countApproxDistinct
45+ // is_x86_feature_detected ?
4546
46- // use bytecount ;
47+ use packed_simd :: { self , Cast , FromBits , IntoBits } ;
4748use std:: {
48- cmp:: { self , Ordering } , fmt, hash:: { Hash , Hasher } , marker:: PhantomData , ops
49+ cmp:: { self , Ordering } , convert :: identity , fmt, hash:: { Hash , Hasher } , marker:: PhantomData , ops:: { self , Range }
4950} ;
5051use traits:: { Intersect , IntersectPlusUnionIsPlus , New , UnionAssign } ;
5152use twox_hash:: XxHash ;
52- // use faster::{self, IntoSIMDRefIterator,SIMDIterator};
53- use packed_simd:: { self , FromBits } ;
54- use std:: { convert:: identity, ops:: Range } ;
5553
5654mod consts;
5755use self :: consts:: * ;
@@ -80,11 +78,10 @@ where
8078 let p = ( f64:: log2 ( 1.04 / error_rate) * 2.0 ) . ceil ( ) as u8 ;
8179 assert ! ( 0 < p && p < 64 ) ;
8280 let alpha = Self :: get_alpha ( p) ;
83- // let max_rho = 64;//64 - self.p + 1;
8481 Self {
8582 alpha,
8683 zero : 1 << p,
87- sum : ( 1 << p) as f64 , //(1u128 << max_rho) * (1 << p),
84+ sum : ( 1 << p) as f64 ,
8885 p,
8986 m : vec ! [ 0 ; 1 << p] . into_boxed_slice ( ) ,
9087 marker : PhantomData ,
@@ -117,10 +114,6 @@ where
117114 let new = cmp:: max ( old, rho) ;
118115 self . zero -= if old == 0 { 1 } else { 0 } ;
119116
120- // let max_rho = 64;//64 - self.p + 1;
121- // self.sum -= 1u128 << (max_rho-old);
122- // self.sum += 1u128 << (max_rho-new);
123-
124117 // see pow_bithack()
125118 self . sum -= f64:: from_bits ( u64:: max_value ( ) . wrapping_sub ( old as u64 ) << 54 >> 2 )
126119 - f64:: from_bits ( u64:: max_value ( ) . wrapping_sub ( new as u64 ) << 54 >> 2 ) ;
@@ -131,7 +124,6 @@ where
131124 /// Retrieve an estimate of the carginality of the stream.
132125 pub fn len ( & self ) -> f64 {
133126 let v = self . zero ;
134- // assert_eq!(bytecount::count(&self.m, 0), self.zero);
135127 if v > 0 {
136128 let h = self . m . len ( ) as f64 * ( self . m . len ( ) as f64 / v as f64 ) . ln ( ) ;
137129 if h <= Self :: get_threshold ( self . p - 4 ) {
@@ -141,10 +133,9 @@ where
141133 self . ep ( )
142134 }
143135
144- /// Returns true if the cardinality estimate is 0
136+ /// Returns true if empty.
145137 pub fn is_empty ( & self ) -> bool {
146- // self.len() == 0.0
147- self . m . iter ( ) . all ( |& x| x == 0 )
138+ self . zero == self . m . len ( )
148139 }
149140
150141 /// Merge another HyperLogLog data structure into `self`.
@@ -154,19 +145,7 @@ where
154145 assert_eq ! ( src. alpha, self . alpha) ;
155146 assert_eq ! ( src. p, self . p) ;
156147 assert_eq ! ( src. m. len( ) , self . m. len( ) ) ;
157- // let mut zero = self.zero;
158- // src.m
159- // .iter()
160- // .zip(self.m.iter_mut())
161- // .for_each(|(src_mir, mir)| {
162- // if *mir == 0 && *src_mir != 0 {
163- // zero -= 1;
164- // }
165- // *mir = cmp::max(*mir, *src_mir);
166- // });
167- // self.zero = zero;
168- use packed_simd:: * ;
169- assert_eq ! ( self . m. len( ) % u8s:: lanes( ) , 0 ) ;
148+ assert_eq ! ( self . m. len( ) % u8s:: lanes( ) , 0 ) ; // TODO: high error rate can trigger this
170149 assert_eq ! ( u8s:: lanes( ) , f32s:: lanes( ) * 4 ) ;
171150 assert_eq ! ( f32s:: lanes( ) , u32s:: lanes( ) ) ;
172151 assert_eq ! ( u8sq:: lanes( ) , u32s:: lanes( ) ) ;
@@ -192,22 +171,9 @@ where
192171 }
193172 }
194173 self . zero = zero. wrapping_sum ( ) as usize ;
195- // assert_eq!(self.zero, self.m.iter().filter(|&&x| x == 0).count());// bytecount::count(&self.m, 0);
196- // self.zero = self.m.iter().filter(|&&x| x == 0).count(); //bytecount::count(&self.m, 0);
197174 self . sum = sum. sum ( ) as f64 ;
198- // let max_rho = 64;//64 - self.p + 1;
199- // let sum2 = self
200- // .m
201- // .iter()
202- // .map(|&x| {
203- // 1u128 << (max_rho-x)
204- // }).sum::<u128>() as f64 / (1u128 << max_rho) as f64;
205- // if self.sum != sum2 {
206- // println!("{} {}", self.sum, sum2);
207- // }
208175 // https://github.com/AdamNiederer/faster/issues/37
209176 // (src.m.simd_iter(faster::u8s(0)),self.m.simd_iter_mut(faster::u8s(0))).zip()
210- // 00011111 00011111 00011111 00011111 00011111 00011111 00011111 00011111
211177 }
212178
213179 /// Intersect another HyperLogLog data structure into `self`.
@@ -217,23 +183,10 @@ where
217183 assert_eq ! ( src. alpha, self . alpha) ;
218184 assert_eq ! ( src. p, self . p) ;
219185 assert_eq ! ( src. m. len( ) , self . m. len( ) ) ;
220- // let mut zero = self.zero;
221- // src.m
222- // .iter()
223- // .zip(self.m.iter_mut())
224- // .for_each(|(src_mir, mir)| {
225- // if *mir != 0 && *src_mir == 0 {
226- // zero += 1;
227- // }
228- // *mir = cmp::min(*mir, *src_mir);
229- // });
230- // self.zero = zero;
231- use packed_simd:: * ;
232186 assert_eq ! ( self . m. len( ) % u8s:: lanes( ) , 0 ) ;
233187 assert_eq ! ( u8s:: lanes( ) , f32s:: lanes( ) * 4 ) ;
234188 assert_eq ! ( f32s:: lanes( ) , u32s:: lanes( ) ) ;
235189 assert_eq ! ( u8sq:: lanes( ) , u32s:: lanes( ) ) ;
236- // is_x86_feature_detected
237190 let mut zero = u8s_sad_out:: splat ( 0 ) ;
238191 let mut sum = f32s:: splat ( 0.0 ) ;
239192 for i in ( 0 ..self . m . len ( ) ) . step_by ( u8s:: lanes ( ) ) {
@@ -253,28 +206,16 @@ where
253206 let x: f32s = ( ( u32s:: splat ( u32:: max_value ( ) ) - x) << 25 >> 2 ) . into_bits ( ) ;
254207 sum += x;
255208 }
256- // f32::from_bits(u32::max_value().wrapping_sub(x as u32) << 25 >> 2)
257- // f64::from_bits(u64::max_value().wrapping_sub(x as u64) << 54 >> 2)
258209 }
259210 }
260211 self . zero = zero. wrapping_sum ( ) as usize ;
261- // assert_eq!(self.zero, self.m.iter().filter(|&&x| x == 0).count());// bytecount::count(&self.m, 0);
262212 self . sum = sum. sum ( ) as f64 ;
263- // let max_rho = 64;//64 - self.p + 1;
264- // let sum2 = self
265- // .m
266- // .iter()
267- // .map(|&x| {
268- // 1u128 << (max_rho-x)
269- // }).sum::<u128>() as f64 / (1u128 << max_rho) as f64;
270- // if self.sum != sum2 {
271- // println!("{} {}", self.sum, sum2);
272- // }
273213 }
274214
275215 /// Clears the `HyperLogLog` data structure, as if it was new.
276216 pub fn clear ( & mut self ) {
277217 self . zero = self . m . len ( ) ;
218+ self . sum = self . m . len ( ) as f64 ;
278219 self . m . iter_mut ( ) . for_each ( |x| {
279220 * x = 0 ;
280221 } ) ;
@@ -308,29 +249,12 @@ where
308249 }
309250
310251 fn get_nearest_neighbors ( e : f64 , estimate_vector : & [ f64 ] ) -> Range < usize > {
311- // TODO binary search
312- // let mut r: Vec<(f64, usize)> = estimate_vector
313- // .into_iter()
314- // .enumerate()
315- // .map(|(i, estimate)| {
316- // let dr = e - estimate;
317- // (dr * dr, i)
318- // }).collect();
319- // r.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(Ordering::Equal));
320- // r.truncate(6);
321- // r.sort_by_key(|a| a.1);
322-
323252 let index = estimate_vector
324253 . binary_search_by ( |a| a. partial_cmp ( & e) . unwrap_or ( Ordering :: Equal ) )
325254 . unwrap_or_else ( identity) ;
255+
326256 let mut min = if index > 6 { index - 6 } else { 0 } ;
327257 let mut max = cmp:: min ( index + 6 , estimate_vector. len ( ) ) ;
328- // let mut b: Vec<(f64, usize)> = estimate_vector[min..max].into_iter().enumerate().map(|(i, estimate)| {
329- // let dr = e - estimate;
330- // (dr * dr, min+i)
331- // }).collect();
332- // b.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(Ordering::Equal));
333- // b.truncate(6);
334258
335259 while max - min != 6 {
336260 let ( min_val, max_val) = unsafe {
@@ -347,44 +271,10 @@ where
347271 }
348272 }
349273
350- // assert_eq!(r[0].1, min);
351- // assert_eq!(r[5].1, max - 1);
352-
353274 min..max
354- // let mut c: Vec<(f64, usize)> = estimate_vector[min..max].into_iter().enumerate().map(|(i, estimate)| {
355- // let dr = e - estimate;
356- // (dr * dr, min+i)
357- // }).collect();
358- // c.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(Ordering::Equal));
359-
360- // assert_eq!(b, c);
361- // b.into_iter().map(|(_, b)| b)
362275 }
363276
364277 fn ep ( & self ) -> f64 {
365- // let sum = self
366- // .m
367- // .iter()
368- // .map(|&x| {
369- // // see pow_bithack()
370- // f64::from_bits(u64::max_value().wrapping_sub(x as u64) << 54 >> 2)
371- // }).sum::<f64>();
372- // let max_rho = 64;//64 - self.p + 1;
373- // let sum2 = self
374- // .m
375- // .iter()
376- // .map(|&x| {
377- // 1u128 << (max_rho-x)
378- // }).sum::<u128>();
379- // assert_eq!(sum2, self.sum);
380- // let sum = self.sum as f64 / (1u128 << max_rho) as f64;
381- // if (sum - self.sum).abs() > ::std::f64::EPSILON {
382- // println!("{} {}", sum, self.sum);
383- // }
384- // https://github.com/AdamNiederer/faster/issues/59
385- // let sum = m.simd_iter(faster::u8s(0)).simd_map(|v| {
386- // (u64::max_value() - v) << 54 >> 2
387- // }).simd_reduce(faster::f64s(0.0), |acc, v| acc + v).sum();
388278 let e = self . alpha * ( self . m . len ( ) * self . m . len ( ) ) as f64 / self . sum ;
389279 if e <= ( 5 * self . m . len ( ) ) as f64 {
390280 e - Self :: estimate_bias ( e, self . p )
@@ -471,6 +361,7 @@ mod simd_types {
471361}
472362#[ cfg( target_feature = "avx2" ) ]
473363mod simd_types {
364+ #![ allow( non_camel_case_types) ]
474365 use super :: * ;
475366 pub type u8s = packed_simd:: u8x32 ;
476367 pub type u8s_sad_out = packed_simd:: u64x4 ;
@@ -480,6 +371,7 @@ mod simd_types {
480371}
481372#[ cfg( all( not( target_feature = "avx2" ) , target_feature = "sse2" ) ) ]
482373mod simd_types {
374+ #![ allow( non_camel_case_types) ]
483375 use super :: * ;
484376 pub type u8s = packed_simd:: u8x16 ;
485377 pub type u8s_sad_out = packed_simd:: u64x2 ;
@@ -489,6 +381,7 @@ mod simd_types {
489381}
490382#[ cfg( all( not( target_feature = "avx2" ) , not( target_feature = "sse2" ) ) ) ]
491383mod simd_types {
384+ #![ allow( non_camel_case_types) ]
492385 use super :: * ;
493386 pub type u8s = packed_simd:: u8x8 ;
494387 pub type u8s_sad_out = u64 ;
@@ -509,31 +402,37 @@ struct Sad<X>(PhantomData<fn(X)>);
509402// packed_simd::Simd(transmute(_mm512_sad_epu8(transmute(a.0), transmute(b.0))))
510403// }
511404// }
405+ mod x86 {
406+ #[ cfg( target_arch = "x86" ) ]
407+ pub use std:: arch:: x86:: * ;
408+ #[ cfg( target_arch = "x86_64" ) ]
409+ pub use std:: arch:: x86_64:: * ;
410+ }
512411#[ cfg( target_feature = "avx2" ) ]
513412impl Sad < packed_simd:: u8x32 > {
514413 #[ inline]
515414 #[ target_feature( enable = "avx2" ) ]
516415 unsafe fn sad ( a : packed_simd:: u8x32 , b : packed_simd:: u8x32 ) -> packed_simd:: u64x4 {
517- use std:: { arch :: x86_64 :: _mm256_sad_epu8 , mem:: transmute} ;
518- packed_simd:: Simd ( transmute ( _mm256_sad_epu8 ( transmute ( a. 0 ) , transmute ( b. 0 ) ) ) )
416+ use std:: mem:: transmute;
417+ packed_simd:: Simd ( transmute ( x86 :: _mm256_sad_epu8 ( transmute ( a. 0 ) , transmute ( b. 0 ) ) ) )
519418 }
520419}
521420#[ cfg( target_feature = "sse2" ) ]
522421impl Sad < packed_simd:: u8x16 > {
523422 #[ inline]
524423 #[ target_feature( enable = "sse2" ) ]
525424 unsafe fn sad ( a : packed_simd:: u8x16 , b : packed_simd:: u8x16 ) -> packed_simd:: u64x2 {
526- use std:: { arch :: x86_64 :: _mm_sad_epu8 , mem:: transmute} ;
527- packed_simd:: Simd ( transmute ( _mm_sad_epu8 ( transmute ( a. 0 ) , transmute ( b. 0 ) ) ) )
425+ use std:: mem:: transmute;
426+ packed_simd:: Simd ( transmute ( x86 :: _mm_sad_epu8 ( transmute ( a. 0 ) , transmute ( b. 0 ) ) ) )
528427 }
529428}
530429#[ cfg( target_feature = "sse,mmx" ) ]
531430impl Sad < packed_simd:: u8x8 > {
532431 #[ inline]
533432 #[ target_feature( enable = "sse,mmx" ) ]
534433 unsafe fn sad ( a : packed_simd:: u8x8 , b : packed_simd:: u8x8 ) -> u64 {
535- use std:: { arch :: x86_64 :: _mm_sad_pu8 , mem:: transmute} ;
536- transmute ( _mm_sad_pu8 ( transmute ( a. 0 ) , transmute ( b. 0 ) ) )
434+ use std:: mem:: transmute;
435+ transmute ( x86 :: _mm_sad_pu8 ( transmute ( a. 0 ) , transmute ( b. 0 ) ) )
537436 }
538437}
539438#[ cfg( not( target_feature = "sse,mmx" ) ) ]
@@ -557,7 +456,9 @@ mod test {
557456 for x in 0_u8 ..65 {
558457 let a = 2.0_f64 . powi ( -( x as i32 ) ) ;
559458 let b = f64:: from_bits ( u64:: max_value ( ) . wrapping_sub ( x as u64 ) << 54 >> 2 ) ;
459+ let c = f32:: from_bits ( u32:: max_value ( ) . wrapping_sub ( x as u32 ) << 25 >> 2 ) ;
560460 assert_eq ! ( a, b) ;
461+ assert_eq ! ( a, c as f64 ) ;
561462 }
562463 }
563464
0 commit comments