@@ -10,13 +10,14 @@ use polars::frame::DataFrame;
10
10
use polars:: prelude:: { col, lit, IntoLazy , Series } ;
11
11
use rayon:: iter:: { IntoParallelIterator , IntoParallelRefIterator , ParallelIterator } ;
12
12
use std:: collections:: HashMap ;
13
+ use std:: hash:: BuildHasherDefault ;
13
14
use std:: sync:: Arc ;
14
15
15
16
impl CatEncs {
16
17
/// Creates a `CatEncs` holding no encodings.
///
/// `map` is set to its type's `Default` value. In this diff hunk the `-` line is the
/// previous `rev_map` initializer (also `Default::default()`), and the `+` line is its
/// replacement, which builds the reverse map explicitly with a capacity hint and a
/// default-constructed `BuildHasherDefault` hasher.
pub fn new_empty ( ) -> CatEncs {
17
18
CatEncs {
18
19
map : Default :: default ( ) ,
19
- rev_map : Default :: default ( ) ,
20
// Pre-sizes the reverse map for at least 2 entries instead of starting unallocated.
// NOTE(review): the reason for choosing a capacity of 2 is not visible here — confirm.
+ rev_map : HashMap :: with_capacity_and_hasher ( 2 , BuildHasherDefault :: default ( ) ) ,
20
21
}
21
22
}
22
23
@@ -532,42 +533,61 @@ pub fn encode_triples(
532
533
let out: Vec < _ > = dfs
533
534
. into_par_iter ( )
534
535
. map ( |mut df| {
535
- let subject_out =
536
- if let Some ( subject_prefix_map) = prefix_maps. get ( SUBJECT_COL_NAME ) {
537
- let u = df
538
- . drop_in_place ( SUBJECT_PREFIX_COL_NAME )
539
- . unwrap ( )
540
- . as_materialized_series_maintain_scalar ( )
541
- . u32 ( )
542
- . unwrap ( )
543
- . first ( )
544
- . unwrap ( ) ;
545
- Some ( subject_prefix_map. get ( & u) . unwrap ( ) . clone ( ) )
546
- } else {
547
- None
548
- } ;
549
- let object_out = if let Some ( object_prefix_map) = prefix_maps. get ( OBJECT_COL_NAME ) {
550
- let u = df
551
- . drop_in_place ( OBJECT_PREFIX_COL_NAME )
552
- . unwrap ( )
553
- . as_materialized_series_maintain_scalar ( )
554
- . u32 ( )
555
- . unwrap ( )
556
- . first ( )
557
- . unwrap ( ) ;
558
- Some ( object_prefix_map. get ( & u) . unwrap ( ) . clone ( ) )
559
- } else {
536
+ if df. height ( ) == 0 {
560
537
None
561
- } ;
562
- ( df, subject_out, object_out)
538
+ } else {
539
+ let subject_out =
540
+ if let Some ( subject_prefix_map) = prefix_maps. get ( SUBJECT_COL_NAME ) {
541
+ let ser = df
542
+ . drop_in_place ( SUBJECT_PREFIX_COL_NAME )
543
+ . unwrap ( )
544
+ . as_materialized_series_maintain_scalar ( ) ;
545
+ let u32ch = ser. u32 ( ) . unwrap ( ) ;
546
+ let i = u32ch. first_non_null ( ) ;
547
+ if let Some ( i) = i {
548
+ let u = u32ch. get ( i) ;
549
+ if let Some ( u) = u {
550
+ Some ( subject_prefix_map. get ( & u) . unwrap ( ) . clone ( ) )
551
+ } else {
552
+ None
553
+ }
554
+ } else {
555
+ None
556
+ }
557
+ } else {
558
+ None
559
+ } ;
560
+ let object_out =
561
+ if let Some ( object_prefix_map) = prefix_maps. get ( OBJECT_COL_NAME ) {
562
+ let ser = df
563
+ . drop_in_place ( OBJECT_PREFIX_COL_NAME )
564
+ . unwrap ( )
565
+ . as_materialized_series_maintain_scalar ( ) ;
566
+ let u32ch = ser. u32 ( ) . unwrap ( ) ;
567
+ let i = u32ch. first_non_null ( ) ;
568
+ if let Some ( i) = i {
569
+ let u = u32ch. get ( i) ;
570
+ if let Some ( u) = u {
571
+ Some ( object_prefix_map. get ( & u) . unwrap ( ) . clone ( ) )
572
+ } else {
573
+ None
574
+ }
575
+ } else {
576
+ None
577
+ }
578
+ } else {
579
+ None
580
+ } ;
581
+ Some ( ( df, subject_out, object_out) )
582
+ }
563
583
} )
564
584
. collect ( ) ;
565
585
out
566
586
} else {
567
- vec ! [ ( mappings, None , None ) ]
587
+ vec ! [ Some ( ( mappings, None , None ) ) ]
568
588
} ;
569
589
let mut new_out = vec ! [ ] ;
570
- for ( df, subject, object) in out {
590
+ for ( df, subject, object) in out. into_iter ( ) . filter ( |x| x . is_some ( ) ) . map ( |x| x . unwrap ( ) ) {
571
591
let subject = if matches ! ( subject_cat_state, BaseCatState :: CategoricalNative ( ..) ) {
572
592
if let Some ( subject) = subject {
573
593
Some ( CatType :: Prefix ( NamedNode :: new_unchecked ( subject) ) )
0 commit comments