@@ -70,7 +70,7 @@ void reader_impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num_
   });
 
   // figure out which kernels to run
-  auto const kernel_mask = GetAggregatedDecodeKernelMask(subpass.pages, _stream);
+  auto const kernel_mask = get_aggregated_decode_kernel_mask(subpass.pages, _stream);
 
   // Check to see if there are any string columns present. If so, then we need to get size info
   // for each string page. This size info will be used to pre-allocate memory for the column,
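Note (not part of the diff): the renamed helper computes one decode-kernel mask for the whole subpass so that each decode kernel only needs to be launched once. A minimal host-side sketch of that aggregation, using a hypothetical `PageInfo` stand-in rather than cudf's actual page type (the real helper operates on the device pages and takes a stream):

```cpp
#include <cstdint>
#include <numeric>
#include <vector>

// Hypothetical stand-in for cudf's page metadata; only the field used here.
struct PageInfo {
  std::uint32_t kernel_mask;  // one bit per decode kernel this page needs
};

// OR together the per-page masks so the reader can launch each decode kernel
// at most once for the whole subpass.
std::uint32_t aggregate_decode_kernel_mask(std::vector<PageInfo> const& pages)
{
  return std::accumulate(
    pages.begin(), pages.end(), std::uint32_t{0}, [](std::uint32_t acc, PageInfo const& p) {
      return acc | p.kernel_mask;
    });
}
```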
@@ -445,6 +445,9 @@ void reader_impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num_
   page_nesting.device_to_host_async(_stream);
   page_nesting_decode.device_to_host_async(_stream);
 
+  // Invalidate output buffer nullmasks at row indices spanned by pruned pages
+  update_output_nullmasks_for_pruned_pages(host_page_mask);
+
   // Copy over initial string offsets from device
   auto h_initial_str_offsets = cudf::detail::make_host_vector_async(initial_str_offsets, _stream);
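A side note on the last context line above (an existing pattern, not introduced by this change): `cudf::detail::make_host_vector_async` only enqueues the device-to-host copy on the stream, so the stream has to be synchronized before the resulting host vector is read. A hedged sketch of that usage, with a placeholder element type and function name:

```cpp
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <cstddef>

// Copy a device buffer to a host vector asynchronously; the data is only safe
// to read after the stream has been synchronized.
void read_offsets_on_host(cudf::device_span<std::size_t const> d_offsets,
                          rmm::cuda_stream_view stream)
{
  auto h_offsets = cudf::detail::make_host_vector_async(d_offsets, stream);
  stream.synchronize();  // ensure the copy above has completed
  // h_offsets can now be used on the host
}
```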
@@ -862,6 +865,79 @@ bool reader_impl::has_next()
   return has_more_work() or is_first_output_chunk();
 }
 
+void reader_impl::update_output_nullmasks_for_pruned_pages(cudf::host_span<bool const> page_mask)
+{
+  auto const& subpass    = _pass_itm_data->subpass;
+  auto const& pages      = subpass->pages;
+  auto const& chunks     = _pass_itm_data->chunks;
+  auto const num_columns = _input_columns.size();
+
+  CUDF_EXPECTS(pages.size() == page_mask.size(), "Page mask size mismatch");
+
+  // Return early if page mask is empty or all pages are required
+  if (page_mask.empty() or std::all_of(page_mask.begin(), page_mask.end(), std::identity{})) {
+    return;
+  }
+
+  auto page_and_mask_begin =
+    thrust::make_zip_iterator(thrust::make_tuple(pages.host_begin(), page_mask.begin()));
+
+  auto null_masks = std::vector<bitmask_type*>{};
+  auto begin_bits = std::vector<cudf::size_type>{};
+  auto end_bits   = std::vector<cudf::size_type>{};
+
+  std::for_each(
+    page_and_mask_begin, page_and_mask_begin + pages.size(), [&](auto const& page_and_mask_pair) {
+      // Return early if the page is valid
+      if (thrust::get<1>(page_and_mask_pair)) { return; }
+
+      auto const& page     = thrust::get<0>(page_and_mask_pair);
+      auto const chunk_idx = page.chunk_idx;
+      auto const start_row = chunks[chunk_idx].start_row + page.chunk_row;
+      auto const end_row   = start_row + page.num_rows;
+      auto& input_col      = _input_columns[chunk_idx % num_columns];
+      auto max_depth       = input_col.nesting_depth();
+      auto* cols           = &_output_buffers;
+
+      for (size_t l_idx = 0; l_idx < max_depth; l_idx++) {
+        auto& out_buf = (*cols)[input_col.nesting[l_idx]];
+        cols          = &out_buf.children;
+        // Continue if the current column is a list column
+        if (out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) { continue; }
+        // Add the nullmask and bit bounds to corresponding lists
+        null_masks.emplace_back(out_buf.null_mask());
+        begin_bits.emplace_back(start_row);
+        end_bits.emplace_back(end_row);
+
+        // Increment the null count by the number of rows in this page
+        out_buf.null_count() += page.num_rows;
+      }
+    });
+
+  // Min number of nullmasks to use bulk update optimally
+  constexpr auto min_nullmasks_for_bulk_update = 32;
+
+  // Bulk update the nullmasks if optimal
+  if (null_masks.size() >= min_nullmasks_for_bulk_update) {
+    auto valids = cudf::detail::make_host_vector<bool>(null_masks.size(), _stream);
+    std::fill(valids.begin(), valids.end(), false);
+    cudf::set_null_masks_safe(null_masks, begin_bits, end_bits, valids, _stream);
+  }
+  // Otherwise, update the nullmasks in a loop
+  else {
+    auto nullmask_iter = thrust::make_zip_iterator(
+      thrust::make_tuple(null_masks.begin(), begin_bits.begin(), end_bits.begin()));
+    std::for_each(
+      nullmask_iter, nullmask_iter + null_masks.size(), [&](auto const& nullmask_tuple) {
+        cudf::set_null_mask(thrust::get<0>(nullmask_tuple),
+                            thrust::get<1>(nullmask_tuple),
+                            thrust::get<2>(nullmask_tuple),
+                            false,
+                            _stream);
+      });
+  }
+}
+
 namespace {
 parquet_column_schema walk_schema(aggregate_reader_metadata const* mt, int idx)
 {
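For a concrete picture of what `update_output_nullmasks_for_pruned_pages` does: each pruned page contributes the row range [start_row, end_row) of every non-list output buffer it feeds, those ranges have their validity bits cleared (via one bulk `cudf::set_null_masks_safe` call when many masks are affected, otherwise one `cudf::set_null_mask` call per mask), and each buffer's null count grows by the page's row count. A host-only sketch of the bit-clearing semantics, using a plain `uint32_t` vector in place of the device bitmask (cudf packs validity into 32-bit words, where a 0 bit marks the row null):

```cpp
#include <cstdint>
#include <vector>

// Clear bits [begin_bit, end_bit) in a word-packed validity mask, i.e. mark
// those rows null. This mirrors cudf::set_null_mask(mask, begin, end, false,
// stream) on the device, but runs on the host for illustration only.
void clear_validity_range(std::vector<std::uint32_t>& mask, int begin_bit, int end_bit)
{
  for (int bit = begin_bit; bit < end_bit; ++bit) {
    mask[bit / 32] &= ~(std::uint32_t{1} << (bit % 32));
  }
}

// Example: a pruned page spanning rows [40, 60) makes those 20 rows null, so
// the output buffer's null count also grows by 20:
//   clear_validity_range(mask, 40, 60);
//   null_count += 60 - 40;
```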