Skip to content

Update String Transform Examples #19407

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: branch-25.08
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions ci/run_cudf_examples.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ compute-sanitizer --tool memcheck custom_optimized names.csv
compute-sanitizer --tool memcheck custom_prealloc names.csv
compute-sanitizer --tool memcheck custom_with_malloc names.csv

compute-sanitizer --tool memcheck branching_public info.csv output.csv
compute-sanitizer --tool memcheck branching info.csv output.csv
compute-sanitizer --tool memcheck int_output info.csv output.csv
compute-sanitizer --tool memcheck output_public info.csv output.csv
compute-sanitizer --tool memcheck output info.csv output.csv
compute-sanitizer --tool memcheck preallocated_public info.csv output.csv
compute-sanitizer --tool memcheck preallocated info.csv output.csv
compute-sanitizer --tool memcheck compute_checksum_jit info.csv output.csv
compute-sanitizer --tool memcheck extract_email_jit info.csv output.csv
compute-sanitizer --tool memcheck extract_email_precompiled info.csv output.csv
compute-sanitizer --tool memcheck format_phone_jit info.csv output.csv
compute-sanitizer --tool memcheck format_phone_precompiled info.csv output.csv
compute-sanitizer --tool memcheck localize_phone_jit info.csv output.csv
compute-sanitizer --tool memcheck localize_phone_precompiled info.csv output.csv

compute-sanitizer --tool memcheck parquet_io example.parquet
compute-sanitizer --tool memcheck parquet_io example.parquet output.parquet DELTA_BINARY_PACKED ZSTD TRUE
Expand Down
88 changes: 53 additions & 35 deletions cpp/examples/string_transforms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,48 +20,66 @@ rapids_cmake_build_type("Release")

list(APPEND CUDF_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr)

add_executable(branching branching.cpp)
target_compile_features(branching PRIVATE cxx_std_20)
target_compile_options(branching PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
target_link_libraries(branching PRIVATE cudf::cudf nvtx3::nvtx3-cpp)
install(TARGETS branching DESTINATION bin/examples/libcudf)
add_executable(compute_checksum_jit compute_checksum_jit.cpp)
target_compile_features(compute_checksum_jit PRIVATE cxx_std_20)
target_compile_options(
compute_checksum_jit PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>"
)
target_link_libraries(
compute_checksum_jit PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp>
)
install(TARGETS compute_checksum_jit DESTINATION bin/examples/libcudf)

add_executable(branching_public branching_public.cpp)
target_compile_features(branching_public PRIVATE cxx_std_20)
target_compile_options(branching_public PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
target_link_libraries(branching_public PRIVATE cudf::cudf nvtx3::nvtx3-cpp)
install(TARGETS branching_public DESTINATION bin/examples/libcudf)
add_executable(extract_email_jit extract_email_jit.cpp)
target_compile_features(extract_email_jit PRIVATE cxx_std_20)
target_compile_options(extract_email_jit PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
target_link_libraries(
extract_email_jit PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp>
)
install(TARGETS extract_email_jit DESTINATION bin/examples/libcudf)

add_executable(int_output int_output.cpp)
target_compile_features(int_output PRIVATE cxx_std_20)
target_compile_options(int_output PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
target_link_libraries(int_output PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp>)
install(TARGETS int_output DESTINATION bin/examples/libcudf)
add_executable(extract_email_precompiled extract_email_precompiled.cpp)
target_compile_features(extract_email_precompiled PRIVATE cxx_std_20)
target_compile_options(
extract_email_precompiled PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>"
)
target_link_libraries(
extract_email_precompiled PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp>
)
install(TARGETS extract_email_precompiled DESTINATION bin/examples/libcudf)

add_executable(output output.cpp)
target_compile_features(output PRIVATE cxx_std_20)
target_compile_options(output PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
target_link_libraries(output PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp>)
install(TARGETS output DESTINATION bin/examples/libcudf)
add_executable(format_phone_jit format_phone_jit.cpp)
target_compile_features(format_phone_jit PRIVATE cxx_std_20)
target_compile_options(format_phone_jit PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
target_link_libraries(format_phone_jit PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp>)
install(TARGETS format_phone_jit DESTINATION bin/examples/libcudf)

add_executable(output_public output_public.cpp)
target_compile_features(output_public PRIVATE cxx_std_20)
target_compile_options(output_public PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
target_link_libraries(output_public PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp>)
install(TARGETS output_public DESTINATION bin/examples/libcudf)
add_executable(format_phone_precompiled format_phone_precompiled.cpp)
target_compile_features(format_phone_precompiled PRIVATE cxx_std_20)
target_compile_options(
format_phone_precompiled PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>"
)
target_link_libraries(
format_phone_precompiled PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp>
)
install(TARGETS format_phone_precompiled DESTINATION bin/examples/libcudf)

add_executable(preallocated preallocated.cpp)
target_compile_features(preallocated PRIVATE cxx_std_20)
target_compile_options(preallocated PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
target_link_libraries(preallocated PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp>)
install(TARGETS preallocated DESTINATION bin/examples/libcudf)
add_executable(localize_phone_jit localize_phone_jit.cpp)
target_compile_features(localize_phone_jit PRIVATE cxx_std_20)
target_compile_options(localize_phone_jit PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
target_link_libraries(
localize_phone_jit PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp>
)
install(TARGETS localize_phone_jit DESTINATION bin/examples/libcudf)

add_executable(preallocated_public preallocated_public.cpp)
target_compile_features(preallocated_public PRIVATE cxx_std_20)
target_compile_options(preallocated_public PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
add_executable(localize_phone_precompiled localize_phone_precompiled.cpp)
target_compile_features(localize_phone_precompiled PRIVATE cxx_std_20)
target_compile_options(
localize_phone_precompiled PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>"
)
target_link_libraries(
preallocated_public PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp>
localize_phone_precompiled PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp>
)
install(TARGETS preallocated_public DESTINATION bin/examples/libcudf)
install(TARGETS localize_phone_precompiled DESTINATION bin/examples/libcudf)

install(FILES ${CMAKE_CURRENT_LIST_DIR}/info.csv DESTINATION bin/examples/libcudf)
14 changes: 7 additions & 7 deletions cpp/examples/string_transforms/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ strings columns.
The example source code loads a CSV file and produces a transformed column using the values from the loaded table.

The following examples are included:
1. `branching` - Using a transform to branch on input columns and returning string values
2. `branching_public` - Performs same transformation on the table as `branching` but uses alternative public APIs
3. `int_output` - Using a transform to perform a fused checksum on two columns
4. `output` - Using a transform to get a substring from a kernel
5. `output_public` - Performs same transformation on the table as `output` but uses alternative public APIs
6. `preallocated` - Using a transform kernel to output a string to a pre-allocated buffer
7. `preallocated_public` - Performs same transformation on the table as `preallocated` but uses alternative public APIs
1. `localize_phone_jit` - Using a transform to branch on input columns and returning string values
2. `localize_phone_precompiled` - Performs the same transformation on the table as `localize_phone_jit` but uses precompiled public APIs
3. `compute_checksum_jit` - Using a transform to perform a fused checksum on two columns
4. `extract_email_jit` - Using a transform to get a substring from a kernel
5. `extract_email_precompiled` - Performs the same transformation on the table as `extract_email_jit` but uses precompiled public APIs
6. `format_phone_jit` - Using a transform kernel to output a string to a pre-allocated buffer
7. `format_phone_precompiled` - Performs the same transformation on the table as `format_phone_jit` but uses precompiled public APIs

## Compile and execute

Expand Down
20 changes: 18 additions & 2 deletions cpp/examples/string_transforms/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/copying.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/io/csv.hpp>
#include <cudf/io/datasource.hpp>
#include <cudf/strings/strings_column_view.hpp>
Expand Down Expand Up @@ -85,7 +86,7 @@ void write_csv(cudf::table_view const& tbl_view,
{
auto sink_info = cudf::io::sink_info(file_path);
auto builder = cudf::io::csv_writer_options::builder(sink_info, tbl_view);
auto options = builder.include_header(true).names(names).build();
auto options = builder.include_header(true).names(names).rows_per_chunk(10'000'000).build();
cudf::io::write_csv(options);
}

Expand Down Expand Up @@ -139,13 +140,27 @@ int main(int argc, char const** argv)

auto table_view = input->view();

std::chrono::duration<double> elapsed_cold{};
{
// warmup pass
stream.synchronize();
auto start_cold = std::chrono::steady_clock::now();
nvtxRangePush("transform cold");
auto [result_cold, input_indices_cold] = transform(table_view);
stream.synchronize();
nvtxRangePop();
elapsed_cold = std::chrono::steady_clock::now() - start_cold;
}

stream.synchronize();

auto start = std::chrono::steady_clock::now();
auto start = std::chrono::steady_clock::now();
nvtxRangePush("transform warm");
auto [result, input_indices] = transform(table_view);

// ensure transform operation completes and the wall-time is only for the transform computation
stream.synchronize();
nvtxRangePop();

std::chrono::duration<double> elapsed = std::chrono::steady_clock::now() - start;

Expand All @@ -172,6 +187,7 @@ int main(int argc, char const** argv)
[&](auto index) { return input->get_column(index).alloc_size(); });

std::cout << "Memory Resource: " << memory_resource_name << "\n"
<< "Warmup Time: " << elapsed_cold.count() << " seconds\n"
<< "Wall Time: " << elapsed.count() << " seconds\n"
<< "Input Table: " << table_view.num_rows() << " rows x " << table_view.num_columns()
<< " columns, " << input->alloc_size() << " bytes\n"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ __device__ void e164_format(void* scratch,
constexpr cudf::size_type maximum_size = 20;
auto const num_rows = table.num_rows();

rmm::device_uvector<char> scratch(maximum_size * num_rows, stream, mr);
rmm::device_uvector<char> scratch(maximum_size * static_cast<std::size_t>(num_rows), stream, mr);

// a column with size 1 is considered a scalar
auto size = cudf::make_column_from_scalar(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,9 @@ __device__ void format_phone(void* scratch,
constexpr cudf::size_type MAX_ENTRY_LENGTH = 24; // Enough space for "(123) 123-4567" or "n/a"

auto const num_rows = table.num_rows();
rmm::device_uvector<char> scratch(
MAX_ENTRY_LENGTH * num_rows, stream, mr); // allocate scratch space for the outputs
rmm::device_uvector<char> scratch(MAX_ENTRY_LENGTH * static_cast<std::size_t>(num_rows),
stream,
mr); // allocate scratch space for the outputs

auto size = cudf::make_column_from_scalar(
cudf::numeric_scalar<int32_t>(MAX_ENTRY_LENGTH, true, stream, mr), 1, stream, mr);
Expand Down
3 changes: 3 additions & 0 deletions cpp/src/io/csv/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <cudf/detail/copy.hpp>
#include <cudf/detail/fill.hpp>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/io/data_sink.hpp>
#include <cudf/io/detail/csv.hpp>
Expand Down Expand Up @@ -411,6 +412,8 @@ void write_csv(data_sink* out_sink,
// write header: column names separated by delimiter:
// (even for tables with no rows)
//
cudf::scoped_range range("io::csv::write_csv");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mhaseeb123 Is this where you would put the scoped range?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems OK to me, as this is the main CSV write function that gets called from cudf::io::write_csv, and I don't see any CUDF_FUNC_RANGE() in any of the functions called from here, so this one should capture the complete write_csv time. In the future, we might want to put scoped ranges in write_chunked_begin and write_chunked for better granularity. What do you think, @vuule?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typically, a scoped range is used to make a range within a code block. That means we can have multiple scoped ranges in a single function. On the other hand, CUDF_FUNC_RANGE is used to generate a (scoped) range for the entire function, at the top function level.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see that this scoped range is at the top function level so it should be equivalent to CUDF_FUNC_RANGE.


write_chunked_begin(
out_sink, table, user_column_names, options, stream, cudf::get_current_device_resource_ref());

Expand Down
Loading