Skip to content

Add Parquet arrow_reader benchmarks for {u}int{8,16} columns #7484

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
May 8, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions parquet/benches/arrow_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,18 @@ fn build_test_schema() -> SchemaDescPtr {
OPTIONAL FIXED_LEN_BYTE_ARRAY (8) optional_flba8_leaf;
REQUIRED FIXED_LEN_BYTE_ARRAY (16) mandatory_flba16_leaf;
OPTIONAL FIXED_LEN_BYTE_ARRAY (16) optional_flba16_leaf;
REQUIRED INT32 mandatory_uint8_leaf (INTEGER(8, false));
OPTIONAL INT32 optional_uint8_leaf (INTEGER(8, false));
REQUIRED INT32 mandatory_uint16_leaf (INTEGER(16, false));
OPTIONAL INT32 optional_uint16_leaf (INTEGER(16, false));
REQUIRED INT32 mandatory_uint32_leaf (INTEGER(32, false));
OPTIONAL INT32 optional_uint32_leaf (INTEGER(32, false));
REQUIRED INT32 mandatory_int8_leaf (INTEGER(8, true));
OPTIONAL INT32 optional_int8_leaf (INTEGER(8, true));
REQUIRED INT32 mandatory_int16_leaf (INTEGER(16, true));
OPTIONAL INT32 optional_int16_leaf (INTEGER(16, true));
REQUIRED INT64 mandatory_uint64_leaf (INTEGER(64, false));
OPTIONAL INT64 optional_uint64_leaf (INTEGER(64, false));
}
";
parse_message_type(message_type)
Expand Down Expand Up @@ -1280,6 +1292,18 @@ fn add_benches(c: &mut Criterion) {
let string_list_desc = schema.column(14);
let mandatory_binary_column_desc = schema.column(15);
let optional_binary_column_desc = schema.column(16);
let mandatory_uint8_column_desc = schema.column(27);
let optional_uint8_column_desc = schema.column(28);
let mandatory_uint16_column_desc = schema.column(29);
let optional_uint16_column_desc = schema.column(30);
let mandatory_uint32_column_desc = schema.column(31);
let optional_uint32_column_desc = schema.column(32);
let mandatory_int8_column_desc = schema.column(33);
let optional_int8_column_desc = schema.column(34);
let mandatory_int16_column_desc = schema.column(35);
let optional_int16_column_desc = schema.column(36);
let mandatory_uint64_column_desc = schema.column(37);
let optional_uint64_column_desc = schema.column(38);

// primitive / int32 benchmarks
// =============================
Expand All @@ -1294,6 +1318,61 @@ fn add_benches(c: &mut Criterion) {
);
group.finish();

// primitive int32 / logical uint8 benchmarks
let mut group = c.benchmark_group("arrow_array_reader/UInt8Array");
bench_primitive::<Int32Type>(
&mut group,
&mandatory_uint8_column_desc,
&optional_uint8_column_desc,
0,
256,
);
group.finish();

// primitive int32 / logical int8 benchmarks
let mut group = c.benchmark_group("arrow_array_reader/Int8Array");
bench_primitive::<Int32Type>(
&mut group,
&mandatory_int8_column_desc,
&optional_int8_column_desc,
0,
128,
);
group.finish();

// primitive int32 / logical uint16 benchmarks
let mut group = c.benchmark_group("arrow_array_reader/UInt16Array");
bench_primitive::<Int32Type>(
&mut group,
&mandatory_uint16_column_desc,
&optional_uint16_column_desc,
0,
65536,
);
group.finish();

// primitive int32 / logical int16 benchmarks
let mut group = c.benchmark_group("arrow_array_reader/Int16Array");
bench_primitive::<Int32Type>(
&mut group,
&mandatory_int16_column_desc,
&optional_int16_column_desc,
0,
32768,
);
group.finish();

// primitive int32 / logical uint32 benchmarks
let mut group = c.benchmark_group("arrow_array_reader/UInt32Array");
bench_primitive::<Int32Type>(
&mut group,
&mandatory_uint32_column_desc,
&optional_uint32_column_desc,
0,
1000,
);
group.finish();

// primitive / int64 benchmarks
// =============================

Expand All @@ -1307,6 +1386,17 @@ fn add_benches(c: &mut Criterion) {
);
group.finish();

// primitive int64 / logical uint64 benchmarks
let mut group = c.benchmark_group("arrow_array_reader/UInt64Array");
bench_primitive::<Int64Type>(
Copy link
Contributor Author

@alamb alamb May 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was confused at first -- I thought this was meant to be:

Suggested change
bench_primitive::<Int64Type>(
bench_primitive::<UInt64Type>(

However, the type parameter here is the Parquet (physical) type, not the Arrow / logical type.

&mut group,
&mandatory_uint64_column_desc,
&optional_uint64_column_desc,
0,
1000,
);
group.finish();

// string benchmarks
//==============================

Expand Down
Loading