-
Notifications
You must be signed in to change notification settings - Fork 749
iceberg: serialize all data_file fields in manifests #29680
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
34c08bd
cd716a4
101e5be
d455088
d14bd40
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -40,20 +40,20 @@ struct data_file { | |
|
|
||
| size_t record_count; | ||
| size_t file_size_bytes; | ||
|
Comment on lines
41
to
42
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should these be int64 as well? I think they will get assigned to one in the snapshot.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes. I began changing them but then didn't want to get too distracted from the main task. To be done.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Cool, sounds good. |
||
| chunked_hash_map<nested_field::id_t, size_t> column_sizes; | ||
| chunked_hash_map<nested_field::id_t, size_t> value_counts; | ||
| chunked_hash_map<nested_field::id_t, size_t> null_value_counts; | ||
| chunked_hash_map<nested_field::id_t, size_t> nan_value_counts; | ||
| std::optional<chunked_hash_map<nested_field::id_t, int64_t>> column_sizes; | ||
| std::optional<chunked_hash_map<nested_field::id_t, int64_t>> value_counts; | ||
| std::optional<chunked_hash_map<nested_field::id_t, int64_t>> | ||
| null_value_counts; | ||
| std::optional<chunked_hash_map<nested_field::id_t, int64_t>> | ||
| nan_value_counts; | ||
| std::optional<chunked_hash_map<nested_field::id_t, iobuf>> lower_bounds; | ||
| std::optional<chunked_hash_map<nested_field::id_t, iobuf>> upper_bounds; | ||
| std::optional<iobuf> key_metadata; | ||
| std::optional<chunked_vector<int64_t>> split_offsets; | ||
| std::optional<chunked_vector<nested_field::id_t>> equality_ids; | ||
| std::optional<int32_t> sort_order_id; | ||
| std::optional<uri> referenced_data_file; | ||
|
|
||
| // TODO: The following fields are not supported, and are serialized as | ||
| // empty options. | ||
| // - distinct_counts | ||
|
oleiman marked this conversation as resolved.
|
||
| // - lower_bounds | ||
| // - upper_bounds | ||
| // - key_metadata | ||
| // - split_offsets | ||
| // - equality_ids | ||
| // - sort_order_ids | ||
| friend bool operator==(const data_file&, const data_file&) = default; | ||
| data_file copy() const; | ||
| }; | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.