Skip to content

Commit 8f720b7

Browse files
Sreesh Maheshwar authored and Fokko committed
Partition statistics metadata reading
1 parent 62a95cf commit 8f720b7

File tree

2 files changed

+19
-2
lines changed

2 files changed

+19
-2
lines changed

pyiceberg/table/metadata.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
SortOrder,
3737
assign_fresh_sort_order_ids,
3838
)
39-
from pyiceberg.table.statistics import StatisticsFile
39+
from pyiceberg.table.statistics import PartitionStatisticsFile, StatisticsFile
4040
from pyiceberg.typedef import (
4141
EMPTY_DICT,
4242
IcebergBaseModel,
@@ -222,6 +222,14 @@ class TableMetadataCommonFields(IcebergBaseModel):
222222
table correctly. A table can contain many statistics files
223223
associated with different table snapshots."""
224224

225+
partition_statistics: List[PartitionStatisticsFile] = Field(alias="partition-statistics", default_factory=list)
226+
"""An optional list of partition statistics files.
227+
Partition statistics are not required for reading or planning
228+
and readers may ignore them. Each table snapshot may be associated
229+
with at most one partition statistics file. A writer can optionally
230+
write the partition statistics file during each write operation,
231+
or it can also be computed on demand."""
232+
225233
# validators
226234
@field_validator("properties", mode="before")
227235
def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[str, str]:

pyiceberg/table/statistics.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,24 @@ class BlobMetadata(IcebergBaseModel):
2929
properties: Optional[Dict[str, str]] = None
3030

3131

32-
class StatisticsFile(IcebergBaseModel):
32+
class StatisticsCommonFields(IcebergBaseModel):
33+
"""Common fields between table and partition statistics structs found on metadata."""
34+
3335
snapshot_id: int = Field(alias="snapshot-id")
3436
statistics_path: str = Field(alias="statistics-path")
3537
file_size_in_bytes: int = Field(alias="file-size-in-bytes")
38+
39+
40+
class StatisticsFile(StatisticsCommonFields, IcebergBaseModel):
3641
file_footer_size_in_bytes: int = Field(alias="file-footer-size-in-bytes")
3742
key_metadata: Optional[str] = Field(alias="key-metadata", default=None)
3843
blob_metadata: List[BlobMetadata] = Field(alias="blob-metadata")
3944

4045

46+
class PartitionStatisticsFile(IcebergBaseModel):
47+
pass
48+
49+
4150
def filter_statistics_by_snapshot_id(
4251
statistics: List[StatisticsFile],
4352
reject_snapshot_id: int,

0 commit comments

Comments
 (0)