|
20 | 20 | import pandas |
21 | 21 |
|
22 | 22 | from bigframes import dtypes |
| 23 | +from bigframes.core import bigframe_node, nodes |
23 | 24 |
|
24 | 25 |
|
25 | 26 | def get_table_stats(table: bigquery.Table) -> pandas.Series: |
@@ -86,13 +87,26 @@ def get_query_stats_with_dtypes( |
86 | 87 | query_job: bigquery.QueryJob, |
87 | 88 | column_dtypes: Dict[str, dtypes.Dtype], |
88 | 89 | index_dtypes: Sequence[dtypes.Dtype], |
| 90 | + expr_root: bigframe_node.BigFrameNode | None = None, |
89 | 91 | ) -> pandas.Series: |
| 92 | + """ |
| 93 | + Returns important stats from the query job as a pandas Series, together with the column and index dtype information. |
| 94 | +
|
| 95 | + Args: |
| 96 | + expr_root (Optional): |
| 97 | + The root of the expression tree that may contain local data, whose size is added to the |
| 98 | + total bytes count if available. |
| 99 | +
|
| 100 | + """ |
90 | 101 | index = ["columnCount", "columnDtypes", "indexLevel", "indexDtypes"] |
91 | 102 | values = [len(column_dtypes), column_dtypes, len(index_dtypes), index_dtypes] |
92 | 103 |
|
93 | 104 | s = pandas.Series(values, index=index) |
94 | 105 |
|
95 | | - return pandas.concat([s, get_query_stats(query_job)]) |
| 106 | + result = pandas.concat([s, get_query_stats(query_job)]) |
| 107 | + if expr_root is not None: |
| 108 | + result["totalBytesProcessed"] += get_local_bytes(expr_root) |
| 109 | + return result |
96 | 110 |
|
97 | 111 |
|
98 | 112 | def get_query_stats( |
@@ -145,4 +159,24 @@ def get_query_stats( |
145 | 159 | else None |
146 | 160 | ) |
147 | 161 |
|
148 | | - return pandas.Series(values, index=index) |
| 162 | + result = pandas.Series(values, index=index) |
| 163 | + if result["totalBytesProcessed"] is None: |
| 164 | + result["totalBytesProcessed"] = 0 |
| 165 | + else: |
| 166 | + result["totalBytesProcessed"] = int(result["totalBytesProcessed"]) |
| 167 | + |
| 168 | + return result |
| 169 | + |
| 170 | + |
def get_local_bytes(root: bigframe_node.BigFrameNode) -> int:
    """Return the total buffer size of local data found in the tree at ``root``.

    The traversal is delegated to ``root.reduce_up``. For each node the
    callback sums the results it is handed for that node's children and, when
    the node is a ``nodes.ReadLocalNode``, adds
    ``local_data_source.data.get_total_buffer_size()`` on top.

    Args:
        root:
            The root of the expression tree to inspect.

    Returns:
        The value produced by ``root.reduce_up`` for the callback described
        above.
    """

    # NOTE(review): assumes reduce_up invokes the callback positionally as
    # (node, child_results) -- confirm against BigFrameNode.reduce_up.
    def _bytes_under(
        node: bigframe_node.BigFrameNode, child_totals: tuple[int, ...]
    ) -> int:
        subtotal = sum(child_totals)

        # Only local-read nodes carry in-memory data of their own.
        if not isinstance(node, nodes.ReadLocalNode):
            return subtotal

        return subtotal + node.local_data_source.data.get_total_buffer_size()

    return root.reduce_up(_bytes_under)
0 commit comments