Skip to content

Commit 4c4c9b1

Browse files
authored
fix: Resolve AttributeError in TableWidget and improve initialization (#1937)
* remove expensive len() call * add testcase * fix a typo * change how row_count is updated * testcase still fails, need to merge in 1888 * update the method of using PandasBatches.total_rows * change tests in read_gbq_colab * polish comment * fix a test * change code and update more testcase * remove unneeded except * add assert for total_rows * get actual row_counts * avoid two query calls * remove double query when display widget * get row count directly * restore notebook * restore notebook change * remove duplicated code * minor updates * still have zero total rows issue * now large dataset can get the correct row counts * benchmark change * revert a benchmark * revert executor change * raising a NotImplementedError when the row count is none * change return type * Revert accidental change of dataframe.ipynb * remove unnecessary execution in benchmark * remove row_count check * remove extra execute_result * remove unnecessary tests * Fix: Address review comments on PandasBatches and docstring - Reinstated 'Iterator[pd.DataFrame]' inheritance for 'PandasBatches' in 'bigframes/core/blocks.py'. - Removed internal type hint 'bigframes.core.blocks.PandasBatches:' from 'to_pandas_batches' docstring in 'bigframes/dataframe.py' to avoid exposing internal types in public documentation. * Revert: Revert import change in read_gbq_colab benchmark This reverts the import path for the benchmark utils to 'benchmark.utils' to address concerns about google3 imports. * Revert: Revert unnecessary changes in read_gbq_colab benchmarks * Remove notebooks/Untitled-2.ipynb * Remove notebooks/multimodal/audio_transcribe_partial_ordering.ipynb * remove unnecessary change * revert typo * add todo * change docstring * revert changes to tests/benchmark/read_gbq_colab * merge change * update how we handle invalid row count * eliminate duplicated flags
1 parent dc46b3c commit 4c4c9b1

File tree

4 files changed

+210
-49
lines changed

4 files changed

+210
-49
lines changed

bigframes/display/anywidget.py

Lines changed: 52 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,14 @@
2323
import pandas as pd
2424

2525
import bigframes
26+
from bigframes.core import blocks
2627
import bigframes.dataframe
2728
import bigframes.display.html
2829

29-
# anywidget and traitlets are optional dependencies. We don't want the import of this
30-
# module to fail if they aren't installed, though. Instead, we try to limit the surface that
31-
# these packages could affect. This makes unit testing easier and ensures we don't
32-
# accidentally make these required packages.
30+
# anywidget and traitlets are optional dependencies. We don't want the import of
31+
# this module to fail if they aren't installed, though. Instead, we try to
32+
# limit the surface that these packages could affect. This makes unit testing
33+
# easier and ensures we don't accidentally make these required packages.
3334
try:
3435
import anywidget
3536
import traitlets
@@ -46,9 +47,21 @@
4647

4748

4849
class TableWidget(WIDGET_BASE):
50+
"""An interactive, paginated table widget for BigFrames DataFrames.
51+
52+
This widget provides a user-friendly way to display and navigate through
53+
large BigQuery DataFrames within a Jupyter environment.
4954
"""
50-
An interactive, paginated table widget for BigFrames DataFrames.
51-
"""
55+
56+
page = traitlets.Int(0).tag(sync=True)
57+
page_size = traitlets.Int(0).tag(sync=True)
58+
row_count = traitlets.Int(0).tag(sync=True)
59+
table_html = traitlets.Unicode().tag(sync=True)
60+
_initial_load_complete = traitlets.Bool(False).tag(sync=True)
61+
_batches: Optional[blocks.PandasBatches] = None
62+
_error_message = traitlets.Unicode(allow_none=True, default_value=None).tag(
63+
sync=True
64+
)
5265

5366
def __init__(self, dataframe: bigframes.dataframe.DataFrame):
5467
"""Initialize the TableWidget.
@@ -61,10 +74,11 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
6174
"Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use TableWidget."
6275
)
6376

64-
super().__init__()
6577
self._dataframe = dataframe
6678

67-
# Initialize attributes that might be needed by observers FIRST
79+
super().__init__()
80+
81+
# Initialize attributes that might be needed by observers first
6882
self._table_id = str(uuid.uuid4())
6983
self._all_data_loaded = False
7084
self._batch_iter: Optional[Iterator[pd.DataFrame]] = None
@@ -73,9 +87,6 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
7387
# respect display options for initial page size
7488
initial_page_size = bigframes.options.display.max_rows
7589

76-
# Initialize data fetching attributes.
77-
self._batches = dataframe._to_pandas_batches(page_size=initial_page_size)
78-
7990
# set traitlets properties that trigger observers
8091
self.page_size = initial_page_size
8192

@@ -84,12 +95,21 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
8495
# TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
8596
# before we get here so that the count might already be cached.
8697
# TODO(b/452747934): Allow row_count to be None and check to see if
87-
# there are multiple pages and show "page 1 of many" in this case.
88-
self.row_count = self._batches.total_rows or 0
98+
# there are multiple pages and show "page 1 of many" in this case
99+
self._reset_batches_for_new_page_size()
100+
if self._batches is None or self._batches.total_rows is None:
101+
self._error_message = "Could not determine total row count. Data might be unavailable or an error occurred."
102+
self.row_count = 0
103+
else:
104+
self.row_count = self._batches.total_rows
89105

90106
# get the initial page
91107
self._set_table_html()
92108

109+
# Signals to the frontend that the initial data load is complete.
110+
# Also used as a guard to prevent observers from firing during initialization.
111+
self._initial_load_complete = True
112+
93113
@functools.cached_property
94114
def _esm(self):
95115
"""Load JavaScript code from external file."""
@@ -100,11 +120,6 @@ def _css(self):
100120
"""Load CSS code from external file."""
101121
return resources.read_text(bigframes.display, "table_widget.css")
102122

103-
page = traitlets.Int(0).tag(sync=True)
104-
page_size = traitlets.Int(25).tag(sync=True)
105-
row_count = traitlets.Int(0).tag(sync=True)
106-
table_html = traitlets.Unicode().tag(sync=True)
107-
108123
@traitlets.validate("page")
109124
def _validate_page(self, proposal: Dict[str, Any]) -> int:
110125
"""Validate and clamp the page number to a valid range.
@@ -171,7 +186,10 @@ def _get_next_batch(self) -> bool:
171186
def _batch_iterator(self) -> Iterator[pd.DataFrame]:
172187
"""Lazily initializes and returns the batch iterator."""
173188
if self._batch_iter is None:
174-
self._batch_iter = iter(self._batches)
189+
if self._batches is None:
190+
self._batch_iter = iter([])
191+
else:
192+
self._batch_iter = iter(self._batches)
175193
return self._batch_iter
176194

177195
@property
@@ -181,15 +199,22 @@ def _cached_data(self) -> pd.DataFrame:
181199
return pd.DataFrame(columns=self._dataframe.columns)
182200
return pd.concat(self._cached_batches, ignore_index=True)
183201

184-
def _reset_batches_for_new_page_size(self):
202+
def _reset_batches_for_new_page_size(self) -> None:
185203
"""Reset the batch iterator when page size changes."""
186204
self._batches = self._dataframe._to_pandas_batches(page_size=self.page_size)
205+
187206
self._cached_batches = []
188207
self._batch_iter = None
189208
self._all_data_loaded = False
190209

191-
def _set_table_html(self):
210+
def _set_table_html(self) -> None:
192211
"""Sets the current html data based on the current page and page size."""
212+
if self._error_message:
213+
self.table_html = (
214+
f"<div class='bigframes-error-message'>{self._error_message}</div>"
215+
)
216+
return
217+
193218
start = self.page * self.page_size
194219
end = start + self.page_size
195220

@@ -211,13 +236,17 @@ def _set_table_html(self):
211236
)
212237

213238
@traitlets.observe("page")
214-
def _page_changed(self, _change: Dict[str, Any]):
239+
def _page_changed(self, _change: Dict[str, Any]) -> None:
215240
"""Handler for when the page number is changed from the frontend."""
241+
if not self._initial_load_complete:
242+
return
216243
self._set_table_html()
217244

218245
@traitlets.observe("page_size")
219-
def _page_size_changed(self, _change: Dict[str, Any]):
246+
def _page_size_changed(self, _change: Dict[str, Any]) -> None:
220247
"""Handler for when the page size is changed from the frontend."""
248+
if not self._initial_load_complete:
249+
return
221250
# Reset the page to 0 when page size changes to avoid invalid page states
222251
self.page = 0
223252

bigframes/display/table_widget.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,12 @@ function render({ model, el }) {
137137
}
138138
});
139139
model.on(Event.CHANGE_TABLE_HTML, handleTableHTMLChange);
140+
model.on(`change:${ModelProperty.ROW_COUNT}`, updateButtonStates);
141+
model.on(`change:_initial_load_complete`, (val) => {
142+
if (val) {
143+
updateButtonStates();
144+
}
145+
});
140146

141147
// Assemble the DOM
142148
paginationContainer.appendChild(prevPage);

notebooks/dataframes/anywidget_mode.ipynb

Lines changed: 48 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -127,12 +127,24 @@
127127
"id": "ce250157",
128128
"metadata": {},
129129
"outputs": [
130+
{
131+
"data": {
132+
"text/html": [
133+
"✅ Completed. "
134+
],
135+
"text/plain": [
136+
"<IPython.core.display.HTML object>"
137+
]
138+
},
139+
"metadata": {},
140+
"output_type": "display_data"
141+
},
130142
{
131143
"data": {
132144
"application/vnd.jupyter.widget-view+json": {
133-
"model_id": "9e3e413eb0774a62818c58d217af8488",
145+
"model_id": "aafd4f912b5f42e0896aa5f0c2c62620",
134146
"version_major": 2,
135-
"version_minor": 1
147+
"version_minor": 0
136148
},
137149
"text/plain": [
138150
"TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
@@ -171,6 +183,18 @@
171183
"id": "6920d49b",
172184
"metadata": {},
173185
"outputs": [
186+
{
187+
"data": {
188+
"text/html": [
189+
"✅ Completed. "
190+
],
191+
"text/plain": [
192+
"<IPython.core.display.HTML object>"
193+
]
194+
},
195+
"metadata": {},
196+
"output_type": "display_data"
197+
},
174198
{
175199
"name": "stdout",
176200
"output_type": "stream",
@@ -181,17 +205,16 @@
181205
{
182206
"data": {
183207
"application/vnd.jupyter.widget-view+json": {
184-
"model_id": "df5e93f0d03f45cda67aa6da7f9ef1ae",
208+
"model_id": "5ec0ad9f11874d4f9d8edbc903ee7b5d",
185209
"version_major": 2,
186-
"version_minor": 1
210+
"version_minor": 0
187211
},
188212
"text/plain": [
189213
"TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
190214
]
191215
},
192-
"execution_count": 7,
193216
"metadata": {},
194-
"output_type": "execute_result"
217+
"output_type": "display_data"
195218
}
196219
],
197220
"source": [
@@ -257,6 +280,20 @@
257280
"id": "a9d5d13a",
258281
"metadata": {},
259282
"outputs": [
283+
{
284+
"data": {
285+
"text/html": [
286+
"✅ Completed. \n",
287+
" Query processed 171.4 MB in a moment of slot time.\n",
288+
" "
289+
],
290+
"text/plain": [
291+
"<IPython.core.display.HTML object>"
292+
]
293+
},
294+
"metadata": {},
295+
"output_type": "display_data"
296+
},
260297
{
261298
"name": "stdout",
262299
"output_type": "stream",
@@ -267,17 +304,16 @@
267304
{
268305
"data": {
269306
"application/vnd.jupyter.widget-view+json": {
270-
"model_id": "a4ec5248708442fabc59c446c78a1304",
307+
"model_id": "651b5aac958c408183775152c2573a03",
271308
"version_major": 2,
272-
"version_minor": 1
309+
"version_minor": 0
273310
},
274311
"text/plain": [
275312
"TableWidget(page_size=10, row_count=5, table_html='<table border=\"1\" class=\"dataframe table table-striped tabl…"
276313
]
277314
},
278-
"execution_count": 9,
279315
"metadata": {},
280-
"output_type": "execute_result"
316+
"output_type": "display_data"
281317
}
282318
],
283319
"source": [
@@ -287,19 +323,11 @@
287323
"print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\")\n",
288324
"small_widget"
289325
]
290-
},
291-
{
292-
"cell_type": "code",
293-
"execution_count": null,
294-
"id": "c4e5836b-c872-4a9c-b9ec-14f6f338176d",
295-
"metadata": {},
296-
"outputs": [],
297-
"source": []
298326
}
299327
],
300328
"metadata": {
301329
"kernelspec": {
302-
"display_name": "venv",
330+
"display_name": "3.10.18",
303331
"language": "python",
304332
"name": "python3"
305333
},
@@ -313,7 +341,7 @@
313341
"name": "python",
314342
"nbconvert_exporter": "python",
315343
"pygments_lexer": "ipython3",
316-
"version": "3.10.16"
344+
"version": "3.10.18"
317345
}
318346
},
319347
"nbformat": 4,

0 commit comments

Comments
 (0)