fix: Resolve AttributeError in TableWidget and improve initialization (#1937)

shuoweil · web-flow · commit 4c4c9b14657b · 2025-10-27T14:30:54.000-05:00
* remove expensive len() call

* add testcase

* fix a typo

* change how row_count is updated

* testcase still fails, need to merge in 1888

* update the method of using PandasBatches.total_rows

* change tests in read_gbq_colab

* polish comment

* fix a test

* change code and update more testcase

* remove unneeded except

* add assert for total_rows

* get actual row_counts

* avoid two query calls

* remove double query when display widget

* get row count directly

* restore notebook

* restore notebook change

* remove duplicated code

* minor updates

* still have zero total rows issue

* now large dataset can get the correct row counts

* benchmark change

* revert a benchmark

* revert executor change

* raising a NotImplementedError when the row count is none

* change return type

* Revert accidental change of dataframe.ipynb

* remove unnecessary execution in benchmark

* remove row_count check

* remove extra execute_result

* remove unnecessary tests

* Fix: Address review comments on PandasBatches and docstring

- Reinstated 'Iterator[pd.DataFrame]' inheritance for 'PandasBatches' in 'bigframes/core/blocks.py'.
- Removed internal type hint 'bigframes.core.blocks.PandasBatches:' from 'to_pandas_batches' docstring in 'bigframes/dataframe.py' to avoid exposing internal types in public documentation.

* Revert: Revert import change in read_gbq_colab benchmark

This reverts the import path for the benchmark utils to 'benchmark.utils' to address concerns about google3 imports.

* Revert: Revert unnecessary changes in read_gbq_colab benchmarks

* Remove notebooks/Untitled-2.ipynb

* Remove notebooks/multimodal/audio_transcribe_partial_ordering.ipynb

* remove unnecessary change

* revert typo

* add todo

* change docstring

* revert changes to tests/benchmark/read_gbq_colab

* merge change

* update how we handle invalid row count

* eliminate duplicated flags
diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py
@@ -23,13 +23,14 @@
 import pandas as pd
 
 import bigframes
+from bigframes.core import blocks
 import bigframes.dataframe
 import bigframes.display.html
 
-# anywidget and traitlets are optional dependencies. We don't want the import of this
-# module to fail if they aren't installed, though. Instead, we try to limit the surface that
-# these packages could affect. This makes unit testing easier and ensures we don't
-# accidentally make these required packages.
+# anywidget and traitlets are optional dependencies. We don't want the import of
+# this module to fail if they aren't installed, though. Instead, we try to
+# limit the surface that these packages could affect. This makes unit testing
+# easier and ensures we don't accidentally make these required packages.
 try:
     import anywidget
     import traitlets
@@ -46,9 +47,21 @@
 
 
 class TableWidget(WIDGET_BASE):
+    """An interactive, paginated table widget for BigFrames DataFrames.
+
+    This widget provides a user-friendly way to display and navigate through
+    large BigQuery DataFrames within a Jupyter environment.
     """
-    An interactive, paginated table widget for BigFrames DataFrames.
-    """
+
+    page = traitlets.Int(0).tag(sync=True)
+    page_size = traitlets.Int(0).tag(sync=True)
+    row_count = traitlets.Int(0).tag(sync=True)
+    table_html = traitlets.Unicode().tag(sync=True)
+    _initial_load_complete = traitlets.Bool(False).tag(sync=True)
+    _batches: Optional[blocks.PandasBatches] = None
+    _error_message = traitlets.Unicode(allow_none=True, default_value=None).tag(
+        sync=True
+    )
 
     def __init__(self, dataframe: bigframes.dataframe.DataFrame):
         """Initialize the TableWidget.
@@ -61,10 +74,11 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
                 "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use TableWidget."
             )
 
-        super().__init__()
         self._dataframe = dataframe
 
-        # Initialize attributes that might be needed by observers FIRST
+        super().__init__()
+
+        # Initialize attributes that might be needed by observers first
         self._table_id = str(uuid.uuid4())
         self._all_data_loaded = False
         self._batch_iter: Optional[Iterator[pd.DataFrame]] = None
@@ -73,9 +87,6 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
         # respect display options for initial page size
         initial_page_size = bigframes.options.display.max_rows
 
-        # Initialize data fetching attributes.
-        self._batches = dataframe._to_pandas_batches(page_size=initial_page_size)
-
         # set traitlets properties that trigger observers
         self.page_size = initial_page_size
 
@@ -84,12 +95,21 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
         # TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
         # before we get here so that the count might already be cached.
         # TODO(b/452747934): Allow row_count to be None and check to see if
-        # there are multiple pages and show "page 1 of many" in this case.
-        self.row_count = self._batches.total_rows or 0
+        # there are multiple pages and show "page 1 of many" in this case
+        self._reset_batches_for_new_page_size()
+        if self._batches is None or self._batches.total_rows is None:
+            self._error_message = "Could not determine total row count. Data might be unavailable or an error occurred."
+            self.row_count = 0
+        else:
+            self.row_count = self._batches.total_rows
 
         # get the initial page
         self._set_table_html()
 
+        # Signals to the frontend that the initial data load is complete.
+        # Also used as a guard to prevent observers from firing during initialization.
+        self._initial_load_complete = True
+
     @functools.cached_property
     def _esm(self):
         """Load JavaScript code from external file."""
@@ -100,11 +120,6 @@ def _css(self):
         """Load CSS code from external file."""
         return resources.read_text(bigframes.display, "table_widget.css")
 
-    page = traitlets.Int(0).tag(sync=True)
-    page_size = traitlets.Int(25).tag(sync=True)
-    row_count = traitlets.Int(0).tag(sync=True)
-    table_html = traitlets.Unicode().tag(sync=True)
-
     @traitlets.validate("page")
     def _validate_page(self, proposal: Dict[str, Any]) -> int:
         """Validate and clamp the page number to a valid range.
@@ -171,7 +186,10 @@ def _get_next_batch(self) -> bool:
     def _batch_iterator(self) -> Iterator[pd.DataFrame]:
         """Lazily initializes and returns the batch iterator."""
         if self._batch_iter is None:
-            self._batch_iter = iter(self._batches)
+            if self._batches is None:
+                self._batch_iter = iter([])
+            else:
+                self._batch_iter = iter(self._batches)
         return self._batch_iter
 
     @property
@@ -181,15 +199,22 @@ def _cached_data(self) -> pd.DataFrame:
             return pd.DataFrame(columns=self._dataframe.columns)
         return pd.concat(self._cached_batches, ignore_index=True)
 
-    def _reset_batches_for_new_page_size(self):
+    def _reset_batches_for_new_page_size(self) -> None:
         """Reset the batch iterator when page size changes."""
         self._batches = self._dataframe._to_pandas_batches(page_size=self.page_size)
+
         self._cached_batches = []
         self._batch_iter = None
         self._all_data_loaded = False
 
-    def _set_table_html(self):
+    def _set_table_html(self) -> None:
         """Sets the current html data based on the current page and page size."""
+        if self._error_message:
+            self.table_html = (
+                f"<div class='bigframes-error-message'>{self._error_message}</div>"
+            )
+            return
+
         start = self.page * self.page_size
         end = start + self.page_size
 
@@ -211,13 +236,17 @@ def _set_table_html(self):
         )
 
     @traitlets.observe("page")
-    def _page_changed(self, _change: Dict[str, Any]):
+    def _page_changed(self, _change: Dict[str, Any]) -> None:
         """Handler for when the page number is changed from the frontend."""
+        if not self._initial_load_complete:
+            return
         self._set_table_html()
 
     @traitlets.observe("page_size")
-    def _page_size_changed(self, _change: Dict[str, Any]):
+    def _page_size_changed(self, _change: Dict[str, Any]) -> None:
         """Handler for when the page size is changed from the frontend."""
+        if not self._initial_load_complete:
+            return
         # Reset the page to 0 when page size changes to avoid invalid page states
         self.page = 0
 
diff --git a/bigframes/display/table_widget.js b/bigframes/display/table_widget.js
@@ -137,6 +137,12 @@ function render({ model, el }) {
 		}
 	});
 	model.on(Event.CHANGE_TABLE_HTML, handleTableHTMLChange);
+	model.on(`change:${ModelProperty.ROW_COUNT}`, updateButtonStates);
+	model.on(`change:_initial_load_complete`, (val) => {
+		if (val) {
+			updateButtonStates();
+		}
+	});
 
 	// Assemble the DOM
 	paginationContainer.appendChild(prevPage);
diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb
@@ -127,12 +127,24 @@
    "id": "ce250157",
    "metadata": {},
    "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "✅ Completed. "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "9e3e413eb0774a62818c58d217af8488",
+       "model_id": "aafd4f912b5f42e0896aa5f0c2c62620",
        "version_major": 2,
-       "version_minor": 1
+       "version_minor": 0
       },
       "text/plain": [
        "TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
@@ -171,6 +183,18 @@
    "id": "6920d49b",
    "metadata": {},
    "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "✅ Completed. "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
     {
      "name": "stdout",
      "output_type": "stream",
@@ -181,17 +205,16 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "df5e93f0d03f45cda67aa6da7f9ef1ae",
+       "model_id": "5ec0ad9f11874d4f9d8edbc903ee7b5d",
        "version_major": 2,
-       "version_minor": 1
+       "version_minor": 0
       },
       "text/plain": [
        "TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
       ]
      },
-     "execution_count": 7,
      "metadata": {},
-     "output_type": "execute_result"
+     "output_type": "display_data"
     }
    ],
    "source": [
@@ -257,6 +280,20 @@
    "id": "a9d5d13a",
    "metadata": {},
    "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "✅ Completed. \n",
+       "    Query processed 171.4 MB in a moment of slot time.\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
     {
      "name": "stdout",
      "output_type": "stream",
@@ -267,17 +304,16 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "a4ec5248708442fabc59c446c78a1304",
+       "model_id": "651b5aac958c408183775152c2573a03",
        "version_major": 2,
-       "version_minor": 1
+       "version_minor": 0
       },
       "text/plain": [
        "TableWidget(page_size=10, row_count=5, table_html='<table border=\"1\" class=\"dataframe table table-striped tabl…"
       ]
      },
-     "execution_count": 9,
      "metadata": {},
-     "output_type": "execute_result"
+     "output_type": "display_data"
     }
    ],
    "source": [
@@ -287,19 +323,11 @@
     "print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\")\n",
     "small_widget"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c4e5836b-c872-4a9c-b9ec-14f6f338176d",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "venv",
+   "display_name": "3.10.18",
    "language": "python",
    "name": "python3"
   },
@@ -313,7 +341,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.16"
+   "version": "3.10.18"
   }
  },
  "nbformat": 4,
diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py