From ae06c02ec8237b618fd3aecc50d267119c8956a7 Mon Sep 17 00:00:00 2001 From: Tim Swena Date: Mon, 31 Mar 2025 12:00:31 -0500 Subject: [PATCH 1/3] fix: `to_pandas_batches()` respects `page_size` and `max_results` again --- bigframes/session/_io/bigquery/__init__.py | 2 ++ tests/system/load/test_large_tables.py | 8 ++++--- tests/system/small/test_dataframe_io.py | 25 ++++++++++++++++++++++ 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/bigframes/session/_io/bigquery/__init__.py b/bigframes/session/_io/bigquery/__init__.py index d9f1c0f295..4fdd836777 100644 --- a/bigframes/session/_io/bigquery/__init__.py +++ b/bigframes/session/_io/bigquery/__init__.py @@ -245,6 +245,8 @@ def start_query_with_client( location=location, project=project, api_timeout=timeout, + page_size=page_size, + max_results=max_results, ) if metrics is not None: metrics.count_job_stats(query=sql) diff --git a/tests/system/load/test_large_tables.py b/tests/system/load/test_large_tables.py index 472be3d2ad..ee49c2703e 100644 --- a/tests/system/load/test_large_tables.py +++ b/tests/system/load/test_large_tables.py @@ -75,17 +75,19 @@ def test_index_repr_large_table(): def test_to_pandas_batches_large_table(): - df = bpd.read_gbq("load_testing.scalars_1tb") + df = bpd.read_gbq("load_testing.scalars_100gb") _, expected_column_count = df.shape # download only a few batches, since 1tb would be too much - iterable = df.to_pandas_batches(page_size=500, max_results=1500) + iterable = df.to_pandas_batches( + page_size=500, max_results=1500, allow_large_results=True + ) # use page size since client library doesn't support # streaming only part of the dataframe via bqstorage for pdf in iterable: batch_row_count, batch_column_count = pdf.shape assert batch_column_count == expected_column_count - assert batch_row_count > 0 + assert 0 < batch_row_count <= 500 @pytest.mark.skip(reason="See if it caused kokoro build aborted.") diff --git a/tests/system/small/test_dataframe_io.py 
b/tests/system/small/test_dataframe_io.py index cd21f5094c..fbaf4fcb49 100644 --- a/tests/system/small/test_dataframe_io.py +++ b/tests/system/small/test_dataframe_io.py @@ -35,6 +35,7 @@ import bigframes import bigframes.dataframe +import bigframes.enums import bigframes.features import bigframes.pandas as bpd @@ -288,6 +289,30 @@ def test_to_pandas_batches_w_correct_dtypes(scalars_df_default_index): pd.testing.assert_series_equal(actual, expected) +@pytest.mark.parametrize("allow_large_results", (True, False)) +def test_to_pandas_batches_w_page_size_and_max_results(session, allow_large_results): + """Verify the to_pandas_batches() API returns the expected page size. + + Regression test for b/407521010. + """ + bf_df = session.read_gbq( + "bigquery-public-data.usa_names.usa_1910_2013", + index_col=bigframes.enums.DefaultIndexKind.NULL, + ) + expected_column_count = len(bf_df.columns) + + batch_count = 0 + for pd_df in bf_df.to_pandas_batches( + page_size=42, allow_large_results=allow_large_results, max_results=42 * 3 + ): + batch_row_count, batch_column_count = pd_df.shape + batch_count += 1 + assert batch_column_count == expected_column_count + assert batch_row_count == 42 + + assert batch_count == 3 + + @pytest.mark.parametrize( ("index",), [(True,), (False,)], From 735da6771ae0aa318d2d21a5eea1f5d02d5457fd Mon Sep 17 00:00:00 2001 From: Tim Swena Date: Mon, 31 Mar 2025 13:43:07 -0500 Subject: [PATCH 2/3] fix lint --- .pre-commit-config.yaml | 4 ++-- noxfile.py | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8ca120bd07..863a345da1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,11 +31,11 @@ repos: hooks: - id: black - repo: https://github.com/pycqa/flake8 - rev: 6.1.0 + rev: 7.1.2 hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.10.0 + rev: v1.15.0 hooks: - id: mypy additional_dependencies:
[types-requests, types-tabulate, pandas-stubs<=2.2.3.241126] diff --git a/noxfile.py b/noxfile.py index bb4ba91a3a..5c7a2eacc9 100644 --- a/noxfile.py +++ b/noxfile.py @@ -29,7 +29,9 @@ import nox.sessions BLACK_VERSION = "black==22.3.0" +FLAKE8_VERSION = "flake8==7.1.2" ISORT_VERSION = "isort==5.12.0" +MYPY_VERSION = "mypy==1.15.0" # TODO: switch to 3.13 once remote functions / cloud run adds a runtime for it (internal issue 333742751) LATEST_FULLY_SUPPORTED_PYTHON = "3.12" @@ -135,7 +137,7 @@ def lint(session): Returns a failure if the linters find linting errors or sufficiently serious code quality issues. """ - session.install("flake8", BLACK_VERSION, ISORT_VERSION) + session.install(FLAKE8_VERSION, BLACK_VERSION, ISORT_VERSION) session.run( "isort", "--check", @@ -264,7 +266,7 @@ def mypy(session): deps = ( set( [ - "mypy", + MYPY_VERSION, # TODO: update to latest pandas-stubs once we resolve bigframes issues. "pandas-stubs<=2.2.3.241126", "types-protobuf", From 1a8435ae713ee230c2bc06aa7dc53090ef8417c2 Mon Sep 17 00:00:00 2001 From: Tim Swena Date: Mon, 31 Mar 2025 13:52:47 -0500 Subject: [PATCH 3/3] help with session close flakiness --- tests/system/small/test_bq_sessions.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/system/small/test_bq_sessions.py b/tests/system/small/test_bq_sessions.py index e470728061..7aad19bd8f 100644 --- a/tests/system/small/test_bq_sessions.py +++ b/tests/system/small/test_bq_sessions.py @@ -13,6 +13,7 @@ # limitations under the License. 
from concurrent.futures import ThreadPoolExecutor +import time import google import google.api_core.exceptions @@ -58,7 +59,11 @@ def test_bq_session_create_temp_table_clustered(bigquery_client: bigquery.Client session_resource_manager.close() with pytest.raises(google.api_core.exceptions.NotFound): - bigquery_client.get_table(session_table_ref) + # It may take time for the underlying tables to get cleaned up after + # closing the session, so wait at least 1 minute to check. + for _ in range(6): + bigquery_client.get_table(session_table_ref) + time.sleep(10) def test_bq_session_create_multi_temp_tables(bigquery_client: bigquery.Client):