
Commit 5f87995

Merge branch 'sycai_rolling_window' of https://github.com/googleapis/python-bigquery-dataframes into sycai_rolling_window
2 parents 75de6ad + 91d349e commit 5f87995

6 files changed (+476 -354 lines)

bigframes/core/blocks.py

Lines changed: 5 additions & 1 deletion
@@ -590,6 +590,7 @@ def to_pandas_batches(
         page_size: Optional[int] = None,
         max_results: Optional[int] = None,
         allow_large_results: Optional[bool] = None,
+        squeeze: Optional[bool] = False,
     ):
         """Download results one message at a time.
@@ -605,7 +606,10 @@ def to_pandas_batches(
         for record_batch in execute_result.arrow_batches():
            df = io_pandas.arrow_to_pandas(record_batch, self.expr.schema)
            self._copy_index_to_pandas(df)
-            yield df
+            if squeeze:
+                yield df.squeeze(axis=1)
+            else:
+                yield df

     def _copy_index_to_pandas(self, df: pd.DataFrame):
         """Set the index on pandas DataFrame to match this block.

bigframes/series.py

Lines changed: 76 additions & 1 deletion
@@ -23,7 +23,18 @@
 import numbers
 import textwrap
 import typing
-from typing import Any, cast, List, Literal, Mapping, Optional, Sequence, Tuple, Union
+from typing import (
+    Any,
+    cast,
+    Iterable,
+    List,
+    Literal,
+    Mapping,
+    Optional,
+    Sequence,
+    Tuple,
+    Union,
+)

 import bigframes_vendored.constants as constants
 import bigframes_vendored.pandas.core.series as vendored_pandas_series
@@ -479,6 +490,70 @@ def to_pandas(
         series.name = self._name
         return series

+    def to_pandas_batches(
+        self,
+        page_size: Optional[int] = None,
+        max_results: Optional[int] = None,
+        *,
+        allow_large_results: Optional[bool] = None,
+    ) -> Iterable[pandas.Series]:
+        """Stream Series results to an iterable of pandas Series.
+
+        page_size and max_results determine the size and number of batches,
+        see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob#google_cloud_bigquery_job_QueryJob_result
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+            >>> s = bpd.Series([4, 3, 2, 2, 3])
+
+        Iterate through the results in batches, limiting the total rows yielded
+        across all batches via `max_results`:
+
+            >>> for s_batch in s.to_pandas_batches(max_results=3):
+            ...     print(s_batch)
+            0    4
+            1    3
+            2    2
+            dtype: Int64
+
+        Alternatively, control the approximate size of each batch using `page_size`
+        and fetch batches manually using `next()`:
+
+            >>> it = s.to_pandas_batches(page_size=2)
+            >>> next(it)
+            0    4
+            1    3
+            dtype: Int64
+            >>> next(it)
+            2    2
+            3    2
+            dtype: Int64
+
+        Args:
+            page_size (int, default None):
+                The maximum number of rows of each batch. Non-positive values are ignored.
+            max_results (int, default None):
+                The maximum total number of rows of all batches.
+            allow_large_results (bool, default None):
+                If not None, overrides the global setting to allow or disallow large query results
+                over the default size limit of 10 GB.
+
+        Returns:
+            Iterable[pandas.Series]:
+                An iterable of smaller Series which combine to
+                form the original Series. Results stream from bigquery,
+                see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.table.RowIterator#google_cloud_bigquery_table_RowIterator_to_arrow_iterable
+        """
+        df = self._block.to_pandas_batches(
+            page_size=page_size,
+            max_results=max_results,
+            allow_large_results=allow_large_results,
+            squeeze=True,
+        )
+        return df
+
     def _compute_dry_run(self) -> bigquery.QueryJob:
         _, query_job = self._block._compute_dry_run((self._value_column,))
         return query_job
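
Beyond the doctests in the docstring, the new iterator supports incremental consumption, which is the main reason to prefer it over to_pandas() for large results. A usage sketch (assumes an authenticated BigQuery session; the data and batch size are illustrative):

    import bigframes.pandas as bpd

    bpd.options.display.progress_bar = None
    s = bpd.Series([4, 3, 2, 2, 3])

    # Aggregate batch by batch instead of materializing the whole Series locally;
    # each yielded object is a pandas.Series because the block is asked to squeeze.
    total = 0
    for batch in s.to_pandas_batches(page_size=2):
        total += int(batch.sum())
    print(total)  # 14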

bigframes/session/__init__.py

Lines changed: 162 additions & 91 deletions
@@ -910,112 +910,183 @@ def read_csv(
             engine=engine,
             write_engine=write_engine,
         )
-        if engine is not None and engine == "bigquery":
-            if any(param is not None for param in (dtype, names)):
-                not_supported = ("dtype", "names")
-                raise NotImplementedError(
-                    f"BigQuery engine does not support these arguments: {not_supported}. "
-                    f"{constants.FEEDBACK_LINK}"
-                )

-            # TODO(b/338089659): Looks like we can relax this 1 column
-            # restriction if we check the contents of an iterable are strings
-            # not integers.
-            if (
-                # Empty tuples, None, and False are allowed and falsey.
-                index_col
-                and not isinstance(index_col, bigframes.enums.DefaultIndexKind)
-                and not isinstance(index_col, str)
-            ):
-                raise NotImplementedError(
-                    "BigQuery engine only supports a single column name for `index_col`, "
-                    f"got: {repr(index_col)}. {constants.FEEDBACK_LINK}"
-                )
+        if engine != "bigquery":
+            # Using pandas.read_csv by default and warning about potential issues with
+            # large files.
+            return self._read_csv_w_pandas_engines(
+                filepath_or_buffer,
+                sep=sep,
+                header=header,
+                names=names,
+                index_col=index_col,
+                usecols=usecols,  # type: ignore
+                dtype=dtype,
+                engine=engine,
+                encoding=encoding,
+                write_engine=write_engine,
+                **kwargs,
+            )
+        else:
+            return self._read_csv_w_bigquery_engine(
+                filepath_or_buffer,
+                sep=sep,
+                header=header,
+                names=names,
+                index_col=index_col,
+                usecols=usecols,  # type: ignore
+                dtype=dtype,
+                encoding=encoding,
+            )

-        # None and False cannot be passed to read_gbq.
-        # TODO(b/338400133): When index_col is None, we should be using the
-        # first column of the CSV as the index to be compatible with the
-        # pandas engine. According to the pandas docs, only "False"
-        # indicates a default sequential index.
-        if not index_col:
-            index_col = ()
+    def _read_csv_w_pandas_engines(
+        self,
+        filepath_or_buffer,
+        *,
+        sep,
+        header,
+        names,
+        index_col,
+        usecols,
+        dtype,
+        engine,
+        encoding,
+        write_engine,
+        **kwargs,
+    ) -> dataframe.DataFrame:
+        """Reads a CSV file using pandas engines into a BigQuery DataFrames.

-            index_col = typing.cast(
-                Union[
-                    Sequence[str],  # Falsey values
-                    bigframes.enums.DefaultIndexKind,
-                    str,
-                ],
-                index_col,
+        This method serves as the implementation backend for read_csv when the
+        specified engine is one supported directly by pandas ('c', 'python',
+        'pyarrow').
+        """
+        if isinstance(index_col, bigframes.enums.DefaultIndexKind):
+            raise NotImplementedError(
+                f"With index_col={repr(index_col)}, only engine='bigquery' is supported. "
+                f"{constants.FEEDBACK_LINK}"
             )
+        if any(arg in kwargs for arg in ("chunksize", "iterator")):
+            raise NotImplementedError(
+                "'chunksize' and 'iterator' arguments are not supported. "
+                f"{constants.FEEDBACK_LINK}"
+            )
+        if isinstance(filepath_or_buffer, str):
+            self._check_file_size(filepath_or_buffer)

-            # usecols should only be an iterable of strings (column names) for use as columns in read_gbq.
-            columns: Tuple[Any, ...] = tuple()
-            if usecols is not None:
-                if isinstance(usecols, Iterable) and all(
-                    isinstance(col, str) for col in usecols
-                ):
-                    columns = tuple(col for col in usecols)
-                else:
-                    raise NotImplementedError(
-                        "BigQuery engine only supports an iterable of strings for `usecols`. "
-                        f"{constants.FEEDBACK_LINK}"
-                    )
+        pandas_df = pandas.read_csv(
+            filepath_or_buffer,
+            sep=sep,
+            header=header,
+            names=names,
+            index_col=index_col,
+            usecols=usecols,  # type: ignore
+            dtype=dtype,
+            engine=engine,
+            encoding=encoding,
+            **kwargs,
+        )
+        return self._read_pandas(pandas_df, api_name="read_csv", write_engine=write_engine)  # type: ignore

-            if encoding is not None and encoding not in _VALID_ENCODINGS:
-                raise NotImplementedError(
-                    f"BigQuery engine only supports the following encodings: {_VALID_ENCODINGS}. "
-                    f"{constants.FEEDBACK_LINK}"
-                )
+    def _read_csv_w_bigquery_engine(
+        self,
+        filepath_or_buffer,
+        *,
+        sep,
+        header,
+        names,
+        index_col,
+        usecols,
+        dtype,
+        encoding,
+    ) -> dataframe.DataFrame:
+        """Reads a CSV file using the BigQuery engine into a BigQuery DataFrames.

-            job_config = bigquery.LoadJobConfig()
-            job_config.source_format = bigquery.SourceFormat.CSV
-            job_config.autodetect = True
-            job_config.field_delimiter = sep
-            job_config.encoding = encoding
-            job_config.labels = {"bigframes-api": "read_csv"}
+        This method serves as the implementation backend for read_csv when the
+        'bigquery' engine is specified or inferred. It leverages BigQuery's
+        native CSV loading capabilities, making it suitable for large datasets
+        that may not fit into local memory.
+        """

-            # We want to match pandas behavior. If header is 0, no rows should be skipped, so we
-            # do not need to set `skip_leading_rows`. If header is None, then there is no header.
-            # Setting skip_leading_rows to 0 does that. If header=N and N>0, we want to skip N rows.
-            if header is None:
-                job_config.skip_leading_rows = 0
-            elif header > 0:
-                job_config.skip_leading_rows = header
+        if any(param is not None for param in (dtype, names)):
+            not_supported = ("dtype", "names")
+            raise NotImplementedError(
+                f"BigQuery engine does not support these arguments: {not_supported}. "
+                f"{constants.FEEDBACK_LINK}"
+            )

-            return self._loader.read_bigquery_load_job(
-                filepath_or_buffer,
-                job_config=job_config,
-                index_col=index_col,
-                columns=columns,
+        # TODO(b/338089659): Looks like we can relax this 1 column
+        # restriction if we check the contents of an iterable are strings
+        # not integers.
+        if (
+            # Empty tuples, None, and False are allowed and falsey.
+            index_col
+            and not isinstance(index_col, bigframes.enums.DefaultIndexKind)
+            and not isinstance(index_col, str)
+        ):
+            raise NotImplementedError(
+                "BigQuery engine only supports a single column name for `index_col`, "
+                f"got: {repr(index_col)}. {constants.FEEDBACK_LINK}"
             )
-        else:
-            if isinstance(index_col, bigframes.enums.DefaultIndexKind):
-                raise NotImplementedError(
-                    f"With index_col={repr(index_col)}, only engine='bigquery' is supported. "
-                    f"{constants.FEEDBACK_LINK}"
-                )
-            if any(arg in kwargs for arg in ("chunksize", "iterator")):
+
+        # None and False cannot be passed to read_gbq.
+        # TODO(b/338400133): When index_col is None, we should be using the
+        # first column of the CSV as the index to be compatible with the
+        # pandas engine. According to the pandas docs, only "False"
+        # indicates a default sequential index.
+        if not index_col:
+            index_col = ()
+
+        index_col = typing.cast(
+            Union[
+                Sequence[str],  # Falsey values
+                bigframes.enums.DefaultIndexKind,
+                str,
+            ],
+            index_col,
+        )
+
+        # usecols should only be an iterable of strings (column names) for use as columns in read_gbq.
+        columns: Tuple[Any, ...] = tuple()
+        if usecols is not None:
+            if isinstance(usecols, Iterable) and all(
+                isinstance(col, str) for col in usecols
+            ):
+                columns = tuple(col for col in usecols)
+            else:
                 raise NotImplementedError(
-                    "'chunksize' and 'iterator' arguments are not supported. "
+                    "BigQuery engine only supports an iterable of strings for `usecols`. "
                     f"{constants.FEEDBACK_LINK}"
                 )

-            if isinstance(filepath_or_buffer, str):
-                self._check_file_size(filepath_or_buffer)
-            pandas_df = pandas.read_csv(
-                filepath_or_buffer,
-                sep=sep,
-                header=header,
-                names=names,
-                index_col=index_col,
-                usecols=usecols,  # type: ignore
-                dtype=dtype,
-                engine=engine,
-                encoding=encoding,
-                **kwargs,
+        if encoding is not None and encoding not in _VALID_ENCODINGS:
+            raise NotImplementedError(
+                f"BigQuery engine only supports the following encodings: {_VALID_ENCODINGS}. "
+                f"{constants.FEEDBACK_LINK}"
             )
-            return self._read_pandas(pandas_df, api_name="read_csv", write_engine=write_engine)  # type: ignore
+
+        job_config = bigquery.LoadJobConfig()
+        job_config.source_format = bigquery.SourceFormat.CSV
+        job_config.autodetect = True
+        job_config.field_delimiter = sep
+        job_config.encoding = encoding
+        job_config.labels = {"bigframes-api": "read_csv"}
+
+        # b/409070192: When header > 0, pandas and BigFrames returns different column naming.
+
+        # We want to match pandas behavior. If header is 0, no rows should be skipped, so we
+        # do not need to set `skip_leading_rows`. If header is None, then there is no header.
+        # Setting skip_leading_rows to 0 does that. If header=N and N>0, we want to skip N rows.
+        if header is None:
+            job_config.skip_leading_rows = 0
+        elif header > 0:
+            job_config.skip_leading_rows = header + 1
+
+        return self._loader.read_bigquery_load_job(
+            filepath_or_buffer,
+            job_config=job_config,
+            index_col=index_col,
+            columns=columns,
+        )

     def read_pickle(
         self,
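
The `skip_leading_rows = header + 1` change aligns the load job with how pandas interprets `header=N`: row N becomes the header and all earlier rows are discarded. A plain-pandas illustration of that semantics, using synthetic CSV text (illustrative only, not from the repository):

    import io
    import pandas as pd

    csv_text = "junk,junk\ncol_a,col_b\n1,2\n3,4\n"

    # header=1: pandas drops row 0, uses row 1 as the header, and starts data at row 2.
    # The BigQuery load job is configured to skip header + 1 = 2 leading rows to match.
    df = pd.read_csv(io.StringIO(csv_text), header=1)
    print(df)
    #    col_a  col_b
    # 0      1      2
    # 1      3      4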

0 commit comments
