docs: deprecate bpd.options.bigquery.allow_large_results in favor of bpd.options.compute.allow_large_results #1597

Merged: 15 commits, May 12, 2025

(The diff below shows changes from 2 of the 15 commits.)

39 changes: 29 additions & 10 deletions bigframes/_config/bigquery_options.py
@@ -19,10 +19,8 @@
 from typing import Literal, Optional
 import warnings
 
-import google.api_core.exceptions
 import google.auth.credentials
 
-import bigframes.constants
 import bigframes.enums
 import bigframes.exceptions as bfe

@@ -239,22 +237,43 @@ def skip_bq_connection_check(self, value: bool):
     @property
     def allow_large_results(self) -> bool:
         """
-        Sets the flag to allow or disallow query results larger than 10 GB.
+        DEPRECATED: Checks the legacy global setting for allowing large results.
+        Use ``bpd.options.compute.allow_large_results`` instead.
 
-        The default setting for this flag is True, which allows queries to return results
-        exceeding 10 GB by creating an explicit destination table. If set to False, it
-        restricts the result size to 10 GB, and BigQuery will raise an error if this limit
-        is exceeded.
+        Warning: Accessing ``bpd.options.bigquery.allow_large_results`` is deprecated
+        and this property will be removed in a future version. The configuration for
+        handling large results has moved.
 
         Returns:
-            bool: True if large results are allowed with an explicit destination table,
-            False if results are limited to 10 GB and errors are raised when exceeded.
+            bool: The value of the deprecated setting.
         """
+        warnings.warn(
+            "`bpd.options.bigquery.allow_large_results` is deprecated and will be removed soon. "
+            "Please use `bpd.options.compute.allow_large_results` instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         return self._allow_large_results
 
     @allow_large_results.setter
     def allow_large_results(self, value: bool):
-        self._allow_large_results = value
+        """
+        DEPRECATED: Setting ``allow_large_results`` via ``bpd.options.bigquery``
+        is deprecated and has no effect. Use
+        ``bpd.options.compute.allow_large_results`` instead.
+
+        Warning: Setting this option here is deprecated, ignored, and this setter
+        will be removed in a future version. The configuration for handling large
+        results has moved.
+        """
+        warnings.warn(
+            "Setting `bpd.options.bigquery.allow_large_results` is deprecated, ignored, "
+            "and will be removed soon. "
+            "Please use `bpd.options.compute.allow_large_results = <value>` instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        pass
 
     @property
     def use_regional_endpoints(self) -> bool:
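
With this change, reading the old option still returns the stored value but emits a DeprecationWarning, and assigning to it is a no-op. A minimal sketch of what a caller observes (assumes only that `bigframes.pandas` imports cleanly; no query is issued):

```python
import warnings

import bigframes.pandas as bpd

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    _ = bpd.options.bigquery.allow_large_results  # read: warns, still returns a bool
    bpd.options.bigquery.allow_large_results = True  # write: warns, silently ignored

assert any(issubclass(w.category, DeprecationWarning) for w in caught)

bpd.options.compute.allow_large_results = True  # the supported knob going forward
```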
10 changes: 9 additions & 1 deletion bigframes/_config/compute_options.py
@@ -86,6 +86,12 @@ class ComputeOptions:
         ai_ops_threshold_autofail (bool):
             Guards against unexpected processing of large amount of rows by semantic operators.
             When set to True, the operation automatically fails without asking for user inputs.
+
+        allow_large_results (bool):
+            Specifies whether query results can exceed 10 GB. Defaults to False. Setting this
+            to False (the default) restricts results to 10 GB for potentially faster execution;
+            BigQuery will raise an error if this limit is exceeded. Setting to True removes
+            this result size limit.
     """
 
     maximum_bytes_billed: Optional[int] = None
@@ -97,7 +103,9 @@ class ComputeOptions:
     semantic_ops_threshold_autofail = False
 
     ai_ops_confirmation_threshold: Optional[int] = 0
-    ai_ops_threshold_autofail = False
+    ai_ops_threshold_autofail: bool = False
+
+    allow_large_results: bool = False
 
     def assign_extra_query_labels(self, **kwargs: Any) -> None:
         """
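
Combined with the new `False` default above, opting in now looks like this; a short sketch using the option names from this diff (the `option_context` pattern mirrors the updated tests later in this PR):

```python
import bigframes.pandas as bpd

# New default: results are capped at 10 GB, no destination table needed.
assert bpd.options.compute.allow_large_results is False

# Opt in for the whole process:
bpd.options.compute.allow_large_results = True

# Or scope it to a block, as the updated tests below do:
with bpd.option_context("compute.allow_large_results", False):
    ...  # queries in this block raise if a result exceeds 10 GB
```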
6 changes: 3 additions & 3 deletions bigframes/session/bq_caching_executor.py
@@ -112,7 +112,7 @@ def execute(
         max_results: Optional[int] = None,
     ) -> executor.ExecuteResult:
         if use_explicit_destination is None:
-            use_explicit_destination = bigframes.options.bigquery.allow_large_results
+            use_explicit_destination = bigframes.options.compute.allow_large_results

Collaborator:

Need to use the bigquery option if the compute option is not set.

Suggested change:

-            use_explicit_destination = bigframes.options.compute.allow_large_results
+            allow_large_results = bigframes.options.compute.allow_large_results
+            if allow_large_results is None:
+                allow_large_results = bigframes.options.bigquery.allow_large_results
+            use_explicit_destination = allow_large_results

Since this is going to be used in many places, please make a helper function somewhere in bigframes._config for this.

Collaborator Author (@Genesis929, Apr 28, 2025):

added one in Options class, but I'm not sure if that's the best place for it.
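
For context, such a fallback helper, which is not part of the two commits shown here, might look roughly like the following. The name and exact home in `bigframes._config` are assumptions, and the fallback only works if `compute.allow_large_results` is typed `Optional[bool]` with a `None` default rather than the plain `bool = False` in this diff:

```python
def resolve_allow_large_results(options) -> bool:
    """Prefer the compute option; fall back to the deprecated bigquery one.

    Hypothetical sketch, not the code the PR actually added. Assumes
    options.compute.allow_large_results is Optional[bool] so that None
    can mean "unset".
    """
    compute_value = options.compute.allow_large_results
    if compute_value is not None:
        return compute_value
    # Reading the deprecated option still works (it emits a DeprecationWarning).
    return options.bigquery.allow_large_results
```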


         if bigframes.options.compute.enable_multi_query_execution:
             self._simplify_with_caching(array_value)
@@ -231,7 +231,7 @@ def peek(
             msg = bfe.format_message("Peeking this value cannot be done efficiently.")
             warnings.warn(msg)
         if use_explicit_destination is None:
-            use_explicit_destination = bigframes.options.bigquery.allow_large_results
+            use_explicit_destination = bigframes.options.compute.allow_large_results

         destination_table = (
             self.storage_manager.create_temp_table(
@@ -555,7 +555,7 @@ def iterator_supplier():
"The query result size has exceeded 10 GB. In BigFrames 2.0 and "
"later, you might need to manually set `allow_large_results=True` in "
"the IO method or adjust the BigFrames option: "
"`bigframes.options.bigquery.allow_large_results=True`."
"`bigframes.options.compute.allow_large_results=True`."
)
warnings.warn(msg, FutureWarning)
# Runs strict validations to ensure internal type predictions and ibis are completely in sync
Expand Down
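
The FutureWarning above points at two remedies. A sketch of both, assuming (as the warning text implies) that IO methods such as `to_pandas` accept an `allow_large_results` keyword:

```python
import bigframes.pandas as bpd

df = bpd.read_gbq("bigquery-public-data.samples.wikipedia")

# Remedy 1: allow a large result for just this materialization.
pandas_df = df.to_pandas(allow_large_results=True)

# Remedy 2: flip the global compute option for everything that follows.
bpd.options.compute.allow_large_results = True
pandas_df = df.to_pandas()
```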
12 changes: 4 additions & 8 deletions tests/system/conftest.py
@@ -142,7 +142,7 @@ def resourcemanager_client(

@pytest.fixture(scope="session")
def session() -> Generator[bigframes.Session, None, None]:
context = bigframes.BigQueryOptions(location="US", allow_large_results=False)
context = bigframes.BigQueryOptions(location="US")
session = bigframes.Session(context=context)
yield session
session.close() # close generated session at cleanup time
@@ -158,19 +158,15 @@ def session_load() -> Generator[bigframes.Session, None, None]:

@pytest.fixture(scope="session", params=["strict", "partial"])
def maybe_ordered_session(request) -> Generator[bigframes.Session, None, None]:
context = bigframes.BigQueryOptions(
location="US", ordering_mode=request.param, allow_large_results=False
)
context = bigframes.BigQueryOptions(location="US", ordering_mode=request.param)
session = bigframes.Session(context=context)
yield session
session.close() # close generated session at cleanup type


@pytest.fixture(scope="session")
def unordered_session() -> Generator[bigframes.Session, None, None]:
context = bigframes.BigQueryOptions(
location="US", ordering_mode="partial", allow_large_results=False
)
context = bigframes.BigQueryOptions(location="US", ordering_mode="partial")
session = bigframes.Session(context=context)
yield session
session.close() # close generated session at cleanup type
Expand Down Expand Up @@ -1419,7 +1415,7 @@ def floats_product_bf(session, floats_product_pd):

@pytest.fixture(scope="session", autouse=True)
def use_fast_query_path():
with bpd.option_context("bigquery.allow_large_results", False):
with bpd.option_context("compute.allow_large_results", False):
yield


Expand Down
2 changes: 1 addition & 1 deletion tests/system/large/test_dataframe_io.py
@@ -21,7 +21,7 @@
 import bigframes
 
 WIKIPEDIA_TABLE = "bigquery-public-data.samples.wikipedia"
-LARGE_TABLE_OPTION = "bigquery.allow_large_results"
+LARGE_TABLE_OPTION = "compute.allow_large_results"
 
 
 def test_to_pandas_batches_raise_when_large_result_not_allowed(session):
2 changes: 1 addition & 1 deletion tests/system/small/test_dataframe.py
@@ -5015,7 +5015,7 @@ def test_df_bool_interpretation_error(scalars_df_index):

 def test_query_job_setters(scalars_df_default_index: dataframe.DataFrame):
     # if allow_large_results=False, might not create query job
-    with bigframes.option_context("bigquery.allow_large_results", True):
+    with bigframes.option_context("compute.allow_large_results", True):
         job_ids = set()
         repr(scalars_df_default_index)
         assert scalars_df_default_index.query_job is not None
4 changes: 2 additions & 2 deletions tests/system/small/test_dataframe_io.py
@@ -254,7 +254,7 @@ def test_to_pandas_array_struct_correct_result(session):
 def test_to_pandas_override_global_option(scalars_df_index):
     # Direct call to_pandas uses global default setting (allow_large_results=True),
     # table has 'bqdf' prefix.
-    with bigframes.option_context("bigquery.allow_large_results", True):
+    with bigframes.option_context("compute.allow_large_results", True):
 
         scalars_df_index.to_pandas()
         table_id = scalars_df_index._query_job.destination.table_id
@@ -324,7 +324,7 @@ def test_to_pandas_dry_run(session, scalars_pandas_df_multi_index):

 def test_to_arrow_override_global_option(scalars_df_index):
     # Direct call to_arrow uses global default setting (allow_large_results=True),
-    with bigframes.option_context("bigquery.allow_large_results", True):
+    with bigframes.option_context("compute.allow_large_results", True):
 
         scalars_df_index.to_arrow()
         table_id = scalars_df_index._query_job.destination.table_id
4 changes: 2 additions & 2 deletions tests/system/small/test_index_io.py
@@ -15,7 +15,7 @@


 def test_to_pandas_override_global_option(scalars_df_index):
-    with bigframes.option_context("bigquery.allow_large_results", True):
+    with bigframes.option_context("compute.allow_large_results", True):
 
         bf_index = scalars_df_index.index
 
@@ -39,7 +39,7 @@ def test_to_pandas_dry_run(scalars_df_index):


 def test_to_numpy_override_global_option(scalars_df_index):
-    with bigframes.option_context("bigquery.allow_large_results", True):
+    with bigframes.option_context("compute.allow_large_results", True):
 
         bf_index = scalars_df_index.index
2 changes: 1 addition & 1 deletion tests/system/small/test_progress_bar.py
@@ -64,7 +64,7 @@ def test_progress_bar_scalar_allow_large_results(
     capsys.readouterr()  # clear output
 
     with bf.option_context(
-        "display.progress_bar", "terminal", "bigquery.allow_large_results", "True"
+        "display.progress_bar", "terminal", "compute.allow_large_results", "True"
     ):
         penguins_df_default_index["body_mass_g"].head(10).mean()
2 changes: 1 addition & 1 deletion tests/system/small/test_series.py
@@ -3925,7 +3925,7 @@ def test_series_bool_interpretation_error(scalars_df_index):

 def test_query_job_setters(scalars_dfs):
     # if allow_large_results=False, might not create query job
-    with bigframes.option_context("bigquery.allow_large_results", True):
+    with bigframes.option_context("compute.allow_large_results", True):
         job_ids = set()
         df, _ = scalars_dfs
         series = df["int64_col"]
2 changes: 1 addition & 1 deletion tests/system/small/test_series_io.py
@@ -19,7 +19,7 @@


 def test_to_pandas_override_global_option(scalars_df_index):
-    with bigframes.option_context("bigquery.allow_large_results", True):
+    with bigframes.option_context("compute.allow_large_results", True):
 
         bf_series = scalars_df_index["int64_col"]
1 change: 0 additions & 1 deletion tests/unit/_config/test_bigquery_options.py
@@ -188,5 +188,4 @@ def test_client_endpoints_override_set_shows_warning():
 def test_default_options():
     options = bigquery_options.BigQueryOptions()
 
-    assert options.allow_large_results is False
     assert options.ordering_mode == "strict"
21 changes: 21 additions & 0 deletions tests/unit/_config/test_compute_options.py
@@ -0,0 +1,21 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import bigframes._config.compute_options as compute_options
+
+
+def test_default_options():
+    options = compute_options.ComputeOptions()
+
+    assert options.allow_large_results is False