Skip to content

Commit 4607f86

Browse files
authored
fix: remove noisy AmbiguousWindowWarning from partial ordering mode (#2129)
Also fixes type errors identified when updating notebooks.
1 parent c3c292c commit 4607f86

File tree

15 files changed: 2659 additions and 1478 deletions

15 files changed: 2659 additions and 1478 deletions

bigframes/core/array_value.py

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -224,11 +224,6 @@ def reversed(self) -> ArrayValue:
224224
def slice(
225225
self, start: Optional[int], stop: Optional[int], step: Optional[int]
226226
) -> ArrayValue:
227-
if self.node.order_ambiguous and not (self.session._strictly_ordered):
228-
msg = bfe.format_message(
229-
"Window ordering may be ambiguous, this can cause unstable results."
230-
)
231-
warnings.warn(msg, bfe.AmbiguousWindowWarning)
232227
return ArrayValue(
233228
nodes.SliceNode(
234229
self.node,
@@ -243,17 +238,6 @@ def promote_offsets(self) -> Tuple[ArrayValue, str]:
243238
Convenience function to promote copy of column offsets to a value column. Can be used to reset index.
244239
"""
245240
col_id = self._gen_namespaced_uid()
246-
if self.node.order_ambiguous and not (self.session._strictly_ordered):
247-
if not self.session._allows_ambiguity:
248-
raise ValueError(
249-
"Generating offsets not supported in partial ordering mode"
250-
)
251-
else:
252-
msg = bfe.format_message(
253-
"Window ordering may be ambiguous, this can cause unstable results."
254-
)
255-
warnings.warn(msg, category=bfe.AmbiguousWindowWarning)
256-
257241
return (
258242
ArrayValue(
259243
nodes.PromoteOffsetsNode(child=self.node, col_id=ids.ColumnId(col_id))
@@ -434,18 +418,6 @@ def project_window_expr(
434418
never_skip_nulls=False,
435419
skip_reproject_unsafe: bool = False,
436420
):
437-
# TODO: Support non-deterministic windowing
438-
if window.is_row_bounded or not expression.op.order_independent:
439-
if self.node.order_ambiguous and not self.session._strictly_ordered:
440-
if not self.session._allows_ambiguity:
441-
raise ValueError(
442-
"Generating offsets not supported in partial ordering mode"
443-
)
444-
else:
445-
msg = bfe.format_message(
446-
"Window ordering may be ambiguous, this can cause unstable results."
447-
)
448-
warnings.warn(msg, category=bfe.AmbiguousWindowWarning)
449421
output_name = self._gen_namespaced_uid()
450422
return (
451423
ArrayValue(

bigframes/dataframe.py

Lines changed: 59 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from typing import (
2929
Any,
3030
Callable,
31+
cast,
3132
Dict,
3233
Hashable,
3334
Iterable,
@@ -94,8 +95,12 @@
9495

9596
import bigframes.session
9697

97-
SingleItemValue = Union[bigframes.series.Series, int, float, str, Callable]
98-
MultiItemValue = Union["DataFrame", Sequence[int | float | str | Callable]]
98+
SingleItemValue = Union[
99+
bigframes.series.Series, int, float, str, pandas.Timedelta, Callable
100+
]
101+
MultiItemValue = Union[
102+
"DataFrame", Sequence[int | float | str | pandas.Timedelta | Callable]
103+
]
99104

100105
LevelType = typing.Hashable
101106
LevelsType = typing.Union[LevelType, typing.Sequence[LevelType]]
@@ -581,11 +586,51 @@ def select_dtypes(self, include=None, exclude=None) -> DataFrame:
581586
def _set_internal_query_job(self, query_job: Optional[bigquery.QueryJob]):
582587
self._query_job = query_job
583588

589+
@overload
590+
def __getitem__(
591+
self,
592+
key: bigframes.series.Series,
593+
) -> DataFrame:
594+
...
595+
596+
@overload
597+
def __getitem__(
598+
self,
599+
key: slice,
600+
) -> DataFrame:
601+
...
602+
603+
@overload
604+
def __getitem__(
605+
self,
606+
key: List[str],
607+
) -> DataFrame:
608+
...
609+
610+
@overload
611+
def __getitem__(
612+
self,
613+
key: List[blocks.Label],
614+
) -> DataFrame:
615+
...
616+
617+
@overload
618+
def __getitem__(self, key: pandas.Index) -> DataFrame:
619+
...
620+
621+
@overload
622+
def __getitem__(
623+
self,
624+
key: blocks.Label,
625+
) -> bigframes.series.Series:
626+
...
627+
584628
def __getitem__(
585629
self,
586630
key: Union[
587631
blocks.Label,
588-
Sequence[blocks.Label],
632+
List[str],
633+
List[blocks.Label],
589634
# Index of column labels can be treated the same as a sequence of column labels.
590635
pandas.Index,
591636
bigframes.series.Series,
@@ -601,33 +646,27 @@ def __getitem__(
601646
if isinstance(key, slice):
602647
return self.iloc[key]
603648

604-
if isinstance(key, typing.Hashable):
649+
# TODO(tswast): Fix this pylance warning: Class overlaps "Hashable"
650+
# unsafely and could produce a match at runtime
651+
if isinstance(key, blocks.Label):
605652
return self._getitem_label(key)
606-
# Select a subset of columns or re-order columns.
607-
# In Ibis after you apply a projection, any column objects from the
608-
# table before the projection can't be combined with column objects
609-
# from the table after the projection. This is because the table after
610-
# a projection is considered a totally separate table expression.
611-
#
612-
# This is unexpected behavior for a pandas user, who expects their old
613-
# Series objects to still work with the new / mutated DataFrame. We
614-
# avoid applying a projection in Ibis until it's absolutely necessary
615-
# to provide pandas-like semantics.
616-
# TODO(swast): Do we need to apply implicit join when doing a
617-
# projection?
618653

619-
# Select a number of columns as DF.
620-
key = key if utils.is_list_like(key) else [key] # type:ignore
654+
if utils.is_list_like(key):
655+
return self._getitem_columns(key)
656+
else:
657+
# TODO(tswast): What case is this supposed to be handling?
658+
return self._getitem_columns([cast(Hashable, key)])
621659

660+
__getitem__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__getitem__)
661+
662+
def _getitem_columns(self, key: Sequence[blocks.Label]) -> DataFrame:
622663
selected_ids: Tuple[str, ...] = ()
623664
for label in key:
624665
col_ids = self._block.label_to_col_id[label]
625666
selected_ids = (*selected_ids, *col_ids)
626667

627668
return DataFrame(self._block.select_columns(selected_ids))
628669

629-
__getitem__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__getitem__)
630-
631670
def _getitem_label(self, key: blocks.Label):
632671
col_ids = self._block.cols_matching_label(key)
633672
if len(col_ids) == 0:

bigframes/exceptions.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,11 @@ class TimeTravelCacheWarning(Warning):
8484

8585

8686
class AmbiguousWindowWarning(Warning):
87-
"""A query may produce nondeterministic results as the window may be ambiguously ordered."""
87+
"""A query may produce nondeterministic results as the window may be ambiguously ordered.
88+
89+
Deprecated. Kept for backwards compatibility for code that filters warnings
90+
from this category.
91+
"""
8892

8993

9094
class UnknownDataTypeWarning(Warning):

bigframes/series.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1039,7 +1039,7 @@ def nsmallest(self, n: int = 5, keep: str = "first") -> Series:
10391039
block_ops.nsmallest(self._block, n, [self._value_column], keep=keep)
10401040
)
10411041

1042-
def isin(self, values) -> "Series" | None:
1042+
def isin(self, values) -> "Series":
10431043
if isinstance(values, Series):
10441044
return Series(self._block.isin(values._block))
10451045
if isinstance(values, indexes.Index):
@@ -1086,20 +1086,20 @@ def __xor__(self, other: bool | int | Series) -> Series:
10861086

10871087
__rxor__ = __xor__
10881088

1089-
def __add__(self, other: float | int | Series) -> Series:
1089+
def __add__(self, other: float | int | pandas.Timedelta | Series) -> Series:
10901090
return self.add(other)
10911091

10921092
__add__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__add__)
10931093

1094-
def __radd__(self, other: float | int | Series) -> Series:
1094+
def __radd__(self, other: float | int | pandas.Timedelta | Series) -> Series:
10951095
return self.radd(other)
10961096

10971097
__radd__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__radd__)
10981098

1099-
def add(self, other: float | int | Series) -> Series:
1099+
def add(self, other: float | int | pandas.Timedelta | Series) -> Series:
11001100
return self._apply_binary_op(other, ops.add_op)
11011101

1102-
def radd(self, other: float | int | Series) -> Series:
1102+
def radd(self, other: float | int | pandas.Timedelta | Series) -> Series:
11031103
return self._apply_binary_op(other, ops.add_op, reverse=True)
11041104

11051105
def __sub__(self, other: float | int | Series) -> Series:
@@ -1140,20 +1140,20 @@ def rmul(self, other: float | int | Series) -> Series:
11401140
multiply = mul
11411141
multiply.__doc__ = inspect.getdoc(vendored_pandas_series.Series.mul)
11421142

1143-
def __truediv__(self, other: float | int | Series) -> Series:
1143+
def __truediv__(self, other: float | int | pandas.Timedelta | Series) -> Series:
11441144
return self.truediv(other)
11451145

11461146
__truediv__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__truediv__)
11471147

1148-
def __rtruediv__(self, other: float | int | Series) -> Series:
1148+
def __rtruediv__(self, other: float | int | pandas.Timedelta | Series) -> Series:
11491149
return self.rtruediv(other)
11501150

11511151
__rtruediv__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__rtruediv__)
11521152

1153-
def truediv(self, other: float | int | Series) -> Series:
1153+
def truediv(self, other: float | int | pandas.Timedelta | Series) -> Series:
11541154
return self._apply_binary_op(other, ops.div_op)
11551155

1156-
def rtruediv(self, other: float | int | Series) -> Series:
1156+
def rtruediv(self, other: float | int | pandas.Timedelta | Series) -> Series:
11571157
return self._apply_binary_op(other, ops.div_op, reverse=True)
11581158

11591159
truediv.__doc__ = inspect.getdoc(vendored_pandas_series.Series.truediv)
@@ -1162,20 +1162,20 @@ def rtruediv(self, other: float | int | Series) -> Series:
11621162
rdiv = rtruediv
11631163
rdiv.__doc__ = inspect.getdoc(vendored_pandas_series.Series.rtruediv)
11641164

1165-
def __floordiv__(self, other: float | int | Series) -> Series:
1165+
def __floordiv__(self, other: float | int | pandas.Timedelta | Series) -> Series:
11661166
return self.floordiv(other)
11671167

11681168
__floordiv__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__floordiv__)
11691169

1170-
def __rfloordiv__(self, other: float | int | Series) -> Series:
1170+
def __rfloordiv__(self, other: float | int | pandas.Timedelta | Series) -> Series:
11711171
return self.rfloordiv(other)
11721172

11731173
__rfloordiv__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__rfloordiv__)
11741174

1175-
def floordiv(self, other: float | int | Series) -> Series:
1175+
def floordiv(self, other: float | int | pandas.Timedelta | Series) -> Series:
11761176
return self._apply_binary_op(other, ops.floordiv_op)
11771177

1178-
def rfloordiv(self, other: float | int | Series) -> Series:
1178+
def rfloordiv(self, other: float | int | pandas.Timedelta | Series) -> Series:
11791179
return self._apply_binary_op(other, ops.floordiv_op, reverse=True)
11801180

11811181
def __pow__(self, other: float | int | Series) -> Series:

0 commit comments

Comments
 (0)