Skip to content

Commit 2e86a47

Browse files
committed
address comments
1 parent a242824 commit 2e86a47

File tree

5 files changed

+103
-65
lines changed

5 files changed

+103
-65
lines changed

bigframes/core/compile/compiled.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from google.cloud import bigquery
3030
import pyarrow as pa
3131

32+
from bigframes.core import utils
3233
import bigframes.core.compile.aggregate_compiler as agg_compiler
3334
import bigframes.core.compile.googlesql
3435
import bigframes.core.compile.ibis_types
@@ -597,6 +598,17 @@ def _convert_range_ordering_to_table_value(
597598
value_lookup: typing.Mapping[str, ibis_types.Value],
598599
ordering_column: OrderingExpression,
599600
) -> ibis_types.Value:
601+
"""Converts the ordering for range windows to Ibis references.
602+
603+
Note that this method is different from `_convert_row_ordering_to_table_values` in
604+
that it does not arrange null values. There are two reasons:
605+
1. Manipulating null positions requires more than one ordering key, which is forbidden
606+
by SQL window syntax for range rolling.
607+
2. Pandas does not allow range rolling on timeseries with nulls.
608+
609+
Therefore, we opt for the simplest approach here: generate the simplest SQL and follow
610+
the BigQuery engine behavior.
611+
"""
600612
expr = op_compiler.compile_expression(
601613
ordering_column.scalar_expression, value_lookup
602614
)
@@ -695,8 +707,14 @@ def _add_boundary(
695707
) -> ibis_expr_builders.LegacyWindowBuilder:
696708
if isinstance(bounds, RangeWindowBounds):
697709
return ibis_window.range(
698-
start=_to_ibis_boundary(bounds.start_micros),
699-
end=_to_ibis_boundary(bounds.end_micros),
710+
start=_to_ibis_boundary(
711+
None
712+
if bounds.start is None
713+
else utils.timedelta_to_micros(bounds.start)
714+
),
715+
end=_to_ibis_boundary(
716+
None if bounds.end is None else utils.timedelta_to_micros(bounds.end)
717+
),
700718
)
701719
if isinstance(bounds, RowsWindowBounds):
702720
if bounds.start is not None or bounds.end is not None:

bigframes/core/window/ordering.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from functools import singledispatch
16+
17+
from bigframes.core import expression as ex
18+
from bigframes.core import nodes, ordering
19+
20+
21+
@singledispatch
22+
def find_order_direction(
23+
root: nodes.BigFrameNode, column_id: str
24+
) -> ordering.OrderingDirection | None:
25+
"""Returns the order of the given column with tree traversal. If the column cannot be found,
26+
or the ordering information is not available, return None.
27+
"""
28+
return None
29+
30+
31+
@find_order_direction.register
32+
def _(root: nodes.OrderByNode, column_id: str):
33+
if len(root.by) == 0:
34+
# This is a no-op
35+
return find_order_direction(root.child, column_id)
36+
37+
# Make sure the window key is the prefix of sorting keys.
38+
order_expr = root.by[0]
39+
scalar_expr = order_expr.scalar_expression
40+
if isinstance(scalar_expr, ex.DerefOp) and scalar_expr.id.name == column_id:
41+
return order_expr.direction
42+
43+
return None
44+
45+
46+
@find_order_direction.register
47+
def _(root: nodes.ReversedNode, column_id: str):
48+
direction = find_order_direction(root.child, column_id)
49+
50+
if direction is None:
51+
return None
52+
return direction.reverse()
53+
54+
55+
@find_order_direction.register
56+
def _(root: nodes.SelectionNode, column_id: str):
57+
for alias_ref in root.input_output_pairs:
58+
if alias_ref.id.name == column_id:
59+
return find_order_direction(root.child, alias_ref.ref.id.name)
60+
61+
62+
@find_order_direction.register
63+
def _(root: nodes.FilterNode, column_id: str):
64+
return find_order_direction(root.child, column_id)
65+
66+
67+
@find_order_direction.register
68+
def _(root: nodes.InNode, column_id: str):
69+
return find_order_direction(root.left_child, column_id)
70+
71+
72+
@find_order_direction.register
73+
def _(root: nodes.WindowOpNode, column_id: str):
74+
return find_order_direction(root.child, column_id)

bigframes/core/window/rolling.py

Lines changed: 5 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
from __future__ import annotations
1616

1717
import datetime
18-
from functools import singledispatch
1918
import typing
2019

2120
import bigframes_vendored.pandas.core.window.rolling as vendored_pandas_rolling
@@ -24,8 +23,9 @@
2423

2524
from bigframes import dtypes
2625
from bigframes.core import expression as ex
27-
from bigframes.core import log_adapter, nodes, ordering, window_spec
26+
from bigframes.core import log_adapter, ordering, window_spec
2827
import bigframes.core.blocks as blocks
28+
from bigframes.core.window import ordering as window_ordering
2929
import bigframes.operations.aggregations as agg_ops
3030

3131

@@ -140,7 +140,9 @@ def create_range_window(
140140
if index_dtypes[0] != dtypes.TIMESTAMP_DTYPE:
141141
raise ValueError("Index type should be timestamps with timezones")
142142

143-
order_direction = _find_order_direction(block.expr.node, block.index_columns[0])
143+
order_direction = window_ordering.find_order_direction(
144+
block.expr.node, block.index_columns[0]
145+
)
144146
if order_direction is None:
145147
raise ValueError(
146148
"The index might not be in a monotonic order. Please sort the index before rolling."
@@ -157,48 +159,3 @@ def create_range_window(
157159
),
158160
)
159161
return Window(block, spec, block.value_columns, is_series=is_series)
160-
161-
162-
@singledispatch
163-
def _find_order_direction(
164-
root: nodes.BigFrameNode, column_id: str
165-
) -> ordering.OrderingDirection | None:
166-
"""Returns the order of the given column with tree traversal. If the column cannot be found,
167-
or the ordering information is not available, return None.
168-
"""
169-
return None
170-
171-
172-
@_find_order_direction.register
173-
def _(root: nodes.OrderByNode, column_id: str):
174-
if len(root.by) == 0:
175-
return None
176-
177-
# Make sure the window key is the prefix of sorting keys.
178-
order_expr = root.by[0]
179-
scalar_expr = order_expr.scalar_expression
180-
if isinstance(scalar_expr, ex.DerefOp) and scalar_expr.id.name == column_id:
181-
return order_expr.direction
182-
183-
return None
184-
185-
186-
@_find_order_direction.register
187-
def _(root: nodes.ReversedNode, column_id: str):
188-
direction = _find_order_direction(root.child, column_id)
189-
190-
if direction is None:
191-
return None
192-
return direction.reverse()
193-
194-
195-
@_find_order_direction.register
196-
def _(root: nodes.SelectionNode, column_id: str):
197-
for alias_ref in root.input_output_pairs:
198-
if alias_ref.id.name == column_id:
199-
return _find_order_direction(root.child, alias_ref.ref.id.name)
200-
201-
202-
@_find_order_direction.register
203-
def _(root: nodes.FilterNode, column_id: str):
204-
return _find_order_direction(root.child, column_id)

bigframes/core/window_spec.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import numpy as np
2222
import pandas as pd
2323

24-
from bigframes.core import utils
2524
import bigframes.core.expression as ex
2625
import bigframes.core.identifiers as ids
2726
import bigframes.core.ordering as orderings
@@ -199,18 +198,6 @@ def from_timedelta_window(
199198
else:
200199
raise ValueError(f"Unsupported value for 'closed' parameter: {closed}")
201200

202-
@property
203-
def start_micros(self) -> int | None:
204-
if self.start is None:
205-
return None
206-
return utils.timedelta_to_micros(self.start)
207-
208-
@property
209-
def end_micros(self) -> int | None:
210-
if self.end is None:
211-
return None
212-
return utils.timedelta_to_micros(self.end)
213-
214201
def __post_init__(self):
215202
if self.start is None:
216203
return

tests/system/small/test_window.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -260,16 +260,18 @@ def test_range_rolling_order_info_lookup(range_rolling_dfs):
260260
actual_result = (
261261
bf_df.set_index("ts_col")
262262
.sort_index(ascending=False)["int_col"]
263+
.isin(bf_df["int_col"])
263264
.rolling(window="3s")
264-
.min()
265+
.count()
265266
.to_pandas()
266267
)
267268

268269
expected_result = (
269270
pd_df.set_index("ts_col")
270271
.sort_index(ascending=False)["int_col"]
272+
.isin(pd_df["int_col"])
271273
.rolling(window="3s")
272-
.min()
274+
.count()
273275
)
274276
pd.testing.assert_series_equal(
275277
actual_result, expected_result, check_dtype=False, check_index=False

0 commit comments

Comments
 (0)