Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 28 additions & 9 deletions bigframes/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@

if typing.TYPE_CHECKING:
import bigframes.dataframe
import bigframes.operations.strings
import bigframes.series


Expand Down Expand Up @@ -254,6 +255,12 @@ def query_job(self) -> bigquery.QueryJob:
self._query_job = query_job
return self._query_job

@property
def str(self) -> bigframes.operations.strings.StringMethods:
    """Return a ``StringMethods`` accessor constructed from this index."""
    # Function-scope import: at module level this module is only imported
    # under ``typing.TYPE_CHECKING``, so it is brought in here at call time.
    # NOTE(review): presumably deferred to avoid an import cycle - confirm.
    import bigframes.operations.strings

    accessor_cls = bigframes.operations.strings.StringMethods
    return accessor_cls(self)

def get_loc(self, key) -> typing.Union[int, slice, "bigframes.series.Series"]:
"""Get integer location, slice or boolean mask for requested label.

Expand Down Expand Up @@ -317,7 +324,9 @@ def get_loc(self, key) -> typing.Union[int, slice, "bigframes.series.Series"]:
result_series = bigframes.series.Series(mask_block)
return result_series.astype("boolean")

def _get_monotonic_slice(self, filtered_block, offsets_id: str) -> slice:
def _get_monotonic_slice(
self, filtered_block, offsets_id: __builtins__.str
) -> slice:
"""Helper method to get a slice for monotonic duplicates with an optimized query."""
# Combine min and max aggregations into a single query for efficiency
min_max_aggs = [
Expand All @@ -343,7 +352,7 @@ def _get_monotonic_slice(self, filtered_block, offsets_id: str) -> slice:
# Create slice (stop is exclusive)
return slice(min_pos, max_pos + 1)

def __repr__(self) -> str:
def __repr__(self) -> __builtins__.str:
# Protect against errors with uninitialized Series. See:
# https://github.com/googleapis/python-bigquery-dataframes/issues/728
if not hasattr(self, "_block"):
Expand Down Expand Up @@ -417,7 +426,7 @@ def sort_values(
*,
inplace: bool = False,
ascending: bool = True,
na_position: str = "last",
na_position: __builtins__.str = "last",
) -> Index:
if na_position not in ["first", "last"]:
raise ValueError("Param na_position must be one of 'first' or 'last'")
Expand Down Expand Up @@ -604,7 +613,7 @@ def dropna(self, how: typing.Literal["all", "any"] = "any") -> Index:
result = block_ops.dropna(self._block, self._block.index_columns, how=how)
return Index(result)

def drop_duplicates(self, *, keep: str = "first") -> Index:
def drop_duplicates(self, *, keep: __builtins__.str = "first") -> Index:
if keep is not False:
validations.enforce_ordered(self, "drop_duplicates")
block = block_ops.drop_duplicates(self._block, self._block.index_columns, keep)
Expand Down Expand Up @@ -656,6 +665,9 @@ def __contains__(self, key) -> bool:
block, match_col = self._block.project_expr(match_expr_final)
return cast(bool, block.get_stat(match_col, agg_ops.AnyOp()))

def _apply_unary_op(self, op: ops.UnaryOp) -> Index:
    """Apply the unary operator ``op`` by delegating to ``_apply_unary_expr``.

    The operator is converted into an expression over a free variable named
    ``"input"``, and that expression is passed to ``_apply_unary_expr``.
    """
    input_var = ex.free_var("input")
    unary_expr = op.as_expr(input_var)
    return self._apply_unary_expr(unary_expr)

def _apply_unary_expr(
self,
op: ex.Expression,
Expand Down Expand Up @@ -762,9 +774,15 @@ def item(self):
return self.to_series().peek(2).item()

def __eq__(self, other) -> Index: # type: ignore
return self._apply_binop(other, ops.eq_op)
return self._apply_binary_op(other, ops.eq_op)

def _apply_binop(self, other, op: ops.BinaryOp) -> Index:
def _apply_binary_op(
self,
other,
op: ops.BinaryOp,
alignment: typing.Literal["outer", "left"] = "outer",
) -> Index:
# Note: the alignment arg exists for compatibility with accessors; it is ignored because it is irrelevant for implicit joins.
# TODO: Handle local objects, or objects not implicitly alignable? Gets ambiguous with partial ordering though
if isinstance(other, (bigframes.series.Series, Index)):
other = Index(other)
Expand All @@ -785,12 +803,13 @@ def _apply_binop(self, other, op: ops.BinaryOp) -> Index:
for lid, rid in zip(lexpr.column_ids, rexpr.column_ids)
]
)
labels = self.names if self.names == other.names else [None] * len(res_ids)
return Index(
blocks.Block(
expr.select_columns(res_ids),
index_columns=res_ids,
column_labels=[],
index_labels=[None] * len(res_ids),
index_labels=labels,
)
)
elif (
Expand All @@ -799,7 +818,7 @@ def _apply_binop(self, other, op: ops.BinaryOp) -> Index:
block, id = self._block.project_expr(
op.as_expr(self._block.index_columns[0], ex.const(other))
)
return Index(block.select_column(id))
return Index(block.set_index([id], index_labels=self.names))
elif isinstance(other, tuple) and len(other) == self.nlevels:
block = self._block.project_exprs(
[
Expand All @@ -809,7 +828,7 @@ def _apply_binop(self, other, op: ops.BinaryOp) -> Index:
labels=[None] * self.nlevels,
drop=True,
)
return Index(block.set_index(block.value_columns))
return Index(block.set_index(block.value_columns, index_labels=self.names))
else:
return NotImplemented

Expand Down
2 changes: 1 addition & 1 deletion bigframes/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def __eq__(self, other) -> Index: # type: ignore
import bigframes.operations as ops
import bigframes.operations.aggregations as agg_ops

eq_result = self._apply_binop(other, ops.eq_op)._block.expr
eq_result = self._apply_binary_op(other, ops.eq_op)._block.expr

as_array = ops.ToArrayOp().as_expr(
*(
Expand Down
Loading