Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 25 additions & 9 deletions bigframes/core/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,25 +387,39 @@ def reversed(self) -> Block:
index_labels=self.index.names,
)

def reset_index(self, drop: bool = True) -> Block:
def reset_index(self, level: LevelsType = None, drop: bool = True) -> Block:
"""Reset the index of the block, promoting the old index to a value column.

Arguments:
level: the label(s) or integer position(s) of the index levels to remove; all levels are removed when omitted.
drop: if True, discard the removed index levels instead of promoting them to value columns.

Returns:
A new Block because dropping index columns can break references
from Index classes that point to this block.
"""
if level:
# preserve original order, not user provided order
level_ids: Sequence[str] = [
id for id in self.index_columns if id in self.index.resolve_level(level)
]
else:
level_ids = self.index_columns

expr = self._expr
if (
if set(self.index_columns) > set(level_ids):
new_index_cols = [col for col in self.index_columns if col not in level_ids]
new_index_labels = [self.col_id_to_index_name[id] for id in new_index_cols]
elif (
self.session._default_index_type
== bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64
):
expr, new_index_col_id = expr.promote_offsets()
new_index_cols = [new_index_col_id]
new_index_labels = [None]
elif self.session._default_index_type == bigframes.enums.DefaultIndexKind.NULL:
new_index_cols = []
new_index_labels = []
else:
raise ValueError(
f"Unrecognized default index kind: {self.session._default_index_type}"
Expand All @@ -415,22 +429,23 @@ def reset_index(self, drop: bool = True) -> Block:
# Even though the index might be part of the ordering, keep that
# ordering expression as reset_index shouldn't change the row
# order.
expr = expr.drop_columns(self.index_columns)
expr = expr.drop_columns(level_ids)
return Block(
expr,
index_columns=new_index_cols,
index_labels=new_index_labels,
column_labels=self.column_labels,
)
else:
# Add index names to column index
index_labels = self.index.names
column_labels_modified = self.column_labels
for level, label in enumerate(index_labels):
for position, level_id in enumerate(level_ids):
label = self.col_id_to_index_name[level_id]
if label is None:
if "index" not in self.column_labels and len(index_labels) <= 1:
if "index" not in self.column_labels and self.index.nlevels <= 1:
label = "index"
else:
label = f"level_{level}"
label = f"level_{self.index_columns.index(level_id)}"

if label in self.column_labels:
raise ValueError(f"cannot insert {label}, already exists")
Expand All @@ -439,11 +454,12 @@ def reset_index(self, drop: bool = True) -> Block:
label = tuple(label if i == 0 else "" for i in range(nlevels))
# Create index copy with label inserted
# See: https://pandas.pydata.org/docs/reference/api/pandas.Index.insert.html
column_labels_modified = column_labels_modified.insert(level, label)
column_labels_modified = column_labels_modified.insert(position, label)

return Block(
expr,
expr.select_columns((*new_index_cols, *level_ids, *self.value_columns)),
index_columns=new_index_cols,
index_labels=new_index_labels,
column_labels=column_labels_modified,
)

Expand Down
36 changes: 33 additions & 3 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2315,9 +2315,39 @@ def _assign_series_join_on_index(

return DataFrame(block.with_index_labels(self._block.index.names))

def reset_index(self, *, drop: bool = False) -> DataFrame:
block = self._block.reset_index(drop)
return DataFrame(block)
@overload  # type: ignore[override]
def reset_index(
    self,
    level: blocks.LevelsType = ...,
    drop: bool = ...,
    inplace: Literal[False] = ...,
) -> DataFrame:
    ...

@overload
def reset_index(
    self,
    level: blocks.LevelsType = ...,
    drop: bool = ...,
    inplace: Literal[True] = ...,
) -> None:
    ...

@overload
def reset_index(
    self, level: blocks.LevelsType = None, drop: bool = False, inplace: bool = ...
) -> Optional[DataFrame]:
    ...

def reset_index(
    self, level: blocks.LevelsType = None, drop: bool = False, inplace: bool = False
) -> Optional[DataFrame]:
    # The block does the actual index manipulation; `level` and `drop` are
    # forwarded to it unchanged.
    new_block = self._block.reset_index(level, drop)
    if not inplace:
        # Default path: leave this object untouched and hand back a new frame.
        return DataFrame(new_block)
    # In-place path: swap this object's block and return nothing.
    self._set_block(new_block)
    return None

def set_index(
self,
Expand Down
46 changes: 44 additions & 2 deletions bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,17 +406,59 @@ def equals(
return False
return block_ops.equals(self._block, other._block)

@overload # type: ignore[override]
def reset_index(
self,
level: blocks.LevelsType = ...,
*,
name: typing.Optional[str] = ...,
drop: Literal[False] = ...,
inplace: Literal[False] = ...,
) -> bigframes.dataframe.DataFrame:
...

@overload
def reset_index(
self,
level: blocks.LevelsType = ...,
*,
name: typing.Optional[str] = ...,
drop: Literal[True] = ...,
inplace: Literal[False] = ...,
) -> Series:
...

@overload
def reset_index(
self,
level: blocks.LevelsType = ...,
*,
name: typing.Optional[str] = ...,
drop: bool = ...,
inplace: Literal[True] = ...,
) -> None:
...

@validations.requires_ordering()
def reset_index(
self,
level: blocks.LevelsType = None,
*,
name: typing.Optional[str] = None,
drop: bool = False,
) -> bigframes.dataframe.DataFrame | Series:
block = self._block.reset_index(drop)
inplace: bool = False,
) -> bigframes.dataframe.DataFrame | Series | None:
block = self._block.reset_index(level, drop)
if drop:
if inplace:
self._set_block(block)
return None
return Series(block)
else:
if inplace:
raise ValueError(
"Series.reset_index cannot combine inplace=True and drop=False"
)
if name:
block = block.assign_label(self._value_column, name)
return bigframes.dataframe.DataFrame(block)
Expand Down
20 changes: 20 additions & 0 deletions tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2070,6 +2070,26 @@ def test_reset_index(scalars_df_index, scalars_pandas_df_index, drop):
pandas.testing.assert_frame_equal(bf_result, pd_result)


@pytest.mark.parametrize("drop", [True, False])
def test_reset_index_inplace(scalars_df_index, scalars_pandas_df_index, drop):
    """reset_index(inplace=True) on a DataFrame copy matches pandas for both drop values."""
    bf_df = scalars_df_index.copy()
    bf_df.reset_index(drop=drop, inplace=True)
    assert bf_df.index.name is None
    actual = bf_df.to_pandas()

    expected = scalars_pandas_df_index.copy()
    expected.reset_index(drop=drop, inplace=True)
    # Pandas uses int64 instead of Int64 (nullable) dtype.
    expected.index = expected.index.astype(pd.Int64Dtype())

    # reset_index should maintain the original ordering.
    pandas.testing.assert_frame_equal(actual, expected)


def test_reset_index_then_filter(
scalars_df_index,
scalars_pandas_df_index,
Expand Down
59 changes: 54 additions & 5 deletions tests/system/small/test_multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,20 +101,69 @@ def test_set_multi_index(scalars_df_index, scalars_pandas_df_index):
pandas.testing.assert_frame_equal(bf_result, pd_result)


def test_reset_multi_index(scalars_df_index, scalars_pandas_df_index):
@pytest.mark.parametrize(
("level", "drop"),
[
(None, True),
(None, False),
(1, True),
("bool_col", True),
(["float64_col", "int64_too"], True),
([2, 0], False),
],
)
def test_df_reset_multi_index(scalars_df_index, scalars_pandas_df_index, level, drop):
bf_result = (
scalars_df_index.set_index(["bool_col", "int64_too"]).reset_index().to_pandas()
scalars_df_index.set_index(["bool_col", "int64_too", "float64_col"])
.reset_index(level=level, drop=drop)
.to_pandas()
)
pd_result = scalars_pandas_df_index.set_index(
["bool_col", "int64_too"]
).reset_index()
["bool_col", "int64_too", "float64_col"]
).reset_index(level=level, drop=drop)

# Pandas uses int64 instead of Int64 (nullable) dtype.
pd_result.index = pd_result.index.astype(pandas.Int64Dtype())
if pd_result.index.dtype != bf_result.index.dtype:
pd_result.index = pd_result.index.astype(pandas.Int64Dtype())

pandas.testing.assert_frame_equal(bf_result, pd_result)


@pytest.mark.parametrize(
    ("level", "drop"),
    [
        (None, True),
        (None, False),
        (1, True),
        ("bool_col", True),
        (["float64_col", "int64_too"], True),
        ([2, 0], False),
    ],
)
def test_series_reset_multi_index(
    scalars_df_index, scalars_pandas_df_index, level, drop
):
    """Series.reset_index(level=..., drop=...) on a three-level index matches pandas."""
    index_cols = ("bool_col", "int64_too", "float64_col")

    bf_series = scalars_df_index.set_index(list(index_cols))["string_col"]
    bf_result = bf_series.reset_index(level=level, drop=drop).to_pandas()

    pd_series = scalars_pandas_df_index.set_index(list(index_cols))["string_col"]
    pd_result = pd_series.reset_index(level=level, drop=drop)

    # Pandas uses int64 instead of Int64 (nullable) dtype.
    if pd_result.index.dtype != bf_result.index.dtype:
        pd_result.index = pd_result.index.astype(pandas.Int64Dtype())

    # With drop=True the result is still a Series; otherwise it is a DataFrame.
    if not drop:
        pandas.testing.assert_frame_equal(bf_result, pd_result)
    else:
        pandas.testing.assert_series_equal(bf_result, pd_result)


def test_series_multi_index_idxmin(scalars_df_index, scalars_pandas_df_index):
bf_result = scalars_df_index.set_index(["bool_col", "int64_too"])[
"float64_col"
Expand Down
12 changes: 12 additions & 0 deletions tests/system/small/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1339,6 +1339,18 @@ def test_reset_index_drop(scalars_df_index, scalars_pandas_df_index):
pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result)


def test_series_reset_index_inplace(scalars_df_index, scalars_pandas_df_index):
    """Series.reset_index(drop=True, inplace=True) matches pandas after a descending sort."""
    bf_series = scalars_df_index.sort_index(ascending=False)["float64_col"]
    bf_series.reset_index(drop=True, inplace=True)

    expected = scalars_pandas_df_index.sort_index(ascending=False)["float64_col"]
    expected.reset_index(drop=True, inplace=True)
    # BigQuery DataFrames default indices use nullable Int64 always
    expected.index = expected.index.astype("Int64")

    actual = bf_series.to_pandas()
    pd.testing.assert_series_equal(actual, expected)


@pytest.mark.parametrize(
("name",),
[
Expand Down
6 changes: 2 additions & 4 deletions tests/unit/test_dataframe_polars.py
Original file line number Diff line number Diff line change
Expand Up @@ -1657,13 +1657,11 @@ def test_reset_index_with_unnamed_index(
pandas.testing.assert_frame_equal(bf_result, pd_result)


def test_reset_index_with_unnamed_multiindex(
scalars_df_index,
scalars_pandas_df_index,
):
def test_reset_index_with_unnamed_multiindex(session):
bf_df = dataframe.DataFrame(
([1, 2, 3], [2, 5, 7]),
index=pd.MultiIndex.from_tuples([("a", "aa"), ("a", "aa")]),
session=session,
)
pd_df = pd.DataFrame(
([1, 2, 3], [2, 5, 7]),
Expand Down
7 changes: 7 additions & 0 deletions third_party/bigframes_vendored/pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1601,8 +1601,10 @@ def droplevel(self, level, axis: str | int = 0):

def reset_index(
self,
level=None,
*,
drop: bool = False,
inplace: bool = False,
) -> DataFrame | None:
"""Reset the index.

Expand Down Expand Up @@ -1696,9 +1698,14 @@ class name speed max


Args:
level (int, str, tuple, or list, default None):
Only remove the given levels from the index. Removes all levels by
default.
drop (bool, default False):
Do not try to insert index into dataframe columns. This resets
the index to the default integer index.
inplace (bool, default False):
Whether to modify the DataFrame rather than creating a new one.

Returns:
bigframes.pandas.DataFrame or None: DataFrame with the new index, or None if ``inplace=True``.
Expand Down
7 changes: 7 additions & 0 deletions third_party/bigframes_vendored/pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,9 +321,11 @@ def transpose(self) -> Series:

def reset_index(
self,
level=None,
*,
drop: bool = False,
name=pd_ext.no_default,
inplace: bool = False,
) -> DataFrame | Series | None:
"""
Generate a new DataFrame or Series with the index reset.
Expand Down Expand Up @@ -399,13 +401,18 @@ def reset_index(
[4 rows x 3 columns]

Args:
level (int, str, tuple, or list, default None):
For a Series with a MultiIndex, only remove the specified levels
from the index. Removes all levels by default.
drop (bool, default False):
Just reset the index, without inserting it as a column in
the new DataFrame.
name (object, optional):
The name to use for the column containing the original Series
values. Uses ``self.name`` by default. This argument is ignored
when `drop` is True.
inplace (bool, default False):
Modify the Series in place (do not create a new object).

Returns:
bigframes.pandas.Series or bigframes.pandas.DataFrame or None:
Expand Down