1717from __future__ import annotations
1818
1919import typing
20- from typing import Callable , Tuple
20+ from typing import Callable , Sequence , Tuple , Union
2121
2222import numpy as np
2323import pandas
2626import bigframes .core as core
2727import bigframes .core .blocks as blocks
2828import bigframes .core .joins as joins
29+ import bigframes .core .utils as utils
30+ import bigframes .dtypes
2931import bigframes .dtypes as bf_dtypes
32+ import bigframes .operations as ops
33+ import bigframes .operations .aggregations as agg_ops
3034import third_party .bigframes_vendored .pandas .core .indexes .base as vendored_pandas_index
3135
3236
@@ -51,16 +55,34 @@ def names(self) -> typing.Sequence[blocks.Label]:
5155
@names.setter
def names(self, values: typing.Sequence[blocks.Label]):
    """Relabel the index levels and store the resulting block on the data object.

    Args:
        values: The new labels, passed to the block's ``with_index_labels``.
    """
    relabeled_block = self._block.with_index_labels(values)
    return self._data._set_block(relabeled_block)
5559
@property
def nlevels(self) -> int:
    """Number of index levels, i.e. the count of index columns in the block."""
    index_columns = self._data._get_block().index_columns
    return len(index_columns)
5963
@property
def values(self) -> np.ndarray:
    """The index labels as an array; simply the result of ``to_numpy()``."""
    as_array = self.to_numpy()
    return as_array
67+
@property
def ndim(self) -> int:
    """Number of dimensions; this is the constant 1."""
    dimensions = 1
    return dimensions
71+
@property
def shape(self) -> typing.Tuple[int]:
    """One-element tuple holding the first entry of the block's shape."""
    block_shape = self._data._get_block().shape
    row_count = block_shape[0]
    return (row_count,)
6375
@property
def dtype(self):
    """Dtype of the index.

    With exactly one level this is the first entry of the block's
    ``index_dtypes``; otherwise the NumPy object dtype is returned.
    """
    if self.nlevels == 1:
        return self._block.index_dtypes[0]
    return np.dtype("O")
79+
@property
def dtypes(self) -> pandas.Series:
    """Series of the block's index dtypes, keyed by the block's index labels."""
    block = self._block
    level_dtypes = block.index_dtypes
    level_labels = block.index_labels
    return pandas.Series(data=level_dtypes, index=level_labels)  # type:ignore
85+
6486 @property
6587 def size (self ) -> int :
6688 """Returns the size of the Index."""
@@ -103,23 +125,120 @@ def is_monotonic_decreasing(self) -> bool:
103125
@property
def is_unique(self) -> bool:
    """Whether the index has no repeated entries; the negation of ``has_duplicates``."""
    contains_repeats = self.has_duplicates
    return not contains_repeats
131+
@property
def has_duplicates(self) -> bool:
    """Whether any row is flagged as a duplicate across the index columns.

    Calls ``block_transforms.indicate_duplicates`` on the index columns,
    keeps only the indicator column, and reduces it with ``any()``.
    """
    # TODO: Cache this at block level
    # Avoid circular imports
    import bigframes.core.block_transforms as block_ops
    import bigframes.dataframe as df

    marked_block, indicator = block_ops.indicate_duplicates(
        self._block, self._block.index_columns
    )
    # Only the indicator column is needed; give it a stable label to select by.
    flags_only = marked_block.select_columns([indicator])
    flags_only = flags_only.with_column_labels(["is_duplicate"])
    flags_df = df.DataFrame(flags_only)
    return flags_df["is_duplicate"].any()
147+
@property
def _block(self) -> blocks.Block:
    """Shortcut for the block returned by the wrapped data object's ``_get_block()``."""
    current_block = self._data._get_block()
    return current_block
151+
def astype(
    self,
    dtype: Union[bigframes.dtypes.DtypeString, bigframes.dtypes.Dtype],
) -> Index:
    """Return a new Index produced by applying ``ops.AsTypeOp(dtype)``.

    Args:
        dtype: Target dtype, forwarded to ``ops.AsTypeOp``.

    Raises:
        TypeError: If the index has more than one level.
    """
    is_multi_level = self.nlevels > 1
    if is_multi_level:
        raise TypeError("Multiindex does not support 'astype'")
    cast_op = ops.AsTypeOp(dtype)
    return self._apply_unary_op(cast_op)
159+
def all(self) -> bool:
    """Aggregate the index with ``agg_ops.all_op`` and return the result as a bool.

    Raises:
        TypeError: If the index has more than one level.
    """
    if self.nlevels > 1:
        raise TypeError("Multiindex does not support 'all'")
    outcome = self._apply_aggregation(agg_ops.all_op)
    return typing.cast(bool, outcome)
164+
def any(self) -> bool:
    """Aggregate the index with ``agg_ops.any_op`` and return the result as a bool.

    Raises:
        TypeError: If the index has more than one level.
    """
    if self.nlevels > 1:
        raise TypeError("Multiindex does not support 'any'")
    outcome = self._apply_aggregation(agg_ops.any_op)
    return typing.cast(bool, outcome)
169+
def nunique(self) -> int:
    """Aggregate the index with ``agg_ops.nunique_op`` and return the result as an int."""
    distinct_count = self._apply_aggregation(agg_ops.nunique_op)
    return typing.cast(int, distinct_count)
172+
def max(self) -> typing.Any:
    """Aggregate the index with ``agg_ops.max_op`` and return the result."""
    largest = self._apply_aggregation(agg_ops.max_op)
    return largest
175+
def min(self) -> typing.Any:
    """Aggregate the index with ``agg_ops.min_op`` and return the result."""
    smallest = self._apply_aggregation(agg_ops.min_op)
    return smallest
178+
def fillna(self, value=None) -> Index:
    """Return a new Index produced by applying ``ops.fillna_op`` with ``value``.

    Args:
        value: Bound as the right-hand operand of ``ops.fillna_op``.

    Raises:
        TypeError: If the index has more than one level.
    """
    if self.nlevels > 1:
        raise TypeError("Multiindex does not support 'fillna'")
    fill_op = ops.partial_right(ops.fillna_op, value)
    return self._apply_unary_op(fill_op)
183+
def rename(self, name: Union[str, Sequence[str]]) -> Index:
    """Return a new Index whose levels carry the given name(s).

    Args:
        name: A single string, or a sequence with one entry per index level.

    Raises:
        ValueError: If the number of names differs from ``nlevels``.
    """
    if isinstance(name, str):
        names = [name]
    else:
        names = list(name)
    if len(names) != self.nlevels:
        raise ValueError("'name' must be same length as levels")

    # Imported inside the method rather than at module level.
    import bigframes.dataframe as df

    relabeled_block = self._block.with_index_labels(names)
    return Index(df.DataFrame(relabeled_block))
192+
def drop(
    self,
    labels: typing.Any,
) -> Index:
    """Return a new Index without the entries that match ``labels``.

    Only the first index column is compared against ``labels``. A list-like
    ``labels`` is matched by membership (nulls included); anything else is
    matched with a not-equal comparison.

    Args:
        labels: A single label or a list-like of labels to remove.
    """
    # ignore axis, columns params
    source_block = self._block
    level_id = source_block.index_columns[0]
    if not utils.is_list_like(labels):
        # Single label: the keep-mask is "value != label".
        block, condition_id = source_block.apply_unary_op(
            level_id, ops.partial_right(ops.ne_op, labels)
        )
    else:
        # Several labels: compute membership, then invert it into a keep-mask.
        block, membership_id = source_block.apply_unary_op(
            level_id, ops.IsInOp(labels, match_nulls=True)
        )
        block, condition_id = block.apply_unary_op(membership_id, ops.invert_op)
    block = block.filter(condition_id, keep_null=True)
    block = block.drop_columns([condition_id])

    # Imported inside the method rather than at module level.
    import bigframes.dataframe as df

    # Select no value columns, so only the index columns are kept.
    index_only = block.select_columns([])
    return Index(df.DataFrame(index_only))
216+
def _apply_unary_op(
    self,
    op: ops.UnaryOp,
) -> Index:
    """Applies a unary operator to every index column and returns the new Index.

    The result columns become the index, and the original index labels are
    reused for them.
    """
    # Imported inside the method rather than at module level.
    import bigframes.dataframe as df

    original_block = self._block
    working_block = original_block
    new_index_ids = []
    # Each call returns an updated block, so it is threaded through the loop.
    for column_id in original_block.index_columns:
        working_block, new_id = working_block.apply_unary_op(column_id, op)
        new_index_ids.append(new_id)

    reindexed = working_block.set_index(
        new_index_ids, index_labels=original_block.index_labels
    )
    return Index(df.DataFrame(reindexed))
232+
def _apply_aggregation(self, op: agg_ops.AggregateOp) -> typing.Any:
    """Compute ``op`` over the single index column via the block's ``get_stat``.

    Raises:
        NotImplementedError: If the index has more than one level.
    """
    if self.nlevels > 1:
        raise NotImplementedError(f"Multiindex does not yet support {op.name}")
    block = self._block
    only_column = block.index_columns[0]
    return block.get_stat(only_column, op)
119238
120239 def __getitem__ (self , key : int ) -> typing .Any :
121240 if isinstance (key , int ):
122- result_pd_df , _ = self ._data . _get_block () .slice (key , key + 1 , 1 ).to_pandas ()
241+ result_pd_df , _ = self ._block .slice (key , key + 1 , 1 ).to_pandas ()
123242 if result_pd_df .empty :
124243 raise IndexError ("single positional indexer is out-of-bounds" )
125244 return result_pd_df .index [0 ]
@@ -133,7 +252,7 @@ def to_pandas(self) -> pandas.Index:
133252 pandas.Index:
134253 A pandas Index with all of the labels from this Index.
135254 """
136- return IndexValue (self ._data . _get_block () ).to_pandas ()
255+ return IndexValue (self ._block ).to_pandas ()
137256
138257 def to_numpy (self , dtype = None , ** kwargs ) -> np .ndarray :
139258 return self .to_pandas ().to_numpy (dtype , ** kwargs )
@@ -184,13 +303,15 @@ def __repr__(self) -> str:
def to_pandas(self) -> pandas.Index:
    """Executes deferred operations and downloads the results.

    Returns:
        pandas.Index: An index built from all index columns, with its level
        names taken from the block's index labels.
    """
    # Project down to only the index columns. So the query can be cached to visualize other data.
    index_columns = list(self._block.index_columns)
    projected_columns = []
    for column_id in index_columns:
        projected_columns.append(self._expr.get_any_column(column_id))
    expr = self._expr.projection(projected_columns)
    results, _ = expr.start_query()
    frame = expr._session._rows_to_dataframe(results)
    # set_index returns a new frame, so take the index from that result.
    index = frame.set_index(index_columns).index
    index.names = list(self._block._index_labels)
    return index
195316
196317 def join (
@@ -235,6 +356,12 @@ def resolve_level_name(self: IndexValue, label: blocks.Label) -> str:
def is_uniquely_named(self: IndexValue):
    """Return True when no entry of ``names`` appears more than once."""
    level_names = self.names
    distinct_names = set(level_names)
    return len(distinct_names) == len(level_names)
237358
def _set_block(self, block: blocks.Block):
    """Store ``block`` as this object's ``_block`` attribute."""
    replacement = block
    self._block = replacement
361+
def _get_block(self) -> blocks.Block:
    """Return the block stored in this object's ``_block`` attribute."""
    current_block = self._block
    return current_block
364+
238365
239366def join_mono_indexed (
240367 left : IndexValue ,
0 commit comments