1
- import string
2
-
3
1
import numpy as np
4
2
import pandas .util .testing as tm
5
3
from pandas import (Series , DataFrame , MultiIndex , Int64Index , Float64Index ,
6
- IntervalIndex , IndexSlice )
4
+ IntervalIndex , IndexSlice , concat , date_range )
7
5
from .pandas_vb_common import setup , Panel # noqa
8
6
9
7
@@ -79,27 +77,27 @@ class NonNumericSeriesIndexing(object):
79
77
param_names = ['index' ]
80
78
81
79
def setup (self , index ):
82
- N = 10 ** 6
80
+ N = 10 ** 5
83
81
indexes = {'string' : tm .makeStringIndex (N ),
84
- 'datetime' : tm . makeTimeSeries ( N )}
82
+ 'datetime' : date_range ( '1900' , periods = N , freq = 's' )}
85
83
index = indexes [index ]
86
84
self .s = Series (np .random .rand (N ), index = index )
87
- self .lbl = index [800000 ]
85
+ self .lbl = index [80000 ]
88
86
89
- def time_getitem_label_slice (self ):
87
+ def time_getitem_label_slice (self , index ):
90
88
self .s [:self .lbl ]
91
89
92
- def time_getitem_pos_slice (self ):
93
- self .s [:800000 ]
90
+ def time_getitem_pos_slice (self , index ):
91
+ self .s [:80000 ]
94
92
95
- def time_get_value (self ):
93
+ def time_get_value (self , index ):
96
94
self .s .get_value (self .lbl )
97
95
98
96
def time_getitem_scalar (self , index ):
99
97
self .s [self .lbl ]
100
98
101
99
102
- class DataFrameIndexing (object ):
100
+ class DataFrameStringIndexing (object ):
103
101
104
102
goal_time = 0.2
105
103
@@ -108,67 +106,71 @@ def setup(self):
108
106
columns = tm .makeStringIndex (30 )
109
107
self .df = DataFrame (np .random .randn (1000 , 30 ), index = index ,
110
108
columns = columns )
111
- self .idx = index [100 ]
112
- self .col = columns [10 ]
113
-
114
- self .df2 = DataFrame (np .random .randn (10000 , 4 ),
115
- columns = ['A' , 'B' , 'C' , 'D' ])
116
- self .indexer = self .df2 ['B' ] > 0
117
- self .obj_indexer = self .indexer .astype ('O' )
118
-
119
- # dupes
120
- self .idx_dupe = np .array (range (30 )) * 99
121
- self .df3 = DataFrame ({'A' : [0.1 ] * 1000 , 'B' : [1 ] * 1000 })
122
- self .df3 = concat ([self .df3 , 2 * self .df3 , 3 * self .df3 ])
123
-
124
- self .df_big = DataFrame (dict (A = ['foo' ] * 1000000 ))
109
+ self .idx_scalar = index [100 ]
110
+ self .col_scalar = columns [10 ]
111
+ self .bool_indexer = self .df [self .col_scalar ] > 0
112
+ self .bool_obj_indexer = self .bool_indexer .astype (object )
125
113
126
114
def time_get_value (self ):
127
- self .df .get_value (self .idx , self .col )
115
+ self .df .get_value (self .idx_scalar , self .col_scalar )
116
+
117
+ def time_ix (self ):
118
+ self .df .ix [self .idx_scalar , self .col_scalar ]
128
119
129
- def time_get_value_ix (self ):
130
- self .df .ix [( self .idx , self .col ) ]
120
+ def time_loc (self ):
121
+ self .df .loc [ self .idx_scalar , self .col_scalar ]
131
122
132
123
def time_getitem_scalar (self ):
133
- self .df [self .col ][self .idx ]
124
+ self .df [self .col_scalar ][self .idx_scalar ]
134
125
135
126
def time_boolean_rows (self ):
136
- self .df2 [self .indexer ]
127
+ self .df [self .bool_indexer ]
137
128
138
129
def time_boolean_rows_object (self ):
139
- self .df2 [self .obj_indexer ]
130
+ self .df [self .bool_obj_indexer ]
131
+
132
+
133
+ class DataFrameNumericIndexing (object ):
134
+
135
+ goal_time = 0.2
136
+
137
+ def setup (self ):
138
+ self .idx_dupe = np .array (range (30 )) * 99
139
+ self .df = DataFrame (np .random .randn (10000 , 5 ))
140
+ self .df_dup = concat ([self .df , 2 * self .df , 3 * self .df ])
141
+ self .bool_indexer = [True ] * 5000 + [False ] * 5000
140
142
141
143
def time_iloc_dups (self ):
142
- self .df3 .iloc [self .idx_dupe ]
144
+ self .df_dup .iloc [self .idx_dupe ]
143
145
144
146
def time_loc_dups (self ):
145
- self .df3 .loc [self .idx_dupe ]
147
+ self .df_dup .loc [self .idx_dupe ]
146
148
147
- def time_iloc_big (self ):
148
- self .df_big .iloc [:100 , 0 ]
149
+ def time_iloc (self ):
150
+ self .df .iloc [:100 , 0 ]
149
151
152
+ def time_loc (self ):
153
+ self .df .loc [:100 , 0 ]
150
154
151
- class IndexingMethods (object ):
152
- # GH 13166
153
- goal_time = 0.2
155
+ def time_bool_indexer (self ):
156
+ self .df [self .bool_indexer ]
154
157
155
- def setup (self ):
156
- N = 100000
157
- a = np .arange (N )
158
- self .ind = Float64Index (a * 4.8000000418824129e-08 )
159
158
160
- self .s = Series (np .random .rand (N ))
161
- self .ts = Series (np .random .rand (N ),
162
- index = date_range ('2011-01-01' , freq = 'S' , periods = N ))
163
- self .indexer = [True , False , True , True , False ] * 20000
159
+ class Take (object ):
164
160
165
- def time_get_loc_float (self ):
166
- self .ind .get_loc (0 )
161
+ goal_time = 0.2
162
+ params = ['int' , 'datetime' ]
163
+ param_names = ['index' ]
167
164
168
- def time_take_dtindex (self ):
169
- self .ts .take (self .indexer )
165
+ def setup (self , index ):
166
+ N = 100000
167
+ indexes = {'int' : Int64Index (np .arange (N )),
168
+ 'datetime' : date_range ('2011-01-01' , freq = 'S' , periods = N )}
169
+ index = indexes [index ]
170
+ self .s = Series (np .random .rand (N ), index = index )
171
+ self .indexer = [True , False , True , True , False ] * 20000
170
172
171
- def time_take_intindex (self ):
173
+ def time_take (self , index ):
172
174
self .s .take (self .indexer )
173
175
174
176
@@ -177,11 +179,10 @@ class MultiIndexing(object):
177
179
goal_time = 0.2
178
180
179
181
def setup (self ):
180
- self . mi = MultiIndex .from_product ([range (1000 ), range (1000 )])
181
- self .s = Series (np .random .randn (1000000 ), index = self . mi )
182
+ mi = MultiIndex .from_product ([range (1000 ), range (1000 )])
183
+ self .s = Series (np .random .randn (1000000 ), index = mi )
182
184
self .df = DataFrame (self .s )
183
185
184
- # slicers
185
186
n = 100000
186
187
self .mdt = DataFrame ({'A' : np .random .choice (range (10000 , 45000 , 1000 ),
187
188
n ),
@@ -191,68 +192,16 @@ def setup(self):
191
192
'x' : np .random .choice (range (400 ), n ),
192
193
'y' : np .random .choice (range (25 ), n )})
193
194
self .idx = IndexSlice [20000 :30000 , 20 :30 , 35 :45 , 30000 :40000 ]
194
- self .mdt2 = self .mdt .set_index (['A' , 'B' , 'C' , 'D' ]).sortlevel ()
195
- self .miint = MultiIndex .from_product ([np .arange (1000 ),
196
- np .arange (1000 )],
197
- names = ['one' , 'two' ])
198
- self .obj_index = np .array ([(0 , 10 ), (0 , 11 ), (0 , 12 ),
199
- (0 , 13 ), (0 , 14 ), (0 , 15 ),
200
- (0 , 16 ), (0 , 17 ), (0 , 18 ),
201
- (0 , 19 )], dtype = object )
202
-
203
- self .mi_large = MultiIndex .from_product (
204
- [np .arange (1000 ), np .arange (20 ), list (string .ascii_letters )],
205
- names = ['one' , 'two' , 'three' ])
206
- self .mi_med = MultiIndex .from_product (
207
- [np .arange (1000 ), np .arange (10 ), list ('A' )],
208
- names = ['one' , 'two' , 'three' ])
209
- self .mi_small = MultiIndex .from_product (
210
- [np .arange (100 ), list ('A' ), list ('A' )],
211
- names = ['one' , 'two' , 'three' ])
212
-
213
- size = 65536
214
- self .mi_unused_levels = pd .MultiIndex .from_arrays ([
215
- rng .randint (0 , 8192 , size ),
216
- rng .randint (0 , 1024 , size )])[rng .random .rand (size ) < 0.1 ]
217
-
218
- def time_series_xs_mi_ix (self ):
195
+ self .mdt = self .mdt .set_index (['A' , 'B' , 'C' , 'D' ]).sort_index ()
196
+
197
+ def time_series_ix (self ):
219
198
self .s .ix [999 ]
220
199
221
- def time_frame_xs_mi_ix (self ):
200
+ def time_frame_ix (self ):
222
201
self .df .ix [999 ]
223
202
224
- def time_multiindex_slicers (self ):
225
- self .mdt2 .loc [self .idx , :]
226
-
227
- def time_multiindex_get_indexer (self ):
228
- self .miint .get_indexer (self .obj_index )
229
-
230
- def time_multiindex_large_get_loc (self ):
231
- self .mi_large .get_loc ((999 , 19 , 'Z' ))
232
-
233
- def time_multiindex_large_get_loc_warm (self ):
234
- for _ in range (1000 ):
235
- self .mi_large .get_loc ((999 , 19 , 'Z' ))
236
-
237
- def time_multiindex_med_get_loc (self ):
238
- self .mi_med .get_loc ((999 , 9 , 'A' ))
239
-
240
- def time_multiindex_med_get_loc_warm (self ):
241
- for _ in range (1000 ):
242
- self .mi_med .get_loc ((999 , 9 , 'A' ))
243
-
244
- def time_multiindex_string_get_loc (self ):
245
- self .mi_small .get_loc ((99 , 'A' , 'A' ))
246
-
247
- def time_multiindex_small_get_loc_warm (self ):
248
- for _ in range (1000 ):
249
- self .mi_small .get_loc ((99 , 'A' , 'A' ))
250
-
251
- def time_is_monotonic (self ):
252
- self .miint .is_monotonic
253
-
254
- def time_remove_unused_levels (self ):
255
- self .mi_unused_levels .remove_unused_levels ()
203
+ def time_index_slice (self ):
204
+ self .mdt .loc [self .idx , :]
256
205
257
206
258
207
class IntervalIndexing (object ):
@@ -307,20 +256,6 @@ def time_lookup_loc(self, s):
307
256
s .loc
308
257
309
258
310
- class BooleanRowSelect (object ):
311
-
312
- goal_time = 0.2
313
-
314
- def setup (self ):
315
- N = 10000
316
- self .df = DataFrame (np .random .randn (N , 100 ))
317
- self .bool_arr = np .zeros (N , dtype = bool )
318
- self .bool_arr [:1000 ] = True
319
-
320
- def time_frame_boolean_row_select (self ):
321
- self .df [self .bool_arr ]
322
-
323
-
324
259
class GetItemSingleColumn (object ):
325
260
326
261
goal_time = 0.2
@@ -342,7 +277,7 @@ class AssignTimeseriesIndex(object):
342
277
343
278
def setup (self ):
344
279
N = 100000
345
- dx = date_range ('1/1/2000' , periods = N , freq = 'H' )
280
+ idx = date_range ('1/1/2000' , periods = N , freq = 'H' )
346
281
self .df = DataFrame (np .random .randn (N , 1 ), columns = ['A' ], index = idx )
347
282
348
283
def time_frame_assign_timeseries_index (self ):
0 commit comments