 from pandas.core.internals import BlockManager
 from pandas.core.series import Series
 from pandas.core.panel import WidePanel
+from pandas.util.decorators import cache_readonly
 import pandas._tseries as _tseries


@@ -61,7 +62,8 @@ def name(self):
         else:
             return self._name

-    def _get_obj_with_exclusions(self):
+    @property
+    def _obj_with_exclusions(self):
         return self.obj

     @property
@@ -83,14 +85,14 @@ def __getattribute__(self, attr):
     def _make_wrapper(self, name):
         f = getattr(self.obj, name)
         if not isinstance(f, types.MethodType):
-            return self.aggregate(lambda self: getattr(self, name))
+            return self.apply(lambda self: getattr(self, name))

         f = getattr(type(self.obj), name)

         def wrapper(*args, **kwargs):
             def curried(x):
                 return f(x, *args, **kwargs)
-            return self.aggregate(curried)
+            return self.apply(curried)

         return wrapper

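For orientation, a standalone sketch of the currying idea in `_make_wrapper`: an attribute access such as `grouped.mean(...)` becomes a per-group call routed through `apply`. The names `grouped` and `method_name` are hypothetical stand-ins, not part of the patch.

def make_wrapper(grouped, method_name):
    # Hypothetical mimic: dispatch a method of the wrapped object per group.
    def wrapper(*args, **kwargs):
        def curried(x):
            # call the underlying object's method with the captured arguments
            return getattr(x, method_name)(*args, **kwargs)
        return grouped.apply(curried)
    return wrapper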
@@ -112,7 +114,7 @@ def __iter__(self):

         Returns
         -------
-        Generator yielding sequence of (groupName, subsetted object)
+        Generator yielding sequence of (name, subsetted object)
         for each group
         """
         if len(self.groupings) == 1:
@@ -131,12 +133,12 @@ def __iter__(self):

     def _multi_iter(self):
         tipo = type(self.obj)
-        if isinstance(self.obj, DataFrame):
-            data = self.obj
-        elif isinstance(self.obj, NDFrame):
+        data = self.obj
+        if (isinstance(self.obj, NDFrame) and
+            not isinstance(self.obj, DataFrame)):
             data = self.obj._data
-        else:
-            data = self.obj
+        elif isinstance(self.obj, Series):
+            tipo = Series

         def flatten(gen, level=0):
             ids = self.groupings[level].ids
@@ -154,6 +156,12 @@ def flatten(gen, level=0):
         for cats, data in flatten(gen):
             yield cats + (data,)

+    def apply(self, func):
+        """
+        Apply function, combine results together
+        """
+        return self._python_apply_general(func)
+
     def aggregate(self, func):
         raise NotImplementedError

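A minimal usage sketch of the new `apply` entry point, assuming a toy frame with a 'key' column (data and names are illustrative, not from the patch): `aggregate` is for reductions, while `apply` accepts a function whose result can keep the shape of each group.

import pandas as pd

df = pd.DataFrame({'key': ['a', 'a', 'b', 'b'],
                   'val': [1.0, 2.0, 3.0, 4.0]})
grouped = df.groupby('key')['val']

# reduction: one value per group
means = grouped.mean()

# apply: the function may return a Series shaped like the group itself
demeaned = grouped.apply(lambda s: s - s.mean())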
@@ -243,7 +251,7 @@ def _doit(reschunk, ctchunk, gen, shape_axis=0):
         output = np.empty(group_shape + stride_shape,
                           dtype=float)
         output.fill(np.nan)
-        obj = self._get_obj_with_exclusions()
+        obj = self._obj_with_exclusions
         _doit(output, counts, gen_factory(obj),
               shape_axis=self.axis)

@@ -267,6 +275,37 @@ def _doit(reschunk, ctchunk, gen, shape_axis=0):

         return self._wrap_aggregated_output(output, mask)

+    def _python_apply_general(self, arg):
+        result_keys = []
+        result_values = []
+
+        key_as_tuple = len(self.groupings) > 1
+
+        not_indexed_same = False
+
+        for data in self:
+            if key_as_tuple:
+                key = data[:-1]
+            else:
+                key = data[0]
+
+            group = data[-1]
+            group.name = key
+
+            res = arg(group)
+
+            if not _is_indexed_like(res, group):
+                not_indexed_same = True
+
+            result_keys.append(key)
+            result_values.append(res)
+
+        return self._wrap_applied_output(result_keys, result_values,
+                                         not_indexed_same=not_indexed_same)
+
+    def _wrap_applied_output(self, *args, **kwargs):
+        raise NotImplementedError
+
     @property
     def _generator_factory(self):
         labels = [ping.labels for ping in self.groupings]
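The loop above can be summarized outside the class as follows; this is a hedged mimic, with `groups` assumed to be an iterable of (key, Series) pairs rather than the GroupBy iterator the patch actually walks.

import pandas as pd

def apply_general(groups, func):
    # collect one result per group and note whether any result's index
    # differs from its group's index (the not_indexed_same flag above)
    keys, values = [], []
    not_indexed_same = False
    for key, group in groups:
        group.name = key          # expose the group key to func
        res = func(group)
        if not (isinstance(res, pd.Series) and res.index.equals(group.index)):
            not_indexed_same = True
        keys.append(key)
        values.append(res)
    return keys, values, not_indexed_same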
@@ -282,6 +321,12 @@ def _generator_factory(self):
         return lambda obj: generate_groups(obj, labels, shape, axis=axis,
                                            factory=factory)

+def _is_indexed_like(obj, other):
+    if isinstance(obj, Series):
+        return obj.index.equals(other.index)
+    elif isinstance(obj, DataFrame):
+        return obj._indexed_same(other)
+
 class Grouping(object):

     def __init__(self, index, grouper=None, name=None, level=None):
@@ -470,6 +515,29 @@ def _wrap_aggregated_output(self, output, mask):
         name_list = self._get_names()
         return Series(output, index=name_list[0][1])

+    def _wrap_applied_output(self, keys, values, not_indexed_same=False):
+        if len(keys) == 0:
+            return Series([])
+
+        if isinstance(values[0], Series):
+            if not_indexed_same:
+                data_dict = dict(zip(keys, values))
+                result = DataFrame(data_dict).T
+                if len(self.groupings) > 1:
+                    result.index = MultiIndex.from_tuples(keys)
+                return result
+            else:
+                cat_values = np.concatenate([x.values for x in values])
+                cat_index = np.concatenate([np.asarray(x.index)
+                                            for x in values])
+                return Series(cat_values, index=cat_index)
+        else:
+            if len(self.groupings) > 1:
+                index = MultiIndex.from_tuples(keys)
+                return Series(values, index)
+            else:
+                return Series(values, keys)
+
     def _aggregate_multiple_funcs(self, arg):
         if not isinstance(arg, dict):
             arg = dict((func.__name__, func) for func in arg)
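To illustrate the two branches just added (example data is made up, and the exact output container can differ between pandas versions, so treat this as a sketch of the intent): results indexed like their group are concatenated back into one long Series, while results carrying a new index end up keyed by group.

import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 4.0], index=['w', 'x', 'y', 'z'])
grouped = s.groupby(['a', 'a', 'b', 'b'])

# indexed like each group -> pieces are glued back together
demeaned = grouped.apply(lambda g: g - g.mean())

# not indexed like the group -> the group keys label the result
extremes = grouped.apply(lambda g: pd.Series({'lo': g.min(), 'hi': g.max()}))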
@@ -498,13 +566,13 @@ def _aggregate_named(self, arg):

         for name in self.primary:
             grp = self.get_group(name)
-            grp.groupName = name
+            grp.name = name
             output = arg(grp)
             result[name] = output

         return result

-    def transform(self, applyfunc):
+    def transform(self, func):
         """
         For given Series, group index by given mapper function or dict, take
         the sub-Series (reindex) for this group and call apply(applyfunc)
@@ -527,8 +595,8 @@ def transform(self, applyfunc):

         Example
         -------
-        series.fgroupby(lambda x: mapping[x],
-                        lambda x: (x - mean(x)) / std(x))
+        series.transform(lambda x: mapping[x],
+                         lambda x: (x - x.mean()) / x.std())

         Returns
         -------
@@ -538,9 +606,8 @@ def transform(self, applyfunc):

         for name, group in self:
             # XXX
-            group.groupName = name
-            res = applyfunc(group)
-
+            group.name = name
+            res = func(group)
             indexer, _ = self.obj.index.get_indexer(group.index)
             np.put(result, indexer, res)

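A usage sketch matching the docstring example above, with made-up data: standardize within each group, and the result stays aligned with the original index.

import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 4.0])
z = s.groupby(['a', 'a', 'b', 'b']).transform(lambda x: (x - x.mean()) / x.std())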
@@ -600,7 +667,8 @@ def _iterate_slices(self):

             yield val, slicer(val)

-    def _get_obj_with_exclusions(self):
+    @cache_readonly
+    def _obj_with_exclusions(self):
         if len(self.exclusions) > 0:
             return self.obj.drop(self.exclusions, axis=1)
         else:
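Here `cache_readonly` (imported above from pandas.util.decorators) makes `_obj_with_exclusions` behave like a property that is computed once and memoized, which matters because dropping excluded columns allocates a new frame. A minimal sketch of such a descriptor, not pandas' actual implementation:

class cached_readonly(object):
    # non-data descriptor: compute on first access, then store the value on
    # the instance so later lookups bypass the descriptor entirely
    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self
        value = self.func(obj)
        obj.__dict__[self.name] = value
        return value

Applied to a zero-argument method, the first attribute access runs the computation; subsequent accesses read the cached value straight from the instance dict.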
@@ -641,7 +709,7 @@ def aggregate(self, arg):
     def _aggregate_generic(self, agger, axis=0):
         result = {}

-        obj = self._get_obj_with_exclusions()
+        obj = self._obj_with_exclusions

         try:
             for name in self.primary:
@@ -668,7 +736,7 @@ def _aggregate_generic(self, agger, axis=0):
     def _aggregate_item_by_item(self, agger):
         # only for axis==0

-        obj = self._get_obj_with_exclusions()
+        obj = self._obj_with_exclusions

         result = {}
         cannot_agg = []
@@ -694,6 +762,30 @@ def _wrap_aggregated_output(self, output, mask):

         return result

+    def _wrap_applied_output(self, keys, values, not_indexed_same=False):
+        if len(keys) == 0:
+            # XXX
+            return DataFrame({})
+
+        if isinstance(values[0], DataFrame):
+            return _concat_frames(values)
+        else:
+            if len(self.groupings) > 1:
+                keys = MultiIndex.from_tuples(keys)
+
+            # obj = self._obj_with_exclusions
+
+            if self.axis == 0:
+                stacked_values = np.vstack(values)
+                columns = values[0].index
+                index = keys
+            else:
+                stacked_values = np.vstack(values)
+                index = values[0].index
+                columns = keys
+
+            return DataFrame(stacked_values, index=index, columns=columns)
+
     def transform(self, func):
         """
         For given DataFrame, group index by given mapper function or dict, take
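The non-DataFrame branch above stacks one reduced Series per group into a 2-D array: for axis=0 the group keys become the row index and the shared Series index becomes the columns. A small sketch with hypothetical per-group results:

import numpy as np
import pandas as pd

values = [pd.Series([1.0, 2.0], index=['x', 'y']),   # result for group 'a'
          pd.Series([3.0, 4.0], index=['x', 'y'])]   # result for group 'b'
keys = ['a', 'b']

stacked = np.vstack(values)                 # shape (n_groups, n_columns)
wrapped = pd.DataFrame(stacked, index=keys, columns=values[0].index)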
@@ -715,8 +807,8 @@ def transform(self, func):

         Note
         ----
-        Each subframe is endowed the attribute 'groupName' in case
-        you need to know which group you are working on.
+        Each subframe is endowed the attribute 'name' in case you need to know
+        which group you are working on.

         Example
         --------
@@ -725,41 +817,49 @@ def transform(self, func):
         """
         applied = []

-        obj = self._get_obj_with_exclusions()
-        for val, inds in self.primary.indices.iteritems():
-            subframe = obj.take(inds, axis=self.axis)
-            subframe.groupName = val
+        obj = self._obj_with_exclusions
+        for name, group in self:
+            group.name = name

             try:
-                res = subframe.apply(func, axis=self.axis)
+                res = group.apply(func, axis=self.axis)
             except Exception: # pragma: no cover
-                res = func(subframe)
+                res = func(group)

             # broadcasting
             if isinstance(res, Series):
                 if res.index is obj.index:
-                    subframe.T.values[:] = res
+                    group.T.values[:] = res
                 else:
-                    subframe.values[:] = res
+                    group.values[:] = res

-                applied.append(subframe)
+                applied.append(group)
             else:
                 applied.append(res)

-        if self.axis == 0:
-            all_index = [np.asarray(x.index) for x in applied]
-            new_index = Index(np.concatenate(all_index))
-            new_columns = obj.columns
-        else:
-            all_columns = [np.asarray(x.columns) for x in applied]
-            new_columns = Index(np.concatenate(all_columns))
-            new_index = obj.index
+        return _concat_frames(applied, obj.index, obj.columns,
+                              axis=self.axis)

-        new_values = np.concatenate([x.values for x in applied],
-                                    axis=self.axis)
-        result = DataFrame(new_values, index=new_index, columns=new_columns)
-        return result.reindex(index=obj.index, columns=obj.columns)
+def _concat_frames(frames, index=None, columns=None, axis=0):
+    if axis == 0:
+        all_index = [np.asarray(x.index) for x in frames]
+        new_index = Index(np.concatenate(all_index))

+        if columns is None:
+            new_columns = frames[0].columns
+        else:
+            new_columns = columns
+    else:
+        all_columns = [np.asarray(x.columns) for x in frames]
+        new_columns = Index(np.concatenate(all_columns))
+        if index is None:
+            new_index = frames[0].index
+        else:
+            new_index = index
+
+    new_values = np.concatenate([x.values for x in frames], axis=axis)
+    result = DataFrame(new_values, index=new_index, columns=new_columns)
+    return result.reindex(index=index, columns=columns)

 class WidePanelGroupBy(GroupBy):

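The new `_concat_frames` helper glues per-group subframes back together along the grouping axis and then reindexes against the caller's labels so rows come back in their pre-groupby order. A sketch of the axis=0 path with two made-up subframes:

import numpy as np
import pandas as pd

f1 = pd.DataFrame({'x': [1.0, 2.0]}, index=['b', 'd'])
f2 = pd.DataFrame({'x': [3.0, 4.0]}, index=['a', 'c'])
orig_index = pd.Index(['a', 'b', 'c', 'd'])

new_index = pd.Index(np.concatenate([np.asarray(f.index) for f in (f1, f2)]))
new_values = np.concatenate([f.values for f in (f1, f2)], axis=0)

result = pd.DataFrame(new_values, index=new_index, columns=f1.columns)
result = result.reindex(orig_index)   # restore the original row order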
@@ -788,7 +888,7 @@ def aggregate(self, func):
     def _aggregate_generic(self, agger, axis=0):
         result = {}

-        obj = self._get_obj_with_exclusions()
+        obj = self._obj_with_exclusions

         for name in self.primary:
             data = self.get_group(name, obj=obj)
@@ -804,7 +904,8 @@ def _aggregate_generic(self, agger, axis=0):

         return result

-class LongPanelGroupBy(GroupBy):
+
+class NDArrayGroupBy(GroupBy):
     pass

 #-------------------------------------------------------------------------------