6
6
from collections import defaultdict
7
7
8
8
import numpy as np
9
+ from numpy import percentile as _quantile
10
+
9
11
from pandas .core .base import PandasObject
10
12
11
13
from pandas .core .common import (_possibly_downcast_to_dtype , isnull , _NS_DTYPE ,
@@ -131,6 +133,8 @@ def get_values(self, dtype=None):
131
133
return an internal format, currently just the ndarray
132
134
this is often overriden to handle to_dense like operations
133
135
"""
136
+ if com .is_object_dtype (dtype ):
137
+ return self .values .astype (object )
134
138
return self .values
135
139
136
140
def to_dense (self ):
@@ -141,6 +145,10 @@ def to_object_block(self, mgr):
141
145
values = self .get_values (dtype = object )
142
146
return self .make_block (values , klass = ObjectBlock )
143
147
148
@property
def _na_value(self):
    """Scalar NaN; ``quantile`` returns it for empty input and scalar ``qs``."""
    return np.nan
151
+
144
152
@property
def fill_value(self):
    """Value this block reports as its fill value: NaN."""
    return np.nan
@@ -1247,6 +1255,19 @@ def equals(self, other):
1247
1255
return False
1248
1256
return array_equivalent (self .values , other .values )
1249
1257
1258
def quantile(self, values, qs, **kwargs):
    """
    Compute the quantile(s) ``qs`` of ``values``.

    Parameters
    ----------
    values : array-like
        Data to reduce; may be empty.
    qs : scalar or list-like
        Quantile(s) given as fractions; each one is multiplied by 100
        before being passed to ``numpy.percentile``.
    **kwargs
        Forwarded unchanged to ``numpy.percentile``.

    Returns
    -------
    np.ndarray with one entry per requested quantile when ``qs`` is
    list-like, otherwise a scalar.  For empty ``values`` the array is
    filled with ``self.fill_value`` and the scalar is ``self._na_value``.
    """
    if com.is_list_like(qs):
        # materialise once so len() works for any list-like input
        qs = list(qs)
        if len(values) == 0:
            # one fill value per requested quantile, so the result has
            # the same length as in the non-empty case
            return np.array([self.fill_value] * len(qs))
        return np.array([_quantile(values, q * 100, **kwargs) for q in qs])

    if len(values) == 0:
        return self._na_value
    return _quantile(values, qs * 100, **kwargs)
1270
+
1250
1271
1251
1272
class NonConsolidatableMixIn (object ):
1252
1273
""" hold methods for the nonconsolidatable blocks """
@@ -1455,15 +1476,55 @@ def should_store(self, value):
1455
1476
return com .is_integer_dtype (value ) and value .dtype == self .dtype
1456
1477
1457
1478
1458
- class TimeDeltaBlock (IntBlock ):
1479
class DatetimeLikeBlockMixin(object):
    """Behaviour shared by the datetime-like blocks that mix this in."""

    @property
    def _na_value(self):
        """Boxed missing-value scalar: ``NaT``."""
        return tslib.NaT

    @property
    def fill_value(self):
        """Integer sentinel ``iNaT``, compared against the i8 view of data."""
        return tslib.iNaT

    def _try_operate(self, values):
        """ return a version to operate on (the ``'i8'`` view) """
        as_int = values.view('i8')
        return as_int

    def get_values(self, dtype=None):
        """
        Return the stored values.  When an object dtype is requested every
        element is passed through ``self._box_func`` (e.g. Timestamp /
        Timedelta) and the original shape is restored.
        """
        if not com.is_object_dtype(dtype):
            return self.values
        flat = self.values.ravel()
        boxed = lib.map_infer(flat, self._box_func)
        return boxed.reshape(self.values.shape)

    def quantile(self, values, qs, **kwargs):
        """
        Quantile(s) of ``values`` taken on their i8 view, with entries
        equal to ``self.fill_value`` dropped first.  The reduction itself
        is delegated to ``Block.quantile``.
        """
        ints = values.view('i8')
        nat_mask = ints == self.fill_value
        if nat_mask.any():
            ints = ints[~nat_mask]
        result = Block.quantile(self, ints, qs, **kwargs)

        if not com.is_datetime64tz_dtype(self):
            return self._try_coerce_result(result)

        # TODO: temporary logic to avoid GH 12619 and GH 12772, which
        # affect DatetimeTZBlock._try_coerce_result for np.ndarray
        if isinstance(result, np.ndarray) and ints.ndim > 0:
            result = self._holder(result, tz='UTC').tz_convert(
                self.values.tz)
        return result
1517
+
1518
+
1519
+ class TimeDeltaBlock (DatetimeLikeBlockMixin , IntBlock ):
1459
1520
__slots__ = ()
1460
1521
is_timedelta = True
1461
1522
_can_hold_na = True
1462
1523
is_numeric = False
1463
1524
1464
1525
@property
def _box_func(self):
    """Callable that boxes a raw value as a nanosecond ``Timedelta``."""
    def box(x):
        return tslib.Timedelta(x, unit='ns')
    return box
1467
1528
1468
1529
def fillna (self , value , ** kwargs ):
1469
1530
@@ -1516,19 +1577,15 @@ def _try_coerce_args(self, values, other):
1516
1577
1517
1578
return values , values_mask , other , other_mask
1518
1579
1519
- def _try_operate (self , values ):
1520
- """ return a version to operate on """
1521
- return values .view ('i8' )
1522
-
1523
1580
def _try_coerce_result(self, result):
    """
    Reverse of try_coerce_args / try_operate.

    ndarray results of integer, float or object kind are cast to
    ``m8[ns]``, and the positions that were null before the cast are set
    to ``iNaT``.  Integer or float scalars are boxed with
    ``self._box_func``.  Anything else is returned unchanged.
    """
    if isinstance(result, np.ndarray):
        # the null mask has to be taken before the cast
        mask = isnull(result)
        if result.dtype.kind in ['i', 'f', 'O']:
            result = result.astype('m8[ns]')
        result[mask] = tslib.iNaT
    elif isinstance(result, (np.integer, float)):
        # builtin ``float`` instead of ``np.float``: the alias was only
        # another name for ``float`` and no longer exists in current NumPy
        result = self._box_func(result)
    return result
1533
1590
1534
1591
def should_store (self , value ):
@@ -1558,13 +1615,6 @@ def to_native_types(self, slicer=None, na_rep=None, quoting=None,
1558
1615
dtype = object )
1559
1616
return rvalues
1560
1617
1561
- def get_values (self , dtype = None ):
1562
- # return object dtypes as Timedelta
1563
- if dtype == object :
1564
- return lib .map_infer (self .values .ravel (),
1565
- lib .Timedelta ).reshape (self .values .shape )
1566
- return self .values
1567
-
1568
1618
1569
1619
class BoolBlock (NumericBlock ):
1570
1620
__slots__ = ()
@@ -1954,7 +2004,7 @@ def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs):
1954
2004
return values .reshape (1 , len (values ))
1955
2005
1956
2006
1957
- class DatetimeBlock (Block ):
2007
+ class DatetimeBlock (DatetimeLikeBlockMixin , Block ):
1958
2008
__slots__ = ()
1959
2009
is_datetime = True
1960
2010
_can_hold_na = True
@@ -1998,10 +2048,6 @@ def _try_cast(self, element):
1998
2048
except :
1999
2049
return element
2000
2050
2001
- def _try_operate (self , values ):
2002
- """ return a version to operate on """
2003
- return values .view ('i8' )
2004
-
2005
2051
def _try_coerce_args (self , values , other ):
2006
2052
"""
2007
2053
Coerce values and other to dtype 'i8'. NaN and NaT convert to
@@ -2029,7 +2075,7 @@ def _try_coerce_args(self, values, other):
2029
2075
other = tslib .iNaT
2030
2076
other_mask = True
2031
2077
elif isinstance (other , (datetime , np .datetime64 , date )):
2032
- other = lib . Timestamp (other )
2078
+ other = self . _box_func (other )
2033
2079
if getattr (other , 'tz' ) is not None :
2034
2080
raise TypeError ("cannot coerce a Timestamp with a tz on a "
2035
2081
"naive Block" )
@@ -2056,13 +2102,13 @@ def _try_coerce_result(self, result):
2056
2102
if isinstance (result , np .ndarray ):
2057
2103
if result .dtype .kind in ['i' , 'f' , 'O' ]:
2058
2104
result = result .astype ('M8[ns]' )
2059
- elif isinstance (result , (np .integer , np .datetime64 )):
2060
- result = lib . Timestamp (result )
2105
+ elif isinstance (result , (np .integer , np .float , np . datetime64 )):
2106
+ result = self . _box_func (result )
2061
2107
return result
2062
2108
2063
2109
@property
def _box_func(self):
    """Callable used to box raw values: the ``Timestamp`` constructor."""
    return tslib.Timestamp
2066
2112
2067
2113
def to_native_types (self , slicer = None , na_rep = None , date_format = None ,
2068
2114
quoting = None , ** kwargs ):
@@ -2098,13 +2144,6 @@ def set(self, locs, values, check=False):
2098
2144
2099
2145
self .values [locs ] = values
2100
2146
2101
- def get_values (self , dtype = None ):
2102
- # return object dtype as Timestamps
2103
- if dtype == object :
2104
- return lib .map_infer (
2105
- self .values .ravel (), lib .Timestamp ).reshape (self .values .shape )
2106
- return self .values
2107
-
2108
2147
2109
2148
class DatetimeTZBlock (NonConsolidatableMixIn , DatetimeBlock ):
2110
2149
""" implement a datetime64 block with a tz attribute """
@@ -2145,7 +2184,7 @@ def external_values(self):
2145
2184
2146
2185
def get_values (self , dtype = None ):
2147
2186
# return object dtype as Timestamps with the zones
2148
- if dtype == object :
2187
+ if com . is_object_dtype ( dtype ) :
2149
2188
f = lambda x : lib .Timestamp (x , tz = self .values .tz )
2150
2189
return lib .map_infer (
2151
2190
self .values .ravel (), f ).reshape (self .values .shape )
@@ -2228,10 +2267,14 @@ def _try_coerce_result(self, result):
2228
2267
2229
2268
if isinstance (result , np .ndarray ):
2230
2269
result = self ._holder (result , tz = self .values .tz )
2231
- elif isinstance (result , (np .integer , np .datetime64 )):
2270
+ elif isinstance (result , (np .integer , np .float , np . datetime64 )):
2232
2271
result = lib .Timestamp (result , tz = self .values .tz )
2233
2272
return result
2234
2273
2274
@property
def _box_func(self):
    """Callable boxing a raw value as a ``Timestamp`` in this dtype's tz."""
    def box(x):
        # tz is looked up at call time, exactly as the lambda form did
        return tslib.Timestamp(x, tz=self.dtype.tz)
    return box
2277
+
2235
2278
def shift (self , periods , axis = 0 , mgr = None ):
2236
2279
""" shift the block by periods """
2237
2280
@@ -3852,6 +3895,14 @@ def get_values(self):
3852
3895
""" return a dense type view """
3853
3896
return np .array (self ._block .to_dense (), copy = False )
3854
3897
3898
@property
def asobject(self):
    """
    Object-dtype array of the underlying block's values, obtained from
    its ``get_values(dtype=object)``.  Datetime/timedelta-like values are
    boxed to Timestamp/Timedelta instances.
    """
    block = self._block
    return block.get_values(dtype=object)
3905
+
3855
3906
@property
def itemsize(self):
    """Item size reported by the underlying block's values."""
    block_values = self._block.values
    return block_values.itemsize
0 commit comments