Skip to content

Commit db00a07

Browse files
committed
ENH: Add Numpy Array Interface support to Pandas objects
TST: test handling of the NumPy Array Interface; also explicitly test handling of rpy2 objects. Moved logic to is_array_like and added an __array__ check. Changed the pandas check from NDFrame to PandasObject. Updated with array_like and fixed rpy2 tests.
1 parent 7f034bc commit db00a07

File tree

8 files changed

+207
-5
lines changed

8 files changed

+207
-5
lines changed

pandas/core/common.py

+71-3
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ def _isnull_new(obj):
217217
return _isnull_ndarraylike(obj)
218218
elif isinstance(obj, ABCGeneric):
219219
return obj._constructor(obj._data.isnull(func=isnull))
220-
elif isinstance(obj, list) or hasattr(obj, '__array__'):
220+
elif isinstance(obj, list) or is_array_like(obj):
221221
return _isnull_ndarraylike(np.asarray(obj))
222222
else:
223223
return obj is None
@@ -243,7 +243,7 @@ def _isnull_old(obj):
243243
return _isnull_ndarraylike_old(obj)
244244
elif isinstance(obj, ABCGeneric):
245245
return obj._constructor(obj._data.isnull(func=_isnull_old))
246-
elif isinstance(obj, list) or hasattr(obj, '__array__'):
246+
elif isinstance(obj, list) or is_array_like(obj):
247247
return _isnull_ndarraylike_old(np.asarray(obj))
248248
else:
249249
return obj is None
@@ -2266,7 +2266,7 @@ def _asarray_tuplesafe(values, dtype=None):
22662266
from pandas.core.index import Index
22672267

22682268
if not (isinstance(values, (list, tuple))
2269-
or hasattr(values, '__array__')):
2269+
or is_array_like(values)):
22702270
values = list(values)
22712271
elif isinstance(values, Index):
22722272
return values.values
@@ -2489,6 +2489,38 @@ def is_list_like(arg):
24892489
return (hasattr(arg, '__iter__') and
24902490
not isinstance(arg, compat.string_and_binary_types))
24912491

2492+
def is_array_like(obj):
    """
    Check if object provides access to a data buffer via one of the numpy
    array apis.

    http://docs.scipy.org/doc/numpy/reference/arrays.classes.html
    http://docs.scipy.org/doc/numpy/reference/arrays.interface.html

    Parameters
    ----------
    obj : Object

    Returns
    -------
    bool
        True if `obj` is an instance exposing a callable ``__array__``, a
        dict-valued ``__array_interface__`` or an ``__array_struct__``
        attribute. Class objects are never reported as array-like.

    Notes
    -----
    Remember that ndarrays and NDFrames are array-like.
    """
    # Looking the protocol attributes up on a class yields unbound methods
    # and descriptors rather than usable data accessors, so a class such as
    # ``np.ndarray`` itself must not be mistaken for an array.
    if isinstance(obj, type):
        return False

    # numpy ndarray subclass api
    if callable(getattr(obj, '__array__', None)):
        return True

    # Python side
    # __array_interface__ is a dict
    if isinstance(getattr(obj, '__array_interface__', None), dict):
        return True

    # C-struct access
    return hasattr(obj, '__array_struct__')
24922524

24932525
def _is_sequence(x):
24942526
try:
@@ -3105,3 +3137,39 @@ def _maybe_match_name(a, b):
31053137
if a_name == b_name:
31063138
return a_name
31073139
return None
3140+
3141+
def _unhandled_array_interface(obj):
    """
    Report whether `obj` is a foreign array-like that pandas should coerce.

    An object qualifies when it:
    1) Implements the array interface (as judged by ``is_array_like``)
    2) Is not an object type that pandas handles natively, i.e. it is
       neither a ``np.ndarray`` nor a ``PandasObject``

    #2 is a moving target. Essentially any 3rd party module can implement the
    NumPy Array Interface and should be treated as array-like. For example,
    the rpy2 SexpVector implements `__array_struct__` which we do not
    explicitly handle.

    In the future, if we add explicit handling for the SexpVector, this
    function would have to account for that.

    Parameters
    ----------
    obj : Object

    Returns
    -------
    bool

    Examples
    --------
    Typical use inside a constructor::

        if com._unhandled_array_interface(data):
            data = np.asarray(data)
    """
    if not isinstance(obj, np.ndarray):
        # Imported only once the ndarray check has failed, exactly where the
        # lookup is needed (presumably kept local to avoid a circular import
        # at module load -- confirm against pandas.core.base).
        import pandas.core.base as base

        if not isinstance(obj, base.PandasObject):
            return is_array_like(obj)

    # ndarrays and pandas objects are handled natively
    return False

pandas/core/frame.py

+4
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,10 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
197197
if dtype is not None:
198198
dtype = self._validate_dtype(dtype)
199199

200+
# convert unhandled array-like objects
201+
if com._unhandled_array_interface(data):
202+
data = np.asarray(data)
203+
200204
if isinstance(data, DataFrame):
201205
data = data._data
202206

pandas/core/index.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False,
166166
if copy:
167167
subarr = subarr.copy()
168168

169-
elif hasattr(data, '__array__'):
169+
elif com.is_array_like(data):
170170
return Index(np.asarray(data), dtype=dtype, copy=copy, name=name,
171171
**kwargs)
172172
elif data is None or np.isscalar(data):

pandas/core/panel.py

+4
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,10 @@ def _init_data(self, data, copy, dtype, **kwargs):
145145
if dtype is not None:
146146
dtype = self._validate_dtype(dtype)
147147

148+
# convert unhandled array-like objects
149+
if com._unhandled_array_interface(data):
150+
data = np.asarray(data)
151+
148152
passed_axes = [kwargs.get(a) for a in self._AXIS_ORDERS]
149153
axes = None
150154
if isinstance(data, BlockManager):

pandas/core/series.py

+3
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,9 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
190190
raise TypeError("{0!r} type is unordered"
191191
"".format(data.__class__.__name__))
192192
else:
193+
# unhandled array-like objects
194+
if com.is_array_like(data):
195+
data = np.asarray(data)
193196

194197
# handle sparse passed here (and force conversion)
195198
if isinstance(data, ABCSparseArray):

pandas/rpy/tests/test_common.py

+25
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,31 @@ def test_factor(self):
207207
result = com.load_data(name)
208208
assert np.equal(result, factors)
209209

210+
def test_pandas_constructor_compat(self):
    """
    Check that an rpy2 SexpVector is accepted by the pandas constructors.

    For Series, DataFrame and Panel an R array of matching dimensionality
    is built on the R side, handed to the constructor, and the resulting
    values are compared with what numpy extracts from the same R object.
    """
    constructors = [pd.Series, pd.DataFrame, pd.Panel]
    # Looked up as in the original test; the R command below refers to
    # rnorm by name, so this handle is not used any further.
    rnorm = r['rnorm']
    template = "test_arr = rnorm({N}); dim(test_arr) = c({shape});test_arr"

    for typ in constructors:
        ndim = typ._AXIS_LEN
        N = 10 ** ndim
        dims = ','.join(['10'] * ndim)

        # create array on the R side
        test_arr = r(template.format(N=N, shape=dims))

        # numpy.array handles array interfaces correctly
        npy_arr = np.array(test_arr)
        assert npy_arr.ndim == ndim
        assert npy_arr.size == N

        assert isinstance(test_arr, robj.SexpVector)
        pobj = typ(test_arr)
        values = pobj.values
        tm.assert_almost_equal(values, np.array(test_arr))
        tm.assert_almost_equal(pobj.values, npy_arr)
234+
210235
if __name__ == '__main__':
211236
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
212237
# '--with-coverage', '--cover-package=pandas.core'],

pandas/tests/test_common.py

+64-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def test_get_callable_name():
4242
from functools import partial
4343
getname = com._get_callable_name
4444

45-
def fn(x):
45+
def fn(x):
4646
return x
4747
lambda_ = lambda x: x
4848
part1 = partial(fn)
@@ -890,6 +890,69 @@ def test_2d_datetime64(self):
890890
expected[:, [2, 4]] = datetime(2007, 1, 1)
891891
tm.assert_almost_equal(result, expected)
892892

893+
class FakeArrArray(object):
    """
    Test helper exposing a wrapped array only through ``__array__``.

    Parameters
    ----------
    arr : array whose data is handed out on conversion
    """
    def __init__(self, arr):
        self.arr = arr

    def __array__(self, dtype=None, copy=None):
        # NumPy may call ``__array__`` with a requested dtype (and, in newer
        # releases, a ``copy`` flag); a zero-argument signature would raise
        # TypeError for e.g. ``np.asarray(obj, dtype=float)``.
        if copy:
            return np.array(self.arr, dtype=dtype)
        return np.asarray(self.arr, dtype=dtype)
899+
900+
class FakeArrInterface(object):
    """
    Test helper exposing a wrapped array only via ``__array_interface__``.
    """
    def __init__(self, arr):
        self.arr = arr

    # read-only view onto the wrapped array's interface dict
    __array_interface__ = property(
        lambda self: self.arr.__array_interface__)
907+
908+
class FakeArrStruct(object):
    """
    Test helper exposing a wrapped array only via ``__array_struct__``.
    """
    def __init__(self, arr):
        self.arr = arr

    # read-only view onto the wrapped array's C-struct handle
    __array_struct__ = property(
        lambda self: self.arr.__array_struct__)
915+
916+
def test_is_array_like():
    """
    Test interface from:
    http://docs.scipy.org/doc/numpy/reference/arrays.interface.html

    Different from ndarray subclass

    Covers the ndarray itself, one wrapper per protocol attribute, and
    plain Python objects that expose none of the protocols.
    """
    arr = np.arange(10)
    assert com.is_array_like(arr) is True

    # __array__
    arr_array = FakeArrArray(arr)
    assert com.is_array_like(arr_array) is True

    # __array_interface__
    arr_interface = FakeArrInterface(arr)
    assert com.is_array_like(arr_interface) is True

    # __array_struct__
    arr_struct = FakeArrStruct(arr)
    assert com.is_array_like(arr_struct) is True

    # negative cases: without these, an implementation that always
    # returned True would pass
    for obj in ([1, 2, 3], (1, 2, 3), 'abc', 1, None):
        assert com.is_array_like(obj) is False, repr(obj)
937+
938+
def test_unhandled_array_interface():
    """
    Natively handled objects (ndarray, Series, DataFrame) must not be
    flagged, while foreign wrappers exposing any of the array protocols
    must be.
    """
    # skip the structures we already explicitly handle
    arr = np.arange(10)
    series = Series(arr)
    frame = tm.makeDataFrame()
    assert not com._unhandled_array_interface(series)
    assert not com._unhandled_array_interface(frame)
    assert not com._unhandled_array_interface(arr)

    # __array__
    arr_array = FakeArrArray(arr)
    assert com._unhandled_array_interface(arr_array) is True

    # __array_interface__
    arr_interface = FakeArrInterface(arr)
    assert com._unhandled_array_interface(arr_interface) is True

    # __array_struct__
    arr_struct = FakeArrStruct(arr)
    assert com._unhandled_array_interface(arr_struct) is True
893956

894957
if __name__ == '__main__':
895958
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

pandas/tests/test_generic.py

+35
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,41 @@ def f(dtype):
227227
f('float64')
228228
f('M8[ns]')
229229

230+
def test_constructor_array_interface(self):
    """
    Objects that implement the NumPy Array Interface must be treated like
    arrays by the constructor under test (``self._typ``).
    """
    class FakeArrInterface(object):
        # exposes the wrapped array through __array_interface__ only
        def __init__(self, arr):
            self.arr = arr

        __array_interface__ = property(
            lambda self: self.arr.__array_interface__)

    class FakeArrStruct(object):
        # exposes the wrapped array through __array_struct__ only
        def __init__(self, arr):
            self.arr = arr

        __array_struct__ = property(
            lambda self: self.arr.__array_struct__)

    shape = [10] * self._ndim

    # one fresh random array per wrapper, in the original order
    for wrapper in (FakeArrInterface, FakeArrStruct):
        arr = np.random.randn(*shape)
        wrapped = wrapper(arr)
        pobj = self._typ(wrapped)
        assert_almost_equal(pobj.values, arr)
        assert_almost_equal(pobj.values, np.array(wrapped))
263+
264+
230265
def check_metadata(self, x, y=None):
231266
for m in x._metadata:
232267
v = getattr(x,m,None)

0 commit comments

Comments
 (0)