Skip to content

Commit 0aeee8d

Browse files
committed
ENH: inplace dtype changes, df per-column dtype changes; GH7271
1 parent af7bdd3 commit 0aeee8d

File tree

5 files changed

+138
-6
lines changed

5 files changed

+138
-6
lines changed

doc/source/whatsnew/v0.18.2.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ Other enhancements
3030
^^^^^^^^^^^^^^^^^^
3131

3232
- The ``.tz_localize()`` method of ``DatetimeIndex`` and ``Timestamp`` has gained the ``errors`` keyword, so you can potentially coerce nonexistent timestamps to ``NaT``. The default behaviour is still to raise a ``NonExistentTimeError`` (:issue:`13057`)
33-
34-
33+
- The ``copy`` argument to the ``astype()`` methods has been deprecated in favor of a new ``inplace`` argument. (:issue:`12086`)
34+
- ``astype()`` will now accept a dict of column name to data types mapping as the ``dtype`` argument. (:issue:`12086`)
3535

3636

3737
.. _whatsnew_0182.api:

pandas/core/frame.py

+41
Original file line numberDiff line numberDiff line change
@@ -3772,6 +3772,47 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
37723772
# ----------------------------------------------------------------------
37733773
# Misc methods
37743774

3775+
def astype(self, dtype, copy=True, inplace=False, raise_on_error=True,
           **kwargs):
    """
    Cast object to given data type(s).

    Parameters
    ----------
    dtype : numpy.dtype or Python type (to cast entire DataFrame to the
        same type). Alternatively, {col: dtype, ...}, where col is a column
        label and dtype is a numpy.dtype or Python type (to cast one or
        more of the DataFrame's columns to column-specific types).
        Columns not named in the mapping keep their current dtype.
    copy : deprecated; use inplace instead
    inplace : boolean, default False
        Modify the DataFrame in place (do not create a new object)
    raise_on_error : raise on invalid input
    kwargs : keyword arguments forwarded to the underlying ``astype``
        call(s)

    Returns
    -------
    casted : type of caller, or None when ``inplace=True``

    Raises
    ------
    KeyError
        If ``dtype`` is a mapping and one of its keys is not a column
        label. The check runs before any column is cast, so a failed
        in-place call leaves the DataFrame untouched.
    """
    if not isinstance(dtype, collections.Mapping):
        # Single dtype for the whole frame: defer to the generic
        # implementation.
        return super(DataFrame, self).astype(
            dtype=dtype, copy=copy, inplace=inplace,
            raise_on_error=raise_on_error, **kwargs)

    # Validate all keys up front. Looking columns up lazily would raise
    # the same KeyError, but only after earlier columns had already been
    # cast, leaving an in-place call half applied.
    unknown = [col for col in dtype if col not in self.columns]
    if unknown:
        raise KeyError('Only column labels can be used as keys of a '
                       'dtype mapping; not found: %r' % (unknown,))

    if inplace:
        for col, typ in dtype.items():
            self[col].astype(typ, inplace=True,
                             raise_on_error=raise_on_error, **kwargs)
        return None

    # Function-scope import, as in the original implementation.
    from pandas.tools.merge import concat

    # raise_on_error / kwargs are forwarded here as well so that the
    # per-column path honours the same options as the in-place and
    # whole-frame paths.
    casted_cols = [self[col].astype(typ, copy=copy,
                                    raise_on_error=raise_on_error,
                                    **kwargs)
                   for col, typ in dtype.items()]
    other_col_labels = self.columns.difference(list(dtype))
    other_cols = [self[col].copy() if copy else self[col]
                  for col in other_col_labels]

    pieces = casted_cols + other_cols
    if not pieces:
        # Empty mapping on a frame without columns: nothing to cast and
        # nothing for concat to assemble.
        return self.copy()

    new_df = concat(pieces, axis=1)
    # concat puts the casted columns first; restore the caller's order.
    return new_df.reindex(columns=self.columns, copy=False)
3815+
37753816
def first_valid_index(self):
37763817
"""
37773818
Return label for first non-NA/null value

pandas/core/generic.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ def _init_mgr(self, mgr, axes=None, dtype=None, copy=False):
143143

144144
@property
145145
def _constructor(self):
146-
"""Used when a manipulation result has the same dimesions as the
146+
"""Used when a manipulation result has the same dimensions as the
147147
original.
148148
"""
149149
raise AbstractMethodError(self)
@@ -2930,22 +2930,30 @@ def blocks(self):
29302930
"""Internal property, property synonym for as_blocks()"""
29312931
return self.as_blocks()
29322932

2933-
def astype(self, dtype, copy=True, raise_on_error=True, **kwargs):
2933+
def astype(self, dtype, copy=True, inplace=False, raise_on_error=True,
           **kwargs):
    """
    Cast object to input numpy.dtype

    Parameters
    ----------
    dtype : numpy.dtype or Python type
    copy : deprecated; use inplace instead
    inplace : boolean, default False
        Modify the NDFrame in place (do not create a new object)
    raise_on_error : raise on invalid input
    kwargs : keyword arguments to pass on to the constructor

    Returns
    -------
    casted : type of caller (None when ``inplace=True``)
    """
    if not inplace:
        # Build a new object of the caller's type around the casted data.
        casted_mgr = self._data.astype(dtype=dtype, copy=copy,
                                       raise_on_error=raise_on_error,
                                       **kwargs)
        return self._constructor(casted_mgr).__finalize__(self)

    # In-place: cast with copy=False (``copy`` is not consulted on this
    # path) and swap the result into this object.
    casted_mgr = self._data.astype(dtype=dtype, copy=False,
                                   raise_on_error=raise_on_error,
                                   **kwargs)
    self._update_inplace(casted_mgr)
    return None

pandas/tests/frame/test_dtypes.py

+70
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,76 @@ def test_astype_str(self):
372372
expected = DataFrame(['1.12345678901'])
373373
assert_frame_equal(result, expected)
374374

375+
def test_astype_dict(self):
    """Check DataFrame.astype with a {column: dtype} mapping (GH7271)."""
    # GH7271
    a = Series(date_range('2010-01-04', periods=5))
    b = Series(range(5))
    c = Series([0.0, 0.2, 0.4, 0.6, 0.8])
    d = Series(['1.0', '2', '3.14', '4', '5.4'])
    df = DataFrame({'a': a, 'b': b, 'c': c, 'd': d})
    # Reference copy used to verify that non-inplace calls leave df alone.
    original = df.copy(deep=True)

    # change type of a subset of columns
    expected = DataFrame({
        'a': a,
        'b': Series(['0', '1', '2', '3', '4']),
        'c': c,
        'd': Series([1.0, 2.0, 3.14, 4.0, 5.4], dtype='float32')})
    astyped = df.astype({'b': 'str', 'd': 'float32'})
    assert_frame_equal(astyped, expected)
    assert_frame_equal(df, original)
    self.assertEqual(astyped.b.dtype, 'object')
    self.assertEqual(astyped.d.dtype, 'float32')

    # change all columns
    assert_frame_equal(df.astype({'a': str, 'b': str, 'c': str, 'd': str}),
                       df.astype(str))
    assert_frame_equal(df, original)

    # error should be raised when using something other than column labels
    # in the keys of the dtype dict
    self.assertRaises(KeyError, df.astype, {'b': str, 2: str})
    self.assertRaises(KeyError, df.astype, {'e': str})
    assert_frame_equal(df, original)

    # if the dtypes provided are the same as the original dtypes, the
    # resulting DataFrame should be the same as the original DataFrame
    equiv = df.astype({col: df[col].dtype for col in df.columns})
    assert_frame_equal(df, equiv)
    assert_frame_equal(df, original)

    # using inplace=True, the df should be changed
    output = df.astype({'b': 'str', 'd': 'float32'}, inplace=True)
    # Identity check: comparing a wrongly returned DataFrame to None with
    # == would not produce a clean assertion failure.
    self.assertIsNone(output)
    assert_frame_equal(df, expected)
    df.astype({'b': np.float32, 'c': 'float32', 'd': np.float32},
              inplace=True)
    # Column 'a' is never named in the mapping and must keep its dtype.
    self.assertEqual(df.a.dtype, original.a.dtype)
    self.assertEqual(df.b.dtype, 'float32')
    self.assertEqual(df.c.dtype, 'float32')
    self.assertEqual(df.d.dtype, 'float32')
    self.assertEqual(df.b[0], 0.0)
    df.astype({'b': str, 'c': 'float64', 'd': np.float64}, inplace=True)
    self.assertEqual(df.a.dtype, original.a.dtype)
    self.assertEqual(df.b.dtype, 'object')
    self.assertEqual(df.c.dtype, 'float64')
    self.assertEqual(df.d.dtype, 'float64')
    self.assertEqual(df.b[0], '0.0')
430+
431+
def test_astype_inplace(self):
    """Cast a whole frame in place, first to float and then to str."""
    # GH7271
    df = DataFrame({'a': range(10),
                    'b': range(2, 12),
                    'c': np.arange(4.0, 14.0, dtype='float64')})

    # (dtype passed to astype, exact Python type of each element,
    #  resulting column dtype)
    stages = [('float', float, 'float64'),
              ('str', str, 'object')]
    for target, element_type, column_dtype in stages:
        df.astype(target, inplace=True)
        for col in df.columns:
            exact = df[col].map(lambda x: type(x) is element_type)
            self.assertTrue(exact.all())
            self.assertEqual(df[col].dtype, column_dtype)
444+
375445
def test_timedeltas(self):
376446
df = DataFrame(dict(A=Series(date_range('2012-1-1', periods=3,
377447
freq='D')),

pandas/tests/series/test_dtypes.py

+13
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,19 @@ def test_astype_unicode(self):
133133
reload(sys) # noqa
134134
sys.setdefaultencoding(former_encoding)
135135

136+
def test_astype_inplace(self):
    """Series.astype keeps the name and honours the inplace flag."""
    s = Series(np.random.randn(5), name='foo')
    targets = ('float32', 'float64', 'int64', 'int32')

    # inplace=False: a casted Series is returned, carrying the name over.
    for target in targets:
        result = s.astype(target, inplace=False)
        self.assertEqual(result.dtype, target)
        self.assertEqual(result.name, s.name)

    # inplace=True: s itself takes the new dtype and keeps its name.
    for target in targets:
        s.astype(target, inplace=True)
        self.assertEqual(s.dtype, target)
        self.assertEqual(s.name, 'foo')
148+
136149
def test_complexx(self):
137150
# GH4819
138151
# complex access for ndarray compat

0 commit comments

Comments
 (0)