
Commit 2d2f173

Keming Zhang authored and jreback committed
BUG: Simple operation unexpectedly changes dtype.
closes #10503 closes #12477
1 parent 2e4da9b commit 2d2f173

4 files changed: +108 -3 lines changed


doc/source/whatsnew/v0.18.0.txt  (+84)

@@ -362,6 +362,89 @@ New Behavior:
    s.index
    print(s.to_csv(path=None))

+Changes to dtype assignment behaviors
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When a DataFrame's slice is updated with a new slice of the same
+dtype, the dtype of the DataFrame will now remain the same.
+
+Previous Behavior:
+
+.. code-block:: python
+
+   In [2]: df = pd.DataFrame({'a':[0, 1, 1], 'b':[100, 200, 300]}, dtype='uint32')
+
+   In [3]: df.info()
+   <class 'pandas.core.frame.DataFrame'>
+   RangeIndex: 3 entries, 0 to 2
+   Data columns (total 2 columns):
+   a    3 non-null uint32
+   b    3 non-null uint32
+   dtypes: uint32(2)
+   memory usage: 96.0 bytes
+
+   In [4]: ix = df['a'] == 1
+
+   In [5]: df.loc[ix, 'b'] = df.loc[ix, 'b']
+
+   In [6]: df.info()
+   <class 'pandas.core.frame.DataFrame'>
+   RangeIndex: 3 entries, 0 to 2
+   Data columns (total 2 columns):
+   a    3 non-null int64
+   b    3 non-null int64
+   dtypes: int64(2)
+
+New Behavior:
+
+.. ipython:: python
+
+   df = pd.DataFrame({'a':[0, 1, 1], 'b':[100, 200, 300]}, dtype='uint32')
+   df.info()
+   ix = df['a'] == 1
+   df.loc[ix, 'b'] = df.loc[ix, 'b']
+   df.info()
+
+
+When a DataFrame's integer slice is partially updated with a new slice of floats
+that could be downcast to integer without losing precision, the dtype of the
+slice will now be set to float instead of integer.
+
+Previous Behavior:
+
+.. code-block:: python
+
+   In [4]: df = pd.DataFrame(np.array(range(1,10)).reshape(3,3),
+      ...:                   columns=list('abc'),
+      ...:                   index=[[4,4,8], [8,10,12]])
+
+   In [5]: df
+   Out[5]:
+         a  b  c
+   4 8   1  2  3
+     10  4  5  6
+   8 12  7  8  9
+
+   In [6]: df.ix[4, 'c'] = np.array([0., 1.])
+
+   In [7]: df
+   Out[7]:
+         a  b  c
+   4 8   1  2  0
+     10  4  5  1
+   8 12  7  8  9
+
+New Behavior:
+
+.. ipython:: python
+
+   df = pd.DataFrame(np.array(range(1,10)).reshape(3,3),
+                     columns=list('abc'),
+                     index=[[4,4,8], [8,10,12]])
+   df
+   df.ix[4, 'c'] = np.array([0., 1.])
+   df
+
 .. _whatsnew_0180.enhancements.xarray:

 to_xarray
@@ -1119,3 +1202,4 @@ Bug Fixes
 - Bug in ``DataFrame.apply`` in which reduction was not being prevented for cases in which ``dtype`` was not a numpy dtype (:issue:`12244`)
 - Bug when initializing categorical series with a scalar value. (:issue:`12336`)
 - Bug when specifying a UTC ``DatetimeIndex`` by setting ``utc=True`` in ``.to_datetime`` (:issue:`11934`)
+- Bug where modifying a slice of a ``DataFrame`` with the same ``dtype`` could unexpectedly change the ``dtype`` of the ``DataFrame`` (:issue:`10503`)

pandas/core/internals.py  (+4 -1)

@@ -702,7 +702,10 @@ def _is_empty_indexer(indexer):
             values[indexer] = value

             # coerce and try to infer the dtypes of the result
-            if lib.isscalar(value):
+            if hasattr(value, 'dtype') and is_dtype_equal(values.dtype,
+                                                          value.dtype):
+                dtype = value.dtype
+            elif lib.isscalar(value):
                 dtype, _ = _infer_dtype_from_scalar(value)
             else:
                 dtype = 'infer'
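
In words, the new branch makes the setitem dtype decision prefer the existing dtype whenever the assigned value carries an identical dtype, before falling back to scalar inference and finally to generic inference. A minimal standalone sketch of that precedence, using only NumPy; the helper name pick_result_dtype is hypothetical and not part of pandas:

    import numpy as np

    def pick_result_dtype(values, value):
        # Illustration of the precedence introduced above (not pandas code):
        # 1) value has the same dtype as the existing block -> keep it
        # 2) value is a scalar                              -> infer from the scalar
        # 3) anything else                                  -> defer to generic inference
        if hasattr(value, 'dtype') and values.dtype == value.dtype:
            return value.dtype
        if np.isscalar(value):
            return np.result_type(value)
        return 'infer'

    values = np.array([100, 200, 300], dtype='uint32')
    print(pick_result_dtype(values, np.array([201, 301], dtype='uint32')))  # uint32
    print(pick_result_dtype(values, 1.5))                                    # float64
    print(pick_result_dtype(values, [1.0, 2.0]))                             # 'infer'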

pandas/tests/test_generic.py  (+18)

@@ -362,6 +362,24 @@ def test_head_tail(self):
         self._compare(o.head(-3), o.head(7))
         self._compare(o.tail(-3), o.tail(7))

+    def test_dtype_after_slice_update(self):
+        # GH10503
+
+        # assigning the same type should not change the type
+        df1 = pd.DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]},
+                           dtype='uint32')
+        ix = df1['a'] == 1
+        newb1 = df1.loc[ix, 'b'] + 1
+        df1.loc[ix, 'b'] = newb1
+        assert_equal(df1['a'].dtype, newb1.dtype)
+
+        # assigning a new type should get the inferred type
+        df2 = pd.DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]},
+                           dtype='uint64')
+        newb2 = df2.loc[ix, 'b']
+        df1.loc[ix, 'b'] = newb2
+        assert_equal(df1['a'].dtype, np.dtype('int64'))
+
     def test_sample(self):
         # Fixes issue: 2419

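
Note that the assertions above check the dtype of column 'a' even though only 'b' is assigned: both columns start out in one shared uint32 block, so the original bug would have flipped 'a' to int64 as well. A rough reproduction of the first case, assuming a pandas version that includes this fix:

    import pandas as pd

    # 'a' and 'b' share a single uint32 block; updating a slice of 'b' with
    # values of the same dtype should leave the block's dtype untouched.
    df = pd.DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]}, dtype='uint32')
    ix = df['a'] == 1
    df.loc[ix, 'b'] = df.loc[ix, 'b'] + 1
    print(df.dtypes)   # expected: uint32 for both 'a' and 'b'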

pandas/tests/test_indexing.py  (+2 -2)

@@ -3256,12 +3256,12 @@ def test_multiindex_assignment(self):

         df.ix[4, 'c'] = arr
         assert_series_equal(df.ix[4, 'c'], Series(arr, index=[8, 10], name='c',
-                                                  dtype='int64'))
+                                                  dtype='float64'))

         # scalar ok
         df.ix[4, 'c'] = 10
         assert_series_equal(df.ix[4, 'c'], Series(10, index=[8, 10], name='c',
-                                                  dtype='int64'))
+                                                  dtype='float64'))

         # invalid assignments
         def f():
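
For context on this expectation change: with the fix, a partial assignment of float values into an integer column keeps the column as float64 instead of downcasting the assigned values, and the later scalar assignment is then expected as float64 as well, presumably because the column is already float64 at that point (see the second whatsnew entry above). A quick check of the resulting dtype, sketched for a pandas version of this era (.ix was removed in later releases):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.arange(1, 10).reshape(3, 3),
                      columns=list('abc'),
                      index=[[4, 4, 8], [8, 10, 12]])
    df.ix[4, 'c'] = np.array([0., 1.])   # partial update of an int column with floats
    print(df['c'].dtype)                 # expected: float64 with this change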
