Skip to content

Commit 07761c5

Browse files
toobaz authored and jreback committed
CLN: refactor numeric index creation to all numeric sub-classes
Propagate name attribute closes #12309 Author: Pietro Battiston <[email protected]> Closes #13205 from toobaz/numindexname and squashes the following commits: 9d93fea [Pietro Battiston] TST: test "check_same" in assert_numpy_array_equal() bea8101 [Pietro Battiston] BUG: Make DateTimeIndex copy datetime64[ns] data on copy=True 3320727 [Pietro Battiston] DOC: What's new 757d105 [Pietro Battiston] TST: Use assert_numpy_array_equal 6d75e55 [Pietro Battiston] BUG: It makes sense to also catch ValueErrors b6c9233 [Pietro Battiston] BUG: Common NumericIndex.__new__, fixed name handling in indices
1 parent 62b4327 commit 07761c5

File tree

15 files changed

+185
-79
lines changed

15 files changed

+185
-79
lines changed

doc/source/whatsnew/v0.18.2.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,8 @@ Bug Fixes
348348
- Bug in ``pd.read_hdf()`` where attempting to load an HDF file with a single dataset, that had one or more categorical columns, failed unless the key argument was set to the name of the dataset. (:issue:`13231`)
349349

350350

351-
351+
- Bug in various index types, which did not propagate the name of the passed index (:issue:`12309`)
352+
- Bug in ``DatetimeIndex``, which did not honour ``copy=True`` (:issue:`13205`)
352353
- Bug in ``MultiIndex`` slicing where extra elements were returned when level is non-unique (:issue:`12896`)
353354

354355

pandas/indexes/base.py

+27
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,33 @@ def _shallow_copy_with_infer(self, values=None, **kwargs):
376376
pass
377377
return Index(values, **attributes)
378378

379+
def _deepcopy_if_needed(self, orig, copy=False):
380+
"""
381+
.. versionadded:: 0.18.2
382+
383+
Make a copy of self if data coincides (in memory) with orig.
384+
Subclasses should override this if self._base is not an ndarray.
385+
386+
Parameters
387+
----------
388+
orig : ndarray
389+
other ndarray to compare self._data against
390+
copy : boolean, default False
391+
when False, do not run any check, just return self
392+
393+
Returns
394+
-------
395+
A copy of self if needed, otherwise self : Index
396+
"""
397+
if copy:
398+
# Retrieve the "base objects", i.e. the original memory allocations
399+
orig = orig if orig.base is None else orig.base
400+
new = self._data if self._data.base is None else self._data.base
401+
if orig is new:
402+
return self.copy(deep=True)
403+
404+
return self
405+
379406
def _update_inplace(self, result, **kwargs):
380407
# guard when called from IndexOpsMixin
381408
raise TypeError("Index can't be updated inplace")

pandas/indexes/category.py

+3
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ def __new__(cls, data=None, categories=None, ordered=None, dtype=None,
4646
if fastpath:
4747
return cls._simple_new(data, name=name)
4848

49+
if name is None and hasattr(data, 'name'):
50+
name = data.name
51+
4952
if isinstance(data, com.ABCCategorical):
5053
data = cls._create_categorical(cls, data, categories, ordered)
5154
elif isinstance(data, CategoricalIndex):

pandas/indexes/numeric.py

+45-65
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,28 @@ class NumericIndex(Index):
2222
"""
2323
_is_numeric_dtype = True
2424

25+
def __new__(cls, data=None, dtype=None, copy=False, name=None,
26+
fastpath=False):
27+
28+
if fastpath:
29+
return cls._simple_new(data, name=name)
30+
31+
# isscalar, generators handled in coerce_to_ndarray
32+
data = cls._coerce_to_ndarray(data)
33+
34+
if issubclass(data.dtype.type, compat.string_types):
35+
cls._string_data_error(data)
36+
37+
if copy or not is_dtype_equal(data.dtype, cls._default_dtype):
38+
subarr = np.array(data, dtype=cls._default_dtype, copy=copy)
39+
cls._assert_safe_casting(data, subarr)
40+
else:
41+
subarr = data
42+
43+
if name is None and hasattr(data, 'name'):
44+
name = data.name
45+
return cls._simple_new(subarr, name=name)
46+
2547
def _maybe_cast_slice_bound(self, label, side, kind):
2648
"""
2749
This function should be overloaded in subclasses that allow non-trivial
@@ -55,6 +77,15 @@ def _convert_tolerance(self, tolerance):
5577
raise ValueError('tolerance argument for %s must be numeric: %r' %
5678
(type(self).__name__, tolerance))
5779

80+
@classmethod
81+
def _assert_safe_casting(cls, data, subarr):
82+
"""
83+
Subclasses need to override this only if the process of casting data
84+
from some accepted dtype to the internal dtype(s) bears the risk of
85+
truncation (e.g. float to int).
86+
"""
87+
pass
88+
5889

5990
class Int64Index(NumericIndex):
6091
"""
@@ -90,29 +121,7 @@ class Int64Index(NumericIndex):
90121

91122
_engine_type = _index.Int64Engine
92123

93-
def __new__(cls, data=None, dtype=None, copy=False, name=None,
94-
fastpath=False, **kwargs):
95-
96-
if fastpath:
97-
return cls._simple_new(data, name=name)
98-
99-
# isscalar, generators handled in coerce_to_ndarray
100-
data = cls._coerce_to_ndarray(data)
101-
102-
if issubclass(data.dtype.type, compat.string_types):
103-
cls._string_data_error(data)
104-
105-
elif issubclass(data.dtype.type, np.integer):
106-
dtype = np.int64
107-
subarr = np.array(data, dtype=dtype, copy=copy)
108-
else:
109-
subarr = np.array(data, dtype=np.int64, copy=copy)
110-
if len(data) > 0:
111-
if (subarr != data).any():
112-
raise TypeError('Unsafe NumPy casting to integer, you must'
113-
' explicitly cast')
114-
115-
return cls._simple_new(subarr, name=name)
124+
_default_dtype = np.int64
116125

117126
@property
118127
def inferred_type(self):
@@ -155,17 +164,22 @@ def equals(self, other):
155164
if self.is_(other):
156165
return True
157166

158-
try:
159-
return com.array_equivalent(com._values_from_object(self),
160-
com._values_from_object(other))
161-
except TypeError:
162-
# e.g. fails in numpy 1.6 with DatetimeIndex #1681
163-
return False
167+
return com.array_equivalent(com._values_from_object(self),
168+
com._values_from_object(other))
164169

165170
def _wrap_joined_index(self, joined, other):
166171
name = self.name if self.name == other.name else None
167172
return Int64Index(joined, name=name)
168173

174+
@classmethod
175+
def _assert_safe_casting(cls, data, subarr):
176+
"""
177+
Ensure incoming data can be represented as ints.
178+
"""
179+
if not issubclass(data.dtype.type, np.integer):
180+
if not np.array_equal(data, subarr):
181+
raise TypeError('Unsafe NumPy casting, you must '
182+
'explicitly cast')
169183

170184
Int64Index._add_numeric_methods()
171185
Int64Index._add_logical_methods()
@@ -200,39 +214,7 @@ class Float64Index(NumericIndex):
200214
_inner_indexer = _algos.inner_join_indexer_float64
201215
_outer_indexer = _algos.outer_join_indexer_float64
202216

203-
def __new__(cls, data=None, dtype=None, copy=False, name=None,
204-
fastpath=False, **kwargs):
205-
206-
if fastpath:
207-
return cls._simple_new(data, name)
208-
209-
data = cls._coerce_to_ndarray(data)
210-
211-
if issubclass(data.dtype.type, compat.string_types):
212-
cls._string_data_error(data)
213-
214-
if dtype is None:
215-
dtype = np.float64
216-
dtype = np.dtype(dtype)
217-
218-
# allow integer / object dtypes to be passed, but coerce to float64
219-
if dtype.kind in ['i', 'O', 'f']:
220-
dtype = np.float64
221-
222-
else:
223-
raise TypeError("cannot support {0} dtype in "
224-
"Float64Index".format(dtype))
225-
226-
try:
227-
subarr = np.array(data, dtype=dtype, copy=copy)
228-
except:
229-
raise TypeError('Unsafe NumPy casting, you must explicitly cast')
230-
231-
# coerce to float64 for storage
232-
if subarr.dtype != np.float64:
233-
subarr = subarr.astype(np.float64)
234-
235-
return cls._simple_new(subarr, name)
217+
_default_dtype = np.float64
236218

237219
@property
238220
def inferred_type(self):
@@ -339,8 +321,7 @@ def equals(self, other):
339321
return False
340322
left, right = self._values, other._values
341323
return ((left == right) | (self._isnan & other._isnan)).all()
342-
except TypeError:
343-
# e.g. fails in numpy 1.6 with DatetimeIndex #1681
324+
except (TypeError, ValueError):
344325
return False
345326

346327
def __contains__(self, other):
@@ -392,6 +373,5 @@ def isin(self, values, level=None):
392373
return lib.ismember_nans(np.array(self), value_set,
393374
isnull(list(value_set)).any())
394375

395-
396376
Float64Index._add_numeric_methods()
397377
Float64Index._add_logical_methods_disabled()

pandas/tests/frame/test_block_internals.py

+2
Original file line numberDiff line numberDiff line change
@@ -372,11 +372,13 @@ def test_consolidate_datetime64(self):
372372
ser_starting.index = ser_starting.values
373373
ser_starting = ser_starting.tz_localize('US/Eastern')
374374
ser_starting = ser_starting.tz_convert('UTC')
375+
ser_starting.index.name = 'starting'
375376

376377
ser_ending = df.ending
377378
ser_ending.index = ser_ending.values
378379
ser_ending = ser_ending.tz_localize('US/Eastern')
379380
ser_ending = ser_ending.tz_convert('UTC')
381+
ser_ending.index.name = 'ending'
380382

381383
df.starting = ser_starting.index
382384
df.ending = ser_ending.index

pandas/tests/indexes/common.py

+47
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,53 @@ def test_hash_error(self):
205205
type(ind).__name__):
206206
hash(ind)
207207

208+
def test_copy_name(self):
209+
# Check that "name" argument passed at initialization is honoured
210+
# GH12309
211+
for name, index in compat.iteritems(self.indices):
212+
if isinstance(index, MultiIndex):
213+
continue
214+
215+
first = index.__class__(index, copy=True, name='mario')
216+
second = first.__class__(first, copy=False)
217+
218+
# Even though "copy=False", we want a new object.
219+
self.assertIsNot(first, second)
220+
# Not using tm.assert_index_equal() since names differ:
221+
self.assertTrue(index.equals(first))
222+
223+
self.assertEqual(first.name, 'mario')
224+
self.assertEqual(second.name, 'mario')
225+
226+
s1 = Series(2, index=first)
227+
s2 = Series(3, index=second[:-1])
228+
if not isinstance(index, CategoricalIndex): # See GH13365
229+
s3 = s1 * s2
230+
self.assertEqual(s3.index.name, 'mario')
231+
232+
def test_ensure_copied_data(self):
233+
# Check the "copy" argument of each Index.__new__ is honoured
234+
# GH12309
235+
for name, index in compat.iteritems(self.indices):
236+
init_kwargs = {}
237+
if isinstance(index, PeriodIndex):
238+
# Needs "freq" specification:
239+
init_kwargs['freq'] = index.freq
240+
elif isinstance(index, (RangeIndex, MultiIndex, CategoricalIndex)):
241+
# RangeIndex cannot be initialized from data
242+
# MultiIndex and CategoricalIndex are tested separately
243+
continue
244+
245+
index_type = index.__class__
246+
result = index_type(index.values, copy=True, **init_kwargs)
247+
tm.assert_index_equal(index, result)
248+
tm.assert_numpy_array_equal(index.values, result.values,
249+
check_same='copy')
250+
251+
result = index_type(index.values, copy=False, **init_kwargs)
252+
tm.assert_numpy_array_equal(index.values, result.values,
253+
check_same='same')
254+
208255
def test_copy_and_deepcopy(self):
209256
from copy import copy, deepcopy
210257

pandas/tests/indexes/test_base.py

+1
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ def test_constructor_from_series(self):
172172
df['date'] = ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990',
173173
'5-1-1990']
174174
result = DatetimeIndex(df['date'], freq='MS')
175+
expected.name = 'date'
175176
self.assert_index_equal(result, expected)
176177
self.assertEqual(df['date'].dtype, object)
177178

pandas/tests/indexes/test_category.py

+14
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,20 @@ def test_identical(self):
507507
self.assertTrue(ci1.identical(ci1.copy()))
508508
self.assertFalse(ci1.identical(ci2))
509509

510+
def test_ensure_copied_data(self):
511+
# Check the "copy" argument of each Index.__new__ is honoured
512+
# GH12309
513+
# Must be tested separately from other indexes because
514+
# self.value is not an ndarray
515+
_base = lambda ar : ar if ar.base is None else ar.base
516+
for index in self.indices.values():
517+
result = CategoricalIndex(index.values, copy=True)
518+
tm.assert_index_equal(index, result)
519+
self.assertIsNot(_base(index.values), _base(result.values))
520+
521+
result = CategoricalIndex(index.values, copy=False)
522+
self.assertIs(_base(index.values), _base(result.values))
523+
510524
def test_equals(self):
511525

512526
ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True)

pandas/tests/indexes/test_numeric.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -169,8 +169,8 @@ def test_constructor(self):
169169
# explicit construction
170170
index = Float64Index([1, 2, 3, 4, 5])
171171
self.assertIsInstance(index, Float64Index)
172-
self.assertTrue((index.values == np.array(
173-
[1, 2, 3, 4, 5], dtype='float64')).all())
172+
expected = np.array([1, 2, 3, 4, 5], dtype='float64')
173+
self.assert_numpy_array_equal(index.values, expected)
174174
index = Float64Index(np.array([1, 2, 3, 4, 5]))
175175
self.assertIsInstance(index, Float64Index)
176176
index = Float64Index([1., 2, 3, 4, 5])

pandas/tests/test_testing.py

+11
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,17 @@ def test_numpy_array_equal_object_message(self):
315315
with assertRaisesRegexp(AssertionError, expected):
316316
assert_almost_equal(a, b)
317317

318+
def test_numpy_array_equal_copy_flag(self):
319+
a = np.array([1, 2, 3])
320+
b = a.copy()
321+
c = a.view()
322+
expected = 'array\(\[1, 2, 3\]\) is not array\(\[1, 2, 3\]\)'
323+
with assertRaisesRegexp(AssertionError, expected):
324+
assert_numpy_array_equal(a, b, check_same='same')
325+
expected = 'array\(\[1, 2, 3\]\) is array\(\[1, 2, 3\]\)'
326+
with assertRaisesRegexp(AssertionError, expected):
327+
assert_numpy_array_equal(a, c, check_same='copy')
328+
318329
def test_assert_almost_equal_iterable_message(self):
319330

320331
expected = """Iterable are different

pandas/tseries/index.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,15 @@ def __new__(cls, data=None,
225225
verify_integrity=True, normalize=False,
226226
closed=None, ambiguous='raise', dtype=None, **kwargs):
227227

228+
# This allows to later ensure that the 'copy' parameter is honored:
229+
if isinstance(data, Index):
230+
ref_to_data = data._data
231+
else:
232+
ref_to_data = data
233+
234+
if name is None and hasattr(data, 'name'):
235+
name = data.name
236+
228237
dayfirst = kwargs.pop('dayfirst', None)
229238
yearfirst = kwargs.pop('yearfirst', None)
230239

@@ -302,7 +311,7 @@ def __new__(cls, data=None,
302311
raise TypeError("Already tz-aware, use tz_convert "
303312
"to convert.")
304313

305-
return data
314+
return data._deepcopy_if_needed(ref_to_data, copy)
306315

307316
if issubclass(data.dtype.type, compat.string_types):
308317
data = tslib.parse_str_array_to_datetime(data, freq=freq,
@@ -335,10 +344,7 @@ def __new__(cls, data=None,
335344
elif data.dtype == _INT64_DTYPE:
336345
if isinstance(data, Int64Index):
337346
raise TypeError('cannot convert Int64Index->DatetimeIndex')
338-
if copy:
339-
subarr = np.asarray(data, dtype=_NS_DTYPE)
340-
else:
341-
subarr = data.view(_NS_DTYPE)
347+
subarr = data.view(_NS_DTYPE)
342348
else:
343349
if isinstance(data, (ABCSeries, Index)):
344350
values = data._values
@@ -414,7 +420,7 @@ def __new__(cls, data=None,
414420
if inferred:
415421
subarr.offset = to_offset(inferred)
416422

417-
return subarr
423+
return subarr._deepcopy_if_needed(ref_to_data, copy)
418424

419425
@classmethod
420426
def _generate(cls, start, end, periods, name, offset,

0 commit comments

Comments
 (0)