diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 09bd09b06d9b9..2d5fce1335f62 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -761,6 +761,7 @@ Datetimelike API Changes - ``pandas.tseries.frequencies.get_freq_group()`` and ``pandas.tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`) - :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`) - Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`). +- Construction of a :class:`Series` from a list of length 1 and an index of length > 1, which used to interpret the list as a scalar, now raises a ``ValueError`` (:issue:`18626`). - Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'`` (:issue:`18808`) - Operations between a :class:`Series` with dtype ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raises ``TypeError`` (:issue:`18850`) - Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`) @@ -1135,6 +1136,7 @@ Reshaping ^^^^^^^^^ - Bug in :func:`DataFrame.stack` which fails trying to sort mixed type levels under Python 3 (:issue:`18310`) +- Fixed construction of a :class:`Series` from a ``dict`` containing nested lists as values (:issue:`18625`) - Bug in :func:`DataFrame.unstack` which casts int to float if ``columns`` is a ``MultiIndex`` with unused levels (:issue:`17845`) - Bug in :func:`DataFrame.unstack` which raises an error if ``index`` is a ``MultiIndex`` with unused labels on the unstacked 
level (:issue:`18562`) - Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`) diff --git a/pandas/core/series.py b/pandas/core/series.py index 1d6f770d92795..e164351952368 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4048,7 +4048,8 @@ def _try_cast(arr, take_fast_path): # GH #846 if isinstance(data, (np.ndarray, Index, Series)): - + if data.ndim > 1: + raise ValueError('Data must be 1-dimensional') if dtype is not None: subarr = np.array(data, copy=False) @@ -4085,7 +4086,9 @@ def _try_cast(arr, take_fast_path): return subarr elif isinstance(data, (list, tuple)) and len(data) > 0: - if dtype is not None: + if all(is_list_like(item) for item in data): + subarr = construct_1d_object_array_from_listlike(data) + elif dtype is not None: try: subarr = _try_cast(data, False) except Exception: @@ -4107,11 +4110,12 @@ def _try_cast(arr, take_fast_path): else: subarr = _try_cast(data, False) - # scalar like, GH - if getattr(subarr, 'ndim', 0) == 0: - if isinstance(data, list): # pragma: no cover - subarr = np.array(data, dtype=object) - elif index is not None: + if subarr.ndim == 0 or is_scalar(data): + if subarr.ndim == 1: + # a scalar upcasted to 1-dimensional by maybe_cast_to_datetime() + value = subarr[0] + dtype = subarr.dtype + else: value = data # figure out the dtype from the value (upcast if necessary) @@ -4121,26 +4125,7 @@ def _try_cast(arr, take_fast_path): # need to possibly convert the value here value = maybe_cast_to_datetime(value, dtype) - subarr = construct_1d_arraylike_from_scalar( - value, len(index), dtype) - - else: - return subarr.item() - - # the result that we want - elif subarr.ndim == 1: - if index is not None: - - # a 1-element ndarray - if len(subarr) != len(index) and len(subarr) == 1: - subarr = construct_1d_arraylike_from_scalar( - subarr[0], len(index), subarr.dtype) - - elif subarr.ndim > 1: - if isinstance(data, np.ndarray): - raise Exception('Data must be 1-dimensional') - 
else: - subarr = com._asarray_tuplesafe(data, dtype=dtype) + subarr = construct_1d_arraylike_from_scalar(value, len(index), dtype) # This is to prevent mixed-type Series getting all casted to # NumPy string type, e.g. NaN --> '-1#IND'. diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index a057ca0879cac..528dd52ce7a57 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -135,7 +135,8 @@ def test_apply_broadcast(self): # scalars result = self.frame.apply(np.mean, result_type='broadcast') - expected = DataFrame([self.frame.mean()], index=self.frame.index) + expected = DataFrame([self.frame.mean()] * len(self.frame.index), + index=self.frame.index) tm.assert_frame_equal(result, expected) result = self.frame.apply(np.mean, axis=1, result_type='broadcast') diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 984f37042d600..cdda1c81a1bdb 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -3215,8 +3215,7 @@ def test_nan_stays_float(self): assert pd.isna(idx0.get_level_values(1)).all() # the following failed in 0.14.1 assert pd.isna(idxm.get_level_values(1)[:-1]).all() - - df0 = pd.DataFrame([[1, 2]], index=idx0) + df0 = pd.DataFrame([[1, 2]] * 2, index=idx0) df1 = pd.DataFrame([[3, 4]], index=idx1) dfm = df0 - df1 assert pd.isna(df0.index.get_level_values(1)).all() diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index c4d925b83585b..f48155fc8f18b 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -496,7 +496,7 @@ def test_preserve_categorical_dtype(self): cidx = pd.CategoricalIndex(list("xyz"), ordered=ordered) midx = pd.MultiIndex(levels=[['a'], cidx], labels=[[0, 0], [0, 1]]) - df = DataFrame([[10, 11]], index=midx) + df = DataFrame([[10, 11]] * 2, index=midx) expected = DataFrame([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], index=midx, columns=cidx) 
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 82b5b1c10fa2d..d1b50cef22ad7 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -50,6 +50,11 @@ def test_scalar_conversion(self): assert int(Series([1.])) == 1 assert long(Series([1.])) == 1 + @pytest.mark.parametrize('scalar', [1, 'abc', {2, 3}]) + @pytest.mark.parametrize('index', [range(2), ['a', 'b']]) + def test_invalid_1_dimensional(self, scalar, index): + pytest.raises(ValueError, Series, [scalar], index=index) + def test_constructor(self): assert self.ts.index.is_all_dates @@ -828,6 +833,17 @@ def test_constructor_dict_order(self): expected = Series([0, 1, 2], index=list('abc')) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize('input_class', [list, tuple, iter]) + @pytest.mark.parametrize('dtype', ['object', None]) + def test_constructor_dict_nested_lists(self, input_class, dtype): + # GH 18625 + d = {'a': input_class([input_class([1, 2, 3]), + input_class([4, 5, 6])]), + 'b': input_class([input_class([7, 8, 9])])} + result = Series(d, index=['a', 'b'], dtype=dtype) + expected = Series([d['a'], d['b']], index=['a', 'b']) + assert_series_equal(result, expected) + @pytest.mark.parametrize("value", [2, np.nan, None, float('nan')]) def test_constructor_dict_nan_key(self, value): # GH 18480