From 01f76c0f62a0859adddd576b69f766fee0362d4b Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 1 Mar 2021 10:17:02 -0800 Subject: [PATCH 1/3] TST: split bloated constructor tests --- pandas/core/construction.py | 4 ++-- pandas/core/series.py | 1 + pandas/tests/frame/test_constructors.py | 23 ++++++++++++++++++---- pandas/tests/series/test_constructors.py | 25 ++++++++++++++++-------- 4 files changed, 39 insertions(+), 14 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index d0fe5b5ab0c19..58315f41542cc 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -517,9 +517,9 @@ def sanitize_array( elif isinstance(data, (list, tuple, abc.Set, abc.ValuesView)) and len(data) > 0: # TODO: deque, array.array - if isinstance(data, set): + if isinstance(data, (set, frozenset)): # Raise only for unordered sets, e.g., not for dict_keys - raise TypeError("Set type is unordered") + raise TypeError(f"'{type(data).__name__}' type is unordered") data = list(data) if dtype is not None: diff --git a/pandas/core/series.py b/pandas/core/series.py index ddfeea381ff2e..0b1a365b4a4b5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -378,6 +378,7 @@ def __init__( elif is_extension_array_dtype(data): pass elif isinstance(data, (set, frozenset)): + # TODO: catch this in sanitize_array raise TypeError(f"'{type(data).__name__}' type is unordered") else: data = com.maybe_iterable_to_list(data) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 873c58f976508..29f70226879e7 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -376,15 +376,18 @@ def test_constructor_dict(self): with pytest.raises(ValueError, match=msg): DataFrame({"A": {"a": "a", "b": "b"}, "B": ["a", "b", "c"]}) + def test_constructor_dict_length1(self): # Length-one dict micro-optimization frame = DataFrame({"A": {"1": 1, "2": 2}}) tm.assert_index_equal(frame.index, Index(["1", "2"])) + def test_constructor_dict_with_index(self): # empty dict plus index idx = Index([0, 1, 2]) frame = DataFrame({}, index=idx) assert frame.index is idx + def test_constructor_dict_with_index_and_columns(self): # empty dict with index and columns idx = Index([0, 1, 2]) frame = DataFrame({}, index=idx, columns=idx) @@ -392,10 +395,12 @@ def test_constructor_dict(self): assert frame.columns is idx assert len(frame._series) == 3 + def test_constructor_dict_of_empty_lists(self): # with dict of empty list and Series frame = DataFrame({"A": [], "B": []}, columns=["A", "B"]) tm.assert_index_equal(frame.index, RangeIndex(0), exact=True) + def test_constructor_dict_with_none(self): # GH 14381 # Dict with None value frame_none = DataFrame({"a": None}, index=[0]) @@ -404,6 +409,7 @@ def test_constructor_dict(self): assert frame_none_list._get_value(0, "a") is None tm.assert_frame_equal(frame_none, frame_none_list) + def test_constructor_dict_errors(self): # GH10856 # dict with scalar values should raise error, even if columns passed msg = "If using all scalar values, you must pass an index" @@ -559,7 +565,7 @@ def test_constructor_error_msgs(self): with pytest.raises(ValueError, match=msg): DataFrame({"a": False, "b": True}) - def test_constructor_subclass_dict(self, float_frame, dict_subclass): + def test_constructor_subclass_dict(self, dict_subclass): # Test for passing dict subclass to constructor data = { "col1": dict_subclass((x, 10.0 * x) for x in range(10)), @@ -573,6 +579,7 @@ def test_constructor_subclass_dict(self, float_frame, dict_subclass): df = DataFrame(data) tm.assert_frame_equal(refdf, df) + def test_constructor_defaultdict(self, float_frame): # try with defaultdict from collections import defaultdict @@ -607,6 +614,7 @@ def test_constructor_dict_cast(self): assert frame["B"].dtype == np.object_ assert frame["A"].dtype == np.float64 + def test_constructor_dict_cast2(self): # can't cast to float test_data = { "A": dict(zip(range(20), tm.makeStringIndex(20))), @@ -622,6 +630,7 @@ def test_constructor_dict_dont_upcast(self): df = DataFrame(d) assert isinstance(df["Col1"]["Row2"], float) + def test_constructor_dict_dont_upcast2(self): dm = DataFrame([[1, 2], ["a", "b"]], index=[1, 2], columns=[1, 2]) assert isinstance(dm[1][1], int) @@ -1193,6 +1202,7 @@ def __len__(self, n): expected = DataFrame([[1, "a"], [2, "b"]], columns=columns) tm.assert_frame_equal(result, expected, check_dtype=False) + def test_constructor_stdlib_array(self): # GH 4297 # support Array import array @@ -2425,11 +2435,16 @@ def test_from_2d_ndarray_with_dtype(self): expected = DataFrame(array_dim2).astype("datetime64[ns, UTC]") tm.assert_frame_equal(df, expected) - def test_construction_from_set_raises(self): + @pytest.mark.parametrize("typ", [set, frozenset]) + def test_construction_from_set_raises(self, typ): # https://github.com/pandas-dev/pandas/issues/32582 - msg = "Set type is unordered" + values = typ({1, 2, 3}) + msg = f"'{typ.__name__}' type is unordered" with pytest.raises(TypeError, match=msg): - DataFrame({"a": {1, 2, 3}}) + DataFrame({"a": values}) + + with pytest.raises(TypeError, match=msg): + Series(values) def get1(obj): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 63c9b4d899622..5d26109312763 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -69,6 +69,7 @@ class TestSeriesConstructors: ], ) def test_empty_constructor(self, constructor, check_index_type): + # TODO: share with frame test of the same name with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): expected = Series() result = constructor() @@ -310,6 +311,7 @@ def test_constructor_generator(self): exp = Series(range(10)) tm.assert_series_equal(result, exp) + # same but with non-default index gen = (i for i in range(10)) result = Series(gen, index=range(10, 20)) exp.index = range(10, 20) @@ -323,6 +325,7 @@ def test_constructor_map(self): exp = Series(range(10)) tm.assert_series_equal(result, exp) + # same but with non-default index m = map(lambda x: x, range(10)) result = Series(m, index=range(10, 20)) exp.index = range(10, 20) @@ -386,6 +389,7 @@ def test_constructor_categorical_with_coercion(self): str(df.values) str(df) + def test_constructor_categorical_with_coercion2(self): # GH8623 x = DataFrame( [[1, "John P. Doe"], [2, "Jane Dove"], [1, "John P. Doe"]], @@ -747,6 +751,7 @@ def test_constructor_datelike_coercion(self): assert s.iloc[1] == "NOV" assert s.dtype == object + def test_constructor_datelike_coercion2(self): # the dtype was being reset on the slicing and re-inferred to datetime # even thought the blocks are mixed belly = "216 3T19".split() @@ -798,6 +803,7 @@ def test_constructor_dtype_datetime64(self): assert isna(s[1]) assert s.dtype == "M8[ns]" + def test_constructor_dtype_datetime64_10(self): # GH3416 dates = [ np.datetime64(datetime(2013, 1, 1)), @@ -850,6 +856,7 @@ def test_constructor_dtype_datetime64(self): expected = Series(dts.astype(np.int64)) tm.assert_series_equal(result, expected) + def test_constructor_dtype_datetime64_9(self): # invalid dates can be help as object result = Series([datetime(2, 1, 1)]) assert result[0] == datetime(2, 1, 1, 0, 0) @@ -857,11 +864,13 @@ def test_constructor_dtype_datetime64(self): result = Series([datetime(3000, 1, 1)]) assert result[0] == datetime(3000, 1, 1, 0, 0) + def test_constructor_dtype_datetime64_8(self): # don't mix types result = Series([Timestamp("20130101"), 1], index=["a", "b"]) assert result["a"] == Timestamp("20130101") assert result["b"] == 1 + def test_constructor_dtype_datetime64_7(self): # GH6529 # coerce datetime64 non-ns properly dates = date_range("01-Jan-2015", "01-Dec-2015", freq="M") @@ -887,6 +896,7 @@ def test_constructor_dtype_datetime64(self): tm.assert_numpy_array_equal(series1.values, dates2) assert series1.dtype == object + def test_constructor_dtype_datetime64_6(self): # these will correctly infer a datetime s = Series([None, pd.NaT, "2013-08-05 15:30:00.000001"]) assert s.dtype == "datetime64[ns]" @@ -897,6 +907,7 @@ def test_constructor_dtype_datetime64(self): s = Series([pd.NaT, np.nan, "2013-08-05 15:30:00.000001"]) assert s.dtype == "datetime64[ns]" + def test_constructor_dtype_datetime64_5(self): # tz-aware (UTC and other tz's) # GH 8411 dr = date_range("20130101", periods=3) @@ -906,18 +917,21 @@ def test_constructor_dtype_datetime64(self): dr = date_range("20130101", periods=3, tz="US/Eastern") assert str(Series(dr).iloc[0].tz) == "US/Eastern" + def test_constructor_dtype_datetime64_4(self): # non-convertible s = Series([1479596223000, -1479590, pd.NaT]) assert s.dtype == "object" assert s[2] is pd.NaT assert "NaT" in str(s) + def test_constructor_dtype_datetime64_3(self): # if we passed a NaT it remains s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), pd.NaT]) assert s.dtype == "object" assert s[2] is pd.NaT assert "NaT" in str(s) + def test_constructor_dtype_datetime64_2(self): # if we passed a nan it remains s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), np.nan]) assert s.dtype == "object" @@ -980,6 +994,7 @@ def test_constructor_with_datetime_tz(self): result = pd.DatetimeIndex(s, freq="infer") tm.assert_index_equal(result, dr) + def test_constructor_with_datetime_tz4(self): # inference s = Series( [ @@ -990,6 +1005,7 @@ def test_constructor_with_datetime_tz(self): assert s.dtype == "datetime64[ns, US/Pacific]" assert lib.infer_dtype(s, skipna=True) == "datetime64" + def test_constructor_with_datetime_tz3(self): s = Series( [ Timestamp("2013-01-01 13:00:00-0800", tz="US/Pacific"), @@ -999,6 +1015,7 @@ def test_constructor_with_datetime_tz(self): assert s.dtype == "object" assert lib.infer_dtype(s, skipna=True) == "datetime" + def test_constructor_with_datetime_tz2(self): # with all NaT s = Series(pd.NaT, index=[0, 1], dtype="datetime64[ns, US/Eastern]") expected = Series(pd.DatetimeIndex(["NaT", "NaT"], tz="US/Eastern")) @@ -1231,14 +1248,6 @@ def test_constructor_dict_of_tuples(self): expected = Series([3, 6], index=MultiIndex.from_tuples([(1, 2), (None, 5)])) tm.assert_series_equal(result, expected) - def test_constructor_set(self): - values = {1, 2, 3, 4, 5} - with pytest.raises(TypeError, match="'set' type is unordered"): - Series(values) - values = frozenset(values) - with pytest.raises(TypeError, match="'frozenset' type is unordered"): - Series(values) - # https://github.com/pandas-dev/pandas/issues/22698 @pytest.mark.filterwarnings("ignore:elementwise comparison:FutureWarning") def test_fromDict(self): From d4a659b251550476522bfcf017813fb837aebb7c Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 1 Mar 2021 19:43:07 -0800 Subject: [PATCH 2/3] REF: unify set/frozenset catching --- pandas/core/series.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 6f1782d3e0afc..24c356e7a8269 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -375,11 +375,8 @@ def __init__( "`index` argument. `copy` must be False." ) - elif is_extension_array_dtype(data): + elif isinstance(data, ExtensionArray): pass - elif isinstance(data, (set, frozenset)): - # TODO: catch this in sanitize_array - raise TypeError(f"'{type(data).__name__}' type is unordered") else: data = com.maybe_iterable_to_list(data) From 3d0859314d7ab57b6db1b90255d30dab6934e947 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 2 Mar 2021 12:53:59 -0800 Subject: [PATCH 3/3] whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 41db72612a66b..ee4ca6a06a634 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -402,6 +402,7 @@ Conversion - Bug in :meth:`Series.to_dict` with ``orient='records'`` now returns python native types (:issue:`25969`) - Bug in :meth:`Series.view` and :meth:`Index.view` when converting between datetime-like (``datetime64[ns]``, ``datetime64[ns, tz]``, ``timedelta64``, ``period``) dtypes (:issue:`39788`) - Bug in creating a :class:`DataFrame` from an empty ``np.recarray`` not retaining the original dtypes (:issue:`40121`) +- Bug in :class:`DataFrame` failing to raise ``TypeError`` when constructing from a ``frozenset`` (:issue:`40163`) - Strings