Skip to content

BUG: DataFrame(frozenset) should raise #40163

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Mar 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,7 @@ Conversion
- Bug in :meth:`Series.to_dict` with ``orient='records'`` now returns python native types (:issue:`25969`)
- Bug in :meth:`Series.view` and :meth:`Index.view` when converting between datetime-like (``datetime64[ns]``, ``datetime64[ns, tz]``, ``timedelta64``, ``period``) dtypes (:issue:`39788`)
- Bug in creating a :class:`DataFrame` from an empty ``np.recarray`` not retaining the original dtypes (:issue:`40121`)
- Bug in :class:`DataFrame` failing to raise ``TypeError`` when constructing from a ``frozenset`` (:issue:`40163`)
-

Strings
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,9 +518,9 @@ def sanitize_array(

elif isinstance(data, (list, tuple, abc.Set, abc.ValuesView)) and len(data) > 0:
# TODO: deque, array.array
if isinstance(data, set):
if isinstance(data, (set, frozenset)):
# Raise only for unordered sets, e.g., not for dict_keys
raise TypeError("Set type is unordered")
raise TypeError(f"'{type(data).__name__}' type is unordered")
data = list(data)

if dtype is not None:
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,10 +375,8 @@ def __init__(
"`index` argument. `copy` must be False."
)

elif is_extension_array_dtype(data):
elif isinstance(data, ExtensionArray):
pass
elif isinstance(data, (set, frozenset)):
raise TypeError(f"'{type(data).__name__}' type is unordered")
else:
data = com.maybe_iterable_to_list(data)

Expand Down
23 changes: 19 additions & 4 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,26 +377,31 @@ def test_constructor_dict(self):
with pytest.raises(ValueError, match=msg):
DataFrame({"A": {"a": "a", "b": "b"}, "B": ["a", "b", "c"]})

def test_constructor_dict_length1(self):
# Length-one dict micro-optimization
frame = DataFrame({"A": {"1": 1, "2": 2}})
tm.assert_index_equal(frame.index, Index(["1", "2"]))

def test_constructor_dict_with_index(self):
# empty dict plus index
idx = Index([0, 1, 2])
frame = DataFrame({}, index=idx)
assert frame.index is idx

def test_constructor_dict_with_index_and_columns(self):
# empty dict with index and columns
idx = Index([0, 1, 2])
frame = DataFrame({}, index=idx, columns=idx)
assert frame.index is idx
assert frame.columns is idx
assert len(frame._series) == 3

def test_constructor_dict_of_empty_lists(self):
# with dict of empty list and Series
frame = DataFrame({"A": [], "B": []}, columns=["A", "B"])
tm.assert_index_equal(frame.index, RangeIndex(0), exact=True)

def test_constructor_dict_with_none(self):
# GH 14381
# Dict with None value
frame_none = DataFrame({"a": None}, index=[0])
Expand All @@ -405,6 +410,7 @@ def test_constructor_dict(self):
assert frame_none_list._get_value(0, "a") is None
tm.assert_frame_equal(frame_none, frame_none_list)

def test_constructor_dict_errors(self):
# GH10856
# dict with scalar values should raise error, even if columns passed
msg = "If using all scalar values, you must pass an index"
Expand Down Expand Up @@ -560,7 +566,7 @@ def test_constructor_error_msgs(self):
with pytest.raises(ValueError, match=msg):
DataFrame({"a": False, "b": True})

def test_constructor_subclass_dict(self, float_frame, dict_subclass):
def test_constructor_subclass_dict(self, dict_subclass):
# Test for passing dict subclass to constructor
data = {
"col1": dict_subclass((x, 10.0 * x) for x in range(10)),
Expand All @@ -574,6 +580,7 @@ def test_constructor_subclass_dict(self, float_frame, dict_subclass):
df = DataFrame(data)
tm.assert_frame_equal(refdf, df)

def test_constructor_defaultdict(self, float_frame):
# try with defaultdict
from collections import defaultdict

Expand Down Expand Up @@ -608,6 +615,7 @@ def test_constructor_dict_cast(self):
assert frame["B"].dtype == np.object_
assert frame["A"].dtype == np.float64

def test_constructor_dict_cast2(self):
# can't cast to float
test_data = {
"A": dict(zip(range(20), tm.makeStringIndex(20))),
Expand All @@ -623,6 +631,7 @@ def test_constructor_dict_dont_upcast(self):
df = DataFrame(d)
assert isinstance(df["Col1"]["Row2"], float)

def test_constructor_dict_dont_upcast2(self):
dm = DataFrame([[1, 2], ["a", "b"]], index=[1, 2], columns=[1, 2])
assert isinstance(dm[1][1], int)

Expand Down Expand Up @@ -1195,6 +1204,7 @@ def __len__(self, n):
expected = DataFrame([[1, "a"], [2, "b"]], columns=columns)
tm.assert_frame_equal(result, expected, check_dtype=False)

def test_constructor_stdlib_array(self):
# GH 4297
# support Array
import array
Expand Down Expand Up @@ -2427,11 +2437,16 @@ def test_from_2d_ndarray_with_dtype(self):
expected = DataFrame(array_dim2).astype("datetime64[ns, UTC]")
tm.assert_frame_equal(df, expected)

def test_construction_from_set_raises(self):
@pytest.mark.parametrize("typ", [set, frozenset])
def test_construction_from_set_raises(self, typ):
# https://github.com/pandas-dev/pandas/issues/32582
msg = "Set type is unordered"
values = typ({1, 2, 3})
msg = f"'{typ.__name__}' type is unordered"
with pytest.raises(TypeError, match=msg):
DataFrame({"a": {1, 2, 3}})
DataFrame({"a": values})

with pytest.raises(TypeError, match=msg):
Series(values)


def get1(obj):
Expand Down
25 changes: 17 additions & 8 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ class TestSeriesConstructors:
],
)
def test_empty_constructor(self, constructor, check_index_type):
# TODO: share with frame test of the same name
with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
expected = Series()
result = constructor()
Expand Down Expand Up @@ -310,6 +311,7 @@ def test_constructor_generator(self):
exp = Series(range(10))
tm.assert_series_equal(result, exp)

# same but with non-default index
gen = (i for i in range(10))
result = Series(gen, index=range(10, 20))
exp.index = range(10, 20)
Expand All @@ -323,6 +325,7 @@ def test_constructor_map(self):
exp = Series(range(10))
tm.assert_series_equal(result, exp)

# same but with non-default index
m = map(lambda x: x, range(10))
result = Series(m, index=range(10, 20))
exp.index = range(10, 20)
Expand Down Expand Up @@ -386,6 +389,7 @@ def test_constructor_categorical_with_coercion(self):
str(df.values)
str(df)

def test_constructor_categorical_with_coercion2(self):
# GH8623
x = DataFrame(
[[1, "John P. Doe"], [2, "Jane Dove"], [1, "John P. Doe"]],
Expand Down Expand Up @@ -747,6 +751,7 @@ def test_constructor_datelike_coercion(self):
assert s.iloc[1] == "NOV"
assert s.dtype == object

def test_constructor_datelike_coercion2(self):
# the dtype was being reset on the slicing and re-inferred to datetime
# even though the blocks are mixed
belly = "216 3T19".split()
Expand Down Expand Up @@ -798,6 +803,7 @@ def test_constructor_dtype_datetime64(self):
assert isna(s[1])
assert s.dtype == "M8[ns]"

def test_constructor_dtype_datetime64_10(self):
# GH3416
dates = [
np.datetime64(datetime(2013, 1, 1)),
Expand Down Expand Up @@ -850,18 +856,21 @@ def test_constructor_dtype_datetime64(self):
expected = Series(dts.astype(np.int64))
tm.assert_series_equal(result, expected)

def test_constructor_dtype_datetime64_9(self):
# invalid dates can be held as object
result = Series([datetime(2, 1, 1)])
assert result[0] == datetime(2, 1, 1, 0, 0)

result = Series([datetime(3000, 1, 1)])
assert result[0] == datetime(3000, 1, 1, 0, 0)

def test_constructor_dtype_datetime64_8(self):
# don't mix types
result = Series([Timestamp("20130101"), 1], index=["a", "b"])
assert result["a"] == Timestamp("20130101")
assert result["b"] == 1

def test_constructor_dtype_datetime64_7(self):
# GH6529
# coerce datetime64 non-ns properly
dates = date_range("01-Jan-2015", "01-Dec-2015", freq="M")
Expand All @@ -887,6 +896,7 @@ def test_constructor_dtype_datetime64(self):
tm.assert_numpy_array_equal(series1.values, dates2)
assert series1.dtype == object

def test_constructor_dtype_datetime64_6(self):
# these will correctly infer a datetime
s = Series([None, NaT, "2013-08-05 15:30:00.000001"])
assert s.dtype == "datetime64[ns]"
Expand All @@ -897,6 +907,7 @@ def test_constructor_dtype_datetime64(self):
s = Series([NaT, np.nan, "2013-08-05 15:30:00.000001"])
assert s.dtype == "datetime64[ns]"

def test_constructor_dtype_datetime64_5(self):
# tz-aware (UTC and other tz's)
# GH 8411
dr = date_range("20130101", periods=3)
Expand All @@ -906,18 +917,21 @@ def test_constructor_dtype_datetime64(self):
dr = date_range("20130101", periods=3, tz="US/Eastern")
assert str(Series(dr).iloc[0].tz) == "US/Eastern"

def test_constructor_dtype_datetime64_4(self):
# non-convertible
s = Series([1479596223000, -1479590, NaT])
assert s.dtype == "object"
assert s[2] is NaT
assert "NaT" in str(s)

def test_constructor_dtype_datetime64_3(self):
# if we passed a NaT it remains
s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), NaT])
assert s.dtype == "object"
assert s[2] is NaT
assert "NaT" in str(s)

def test_constructor_dtype_datetime64_2(self):
# if we passed a nan it remains
s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), np.nan])
assert s.dtype == "object"
Expand Down Expand Up @@ -980,6 +994,7 @@ def test_constructor_with_datetime_tz(self):
result = DatetimeIndex(s, freq="infer")
tm.assert_index_equal(result, dr)

def test_constructor_with_datetime_tz4(self):
# inference
s = Series(
[
Expand All @@ -990,6 +1005,7 @@ def test_constructor_with_datetime_tz(self):
assert s.dtype == "datetime64[ns, US/Pacific]"
assert lib.infer_dtype(s, skipna=True) == "datetime64"

def test_constructor_with_datetime_tz3(self):
s = Series(
[
Timestamp("2013-01-01 13:00:00-0800", tz="US/Pacific"),
Expand All @@ -999,6 +1015,7 @@ def test_constructor_with_datetime_tz(self):
assert s.dtype == "object"
assert lib.infer_dtype(s, skipna=True) == "datetime"

def test_constructor_with_datetime_tz2(self):
# with all NaT
s = Series(NaT, index=[0, 1], dtype="datetime64[ns, US/Eastern]")
expected = Series(DatetimeIndex(["NaT", "NaT"], tz="US/Eastern"))
Expand Down Expand Up @@ -1231,14 +1248,6 @@ def test_constructor_dict_of_tuples(self):
expected = Series([3, 6], index=MultiIndex.from_tuples([(1, 2), (None, 5)]))
tm.assert_series_equal(result, expected)

def test_constructor_set(self):
values = {1, 2, 3, 4, 5}
with pytest.raises(TypeError, match="'set' type is unordered"):
Series(values)
values = frozenset(values)
with pytest.raises(TypeError, match="'frozenset' type is unordered"):
Series(values)

# https://github.com/pandas-dev/pandas/issues/22698
@pytest.mark.filterwarnings("ignore:elementwise comparison:FutureWarning")
def test_fromDict(self):
Expand Down