Skip to content

Commit d70b95b

Browse files
authored
TST: tests for setitem-like casting issues (#45154)
1 parent 70c7b41 commit d70b95b

File tree

1 file changed

+368
-0
lines changed

1 file changed

+368
-0
lines changed

pandas/tests/series/indexing/test_setitem.py

+368
Original file line numberDiff line numberDiff line change
@@ -1086,6 +1086,374 @@ def expected(self, val, inplace):
10861086
return Series(res_values)
10871087

10881088

1089+
@pytest.mark.parametrize("val", [512, np.int16(512)])
class TestSetitemIntoIntegerSeriesNeedsUpcast(SetitemCastingEquivalents):
    # 512 is outside the int8 range, so the fixtures below expect the
    # setitem to produce an int16 result rather than operate in place.

    @pytest.fixture
    def obj(self):
        return Series([1, 2, 3], dtype=np.int8)

    @pytest.fixture
    def key(self):
        return 1

    @pytest.fixture
    def inplace(self):
        return False

    @pytest.fixture
    def expected(self):
        return Series([1, 512, 3], dtype=np.int16)

    def test_int_key(self, obj, key, expected, val, indexer_sli, is_inplace, request):
        # every param that is not an np.int16 instance is marked xfail
        if not isinstance(val, np.int16):
            request.node.add_marker(pytest.mark.xfail)
        super().test_int_key(obj, key, expected, val, indexer_sli, is_inplace)

    def test_mask_key(self, obj, key, expected, val, indexer_sli, request):
        # every param that is not an np.int16 instance is marked xfail
        if not isinstance(val, np.int16):
            request.node.add_marker(pytest.mark.xfail)
        super().test_mask_key(obj, key, expected, val, indexer_sli)
1118+
1119+
1120+
@pytest.mark.parametrize("val", [2 ** 33 + 1.0, 2 ** 33 + 1.1, 2 ** 62])
class TestSmallIntegerSetitemUpcast(SetitemCastingEquivalents):
    # https://github.com/pandas-dev/pandas/issues/39584#issuecomment-941212124

    @pytest.fixture
    def obj(self):
        return Series([1, 2, 3], dtype="i4")

    @pytest.fixture
    def key(self):
        return 0

    @pytest.fixture
    def inplace(self):
        return False

    @pytest.fixture
    def expected(self, val):
        # only the value with a fractional part is expected as float64;
        # the other two params are expected as int64
        result_dtype = "f8" if val == 2 ** 33 + 1.1 else "i8"
        return Series([val, 2, 3], dtype=result_dtype)

    def test_series_where(self, obj, key, expected, val, is_inplace, request):
        # xfail only for floats that are whole numbers
        if isinstance(val, float) and val % 1 == 0:
            request.node.add_marker(pytest.mark.xfail)
        super().test_series_where(obj, key, expected, val, is_inplace)

    def test_int_key(self, obj, key, expected, val, indexer_sli, is_inplace, request):
        # xfail for any whole-number value, int or float
        if val % 1 == 0:
            request.node.add_marker(pytest.mark.xfail)
        super().test_int_key(obj, key, expected, val, indexer_sli, is_inplace)

    def test_mask_key(self, obj, key, expected, val, indexer_sli, request):
        # xfail for any whole-number value, int or float
        if val % 1 == 0:
            request.node.add_marker(pytest.mark.xfail)
        super().test_mask_key(obj, key, expected, val, indexer_sli)
1161+
1162+
1163+
def test_20643():
    # closed by GH#45121
    labels = ["a", "b", "c"]
    orig = Series([0, 1, 2], index=labels)
    expected = Series([0, 2.7, 2], index=labels)

    # Series: each indexer (None means plain __setitem__) with its key
    series_cases = [
        ("at", "b"),
        ("loc", "b"),
        (None, "b"),
        ("iat", 1),
        ("iloc", 1),
    ]
    for accessor, key in series_cases:
        ser = orig.copy()
        target = ser if accessor is None else getattr(ser, accessor)
        target[key] = 2.7
        tm.assert_series_equal(ser, expected)

    # DataFrame: same checks on the single-column frame built from the Series
    orig_df = orig.to_frame("A")
    expected_df = expected.to_frame("A")

    frame_cases = [
        ("at", ("b", "A")),
        ("loc", ("b", "A")),
        ("iloc", (1, 0)),
        ("iat", (1, 0)),
    ]
    for accessor, key in frame_cases:
        df = orig_df.copy()
        getattr(df, accessor)[key] = 2.7
        tm.assert_frame_equal(df, expected_df)
1207+
1208+
1209+
def test_20643_comment():
    # https://github.com/pandas-dev/pandas/issues/20643#issuecomment-431244590
    # fixed sometime prior to GH#45121
    labels = ["a", "b", "c"]
    orig = Series([0, 1, 2], index=labels)
    expected = Series([np.nan, 1, 2], index=labels)

    # setting None positionally is expected to give NaN in the result
    for accessor in ("iat", "iloc"):
        ser = orig.copy()
        getattr(ser, accessor)[0] = None
        tm.assert_series_equal(ser, expected)
1222+
1223+
1224+
def test_15413():
    # fixed by GH#45121
    expected = Series([1, 2.5, 3])

    # augmented assignment through a boolean mask
    ser = Series([1, 2, 3])
    ser[ser == 2] += 0.5
    tm.assert_series_equal(ser, expected)

    # augmented assignment with a scalar key, via plain __setitem__ (None)
    # and then via each indexer
    for accessor in (None, "loc", "iloc", "iat", "at"):
        ser = Series([1, 2, 3])
        target = ser if accessor is None else getattr(ser, accessor)
        target[1] += 0.5
        tm.assert_series_equal(ser, expected)
1251+
1252+
1253+
def test_37477():
    # fixed by GH#45121
    orig = DataFrame({"A": [1, 2, 3], "B": [3, 4, 5]})
    expected = DataFrame({"A": [1, 2, 3], "B": [3, 1.2, 5]})

    # label-based then positional access to the same cell (row 1, column "B")
    cases = [
        ("at", (1, "B")),
        ("loc", (1, "B")),
        ("iat", (1, 1)),
        ("iloc", (1, 1)),
    ]
    for accessor, key in cases:
        df = orig.copy()
        getattr(df, accessor)[key] = 1.2
        tm.assert_frame_equal(df, expected)
1273+
1274+
1275+
def test_32878_int_itemsize():
    # Fixed by GH#45121
    # the largest int64 value does not fit in the Series' int32 dtype
    val = np.int64(np.iinfo(np.int64).max)
    ser = Series(np.arange(5).astype("i4"))

    ser[0] = val

    expected = Series([val, 1, 2, 3, 4], dtype=np.int64)
    tm.assert_series_equal(ser, expected)
1283+
1284+
1285+
def test_26395(indexer_al):
    # .at case fixed by GH#45121 (best guess)
    index = ["A", "B", "C"]
    df = DataFrame(index=index)
    df["D"] = 0

    # the same cell is overwritten three times on the same frame; after each
    # write the whole frame is compared against one built with the given dtype
    steps = [(2, np.int64), (44.5, np.float64), ("hello", object)]
    for value, dtype in steps:
        indexer_al(df)["C", "D"] = value
        expected = DataFrame({"D": [0, 0, value]}, index=index, dtype=dtype)
        tm.assert_frame_equal(df, expected)
1301+
1302+
1303+
def test_37692(indexer_al):
    # GH#37692
    labels = ["a", "b", "c"]
    ser = Series([1, 2, 3], index=labels)

    # writing a string into the integer Series is expected to give object dtype
    indexer_al(ser)["b"] = "test"

    tm.assert_series_equal(ser, Series([1, "test", 3], index=labels, dtype=object))
1309+
1310+
1311+
def test_setitem_bool_int_float_consistency(indexer_sli):
    # GH#21513
    # bool-with-int and bool-with-float both upcast to object
    # int-with-float and float-with-int are both non-casting so long
    # as the setitem can be done losslessly
    for dtype in [np.float64, np.int64]:
        ser = Series(0, index=range(3), dtype=dtype)
        indexer_sli(ser)[0] = True
        assert ser.dtype == object

        ser = Series(0, index=range(3), dtype=bool)
        # NOTE(review): this case uses plain __setitem__ rather than
        #  indexer_sli -- confirm that is intentional
        ser[0] = dtype(1)
        assert ser.dtype == object

    # 1.0 can be held losslessly, so no casting
    ser = Series(0, index=range(3), dtype=np.int64)
    indexer_sli(ser)[0] = np.float64(1.0)
    assert ser.dtype == np.int64

    # 1 can be held losslessly, so no casting
    ser = Series(0, index=range(3), dtype=np.float64)
    indexer_sli(ser)[0] = np.int64(1)
    # without this check the float-with-int case could never fail
    assert ser.dtype == np.float64
1333+
1334+
1335+
def test_6942(indexer_al):
    # check that the .at __setitem__ after setting "Live" actually sets the data
    start = Timestamp("2014-04-01")
    first_stamp = Timestamp("2014-04-23 12:42:38.883082")
    second_stamp = Timestamp("2014-04-24 01:33:30.040039")

    orig = DataFrame(
        index=date_range(start, periods=1), columns=["timenow", "Live"]
    )
    df = orig.copy()

    # first write goes through the parametrized indexer
    indexer_al(df)[start, "timenow"] = first_stamp

    # assign the other column as a whole ...
    df["Live"] = True

    # ... then overwrite the first cell with .at and read it back positionally
    df.at[start, "timenow"] = second_stamp
    assert df.iloc[0, 0] == second_stamp
1351+
1352+
1353+
@pytest.mark.xfail(reason="Doesn't catch when numpy raises.")
def test_45070():
    labels = ["a", "b", "c"]
    ser = Series([1, 2, 3], index=labels)

    # integer key on a string-labelled Series, string value into int data
    ser[0] = "X"

    tm.assert_series_equal(ser, Series(["X", 2, 3], index=labels, dtype=object))
1360+
1361+
1362+
@pytest.mark.xfail(reason="unwanted upcast")
def test_15231():
    df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])

    # enlarging with a row that supplies both columns: all dtypes stay int64
    df.loc[2] = Series({"a": 5, "b": 6})
    all_int64 = (df.dtypes == np.int64).all()
    assert all_int64

    # enlarging with a row that only supplies "a"
    df.loc[3] = Series({"a": 7})

    # df["a"] doesn't have any NaNs, should not have been cast
    wanted_dtypes = Series([np.int64, np.float64], dtype=object, index=["a", "b"])
    tm.assert_series_equal(df.dtypes, wanted_dtypes)
1373+
1374+
1375+
@pytest.mark.xfail(reason="Fails to upcast")
def test_32878_complex_itemsize():
    # TODO: when fixed, put adjacent to test_32878_int_itemsize
    ser = Series(np.arange(5).astype("c8"))

    # largest finite float64, converted to complex128
    val = np.finfo(np.float64).max.astype("c16")

    # GH#32878 used to coerce val to inf+0.000000e+00j
    ser[0] = val
    assert ser[0] == val

    tm.assert_series_equal(ser, Series([val, 1, 2, 3, 4], dtype="c16"))
1388+
1389+
1390+
@pytest.mark.xfail(reason="Unnecessarily upcasts to float64")
def test_iloc_setitem_unnecesssary_float_upcasting():
    # GH#12255
    columns = {
        0: np.array([1, 3], dtype=np.float32),
        1: np.array([2, 4], dtype=np.float32),
        2: ["a", "b"],
    }
    df = DataFrame(columns)
    orig = df.copy()

    # write column 0's own values back through a 2D iloc slice; the frame
    # is expected to come out unchanged
    df.iloc[:, 0:1] = df[0].values.reshape(2, 1)

    tm.assert_frame_equal(df, orig)
1406+
1407+
1408+
@pytest.mark.xfail(reason="unwanted casting to dt64")
def test_12499():
    # TODO: OP in GH#12499 used np.datetime64("NaT") instead of pd.NaT,
    #  which has consequences for the expected df["two"] (though i think at
    #  the time it might not have because of a separate bug). See if it makes
    #  a difference which one we use here.
    ts = Timestamp("2016-03-01 03:13:22.98986", tz="UTC")

    data = [{"one": 0, "two": ts}]
    orig = DataFrame(data)

    # both enlargement spellings below are compared against the same frame
    expected = DataFrame(
        {"one": [0, np.nan], "two": Series([ts, NaT], dtype="datetime64[ns, UTC]")}
    )

    # enlarge with a row label only
    df = orig.copy()
    df.loc[1] = [np.nan, NaT]
    tm.assert_frame_equal(df, expected)

    # enlarge with a row label plus a full column slice
    df = orig.copy()
    df.loc[1, :] = [np.nan, NaT]
    tm.assert_frame_equal(df, expected)
1430+
1431+
1432+
@pytest.mark.xfail(reason="Too many columns cast to float64")
def test_20476():
    mi = MultiIndex.from_product([["A", "B"], ["a", "b", "c"]])
    df = DataFrame(-1, index=range(3), columns=mi)
    filler = DataFrame([[1, 2, 3.0]] * 3, index=range(3), columns=["a", "b", "c"])

    # replace the whole "A" group with a frame whose last column is float
    df["A"] = filler

    expected = DataFrame(
        {
            0: [1, 1, 1],
            1: [2, 2, 2],
            2: [3.0, 3.0, 3.0],
            3: [-1, -1, -1],
            4: [-1, -1, -1],
            5: [-1, -1, -1],
        }
    )
    expected.columns = mi

    # only the one float column of the filler should end up float64
    exp_dtypes = Series(
        [np.dtype(np.int64)] * 2 + [np.dtype(np.float64)] + [np.dtype(np.int64)] * 3,
        index=mi,
    )
    tm.assert_series_equal(df.dtypes, exp_dtypes)

    # `expected` was previously built but never used; compare the values too
    tm.assert_frame_equal(df, expected)
1455+
1456+
10891457
def test_setitem_int_as_positional_fallback_deprecation():
10901458
# GH#42215 deprecated falling back to positional on __setitem__ with an
10911459
# int not contained in the index

0 commit comments

Comments
 (0)