@@ -1663,3 +1663,89 @@ def func(x):
1663
1663
msg = "length must not be 0"
1664
1664
with pytest .raises (ValueError , match = msg ):
1665
1665
df .groupby ("A" , observed = False ).agg (func )
1666
+
1667
+
1668
+ def test_groupby_aggregation_duplicate_columns_single_dict_value ():
1669
+ # GH#55041
1670
+ df = DataFrame (
1671
+ [[1 , 2 , 3 , 4 ], [1 , 3 , 4 , 5 ], [2 , 4 , 5 , 6 ]],
1672
+ columns = ["a" , "b" , "c" , "c" ],
1673
+ )
1674
+ gb = df .groupby ("a" )
1675
+ result = gb .agg ({"c" : "sum" })
1676
+
1677
+ expected = DataFrame (
1678
+ [[7 , 9 ], [5 , 6 ]], columns = ["c" , "c" ], index = Index ([1 , 2 ], name = "a" )
1679
+ )
1680
+ tm .assert_frame_equal (result , expected )
1681
+
1682
+
1683
+ def test_groupby_aggregation_duplicate_columns_multiple_dict_values ():
1684
+ # GH#55041
1685
+ df = DataFrame (
1686
+ [[1 , 2 , 3 , 4 ], [1 , 3 , 4 , 5 ], [2 , 4 , 5 , 6 ]],
1687
+ columns = ["a" , "b" , "c" , "c" ],
1688
+ )
1689
+ gb = df .groupby ("a" )
1690
+ result = gb .agg ({"c" : ["sum" , "min" , "max" , "min" ]})
1691
+
1692
+ expected = DataFrame (
1693
+ [[7 , 3 , 4 , 3 , 9 , 4 , 5 , 4 ], [5 , 5 , 5 , 5 , 6 , 6 , 6 , 6 ]],
1694
+ columns = MultiIndex (
1695
+ levels = [["c" ], ["sum" , "min" , "max" ]],
1696
+ codes = [[0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ], [0 , 1 , 2 , 1 , 0 , 1 , 2 , 1 ]],
1697
+ ),
1698
+ index = Index ([1 , 2 ], name = "a" ),
1699
+ )
1700
+ tm .assert_frame_equal (result , expected )
1701
+
1702
+
1703
+ def test_groupby_aggregation_duplicate_columns_some_empty_result ():
1704
+ # GH#55041
1705
+ df = DataFrame (
1706
+ [
1707
+ [1 , 9843 , 43 , 54 , 7867 ],
1708
+ [2 , 940 , 9 , - 34 , 44 ],
1709
+ [1 , - 34 , - 546 , - 549358 , 0 ],
1710
+ [2 , 244 , - 33 , - 100 , 44 ],
1711
+ ],
1712
+ columns = ["a" , "b" , "b" , "c" , "c" ],
1713
+ )
1714
+ gb = df .groupby ("a" )
1715
+ result = gb .agg ({"b" : [], "c" : ["var" ]})
1716
+
1717
+ expected = DataFrame (
1718
+ [[1.509268e11 , 30944844.5 ], [2.178000e03 , 0.0 ]],
1719
+ columns = MultiIndex (levels = [["c" ], ["var" ]], codes = [[0 , 0 ], [0 , 0 ]]),
1720
+ index = Index ([1 , 2 ], name = "a" ),
1721
+ )
1722
+ tm .assert_frame_equal (result , expected )
1723
+
1724
+
1725
+ def test_groupby_aggregation_multi_index_duplicate_columns ():
1726
+ # GH#55041
1727
+ df = DataFrame (
1728
+ [
1729
+ [1 , - 9843 , 43 , 54 , 7867 ],
1730
+ [2 , 940 , 9 , - 34 , 44 ],
1731
+ [1 , - 34 , 546 , - 549358 , 0 ],
1732
+ [2 , 244 , - 33 , - 100 , 44 ],
1733
+ ],
1734
+ columns = MultiIndex (
1735
+ levels = [["level1.1" , "level1.2" ], ["level2.1" , "level2.2" ]],
1736
+ codes = [[0 , 0 , 0 , 1 , 1 ], [0 , 1 , 1 , 0 , 1 ]],
1737
+ ),
1738
+ index = MultiIndex (
1739
+ levels = [["level1.1" , "level1.2" ], ["level2.1" , "level2.2" ]],
1740
+ codes = [[0 , 0 , 0 , 1 ], [0 , 1 , 1 , 0 ]],
1741
+ ),
1742
+ )
1743
+ gb = df .groupby (level = 0 )
1744
+ result = gb .agg ({("level1.1" , "level2.2" ): "min" })
1745
+
1746
+ expected = DataFrame (
1747
+ [[- 9843 , 9 ], [244 , - 33 ]],
1748
+ columns = MultiIndex (levels = [["level1.1" ], ["level2.2" ]], codes = [[0 , 0 ], [0 , 0 ]]),
1749
+ index = Index (["level1.1" , "level1.2" ]),
1750
+ )
1751
+ tm .assert_frame_equal (result , expected )
0 commit comments