From 92966d69b43c7673f4ae286d2357344d3dd1a2db Mon Sep 17 00:00:00 2001
From: luke <2736230899@qq.com>
Date: Wed, 22 Mar 2023 18:42:51 +0800
Subject: [PATCH 1/7] BUG: Agg in non-unique col

---
 pandas/core/apply.py                   | 21 ++++++++++++++++++---
 pandas/tests/apply/test_frame_apply.py | 12 ++++++++++++
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 08618d5a6aa16..34119b1894709 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -416,9 +416,14 @@ def agg_dict_like(self) -> DataFrame | Series:
                 results = {key: colg.agg(how) for key, how in arg.items()}
             else:
                 # key used for column selection and output
-                results = {
-                    key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()
-                }
+                results = {}
+                for key, how in arg.items():
+                    indices = [i for i, col in enumerate(obj.columns) if col == key]
+                    if len(indices) == 1:  # for unique columns
+                        results[key] = obj._gotitem(key, ndim=1).agg(how)
+                    else:  # for non-unique columns
+                        col_results = [obj.iloc[:, i].agg(how) for i in indices]
+                        results[key] = col_results
 
         # set the final keys
         keys = list(arg.keys())
@@ -426,6 +431,8 @@ def agg_dict_like(self) -> DataFrame | Series:
         # Avoid making two isinstance calls in all and any below
         is_ndframe = [isinstance(r, ABCNDFrame) for r in results.values()]
 
+        is_list = [isinstance(v, list) for v in results.values()]
+
         # combine results
         if all(is_ndframe):
             keys_to_use: Iterable[Hashable]
@@ -451,6 +458,14 @@ def agg_dict_like(self) -> DataFrame | Series:
                 "and transformation operations "
                 "simultaneously"
             )
+        elif any(is_list):
+            # GH#51099
+            # convert list-like values in results to Series with corresponding keys
+            from pandas import Series
+
+            values = [val for sublist in results.values() for val in sublist]
+            keys = [key for key, sublist in results.items() for _ in sublist]
+            result = Series(values, index=keys)
         else:
             from pandas import Series
 
diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
index 6ed3f6140d361..bf79b5efa3acc 100644
--- a/pandas/tests/apply/test_frame_apply.py
+++ b/pandas/tests/apply/test_frame_apply.py
@@ -1642,3 +1642,15 @@ def foo2(x, b=2, c=0):
         columns=MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")]),
     )
     tm.assert_frame_equal(result, expected)
+
+
+def test_agg_dist_like_and_nonunique_columns():
+    # GH#51099
+    df = DataFrame(
+        {"A": [None, 2, 3], "B": [1.0, np.nan, 3.0], "C": ["foo", None, "bar"]}
+    )
+    df.columns = ["A", "A", "C"]
+
+    result = df.agg({"A": "count"})  # same with 'apply' instead of 'agg'
+    expected = df["A"].count()
+    tm.assert_series_equal(result, expected)

From d830964aca0e1ec35edad3d06c578384c1de474f Mon Sep 17 00:00:00 2001
From: luke <2736230899@qq.com>
Date: Wed, 22 Mar 2023 18:58:50 +0800
Subject: [PATCH 2/7] Add whatsnew entry

---
 doc/source/whatsnew/v2.0.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index ec5d08e75f0e4..f691c9b9be179 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -1365,6 +1365,7 @@ Reshaping
 - Bug in :meth:`DataFrame.explode` raising ``ValueError`` on multiple columns with ``NaN`` values or empty lists (:issue:`46084`)
 - Bug in :meth:`DataFrame.transpose` with ``IntervalDtype`` column with ``timedelta64[ns]`` endpoints (:issue:`44917`)
 - Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` would ignore arguments when passed a list of functions (:issue:`50863`)
+- Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` would return incorrect type when dist-like argument passed in (:issue:`51099`)
 
 Sparse
 ^^^^^^

From 9c21e639646aae5251a23890f3e20e2d0414f7bf Mon Sep 17 00:00:00 2001
From: luke <2736230899@qq.com>
Date: Tue, 28 Mar 2023 11:13:20 +0800
Subject: [PATCH 3/7] Fix bug but add more code

---
 pandas/core/apply.py                   | 56 +++++++++++++++-----------
 pandas/tests/apply/test_frame_apply.py |  2 +-
 2 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 34119b1894709..9415dfa6a1446 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -409,30 +409,38 @@ def agg_dict_like(self) -> DataFrame | Series:
             context_manager = com.temp_setattr(obj, "as_index", True)
         else:
             context_manager = nullcontext()
+
+        if isinstance(selected_obj, ABCDataFrame):
+            is_non_unique_col = selected_obj.columns.duplicated()
+        else:
+            is_non_unique_col = [False]
+
         with context_manager:
             if selected_obj.ndim == 1:
                 # key only used for output
-                colg = obj._gotitem(selection, ndim=1)
-                results = {key: colg.agg(how) for key, how in arg.items()}
-            else:
-                # key used for column selection and output
+                key_res = obj._gotitem(selection, ndim=1)
+                results = {key: key_res.agg(how) for key, how in arg.items()}
+            elif any(is_non_unique_col):
+                # GH#51099
+                # results is a dict of lists
                 results = {}
                 for key, how in arg.items():
-                    indices = [i for i, col in enumerate(obj.columns) if col == key]
-                    if len(indices) == 1:  # for unique columns
-                        results[key] = obj._gotitem(key, ndim=1).agg(how)
-                    else:  # for non-unique columns
-                        col_results = [obj.iloc[:, i].agg(how) for i in indices]
-                        results[key] = col_results
-
+                    key_res = []
+                    for col_idx in selected_obj.columns.get_indexer_for([key]):
+                        col = selected_obj.iloc[:, col_idx]
+                        key_res.append(col.agg(how))
+                    results[key] = key_res
+            else:
+                # key used for column selection and output
+                results = {
+                    key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()
+                }
         # set the final keys
         keys = list(arg.keys())
 
         # Avoid making two isinstance calls in all and any below
         is_ndframe = [isinstance(r, ABCNDFrame) for r in results.values()]
 
-        is_list = [isinstance(v, list) for v in results.values()]
-
         # combine results
         if all(is_ndframe):
             keys_to_use: Iterable[Hashable]
@@ -458,18 +466,10 @@ def agg_dict_like(self) -> DataFrame | Series:
                 "and transformation operations "
                 "simultaneously"
             )
-        elif any(is_list):
-            # GH#51099
-            # convert list-like values in results to Series with corresponding keys
-            from pandas import Series
-
-            values = [val for sublist in results.values() for val in sublist]
-            keys = [key for key, sublist in results.items() for _ in sublist]
-            result = Series(values, index=keys)
         else:
             from pandas import Series
 
-            # we have a dict of scalars
+            # we have a dict of scalars or a list of scalars
             # GH 36212 use name only if obj is a series
             if obj.ndim == 1:
                 obj = cast("Series", obj)
@@ -477,7 +477,17 @@ def agg_dict_like(self) -> DataFrame | Series:
             else:
                 name = None
 
-            result = Series(results, name=name)
+            if any(is_non_unique_col):
+                # Expand the scalar list and construct a series.
+                series_list = []
+                for key, value in results.items():
+                    assert isinstance(value, list)
+                    series_list.append(Series(value, index=[key] * len(value)))
+
+                result = concat(series_list, axis=0)
+                result.name = name
+            else:
+                result = Series(results, name=name)
 
         return result
 
diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
index f17aa2ebb6e32..f6907c0cea09b 100644
--- a/pandas/tests/apply/test_frame_apply.py
+++ b/pandas/tests/apply/test_frame_apply.py
@@ -1674,6 +1674,6 @@ def test_agg_dist_like_and_nonunique_columns():
     )
     df.columns = ["A", "A", "C"]
 
-    result = df.agg({"A": "count"})  # same with 'apply' instead of 'agg'
+    result = df.agg({"A": "count"})
     expected = df["A"].count()
     tm.assert_series_equal(result, expected)

From f03122b518509e65628d55d48ad83f3809207d73 Mon Sep 17 00:00:00 2001
From: luke <2736230899@qq.com>
Date: Tue, 28 Mar 2023 16:03:40 +0800
Subject: [PATCH 4/7] Fix mypy and improve what's new

---
 doc/source/whatsnew/v2.0.0.rst | 2 +-
 pandas/core/apply.py           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 7dce93730bb69..87a41a01ec81f 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -1366,7 +1366,7 @@ Reshaping
 - Bug in :meth:`DataFrame.explode` raising ``ValueError`` on multiple columns with ``NaN`` values or empty lists (:issue:`46084`)
 - Bug in :meth:`DataFrame.transpose` with ``IntervalDtype`` column with ``timedelta64[ns]`` endpoints (:issue:`44917`)
 - Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` would ignore arguments when passed a list of functions (:issue:`50863`)
-- Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` would return incorrect type when dist-like argument passed in (:issue:`51099`)
+- Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` on non-unique columns would return incorrect type when dist-like argument passed in (:issue:`51099`)
 
 Sparse
 ^^^^^^
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 9415dfa6a1446..be346515b77b5 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -411,7 +411,7 @@ def agg_dict_like(self) -> DataFrame | Series:
             context_manager = nullcontext()
 
         if isinstance(selected_obj, ABCDataFrame):
-            is_non_unique_col = selected_obj.columns.duplicated()
+            is_non_unique_col = selected_obj.columns.duplicated().tolist()
         else:
             is_non_unique_col = [False]
 

From 14ef7e98e6c42bacc2dd8eb2c034b75480f54600 Mon Sep 17 00:00:00 2001
From: luke <2736230899@qq.com>
Date: Sat, 1 Apr 2023 17:01:39 +0800
Subject: [PATCH 5/7] Improve performance

---
 pandas/core/apply.py | 51 +++++++++++++++++++++++---------------------
 1 file changed, 27 insertions(+), 24 deletions(-)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index cbe1d77d4ff0b..c397cf28ce3b4 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -411,26 +411,33 @@ def agg_dict_like(self) -> DataFrame | Series:
         else:
             context_manager = nullcontext()
 
-        if isinstance(selected_obj, ABCDataFrame):
-            is_non_unique_col = selected_obj.columns.duplicated().tolist()
-        else:
-            is_non_unique_col = [False]
+        is_non_unique_col = (
+            selected_obj.ndim == 2
+            and selected_obj.columns.nunique() < len(selected_obj.columns)
+        )
 
         with context_manager:
             if selected_obj.ndim == 1:
                 # key only used for output
-                key_res = obj._gotitem(selection, ndim=1)
-                results = {key: key_res.agg(how) for key, how in arg.items()}
-            elif any(is_non_unique_col):
+                colg = obj._gotitem(selection, ndim=1)
+                results = {key: colg.agg(how) for key, how in arg.items()}
+            elif is_non_unique_col:
                 # GH#51099
-                # results is a dict of lists
-                results = {}
+                result_data = []
+                result_index = []
                 for key, how in arg.items():
-                    key_res = []
-                    for col_idx in selected_obj.columns.get_indexer_for([key]):
-                        col = selected_obj.iloc[:, col_idx]
-                        key_res.append(col.agg(how))
-                    results[key] = key_res
+                    indices = selected_obj.columns.get_indexer_for([key])
+                    labels = selected_obj.columns.take(indices)
+                    label_to_indices = defaultdict(list)
+                    for index, label in zip(indices, labels):
+                        label_to_indices[label].append(index)
+
+                    for indices in label_to_indices.values():
+                        for indice in indices:
+                            result_index.append(key)
+                            result_data.append(
+                                selected_obj._ixs(indice, axis=1).agg(how)
+                            )
             else:
                 # key used for column selection and output
                 results = {
@@ -440,7 +447,10 @@ def agg_dict_like(self) -> DataFrame | Series:
         keys = list(arg.keys())
 
         # Avoid making two isinstance calls in all and any below
-        is_ndframe = [isinstance(r, ABCNDFrame) for r in results.values()]
+        if is_non_unique_col:
+            is_ndframe = [False]
+        else:
+            is_ndframe = [isinstance(r, ABCNDFrame) for r in results.values()]
 
         # combine results
         if all(is_ndframe):
@@ -478,15 +488,8 @@ def agg_dict_like(self) -> DataFrame | Series:
             else:
                 name = None
 
-            if any(is_non_unique_col):
-                # Expand the scalar list and construct a series.
-                series_list = []
-                for key, value in results.items():
-                    assert isinstance(value, list)
-                    series_list.append(Series(value, index=[key] * len(value)))
-
-                result = concat(series_list, axis=0)
-                result.name = name
+            if is_non_unique_col:
+                result = Series(result_data, index=result_index, name=name)
             else:
                 result = Series(results, name=name)
 

From e533f430486637d803c1bf6f1368c7a74f061871 Mon Sep 17 00:00:00 2001
From: luke <2736230899@qq.com>
Date: Mon, 3 Apr 2023 10:14:43 +0800
Subject: [PATCH 6/7] Improve performance

---
 pandas/core/apply.py | 42 ++++++++++++++++++++----------------------
 1 file changed, 20 insertions(+), 22 deletions(-)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index c397cf28ce3b4..01e018b2eaaeb 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -420,8 +420,10 @@ def agg_dict_like(self) -> DataFrame | Series:
             if selected_obj.ndim == 1:
                 # key only used for output
                 colg = obj._gotitem(selection, ndim=1)
-                results = {key: colg.agg(how) for key, how in arg.items()}
+                result_data = [colg.agg(how) for _, how in arg.items()]
+                result_index = list(arg.keys())
             elif is_non_unique_col:
+                # key used for column selection and output
                 # GH#51099
                 result_data = []
                 result_index = []
@@ -432,32 +434,31 @@ def agg_dict_like(self) -> DataFrame | Series:
                     for index, label in zip(indices, labels):
                         label_to_indices[label].append(index)
 
-                    for indices in label_to_indices.values():
-                        for indice in indices:
-                            result_index.append(key)
-                            result_data.append(
-                                selected_obj._ixs(indice, axis=1).agg(how)
-                            )
+                    key_data = [
+                        selected_obj._ixs(indice, axis=1).agg(how)
+                        for label, indices in label_to_indices.items()
+                        for indice in indices
+                    ]
+
+                    result_index += [key] * len(key_data)
+                    result_data += key_data
             else:
                 # key used for column selection and output
-                results = {
-                    key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()
-                }
-        # set the final keys
-        keys = list(arg.keys())
+                result_data = [
+                    obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()
+                ]
+                result_index = list(arg.keys())
 
         # Avoid making two isinstance calls in all and any below
-        if is_non_unique_col:
-            is_ndframe = [False]
-        else:
-            is_ndframe = [isinstance(r, ABCNDFrame) for r in results.values()]
+        is_ndframe = [isinstance(r, ABCNDFrame) for r in result_data]
 
         # combine results
         if all(is_ndframe):
+            results = dict(zip(result_index, result_data))
             keys_to_use: Iterable[Hashable]
-            keys_to_use = [k for k in keys if not results[k].empty]
+            keys_to_use = [k for k in result_index if not results[k].empty]
             # Have to check, if at least one DataFrame is not empty.
-            keys_to_use = keys_to_use if keys_to_use != [] else keys
+            keys_to_use = keys_to_use if keys_to_use != [] else result_index
             if selected_obj.ndim == 2:
                 # keys are columns, so we can preserve names
                 ktu = Index(keys_to_use)
@@ -488,10 +489,7 @@ def agg_dict_like(self) -> DataFrame | Series:
             else:
                 name = None
 
-            if is_non_unique_col:
-                result = Series(result_data, index=result_index, name=name)
-            else:
-                result = Series(results, name=name)
+            result = Series(result_data, index=result_index, name=name)
 
         return result
 

From b2c32b97a96fd9ecd02d375651ac4b47505712e7 Mon Sep 17 00:00:00 2001
From: luke <2736230899@qq.com>
Date: Tue, 11 Apr 2023 10:59:14 +0800
Subject: [PATCH 7/7] Improve whatsnew and comment

---
 doc/source/whatsnew/v2.0.0.rst | 1 -
 doc/source/whatsnew/v2.1.0.rst | 1 +
 pandas/core/apply.py           | 2 +-
 3 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index e401d89f157c2..2ee6ecc4e6cd4 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -1372,7 +1372,6 @@ Reshaping
 - Bug in :meth:`DataFrame.explode` raising ``ValueError`` on multiple columns with ``NaN`` values or empty lists (:issue:`46084`)
 - Bug in :meth:`DataFrame.transpose` with ``IntervalDtype`` column with ``timedelta64[ns]`` endpoints (:issue:`44917`)
 - Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` would ignore arguments when passed a list of functions (:issue:`50863`)
-- Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` on non-unique columns would return incorrect type when dist-like argument passed in (:issue:`51099`)
 
 Sparse
 ^^^^^^
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 9f5d6011a7780..8df644b0ccb41 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -308,6 +308,7 @@ Groupby/resample/rolling
 Reshaping
 ^^^^^^^^^
 - Bug in :meth:`DataFrame.stack` losing extension dtypes when columns is a :class:`MultiIndex` and frame contains mixed dtypes (:issue:`45740`)
+- Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` on non-unique columns would return incorrect type when a dict-like argument is passed in (:issue:`51099`)
 - Bug in :meth:`DataFrame.transpose` inferring dtype for object column (:issue:`51546`)
 - Bug in :meth:`Series.combine_first` converting ``int64`` dtype to ``float64`` and losing precision on very large integers (:issue:`51764`)
 -
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 4c6611286ad5b..c8e189eeadebd 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -483,7 +483,7 @@ def agg_dict_like(self) -> DataFrame | Series:
         else:
             from pandas import Series
 
-            # we have a dict of scalars or a list of scalars
+            # we have a list of scalars
             # GH 36212 use name only if obj is a series
             if obj.ndim == 1:
                 obj = cast("Series", obj)