From b66a1c8754eec71c3b8dba1dd8e1ac620e923f9d Mon Sep 17 00:00:00 2001 From: Chris Date: Sun, 15 May 2016 17:28:36 -0500 Subject: [PATCH 1/5] PERF: DataFrame transform --- asv_bench/benchmarks/groupby.py | 15 +++++++++++++++ doc/source/whatsnew/v0.18.2.txt | 4 ++-- pandas/core/groupby.py | 31 ++++++++---------------------- pandas/tests/test_groupby.py | 34 +++++++++++++++++++++++++++++++-- 4 files changed, 57 insertions(+), 27 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 7279d73eb0d97..586bd00b091fe 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -773,6 +773,21 @@ def setup(self): def time_groupby_transform_series2(self): self.df.groupby('id')['val'].transform(np.mean) + +class groupby_transform_dataframe(object): + # GH 12737 + goal_time = 0.2 + + def setup(self): + self.df = pd.DataFrame({'group': np.repeat(np.arange(1000), 10), + 'B': np.nan, + 'C': np.nan}) + self.df.ix[4::10, 'B':'C'] = 5 + + def time_groupby_transform_dataframe(self): + self.df.groupby('group').transform('first') + + class groupby_transform_cythonized(object): goal_time = 0.2 diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index 3ac466158276f..6dcc637a00a74 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -104,7 +104,7 @@ Performance Improvements - increased performance of ``DataFrame.quantile()`` as it now operates per-block (:issue:`11623`) - +- Improved performance of ``DataFrameGroupBy.transform`` (:issue:`12737`) .. 
_whatsnew_0182.bug_fixes: @@ -123,7 +123,7 @@ Bug Fixes - Regression in ``Series.quantile`` with nans (also shows up in ``.median()`` and ``.describe()``); furthermore now names the ``Series`` with the quantile (:issue:`13098`, :issue:`13146`) - +- Bug in ``SeriesGroupBy.transform`` with datetime values and missing groups (:issue:`13191`) - Bug in ``Series.str.extractall()`` with ``str`` index raises ``ValueError`` (:issue:`13156`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 7a4791189726e..f2ca0ca8f8de9 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2776,18 +2776,9 @@ def _transform_fast(self, func): func = getattr(self, func) ids, _, ngroup = self.grouper.group_info - mask = ids != -1 - - out = func().values[ids] - if not mask.all(): - out = np.where(mask, out, np.nan) - - obs = np.zeros(ngroup, dtype='bool') - obs[ids[mask]] = True - if not obs.all(): - out = self._try_cast(out, self._selected_obj) - return Series(out, index=self.obj.index) + out = algos.take_1d(func().values, ids) + return Series(out, index=self.obj.index, name=self.obj.name) def filter(self, func, dropna=True, *args, **kwargs): # noqa """ @@ -3465,19 +3456,13 @@ def transform(self, func, *args, **kwargs): if not result.columns.equals(obj.columns): return self._transform_general(func, *args, **kwargs) - results = np.empty_like(obj.values, result.values.dtype) - for (name, group), (i, row) in zip(self, result.iterrows()): - indexer = self._get_index(name) - if len(indexer) > 0: - results[indexer] = np.tile(row.values, len( - indexer)).reshape(len(indexer), -1) - - counts = self.size().fillna(0).values - if any(counts == 0): - results = self._try_cast(results, obj[result.columns]) + # Fast transform + ids, _, ngroup = self.grouper.group_info + out = {} + for col in result: + out[col] = algos.take_nd(result[col].values, ids) - return (DataFrame(results, columns=result.columns, index=obj.index) - ._convert(datetime=True)) + return DataFrame(out, 
columns=result.columns, index=obj.index) def _define_paths(self, func, *args, **kwargs): if isinstance(func, compat.string_types): diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 583b1c7aea270..accb445ea6248 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1051,13 +1051,33 @@ def test_transform_fast(self): values = np.repeat(grp.mean().values, com._ensure_platform_int(grp.count().values)) - expected = pd.Series(values, index=df.index) + expected = pd.Series(values, index=df.index, name='val') result = grp.transform(np.mean) assert_series_equal(result, expected) result = grp.transform('mean') assert_series_equal(result, expected) + # GH 12737 + df = pd.DataFrame({'grouping': [0, 1, 1, 3], 'f': [1.1, 2.1, 3.1, 4.5], + 'd': pd.date_range('2014-1-1', '2014-1-4'), + 'i': [1, 2, 3, 4]}, + columns=['grouping', 'f', 'i', 'd']) + result = df.groupby('grouping').transform('first') + + dates = [pd.Timestamp('2014-1-1'), pd.Timestamp('2014-1-2'), + pd.Timestamp('2014-1-2'), pd.Timestamp('2014-1-4')] + expected = pd.DataFrame({'f': [1.1, 2.1, 2.1, 4.5], + 'd': dates, + 'i': [1, 2, 2, 4]}, + columns=['f', 'i', 'd']) + assert_frame_equal(result, expected) + + # selection + result = df.groupby('grouping')[['f', 'i']].transform('first') + expected = expected[['f', 'i']] + assert_frame_equal(result, expected) + def test_transform_broadcast(self): grouped = self.ts.groupby(lambda x: x.month) result = grouped.transform(np.mean) @@ -1191,6 +1211,16 @@ def test_transform_function_aliases(self): expected = self.df.groupby('A')['C'].transform(np.mean) assert_series_equal(result, expected) + def test_series_fast_transform_date(self): + # GH 13191 + df = pd.DataFrame({'grouping': [np.nan, 1, 1, 3], + 'd': pd.date_range('2014-1-1', '2014-1-4')}) + result = df.groupby('grouping')['d'].transform('first') + dates = [pd.NaT, pd.Timestamp('2014-1-2'), pd.Timestamp('2014-1-2'), + pd.Timestamp('2014-1-4')] + expected = pd.Series(dates, 
name='d') + assert_series_equal(result, expected) + def test_transform_length(self): # GH 9697 df = pd.DataFrame({'col1': [1, 1, 2, 2], 'col2': [1, 2, 3, np.nan]}) @@ -4406,7 +4436,7 @@ def test_groupby_datetime64_32_bit(self): df = DataFrame({"A": range(2), "B": [pd.Timestamp('2000-01-1')] * 2}) result = df.groupby("A")["B"].transform(min) - expected = Series([pd.Timestamp('2000-01-1')] * 2) + expected = Series([pd.Timestamp('2000-01-1')] * 2, name='B') assert_series_equal(result, expected) def test_groupby_categorical_unequal_len(self): From 045d0c73a8be28d52fb86611270ac79c5239e625 Mon Sep 17 00:00:00 2001 From: Chris Date: Sun, 15 May 2016 19:48:47 -0500 Subject: [PATCH 2/5] add back some casting --- pandas/core/groupby.py | 17 +++++++++++++++-- pandas/tests/test_categorical.py | 3 +-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index f2ca0ca8f8de9..aedadc5e8018f 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2776,8 +2776,11 @@ def _transform_fast(self, func): func = getattr(self, func) ids, _, ngroup = self.grouper.group_info - + counts = self.size().fillna(0).values + cast = (counts == 0).any() out = algos.take_1d(func().values, ids) + if cast: + out = self._try_cast(out, self.obj) return Series(out, index=self.obj.index, name=self.obj.name) def filter(self, func, dropna=True, *args, **kwargs): # noqa @@ -3456,11 +3459,21 @@ def transform(self, func, *args, **kwargs): if not result.columns.equals(obj.columns): return self._transform_general(func, *args, **kwargs) - # Fast transform + # Fast transform path for aggregations + + # if there were groups with no observations (Categorical only?) + # try casting data to original dtype + counts = self.size().fillna(0).values + cast = (counts == 0).any() + + # by column (could be by block?) 
reshape aggregated data to + # size of original frame by repeating obvservations with take ids, _, ngroup = self.grouper.group_info out = {} for col in result: out[col] = algos.take_nd(result[col].values, ids) + if cast: + out[col] = self._try_cast(out[col], obj[col]) return DataFrame(out, columns=result.columns, index=obj.index) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 55df64264d6f9..f11a83fcef46e 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -3043,8 +3043,7 @@ def f(x): c = pd.cut(df.a, bins=[-10, 0, 10, 20, 30, 40]) result = df.a.groupby(c).transform(sum) - tm.assert_series_equal(result, df['a'], check_names=False) - self.assertTrue(result.name is None) + tm.assert_series_equal(result, df['a']) tm.assert_series_equal( df.a.groupby(c).transform(lambda xs: np.sum(xs)), df['a']) From 9d78f659f55128e55d0b062f30fb04ae68994d3e Mon Sep 17 00:00:00 2001 From: Chris Date: Sun, 15 May 2016 20:06:54 -0500 Subject: [PATCH 3/5] other categorical test name fix --- pandas/tests/test_categorical.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index f11a83fcef46e..5a6667e57ce9d 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -3025,8 +3025,7 @@ def f(x): c = pd.cut(df.a, bins=[0, 10, 20, 30, 40]) result = df.a.groupby(c).transform(sum) - tm.assert_series_equal(result, df['a'], check_names=False) - self.assertTrue(result.name is None) + tm.assert_series_equal(result, df['a']) tm.assert_series_equal( df.a.groupby(c).transform(lambda xs: np.sum(xs)), df['a']) From d61d4e0bff498e9b78427d33525279c29d7b0287 Mon Sep 17 00:00:00 2001 From: Chris Date: Tue, 17 May 2016 18:39:55 -0500 Subject: [PATCH 4/5] handle duplicate column case --- pandas/core/groupby.py | 28 ++++++++++++++++------------ pandas/tests/test_groupby.py | 6 ++++++ 2 files changed, 22 insertions(+), 12 
deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index aedadc5e8018f..41303ca195fe1 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2776,8 +2776,7 @@ def _transform_fast(self, func): func = getattr(self, func) ids, _, ngroup = self.grouper.group_info - counts = self.size().fillna(0).values - cast = (counts == 0).any() + cast = self.size().isnull().any() out = algos.take_1d(func().values, ids) if cast: out = self._try_cast(out, self.obj) @@ -3459,23 +3458,28 @@ def transform(self, func, *args, **kwargs): if not result.columns.equals(obj.columns): return self._transform_general(func, *args, **kwargs) - # Fast transform path for aggregations + return self._transform_fast(result, obj) + def _transform_fast(self, result, obj): + """ + Fast transform path for aggregations + """ # if there were groups with no observations (Categorical only?) # try casting data to original dtype - counts = self.size().fillna(0).values - cast = (counts == 0).any() + cast = self.size().isnull().any() - # by column (could be by block?) 
reshape aggregated data to - # size of original frame by repeating obvservations with take + # for each col, reshape to size of original frame + # by take operation ids, _, ngroup = self.grouper.group_info - out = {} - for col in result: - out[col] = algos.take_nd(result[col].values, ids) + output = [] + for i, _ in enumerate(result.columns): + res = algos.take_1d(result.iloc[:, i].values, ids) if cast: - out[col] = self._try_cast(out[col], obj[col]) + res = self._try_cast(res, obj.iloc[:, i]) + output.append(res) - return DataFrame(out, columns=result.columns, index=obj.index) + return DataFrame._from_arrays(output, columns=result.columns, + index=obj.index) def _define_paths(self, func, *args, **kwargs): if isinstance(func, compat.string_types): diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index accb445ea6248..d15bab708b61f 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1078,6 +1078,12 @@ def test_transform_fast(self): expected = expected[['f', 'i']] assert_frame_equal(result, expected) + # dup columns + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['g', 'a', 'a']) + result = df.groupby('g').transform('first') + expected = df.drop('g', axis=1) + assert_frame_equal(result, expected) + def test_transform_broadcast(self): grouped = self.ts.groupby(lambda x: x.month) result = grouped.transform(np.mean) From 0af1e5582d4cc299137e925285108eaee20bcc8f Mon Sep 17 00:00:00 2001 From: Chris Date: Wed, 18 May 2016 05:26:51 -0500 Subject: [PATCH 5/5] revert casting logic --- pandas/core/groupby.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 41303ca195fe1..424859da82877 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2776,7 +2776,7 @@ def _transform_fast(self, func): func = getattr(self, func) ids, _, ngroup = self.grouper.group_info - cast = self.size().isnull().any() + cast = (self.size().fillna(0) > 
0).any() out = algos.take_1d(func().values, ids) if cast: out = self._try_cast(out, self.obj) @@ -3466,7 +3466,7 @@ def _transform_fast(self, result, obj): """ # if there were groups with no observations (Categorical only?) # try casting data to original dtype - cast = self.size().isnull().any() + cast = (self.size().fillna(0) > 0).any() # for each col, reshape to size of original frame # by take operation