From c187ac935926694d446445baa9494476acae9ca2 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Mon, 7 Sep 2015 18:37:55 -0400
Subject: [PATCH] PERF: use NaT comparisons in int64/datetimelikes #11010

---
 doc/source/whatsnew/v0.17.0.txt |  3 +--
 pandas/core/groupby.py          |  2 --
 pandas/src/generate_code.py     | 13 ++++++-----
 pandas/src/generated.pyx        | 39 ++++++++++++++++++---------------
 4 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
index cbcee664d8be4..7100f78cb3c7a 100644
--- a/doc/source/whatsnew/v0.17.0.txt
+++ b/doc/source/whatsnew/v0.17.0.txt
@@ -1009,11 +1009,10 @@ Bug Fixes
 - Bug in ``to_json`` which was causing segmentation fault when serializing 0-rank ndarray (:issue:`9576`)
 - Bug in plotting functions may raise ``IndexError`` when plotted on ``GridSpec`` (:issue:`10819`)
 - Bug in plot result may show unnecessary minor ticklabels (:issue:`10657`)
-- Bug in ``groupby`` incorrect computation for aggregation on ``DataFrame`` with ``NaT`` (E.g ``first``, ``last``, ``min``). (:issue:`10590`)
+- Bug in ``groupby`` causing incorrect computation for aggregations on a ``DataFrame`` with ``NaT`` (e.g. ``first``, ``last``, ``min``) (:issue:`10590`, :issue:`11010`)
 - Bug when constructing ``DataFrame`` where passing a dictionary with only scalar values and specifying columns did not raise an error (:issue:`10856`)
 - Bug in ``.var()`` causing roundoff errors for highly similar values (:issue:`10242`)
 - Bug in ``DataFrame.plot(subplots=True)`` with duplicated columns outputs incorrect result (:issue:`10962`)
 - Bug in ``Index`` arithmetic may result in incorrect class (:issue:`10638`)
 - Bug in ``date_range`` results in empty if freq is negative annualy, quarterly and monthly (:issue:`11018`)
 - Bug in ``DatetimeIndex`` cannot infer negative freq (:issue:`11018`)
-
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 1f5855e63dee8..0293fc655742e 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1523,8 +1523,6 @@ def aggregate(self, values, how, axis=0):
 
         if is_datetime_or_timedelta_dtype(values.dtype):
             values = values.view('int64')
-            values[values == tslib.iNaT] = np.nan
-            # GH 7754
             is_numeric = True
         elif is_bool_dtype(values.dtype):
             values = _algos.ensure_float64(values)
diff --git a/pandas/src/generate_code.py b/pandas/src/generate_code.py
index b055d75df4cf4..8c5c7d709e5f1 100644
--- a/pandas/src/generate_code.py
+++ b/pandas/src/generate_code.py
@@ -739,7 +739,7 @@ def group_last_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
                 val = values[i, j]
 
                 # not nan
-                if val == val:
+                if val == val and val != %(nan_val)s:
                     nobs[lab, j] += 1
                     resx[lab, j] = val
 
@@ -785,7 +785,7 @@ def group_nth_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
                 val = values[i, j]
 
                 # not nan
-                if val == val:
+                if val == val and val != %(nan_val)s:
                     nobs[lab, j] += 1
                     if nobs[lab, j] == rank:
                         resx[lab, j] = val
@@ -1013,7 +1013,7 @@ def group_max_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
                     val = values[i, j]
 
                     # not nan
-                    if val == val:
+                    if val == val and val != %(nan_val)s:
                         nobs[lab, j] += 1
                         if val > maxx[lab, j]:
                             maxx[lab, j] = val
@@ -1027,7 +1027,7 @@ def group_max_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
                 val = values[i, 0]
 
                 # not nan
-                if val == val:
+                if val == val and val != %(nan_val)s:
                     nobs[lab, 0] += 1
                     if val > maxx[lab, 0]:
                         maxx[lab, 0] = val
@@ -1076,7 +1076,8 @@ def group_min_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
                     val = values[i, j]
 
                     # not nan
-                    if val == val:
+                    if val == val and val != %(nan_val)s:
+
                         nobs[lab, j] += 1
                         if val < minx[lab, j]:
                             minx[lab, j] = val
@@ -1090,7 +1091,7 @@ def group_min_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
                 val = values[i, 0]
 
                 # not nan
-                if val == val:
+                if val == val and val != %(nan_val)s:
                     nobs[lab, 0] += 1
                     if val < minx[lab, 0]:
                         minx[lab, 0] = val
diff --git a/pandas/src/generated.pyx b/pandas/src/generated.pyx
index 2f2fd528999d6..767e7d6292b6d 100644
--- a/pandas/src/generated.pyx
+++ b/pandas/src/generated.pyx
@@ -7315,7 +7315,7 @@ def group_last_float64(ndarray[float64_t, ndim=2] out,
                 val = values[i, j]
 
                 # not nan
-                if val == val:
+                if val == val and val != NAN:
                     nobs[lab, j] += 1
                     resx[lab, j] = val
 
@@ -7360,7 +7360,7 @@ def group_last_float32(ndarray[float32_t, ndim=2] out,
                 val = values[i, j]
 
                 # not nan
-                if val == val:
+                if val == val and val != NAN:
                     nobs[lab, j] += 1
                     resx[lab, j] = val
 
@@ -7405,7 +7405,7 @@ def group_last_int64(ndarray[int64_t, ndim=2] out,
                 val = values[i, j]
 
                 # not nan
-                if val == val:
+                if val == val and val != iNaT:
                     nobs[lab, j] += 1
                     resx[lab, j] = val
 
@@ -7451,7 +7451,7 @@ def group_nth_float64(ndarray[float64_t, ndim=2] out,
                 val = values[i, j]
 
                 # not nan
-                if val == val:
+                if val == val and val != NAN:
                     nobs[lab, j] += 1
                     if nobs[lab, j] == rank:
                         resx[lab, j] = val
@@ -7497,7 +7497,7 @@ def group_nth_float32(ndarray[float32_t, ndim=2] out,
                 val = values[i, j]
 
                 # not nan
-                if val == val:
+                if val == val and val != NAN:
                     nobs[lab, j] += 1
                     if nobs[lab, j] == rank:
                         resx[lab, j] = val
@@ -7543,7 +7543,7 @@ def group_nth_int64(ndarray[int64_t, ndim=2] out,
                 val = values[i, j]
 
                 # not nan
-                if val == val:
+                if val == val and val != iNaT:
                     nobs[lab, j] += 1
                     if nobs[lab, j] == rank:
                         resx[lab, j] = val
@@ -7592,7 +7592,8 @@ def group_min_float64(ndarray[float64_t, ndim=2] out,
                     val = values[i, j]
 
                     # not nan
-                    if val == val:
+                    if val == val and val != NAN:
+
                         nobs[lab, j] += 1
                         if val < minx[lab, j]:
                             minx[lab, j] = val
@@ -7606,7 +7607,7 @@ def group_min_float64(ndarray[float64_t, ndim=2] out,
                 val = values[i, 0]
 
                 # not nan
-                if val == val:
+                if val == val and val != NAN:
                     nobs[lab, 0] += 1
                     if val < minx[lab, 0]:
                         minx[lab, 0] = val
@@ -7654,7 +7655,8 @@ def group_min_float32(ndarray[float32_t, ndim=2] out,
                     val = values[i, j]
 
                     # not nan
-                    if val == val:
+                    if val == val and val != NAN:
+
                         nobs[lab, j] += 1
                         if val < minx[lab, j]:
                             minx[lab, j] = val
@@ -7668,7 +7670,7 @@ def group_min_float32(ndarray[float32_t, ndim=2] out,
                 val = values[i, 0]
 
                 # not nan
-                if val == val:
+                if val == val and val != NAN:
                     nobs[lab, 0] += 1
                     if val < minx[lab, 0]:
                         minx[lab, 0] = val
@@ -7716,7 +7718,8 @@ def group_min_int64(ndarray[int64_t, ndim=2] out,
                     val = values[i, j]
 
                     # not nan
-                    if val == val:
+                    if val == val and val != iNaT:
+
                         nobs[lab, j] += 1
                         if val < minx[lab, j]:
                             minx[lab, j] = val
@@ -7730,7 +7733,7 @@ def group_min_int64(ndarray[int64_t, ndim=2] out,
                 val = values[i, 0]
 
                 # not nan
-                if val == val:
+                if val == val and val != iNaT:
                     nobs[lab, 0] += 1
                     if val < minx[lab, 0]:
                         minx[lab, 0] = val
@@ -7779,7 +7782,7 @@ def group_max_float64(ndarray[float64_t, ndim=2] out,
                     val = values[i, j]
 
                     # not nan
-                    if val == val:
+                    if val == val and val != NAN:
                         nobs[lab, j] += 1
                         if val > maxx[lab, j]:
                             maxx[lab, j] = val
@@ -7793,7 +7796,7 @@ def group_max_float64(ndarray[float64_t, ndim=2] out,
                 val = values[i, 0]
 
                 # not nan
-                if val == val:
+                if val == val and val != NAN:
                     nobs[lab, 0] += 1
                     if val > maxx[lab, 0]:
                         maxx[lab, 0] = val
@@ -7841,7 +7844,7 @@ def group_max_float32(ndarray[float32_t, ndim=2] out,
                     val = values[i, j]
 
                     # not nan
-                    if val == val:
+                    if val == val and val != NAN:
                         nobs[lab, j] += 1
                         if val > maxx[lab, j]:
                             maxx[lab, j] = val
@@ -7855,7 +7858,7 @@ def group_max_float32(ndarray[float32_t, ndim=2] out,
                 val = values[i, 0]
 
                 # not nan
-                if val == val:
+                if val == val and val != NAN:
                     nobs[lab, 0] += 1
                     if val > maxx[lab, 0]:
                         maxx[lab, 0] = val
@@ -7903,7 +7906,7 @@ def group_max_int64(ndarray[int64_t, ndim=2] out,
                     val = values[i, j]
 
                     # not nan
-                    if val == val:
+                    if val == val and val != iNaT:
                         nobs[lab, j] += 1
                         if val > maxx[lab, j]:
                             maxx[lab, j] = val
@@ -7917,7 +7920,7 @@ def group_max_int64(ndarray[int64_t, ndim=2] out,
                 val = values[i, 0]
 
                 # not nan
-                if val == val:
+                if val == val and val != iNaT:
                     nobs[lab, 0] += 1
                     if val > maxx[lab, 0]:
                         maxx[lab, 0] = val