Skip to content

PERF: use NaT comparisons in int64/datetimelikes #11010 #11023

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 8, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions doc/source/whatsnew/v0.17.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1009,11 +1009,10 @@ Bug Fixes
- Bug in ``to_json`` which was causing segmentation fault when serializing 0-rank ndarray (:issue:`9576`)
- Bug in plotting functions that may raise ``IndexError`` when plotting on a ``GridSpec`` (:issue:`10819`)
- Bug in plot results that may show unnecessary minor ticklabels (:issue:`10657`)
- Bug in ``groupby`` incorrect computation for aggregation on ``DataFrame`` with ``NaT`` (E.g ``first``, ``last``, ``min``). (:issue:`10590`)
- Bug in ``groupby`` causing incorrect computation for aggregations on a ``DataFrame`` with ``NaT`` (e.g. ``first``, ``last``, ``min``) (:issue:`10590`, :issue:`11010`)
- Bug when constructing ``DataFrame`` where passing a dictionary with only scalar values and specifying columns did not raise an error (:issue:`10856`)
- Bug in ``.var()`` causing roundoff errors for highly similar values (:issue:`10242`)
- Bug in ``DataFrame.plot(subplots=True)`` with duplicated columns outputs incorrect result (:issue:`10962`)
- Bug in ``Index`` arithmetic that may result in an incorrect class (:issue:`10638`)
- Bug in ``date_range`` returning an empty result if freq is negative annual, quarterly, or monthly (:issue:`11018`)
- Bug in ``DatetimeIndex`` where a negative freq could not be inferred (:issue:`11018`)

2 changes: 0 additions & 2 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1523,8 +1523,6 @@ def aggregate(self, values, how, axis=0):

if is_datetime_or_timedelta_dtype(values.dtype):
values = values.view('int64')
values[values == tslib.iNaT] = np.nan
# GH 7754
is_numeric = True
elif is_bool_dtype(values.dtype):
values = _algos.ensure_float64(values)
Expand Down
13 changes: 7 additions & 6 deletions pandas/src/generate_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -739,7 +739,7 @@ def group_last_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
val = values[i, j]

# not nan
if val == val:
if val == val and val != %(nan_val)s:
nobs[lab, j] += 1
resx[lab, j] = val

Expand Down Expand Up @@ -785,7 +785,7 @@ def group_nth_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
val = values[i, j]

# not nan
if val == val:
if val == val and val != %(nan_val)s:
nobs[lab, j] += 1
if nobs[lab, j] == rank:
resx[lab, j] = val
Expand Down Expand Up @@ -1013,7 +1013,7 @@ def group_max_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
val = values[i, j]

# not nan
if val == val:
if val == val and val != %(nan_val)s:
nobs[lab, j] += 1
if val > maxx[lab, j]:
maxx[lab, j] = val
Expand All @@ -1027,7 +1027,7 @@ def group_max_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
val = values[i, 0]

# not nan
if val == val:
if val == val and val != %(nan_val)s:
nobs[lab, 0] += 1
if val > maxx[lab, 0]:
maxx[lab, 0] = val
Expand Down Expand Up @@ -1076,7 +1076,8 @@ def group_min_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
val = values[i, j]

# not nan
if val == val:
if val == val and val != %(nan_val)s:

nobs[lab, j] += 1
if val < minx[lab, j]:
minx[lab, j] = val
Expand All @@ -1090,7 +1091,7 @@ def group_min_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
val = values[i, 0]

# not nan
if val == val:
if val == val and val != %(nan_val)s:
nobs[lab, 0] += 1
if val < minx[lab, 0]:
minx[lab, 0] = val
Expand Down
39 changes: 21 additions & 18 deletions pandas/src/generated.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -7315,7 +7315,7 @@ def group_last_float64(ndarray[float64_t, ndim=2] out,
val = values[i, j]

# not nan
if val == val:
if val == val and val != NAN:
nobs[lab, j] += 1
resx[lab, j] = val

Expand Down Expand Up @@ -7360,7 +7360,7 @@ def group_last_float32(ndarray[float32_t, ndim=2] out,
val = values[i, j]

# not nan
if val == val:
if val == val and val != NAN:
nobs[lab, j] += 1
resx[lab, j] = val

Expand Down Expand Up @@ -7405,7 +7405,7 @@ def group_last_int64(ndarray[int64_t, ndim=2] out,
val = values[i, j]

# not nan
if val == val:
if val == val and val != iNaT:
nobs[lab, j] += 1
resx[lab, j] = val

Expand Down Expand Up @@ -7451,7 +7451,7 @@ def group_nth_float64(ndarray[float64_t, ndim=2] out,
val = values[i, j]

# not nan
if val == val:
if val == val and val != NAN:
nobs[lab, j] += 1
if nobs[lab, j] == rank:
resx[lab, j] = val
Expand Down Expand Up @@ -7497,7 +7497,7 @@ def group_nth_float32(ndarray[float32_t, ndim=2] out,
val = values[i, j]

# not nan
if val == val:
if val == val and val != NAN:
nobs[lab, j] += 1
if nobs[lab, j] == rank:
resx[lab, j] = val
Expand Down Expand Up @@ -7543,7 +7543,7 @@ def group_nth_int64(ndarray[int64_t, ndim=2] out,
val = values[i, j]

# not nan
if val == val:
if val == val and val != iNaT:
nobs[lab, j] += 1
if nobs[lab, j] == rank:
resx[lab, j] = val
Expand Down Expand Up @@ -7592,7 +7592,8 @@ def group_min_float64(ndarray[float64_t, ndim=2] out,
val = values[i, j]

# not nan
if val == val:
if val == val and val != NAN:

nobs[lab, j] += 1
if val < minx[lab, j]:
minx[lab, j] = val
Expand All @@ -7606,7 +7607,7 @@ def group_min_float64(ndarray[float64_t, ndim=2] out,
val = values[i, 0]

# not nan
if val == val:
if val == val and val != NAN:
nobs[lab, 0] += 1
if val < minx[lab, 0]:
minx[lab, 0] = val
Expand Down Expand Up @@ -7654,7 +7655,8 @@ def group_min_float32(ndarray[float32_t, ndim=2] out,
val = values[i, j]

# not nan
if val == val:
if val == val and val != NAN:

nobs[lab, j] += 1
if val < minx[lab, j]:
minx[lab, j] = val
Expand All @@ -7668,7 +7670,7 @@ def group_min_float32(ndarray[float32_t, ndim=2] out,
val = values[i, 0]

# not nan
if val == val:
if val == val and val != NAN:
nobs[lab, 0] += 1
if val < minx[lab, 0]:
minx[lab, 0] = val
Expand Down Expand Up @@ -7716,7 +7718,8 @@ def group_min_int64(ndarray[int64_t, ndim=2] out,
val = values[i, j]

# not nan
if val == val:
if val == val and val != iNaT:

nobs[lab, j] += 1
if val < minx[lab, j]:
minx[lab, j] = val
Expand All @@ -7730,7 +7733,7 @@ def group_min_int64(ndarray[int64_t, ndim=2] out,
val = values[i, 0]

# not nan
if val == val:
if val == val and val != iNaT:
nobs[lab, 0] += 1
if val < minx[lab, 0]:
minx[lab, 0] = val
Expand Down Expand Up @@ -7779,7 +7782,7 @@ def group_max_float64(ndarray[float64_t, ndim=2] out,
val = values[i, j]

# not nan
if val == val:
if val == val and val != NAN:
nobs[lab, j] += 1
if val > maxx[lab, j]:
maxx[lab, j] = val
Expand All @@ -7793,7 +7796,7 @@ def group_max_float64(ndarray[float64_t, ndim=2] out,
val = values[i, 0]

# not nan
if val == val:
if val == val and val != NAN:
nobs[lab, 0] += 1
if val > maxx[lab, 0]:
maxx[lab, 0] = val
Expand Down Expand Up @@ -7841,7 +7844,7 @@ def group_max_float32(ndarray[float32_t, ndim=2] out,
val = values[i, j]

# not nan
if val == val:
if val == val and val != NAN:
nobs[lab, j] += 1
if val > maxx[lab, j]:
maxx[lab, j] = val
Expand All @@ -7855,7 +7858,7 @@ def group_max_float32(ndarray[float32_t, ndim=2] out,
val = values[i, 0]

# not nan
if val == val:
if val == val and val != NAN:
nobs[lab, 0] += 1
if val > maxx[lab, 0]:
maxx[lab, 0] = val
Expand Down Expand Up @@ -7903,7 +7906,7 @@ def group_max_int64(ndarray[int64_t, ndim=2] out,
val = values[i, j]

# not nan
if val == val:
if val == val and val != iNaT:
nobs[lab, j] += 1
if val > maxx[lab, j]:
maxx[lab, j] = val
Expand All @@ -7917,7 +7920,7 @@ def group_max_int64(ndarray[int64_t, ndim=2] out,
val = values[i, 0]

# not nan
if val == val:
if val == val and val != iNaT:
nobs[lab, 0] += 1
if val > maxx[lab, 0]:
maxx[lab, 0] = val
Expand Down