Skip to content

Commit 4dd6080

Browse files
committed
BUG: track nan values before filling them GH6945
separate real NA values from filled NA values
1 parent 34978a7 commit 4dd6080

File tree

1 file changed

+16
-6
lines changed

1 file changed

+16
-6
lines changed

pandas/_libs/algos_rank_helper.pxi.in

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,17 +44,21 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
4444

4545
{{if dtype == 'object'}}
4646
ndarray sorted_data, values
47+
ndarray[np.uint8_t, cast=True] sorted_namask
48+
{{elif dtype != 'uint64'}}
49+
ndarray[{{ctype}}] sorted_data, values
50+
ndarray[np.uint8_t, cast=True] sorted_namask
4751
{{else}}
4852
ndarray[{{ctype}}] sorted_data, values
4953
{{endif}}
50-
54+
5155
ndarray[float64_t] ranks
5256
ndarray[int64_t] argsorted
5357

5458
{{if dtype == 'uint64'}}
5559
{{ctype}} val
5660
{{else}}
57-
{{ctype}} val, nan_value
61+
{{ctype}} val, nan_value, isnan
5862
{{endif}}
5963

6064
float64_t sum_ranks = 0
@@ -121,18 +125,23 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
121125

122126
if not ascending:
123127
_as = _as[::-1]
124-
128+
125129
sorted_data = values.take(_as)
130+
# need to distinguish between pos/neg nan and real nan when keep_na is true
131+
{{if dtype != 'uint64'}}
132+
sorted_namask = mask.take(_as)
133+
sorted_namask = sorted_namask.astype(np.bool)
134+
{{endif}}
126135
argsorted = _as.astype('i8')
127136

128137
{{if dtype == 'object'}}
129138
for i in range(n):
130139
sum_ranks += i + 1
131140
dups += 1
132-
141+
isnan = sorted_namask[i]
133142
val = util.get_value_at(sorted_data, i)
134143

135-
if (val is nan_value) and keep_na:
144+
if isnan and keep_na:
136145
ranks[argsorted[i]] = nan
137146
continue
138147

@@ -168,7 +177,8 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
168177
val = sorted_data[i]
169178

170179
{{if dtype != 'uint64'}}
171-
if (val == nan_value) and keep_na:
180+
isnan = sorted_namask[i]
181+
if isnan and keep_na:
172182
ranks[argsorted[i]] = nan
173183
continue
174184
{{endif}}

0 commit comments

Comments
 (0)