@@ -44,26 +44,24 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
44
44
45
45
{{if dtype == 'object'}}
46
46
ndarray sorted_data, values
47
- ndarray[np.uint8_t, cast=True] sorted_namask
48
- {{elif dtype != 'uint64'}}
49
- ndarray[{{ctype}}] sorted_data, values
50
- ndarray[np.uint8_t, cast=True] sorted_namask
51
47
{{else}}
52
48
ndarray[{{ctype}}] sorted_data, values
53
49
{{endif}}
54
50
55
51
ndarray[float64_t] ranks
56
52
ndarray[int64_t] argsorted
53
+ ndarray[np.uint8_t, cast=True] sorted_mask
57
54
58
55
{{if dtype == 'uint64'}}
59
56
{{ctype}} val
60
57
{{else}}
61
- {{ctype}} val, nan_value, isnan
58
+ {{ctype}} val, nan_value
62
59
{{endif}}
63
60
64
61
float64_t sum_ranks = 0
65
62
int tiebreak = 0
66
63
bint keep_na = 0
64
+ bint isnan
67
65
float count = 0.0
68
66
tiebreak = tiebreakers[ties_method]
69
67
@@ -95,14 +93,16 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
95
93
{{endif}}
96
94
97
95
np.putmask(values, mask, nan_value)
96
+ {{else}}
97
+ mask = np.zeros(shape=len(values), dtype=bool)
98
98
{{endif}}
99
99
100
100
n = len(values)
101
101
ranks = np.empty(n, dtype='f8')
102
102
103
103
{{if dtype == 'object'}}
104
104
try:
105
- _as = values.argsort( )
105
+ _as = np.lexsort(keys=(mask, values) )
106
106
except TypeError:
107
107
if not retry:
108
108
raise
@@ -116,40 +116,37 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
116
116
{{else}}
117
117
if tiebreak == TIEBREAK_FIRST:
118
118
# need to use a stable sort here
119
- _as = values.argsort(kind='mergesort' )
119
+ _as = np.lexsort(keys=(mask, values) )
120
120
if not ascending:
121
121
tiebreak = TIEBREAK_FIRST_DESCENDING
122
122
else:
123
- _as = values.argsort( )
123
+ _as = np.lexsort(keys=(mask, values) )
124
124
{{endif}}
125
125
126
126
if not ascending:
127
127
_as = _as[::-1]
128
128
129
129
sorted_data = values.take(_as)
130
130
# reorder the mask the same way as the data: when keep_na is true, entries that
# were originally missing must be told apart from real values equal to nan_value
131
- {{if dtype != 'uint64'}}
132
- sorted_namask = mask.take(_as)
133
- sorted_namask = sorted_namask.astype(np.bool)
134
- {{endif}}
131
+ sorted_mask = mask.take(_as)
135
132
argsorted = _as.astype('i8')
136
133
137
134
{{if dtype == 'object'}}
138
135
for i in range(n):
139
136
sum_ranks += i + 1
140
137
dups += 1
141
- isnan = sorted_namask [i]
138
+ isnan = sorted_mask [i]
142
139
val = util.get_value_at(sorted_data, i)
143
140
144
141
if isnan and keep_na:
145
142
ranks[argsorted[i]] = nan
146
- sum_ranks = dups = 0
147
143
continue
148
144
149
145
count += 1.0
150
146
151
147
if (i == n - 1 or
152
- are_diff(util.get_value_at(sorted_data, i + 1), val)):
148
+ are_diff(util.get_value_at(sorted_data, i + 1), val) or
149
+ sorted_mask[i + 1]):
153
150
if tiebreak == TIEBREAK_AVERAGE:
154
151
for j in range(i - dups + 1, i + 1):
155
152
ranks[argsorted[j]] = sum_ranks / dups
@@ -178,16 +175,15 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
178
175
val = sorted_data[i]
179
176
180
177
{{if dtype != 'uint64'}}
181
- isnan = sorted_namask [i]
182
- if isnan and keep_na:
178
+ isnan = sorted_mask [i]
179
+ if isnan and keep_na:
183
180
ranks[argsorted[i]] = nan
184
- sum_ranks = dups = 0
185
181
continue
186
182
{{endif}}
187
183
188
184
count += 1.0
189
185
190
- if i == n - 1 or sorted_data[i + 1] != val:
186
+ if i == n - 1 or sorted_data[i + 1] != val or sorted_mask[i + 1] :
191
187
if tiebreak == TIEBREAK_AVERAGE:
192
188
for j in range(i - dups + 1, i + 1):
193
189
ranks[argsorted[j]] = sum_ranks / dups
0 commit comments