Skip to content

Commit aa4578d

Browse files
committed
Added grp_sizes array, broke out pct calc
1 parent b430635 commit aa4578d

File tree

1 file changed

+14
-10
lines changed

1 file changed

+14
-10
lines changed

pandas/_libs/groupby_helper.pxi.in

+14-10
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
481481
Py_ssize_t i, j, N, K, val_start=0, grp_start=0, dups=0, sum_ranks=0
482482
Py_ssize_t grp_vals_seen=1, grp_na_count=0
483483
ndarray[int64_t] _as
484+
ndarray[float64_t, ndim=2] grp_sizes
484485
ndarray[{{c_type}}] masked_vals
485486
ndarray[uint8_t] mask
486487
bint keep_na
@@ -489,6 +490,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
489490
tiebreak = tiebreakers[ties_method]
490491
keep_na = na_option == 'keep'
491492
N, K = (<object> values).shape
493+
grp_sizes = np.ones_like(out)
492494

493495
# Copy values into new array in order to fill missing data
494496
# with mask, without obfuscating location of missing data
@@ -588,25 +590,27 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
588590
val_start = i
589591
grp_vals_seen += 1
590592

591-
# Similar to the previous conditional, check now if we are moving to a
592-
# new group. If so, keep track of the index where the new group occurs,
593-
# so the tiebreaker calculations can decrement that from their position
594-
# if the pct flag is True, go back and overwrite the result for
595-
# the group to be divided by the size of the group (excluding na values)
596-
# also be sure to reset any of the items helping to calculate dups
593+
# Similar to the previous conditional, check now if we are moving
594+
# to a new group. If so, keep track of the index where the new
595+
# group occurs, so the tiebreaker calculations can decrement that
596+
# from their position. Fill in the size of each group encountered
597+
# (used by pct calculations later). Also be sure to reset any of
598+
# the items helping to calculate dups
597599
if i == N - 1 or labels[_as[i]] != labels[_as[i+1]]:
598-
if pct:
599-
for j in range(grp_start, i + 1):
600-
out[_as[j], 0] = out[_as[j], 0] / (i - grp_start + 1
601-
- grp_na_count)
600+
for j in range(grp_start, i + 1):
601+
grp_sizes[_as[j], 0] = i - grp_start + 1 - grp_na_count
602602
dups = sum_ranks = 0
603603
grp_na_count = 0
604604
val_start = i + 1
605605
grp_start = i + 1
606606
grp_vals_seen = 1
607607

608+
if pct:
609+
for i in range(N):
610+
out[i, 0] = out[i, 0] / grp_sizes[i, 0]
608611
{{endfor}}
609612

613+
610614
#----------------------------------------------------------------------
611615
# group_min, group_max
612616
#----------------------------------------------------------------------

0 commit comments

Comments
 (0)