Skip to content

Commit a8df404

Browse files
committed
Speed up xstrtod by first accumulating the integer part of a floating-point number in an integer variable
1 parent c2bc5dd commit a8df404

File tree

1 file changed

+19
-3
lines changed

1 file changed

+19
-3
lines changed

pandas/_libs/src/parser/tokenizer.c

+19-3
Original file line numberDiff line numberDiff line change
@@ -1539,9 +1539,14 @@ int main(int argc, char *argv[]) {
15391539
// * Add tsep argument for thousands separator
15401540
//
15411541

1542+
// pessimistic but quick estimate of how many decimal digits fit in an unsigned int,
1543+
// assuming that each decimal digit requires 4 bits to store
1544+
// Count of 4-bit groups in an unsigned int (8 bits per byte assumed, as in
// the original expression); xstrtod uses it as the digit budget for its
// integer accumulation loop.
const int max_int_decimal_digits = (int)(sizeof(unsigned int) * 8 / 4);
1545+
15421546
double xstrtod(const char *str, char **endptr, char decimal, char sci,
15431547
char tsep, int skip_trailing) {
15441548
double number;
1549+
unsigned int i_number = 0;
15451550
int exponent;
15461551
int negative;
15471552
char *p = (char *)str;
@@ -1564,19 +1569,30 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
15641569
p++;
15651570
}
15661571

1567-
number = 0.;
15681572
exponent = 0;
15691573
num_digits = 0;
15701574
num_decimals = 0;
15711575

15721576
// Process string of digits.
1573-
while (isdigit_ascii(*p)) {
1574-
number = number * 10. + (*p - '0');
1577+
while (isdigit_ascii(*p) && num_digits <= max_int_decimal_digits) {
1578+
i_number = i_number * 10 + (*p - '0');
15751579
p++;
15761580
num_digits++;
15771581

15781582
p += (tsep != '\0' && *p == tsep);
15791583
}
1584+
number = i_number;
1585+
1586+
if (num_digits > max_int_decimal_digits) {
1587+
// process what's left as double
1588+
while (isdigit_ascii(*p)) {
1589+
number = number * 10. + (*p - '0');
1590+
p++;
1591+
num_digits++;
1592+
1593+
p += (tsep != '\0' && *p == tsep);
1594+
}
1595+
}
15801596

15811597
// Process decimal part.
15821598
if (*p == decimal) {

0 commit comments

Comments
 (0)