Skip to content

Commit b3171cf

Browse files
committed
core: Add MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC.
Most MCUs apart from Cortex-M0 with Thumb 1 have an instruction for computing the "high part" of a multiplication (e.g., the upper 32 bits of a 32x32 multiply). When they do, gcc uses this to implement a small and fast overflow check using the __builtin_mul_overflow intrinsic, which is preferable to the guard division method previously used in smallint.c. However, in contrast to the previous mp_small_int_mul_overflow routine, which checked not only that the result fits within mp_int_t but also that it satisfies MP_SMALL_INT_FITS(), __builtin_mul_overflow only checks for overflow of the C type. As a result, a slight change in the code flow is needed for MP_BINARY_OP_MULTIPLY. Other sites using mp_small_int_mul_overflow already had the result value flow through to an MP_SMALL_INT_FITS check, so they didn't need any additional changes. Do similarly for the _ll and _ull multiply overflow checks. Signed-off-by: Jeff Epler <jepler@gmail.com>
1 parent e3ef682 commit b3171cf

6 files changed

Lines changed: 110 additions & 85 deletions

File tree

py/misc.h

Lines changed: 19 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,11 @@
3535
#include <stdbool.h>
3636
#include <stdint.h>
3737
#include <stddef.h>
38+
#if __cplusplus // Required on at least one compiler to get ULLONG_MAX
39+
#include <climits>
40+
#else
3841
#include <limits.h>
42+
#endif
3943

4044
typedef unsigned char byte;
4145
typedef unsigned int uint;
@@ -454,53 +458,38 @@ static inline uint32_t mp_clz_mpi(mp_int_t x) {
454458
#endif
455459
}
456460

457-
// Overflow-checked operations for long long
461+
// Overflow-checked operations
458462

459463
// Integer overflow builtins were added to GCC 5, but __has_builtin only in GCC 10
460464
//
461465
// Note that the builtins have a defined result when overflow occurs, whereas the custom
462466
// functions below don't update the result if an overflow would occur (to avoid UB).
463467
#define MP_GCC_HAS_BUILTIN_OVERFLOW (__GNUC__ >= 5)
464468

465-
#if __has_builtin(__builtin_umulll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW
469+
#if MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC
470+
466471
#define mp_mul_ull_overflow __builtin_umulll_overflow
472+
#define mp_mul_ll_overflow __builtin_smulll_overflow
473+
inline static bool mp_mul_mp_int_t_overflow(mp_int_t x, mp_int_t y, mp_int_t *res) {
474+
// __builtin_mul_overflow is a type-generic function, this inline ensures the argument
475+
// types are checked to match mp_int_t.
476+
return __builtin_mul_overflow(x, y, res);
477+
}
478+
467479
#else
480+
481+
extern bool mp_mul_ll_overflow(long long int x, long long int y, long long int *res);
482+
extern bool mp_mul_mp_int_t_overflow(mp_int_t x, mp_int_t y, mp_int_t *res);
483+
484+
// only called once, so inline it
468485
inline static bool mp_mul_ull_overflow(unsigned long long int x, unsigned long long int y, unsigned long long int *res) {
469486
if (y > 0 && x > (ULLONG_MAX / y)) {
470487
return true; // overflow
471488
}
472489
*res = x * y;
473490
return false;
474491
}
475-
#endif
476492

477-
#if __has_builtin(__builtin_smulll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW
478-
#define mp_mul_ll_overflow __builtin_smulll_overflow
479-
#else
480-
inline static bool mp_mul_ll_overflow(long long int x, long long int y, long long int *res) {
481-
bool overflow;
482-
483-
// Check for multiply overflow; see CERT INT32-C
484-
if (x > 0) { // x is positive
485-
if (y > 0) { // x and y are positive
486-
overflow = (x > (LLONG_MAX / y));
487-
} else { // x positive, y nonpositive
488-
overflow = (y < (LLONG_MIN / x));
489-
} // x positive, y nonpositive
490-
} else { // x is nonpositive
491-
if (y > 0) { // x is nonpositive, y is positive
492-
overflow = (x < (LLONG_MIN / y));
493-
} else { // x and y are nonpositive
494-
overflow = (x != 0 && y < (LLONG_MAX / x));
495-
} // End if x and y are nonpositive
496-
} // End if x is nonpositive
497-
498-
if (!overflow) {
499-
*res = x * y;
500-
}
501-
502-
return overflow;
503-
}
504493
#endif
505494

506495
#if __has_builtin(__builtin_saddll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW

py/mpconfig.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2336,4 +2336,23 @@ typedef time_t mp_timestamp_t;
23362336
#define MP_WARN_CAT(x) (NULL)
23372337
#endif
23382338

2339+
// If true, use __builtin_mul_overflow (a gcc intrinsic supported by clang) for
2340+
// overflow checking when multiplying two small ints. Otherwise, use a portable
2341+
// algorithm.
2342+
//
2343+
// Most MCUs have a 32x32->64 bit multiply instruction, in which case the
2344+
// intrinsic is likely to be faster and generate smaller code. The main exception is
2345+
// Cortex-M0 with __ARM_ARCH_ISA_THUMB == 1.
2346+
//
2347+
// The intrinsic is in GCC starting with version 5.
2348+
#ifndef MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC
2349+
#if defined(__ARM_ARCH_ISA_THUMB) && (__GNUC__ >= 5)
2350+
#define MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC (__ARM_ARCH_ISA_THUMB >= 2)
2351+
#elif (__GNUC__ >= 5)
2352+
#define MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC (1)
2353+
#else
2354+
#define MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC (0)
2355+
#endif
2356+
#endif
2357+
23392358
#endif // MICROPY_INCLUDED_PY_MPCONFIG_H

py/parsenum.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include <stdlib.h>
2929

3030
#include "py/runtime.h"
31+
#include "py/misc.h"
3132
#include "py/parsenumbase.h"
3233
#include "py/parsenum.h"
3334
#include "py/smallint.h"
@@ -52,7 +53,11 @@ static MP_NORETURN void raise_exc(mp_obj_t exc, mp_lexer_t *lex) {
5253
// to bigint parsing if supported)
5354
typedef mp_int_t parsed_int_t;
5455

55-
#define PARSED_INT_MUL_OVERFLOW mp_small_int_mul_overflow
56+
#if MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC
57+
#define PARSED_INT_MUL_OVERFLOW __builtin_mul_overflow
58+
#else
59+
#define PARSED_INT_MUL_OVERFLOW mp_mul_mp_int_t_overflow
60+
#endif
5661
#define PARSED_INT_FITS MP_SMALL_INT_FITS
5762
#else
5863
// In the special case where bigint support is long long, we save code size by

py/runtime.c

Lines changed: 66 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -490,30 +490,15 @@ mp_obj_t MICROPY_WRAP_MP_BINARY_OP(mp_binary_op)(mp_binary_op_t op, mp_obj_t lhs
490490
case MP_BINARY_OP_MULTIPLY:
491491
case MP_BINARY_OP_INPLACE_MULTIPLY: {
492492

493-
// If long long type exists and is larger than mp_int_t, then
494-
// we can use the following code to perform overflow-checked multiplication.
495-
// Otherwise (eg in x64 case) we must use mp_small_int_mul_overflow.
496-
#if 0
497-
// compute result using long long precision
498-
long long res = (long long)lhs_val * (long long)rhs_val;
499-
if (res > MP_SMALL_INT_MAX || res < MP_SMALL_INT_MIN) {
500-
// result overflowed SMALL_INT, so return higher precision integer
501-
return mp_obj_new_int_from_ll(res);
502-
} else {
503-
// use standard precision
504-
lhs_val = (mp_int_t)res;
505-
}
506-
#endif
507-
508493
mp_int_t int_res;
509-
if (mp_small_int_mul_overflow(lhs_val, rhs_val, &int_res)) {
510-
// use higher precision
494+
if (mp_mul_mp_int_t_overflow(lhs_val, rhs_val, &int_res)) {
511495
lhs = mp_obj_new_int_from_ll(lhs_val);
512496
goto generic_binary_op;
513497
} else {
514-
// use standard precision
515-
return MP_OBJ_NEW_SMALL_INT(int_res);
498+
lhs_val = int_res;
516499
}
500+
501+
break; // result fits in mp_int_t but might not be MP_SMALL_INT_FITS
517502
}
518503
case MP_BINARY_OP_FLOOR_DIVIDE:
519504
case MP_BINARY_OP_INPLACE_FLOOR_DIVIDE:
@@ -553,7 +538,7 @@ mp_obj_t MICROPY_WRAP_MP_BINARY_OP(mp_binary_op)(mp_binary_op_t op, mp_obj_t lhs
553538
mp_int_t ans = 1;
554539
while (rhs_val > 0) {
555540
if (rhs_val & 1) {
556-
if (mp_small_int_mul_overflow(ans, lhs_val, &ans)) {
541+
if (mp_mul_mp_int_t_overflow(ans, lhs_val, &ans)) {
557542
goto power_overflow;
558543
}
559544
}
@@ -562,7 +547,7 @@ mp_obj_t MICROPY_WRAP_MP_BINARY_OP(mp_binary_op)(mp_binary_op_t op, mp_obj_t lhs
562547
}
563548
rhs_val /= 2;
564549
mp_int_t int_res;
565-
if (mp_small_int_mul_overflow(lhs_val, lhs_val, &int_res)) {
550+
if (mp_mul_mp_int_t_overflow(lhs_val, lhs_val, &int_res)) {
566551
goto power_overflow;
567552
}
568553
lhs_val = int_res;
@@ -1784,3 +1769,63 @@ MP_NORETURN void mp_raise_recursion_depth(void) {
17841769
mp_raise_type_arg(&mp_type_RuntimeError, MP_OBJ_NEW_QSTR(MP_QSTR_maximum_space_recursion_space_depth_space_exceeded));
17851770
}
17861771
#endif
1772+
1773+
#if !MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC
1774+
bool mp_mul_ll_overflow(long long int x, long long int y, long long int *res) {
1775+
bool overflow;
1776+
1777+
// Check for multiply overflow; see CERT INT32-C
1778+
if (x > 0) { // x is positive
1779+
if (y > 0) { // x and y are positive
1780+
overflow = (x > (LLONG_MAX / y));
1781+
} else { // x positive, y nonpositive
1782+
overflow = (y < (LLONG_MIN / x));
1783+
} // x positive, y nonpositive
1784+
} else { // x is nonpositive
1785+
if (y > 0) { // x is nonpositive, y is positive
1786+
overflow = (x < (LLONG_MIN / y));
1787+
} else { // x and y are nonpositive
1788+
overflow = (x != 0 && y < (LLONG_MAX / x));
1789+
} // End if x and y are nonpositive
1790+
} // End if x is nonpositive
1791+
1792+
if (!overflow) {
1793+
*res = x * y;
1794+
}
1795+
1796+
return overflow;
1797+
}
1798+
1799+
#define MP_UINT_MAX (~(mp_uint_t)0)
1800+
#define MP_INT_MAX ((mp_int_t)(MP_UINT_MAX >> 1))
1801+
#define MP_INT_MIN (-MP_INT_MAX - 1)
1802+
1803+
bool mp_mul_mp_int_t_overflow(mp_int_t x, mp_int_t y, mp_int_t *res) {
1804+
// Check for multiply overflow; see CERT INT32-C
1805+
if (x > 0) { // x is positive
1806+
if (y > 0) { // x and y are positive
1807+
if (x > (MP_INT_MAX / y)) {
1808+
return true;
1809+
}
1810+
} else { // x positive, y nonpositive
1811+
if (y < (MP_INT_MIN / x)) {
1812+
return true;
1813+
}
1814+
} // x positive, y nonpositive
1815+
} else { // x is nonpositive
1816+
if (y > 0) { // x is nonpositive, y is positive
1817+
if (x < (MP_INT_MIN / y)) {
1818+
return true;
1819+
}
1820+
} else { // x and y are nonpositive
1821+
if (x != 0 && y < (MP_INT_MAX / x)) {
1822+
return true;
1823+
}
1824+
} // End if x and y are nonpositive
1825+
} // End if x is nonpositive
1826+
1827+
// Result doesn't overflow
1828+
*res = x * y;
1829+
return false;
1830+
}
1831+
#endif

py/smallint.c

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -26,35 +26,6 @@
2626

2727
#include "py/smallint.h"
2828

29-
bool mp_small_int_mul_overflow(mp_int_t x, mp_int_t y, mp_int_t *res) {
30-
// Check for multiply overflow; see CERT INT32-C
31-
if (x > 0) { // x is positive
32-
if (y > 0) { // x and y are positive
33-
if (x > (MP_SMALL_INT_MAX / y)) {
34-
return true;
35-
}
36-
} else { // x positive, y nonpositive
37-
if (y < (MP_SMALL_INT_MIN / x)) {
38-
return true;
39-
}
40-
} // x positive, y nonpositive
41-
} else { // x is nonpositive
42-
if (y > 0) { // x is nonpositive, y is positive
43-
if (x < (MP_SMALL_INT_MIN / y)) {
44-
return true;
45-
}
46-
} else { // x and y are nonpositive
47-
if (x != 0 && y < (MP_SMALL_INT_MAX / x)) {
48-
return true;
49-
}
50-
} // End if x and y are nonpositive
51-
} // End if x is nonpositive
52-
53-
// Result doesn't overflow
54-
*res = x * y;
55-
return false;
56-
}
57-
5829
mp_int_t mp_small_int_modulo(mp_int_t dividend, mp_int_t divisor) {
5930
// Python specs require that mod has same sign as second operand
6031
dividend %= divisor;

py/smallint.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,6 @@
6868
// The number of bits in a MP_SMALL_INT including the sign bit.
6969
#define MP_SMALL_INT_BITS (MP_IMAX_BITS(MP_SMALL_INT_MAX) + 1)
7070

71-
// Multiply two small ints.
72-
// If returns false, the correct result is stored in 'res'
73-
// If returns true, the multiplication would have overflowed. 'res' is unchanged.
74-
bool mp_small_int_mul_overflow(mp_int_t x, mp_int_t y, mp_int_t *res);
7571
mp_int_t mp_small_int_modulo(mp_int_t dividend, mp_int_t divisor);
7672
mp_int_t mp_small_int_floor_divide(mp_int_t num, mp_int_t denom);
7773

0 commit comments

Comments
 (0)