Skip to content

Commit 7cc9267

Browse files
committed
no_asm.h: add optimization hints.
1 parent 36587ea commit 7cc9267

File tree

1 file changed

+34
-0
lines changed

1 file changed

+34
-0
lines changed

src/no_asm.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,18 @@ typedef unsigned long long llimb_t;
1616
# pragma GCC diagnostic ignored "-Wstatic-in-inline"
1717
#endif
1818

19+
#if !defined(__clang__)
20+
# if defined(__GNUC__) && __GNUC__>=5
21+
/*
 * Fallback optimization hint for non-clang GCC >= 5: tell the optimizer
 * that |condition| always holds by marking the opposite case unreachable.
 * |condition| is evaluated here, so it must be free of side effects.
 * Wrapped in do { } while (0) so the expansion is a single statement and
 * cannot capture a following "else" when used in an unbraced if/else.
 */
# define __builtin_assume(condition) \
    do { if (!(condition)) __builtin_unreachable(); } while (0)
22+
# else
23+
# define __builtin_assume(condition)
24+
# endif
25+
#endif
26+
1927
static void mul_mont_n(limb_t ret[], const limb_t a[], const limb_t b[],
2028
const limb_t p[], limb_t n0, size_t n)
2129
{
30+
__builtin_assume(n != 0 && n%2 == 0);
2231
llimb_t limbx;
2332
limb_t mask, borrow, mx, hi, tmp[n+1], carry;
2433
size_t i, j;
@@ -92,6 +101,7 @@ MUL_MONT_IMPL(384)
92101
static void add_mod_n(limb_t ret[], const limb_t a[], const limb_t b[],
93102
const limb_t p[], size_t n)
94103
{
104+
__builtin_assume(n != 0);
95105
llimb_t limbx;
96106
limb_t mask, carry, borrow, tmp[n];
97107
size_t i;
@@ -125,6 +135,7 @@ ADD_MOD_IMPL(384)
125135
static void sub_mod_n(limb_t ret[], const limb_t a[], const limb_t b[],
126136
const limb_t p[], size_t n)
127137
{
138+
__builtin_assume(n != 0);
128139
llimb_t limbx;
129140
limb_t mask, carry, borrow;
130141
size_t i;
@@ -155,6 +166,7 @@ SUB_MOD_IMPL(384)
155166
static void mul_by_3_mod_n(limb_t ret[], const limb_t a[], const limb_t p[],
156167
size_t n)
157168
{
169+
__builtin_assume(n != 0);
158170
llimb_t limbx;
159171
limb_t mask, carry, borrow, tmp[n], two_a[n];
160172
size_t i;
@@ -205,6 +217,7 @@ MUL_BY_3_MOD_IMPL(384)
205217
static void lshift_mod_n(limb_t ret[], const limb_t a[], size_t count,
206218
const limb_t p[], size_t n)
207219
{
220+
__builtin_assume(n != 0);
208221
llimb_t limbx;
209222
limb_t mask, carry, borrow, tmp[n];
210223
size_t i;
@@ -242,6 +255,7 @@ LSHIFT_MOD_IMPL(384)
242255
static void cneg_mod_n(limb_t ret[], const limb_t a[], bool_t flag,
243256
const limb_t p[], size_t n)
244257
{
258+
__builtin_assume(n != 0);
245259
llimb_t limbx;
246260
limb_t borrow, mask, tmp[n];
247261
size_t i;
@@ -269,6 +283,7 @@ CNEG_MOD_IMPL(384)
269283

270284
static limb_t check_mod_n(const byte a[], const limb_t p[], size_t n)
271285
{
286+
__builtin_assume(n != 0);
272287
llimb_t limbx;
273288
limb_t borrow, ai, acc;
274289
size_t i, j;
@@ -293,6 +308,7 @@ CHECK_MOD_IMPL(256)
293308
static limb_t add_n_check_mod_n(byte ret[], const byte a[], const byte b[],
294309
const limb_t p[], size_t n)
295310
{
311+
__builtin_assume(n != 0);
296312
limb_t ret_[n], a_[n], b_[n], zero;
297313

298314
limbs_from_le_bytes(a_, a, sizeof(a_));
@@ -316,6 +332,7 @@ ADD_N_CHECK_MOD_IMPL(256)
316332
static limb_t sub_n_check_mod_n(byte ret[], const byte a[], const byte b[],
317333
const limb_t p[], size_t n)
318334
{
335+
__builtin_assume(n != 0);
319336
limb_t ret_[n], a_[n], b_[n], zero;
320337

321338
limbs_from_le_bytes(a_, a, sizeof(a_));
@@ -339,6 +356,7 @@ SUB_N_CHECK_MOD_IMPL(256)
339356
static void from_mont_n(limb_t ret[], const limb_t a[],
340357
const limb_t p[], limb_t n0, size_t n)
341358
{
359+
__builtin_assume(n != 0 && n%2 == 0);
342360
llimb_t limbx;
343361
limb_t mask, borrow, mx, hi, tmp[n];
344362
size_t i, j;
@@ -380,6 +398,7 @@ FROM_MONT_IMPL(384)
380398
static void redc_mont_n(limb_t ret[], const limb_t a[],
381399
const limb_t p[], limb_t n0, size_t n)
382400
{
401+
__builtin_assume(n != 0 && n%2 == 0);
383402
llimb_t limbx;
384403
limb_t mask, carry, borrow, mx, hi, tmp[n];
385404
const limb_t *b = a;
@@ -427,6 +446,7 @@ REDC_MONT_IMPL(384, 768)
427446
static void rshift_mod_n(limb_t ret[], const limb_t a[], size_t count,
428447
const limb_t p[], size_t n)
429448
{
449+
__builtin_assume(n != 0 && n%2 == 0);
430450
llimb_t limbx;
431451
limb_t mask, carry, limb, next;
432452
size_t i;
@@ -467,6 +487,7 @@ DIV_BY_2_MOD_IMPL(384)
467487

468488
static limb_t sgn0_pty_mod_n(const limb_t a[], const limb_t p[], size_t n)
469489
{
490+
__builtin_assume(n != 0);
470491
llimb_t limbx;
471492
limb_t carry, borrow, ret, tmp[n];
472493
size_t i;
@@ -552,6 +573,7 @@ void mul_mont_384x(vec384x ret, const vec384x a, const vec384x b,
552573
static void mul_mont_nonred_n(limb_t ret[], const limb_t a[], const limb_t b[],
553574
const limb_t p[], limb_t n0, size_t n)
554575
{
576+
__builtin_assume(n != 0 && n%2 == 0);
555577
llimb_t limbx;
556578
limb_t mx, hi, tmp[n+1];
557579
size_t i, j;
@@ -592,6 +614,7 @@ static void mul_mont_nonred_n(limb_t ret[], const limb_t a[], const limb_t b[],
592614
void sqr_n_mul_mont_383(vec384 ret, const vec384 a, size_t count,
593615
const vec384 p, limb_t n0, const vec384 b)
594616
{
617+
__builtin_assume(count != 0);
595618
while(count--) {
596619
mul_mont_nonred_n(ret, a, a, p, n0, NLIMBS(384));
597620
a = ret;
@@ -704,6 +727,7 @@ static limb_t lshift_2(limb_t hi, limb_t lo, size_t l)
704727
static void ab_approximation_n(limb_t a_[2], const limb_t a[],
705728
limb_t b_[2], const limb_t b[], size_t n)
706729
{
730+
__builtin_assume(n != 0 && n%2 == 0);
707731
limb_t a_hi, a_lo, b_hi, b_lo, mask;
708732
size_t i;
709733

@@ -729,6 +753,7 @@ typedef struct { limb_t f0, g0, f1, g1; } factors;
729753
static void inner_loop_n(factors *fg, const limb_t a_[2], const limb_t b_[2],
730754
size_t n)
731755
{
756+
__builtin_assume(n != 0);
732757
llimb_t limbx;
733758
limb_t f0 = 1, g0 = 0, f1 = 0, g1 = 1;
734759
limb_t a_lo, a_hi, b_lo, b_hi, t_lo, t_hi, odd, borrow, xorm;
@@ -784,6 +809,7 @@ static void inner_loop_n(factors *fg, const limb_t a_[2], const limb_t b_[2],
784809

785810
static limb_t cneg_n(limb_t ret[], const limb_t a[], limb_t neg, size_t n)
786811
{
812+
__builtin_assume(n != 0);
787813
llimb_t limbx = 0;
788814
limb_t carry;
789815
size_t i;
@@ -799,6 +825,7 @@ static limb_t cneg_n(limb_t ret[], const limb_t a[], limb_t neg, size_t n)
799825

800826
static limb_t add_n(limb_t ret[], const limb_t a[], limb_t b[], size_t n)
801827
{
828+
__builtin_assume(n != 0);
802829
llimb_t limbx;
803830
limb_t carry;
804831
size_t i;
@@ -814,6 +841,7 @@ static limb_t add_n(limb_t ret[], const limb_t a[], limb_t b[], size_t n)
814841

815842
static limb_t umul_n(limb_t ret[], const limb_t a[], limb_t b, size_t n)
816843
{
844+
__builtin_assume(n != 0);
817845
llimb_t limbx;
818846
limb_t hi;
819847
size_t i;
@@ -831,6 +859,7 @@ static limb_t smul_n_shift_n(limb_t ret[], const limb_t a[], limb_t *f_,
831859
const limb_t b[], limb_t *g_,
832860
size_t n)
833861
{
862+
__builtin_assume(n != 0);
834863
limb_t a_[n+1], b_[n+1], f, g, neg, carry, hi;
835864
size_t i;
836865

@@ -872,6 +901,7 @@ static limb_t smul_n_shift_n(limb_t ret[], const limb_t a[], limb_t *f_,
872901
static limb_t smul_2n(limb_t ret[], const limb_t u[], limb_t f,
873902
const limb_t v[], limb_t g, size_t n)
874903
{
904+
__builtin_assume(n != 0);
875905
limb_t u_[n], v_[n], neg, hi;
876906

877907
/* |u|*|f_| */
@@ -895,6 +925,7 @@ static limb_t smul_2n(limb_t ret[], const limb_t u[], limb_t f,
895925
static void ct_inverse_mod_n(limb_t ret[], const limb_t inp[],
896926
const limb_t mod[], const limb_t modx[], size_t n)
897927
{
928+
__builtin_assume(n != 0 && n%2 == 0);
898929
llimb_t limbx;
899930
limb_t a[n], b[n], u[2*n], v[2*n], t[2*n];
900931
limb_t a_[2], b_[2], sign, carry, top;
@@ -949,6 +980,7 @@ CT_INVERSE_MOD_IMPL(384)
949980
static limb_t legendre_loop_n(limb_t L, factors *fg, const limb_t a_[2],
950981
const limb_t b_[2], size_t n)
951982
{
983+
__builtin_assume(n != 0);
952984
llimb_t limbx;
953985
limb_t f0 = 1, g0 = 0, f1 = 0, g1 = 1;
954986
limb_t a_lo, a_hi, b_lo, b_hi, t_lo, t_hi, odd, borrow, xorm;
@@ -1010,6 +1042,7 @@ static limb_t legendre_loop_n(limb_t L, factors *fg, const limb_t a_[2],
10101042

10111043
static bool_t ct_is_sqr_mod_n(const limb_t inp[], const limb_t mod[], size_t n)
10121044
{
1045+
__builtin_assume(n != 0 && n%2 == 0);
10131046
limb_t a[n], b[n], t[n];
10141047
limb_t a_[2], b_[2], neg, L = 0;
10151048
factors fg;
@@ -1103,6 +1136,7 @@ limb_t div_3_limbs(const limb_t div_top[2], limb_t d_lo, limb_t d_hi)
11031136
static limb_t quot_rem_n(limb_t *div_rem, const limb_t *divisor,
11041137
limb_t quotient, size_t n)
11051138
{
1139+
__builtin_assume(n != 0 && n%2 == 0);
11061140
llimb_t limbx;
11071141
limb_t tmp[n+1], carry, mask, borrow;
11081142
size_t i;

0 commit comments

Comments
 (0)