Skip to content

Commit cd393ce

Browse files
committed
Optimization: only do 59 hddivsteps per iteration instead of 62
1 parent 277b224 commit cd393ce

File tree

1 file changed

+18
-14
lines changed

1 file changed

+18
-14
lines changed

src/modinv64_impl.h

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,8 @@ typedef struct {
145145
int64_t u, v, q, r;
146146
} secp256k1_modinv64_trans2x2;
147147

148-
/* Compute the transition matrix and zeta for 62 divsteps (where zeta=-(delta+1/2)).
148+
/* Compute the transition matrix and eta for 59 divsteps (where zeta=-(delta+1/2)).
149+
* Note that the transformation matrix is scaled by 2^62 and not 2^59.
149150
*
150151
* Input: zeta: initial zeta
151152
* f0: bottom limb of initial f
@@ -155,18 +156,19 @@ typedef struct {
155156
*
156157
* Implements the divsteps_n_matrix function from the explanation.
157158
*/
158-
static int64_t secp256k1_modinv64_divsteps_62(int64_t zeta, uint64_t f0, uint64_t g0, secp256k1_modinv64_trans2x2 *t) {
159+
static int64_t secp256k1_modinv64_divsteps_59(int64_t zeta, uint64_t f0, uint64_t g0, secp256k1_modinv64_trans2x2 *t) {
159160
/* u,v,q,r are the elements of the transformation matrix being built up,
160-
* starting with the identity matrix. Semantically they are signed integers
161+
* starting with the identity matrix times 8 (because the caller expects
162+
* a result scaled by 2^62). Semantically they are signed integers
161163
* in range [-2^62,2^62], but here represented as unsigned mod 2^64. This
162164
* permits left shifting (which is UB for negative numbers). The range
163165
* being inside [-2^63,2^63) means that casting to signed works correctly.
164166
*/
165-
uint64_t u = 1, v = 0, q = 0, r = 1;
167+
uint64_t u = 8, v = 0, q = 0, r = 8;
166168
uint64_t c1, c2, f = f0, g = g0, x, y, z;
167169
int i;
168170

169-
for (i = 0; i < 62; ++i) {
171+
for (i = 3; i < 62; ++i) {
170172
VERIFY_CHECK((f & 1) == 1); /* f must always be odd */
171173
VERIFY_CHECK((u * f0 + v * g0) == f << i);
172174
VERIFY_CHECK((q * f0 + r * g0) == g << i);
@@ -193,8 +195,8 @@ static int64_t secp256k1_modinv64_divsteps_62(int64_t zeta, uint64_t f0, uint64_
193195
g >>= 1;
194196
u <<= 1;
195197
v <<= 1;
196-
/* Bounds on zeta that follow from the bounds on iteration count (max 10*62 divsteps). */
197-
VERIFY_CHECK(zeta >= -621 && zeta <= 621);
198+
/* Bounds on zeta that follow from the bounds on iteration count (max 10*59 divsteps). */
199+
VERIFY_CHECK(zeta >= -591 && zeta <= 591);
198200
}
199201
/* Return data in t and return value. */
200202
t->u = (int64_t)u;
@@ -204,8 +206,10 @@ static int64_t secp256k1_modinv64_divsteps_62(int64_t zeta, uint64_t f0, uint64_
204206
/* The determinant of t must be a power of two. This guarantees that multiplication with t
205207
* does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
206208
* will be divided out again). As each divstep's individual matrix has determinant 2, the
207-
* aggregate of 62 of them will have determinant 2^62. */
208-
VERIFY_CHECK((int128_t)t->u * t->r - (int128_t)t->v * t->q == ((int128_t)1) << 62);
209+
* aggregate of 59 of them will have determinant 2^59. Multiplying with the initial
210+
* 8*identity (which has determinant 2^6) means the overall outputs has determinant
211+
* 2^65. */
212+
VERIFY_CHECK((int128_t)t->u * t->r - (int128_t)t->v * t->q == ((int128_t)1) << 65);
209213
return zeta;
210214
}
211215

@@ -290,7 +294,7 @@ static int64_t secp256k1_modinv64_divsteps_62_var(int64_t eta, uint64_t f0, uint
290294
return eta;
291295
}
292296

293-
/* Compute (t/2^62) * [d, e] mod modulus, where t is a transition matrix for 62 divsteps.
297+
/* Compute (t/2^62) * [d, e] mod modulus, where t is a transition matrix scaled by 2^62.
294298
*
295299
* On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range
296300
* (-2^62,2^62).
@@ -376,7 +380,7 @@ static void secp256k1_modinv64_update_de_62(secp256k1_modinv64_signed62 *d, secp
376380
#endif
377381
}
378382

379-
/* Compute (t/2^62) * [f, g], where t is a transition matrix for 62 divsteps.
383+
/* Compute (t/2^62) * [f, g], where t is a transition matrix scaled by 2^62.
380384
*
381385
* This implements the update_fg function from the explanation.
382386
*/
@@ -463,11 +467,11 @@ static void secp256k1_modinv64(secp256k1_modinv64_signed62 *x, const secp256k1_m
463467
int i;
464468
int64_t zeta = -1; /* zeta = -(delta+1/2); delta starts at 1/2. */
465469

466-
/* Do 10 iterations of 62 divsteps each = 620 divsteps. 590 suffices for 256-bit inputs. */
470+
/* Do 10 iterations of 59 divsteps each = 590 divsteps. This suffices for 256-bit inputs. */
467471
for (i = 0; i < 10; ++i) {
468-
/* Compute transition matrix and new zeta after 62 divsteps. */
472+
/* Compute transition matrix and new zeta after 59 divsteps. */
469473
secp256k1_modinv64_trans2x2 t;
470-
zeta = secp256k1_modinv64_divsteps_62(zeta, f.v[0], g.v[0], &t);
474+
zeta = secp256k1_modinv64_divsteps_59(zeta, f.v[0], g.v[0], &t);
471475
/* Update d,e using that transition matrix. */
472476
secp256k1_modinv64_update_de_62(&d, &e, &t, modinfo);
473477
/* Update f,g using that transition matrix. */

0 commit comments

Comments
 (0)