Use the extended Euclidean algorithm to compute scalar inverse in variable time #730

Closed
wants to merge 4 commits
38 changes: 38 additions & 0 deletions src/scalar_4x64_impl.h
@@ -181,6 +181,31 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
return 2 * (mask == 0) - 1;
}

static int secp256k1_scalar_complement(secp256k1_scalar *r, const secp256k1_scalar *a) {
uint128_t t = 1;
t += ~a->d[0];
r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
t += ~a->d[1];
r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
t += ~a->d[2];
r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
t += ~a->d[3];
r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
return t;
}

static int secp256k1_scalar_binadd(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) {
uint128_t t = (uint128_t)a->d[0] + b->d[0];
r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
t += (uint128_t)a->d[1] + b->d[1];
r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
t += (uint128_t)a->d[2] + b->d[2];
r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
t += (uint128_t)a->d[3] + b->d[3];
r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
return t;
}
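
Taken together, these two helpers implement plain arithmetic modulo 2^256, with no reduction modulo the group order: secp256k1_scalar_complement computes the two's complement 2^256 - a, so binadd(b, complement(a)) is exactly b - a mod 2^256. A minimal sketch of the identity they satisfy (illustrative only, not part of the patch):

/* Illustrative only: for nonzero a, a + complement(a) wraps to zero
* with a carry out of bit 255. */
secp256k1_scalar a, c, s;
int carry;
/* ... initialize a to any nonzero value ... */
secp256k1_scalar_complement(&c, &a);         /* c = 2^256 - a         */
carry = secp256k1_scalar_binadd(&s, &a, &c); /* s = (a + c) mod 2^256 */
VERIFY_CHECK(carry == 1 && secp256k1_scalar_is_zero(&s));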

/* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */

/** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
@@ -929,6 +954,19 @@ SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar *a, const
return ((a->d[0] ^ b->d[0]) | (a->d[1] ^ b->d[1]) | (a->d[2] ^ b->d[2]) | (a->d[3] ^ b->d[3])) == 0;
}

SECP256K1_INLINE static int secp256k1_scalar_cmp_var(const secp256k1_scalar *a, const secp256k1_scalar *b) {
int i;
for (i = 3; i >= 0; i--) {
if (a->d[i] > b->d[i]) {
return 1;
}
if (a->d[i] < b->d[i]) {
return -1;
}
}
return 0;
}

SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b, unsigned int shift) {
uint64_t l[8];
unsigned int shiftlimbs;
53 changes: 53 additions & 0 deletions src/scalar_8x32_impl.h
@@ -259,6 +259,46 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
return 2 * (mask == 0) - 1;
}

static int secp256k1_scalar_complement(secp256k1_scalar *r, const secp256k1_scalar *a) {
uint64_t t = 1;
t += ~a->d[0];
r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
t += ~a->d[1];
r->d[1] = t & 0xFFFFFFFFULL; t >>= 32;
t += ~a->d[2];
r->d[2] = t & 0xFFFFFFFFULL; t >>= 32;
t += ~a->d[3];
r->d[3] = t & 0xFFFFFFFFULL; t >>= 32;
t += ~a->d[4];
r->d[4] = t & 0xFFFFFFFFULL; t >>= 32;
t += ~a->d[5];
r->d[5] = t & 0xFFFFFFFFULL; t >>= 32;
t += ~a->d[6];
r->d[6] = t & 0xFFFFFFFFULL; t >>= 32;
t += ~a->d[7];
r->d[7] = t & 0xFFFFFFFFULL; t >>= 32;
return t;
}

static int secp256k1_scalar_binadd(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) {
uint64_t t = (uint64_t)a->d[0] + b->d[0];
r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
t += (uint64_t)a->d[1] + b->d[1];
r->d[1] = t & 0xFFFFFFFFULL; t >>= 32;
t += (uint64_t)a->d[2] + b->d[2];
r->d[2] = t & 0xFFFFFFFFULL; t >>= 32;
t += (uint64_t)a->d[3] + b->d[3];
r->d[3] = t & 0xFFFFFFFFULL; t >>= 32;
t += (uint64_t)a->d[4] + b->d[4];
r->d[4] = t & 0xFFFFFFFFULL; t >>= 32;
t += (uint64_t)a->d[5] + b->d[5];
r->d[5] = t & 0xFFFFFFFFULL; t >>= 32;
t += (uint64_t)a->d[6] + b->d[6];
r->d[6] = t & 0xFFFFFFFFULL; t >>= 32;
t += (uint64_t)a->d[7] + b->d[7];
r->d[7] = t & 0xFFFFFFFFULL; t >>= 32;
return t;
}

/* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */

@@ -697,6 +737,19 @@ SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar *a, const
return ((a->d[0] ^ b->d[0]) | (a->d[1] ^ b->d[1]) | (a->d[2] ^ b->d[2]) | (a->d[3] ^ b->d[3]) | (a->d[4] ^ b->d[4]) | (a->d[5] ^ b->d[5]) | (a->d[6] ^ b->d[6]) | (a->d[7] ^ b->d[7])) == 0;
}

SECP256K1_INLINE static int secp256k1_scalar_cmp_var(const secp256k1_scalar *a, const secp256k1_scalar *b) {
int i;
for (i = 7; i >= 0; i--) {
if (a->d[i] > b->d[i]) {
return 1;
}
if (a->d[i] < b->d[i]) {
return -1;
}
}
return 0;
}

SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b, unsigned int shift) {
uint32_t l[16];
unsigned int shiftlimbs;
182 changes: 178 additions & 4 deletions src/scalar_impl.h
@@ -27,6 +27,8 @@
static const secp256k1_scalar secp256k1_scalar_one = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 1);
static const secp256k1_scalar secp256k1_scalar_zero = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0);

static int secp256k1_scalar_cmp_var(const secp256k1_scalar *a, const secp256k1_scalar *b);

#ifndef USE_NUM_NONE
static void secp256k1_scalar_get_num(secp256k1_num *r, const secp256k1_scalar *a) {
unsigned char c[32];
@@ -65,7 +67,6 @@ static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar
/* If this VERIFY_CHECK triggers we were given a noninvertible scalar (and thus
* have a composite group order; fix it in exhaustive_tests.c). */
VERIFY_CHECK(*r != 0);
}
#else
secp256k1_scalar *t;
int i;
@@ -218,16 +219,183 @@ static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar
secp256k1_scalar_sqr(t, t);
}
secp256k1_scalar_mul(r, t, &x6); /* 111111 */
#endif
}

SECP256K1_INLINE static int secp256k1_scalar_is_even(const secp256k1_scalar *a) {
return !(a->d[0] & 1);
#if !defined(EXHAUSTIVE_TEST_ORDER)
static void secp256k1_scalar_pow2_div(secp256k1_scalar *r, const secp256k1_scalar *a, int k) {
static const secp256k1_scalar lookup[16] = {
SECP256K1_SCALAR_CONST(
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL),
SECP256K1_SCALAR_CONST(
0xEFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFEUL,
0xCF03EF18UL, 0x44541638UL, 0x03D538A4UL, 0x0332DD2DUL),
SECP256K1_SCALAR_CONST(
0xDFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFEUL,
0xE3590149UL, 0xD95F8C34UL, 0x47D812BBUL, 0x362F7919UL),
SECP256K1_SCALAR_CONST(
0xCFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFEUL,
0xF7AE137BUL, 0x6E6B0230UL, 0x8BDAECD2UL, 0x692C1505UL),
SECP256K1_SCALAR_CONST(
0xBFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
0x0C0325ADUL, 0x0376782CUL, 0xCFDDC6E9UL, 0x9C28B0F1UL),
SECP256K1_SCALAR_CONST(
0xAFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
0x205837DEUL, 0x9881EE29UL, 0x13E0A100UL, 0xCF254CDDUL),
SECP256K1_SCALAR_CONST(
0x9FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
0x34AD4A10UL, 0x2D8D6425UL, 0x57E37B18UL, 0x0221E8C9UL),
SECP256K1_SCALAR_CONST(
0x8FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
0x49025C41UL, 0xC298DA21UL, 0x9BE6552FUL, 0x351E84B5UL),
SECP256K1_SCALAR_CONST(
0x7FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
0x5D576E73UL, 0x57A4501DUL, 0xDFE92F46UL, 0x681B20A1UL),
SECP256K1_SCALAR_CONST(
0x6FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
0x71AC80A4UL, 0xECAFC61AUL, 0x23EC095DUL, 0x9B17BC8DUL),
SECP256K1_SCALAR_CONST(
0x5FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
0x860192D6UL, 0x81BB3C16UL, 0x67EEE374UL, 0xCE145879UL),
SECP256K1_SCALAR_CONST(
0x4FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
0x9A56A508UL, 0x16C6B212UL, 0xABF1BD8CUL, 0x0110F465UL),
SECP256K1_SCALAR_CONST(
0x3FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
0xAEABB739UL, 0xABD2280EUL, 0xEFF497A3UL, 0x340D9051UL),
SECP256K1_SCALAR_CONST(
0x2FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
0xC300C96BUL, 0x40DD9E0BUL, 0x33F771BAUL, 0x670A2C3DUL),
SECP256K1_SCALAR_CONST(
0x1FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
0xD755DB9CUL, 0xD5E91407UL, 0x77FA4BD1UL, 0x9A06C829UL),
SECP256K1_SCALAR_CONST(
0x0FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
0xEBAAEDCEUL, 0x6AF48A03UL, 0xBBFD25E8UL, 0xCD036415UL),
};

int data;
int extra_bits = k % 4;

*r = *a;
if (extra_bits) {
k -= extra_bits;
data = secp256k1_scalar_shr_int(r, extra_bits);
secp256k1_scalar_add(r, r, &lookup[data << (4 - extra_bits)]);
}

while (k > 0) {
k -= 4;
data = secp256k1_scalar_shr_int(r, 4);
secp256k1_scalar_add(r, r, &lookup[data]);
}

VERIFY_CHECK(k == 0);
}
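
Each table entry is lookup[d] = d * 2^-4 mod n (for instance lookup[8] equals the constant 2^-1 mod n): after secp256k1_scalar_shr_int drops the low bits d we hold (a - d)/16, and adding lookup[d] gives (a - d) * 2^-4 + d * 2^-4 = a * 2^-4 mod n. The extra_bits path reuses the same table by scaling the index, so the whole function computes a * 2^-k mod n using only shifts and additions. A sketch of the invariant one could check (illustrative only, not part of the patch; two_inv is assumed to hold 2^-1 mod n):

/* Illustrative check: dividing by 2 with pow2_div agrees with
* multiplying by the inverse of 2 mod n. */
secp256k1_scalar a, d, t, two_inv; /* two_inv = 2^-1 mod n (== lookup[8]) */
/* ... initialize a and two_inv ... */
secp256k1_scalar_pow2_div(&d, &a, 1);
secp256k1_scalar_mul(&t, &a, &two_inv);
VERIFY_CHECK(secp256k1_scalar_eq(&d, &t));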

SECP256K1_INLINE static int secp256k1_scalar_shr_zeros(secp256k1_scalar *r) {
int n, k = 0;

/* OR in bit 15 so that at most 15 trailing zeros are counted per
* iteration, which also keeps the argument of __builtin_ctz nonzero. */
while ((n = __builtin_ctz(r->d[0] | (1 << 15)))) {
k += n;
secp256k1_scalar_shr_int(r, n);
}

return k;
}
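
Note that __builtin_ctz is a GCC/Clang intrinsic. On compilers without it, one could substitute a plain loop along these lines (illustrative sketch only; secp256k1_ctz32_var is a hypothetical name, not part of the patch):

/* Hypothetical portable fallback: count trailing zeros of a nonzero word. */
static int secp256k1_ctz32_var(uint32_t x) {
    int n = 0;
    VERIFY_CHECK(x != 0);
    while (!(x & 1)) {
        x >>= 1;
        n++;
    }
    return n;
}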

static int secp256k1_scalar_eea_inverse(secp256k1_scalar *r, const secp256k1_scalar *n) {
secp256k1_scalar u, v, i, j, acomp, negx;
secp256k1_scalar *a, *b, *x0, *x1, *tmp;
int ka, kb;

/* zero is not invertible */
if (secp256k1_scalar_is_zero(n)) {
secp256k1_scalar_set_int(r, 0);
return 0;
}

/**
* The extended Euclidean algorithm computes x, y and gcd(a, b) such that
* a*x + b*y = gcd(a, b)
* If we run this algorithm with b = p, then we solve a*x + p*y = gcd(a, p)
* We note that:
* - The group order p is prime, so gcd(a, p) = 1.
* - We compute modulo p, and y*p = 0 mod p.
* So the equation simplifies to a*x = 1, hence x = a^-1.
*/
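/* As a tiny worked instance (illustration only, not part of the patch):
* with p = 13 and a = 5 the algorithm yields x = 8, and indeed
* 5*8 = 40 = 3*13 + 1 = 1 mod 13. */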

/* a = n */
u = *n;
a = &u;

/* Because 2 is not a common factor of a and b, we can detect
* multiples of 2 using the LSB and eliminate them aggressively. */
ka = secp256k1_scalar_shr_zeros(a);

/* b = p - a */
secp256k1_scalar_negate(&v, a);
b = &v;

/* x0 = 1 */
secp256k1_scalar_set_int(&i, 1);
secp256k1_scalar_negate(&negx, &i);
x0 = &i;

/* x1 = 0 */
secp256k1_scalar_set_int(&j, 0);
x1 = &j;

if (secp256k1_scalar_is_one(a)) {
goto done;
}

/* For a and b, we use two's complement math and ensure no overflow happens. */
secp256k1_scalar_complement(&acomp, a);
goto bzero;

while (!secp256k1_scalar_is_one(a)) {
secp256k1_scalar_complement(&acomp, a);
secp256k1_scalar_negate(&negx, x0);

VERIFY_CHECK(secp256k1_scalar_cmp_var(b, a) > 0);
do {
secp256k1_scalar_binadd(b, b, &acomp);

bzero:
/* We ensure that a and b are odd, so b must be even after subtracting a. */
VERIFY_CHECK(secp256k1_scalar_is_even(b));
kb = secp256k1_scalar_shr_zeros(b);
secp256k1_scalar_add(x1, x1, &negx);
secp256k1_scalar_pow2_div(x1, x1, kb);
} while (secp256k1_scalar_cmp_var(b, a) > 0);

/* a and b can never be equal, so if we exited, it is because a > b. */
VERIFY_CHECK(secp256k1_scalar_cmp_var(a, b) > 0);

/* To speed things up, we only swap pointers. */
tmp = a;
a = b;
b = tmp;

tmp = x0;
x0 = x1;
x1 = tmp;
}

done:
secp256k1_scalar_pow2_div(r, x0, ka);
return 1;
}
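
For review purposes, here is how the new routine could be sanity-checked against the existing constant-time inverse (illustrative sketch only, not part of the patch; test_eea_inverse is a hypothetical helper):

/* Hypothetical test helper, illustrative only. */
static void test_eea_inverse(const secp256k1_scalar *x) {
    secp256k1_scalar r1, r2;
    if (secp256k1_scalar_is_zero(x)) {
        return;
    }
    secp256k1_scalar_inverse(&r1, x);     /* existing constant-time inverse */
    secp256k1_scalar_eea_inverse(&r2, x); /* new variable-time EEA inverse  */
    VERIFY_CHECK(secp256k1_scalar_eq(&r1, &r2));
}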
#endif

static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_scalar *x) {
#if defined(USE_SCALAR_INV_BUILTIN)
#if defined(EXHAUSTIVE_TEST_ORDER)
secp256k1_scalar_inverse(r, x);
#elif defined(USE_SCALAR_INV_BUILTIN)
secp256k1_scalar_eea_inverse(r, x);
#elif defined(USE_SCALAR_INV_NUM)
unsigned char b[32];
secp256k1_num n, m;
@@ -246,6 +414,12 @@ static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_sc
#endif
}

#if !defined(EXHAUSTIVE_TEST_ORDER)
SECP256K1_INLINE static int secp256k1_scalar_is_even(const secp256k1_scalar *a) {
return !(a->d[0] & 1);
}
#endif

#ifdef USE_ENDOMORPHISM
#if defined(EXHAUSTIVE_TEST_ORDER)
/**
4 changes: 4 additions & 0 deletions src/scalar_low_impl.h
@@ -114,6 +114,10 @@ SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar *a, const
return *a == *b;
}

SECP256K1_INLINE static int secp256k1_scalar_cmp_var(const secp256k1_scalar *a, const secp256k1_scalar *b) {
return (*a > *b) - (*a < *b);
}

static SECP256K1_INLINE void secp256k1_scalar_cmov(secp256k1_scalar *r, const secp256k1_scalar *a, int flag) {
uint32_t mask0, mask1;
mask0 = flag + ~((uint32_t)0);