Make 64x64->64 multiplications constant-time with MSVC on 32bit x86

real-or-random · real-or-random · commit 1f18ac777cac · 2020-04-23T13:15:54.000+02:00
diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h
@@ -454,18 +454,15 @@ void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a);
 
 #else
 
-#ifdef VERIFY
-#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
-#else
-#define VERIFY_BITS(x, n) do { } while(0)
-#endif
-
 SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) {
     uint64_t c, d;
     uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
     uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7;
     const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
 
+    VERIFY_BITS(R0, 14);
+    VERIFY_BITS(R1, 11);
+
     VERIFY_BITS(a[0], 30);
     VERIFY_BITS(a[1], 30);
     VERIFY_BITS(a[2], 30);
@@ -765,14 +762,14 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
     /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
 
-    d    = c * (R0 >> 4) + t0;
+    d    = MUL_64X64_63(c, R0 >> 4) + t0;
     VERIFY_BITS(d, 56);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
     r[0] = d & M; d >>= 26;
     VERIFY_BITS(r[0], 26);
     VERIFY_BITS(d, 30);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
-    d   += c * (R1 >> 4) + t1;
+    d   += MUL_64X64_63(c, R1 >> 4) + t1;
     VERIFY_BITS(d, 53);
     VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
@@ -1039,14 +1036,14 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
     /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
 
-    d    = c * (R0 >> 4) + t0;
+    d    = MUL_64X64_63(c, R0 >> 4) + t0;
     VERIFY_BITS(d, 56);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
     r[0] = d & M; d >>= 26;
     VERIFY_BITS(r[0], 26);
     VERIFY_BITS(d, 30);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
-    d   += c * (R1 >> 4) + t1;
+    d   += MUL_64X64_63(c, R1 >> 4) + t1;
     VERIFY_BITS(d, 53);
     VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
diff --git a/src/field_5x52_int128_impl.h b/src/field_5x52_int128_impl.h
@@ -9,12 +9,6 @@
 
 #include <stdint.h>
 
-#ifdef VERIFY
-#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
-#else
-#define VERIFY_BITS(x, n) do { } while(0)
-#endif
-
 SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b) {
     uint128_t c, d;
     uint64_t t3, t4, tx, u0;
diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h
@@ -265,6 +265,9 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
 /** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
 #define muladd(a,b) { \
     uint32_t tl, th; \
+    /* VERIFY that the 64x64->64 mul a*b is in fact a 32x32->64 mul for MSVC, see util.h. */ \
+    VERIFY_BITS((uint64_t)a, 32); \
+    VERIFY_BITS((uint64_t)b, 32); \
     { \
         uint64_t t = (uint64_t)a * b; \
         th = t >> 32;         /* at most 0xFFFFFFFE */ \
@@ -280,6 +283,9 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
 /** Add a*b to the number defined by (c0,c1). c1 must never overflow. */
 #define muladd_fast(a,b) { \
     uint32_t tl, th; \
+    /* VERIFY that the 64x64->64 mul a*b is in fact a 32x32->64 mul for MSVC, see util.h. */ \
+    VERIFY_BITS((uint64_t)a, 32); \
+    VERIFY_BITS((uint64_t)b, 32); \
     { \
         uint64_t t = (uint64_t)a * b; \
         th = t >> 32;         /* at most 0xFFFFFFFE */ \
@@ -294,6 +300,9 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
 /** Add 2*a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
 #define muladd2(a,b) { \
     uint32_t tl, th, th2, tl2; \
+    /* VERIFY that the 64x64->64 mul a*b is in fact a 32x32->64 mul for MSVC, see util.h. */ \
+    VERIFY_BITS((uint64_t)a, 32); \
+    VERIFY_BITS((uint64_t)b, 32); \
     { \
         uint64_t t = (uint64_t)a * b; \
         th = t >> 32;               /* at most 0xFFFFFFFE */ \
diff --git a/src/util.h b/src/util.h
@@ -69,6 +69,12 @@ static SECP256K1_INLINE void secp256k1_callback_call(const secp256k1_callback *
 #define VERIFY_SETUP(stmt)
 #endif
 
+#ifdef VERIFY
+#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
+#else
+#define VERIFY_BITS(x, n) do { } while(0)
+#endif
+
 static SECP256K1_INLINE void *checked_malloc(const secp256k1_callback* cb, size_t size) {
     void *ret = malloc(size);
     if (ret == NULL) {
@@ -172,4 +178,43 @@ static SECP256K1_INLINE void memczero(void *s, size_t len, int flag) {
     }
 }
 
+/* MSVC for x86 32-bit targets implements 64x64->64 bit multiplications using
+   a non-constant subroutine. The subroutine is not constant-time because it
+   shortcuts when the high 32 bits of both multiplicands are all 0.
+   See https://research.kudelskisecurity.com/2017/01/16/when-constant-time-source-may-not-save-you/
+   and also https://www.bearssl.org/ctmul.html for more information.
+
+   To work around this we can use the following macro if all we need is a
+   64x64->63 multiplication. We set the MSB of the right multiplicand
+   before the multiplication and clear the MSB of the product.
+
+   For 64-bit integers a, b where a, b, and a*b can be represented in 63
+   bits, we have
+
+      (a * (b | 0x8000000000000000)) & 0x7FFFFFFFFFFFFFFF
+    = (a * (b + 0x8000000000000000)) & 0x7FFFFFFFFFFFFFFF
+    = ((a * b) + (a * 0x8000000000000000)) & 0x7FFFFFFFFFFFFFFF
+    = ((a * b) + (a << 63)) & 0x7FFFFFFFFFFFFFFF
+    = ((a * b) | (a << 63)) & 0x7FFFFFFFFFFFFFFF
+    = ((a * b) & 0x7FFFFFFFFFFFFFFF) | (a << 63) & 0x7FFFFFFFFFFFFFFF
+    = (a * b) | 0
+    = a * b.
+
+   If a*b can be represented in 63 bits but a or b can't, we necessarily
+   have a == 0 or b == 0. Then it's easy to check that
+
+     (0 * (b | 0x8000000000000000)) & 0x7FFFFFFFFFFFFFFF = 0, and
+     (a * (0 | 0x8000000000000000)) & 0x7FFFFFFFFFFFFFFF = 0.
+
+   Thus our macro works for any 64x64->63 multiplication.
+
+   A variant of the macro for 16x16->15 bit multiplications has been
+   verified for all acceptable inputs by brute-force computation.
+*/
+#if defined(_MSC_VER) && defined(_M_IX86)
+#define MUL_64X64_63(a, b) ((((a) | UINT64_C(0x8000000000000000)) * (b)) & UINT64_C(0x7FFFFFFFFFFFFFFF))
+#else
+#define MUL_64X64_63(a, b) ((a) * (b))
+#endif
+
 #endif /* SECP256K1_UTIL_H */