Skip to content

Commit 96cd94e

Browse files
Merge bitcoin#337: variable sized precomputed table for signing
dcb2e3b variable signing precompute table (djb) Pull request description: This pull request gives an option to reduce the precomputed table size for the signing context (`ctx`) by setting `#define ECMULT_GEN_PREC_BITS [N_BITS]`. Motivation: Per bitcoin#251 and bitcoin#254, the static table can be reduced to 64kB. However, this is still too big for some of my embedded applications. Setting `#define ECMULT_GEN_PREC_BITS 2` produces a 32kB table at a tradeoff of about 75% of the signing speed. Not defining this value will default to the existing implementation of 4 bits. Statistics: ``` ECMULT_GEN_PREC_BITS = 1 Precomputed table size: 32kB ./bench_sign ecdsa_sign: min 195us / avg 200us / max 212us ECMULT_GEN_PREC_BITS = 2 Precomputed table size: 32kB ./bench_sign ecdsa_sign: min 119us / avg 126us / max 134us ECMULT_GEN_PREC_BITS = 4 (default) Precomputed table size: 64kB ./bench_sign ecdsa_sign: min 83.5us / avg 89.6us / max 95.3us ECMULT_GEN_PREC_BITS = 8 Precomputed table size: 512kB ./bench_sign ecdsa_sign: min 96.4us / avg 99.4us / max 104us ``` Only values of 2 and 4 make sense. 8 bits causes a larger table size with no increase in speed. 1 bit runs, actually, but does not reduce table size and is slower than 2 bits. ACKs for top commit: real-or-random: ACK dcb2e3b verified that all changes to the previous ACKed 1d26b27ac90092306bfbc9cdd5123e8a5035202a were due to the rebase jonasnick: ACK dcb2e3b read the code and tested various configurations with valgrind Tree-SHA512: ed6f68ca23ffdc4b59d51525336b34b25521233537edbc74d32dfb3eafd8196419be17f01cbf10bd8d87ce745ce143085abc6034727f742163f7e5f13f26f56e
2 parents b4bff99 + dcb2e3b commit 96cd94e

File tree

8 files changed

+91
-39
lines changed

8 files changed

+91
-39
lines changed

.travis.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ cache:
1111
- src/java/guava/
1212
env:
1313
global:
14-
- FIELD=auto BIGNUM=auto SCALAR=auto ENDOMORPHISM=no STATICPRECOMPUTATION=yes ASM=no BUILD=check EXTRAFLAGS= HOST= ECDH=no RECOVERY=no EXPERIMENTAL=no JNI=no
14+
- FIELD=auto BIGNUM=auto SCALAR=auto ENDOMORPHISM=no STATICPRECOMPUTATION=yes ECMULTGENPRECISION=auto ASM=no BUILD=check EXTRAFLAGS= HOST= ECDH=no RECOVERY=no EXPERIMENTAL=no JNI=no
1515
- GUAVA_URL=https://search.maven.org/remotecontent?filepath=com/google/guava/guava/18.0/guava-18.0.jar GUAVA_JAR=src/java/guava/guava-18.0.jar
1616
matrix:
1717
- SCALAR=32bit RECOVERY=yes
@@ -30,6 +30,8 @@ env:
3030
- EXTRAFLAGS=CPPFLAGS=-DDETERMINISTIC
3131
- EXTRAFLAGS=CFLAGS=-O0
3232
- BUILD=check-java JNI=yes ECDH=yes EXPERIMENTAL=yes
33+
- ECMULTGENPRECISION=2
34+
- ECMULTGENPRECISION=8
3335
matrix:
3436
fast_finish: true
3537
include:
@@ -65,4 +67,4 @@ before_script: ./autogen.sh
6567
script:
6668
- if [ -n "$HOST" ]; then export USE_HOST="--host=$HOST"; fi
6769
- if [ "x$HOST" = "xi686-linux-gnu" ]; then export CC="$CC -m32"; fi
68-
- ./configure --enable-experimental=$EXPERIMENTAL --enable-endomorphism=$ENDOMORPHISM --with-field=$FIELD --with-bignum=$BIGNUM --with-scalar=$SCALAR --enable-ecmult-static-precomputation=$STATICPRECOMPUTATION --enable-module-ecdh=$ECDH --enable-module-recovery=$RECOVERY --enable-jni=$JNI $EXTRAFLAGS $USE_HOST && make -j2 $BUILD
70+
- ./configure --enable-experimental=$EXPERIMENTAL --enable-endomorphism=$ENDOMORPHISM --with-field=$FIELD --with-bignum=$BIGNUM --with-scalar=$SCALAR --enable-ecmult-static-precomputation=$STATICPRECOMPUTATION --with-ecmult-gen-precision=$ECMULTGENPRECISION --enable-module-ecdh=$ECDH --enable-module-recovery=$RECOVERY --enable-jni=$JNI $EXTRAFLAGS $USE_HOST && make -j2 $BUILD

Makefile.am

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,11 +151,11 @@ endif
151151
endif
152152

153153
if USE_ECMULT_STATIC_PRECOMPUTATION
154-
CPPFLAGS_FOR_BUILD +=-I$(top_srcdir)
154+
CPPFLAGS_FOR_BUILD +=-I$(top_srcdir) -I$(builddir)/src
155155

156156
gen_context_OBJECTS = gen_context.o
157157
gen_context_BIN = gen_context$(BUILD_EXEEXT)
158-
gen_%.o: src/gen_%.c
158+
gen_%.o: src/gen_%.c src/libsecp256k1-config.h
159159
$(CC_FOR_BUILD) $(CPPFLAGS_FOR_BUILD) $(CFLAGS_FOR_BUILD) -c $< -o $@
160160

161161
$(gen_context_BIN): $(gen_context_OBJECTS)

README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,12 @@ libsecp256k1 is built using autotools:
6161
$ make
6262
$ make check
6363
$ sudo make install # optional
64+
65+
Exhaustive tests
66+
-----------
67+
68+
$ ./exhaustive_tests
69+
70+
With valgrind, you might need to increase the max stack size:
71+
72+
$ valgrind --max-stackframe=2500000 ./exhaustive_tests

configure.ac

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,14 @@ AC_ARG_WITH([ecmult-window], [AS_HELP_STRING([--with-ecmult-window=SIZE|auto],
165165
)],
166166
[req_ecmult_window=$withval], [req_ecmult_window=auto])
167167

168+
AC_ARG_WITH([ecmult-gen-precision], [AS_HELP_STRING([--with-ecmult-gen-precision=2|4|8|auto],
169+
[Precision bits to tune the precomputed table size for signing.]
170+
[The size of the table is 32kB for 2 bits, 64kB for 4 bits, 512kB for 8 bits of precision.]
171+
[A larger table size usually results in possible faster signing.]
172+
["auto" is a reasonable setting for desktop machines (currently 4). [default=auto]]
173+
)],
174+
[req_ecmult_gen_precision=$withval], [req_ecmult_gen_precision=auto])
175+
168176
AC_CHECK_TYPES([__int128])
169177

170178
if test x"$enable_coverage" = x"yes"; then
@@ -423,6 +431,22 @@ case $set_ecmult_window in
423431
;;
424432
esac
425433

434+
#set ecmult gen precision
435+
if test x"$req_ecmult_gen_precision" = x"auto"; then
436+
set_ecmult_gen_precision=4
437+
else
438+
set_ecmult_gen_precision=$req_ecmult_gen_precision
439+
fi
440+
441+
case $set_ecmult_gen_precision in
442+
2|4|8)
443+
AC_DEFINE_UNQUOTED(ECMULT_GEN_PREC_BITS, $set_ecmult_gen_precision, [Set ecmult gen precision bits])
444+
;;
445+
*)
446+
AC_MSG_ERROR(['ecmult gen precision not 2, 4, 8 or "auto"'])
447+
;;
448+
esac
449+
426450
if test x"$use_tests" = x"yes"; then
427451
SECP_OPENSSL_CHECK
428452
if test x"$has_openssl_ec" = x"yes"; then
@@ -558,6 +582,7 @@ echo " bignum = $set_bignum"
558582
echo " field = $set_field"
559583
echo " scalar = $set_scalar"
560584
echo " ecmult window size = $set_ecmult_window"
585+
echo " ecmult gen prec. bits = $set_ecmult_gen_precision"
561586
echo
562587
echo " CC = $CC"
563588
echo " CFLAGS = $CFLAGS"

src/basic-config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#undef USE_SCALAR_8X32
2525
#undef USE_SCALAR_INV_BUILTIN
2626
#undef USE_SCALAR_INV_NUM
27+
#undef ECMULT_WINDOW_SIZE
2728

2829
#define USE_NUM_NONE 1
2930
#define USE_FIELD_INV_BUILTIN 1

src/ecmult_gen.h

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,27 @@
1010
#include "scalar.h"
1111
#include "group.h"
1212

13+
#if ECMULT_GEN_PREC_BITS != 2 && ECMULT_GEN_PREC_BITS != 4 && ECMULT_GEN_PREC_BITS != 8
14+
# error "Set ECMULT_GEN_PREC_BITS to 2, 4 or 8."
15+
#endif
16+
#define ECMULT_GEN_PREC_B ECMULT_GEN_PREC_BITS
17+
#define ECMULT_GEN_PREC_G (1 << ECMULT_GEN_PREC_B)
18+
#define ECMULT_GEN_PREC_N (256 / ECMULT_GEN_PREC_B)
19+
1320
typedef struct {
1421
/* For accelerating the computation of a*G:
1522
* To harden against timing attacks, use the following mechanism:
16-
* * Break up the multiplicand into groups of 4 bits, called n_0, n_1, n_2, ..., n_63.
17-
* * Compute sum(n_i * 16^i * G + U_i, i=0..63), where:
18-
* * U_i = U * 2^i (for i=0..62)
19-
* * U_i = U * (1-2^63) (for i=63)
20-
* where U is a point with no known corresponding scalar. Note that sum(U_i, i=0..63) = 0.
21-
* For each i, and each of the 16 possible values of n_i, (n_i * 16^i * G + U_i) is
22-
* precomputed (call it prec(i, n_i)). The formula now becomes sum(prec(i, n_i), i=0..63).
23+
* * Break up the multiplicand into groups of PREC_B bits, called n_0, n_1, n_2, ..., n_(PREC_N-1).
24+
* * Compute sum(n_i * (PREC_G)^i * G + U_i, i=0 ... PREC_N-1), where:
25+
* * U_i = U * 2^i, for i=0 ... PREC_N-2
26+
* * U_i = U * (1-2^(PREC_N-1)), for i=PREC_N-1
27+
* where U is a point with no known corresponding scalar. Note that sum(U_i, i=0 ... PREC_N-1) = 0.
28+
* For each i, and each of the PREC_G possible values of n_i, (n_i * (PREC_G)^i * G + U_i) is
29+
* precomputed (call it prec(i, n_i)). The formula now becomes sum(prec(i, n_i), i=0 ... PREC_N-1).
2330
* None of the resulting prec group elements have a known scalar, and neither do any of
2431
* the intermediate sums while computing a*G.
2532
*/
26-
secp256k1_ge_storage (*prec)[64][16]; /* prec[j][i] = 16^j * i * G + U_i */
33+
secp256k1_ge_storage (*prec)[ECMULT_GEN_PREC_N][ECMULT_GEN_PREC_G]; /* prec[j][i] = (PREC_G)^j * i * G + U_j */
2734
secp256k1_scalar blind;
2835
secp256k1_gej initial;
2936
} secp256k1_ecmult_gen_context;

src/ecmult_gen_impl.h

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ static void secp256k1_ecmult_gen_context_init(secp256k1_ecmult_gen_context *ctx)
2828

2929
static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx, void **prealloc) {
3030
#ifndef USE_ECMULT_STATIC_PRECOMPUTATION
31-
secp256k1_ge prec[1024];
31+
secp256k1_ge prec[ECMULT_GEN_PREC_N * ECMULT_GEN_PREC_G];
3232
secp256k1_gej gj;
3333
secp256k1_gej nums_gej;
3434
int i, j;
@@ -40,7 +40,7 @@ static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx
4040
return;
4141
}
4242
#ifndef USE_ECMULT_STATIC_PRECOMPUTATION
43-
ctx->prec = (secp256k1_ge_storage (*)[64][16])manual_alloc(prealloc, prealloc_size, base, prealloc_size);
43+
ctx->prec = (secp256k1_ge_storage (*)[ECMULT_GEN_PREC_N][ECMULT_GEN_PREC_G])manual_alloc(prealloc, prealloc_size, base, prealloc_size);
4444

4545
/* get the generator */
4646
secp256k1_gej_set_ge(&gj, &secp256k1_ge_const_g);
@@ -64,39 +64,39 @@ static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx
6464

6565
/* compute prec. */
6666
{
67-
secp256k1_gej precj[1024]; /* Jacobian versions of prec. */
67+
secp256k1_gej precj[ECMULT_GEN_PREC_N * ECMULT_GEN_PREC_G]; /* Jacobian versions of prec. */
6868
secp256k1_gej gbase;
6969
secp256k1_gej numsbase;
70-
gbase = gj; /* 16^j * G */
70+
gbase = gj; /* PREC_G^j * G */
7171
numsbase = nums_gej; /* 2^j * nums. */
72-
for (j = 0; j < 64; j++) {
73-
/* Set precj[j*16 .. j*16+15] to (numsbase, numsbase + gbase, ..., numsbase + 15*gbase). */
74-
precj[j*16] = numsbase;
75-
for (i = 1; i < 16; i++) {
76-
secp256k1_gej_add_var(&precj[j*16 + i], &precj[j*16 + i - 1], &gbase, NULL);
72+
for (j = 0; j < ECMULT_GEN_PREC_N; j++) {
73+
/* Set precj[j*PREC_G .. j*PREC_G+(PREC_G-1)] to (numsbase, numsbase + gbase, ..., numsbase + (PREC_G-1)*gbase). */
74+
precj[j*ECMULT_GEN_PREC_G] = numsbase;
75+
for (i = 1; i < ECMULT_GEN_PREC_G; i++) {
76+
secp256k1_gej_add_var(&precj[j*ECMULT_GEN_PREC_G + i], &precj[j*ECMULT_GEN_PREC_G + i - 1], &gbase, NULL);
7777
}
78-
/* Multiply gbase by 16. */
79-
for (i = 0; i < 4; i++) {
78+
/* Multiply gbase by PREC_G. */
79+
for (i = 0; i < ECMULT_GEN_PREC_B; i++) {
8080
secp256k1_gej_double_var(&gbase, &gbase, NULL);
8181
}
8282
/* Multiply numsbase by 2. */
8383
secp256k1_gej_double_var(&numsbase, &numsbase, NULL);
84-
if (j == 62) {
84+
if (j == ECMULT_GEN_PREC_N - 2) {
8585
/* In the last iteration, numsbase is (1 - 2^j) * nums instead. */
8686
secp256k1_gej_neg(&numsbase, &numsbase);
8787
secp256k1_gej_add_var(&numsbase, &numsbase, &nums_gej, NULL);
8888
}
8989
}
90-
secp256k1_ge_set_all_gej_var(prec, precj, 1024);
90+
secp256k1_ge_set_all_gej_var(prec, precj, ECMULT_GEN_PREC_N * ECMULT_GEN_PREC_G);
9191
}
92-
for (j = 0; j < 64; j++) {
93-
for (i = 0; i < 16; i++) {
94-
secp256k1_ge_to_storage(&(*ctx->prec)[j][i], &prec[j*16 + i]);
92+
for (j = 0; j < ECMULT_GEN_PREC_N; j++) {
93+
for (i = 0; i < ECMULT_GEN_PREC_G; i++) {
94+
secp256k1_ge_to_storage(&(*ctx->prec)[j][i], &prec[j*ECMULT_GEN_PREC_G + i]);
9595
}
9696
}
9797
#else
9898
(void)prealloc;
99-
ctx->prec = (secp256k1_ge_storage (*)[64][16])secp256k1_ecmult_static_context;
99+
ctx->prec = (secp256k1_ge_storage (*)[ECMULT_GEN_PREC_N][ECMULT_GEN_PREC_G])secp256k1_ecmult_static_context;
100100
#endif
101101
secp256k1_ecmult_gen_blind(ctx, NULL);
102102
}
@@ -109,7 +109,7 @@ static void secp256k1_ecmult_gen_context_finalize_memcpy(secp256k1_ecmult_gen_co
109109
#ifndef USE_ECMULT_STATIC_PRECOMPUTATION
110110
if (src->prec != NULL) {
111111
/* We cast to void* first to suppress a -Wcast-align warning. */
112-
dst->prec = (secp256k1_ge_storage (*)[64][16])(void*)((unsigned char*)dst + ((unsigned char*)src->prec - (unsigned char*)src));
112+
dst->prec = (secp256k1_ge_storage (*)[ECMULT_GEN_PREC_N][ECMULT_GEN_PREC_G])(void*)((unsigned char*)dst + ((unsigned char*)src->prec - (unsigned char*)src));
113113
}
114114
#else
115115
(void)dst, (void)src;
@@ -133,9 +133,9 @@ static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp25
133133
/* Blind scalar/point multiplication by computing (n-b)G + bG instead of nG. */
134134
secp256k1_scalar_add(&gnb, gn, &ctx->blind);
135135
add.infinity = 0;
136-
for (j = 0; j < 64; j++) {
137-
bits = secp256k1_scalar_get_bits(&gnb, j * 4, 4);
138-
for (i = 0; i < 16; i++) {
136+
for (j = 0; j < ECMULT_GEN_PREC_N; j++) {
137+
bits = secp256k1_scalar_get_bits(&gnb, j * ECMULT_GEN_PREC_B, ECMULT_GEN_PREC_B);
138+
for (i = 0; i < ECMULT_GEN_PREC_G; i++) {
139139
/** This uses a conditional move to avoid any secret data in array indexes.
140140
* _Any_ use of secret indexes has been demonstrated to result in timing
141141
* sidechannels, even when the cache-line access patterns are uniform.

src/gen_context.c

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,14 @@
44
* file COPYING or http://www.opensource.org/licenses/mit-license.php.*
55
**********************************************************************/
66

7+
/* Autotools generates libsecp256k1-config.h, which provides the ECMULT_GEN_PREC_BITS definition needed below. */
8+
/* The include is guarded so that downstream users who do not use autotools can define ECMULT_GEN_PREC_BITS themselves. */
9+
#if !defined(ECMULT_GEN_PREC_BITS)
10+
#include "libsecp256k1-config.h"
11+
#endif
712
#define USE_BASIC_CONFIG 1
8-
913
#include "basic-config.h"
14+
1015
#include "include/secp256k1.h"
1116
#include "util.h"
1217
#include "field_impl.h"
@@ -45,23 +50,26 @@ int main(int argc, char **argv) {
4550
fprintf(fp, "#define _SECP256K1_ECMULT_STATIC_CONTEXT_\n");
4651
fprintf(fp, "#include \"src/group.h\"\n");
4752
fprintf(fp, "#define SC SECP256K1_GE_STORAGE_CONST\n");
48-
fprintf(fp, "static const secp256k1_ge_storage secp256k1_ecmult_static_context[64][16] = {\n");
53+
fprintf(fp, "#if ECMULT_GEN_PREC_N != %d || ECMULT_GEN_PREC_G != %d\n", ECMULT_GEN_PREC_N, ECMULT_GEN_PREC_G);
54+
fprintf(fp, " #error configuration mismatch, invalid ECMULT_GEN_PREC_N, ECMULT_GEN_PREC_G. Try deleting ecmult_static_context.h before the build.\n");
55+
fprintf(fp, "#endif\n");
56+
fprintf(fp, "static const secp256k1_ge_storage secp256k1_ecmult_static_context[ECMULT_GEN_PREC_N][ECMULT_GEN_PREC_G] = {\n");
4957

5058
base = checked_malloc(&default_error_callback, SECP256K1_ECMULT_GEN_CONTEXT_PREALLOCATED_SIZE);
5159
prealloc = base;
5260
secp256k1_ecmult_gen_context_init(&ctx);
5361
secp256k1_ecmult_gen_context_build(&ctx, &prealloc);
54-
for(outer = 0; outer != 64; outer++) {
62+
for(outer = 0; outer != ECMULT_GEN_PREC_N; outer++) {
5563
fprintf(fp,"{\n");
56-
for(inner = 0; inner != 16; inner++) {
64+
for(inner = 0; inner != ECMULT_GEN_PREC_G; inner++) {
5765
fprintf(fp," SC(%uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu)", SECP256K1_GE_STORAGE_CONST_GET((*ctx.prec)[outer][inner]));
58-
if (inner != 15) {
66+
if (inner != ECMULT_GEN_PREC_G - 1) {
5967
fprintf(fp,",\n");
6068
} else {
6169
fprintf(fp,"\n");
6270
}
6371
}
64-
if (outer != 63) {
72+
if (outer != ECMULT_GEN_PREC_N - 1) {
6573
fprintf(fp,"},\n");
6674
} else {
6775
fprintf(fp,"}\n");

0 commit comments

Comments
 (0)