Skip to content

Commit a2d7926

Browse files
committed
crypto: Simplify modinv_pow2 Newton-Raphson iteration
Rearrange the Newton-Raphson formula from inv * (2 - x * inv) to 2 * inv - x * inv^2. This eliminates the neg_add2 helper (single-use x = 2 - x operation) and computes the result directly into r, removing the per-iteration copy. Also return carry from add() to place the doubling overflow directly into the next word.
1 parent 33a61b7 commit a2d7926

File tree

1 file changed

+15
-24
lines changed

1 file changed

+15
-24
lines changed

lib/evmone_precompiles/modexp.cpp

Lines changed: 15 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ using namespace intx;
1313
namespace
1414
{
1515
/// Adds y to x: x[] += y[]. The result is truncated to the size of x.
16-
constexpr void add(std::span<uint64_t> x, std::span<const uint64_t> y) noexcept
16+
constexpr bool add(std::span<uint64_t> x, std::span<const uint64_t> y) noexcept
1717
{
1818
assert(x.size() >= y.size());
1919

@@ -22,6 +22,7 @@ constexpr void add(std::span<uint64_t> x, std::span<const uint64_t> y) noexcept
2222
std::tie(x[i], carry) = addc(x[i], y[i], carry);
2323
for (size_t i = y.size(); carry && i < x.size(); ++i)
2424
std::tie(x[i], carry) = addc(x[i], uint64_t{0}, carry);
25+
return carry;
2526
}
2627

2728
/// Subtracts y from x: x[] -= y[]. The result is truncated to the size of x.
@@ -81,17 +82,6 @@ constexpr void mul(
8182
addmul(r.subspan(j), r.subspan(j), x.first(r.size() - j), y[j]);
8283
}
8384

84-
/// Computes x[] = 2 - x[].
85-
constexpr void neg_add2(std::span<uint64_t> x) noexcept
86-
{
87-
assert(!x.empty());
88-
bool c = false;
89-
90-
std::tie(x[0], c) = intx::subc(2, x[0]);
91-
for (auto it = x.begin() + 1; it != x.end(); ++it)
92-
std::tie(*it, c) = intx::subc(0, *it, c);
93-
}
94-
9585
/// Trims a little-endian word array to significant words.
9686
template <typename T>
9787
constexpr std::span<T> trim(std::span<T> x) noexcept
@@ -489,25 +479,26 @@ void modinv_pow2(
489479
assert(!r.empty());
490480
assert(scratch.size() >= 2 * r.size());
491481

492-
r[0] = evmmax::modinv(x[0]); // Good start: 64 correct bits.
482+
r[0] = evmmax::modinv(x[0]); // Good start: 64 correct bits.
483+
std::ranges::fill(r.subspan(1), uint64_t{0}); // Zero the rest for correct final subtraction.
493484

494-
// Each iteration doubles the number of correct bits in the inverse. See evmmax::modinv().
485+
// Newton-Raphson iteration for modular inverse: inv' = inv * (2 - x * inv).
486+
// Rearranged as: inv' = 2 * inv - x * inv^2, which avoids the (2 - x) negation helper
487+
// and computes the result directly into r (no copy needed).
488+
// Each iteration doubles the number of correct bits. See evmmax::modinv().
495489
for (size_t i = 1; i < r.size(); i *= 2)
496490
{
497-
// At the start of the iteration we have i-word correct inverse in r[0-i].
498-
// The iteration performs the Newton-Raphson step with double the precision (n=2i).
491+
// We have i-word correct inverse in r[0..i). Double the precision to n = min(2i, r.size()).
499492
const auto n = std::min(i * 2, r.size());
493+
assert(n > i);
500494
const auto t1 = scratch.subspan(0, n);
501495
const auto t2 = scratch.subspan(n, n);
502496

503-
// Clamp x to available words: high words beyond x.size() are implicitly zero.
504-
mul(t1, x.subspan(0, std::min(n, x.size())), r.subspan(0, i)); // t1 = x * inv
505-
neg_add2(t1); // t1 = 2 - x * inv
506-
mul(t2, t1, r.subspan(0, i)); // t2 = inv * (2 - x * inv)
507-
// TODO: Consider implementing the step as (inv << 1) - (x * inv * inv).
508-
509-
// TODO: Avoid copy by swapping buffers.
510-
std::ranges::copy(t2, r.begin());
497+
const auto inv = r.first(i);
498+
mul(t1, inv, inv); // t1 = inv^2
499+
mul(t2, t1, x.first(std::min(n, x.size()))); // t2 = inv^2 * x, x clamped to n words.
500+
r[i] = uint64_t{add(inv, inv)}; // r[0..i] = 2 * inv (carry into r[i])
501+
sub(r.first(n), t2); // r[0..n) = 2 * inv - x * inv^2
511502
}
512503
}
513504

0 commit comments

Comments
 (0)