nss/blinding_ct.patch
Robert Relyea 7d4bb6fbe6 Resolves: RHEL-15131
CVE-2023-5388
    nss: timing attack against RSA decryption.
    Make the final blinding multmod constant time.
2023-11-22 08:58:18 -08:00

950 lines
30 KiB
Diff

diff --git a/lib/freebl/mpi/mpi-priv.h b/lib/freebl/mpi/mpi-priv.h
--- a/lib/freebl/mpi/mpi-priv.h
+++ b/lib/freebl/mpi/mpi-priv.h
@@ -199,16 +199,19 @@ void MPI_ASM_DECL s_mpv_mul_d(const mp_d
void MPI_ASM_DECL s_mpv_mul_d_add(const mp_digit *a, mp_size a_len,
mp_digit b, mp_digit *c);
#endif
void MPI_ASM_DECL s_mpv_mul_d_add_prop(const mp_digit *a,
mp_size a_len, mp_digit b,
mp_digit *c);
+void MPI_ASM_DECL s_mpv_mul_d_add_propCT(const mp_digit *a,
+ mp_size a_len, mp_digit b,
+ mp_digit *c, mp_size c_len);
void MPI_ASM_DECL s_mpv_sqr_add_prop(const mp_digit *a,
mp_size a_len,
mp_digit *sqrs);
mp_err MPI_ASM_DECL s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo,
mp_digit divisor, mp_digit *quot, mp_digit *rem);
/* c += a * b * (MP_RADIX ** offset); */
diff --git a/lib/freebl/mpi/mpi.c b/lib/freebl/mpi/mpi.c
--- a/lib/freebl/mpi/mpi.c
+++ b/lib/freebl/mpi/mpi.c
@@ -5,16 +5,18 @@
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mpi-priv.h"
#include "mplogic.h"
+#include <assert.h>
+
#if defined(__arm__) && \
((defined(__thumb__) && !defined(__thumb2__)) || defined(__ARM_ARCH_3__))
/* 16-bit thumb or ARM v3 doesn't work inlined assember version */
#undef MP_ASSEMBLY_MULTIPLY
#undef MP_ASSEMBLY_SQUARE
#endif
#if MP_LOGTAB
@@ -797,25 +799,28 @@ mp_sub(const mp_int *a, const mp_int *b,
CLEANUP:
return res;
} /* end mp_sub() */
/* }}} */
-/* {{{ mp_mul(a, b, c) */
+/* {{{ s_mp_mulg(a, b, c) */
/*
- mp_mul(a, b, c)
-
- Compute c = a * b. All parameters may be identical.
+ s_mp_mulg(a, b, c)
+
+ Compute c = a * b. All parameters may be identical. if constantTime is set,
+ then the operations are done in constant time. The original is mostly
+ constant time as long as s_mpv_mul_d_add() is constant time. This is true
+ of the x86 assembler, as well as the current c code.
*/
mp_err
-mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
+s_mp_mulg(const mp_int *a, const mp_int *b, mp_int *c, int constantTime)
{
mp_digit *pb;
mp_int tmp;
mp_err res;
mp_size ib;
mp_size useda, usedb;
ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
@@ -841,17 +846,24 @@ mp_mul(const mp_int *a, const mp_int *b,
}
MP_USED(c) = 1;
MP_DIGIT(c, 0) = 0;
if ((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
goto CLEANUP;
#ifdef NSS_USE_COMBA
- if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
+ /* comba isn't constant time because it clamps! If we cared
+ * (we needed a constant time version of multiply that was 'faster'
+ * we could easily pass constantTime down to the comba code and
+ * get it to skip the clamp... but here are assembler versions
+ * which add comba to platforms that can't compile the normal
+ * comba's imbedded assembler which would also need to change, so
+ * for now we just skip comba when we are running constant time. */
+ if (!constantTime && (MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
if (MP_USED(a) == 4) {
s_mp_mul_comba_4(a, b, c);
goto CLEANUP;
}
if (MP_USED(a) == 8) {
s_mp_mul_comba_8(a, b, c);
goto CLEANUP;
}
@@ -871,36 +883,82 @@ mp_mul(const mp_int *a, const mp_int *b,
/* Outer loop: Digits of b */
useda = MP_USED(a);
usedb = MP_USED(b);
for (ib = 1; ib < usedb; ib++) {
mp_digit b_i = *pb++;
/* Inner product: Digits of a */
- if (b_i)
+ if (constantTime || b_i)
s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
else
MP_DIGIT(c, ib + useda) = b_i;
}
- s_mp_clamp(c);
+ if (!constantTime) {
+ s_mp_clamp(c);
+ }
if (SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
SIGN(c) = ZPOS;
else
SIGN(c) = NEG;
CLEANUP:
mp_clear(&tmp);
return res;
+} /* end smp_mulg() */
+
+/* }}} */
+
+/* {{{ mp_mul(a, b, c) */
+
+/*
+ mp_mul(a, b, c)
+
+ Compute c = a * b. All parameters may be identical.
+ */
+
+mp_err
+mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
+{
+ return s_mp_mulg(a, b, c, 0);
} /* end mp_mul() */
/* }}} */
+/* {{{ mp_mulCT(a, b, c) */
+
+/*
+ mp_mulCT(a, b, c)
+
+ Compute c = a * b. In constant time. Parameters may not be identical.
+ NOTE: a and b may be modified.
+ */
+
+mp_err
+mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize)
+{
+ mp_err res;
+
+ /* make the multiply values fixed length so multiply
+ * doesn't leak the length. at this point all the
+ * values are blinded, but once we finish we want the
+ * output size to be hidden (so no clamping the out put) */
+ MP_CHECKOK(s_mp_pad(a, setSize));
+ MP_CHECKOK(s_mp_pad(b, setSize));
+ MP_CHECKOK(s_mp_pad(c, 2*setSize));
+ MP_CHECKOK(s_mp_mulg(a, b, c, 1));
+CLEANUP:
+ return res;
+} /* end mp_mulCT() */
+
+/* }}} */
+
/* {{{ mp_sqr(a, sqr) */
#if MP_SQUARE
/*
Computes the square of a. This can be done more
efficiently than a general multiplication, because many of the
computation steps are redundant when squaring. The inner product
step is a bit more complicated, but we save a fair number of
@@ -1263,16 +1321,174 @@ mp_mod(const mp_int *a, const mp_int *m,
}
return MP_OKAY;
} /* end mp_mod() */
/* }}} */
+/* {{{ s_mp_subCT_d(a, b, borrow, c) */
+
+/*
+ s_mp_subCT_d(a, b, borrow, c)
+
+ Compute c = (a -b) - subtract in constant time. returns borrow
+ */
+mp_digit
+s_mp_subCT_d(mp_digit a, mp_digit b, mp_digit borrow, mp_digit *ret) {
+ mp_digit borrow1, borrow2, t;
+#ifdef MP_COMPILER_USES_CARRY
+ /* while it doesn't look constant-time, this is idiomatic code
+ * to tell compilers to use the carry bit from subtraction */
+ t = a - borrow;
+ if (t > a) {
+ borrow1 = 1;
+ } else {
+ borrow1 = 0;
+ }
+ *ret = t - b;
+ if (*ret > t) {
+ borrow2 = 1;
+ } else {
+ borrow2 = 0;
+ }
+#else
+ mp_digit bitr, bitb, nbitt;
+ /* this is constant time independent of compilier */
+ t = a - borrow;
+ borrow1 = ((~a) >> (MP_DIGIT_BIT-1)) & ((t) >> (MP_DIGIT_BIT-1));
+ *ret = t - b;
+ bitb = b >> (MP_DIGIT_BIT-1);
+ bitr = *ret >> (MP_DIGIT_BIT-1);
+ nbitt = (~t) >> (MP_DIGIT_BIT-1);
+ borrow2 = (nbitt & bitb) | (bitb & bitr) | (nbitt & bitr);
+#endif
+ /* only borrow 1 or borrow 2 should be 1, we want to guarrentee
+ * the overall borrow is 1, so use | here */
+ return borrow1 | borrow2;
+} /* s_mp_subCT_d() */
+
+/* }}} */
+
+/* {{{ mp_subCT(a, b, ret, borrow) */
+
+/* return ret= a - b and borrow in borrow. done in constant time.
+ * b could be modified.
+ */
+mp_err
+mp_subCT(const mp_int *a, mp_int *b, mp_int *ret, mp_digit *borrow)
+{
+ mp_size used_a = MP_USED(a);
+ mp_size i;
+ mp_err res;
+
+ MP_CHECKOK(s_mp_pad(b, used_a));
+ MP_CHECKOK(s_mp_pad(ret, used_a));
+ *borrow = 0;
+ for (i=0; i < used_a; i++) {
+ *borrow = s_mp_subCT_d(MP_DIGIT(a,i), MP_DIGIT(b,i), *borrow,
+ &MP_DIGIT(ret,i));
+ }
+
+ res = MP_OKAY;
+CLEANUP:
+ return res;
+} /* end mp_subCT() */
+
+/* }}} */
+
+/* {{{ mp_selectCT(cond, a, b, ret) */
+
+/*
+ * return ret= cond ? a : b; cond should be either 0 or 1
+ */
+mp_err
+mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret)
+{
+ mp_size used_a = MP_USED(a);
+ mp_err res;
+ mp_size i;
+
+ cond *= MP_DIGIT_MAX;
+
+ /* we currently require these to be equal on input,
+ * we could use pad to extend one of them, but that might
+ * leak data as it wouldn't be constant time */
+ assert(used_a == MP_USED(b));
+
+ MP_CHECKOK(s_mp_pad(ret, used_a));
+ for (i=0; i < used_a; i++) {
+ MP_DIGIT(ret,i) = (MP_DIGIT(a,i)&cond) | (MP_DIGIT(b,i)&~cond);
+ }
+ res = MP_OKAY;
+CLEANUP:
+ return res;
+} /* end mp_selectCT() */
+
+
+/* {{{ mp_reduceCT(a, m, c) */
+
+/*
+ mp_reduceCT(a, m, c)
+
+ Compute c = aR^-1 (mod m) in constant time.
+ input should be in montgomery form. If input is the
+ result of a montgomery multiply then out put will be
+ in mongomery form.
+ Result will be reduced to MP_USED(m), but not be
+ clamped.
+ */
+
+mp_err
+mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *c)
+{
+ mp_size used_m = MP_USED(m);
+ mp_size used_c = used_m*2+1;
+ mp_digit *m_digits, *c_digits;
+ mp_size i;
+ mp_digit borrow, carry;
+ mp_err res;
+ mp_int sub;
+
+ MP_DIGITS(&sub) = 0;
+ MP_CHECKOK(mp_init_size(&sub,used_m));
+
+ if (a != c) {
+ MP_CHECKOK(mp_copy(a, c));
+ }
+ MP_CHECKOK(s_mp_pad(c, used_c));
+ m_digits = MP_DIGITS(m);
+ c_digits = MP_DIGITS(c);
+ for (i=0; i < used_m; i++) {
+ mp_digit m_i = MP_DIGIT(c,i)*n0i;
+ s_mpv_mul_d_add_propCT(m_digits, used_m, m_i, c_digits++, used_c--);
+ }
+ s_mp_rshd(c, used_m);
+ /* MP_USED(c) should be used_m+1 with the high word being any carry
+ * from the previous multiply, save that carry and drop the high
+ * word for the substraction below */
+ carry = MP_DIGIT(c,used_m);
+ MP_DIGIT(c,used_m) = 0;
+ MP_USED(c) = used_m;
+ /* mp_subCT wants c and m to be the same size, we've already
+ * guarrenteed that in the previous statement, so mp_subCT won't actually
+ * modify m, so it's safe to recast */
+ MP_CHECKOK(mp_subCT(c, (mp_int *)m, &sub, &borrow));
+
+ /* we return c-m if c >= m no borrow or there was a borrow and a carry */
+ MP_CHECKOK(mp_selectCT(borrow ^ carry, c, &sub, c));
+ res = MP_OKAY;
+CLEANUP:
+ mp_clear(&sub);
+ return res;
+} /* end mp_reduceCT() */
+
+/* }}} */
+
/* {{{ mp_mod_d(a, d, c) */
/*
mp_mod_d(a, d, c)
Compute c = a (mod d). Result will always be 0 <= c < d
*/
mp_err
@@ -1379,16 +1595,47 @@ mp_mulmod(const mp_int *a, const mp_int
if ((res = mp_mod(c, m, c)) != MP_OKAY)
return res;
return MP_OKAY;
}
/* }}} */
+/* {{{ mp_mulmontmodCT(a, b, m, c) */
+
+/*
+ mp_mulmontmodCT(a, b, m, c)
+
+ Compute c = (a * b) mod m in constant time wrt a and b. either a or b
+ should be in montgomery form and the output is native. If both a and b
+ are in montgomery form, then the output will also be in montgomery form
+ and can be recovered with an mp_reduceCT call.
+ NOTE: a and b may be modified.
+ */
+
+mp_err
+mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i,
+ mp_int *c)
+{
+ mp_err res;
+
+ ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+
+ if ((res = mp_mulCT(a, b, c, MP_USED(m))) != MP_OKAY)
+ return res;
+
+ if ((res = mp_reduceCT(c, m, n0i, c)) != MP_OKAY)
+ return res;
+
+ return MP_OKAY;
+}
+
+/* }}} */
+
/* {{{ mp_sqrmod(a, m, c) */
#if MP_SQUARE
mp_err
mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
{
mp_err res;
@@ -3936,25 +4183,73 @@ s_mp_mul(mp_int *a, const mp_int *b)
{ \
mp_digit a0b1, a1b0; \
Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX); \
Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
a1b0 += a0b1; \
Phi += a1b0 >> MP_HALF_DIGIT_BIT; \
- if (a1b0 < a0b1) \
- Phi += MP_HALF_RADIX; \
+ Phi += (MP_CT_LTU(a1b0, a0b1)) << MP_HALF_DIGIT_BIT; \
a1b0 <<= MP_HALF_DIGIT_BIT; \
Plo += a1b0; \
- if (Plo < a1b0) \
- ++Phi; \
+ Phi += MP_CT_LTU(Plo, a1b0); \
}
#endif
+/* Constant time version of s_mpv_mul_d_add_prop.
+ * Presently, this is only used by the Constant time Montgomery arithmetic code. */
+/* c += a * b */
+void
+s_mpv_mul_d_add_propCT(const mp_digit *a, mp_size a_len, mp_digit b,
+ mp_digit *c, mp_size c_len)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
+ mp_digit d = 0;
+
+ c_len -= a_len;
+ /* Inner product: Digits of a */
+ while (a_len--) {
+ mp_word w = ((mp_word)b * *a++) + *c + d;
+ *c++ = ACCUM(w);
+ d = CARRYOUT(w);
+ }
+
+ /* propagate the carry to the end, even if carry is zero */
+ while (c_len--) {
+ mp_word w = (mp_word)*c + d;
+ *c++ = ACCUM(w);
+ d = CARRYOUT(w);
+ }
+#else
+ mp_digit carry = 0;
+ c_len -= a_len;
+ while (a_len--) {
+ mp_digit a_i = *a++;
+ mp_digit a0b0, a1b1;
+ MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+ a0b0 += carry;
+ a1b1 += MP_CT_LTU(a0b0, carry);
+ a0b0 += a_i = *c;
+ a1b1 += MP_CT_LTU(a0b0, a_i);
+
+ *c++ = a0b0;
+ carry = a1b1;
+ }
+ /* propagate the carry to the end, even if carry is zero */
+ while (c_len--) {
+ mp_digit c_i = *c;
+ carry += c_i;
+ *c++ = carry;
+ carry = MP_CT_LTU(carry, c_i);
+ }
+#endif
+}
+
#if !defined(MP_ASSEMBLY_MULTIPLY)
/* c = a * b */
void
s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
mp_digit d = 0;
@@ -3969,18 +4264,17 @@ s_mpv_mul_d(const mp_digit *a, mp_size a
mp_digit carry = 0;
while (a_len--) {
mp_digit a_i = *a++;
mp_digit a0b0, a1b1;
MP_MUL_DxD(a_i, b, a1b1, a0b0);
a0b0 += carry;
- if (a0b0 < carry)
- ++a1b1;
+ a1b1 += a0b0 < carry;
*c++ = a0b0;
carry = a1b1;
}
*c = carry;
#endif
}
/* c += a * b */
@@ -4002,21 +4296,19 @@ s_mpv_mul_d_add(const mp_digit *a, mp_si
mp_digit carry = 0;
while (a_len--) {
mp_digit a_i = *a++;
mp_digit a0b0, a1b1;
MP_MUL_DxD(a_i, b, a1b1, a0b0);
a0b0 += carry;
- if (a0b0 < carry)
- ++a1b1;
+ a1b1 += (a0b0 < carry);
a0b0 += a_i = *c;
- if (a0b0 < a_i)
- ++a1b1;
+ a1b1 += (a0b0 < a_i);
*c++ = a0b0;
carry = a1b1;
}
*c = carry;
#endif
}
/* Presently, this is only used by the Montgomery arithmetic code. */
diff --git a/lib/freebl/mpi/mpi.h b/lib/freebl/mpi/mpi.h
--- a/lib/freebl/mpi/mpi.h
+++ b/lib/freebl/mpi/mpi.h
@@ -145,16 +145,54 @@ typedef int mp_sword;
#define MP_USED(MP) ((MP)->used)
#define MP_ALLOC(MP) ((MP)->alloc)
#define MP_DIGITS(MP) ((MP)->dp)
#define MP_DIGIT(MP, N) (MP)->dp[(N)]
/* This defines the maximum I/O base (minimum is 2) */
#define MP_MAX_RADIX 64
+/* Constant Time Macros on mp_digits */
+#define MP_CT_HIGH_TO_LOW(x) ((mp_digit)((mp_digit)(x) >> (MP_DIGIT_BIT - 1)))
+
+/* basic zero and non zero tests */
+#define MP_CT_NOT_ZERO(x) (MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x)))))
+#define MP_CT_ZERO(x) (~MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x)))))
+
+
+/* basic constant-time helper macro for equalities and inequalities.
+ * The inequalities will produce incorrect results if
+ * abs(a-b) >= MP_DIGIT_SIZE/2. This can be avoided if unsigned values stay
+ * within the range 0-MP_DIGIT_MAX/2. */
+#define MP_CT_EQ(a, b) MP_CT_ZERO(((a) - (b)))
+#define MP_CT_NE(a, b) MP_CT_NOT_ZERO(((a) - (b)))
+#define MP_CT_GT(a, b) MP_CT_HIGH_TO_LOW((b) - (a))
+#define MP_CT_LT(a, b) MP_CT_HIGH_TO_LOW((a) - (b))
+#define MP_CT_GE(a, b) (1^MP_CT_LT(a, b))
+#define MP_CT_LE(a, b) (1^MP_CT_GT(a, b))
+#define MP_CT_TRUE ((mp_digit)1)
+#define MP_CT_FALSE ((mp_digit)0)
+
+/* use constant time result to select a boolean value */
+#define MP_CT_SELB(m, l, r) (((m) & (l)) | (~(m) & (r)))
+
+/* full inequalities that work with full mp_digit values */
+#define MP_CT_OVERFLOW(a,b,c,d) \
+ MP_CT_SELB(MP_CT_HIGH_TO_LOW((a)^(b)), \
+ (MP_CT_HIGH_TO_LOW(d)),c)
+#define MP_CT_GTU(a,b) MP_CT_OVERFLOW(a,b,MP_CT_GT(a,b),a)
+#define MP_CT_LTU(a,b) MP_CT_OVERFLOW(a,b,MP_CT_LT(a,b),b)
+#define MP_CT_GEU(a,b) MP_CT_OVERFLOW(a,b,MP_CT_GE(a,b),a)
+#define MP_CT_LEU(a,b) MP_CT_OVERFLOW(a,b,MP_CT_LE(a,b),b)
+#define MP_CT_GTS(a,b) MP_CT_OVERFLOW(a,b,MP_CT_GT(a,b),b)
+#define MP_CT_LTS(a,b) MP_CT_OVERFLOW(a,b,MP_CT_LT(a,b),a)
+#define MP_CT_GES(a,b) MP_CT_OVERFLOW(a,b,MP_CT_GE(a,b),b)
+#define MP_CT_LES(a,b) MP_CT_OVERFLOW(a,b,MP_CT_LE(a,b),a)
+
+
typedef struct {
mp_sign sign; /* sign of this quantity */
mp_size alloc; /* how many digits allocated */
mp_size used; /* how many digits used */
mp_digit *dp; /* the digits themselves */
} mp_int;
/* Default precision */
@@ -185,17 +223,19 @@ mp_err mp_expt_d(const mp_int *a, mp_dig
/* Sign manipulations */
mp_err mp_abs(const mp_int *a, mp_int *b);
mp_err mp_neg(const mp_int *a, mp_int *b);
/* Full arithmetic */
mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c);
mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err mp_subCT(const mp_int *a, mp_int *b, mp_int *c, mp_digit *borrow);
mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize);
#if MP_SQUARE
mp_err mp_sqr(const mp_int *a, mp_int *b);
#else
#define mp_sqr(a, b) mp_mul(a, a, b)
#endif
mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r);
mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r);
mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c);
@@ -212,23 +252,30 @@ mp_err mp_mulmod(const mp_int *a, const
mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c);
#else
#define mp_sqrmod(a, m, c) mp_mulmod(a, a, m, c)
#endif
mp_err mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c);
#endif /* MP_MODARITH */
+/* montgomery math */
+mp_err mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont);
+mp_digit mp_calculate_mont_n0i(const mp_int *N);
+mp_err mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *ct);
+mp_err mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i, mp_int *c);
+
/* Comparisons */
int mp_cmp_z(const mp_int *a);
int mp_cmp_d(const mp_int *a, mp_digit d);
int mp_cmp(const mp_int *a, const mp_int *b);
int mp_cmp_mag(const mp_int *a, const mp_int *b);
int mp_isodd(const mp_int *a);
int mp_iseven(const mp_int *a);
+mp_err mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret);
/* Number theoretic */
mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c);
mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c);
mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y);
mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c);
mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c);
diff --git a/lib/freebl/mpi/mpmontg.c b/lib/freebl/mpi/mpmontg.c
--- a/lib/freebl/mpi/mpmontg.c
+++ b/lib/freebl/mpi/mpmontg.c
@@ -124,30 +124,37 @@ s_mp_mul_mont(const mp_int *a, const mp_
}
res = MP_OKAY;
CLEANUP:
return res;
}
#endif
-STATIC
mp_err
-s_mp_to_mont(const mp_int *x, mp_mont_modulus *mmm, mp_int *xMont)
+mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont)
{
mp_err res;
/* xMont = x * R mod N where N is modulus */
- MP_CHECKOK(mp_copy(x, xMont));
- MP_CHECKOK(s_mp_lshd(xMont, MP_USED(&mmm->N))); /* xMont = x << b */
- MP_CHECKOK(mp_div(xMont, &mmm->N, 0, xMont)); /* mod N */
+ if (x != xMont) {
+ MP_CHECKOK(mp_copy(x, xMont));
+ }
+ MP_CHECKOK(s_mp_lshd(xMont, MP_USED(N))); /* xMont = x << b */
+ MP_CHECKOK(mp_div(xMont, N, 0, xMont)); /* mod N */
CLEANUP:
return res;
}
+mp_digit
+mp_calculate_mont_n0i(const mp_int *N)
+{
+ return 0 - s_mp_invmod_radix(MP_DIGIT(N,0));
+}
+
#ifdef MP_USING_MONT_MULF
/* the floating point multiply is already cache safe,
* don't turn on cache safe unless we specifically
* force it */
#ifndef MP_FORCE_CACHE_SAFE
#undef MP_USING_CACHE_SAFE_MOD_EXP
#endif
@@ -193,17 +200,17 @@ mp_exptmod_f(const mp_int *montBase,
MP_DIGITS(&accum1) = 0;
for (i = 0; i < MAX_ODD_INTS; ++i)
oddPowers[i] = 0;
MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));
mp_set(&accum1, 1);
- MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
+ MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
MP_CHECKOK(s_mp_pad(&accum1, nLen));
oddPowSize = 2 * nLen + 1;
dTmpSize = 2 * oddPowSize;
dSize = sizeof(double) * (nLen * 4 + 1 +
((odd_ints + 1) * oddPowSize) + dTmpSize);
dBuf = malloc(dSize);
if (!dBuf) {
@@ -473,17 +480,17 @@ mp_exptmod_i(const mp_int *montBase,
for (i = 1; i < odd_ints; ++i) {
MP_CHECKOK(mp_init_size(oddPowers + i, nLen + 2 * MP_USED(&power2) + 2));
MP_CHECKOK(mp_mul(oddPowers + (i - 1), &power2, oddPowers + i));
MP_CHECKOK(s_mp_redc(oddPowers + i, mmm));
}
/* set accumulator to montgomery residue of 1 */
mp_set(&accum1, 1);
- MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
+ MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
pa1 = &accum1;
pa2 = &accum2;
for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) {
mp_size smallExp;
MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits));
smallExp = (mp_size)res;
@@ -862,17 +869,17 @@ mp_exptmod_safe_i(const mp_int *montBase
/* build the first WEAVE_WORD powers inline */
/* if WEAVE_WORD_SIZE is not 4, this code will have to change */
if (num_powers > 2) {
MP_CHECKOK(mp_init_size(&accum[0], 3 * nLen + 2));
MP_CHECKOK(mp_init_size(&accum[1], 3 * nLen + 2));
MP_CHECKOK(mp_init_size(&accum[2], 3 * nLen + 2));
MP_CHECKOK(mp_init_size(&accum[3], 3 * nLen + 2));
mp_set(&accum[0], 1);
- MP_CHECKOK(s_mp_to_mont(&accum[0], mmm, &accum[0]));
+ MP_CHECKOK(mp_to_mont(&accum[0], &(mmm->N), &accum[0]));
MP_CHECKOK(mp_copy(montBase, &accum[1]));
SQR(montBase, &accum[2]);
MUL_NOWEAVE(montBase, &accum[2], &accum[3]);
powersArray = (mp_digit *)malloc(num_powers * (nLen * sizeof(mp_digit) + 1));
if (!powersArray) {
res = MP_MEM;
goto CLEANUP;
}
@@ -881,17 +888,17 @@ mp_exptmod_safe_i(const mp_int *montBase
MP_CHECKOK(mpi_to_weave(accum, powers, nLen, num_powers));
if (first_window < 4) {
MP_CHECKOK(mp_copy(&accum[first_window], &accum1));
first_window = num_powers;
}
} else {
if (first_window == 0) {
mp_set(&accum1, 1);
- MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
+ MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
} else {
/* assert first_window == 1? */
MP_CHECKOK(mp_copy(montBase, &accum1));
}
}
/*
* calculate all the powers in the powers array.
@@ -1054,19 +1061,19 @@ mp_exptmod(const mp_int *inBase, const m
nLen = MP_USED(modulus);
MP_CHECKOK(mp_init_size(&montBase, 2 * nLen + 2));
mmm.N = *modulus; /* a copy of the mp_int struct */
/* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX
** where n0 = least significant mp_digit of N, the modulus.
*/
- mmm.n0prime = 0 - s_mp_invmod_radix(MP_DIGIT(modulus, 0));
+ mmm.n0prime = mp_calculate_mont_n0i(modulus);
- MP_CHECKOK(s_mp_to_mont(base, &mmm, &montBase));
+ MP_CHECKOK(mp_to_mont(base, modulus, &montBase));
bits_in_exponent = mpl_significant_bits(exponent);
#ifdef MP_USING_CACHE_SAFE_MOD_EXP
if (mp_using_cache_safe_exp) {
if (bits_in_exponent > 780)
window_bits = 6;
else if (bits_in_exponent > 256)
window_bits = 5;
diff --git a/lib/freebl/rsa.c b/lib/freebl/rsa.c
--- a/lib/freebl/rsa.c
+++ b/lib/freebl/rsa.c
@@ -65,16 +65,18 @@ struct blindingParamsStr {
** the Handbook of Applied Cryptography, 11.118-11.119.
*/
struct RSABlindingParamsStr {
/* Blinding-specific parameters */
PRCList link; /* link to list of structs */
SECItem modulus; /* list element "key" */
blindingParams *free, *bp; /* Blinding parameters queue */
blindingParams array[RSA_BLINDING_PARAMS_MAX_CACHE_SIZE];
+ /* precalculate montegomery reduction value */
+ mp_digit n0i; /* n0i = -( n & MP_DIGIT) ** -1 mod mp_RADIX */
};
typedef struct RSABlindingParamsStr RSABlindingParams;
/*
** RSABlindingParamsListStr
**
** List of key-specific blinding params. The arena holds the volatile pool
** of memory for each entry and the list itself. The lock is for list
@@ -1210,16 +1212,18 @@ generate_blinding_params(RSAPrivateKey *
CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(kb, modLen));
CHECK_MPI_OK(mp_read_unsigned_octets(&k, kb, modLen));
/* k < n */
CHECK_MPI_OK(mp_mod(&k, n, &k));
/* f = k**e mod n */
CHECK_MPI_OK(mp_exptmod(&k, &e, n, f));
/* g = k**-1 mod n */
CHECK_MPI_OK(mp_invmod(&k, n, g));
+ /* g in montgomery form.. */
+ CHECK_MPI_OK(mp_to_mont(g, n, g));
cleanup:
if (kb)
PORT_ZFree(kb, modLen);
mp_clear(&k);
mp_clear(&e);
if (err) {
MP_TO_SEC_ERROR(err);
rv = SECFailure;
@@ -1246,23 +1250,26 @@ init_blinding_params(RSABlindingParams *
* of rsabp->array pointer and must be set to NULL
*/
rsabp->array[RSA_BLINDING_PARAMS_MAX_CACHE_SIZE - 1].next = NULL;
bp = rsabp->array;
rsabp->bp = NULL;
rsabp->free = bp;
+ /* precalculate montgomery reduction parameter */
+ rsabp->n0i = mp_calculate_mont_n0i(n);
+
/* List elements are keyed using the modulus */
return SECITEM_CopyItem(NULL, &rsabp->modulus, &key->modulus);
}
static SECStatus
get_blinding_params(RSAPrivateKey *key, mp_int *n, unsigned int modLen,
- mp_int *f, mp_int *g)
+ mp_int *f, mp_int *g, mp_digit *n0i)
{
RSABlindingParams *rsabp = NULL;
blindingParams *bpUnlinked = NULL;
blindingParams *bp;
PRCList *el;
SECStatus rv = SECSuccess;
mp_err err = MP_OKAY;
int cmp = -1;
@@ -1312,16 +1319,17 @@ get_blinding_params(RSAPrivateKey *key,
** head (since el would have looped back to the head).
*/
PR_INSERT_BEFORE(&rsabp->link, el);
}
/* We've found (or created) the RSAblindingParams struct for this key.
* Now, search its list of ready blinding params for a usable one.
*/
+ *n0i = rsabp->n0i;
while (0 != (bp = rsabp->bp)) {
#ifdef UNSAFE_FUZZER_MODE
/* Found a match and there are still remaining uses left */
/* Return the parameters */
CHECK_MPI_OK(mp_copy(&bp->f, f));
CHECK_MPI_OK(mp_copy(&bp->g, g));
PZ_Unlock(blindingParamsList.lock);
@@ -1426,16 +1434,17 @@ cleanup:
rsabp->free = bp;
}
if (holdingLock) {
PZ_Unlock(blindingParamsList.lock);
}
if (err) {
MP_TO_SEC_ERROR(err);
}
+ *n0i = 0;
return SECFailure;
}
/*
** Perform a raw private-key operation
** Length of input and output buffers are equal to key's modulus len.
*/
static SECStatus
@@ -1445,16 +1454,17 @@ rsa_PrivateKeyOp(RSAPrivateKey *key,
PRBool check)
{
unsigned int modLen;
unsigned int offset;
SECStatus rv = SECSuccess;
mp_err err;
mp_int n, c, m;
mp_int f, g;
+ mp_digit n0i;
if (!key || !output || !input) {
PORT_SetError(SEC_ERROR_INVALID_ARGS);
return SECFailure;
}
/* check input out of range (needs to be in range [0..n-1]) */
modLen = rsa_modulusLen(&key->modulus);
if (modLen == 0) {
PORT_SetError(SEC_ERROR_INVALID_ARGS);
@@ -1476,17 +1486,17 @@ rsa_PrivateKeyOp(RSAPrivateKey *key,
CHECK_MPI_OK(mp_init(&f));
CHECK_MPI_OK(mp_init(&g));
SECITEM_TO_MPINT(key->modulus, &n);
OCTETS_TO_MPINT(input, &c, modLen);
/* If blinding, compute pre-image of ciphertext by multiplying by
** blinding factor
*/
if (nssRSAUseBlinding) {
- CHECK_SEC_OK(get_blinding_params(key, &n, modLen, &f, &g));
+ CHECK_SEC_OK(get_blinding_params(key, &n, modLen, &f, &g, &n0i));
/* c' = c*f mod n */
CHECK_MPI_OK(mp_mulmod(&c, &f, &n, &c));
}
/* Do the private key operation m = c**d mod n */
if (key->prime1.len == 0 ||
key->prime2.len == 0 ||
key->exponent1.len == 0 ||
key->exponent2.len == 0 ||
@@ -1497,17 +1507,17 @@ rsa_PrivateKeyOp(RSAPrivateKey *key,
} else {
CHECK_SEC_OK(rsa_PrivateKeyOpCRTNoCheck(key, &m, &c));
}
/* If blinding, compute post-image of plaintext by multiplying by
** blinding factor
*/
if (nssRSAUseBlinding) {
/* m = m'*g mod n */
- CHECK_MPI_OK(mp_mulmod(&m, &g, &n, &m));
+ CHECK_MPI_OK(mp_mulmontmodCT(&m, &g, &n, n0i, &m));
}
err = mp_to_fixlen_octets(&m, output, modLen);
if (err >= 0)
err = MP_OKAY;
cleanup:
mp_clear(&n);
mp_clear(&c);
mp_clear(&m);