7d4bb6fbe6
CVE-2023-5388 nss: timing attack against RSA decryption. Make the final blinding multmod constant time.
950 lines
30 KiB
Diff
950 lines
30 KiB
Diff
diff --git a/lib/freebl/mpi/mpi-priv.h b/lib/freebl/mpi/mpi-priv.h
|
|
--- a/lib/freebl/mpi/mpi-priv.h
|
|
+++ b/lib/freebl/mpi/mpi-priv.h
|
|
@@ -199,16 +199,19 @@ void MPI_ASM_DECL s_mpv_mul_d(const mp_d
|
|
void MPI_ASM_DECL s_mpv_mul_d_add(const mp_digit *a, mp_size a_len,
|
|
mp_digit b, mp_digit *c);
|
|
|
|
#endif
|
|
|
|
void MPI_ASM_DECL s_mpv_mul_d_add_prop(const mp_digit *a,
|
|
mp_size a_len, mp_digit b,
|
|
mp_digit *c);
|
|
+void MPI_ASM_DECL s_mpv_mul_d_add_propCT(const mp_digit *a,
|
|
+ mp_size a_len, mp_digit b,
|
|
+ mp_digit *c, mp_size c_len);
|
|
void MPI_ASM_DECL s_mpv_sqr_add_prop(const mp_digit *a,
|
|
mp_size a_len,
|
|
mp_digit *sqrs);
|
|
|
|
mp_err MPI_ASM_DECL s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo,
|
|
mp_digit divisor, mp_digit *quot, mp_digit *rem);
|
|
|
|
/* c += a * b * (MP_RADIX ** offset); */
|
|
diff --git a/lib/freebl/mpi/mpi.c b/lib/freebl/mpi/mpi.c
|
|
--- a/lib/freebl/mpi/mpi.c
|
|
+++ b/lib/freebl/mpi/mpi.c
|
|
@@ -5,16 +5,18 @@
|
|
*
|
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#include "mpi-priv.h"
|
|
#include "mplogic.h"
|
|
|
|
+#include <assert.h>
|
|
+
|
|
#if defined(__arm__) && \
|
|
((defined(__thumb__) && !defined(__thumb2__)) || defined(__ARM_ARCH_3__))
|
|
/* 16-bit thumb or ARM v3 doesn't work inlined assember version */
|
|
#undef MP_ASSEMBLY_MULTIPLY
|
|
#undef MP_ASSEMBLY_SQUARE
|
|
#endif
|
|
|
|
#if MP_LOGTAB
|
|
@@ -797,25 +799,28 @@ mp_sub(const mp_int *a, const mp_int *b,
|
|
|
|
CLEANUP:
|
|
return res;
|
|
|
|
} /* end mp_sub() */
|
|
|
|
/* }}} */
|
|
|
|
-/* {{{ mp_mul(a, b, c) */
|
|
+/* {{{ s_mp_mulg(a, b, c) */
|
|
|
|
/*
|
|
- mp_mul(a, b, c)
|
|
-
|
|
- Compute c = a * b. All parameters may be identical.
|
|
+ s_mp_mulg(a, b, c)
|
|
+
|
|
+ Compute c = a * b. All parameters may be identical. If constantTime is set,
|
|
+ then the operations are done in constant time. The original is mostly
|
|
+ constant time as long as s_mpv_mul_d_add() is constant time. This is true
|
|
+ of the x86 assembler, as well as the current C code.
|
|
*/
|
|
mp_err
|
|
-mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
|
|
+s_mp_mulg(const mp_int *a, const mp_int *b, mp_int *c, int constantTime)
|
|
{
|
|
mp_digit *pb;
|
|
mp_int tmp;
|
|
mp_err res;
|
|
mp_size ib;
|
|
mp_size useda, usedb;
|
|
|
|
ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
|
|
@@ -841,17 +846,24 @@ mp_mul(const mp_int *a, const mp_int *b,
|
|
}
|
|
|
|
MP_USED(c) = 1;
|
|
MP_DIGIT(c, 0) = 0;
|
|
if ((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
|
|
goto CLEANUP;
|
|
|
|
#ifdef NSS_USE_COMBA
|
|
- if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
|
|
+ /* comba isn't constant time because it clamps! If we cared
|
|
+ * (i.e. we needed a constant time version of multiply that was 'faster'),
|
|
+ * we could easily pass constantTime down to the comba code and
|
|
+ * get it to skip the clamp... but there are assembler versions
|
|
+ * which add comba to platforms that can't compile the normal
|
|
+ * comba's embedded assembler which would also need to change, so
|
|
+ * for now we just skip comba when we are running constant time. */
|
|
+ if (!constantTime && (MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
|
|
if (MP_USED(a) == 4) {
|
|
s_mp_mul_comba_4(a, b, c);
|
|
goto CLEANUP;
|
|
}
|
|
if (MP_USED(a) == 8) {
|
|
s_mp_mul_comba_8(a, b, c);
|
|
goto CLEANUP;
|
|
}
|
|
@@ -871,36 +883,82 @@ mp_mul(const mp_int *a, const mp_int *b,
|
|
|
|
/* Outer loop: Digits of b */
|
|
useda = MP_USED(a);
|
|
usedb = MP_USED(b);
|
|
for (ib = 1; ib < usedb; ib++) {
|
|
mp_digit b_i = *pb++;
|
|
|
|
/* Inner product: Digits of a */
|
|
- if (b_i)
|
|
+ if (constantTime || b_i)
|
|
s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
|
|
else
|
|
MP_DIGIT(c, ib + useda) = b_i;
|
|
}
|
|
|
|
- s_mp_clamp(c);
|
|
+ if (!constantTime) {
|
|
+ s_mp_clamp(c);
|
|
+ }
|
|
|
|
if (SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
|
|
SIGN(c) = ZPOS;
|
|
else
|
|
SIGN(c) = NEG;
|
|
|
|
CLEANUP:
|
|
mp_clear(&tmp);
|
|
return res;
|
|
+} /* end s_mp_mulg() */
|
|
+
|
|
+/* }}} */
|
|
+
|
|
+/* {{{ mp_mul(a, b, c) */
|
|
+
|
|
+/*
|
|
+ mp_mul(a, b, c)
|
|
+
|
|
+ Compute c = a * b. All parameters may be identical.
|
|
+ */
|
|
+
|
|
+mp_err
|
|
+mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
|
|
+{
|
|
+ return s_mp_mulg(a, b, c, 0);
|
|
} /* end mp_mul() */
|
|
|
|
/* }}} */
|
|
|
|
+/* {{{ mp_mulCT(a, b, c) */
|
|
+
|
|
+/*
|
|
+ mp_mulCT(a, b, c)
|
|
+
|
|
+ Compute c = a * b in constant time. Parameters may not be identical.
|
|
+ NOTE: a and b may be modified.
|
|
+ */
|
|
+
|
|
+mp_err
|
|
+mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize)
|
|
+{
|
|
+ mp_err res;
|
|
+
|
|
+ /* make the multiply values fixed length so multiply
|
|
+ * doesn't leak the length. at this point all the
|
|
+ * values are blinded, but once we finish we want the
|
|
+ * output size to be hidden (so no clamping the output) */
|
|
+ MP_CHECKOK(s_mp_pad(a, setSize));
|
|
+ MP_CHECKOK(s_mp_pad(b, setSize));
|
|
+ MP_CHECKOK(s_mp_pad(c, 2*setSize));
|
|
+ MP_CHECKOK(s_mp_mulg(a, b, c, 1));
|
|
+CLEANUP:
|
|
+ return res;
|
|
+} /* end mp_mulCT() */
|
|
+
|
|
+/* }}} */
|
|
+
|
|
/* {{{ mp_sqr(a, sqr) */
|
|
|
|
#if MP_SQUARE
|
|
/*
|
|
Computes the square of a. This can be done more
|
|
efficiently than a general multiplication, because many of the
|
|
computation steps are redundant when squaring. The inner product
|
|
step is a bit more complicated, but we save a fair number of
|
|
@@ -1263,16 +1321,174 @@ mp_mod(const mp_int *a, const mp_int *m,
|
|
}
|
|
|
|
return MP_OKAY;
|
|
|
|
} /* end mp_mod() */
|
|
|
|
/* }}} */
|
|
|
|
+/* {{{ s_mp_subCT_d(a, b, borrow, c) */
|
|
+
|
|
+/*
|
|
+ s_mp_subCT_d(a, b, borrow, c)
|
|
+
|
|
+ Compute *ret = (a - borrow) - b in constant time. Returns the borrow out.
|
|
+ */
|
|
+mp_digit
|
|
+s_mp_subCT_d(mp_digit a, mp_digit b, mp_digit borrow, mp_digit *ret) {
|
|
+ mp_digit borrow1, borrow2, t;
|
|
+#ifdef MP_COMPILER_USES_CARRY
|
|
+ /* while it doesn't look constant-time, this is idiomatic code
|
|
+ * to tell compilers to use the carry bit from subtraction */
|
|
+ t = a - borrow;
|
|
+ if (t > a) {
|
|
+ borrow1 = 1;
|
|
+ } else {
|
|
+ borrow1 = 0;
|
|
+ }
|
|
+ *ret = t - b;
|
|
+ if (*ret > t) {
|
|
+ borrow2 = 1;
|
|
+ } else {
|
|
+ borrow2 = 0;
|
|
+ }
|
|
+#else
|
|
+ mp_digit bitr, bitb, nbitt;
|
|
+ /* this is constant time independent of compiler */
|
|
+ t = a - borrow;
|
|
+ borrow1 = ((~a) >> (MP_DIGIT_BIT-1)) & ((t) >> (MP_DIGIT_BIT-1));
|
|
+ *ret = t - b;
|
|
+ bitb = b >> (MP_DIGIT_BIT-1);
|
|
+ bitr = *ret >> (MP_DIGIT_BIT-1);
|
|
+ nbitt = (~t) >> (MP_DIGIT_BIT-1);
|
|
+ borrow2 = (nbitt & bitb) | (bitb & bitr) | (nbitt & bitr);
|
|
+#endif
|
|
+ /* only borrow1 or borrow2 should be 1, we want to guarantee
|
|
+ * the overall borrow is 1, so use | here */
|
|
+ return borrow1 | borrow2;
|
|
+} /* s_mp_subCT_d() */
|
|
+
|
|
+/* }}} */
|
|
+
|
|
+/* {{{ mp_subCT(a, b, ret, borrow) */
|
|
+
|
|
+/* return ret= a - b and borrow in borrow. done in constant time.
|
|
+ * b could be modified.
|
|
+ */
|
|
+mp_err
|
|
+mp_subCT(const mp_int *a, mp_int *b, mp_int *ret, mp_digit *borrow)
|
|
+{
|
|
+ mp_size used_a = MP_USED(a);
|
|
+ mp_size i;
|
|
+ mp_err res;
|
|
+
|
|
+ MP_CHECKOK(s_mp_pad(b, used_a));
|
|
+ MP_CHECKOK(s_mp_pad(ret, used_a));
|
|
+ *borrow = 0;
|
|
+ for (i=0; i < used_a; i++) {
|
|
+ *borrow = s_mp_subCT_d(MP_DIGIT(a,i), MP_DIGIT(b,i), *borrow,
|
|
+ &MP_DIGIT(ret,i));
|
|
+ }
|
|
+
|
|
+ res = MP_OKAY;
|
|
+CLEANUP:
|
|
+ return res;
|
|
+} /* end mp_subCT() */
|
|
+
|
|
+/* }}} */
|
|
+
|
|
+/* {{{ mp_selectCT(cond, a, b, ret) */
|
|
+
|
|
+/*
|
|
+ * return ret= cond ? a : b; cond should be either 0 or 1
|
|
+ */
|
|
+mp_err
|
|
+mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret)
|
|
+{
|
|
+ mp_size used_a = MP_USED(a);
|
|
+ mp_err res;
|
|
+ mp_size i;
|
|
+
|
|
+ cond *= MP_DIGIT_MAX;
|
|
+
|
|
+ /* we currently require these to be equal on input,
|
|
+ * we could use pad to extend one of them, but that might
|
|
+ * leak data as it wouldn't be constant time */
|
|
+ assert(used_a == MP_USED(b));
|
|
+
|
|
+ MP_CHECKOK(s_mp_pad(ret, used_a));
|
|
+ for (i=0; i < used_a; i++) {
|
|
+ MP_DIGIT(ret,i) = (MP_DIGIT(a,i)&cond) | (MP_DIGIT(b,i)&~cond);
|
|
+ }
|
|
+ res = MP_OKAY;
|
|
+CLEANUP:
|
|
+ return res;
|
|
+} /* end mp_selectCT() */
|
|
+
|
|
+
|
|
+/* {{{ mp_reduceCT(a, m, c) */
|
|
+
|
|
+/*
|
|
+ mp_reduceCT(a, m, c)
|
|
+
|
|
+ Compute c = aR^-1 (mod m) in constant time.
|
|
+ input should be in montgomery form. If input is the
|
|
+ result of a montgomery multiply then the output will be
|
|
+ in montgomery form.
|
|
+ Result will be reduced to MP_USED(m), but not be
|
|
+ clamped.
|
|
+ */
|
|
+
|
|
+mp_err
|
|
+mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *c)
|
|
+{
|
|
+ mp_size used_m = MP_USED(m);
|
|
+ mp_size used_c = used_m*2+1;
|
|
+ mp_digit *m_digits, *c_digits;
|
|
+ mp_size i;
|
|
+ mp_digit borrow, carry;
|
|
+ mp_err res;
|
|
+ mp_int sub;
|
|
+
|
|
+ MP_DIGITS(&sub) = 0;
|
|
+ MP_CHECKOK(mp_init_size(&sub,used_m));
|
|
+
|
|
+ if (a != c) {
|
|
+ MP_CHECKOK(mp_copy(a, c));
|
|
+ }
|
|
+ MP_CHECKOK(s_mp_pad(c, used_c));
|
|
+ m_digits = MP_DIGITS(m);
|
|
+ c_digits = MP_DIGITS(c);
|
|
+ for (i=0; i < used_m; i++) {
|
|
+ mp_digit m_i = MP_DIGIT(c,i)*n0i;
|
|
+ s_mpv_mul_d_add_propCT(m_digits, used_m, m_i, c_digits++, used_c--);
|
|
+ }
|
|
+ s_mp_rshd(c, used_m);
|
|
+ /* MP_USED(c) should be used_m+1 with the high word being any carry
|
|
+ * from the previous multiply, save that carry and drop the high
|
|
+ * word for the subtraction below */
|
|
+ carry = MP_DIGIT(c,used_m);
|
|
+ MP_DIGIT(c,used_m) = 0;
|
|
+ MP_USED(c) = used_m;
|
|
+ /* mp_subCT wants c and m to be the same size, we've already
|
|
+ * guaranteed that in the previous statement, so mp_subCT won't actually
|
|
+ * modify m, so it's safe to recast */
|
|
+ MP_CHECKOK(mp_subCT(c, (mp_int *)m, &sub, &borrow));
|
|
+
|
|
+ /* we return c-m if c >= m (no borrow), or if there was a borrow and a carry */
|
|
+ MP_CHECKOK(mp_selectCT(borrow ^ carry, c, &sub, c));
|
|
+ res = MP_OKAY;
|
|
+CLEANUP:
|
|
+ mp_clear(&sub);
|
|
+ return res;
|
|
+} /* end mp_reduceCT() */
|
|
+
|
|
+/* }}} */
|
|
+
|
|
/* {{{ mp_mod_d(a, d, c) */
|
|
|
|
/*
|
|
mp_mod_d(a, d, c)
|
|
|
|
Compute c = a (mod d). Result will always be 0 <= c < d
|
|
*/
|
|
mp_err
|
|
@@ -1379,16 +1595,47 @@ mp_mulmod(const mp_int *a, const mp_int
|
|
if ((res = mp_mod(c, m, c)) != MP_OKAY)
|
|
return res;
|
|
|
|
return MP_OKAY;
|
|
}
|
|
|
|
/* }}} */
|
|
|
|
+/* {{{ mp_mulmontmodCT(a, b, m, c) */
|
|
+
|
|
+/*
|
|
+ mp_mulmontmodCT(a, b, m, c)
|
|
+
|
|
+ Compute c = (a * b) mod m in constant time wrt a and b. either a or b
|
|
+ should be in montgomery form and the output is native. If both a and b
|
|
+ are in montgomery form, then the output will also be in montgomery form
|
|
+ and can be recovered with an mp_reduceCT call.
|
|
+ NOTE: a and b may be modified.
|
|
+ */
|
|
+
|
|
+mp_err
|
|
+mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i,
|
|
+ mp_int *c)
|
|
+{
|
|
+ mp_err res;
|
|
+
|
|
+ ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
|
|
+
|
|
+ if ((res = mp_mulCT(a, b, c, MP_USED(m))) != MP_OKAY)
|
|
+ return res;
|
|
+
|
|
+ if ((res = mp_reduceCT(c, m, n0i, c)) != MP_OKAY)
|
|
+ return res;
|
|
+
|
|
+ return MP_OKAY;
|
|
+}
|
|
+
|
|
+/* }}} */
|
|
+
|
|
/* {{{ mp_sqrmod(a, m, c) */
|
|
|
|
#if MP_SQUARE
|
|
mp_err
|
|
mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
|
|
{
|
|
mp_err res;
|
|
|
|
@@ -3936,25 +4183,73 @@ s_mp_mul(mp_int *a, const mp_int *b)
|
|
{ \
|
|
mp_digit a0b1, a1b0; \
|
|
Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX); \
|
|
Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
|
|
a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
|
|
a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
|
|
a1b0 += a0b1; \
|
|
Phi += a1b0 >> MP_HALF_DIGIT_BIT; \
|
|
- if (a1b0 < a0b1) \
|
|
- Phi += MP_HALF_RADIX; \
|
|
+ Phi += (MP_CT_LTU(a1b0, a0b1)) << MP_HALF_DIGIT_BIT; \
|
|
a1b0 <<= MP_HALF_DIGIT_BIT; \
|
|
Plo += a1b0; \
|
|
- if (Plo < a1b0) \
|
|
- ++Phi; \
|
|
+ Phi += MP_CT_LTU(Plo, a1b0); \
|
|
}
|
|
#endif
|
|
|
|
+/* Constant time version of s_mpv_mul_d_add_prop.
|
|
+ * Presently, this is only used by the Constant time Montgomery arithmetic code. */
|
|
+/* c += a * b */
|
|
+void
|
|
+s_mpv_mul_d_add_propCT(const mp_digit *a, mp_size a_len, mp_digit b,
|
|
+ mp_digit *c, mp_size c_len)
|
|
+{
|
|
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
|
|
+ mp_digit d = 0;
|
|
+
|
|
+ c_len -= a_len;
|
|
+ /* Inner product: Digits of a */
|
|
+ while (a_len--) {
|
|
+ mp_word w = ((mp_word)b * *a++) + *c + d;
|
|
+ *c++ = ACCUM(w);
|
|
+ d = CARRYOUT(w);
|
|
+ }
|
|
+
|
|
+ /* propagate the carry to the end, even if carry is zero */
|
|
+ while (c_len--) {
|
|
+ mp_word w = (mp_word)*c + d;
|
|
+ *c++ = ACCUM(w);
|
|
+ d = CARRYOUT(w);
|
|
+ }
|
|
+#else
|
|
+ mp_digit carry = 0;
|
|
+ c_len -= a_len;
|
|
+ while (a_len--) {
|
|
+ mp_digit a_i = *a++;
|
|
+ mp_digit a0b0, a1b1;
|
|
+ MP_MUL_DxD(a_i, b, a1b1, a0b0);
|
|
+
|
|
+ a0b0 += carry;
|
|
+ a1b1 += MP_CT_LTU(a0b0, carry);
|
|
+ a0b0 += a_i = *c;
|
|
+ a1b1 += MP_CT_LTU(a0b0, a_i);
|
|
+
|
|
+ *c++ = a0b0;
|
|
+ carry = a1b1;
|
|
+ }
|
|
+ /* propagate the carry to the end, even if carry is zero */
|
|
+ while (c_len--) {
|
|
+ mp_digit c_i = *c;
|
|
+ carry += c_i;
|
|
+ *c++ = carry;
|
|
+ carry = MP_CT_LTU(carry, c_i);
|
|
+ }
|
|
+#endif
|
|
+}
|
|
+
|
|
#if !defined(MP_ASSEMBLY_MULTIPLY)
|
|
/* c = a * b */
|
|
void
|
|
s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
|
|
{
|
|
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
|
|
mp_digit d = 0;
|
|
|
|
@@ -3969,18 +4264,17 @@ s_mpv_mul_d(const mp_digit *a, mp_size a
|
|
mp_digit carry = 0;
|
|
while (a_len--) {
|
|
mp_digit a_i = *a++;
|
|
mp_digit a0b0, a1b1;
|
|
|
|
MP_MUL_DxD(a_i, b, a1b1, a0b0);
|
|
|
|
a0b0 += carry;
|
|
- if (a0b0 < carry)
|
|
- ++a1b1;
|
|
+ a1b1 += a0b0 < carry;
|
|
*c++ = a0b0;
|
|
carry = a1b1;
|
|
}
|
|
*c = carry;
|
|
#endif
|
|
}
|
|
|
|
/* c += a * b */
|
|
@@ -4002,21 +4296,19 @@ s_mpv_mul_d_add(const mp_digit *a, mp_si
|
|
mp_digit carry = 0;
|
|
while (a_len--) {
|
|
mp_digit a_i = *a++;
|
|
mp_digit a0b0, a1b1;
|
|
|
|
MP_MUL_DxD(a_i, b, a1b1, a0b0);
|
|
|
|
a0b0 += carry;
|
|
- if (a0b0 < carry)
|
|
- ++a1b1;
|
|
+ a1b1 += (a0b0 < carry);
|
|
a0b0 += a_i = *c;
|
|
- if (a0b0 < a_i)
|
|
- ++a1b1;
|
|
+ a1b1 += (a0b0 < a_i);
|
|
*c++ = a0b0;
|
|
carry = a1b1;
|
|
}
|
|
*c = carry;
|
|
#endif
|
|
}
|
|
|
|
/* Presently, this is only used by the Montgomery arithmetic code. */
|
|
diff --git a/lib/freebl/mpi/mpi.h b/lib/freebl/mpi/mpi.h
|
|
--- a/lib/freebl/mpi/mpi.h
|
|
+++ b/lib/freebl/mpi/mpi.h
|
|
@@ -145,16 +145,54 @@ typedef int mp_sword;
|
|
#define MP_USED(MP) ((MP)->used)
|
|
#define MP_ALLOC(MP) ((MP)->alloc)
|
|
#define MP_DIGITS(MP) ((MP)->dp)
|
|
#define MP_DIGIT(MP, N) (MP)->dp[(N)]
|
|
|
|
/* This defines the maximum I/O base (minimum is 2) */
|
|
#define MP_MAX_RADIX 64
|
|
|
|
+/* Constant Time Macros on mp_digits */
|
|
+#define MP_CT_HIGH_TO_LOW(x) ((mp_digit)((mp_digit)(x) >> (MP_DIGIT_BIT - 1)))
|
|
+
|
|
+/* basic zero and non zero tests */
|
|
+#define MP_CT_NOT_ZERO(x) (MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x)))))
|
|
+#define MP_CT_ZERO(x) (((mp_digit)1) ^ MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x))))) /* 1 iff x == 0, else 0 */
|
|
+
|
|
+
|
|
+/* basic constant-time helper macro for equalities and inequalities.
|
|
+ * The inequalities will produce incorrect results if
|
|
+ * abs(a-b) >= MP_DIGIT_MAX/2. This can be avoided if unsigned values stay
|
|
+ * within the range 0-MP_DIGIT_MAX/2. */
|
|
+#define MP_CT_EQ(a, b) MP_CT_ZERO(((a) - (b)))
|
|
+#define MP_CT_NE(a, b) MP_CT_NOT_ZERO(((a) - (b)))
|
|
+#define MP_CT_GT(a, b) MP_CT_HIGH_TO_LOW((b) - (a))
|
|
+#define MP_CT_LT(a, b) MP_CT_HIGH_TO_LOW((a) - (b))
|
|
+#define MP_CT_GE(a, b) (1^MP_CT_LT(a, b))
|
|
+#define MP_CT_LE(a, b) (1^MP_CT_GT(a, b))
|
|
+#define MP_CT_TRUE ((mp_digit)1)
|
|
+#define MP_CT_FALSE ((mp_digit)0)
|
|
+
|
|
+/* use constant time result to select a boolean value */
|
|
+#define MP_CT_SELB(m, l, r) (((m) & (l)) | (~(m) & (r)))
|
|
+
|
|
+/* full inequalities that work with full mp_digit values */
|
|
+#define MP_CT_OVERFLOW(a,b,c,d) \
|
|
+ MP_CT_SELB(MP_CT_HIGH_TO_LOW((a)^(b)), \
|
|
+ (MP_CT_HIGH_TO_LOW(d)),c)
|
|
+#define MP_CT_GTU(a,b) MP_CT_OVERFLOW(a,b,MP_CT_GT(a,b),a)
|
|
+#define MP_CT_LTU(a,b) MP_CT_OVERFLOW(a,b,MP_CT_LT(a,b),b)
|
|
+#define MP_CT_GEU(a,b) MP_CT_OVERFLOW(a,b,MP_CT_GE(a,b),a)
|
|
+#define MP_CT_LEU(a,b) MP_CT_OVERFLOW(a,b,MP_CT_LE(a,b),b)
|
|
+#define MP_CT_GTS(a,b) MP_CT_OVERFLOW(a,b,MP_CT_GT(a,b),b)
|
|
+#define MP_CT_LTS(a,b) MP_CT_OVERFLOW(a,b,MP_CT_LT(a,b),a)
|
|
+#define MP_CT_GES(a,b) MP_CT_OVERFLOW(a,b,MP_CT_GE(a,b),b)
|
|
+#define MP_CT_LES(a,b) MP_CT_OVERFLOW(a,b,MP_CT_LE(a,b),a)
|
|
+
|
|
+
|
|
typedef struct {
|
|
mp_sign sign; /* sign of this quantity */
|
|
mp_size alloc; /* how many digits allocated */
|
|
mp_size used; /* how many digits used */
|
|
mp_digit *dp; /* the digits themselves */
|
|
} mp_int;
|
|
|
|
/* Default precision */
|
|
@@ -185,17 +223,19 @@ mp_err mp_expt_d(const mp_int *a, mp_dig
|
|
|
|
/* Sign manipulations */
|
|
mp_err mp_abs(const mp_int *a, mp_int *b);
|
|
mp_err mp_neg(const mp_int *a, mp_int *b);
|
|
|
|
/* Full arithmetic */
|
|
mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c);
|
|
mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c);
|
|
+mp_err mp_subCT(const mp_int *a, mp_int *b, mp_int *c, mp_digit *borrow);
|
|
mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c);
|
|
+mp_err mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize);
|
|
#if MP_SQUARE
|
|
mp_err mp_sqr(const mp_int *a, mp_int *b);
|
|
#else
|
|
#define mp_sqr(a, b) mp_mul(a, a, b)
|
|
#endif
|
|
mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r);
|
|
mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r);
|
|
mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c);
|
|
@@ -212,23 +252,30 @@ mp_err mp_mulmod(const mp_int *a, const
|
|
mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c);
|
|
#else
|
|
#define mp_sqrmod(a, m, c) mp_mulmod(a, a, m, c)
|
|
#endif
|
|
mp_err mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
|
|
mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c);
|
|
#endif /* MP_MODARITH */
|
|
|
|
+/* montgomery math */
|
|
+mp_err mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont);
|
|
+mp_digit mp_calculate_mont_n0i(const mp_int *N);
|
|
+mp_err mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *ct);
|
|
+mp_err mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i, mp_int *c);
|
|
+
|
|
/* Comparisons */
|
|
int mp_cmp_z(const mp_int *a);
|
|
int mp_cmp_d(const mp_int *a, mp_digit d);
|
|
int mp_cmp(const mp_int *a, const mp_int *b);
|
|
int mp_cmp_mag(const mp_int *a, const mp_int *b);
|
|
int mp_isodd(const mp_int *a);
|
|
int mp_iseven(const mp_int *a);
|
|
+mp_err mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret);
|
|
|
|
/* Number theoretic */
|
|
mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c);
|
|
mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c);
|
|
mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y);
|
|
mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c);
|
|
mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c);
|
|
|
|
diff --git a/lib/freebl/mpi/mpmontg.c b/lib/freebl/mpi/mpmontg.c
|
|
--- a/lib/freebl/mpi/mpmontg.c
|
|
+++ b/lib/freebl/mpi/mpmontg.c
|
|
@@ -124,30 +124,37 @@ s_mp_mul_mont(const mp_int *a, const mp_
|
|
}
|
|
res = MP_OKAY;
|
|
|
|
CLEANUP:
|
|
return res;
|
|
}
|
|
#endif
|
|
|
|
-STATIC
|
|
mp_err
|
|
-s_mp_to_mont(const mp_int *x, mp_mont_modulus *mmm, mp_int *xMont)
|
|
+mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont)
|
|
{
|
|
mp_err res;
|
|
|
|
/* xMont = x * R mod N where N is modulus */
|
|
- MP_CHECKOK(mp_copy(x, xMont));
|
|
- MP_CHECKOK(s_mp_lshd(xMont, MP_USED(&mmm->N))); /* xMont = x << b */
|
|
- MP_CHECKOK(mp_div(xMont, &mmm->N, 0, xMont)); /* mod N */
|
|
+ if (x != xMont) {
|
|
+ MP_CHECKOK(mp_copy(x, xMont));
|
|
+ }
|
|
+ MP_CHECKOK(s_mp_lshd(xMont, MP_USED(N))); /* xMont = x << b */
|
|
+ MP_CHECKOK(mp_div(xMont, N, 0, xMont)); /* mod N */
|
|
CLEANUP:
|
|
return res;
|
|
}
|
|
|
|
+mp_digit
|
|
+mp_calculate_mont_n0i(const mp_int *N)
|
|
+{
|
|
+ return 0 - s_mp_invmod_radix(MP_DIGIT(N,0));
|
|
+}
|
|
+
|
|
#ifdef MP_USING_MONT_MULF
|
|
|
|
/* the floating point multiply is already cache safe,
|
|
* don't turn on cache safe unless we specifically
|
|
* force it */
|
|
#ifndef MP_FORCE_CACHE_SAFE
|
|
#undef MP_USING_CACHE_SAFE_MOD_EXP
|
|
#endif
|
|
@@ -193,17 +200,17 @@ mp_exptmod_f(const mp_int *montBase,
|
|
MP_DIGITS(&accum1) = 0;
|
|
|
|
for (i = 0; i < MAX_ODD_INTS; ++i)
|
|
oddPowers[i] = 0;
|
|
|
|
MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));
|
|
|
|
mp_set(&accum1, 1);
|
|
- MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
|
|
+ MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
|
|
MP_CHECKOK(s_mp_pad(&accum1, nLen));
|
|
|
|
oddPowSize = 2 * nLen + 1;
|
|
dTmpSize = 2 * oddPowSize;
|
|
dSize = sizeof(double) * (nLen * 4 + 1 +
|
|
((odd_ints + 1) * oddPowSize) + dTmpSize);
|
|
dBuf = malloc(dSize);
|
|
if (!dBuf) {
|
|
@@ -473,17 +480,17 @@ mp_exptmod_i(const mp_int *montBase,
|
|
for (i = 1; i < odd_ints; ++i) {
|
|
MP_CHECKOK(mp_init_size(oddPowers + i, nLen + 2 * MP_USED(&power2) + 2));
|
|
MP_CHECKOK(mp_mul(oddPowers + (i - 1), &power2, oddPowers + i));
|
|
MP_CHECKOK(s_mp_redc(oddPowers + i, mmm));
|
|
}
|
|
|
|
/* set accumulator to montgomery residue of 1 */
|
|
mp_set(&accum1, 1);
|
|
- MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
|
|
+ MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
|
|
pa1 = &accum1;
|
|
pa2 = &accum2;
|
|
|
|
for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) {
|
|
mp_size smallExp;
|
|
MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits));
|
|
smallExp = (mp_size)res;
|
|
|
|
@@ -862,17 +869,17 @@ mp_exptmod_safe_i(const mp_int *montBase
|
|
/* build the first WEAVE_WORD powers inline */
|
|
/* if WEAVE_WORD_SIZE is not 4, this code will have to change */
|
|
if (num_powers > 2) {
|
|
MP_CHECKOK(mp_init_size(&accum[0], 3 * nLen + 2));
|
|
MP_CHECKOK(mp_init_size(&accum[1], 3 * nLen + 2));
|
|
MP_CHECKOK(mp_init_size(&accum[2], 3 * nLen + 2));
|
|
MP_CHECKOK(mp_init_size(&accum[3], 3 * nLen + 2));
|
|
mp_set(&accum[0], 1);
|
|
- MP_CHECKOK(s_mp_to_mont(&accum[0], mmm, &accum[0]));
|
|
+ MP_CHECKOK(mp_to_mont(&accum[0], &(mmm->N), &accum[0]));
|
|
MP_CHECKOK(mp_copy(montBase, &accum[1]));
|
|
SQR(montBase, &accum[2]);
|
|
MUL_NOWEAVE(montBase, &accum[2], &accum[3]);
|
|
powersArray = (mp_digit *)malloc(num_powers * (nLen * sizeof(mp_digit) + 1));
|
|
if (!powersArray) {
|
|
res = MP_MEM;
|
|
goto CLEANUP;
|
|
}
|
|
@@ -881,17 +888,17 @@ mp_exptmod_safe_i(const mp_int *montBase
|
|
MP_CHECKOK(mpi_to_weave(accum, powers, nLen, num_powers));
|
|
if (first_window < 4) {
|
|
MP_CHECKOK(mp_copy(&accum[first_window], &accum1));
|
|
first_window = num_powers;
|
|
}
|
|
} else {
|
|
if (first_window == 0) {
|
|
mp_set(&accum1, 1);
|
|
- MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
|
|
+ MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
|
|
} else {
|
|
/* assert first_window == 1? */
|
|
MP_CHECKOK(mp_copy(montBase, &accum1));
|
|
}
|
|
}
|
|
|
|
/*
|
|
* calculate all the powers in the powers array.
|
|
@@ -1054,19 +1061,19 @@ mp_exptmod(const mp_int *inBase, const m
|
|
nLen = MP_USED(modulus);
|
|
MP_CHECKOK(mp_init_size(&montBase, 2 * nLen + 2));
|
|
|
|
mmm.N = *modulus; /* a copy of the mp_int struct */
|
|
|
|
/* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX
|
|
** where n0 = least significant mp_digit of N, the modulus.
|
|
*/
|
|
- mmm.n0prime = 0 - s_mp_invmod_radix(MP_DIGIT(modulus, 0));
|
|
+ mmm.n0prime = mp_calculate_mont_n0i(modulus);
|
|
|
|
- MP_CHECKOK(s_mp_to_mont(base, &mmm, &montBase));
|
|
+ MP_CHECKOK(mp_to_mont(base, modulus, &montBase));
|
|
|
|
bits_in_exponent = mpl_significant_bits(exponent);
|
|
#ifdef MP_USING_CACHE_SAFE_MOD_EXP
|
|
if (mp_using_cache_safe_exp) {
|
|
if (bits_in_exponent > 780)
|
|
window_bits = 6;
|
|
else if (bits_in_exponent > 256)
|
|
window_bits = 5;
|
|
diff --git a/lib/freebl/rsa.c b/lib/freebl/rsa.c
|
|
--- a/lib/freebl/rsa.c
|
|
+++ b/lib/freebl/rsa.c
|
|
@@ -65,16 +65,18 @@ struct blindingParamsStr {
|
|
** the Handbook of Applied Cryptography, 11.118-11.119.
|
|
*/
|
|
struct RSABlindingParamsStr {
|
|
/* Blinding-specific parameters */
|
|
PRCList link; /* link to list of structs */
|
|
SECItem modulus; /* list element "key" */
|
|
blindingParams *free, *bp; /* Blinding parameters queue */
|
|
blindingParams array[RSA_BLINDING_PARAMS_MAX_CACHE_SIZE];
|
|
+ /* precalculate montgomery reduction value */
|
|
+ mp_digit n0i; /* n0i = -(n0 ** -1) mod MP_RADIX, n0 = low digit of n */
|
|
};
|
|
typedef struct RSABlindingParamsStr RSABlindingParams;
|
|
|
|
/*
|
|
** RSABlindingParamsListStr
|
|
**
|
|
** List of key-specific blinding params. The arena holds the volatile pool
|
|
** of memory for each entry and the list itself. The lock is for list
|
|
@@ -1210,16 +1212,18 @@ generate_blinding_params(RSAPrivateKey *
|
|
CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(kb, modLen));
|
|
CHECK_MPI_OK(mp_read_unsigned_octets(&k, kb, modLen));
|
|
/* k < n */
|
|
CHECK_MPI_OK(mp_mod(&k, n, &k));
|
|
/* f = k**e mod n */
|
|
CHECK_MPI_OK(mp_exptmod(&k, &e, n, f));
|
|
/* g = k**-1 mod n */
|
|
CHECK_MPI_OK(mp_invmod(&k, n, g));
|
|
+ /* g in montgomery form.. */
|
|
+ CHECK_MPI_OK(mp_to_mont(g, n, g));
|
|
cleanup:
|
|
if (kb)
|
|
PORT_ZFree(kb, modLen);
|
|
mp_clear(&k);
|
|
mp_clear(&e);
|
|
if (err) {
|
|
MP_TO_SEC_ERROR(err);
|
|
rv = SECFailure;
|
|
@@ -1246,23 +1250,26 @@ init_blinding_params(RSABlindingParams *
|
|
* of rsabp->array pointer and must be set to NULL
|
|
*/
|
|
rsabp->array[RSA_BLINDING_PARAMS_MAX_CACHE_SIZE - 1].next = NULL;
|
|
|
|
bp = rsabp->array;
|
|
rsabp->bp = NULL;
|
|
rsabp->free = bp;
|
|
|
|
+ /* precalculate montgomery reduction parameter */
|
|
+ rsabp->n0i = mp_calculate_mont_n0i(n);
|
|
+
|
|
/* List elements are keyed using the modulus */
|
|
return SECITEM_CopyItem(NULL, &rsabp->modulus, &key->modulus);
|
|
}
|
|
|
|
static SECStatus
|
|
get_blinding_params(RSAPrivateKey *key, mp_int *n, unsigned int modLen,
|
|
- mp_int *f, mp_int *g)
|
|
+ mp_int *f, mp_int *g, mp_digit *n0i)
|
|
{
|
|
RSABlindingParams *rsabp = NULL;
|
|
blindingParams *bpUnlinked = NULL;
|
|
blindingParams *bp;
|
|
PRCList *el;
|
|
SECStatus rv = SECSuccess;
|
|
mp_err err = MP_OKAY;
|
|
int cmp = -1;
|
|
@@ -1312,16 +1319,17 @@ get_blinding_params(RSAPrivateKey *key,
|
|
** head (since el would have looped back to the head).
|
|
*/
|
|
PR_INSERT_BEFORE(&rsabp->link, el);
|
|
}
|
|
|
|
/* We've found (or created) the RSAblindingParams struct for this key.
|
|
* Now, search its list of ready blinding params for a usable one.
|
|
*/
|
|
+ *n0i = rsabp->n0i;
|
|
while (0 != (bp = rsabp->bp)) {
|
|
#ifdef UNSAFE_FUZZER_MODE
|
|
/* Found a match and there are still remaining uses left */
|
|
/* Return the parameters */
|
|
CHECK_MPI_OK(mp_copy(&bp->f, f));
|
|
CHECK_MPI_OK(mp_copy(&bp->g, g));
|
|
|
|
PZ_Unlock(blindingParamsList.lock);
|
|
@@ -1426,16 +1434,17 @@ cleanup:
|
|
rsabp->free = bp;
|
|
}
|
|
if (holdingLock) {
|
|
PZ_Unlock(blindingParamsList.lock);
|
|
}
|
|
if (err) {
|
|
MP_TO_SEC_ERROR(err);
|
|
}
|
|
+ *n0i = 0;
|
|
return SECFailure;
|
|
}
|
|
|
|
/*
|
|
** Perform a raw private-key operation
|
|
** Length of input and output buffers are equal to key's modulus len.
|
|
*/
|
|
static SECStatus
|
|
@@ -1445,16 +1454,17 @@ rsa_PrivateKeyOp(RSAPrivateKey *key,
|
|
PRBool check)
|
|
{
|
|
unsigned int modLen;
|
|
unsigned int offset;
|
|
SECStatus rv = SECSuccess;
|
|
mp_err err;
|
|
mp_int n, c, m;
|
|
mp_int f, g;
|
|
+ mp_digit n0i;
|
|
if (!key || !output || !input) {
|
|
PORT_SetError(SEC_ERROR_INVALID_ARGS);
|
|
return SECFailure;
|
|
}
|
|
/* check input out of range (needs to be in range [0..n-1]) */
|
|
modLen = rsa_modulusLen(&key->modulus);
|
|
if (modLen == 0) {
|
|
PORT_SetError(SEC_ERROR_INVALID_ARGS);
|
|
@@ -1476,17 +1486,17 @@ rsa_PrivateKeyOp(RSAPrivateKey *key,
|
|
CHECK_MPI_OK(mp_init(&f));
|
|
CHECK_MPI_OK(mp_init(&g));
|
|
SECITEM_TO_MPINT(key->modulus, &n);
|
|
OCTETS_TO_MPINT(input, &c, modLen);
|
|
/* If blinding, compute pre-image of ciphertext by multiplying by
|
|
** blinding factor
|
|
*/
|
|
if (nssRSAUseBlinding) {
|
|
- CHECK_SEC_OK(get_blinding_params(key, &n, modLen, &f, &g));
|
|
+ CHECK_SEC_OK(get_blinding_params(key, &n, modLen, &f, &g, &n0i));
|
|
/* c' = c*f mod n */
|
|
CHECK_MPI_OK(mp_mulmod(&c, &f, &n, &c));
|
|
}
|
|
/* Do the private key operation m = c**d mod n */
|
|
if (key->prime1.len == 0 ||
|
|
key->prime2.len == 0 ||
|
|
key->exponent1.len == 0 ||
|
|
key->exponent2.len == 0 ||
|
|
@@ -1497,17 +1507,17 @@ rsa_PrivateKeyOp(RSAPrivateKey *key,
|
|
} else {
|
|
CHECK_SEC_OK(rsa_PrivateKeyOpCRTNoCheck(key, &m, &c));
|
|
}
|
|
/* If blinding, compute post-image of plaintext by multiplying by
|
|
** blinding factor
|
|
*/
|
|
if (nssRSAUseBlinding) {
|
|
/* m = m'*g mod n */
|
|
- CHECK_MPI_OK(mp_mulmod(&m, &g, &n, &m));
|
|
+ CHECK_MPI_OK(mp_mulmontmodCT(&m, &g, &n, n0i, &m));
|
|
}
|
|
err = mp_to_fixlen_octets(&m, output, modLen);
|
|
if (err >= 0)
|
|
err = MP_OKAY;
|
|
cleanup:
|
|
mp_clear(&n);
|
|
mp_clear(&c);
|
|
mp_clear(&m);
|