Resolves: RHEL-15131

CVE-2023-5388 nss: timing attack against RSA decryption. Make the final blinding multmod constant time.
2023-11-22 08:58:18 -08:00 · 2023-11-22 08:58:18 -08:00 · 7d4bb6fbe6
commit 7d4bb6fbe6
parent b14469dd76
3 changed files with 968 additions and 2 deletions
--- a/NameConstraints_Certs.tar
+++ b/NameConstraints_Certs.tar
--- a/blinding_ct.patch
+++ b/blinding_ct.patch
@ -0,0 +1,949 @@
 diff --git a/lib/freebl/mpi/mpi-priv.h b/lib/freebl/mpi/mpi-priv.h
 --- a/lib/freebl/mpi/mpi-priv.h
 +++ b/lib/freebl/mpi/mpi-priv.h
@@ -199,16 +199,19 @@ void MPI_ASM_DECL s_mpv_mul_d(const mp_d
 void MPI_ASM_DECL s_mpv_mul_d_add(const mp_digit *a, mp_size a_len,
                                   mp_digit b, mp_digit *c);
 #endif
 void MPI_ASM_DECL s_mpv_mul_d_add_prop(const mp_digit *a,
                                        mp_size a_len, mp_digit b,
                                        mp_digit *c);
 +void MPI_ASM_DECL s_mpv_mul_d_add_propCT(const mp_digit *a,
 +                                       mp_size a_len, mp_digit b,
 +                                       mp_digit *c, mp_size c_len);
 void MPI_ASM_DECL s_mpv_sqr_add_prop(const mp_digit *a,
                                      mp_size a_len,
                                      mp_digit *sqrs);
 mp_err MPI_ASM_DECL s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo,
                                     mp_digit divisor, mp_digit *quot, mp_digit *rem);
 /* c += a * b * (MP_RADIX ** offset);  */
 diff --git a/lib/freebl/mpi/mpi.c b/lib/freebl/mpi/mpi.c
 --- a/lib/freebl/mpi/mpi.c
 +++ b/lib/freebl/mpi/mpi.c
@@ -5,16 +5,18 @@
  *
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 #include "mpi-priv.h"
 #include "mplogic.h"
 +#include <assert.h>
 +
 #if defined(__arm__) && \
     ((defined(__thumb__) && !defined(__thumb2__)) || defined(__ARM_ARCH_3__))
 /* 16-bit thumb or ARM v3 doesn't work inlined assember version */
 #undef MP_ASSEMBLY_MULTIPLY
 #undef MP_ASSEMBLY_SQUARE
 #endif
 #if MP_LOGTAB
@@ -797,25 +799,28 @@ mp_sub(const mp_int *a, const mp_int *b,
 CLEANUP:
     return res;
 } /* end mp_sub() */
 /* }}} */
 -/* {{{ mp_mul(a, b, c) */
 +/* {{{ s_mp_mulg(a, b, c) */
 /*
 -  mp_mul(a, b, c)
 -
 -  Compute c = a * b.  All parameters may be identical.
 +  s_mp_mulg(a, b, c)
 +
 +  Compute c = a * b.  All parameters may be identical. if constantTime is set,
 +  then the operations are done in constant time. The original is mostly
 +  constant time as long as s_mpv_mul_d_add() is constant time. This is true
 +  of the x86 assembler, as well as the current c code.
  */
 mp_err
 -mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
 +s_mp_mulg(const mp_int *a, const mp_int *b, mp_int *c, int constantTime)
 {
     mp_digit *pb;
     mp_int tmp;
     mp_err res;
     mp_size ib;
     mp_size useda, usedb;
     ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
@@ -841,17 +846,24 @@ mp_mul(const mp_int *a, const mp_int *b,
     }
     MP_USED(c) = 1;
     MP_DIGIT(c, 0) = 0;
     if ((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
         goto CLEANUP;
 #ifdef NSS_USE_COMBA
 -    if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
 +    /* comba isn't constant time because it clamps! If we cared
 +     * (we needed a constant time version of multiply that was 'faster'
 +     * we could easily pass constantTime down to the comba code and
 +     * get it to skip the clamp... but here are assembler versions
 +     * which add comba to platforms that can't compile the normal
 +     * comba's imbedded assembler which would also need to change, so
 +     * for now we just skip comba when we are running constant time. */
 +    if (!constantTime && (MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
         if (MP_USED(a) == 4) {
             s_mp_mul_comba_4(a, b, c);
             goto CLEANUP;
         }
         if (MP_USED(a) == 8) {
             s_mp_mul_comba_8(a, b, c);
             goto CLEANUP;
         }
@@ -871,36 +883,82 @@ mp_mul(const mp_int *a, const mp_int *b,
     /* Outer loop:  Digits of b */
     useda = MP_USED(a);
     usedb = MP_USED(b);
     for (ib = 1; ib < usedb; ib++) {
         mp_digit b_i = *pb++;
         /* Inner product:  Digits of a */
 -        if (b_i)
 +        if (constantTime || b_i)
             s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
         else
             MP_DIGIT(c, ib + useda) = b_i;
     }
 -    s_mp_clamp(c);
 +    if (!constantTime) {
 +        s_mp_clamp(c);
 +    }
     if (SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
         SIGN(c) = ZPOS;
     else
         SIGN(c) = NEG;
 CLEANUP:
     mp_clear(&tmp);
     return res;
 +} /* end smp_mulg() */
 +
 +/* }}} */
 +
 +/* {{{ mp_mul(a, b, c) */
 +
 +/*
 +  mp_mul(a, b, c)
 +
 +  Compute c = a * b.  All parameters may be identical.
 + */
 +
 +mp_err
 +mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
 +{
 +    return s_mp_mulg(a, b, c, 0);
 } /* end mp_mul() */
 /* }}} */
 +/* {{{ mp_mulCT(a, b, c) */
 +
 +/*
 +  mp_mulCT(a, b, c)
 +
 +  Compute c = a * b. In constant time. Parameters may not be identical.
 +  NOTE: a and b may be modified.
 + */
 +
 +mp_err
 +mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize)
 +{
 +    mp_err res;
 +
 +    /* make the multiply values fixed length so multiply
 +     * doesn't leak the length. at this point all the
 +     * values are blinded, but once we finish we want the
 +     * output size to be hidden (so no clamping the out put) */
 +    MP_CHECKOK(s_mp_pad(a, setSize));
 +    MP_CHECKOK(s_mp_pad(b, setSize));
 +    MP_CHECKOK(s_mp_pad(c, 2*setSize));
 +    MP_CHECKOK(s_mp_mulg(a, b, c, 1));
 +CLEANUP:
 +    return res;
 +} /* end mp_mulCT() */
 +
 +/* }}} */
 +
 /* {{{ mp_sqr(a, sqr) */
 #if MP_SQUARE
 /*
   Computes the square of a.  This can be done more
   efficiently than a general multiplication, because many of the
   computation steps are redundant when squaring.  The inner product
   step is a bit more complicated, but we save a fair number of
@@ -1263,16 +1321,174 @@ mp_mod(const mp_int *a, const mp_int *m,
     }
     return MP_OKAY;
 } /* end mp_mod() */
 /* }}} */
 +/* {{{ s_mp_subCT_d(a, b, borrow, c) */
 +
 +/*
 +  s_mp_subCT_d(a, b, borrow, c)
 +
 +  Compute c = (a -b) - subtract in constant time. returns borrow
 + */
 +mp_digit
 +s_mp_subCT_d(mp_digit a, mp_digit b, mp_digit borrow, mp_digit *ret) {
 +    mp_digit borrow1, borrow2, t;
 +#ifdef MP_COMPILER_USES_CARRY
 +    /* while it doesn't look constant-time, this is idiomatic code
 +     * to tell compilers to use the carry bit from subtraction */
 +    t = a - borrow;
 +    if (t > a) {
 +        borrow1 = 1;
 +    } else {
 +        borrow1 = 0;
 +    }
 +    *ret = t - b;
 +    if (*ret > t) {
 +        borrow2 = 1;
 +    } else {
 +        borrow2 = 0;
 +    }
 +#else
 +    mp_digit bitr, bitb, nbitt;
 +    /* this is constant time independent of compilier */
 +    t = a - borrow;
 +    borrow1 = ((~a) >> (MP_DIGIT_BIT-1)) & ((t) >> (MP_DIGIT_BIT-1));
 +    *ret = t - b;
 +    bitb = b >> (MP_DIGIT_BIT-1);
 +    bitr = *ret >> (MP_DIGIT_BIT-1);
 +    nbitt = (~t) >> (MP_DIGIT_BIT-1);
 +    borrow2  = (nbitt & bitb) | (bitb & bitr) | (nbitt & bitr);
 +#endif
 +    /* only borrow 1 or borrow 2 should be 1, we want to guarrentee
 +     * the overall borrow is 1, so use | here */
 +    return borrow1 | borrow2;
 +} /*  s_mp_subCT_d() */
 +
 +/* }}} */
 +
 +/* {{{ mp_subCT(a, b, ret, borrow) */
 +
 +/* return ret= a - b and borrow in borrow. done in constant time.
 + * b could be modified.
 + */
 +mp_err
 +mp_subCT(const mp_int *a,  mp_int *b, mp_int *ret, mp_digit *borrow)
 +{
 +    mp_size used_a = MP_USED(a);
 +    mp_size i;
 +    mp_err res;
 +
 +    MP_CHECKOK(s_mp_pad(b, used_a));
 +    MP_CHECKOK(s_mp_pad(ret, used_a));
 +    *borrow = 0;
 +    for (i=0; i < used_a; i++) {
 +        *borrow = s_mp_subCT_d(MP_DIGIT(a,i), MP_DIGIT(b,i), *borrow,
 +                               &MP_DIGIT(ret,i));
 +    }
 +
 +    res = MP_OKAY;
 +CLEANUP:
 +    return res;
 +} /*  end mp_subCT() */
 +
 +/* }}} */
 +
 +/* {{{ mp_selectCT(cond, a, b, ret) */
 +
 +/*
 + * return ret= cond ? a : b; cond should be either 0 or 1
 + */
 +mp_err
 +mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret)
 +{
 +    mp_size used_a = MP_USED(a);
 +    mp_err res;
 +    mp_size i;
 +
 +    cond *= MP_DIGIT_MAX;
 +
 +    /* we currently require these to be equal on input,
 +     * we could use pad to extend one of them, but that might
 +     * leak data as it wouldn't be constant time */
 +    assert(used_a == MP_USED(b));
 +
 +    MP_CHECKOK(s_mp_pad(ret, used_a));
 +    for (i=0; i < used_a; i++) {
 +        MP_DIGIT(ret,i) = (MP_DIGIT(a,i)&cond) | (MP_DIGIT(b,i)&~cond);
 +    }
 +    res = MP_OKAY;
 +CLEANUP:
 +    return res;
 +} /* end mp_selectCT() */
 +
 +
 +/* {{{ mp_reduceCT(a, m, c) */
 +
 +/*
 +  mp_reduceCT(a, m, c)
 +
 +  Compute c = aR^-1 (mod m) in constant time.
 +   input should be in montgomery form. If input is the
 +   result of a montgomery multiply then out put will be
 +   in mongomery form.
 +   Result will be reduced to MP_USED(m), but not be
 +   clamped.
 + */
 +
 +mp_err
 +mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *c)
 +{
 +    mp_size used_m = MP_USED(m);
 +    mp_size used_c = used_m*2+1;
 +    mp_digit *m_digits, *c_digits;
 +    mp_size i;
 +    mp_digit borrow, carry;
 +    mp_err res;
 +    mp_int sub;
 +
 +    MP_DIGITS(&sub) = 0;
 +    MP_CHECKOK(mp_init_size(&sub,used_m));
 +
 +    if (a != c) {
 +        MP_CHECKOK(mp_copy(a, c));
 +    }
 +    MP_CHECKOK(s_mp_pad(c, used_c));
 +    m_digits = MP_DIGITS(m);
 +    c_digits = MP_DIGITS(c);
 +    for (i=0; i < used_m; i++) {
 +        mp_digit m_i = MP_DIGIT(c,i)*n0i;
 +        s_mpv_mul_d_add_propCT(m_digits, used_m,  m_i, c_digits++, used_c--);
 +    }
 +    s_mp_rshd(c, used_m);
 +    /* MP_USED(c) should be used_m+1 with the high word being any carry
 +     * from the previous multiply, save that carry and drop the high
 +     * word for the substraction below */
 +    carry = MP_DIGIT(c,used_m);
 +    MP_DIGIT(c,used_m) = 0;
 +    MP_USED(c) = used_m;
 +    /* mp_subCT wants c and m to be the same size, we've already
 +     * guarrenteed that in the previous statement, so mp_subCT won't actually
 +     * modify m, so it's safe to recast */
 +    MP_CHECKOK(mp_subCT(c, (mp_int *)m, &sub, &borrow));
 +
 +    /* we return c-m if c >= m no borrow or there was a borrow and a carry */
 +    MP_CHECKOK(mp_selectCT(borrow ^ carry, c, &sub, c));
 +    res = MP_OKAY;
 +CLEANUP:
 +    mp_clear(&sub);
 +    return res;
 +}  /* end mp_reduceCT() */
 +
 +/* }}} */
 +
 /* {{{ mp_mod_d(a, d, c) */
 /*
   mp_mod_d(a, d, c)
   Compute c = a (mod d).  Result will always be 0 <= c < d
  */
 mp_err
@@ -1379,16 +1595,47 @@ mp_mulmod(const mp_int *a, const mp_int 
     if ((res = mp_mod(c, m, c)) != MP_OKAY)
         return res;
     return MP_OKAY;
 }
 /* }}} */
 +/* {{{ mp_mulmontmodCT(a, b, m, c) */
 +
 +/*
 +  mp_mulmontmodCT(a, b, m, c)
 +
 +  Compute c = (a * b) mod m in constant time wrt a and b. either a or b
 +  should be in montgomery form and the output is native. If both a and b
 +  are in montgomery form, then the output will also be in montgomery form
 +  and can be recovered with an mp_reduceCT call.
 +  NOTE: a and b may be modified.
 + */
 +
 +mp_err
 +mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i,
 +                mp_int *c)
 +{
 +    mp_err res;
 +
 +    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
 +
 +    if ((res = mp_mulCT(a, b, c, MP_USED(m))) != MP_OKAY)
 +        return res;
 +
 +    if ((res = mp_reduceCT(c, m, n0i, c)) != MP_OKAY)
 +        return res;
 +
 +    return MP_OKAY;
 +}
 +
 +/* }}} */
 +
 /* {{{ mp_sqrmod(a, m, c) */
 #if MP_SQUARE
 mp_err
 mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
 {
     mp_err res;
@@ -3936,25 +4183,73 @@ s_mp_mul(mp_int *a, const mp_int *b)
     {                                                              \
         mp_digit a0b1, a1b0;                                       \
         Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX);   \
         Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
         a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
         a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
         a1b0 += a0b1;                                              \
         Phi += a1b0 >> MP_HALF_DIGIT_BIT;                          \
 -        if (a1b0 < a0b1)                                           \
 -            Phi += MP_HALF_RADIX;                                  \
 +        Phi += (MP_CT_LTU(a1b0, a0b1)) << MP_HALF_DIGIT_BIT;     \
         a1b0 <<= MP_HALF_DIGIT_BIT;                                \
         Plo += a1b0;                                               \
 -        if (Plo < a1b0)                                            \
 -            ++Phi;                                                 \
 +        Phi += MP_CT_LTU(Plo, a1b0);                                          \
     }
 #endif
 +/* Constant time version of s_mpv_mul_d_add_prop.
 + * Presently, this is only used by the Constant time Montgomery arithmetic code. */
 +/* c += a * b */
 +void
 +s_mpv_mul_d_add_propCT(const mp_digit *a, mp_size a_len, mp_digit b,
 +                       mp_digit *c, mp_size c_len)
 +{
 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
 +    mp_digit d = 0;
 +
 +    c_len -= a_len;
 +    /* Inner product:  Digits of a */
 +    while (a_len--) {
 +        mp_word w = ((mp_word)b * *a++) + *c + d;
 +        *c++ = ACCUM(w);
 +        d = CARRYOUT(w);
 +    }
 +
 +    /* propagate the carry to the end, even if carry is zero */
 +    while (c_len--) {
 +        mp_word w = (mp_word)*c + d;
 +        *c++ = ACCUM(w);
 +        d = CARRYOUT(w);
 +    }
 +#else
 +    mp_digit carry = 0;
 +    c_len -= a_len;
 +    while (a_len--) {
 +        mp_digit a_i = *a++;
 +        mp_digit a0b0, a1b1;
 +        MP_MUL_DxD(a_i, b, a1b1, a0b0);
 +
 +        a0b0 += carry;
 +        a1b1 += MP_CT_LTU(a0b0, carry);
 +        a0b0 += a_i = *c;
 +        a1b1 += MP_CT_LTU(a0b0, a_i);
 +
 +        *c++ = a0b0;
 +        carry = a1b1;
 +    }
 +    /* propagate the carry to the end, even if carry is zero */
 +    while (c_len--) {
 +        mp_digit c_i = *c;
 +        carry += c_i;
 +        *c++ = carry;
 +        carry = MP_CT_LTU(carry, c_i);
 +    }
 +#endif
 +}
 +
 #if !defined(MP_ASSEMBLY_MULTIPLY)
 /* c = a * b */
 void
 s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
 {
 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
     mp_digit d = 0;
@@ -3969,18 +4264,17 @@ s_mpv_mul_d(const mp_digit *a, mp_size a
     mp_digit carry = 0;
     while (a_len--) {
         mp_digit a_i = *a++;
         mp_digit a0b0, a1b1;
         MP_MUL_DxD(a_i, b, a1b1, a0b0);
         a0b0 += carry;
 -        if (a0b0 < carry)
 -            ++a1b1;
 +        a1b1 += a0b0 < carry;
         *c++ = a0b0;
         carry = a1b1;
     }
     *c = carry;
 #endif
 }
 /* c += a * b */
@@ -4002,21 +4296,19 @@ s_mpv_mul_d_add(const mp_digit *a, mp_si
     mp_digit carry = 0;
     while (a_len--) {
         mp_digit a_i = *a++;
         mp_digit a0b0, a1b1;
         MP_MUL_DxD(a_i, b, a1b1, a0b0);
         a0b0 += carry;
 -        if (a0b0 < carry)
 -            ++a1b1;
 +        a1b1 += (a0b0 < carry);
         a0b0 += a_i = *c;
 -        if (a0b0 < a_i)
 -            ++a1b1;
 +        a1b1 += (a0b0 < a_i);
         *c++ = a0b0;
         carry = a1b1;
     }
     *c = carry;
 #endif
 }
 /* Presently, this is only used by the Montgomery arithmetic code. */
 diff --git a/lib/freebl/mpi/mpi.h b/lib/freebl/mpi/mpi.h
 --- a/lib/freebl/mpi/mpi.h
 +++ b/lib/freebl/mpi/mpi.h
@@ -145,16 +145,54 @@ typedef int mp_sword;
 #define MP_USED(MP) ((MP)->used)
 #define MP_ALLOC(MP) ((MP)->alloc)
 #define MP_DIGITS(MP) ((MP)->dp)
 #define MP_DIGIT(MP, N) (MP)->dp[(N)]
 /* This defines the maximum I/O base (minimum is 2)   */
 #define MP_MAX_RADIX 64
 +/* Constant Time Macros on mp_digits */
 +#define MP_CT_HIGH_TO_LOW(x) ((mp_digit)((mp_digit)(x) >> (MP_DIGIT_BIT - 1)))
 +
 +/* basic zero and non zero tests */
 +#define MP_CT_NOT_ZERO(x) (MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x)))))
 +#define MP_CT_ZERO(x) (~MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x)))))
 +
 +
 +/* basic constant-time helper macro for equalities and inequalities.
 + * The inequalities will produce incorrect results if
 + * abs(a-b) >= MP_DIGIT_SIZE/2. This can be avoided if unsigned values stay
 + * within the range 0-MP_DIGIT_MAX/2. */
 +#define MP_CT_EQ(a, b) MP_CT_ZERO(((a) - (b)))
 +#define MP_CT_NE(a, b) MP_CT_NOT_ZERO(((a) - (b)))
 +#define MP_CT_GT(a, b) MP_CT_HIGH_TO_LOW((b) - (a))
 +#define MP_CT_LT(a, b) MP_CT_HIGH_TO_LOW((a) - (b))
 +#define MP_CT_GE(a, b) (1^MP_CT_LT(a, b))
 +#define MP_CT_LE(a, b) (1^MP_CT_GT(a, b))
 +#define MP_CT_TRUE ((mp_digit)1)
 +#define MP_CT_FALSE ((mp_digit)0)
 +
 +/* use constant time result to select a boolean value */
 +#define MP_CT_SELB(m, l, r) (((m) & (l)) | (~(m) & (r)))
 +
 +/* full inequalities that work with full mp_digit values */
 +#define MP_CT_OVERFLOW(a,b,c,d) \
 +        MP_CT_SELB(MP_CT_HIGH_TO_LOW((a)^(b)), \
 +                  (MP_CT_HIGH_TO_LOW(d)),c)
 +#define MP_CT_GTU(a,b) MP_CT_OVERFLOW(a,b,MP_CT_GT(a,b),a)
 +#define MP_CT_LTU(a,b) MP_CT_OVERFLOW(a,b,MP_CT_LT(a,b),b)
 +#define MP_CT_GEU(a,b) MP_CT_OVERFLOW(a,b,MP_CT_GE(a,b),a)
 +#define MP_CT_LEU(a,b) MP_CT_OVERFLOW(a,b,MP_CT_LE(a,b),b)
 +#define MP_CT_GTS(a,b) MP_CT_OVERFLOW(a,b,MP_CT_GT(a,b),b)
 +#define MP_CT_LTS(a,b) MP_CT_OVERFLOW(a,b,MP_CT_LT(a,b),a)
 +#define MP_CT_GES(a,b) MP_CT_OVERFLOW(a,b,MP_CT_GE(a,b),b)
 +#define MP_CT_LES(a,b) MP_CT_OVERFLOW(a,b,MP_CT_LE(a,b),a)
 +
 +
 typedef struct {
     mp_sign sign;  /* sign of this quantity      */
     mp_size alloc; /* how many digits allocated  */
     mp_size used;  /* how many digits used       */
     mp_digit *dp;  /* the digits themselves      */
 } mp_int;
 /* Default precision       */
@@ -185,17 +223,19 @@ mp_err mp_expt_d(const mp_int *a, mp_dig
 /* Sign manipulations      */
 mp_err mp_abs(const mp_int *a, mp_int *b);
 mp_err mp_neg(const mp_int *a, mp_int *b);
 /* Full arithmetic         */
 mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c);
 mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c);
 +mp_err mp_subCT(const mp_int *a, mp_int *b, mp_int *c, mp_digit *borrow);
 mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c);
 +mp_err mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize);
 #if MP_SQUARE
 mp_err mp_sqr(const mp_int *a, mp_int *b);
 #else
 #define mp_sqr(a, b) mp_mul(a, a, b)
 #endif
 mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r);
 mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r);
 mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c);
@@ -212,23 +252,30 @@ mp_err mp_mulmod(const mp_int *a, const 
 mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c);
 #else
 #define mp_sqrmod(a, m, c) mp_mulmod(a, a, m, c)
 #endif
 mp_err mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
 mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c);
 #endif /* MP_MODARITH */
 +/* montgomery math */
 +mp_err mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont);
 +mp_digit mp_calculate_mont_n0i(const mp_int *N);
 +mp_err mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *ct);
 +mp_err mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i, mp_int *c);
 +
 /* Comparisons             */
 int mp_cmp_z(const mp_int *a);
 int mp_cmp_d(const mp_int *a, mp_digit d);
 int mp_cmp(const mp_int *a, const mp_int *b);
 int mp_cmp_mag(const mp_int *a, const mp_int *b);
 int mp_isodd(const mp_int *a);
 int mp_iseven(const mp_int *a);
 +mp_err mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret);
 /* Number theoretic        */
 mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c);
 mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c);
 mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y);
 mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c);
 mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c);
 diff --git a/lib/freebl/mpi/mpmontg.c b/lib/freebl/mpi/mpmontg.c
 --- a/lib/freebl/mpi/mpmontg.c
 +++ b/lib/freebl/mpi/mpmontg.c
@@ -124,30 +124,37 @@ s_mp_mul_mont(const mp_int *a, const mp_
     }
     res = MP_OKAY;
 CLEANUP:
     return res;
 }
 #endif
 -STATIC
 mp_err
 -s_mp_to_mont(const mp_int *x, mp_mont_modulus *mmm, mp_int *xMont)
 +mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont)
 {
     mp_err res;
     /* xMont = x * R mod N   where  N is modulus */
 -    MP_CHECKOK(mp_copy(x, xMont));
 -    MP_CHECKOK(s_mp_lshd(xMont, MP_USED(&mmm->N))); /* xMont = x << b */
 -    MP_CHECKOK(mp_div(xMont, &mmm->N, 0, xMont));   /*         mod N */
 +    if (x != xMont) {
 +        MP_CHECKOK(mp_copy(x, xMont));
 +    }
 +    MP_CHECKOK(s_mp_lshd(xMont, MP_USED(N))); /* xMont = x << b */
 +    MP_CHECKOK(mp_div(xMont, N, 0, xMont));   /*         mod N */
 CLEANUP:
     return res;
 }
 +mp_digit
 +mp_calculate_mont_n0i(const mp_int *N)
 +{
 +    return 0 - s_mp_invmod_radix(MP_DIGIT(N,0));
 +}
 +
 #ifdef MP_USING_MONT_MULF
 /* the floating point multiply is already cache safe,
  * don't turn on cache safe unless we specifically
  * force it */
 #ifndef MP_FORCE_CACHE_SAFE
 #undef MP_USING_CACHE_SAFE_MOD_EXP
 #endif
@@ -193,17 +200,17 @@ mp_exptmod_f(const mp_int *montBase,
     MP_DIGITS(&accum1) = 0;
     for (i = 0; i < MAX_ODD_INTS; ++i)
         oddPowers[i] = 0;
     MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));
     mp_set(&accum1, 1);
 -    MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
 +    MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
     MP_CHECKOK(s_mp_pad(&accum1, nLen));
     oddPowSize = 2 * nLen + 1;
     dTmpSize = 2 * oddPowSize;
     dSize = sizeof(double) * (nLen * 4 + 1 +
                               ((odd_ints + 1) * oddPowSize) + dTmpSize);
     dBuf = malloc(dSize);
     if (!dBuf) {
@@ -473,17 +480,17 @@ mp_exptmod_i(const mp_int *montBase,
     for (i = 1; i < odd_ints; ++i) {
         MP_CHECKOK(mp_init_size(oddPowers + i, nLen + 2 * MP_USED(&power2) + 2));
         MP_CHECKOK(mp_mul(oddPowers + (i - 1), &power2, oddPowers + i));
         MP_CHECKOK(s_mp_redc(oddPowers + i, mmm));
     }
     /* set accumulator to montgomery residue of 1 */
     mp_set(&accum1, 1);
 -    MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
 +    MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
     pa1 = &accum1;
     pa2 = &accum2;
     for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) {
         mp_size smallExp;
         MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits));
         smallExp = (mp_size)res;
@@ -862,17 +869,17 @@ mp_exptmod_safe_i(const mp_int *montBase
     /* build the first WEAVE_WORD powers inline */
     /* if WEAVE_WORD_SIZE is not 4, this code will have to change */
     if (num_powers > 2) {
         MP_CHECKOK(mp_init_size(&accum[0], 3 * nLen + 2));
         MP_CHECKOK(mp_init_size(&accum[1], 3 * nLen + 2));
         MP_CHECKOK(mp_init_size(&accum[2], 3 * nLen + 2));
         MP_CHECKOK(mp_init_size(&accum[3], 3 * nLen + 2));
         mp_set(&accum[0], 1);
 -        MP_CHECKOK(s_mp_to_mont(&accum[0], mmm, &accum[0]));
 +        MP_CHECKOK(mp_to_mont(&accum[0], &(mmm->N), &accum[0]));
         MP_CHECKOK(mp_copy(montBase, &accum[1]));
         SQR(montBase, &accum[2]);
         MUL_NOWEAVE(montBase, &accum[2], &accum[3]);
         powersArray = (mp_digit *)malloc(num_powers * (nLen * sizeof(mp_digit) + 1));
         if (!powersArray) {
             res = MP_MEM;
             goto CLEANUP;
         }
@@ -881,17 +888,17 @@ mp_exptmod_safe_i(const mp_int *montBase
         MP_CHECKOK(mpi_to_weave(accum, powers, nLen, num_powers));
         if (first_window < 4) {
             MP_CHECKOK(mp_copy(&accum[first_window], &accum1));
             first_window = num_powers;
         }
     } else {
         if (first_window == 0) {
             mp_set(&accum1, 1);
 -            MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
 +            MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
         } else {
             /* assert first_window == 1? */
             MP_CHECKOK(mp_copy(montBase, &accum1));
         }
     }
     /*
      * calculate all the powers in the powers array.
@@ -1054,19 +1061,19 @@ mp_exptmod(const mp_int *inBase, const m
     nLen = MP_USED(modulus);
     MP_CHECKOK(mp_init_size(&montBase, 2 * nLen + 2));
     mmm.N = *modulus; /* a copy of the mp_int struct */
     /* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX
     **        where n0 = least significant mp_digit of N, the modulus.
     */
 -    mmm.n0prime = 0 - s_mp_invmod_radix(MP_DIGIT(modulus, 0));
 +    mmm.n0prime = mp_calculate_mont_n0i(modulus);
 -    MP_CHECKOK(s_mp_to_mont(base, &mmm, &montBase));
 +    MP_CHECKOK(mp_to_mont(base, modulus, &montBase));
     bits_in_exponent = mpl_significant_bits(exponent);
 #ifdef MP_USING_CACHE_SAFE_MOD_EXP
     if (mp_using_cache_safe_exp) {
         if (bits_in_exponent > 780)
             window_bits = 6;
         else if (bits_in_exponent > 256)
             window_bits = 5;
 diff --git a/lib/freebl/rsa.c b/lib/freebl/rsa.c
 --- a/lib/freebl/rsa.c
 +++ b/lib/freebl/rsa.c
@@ -65,16 +65,18 @@ struct blindingParamsStr {
 ** the Handbook of Applied Cryptography, 11.118-11.119.
 */
 struct RSABlindingParamsStr {
     /* Blinding-specific parameters */
     PRCList link;              /* link to list of structs            */
     SECItem modulus;           /* list element "key"                 */
     blindingParams *free, *bp; /* Blinding parameters queue          */
     blindingParams array[RSA_BLINDING_PARAMS_MAX_CACHE_SIZE];
 +    /* precalculate montegomery reduction value */
 +    mp_digit n0i; /* n0i = -( n & MP_DIGIT) ** -1 mod mp_RADIX */
 };
 typedef struct RSABlindingParamsStr RSABlindingParams;
 /*
 ** RSABlindingParamsListStr
 **
 ** List of key-specific blinding params.  The arena holds the volatile pool
 ** of memory for each entry and the list itself.  The lock is for list
@@ -1210,16 +1212,18 @@ generate_blinding_params(RSAPrivateKey *
     CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(kb, modLen));
     CHECK_MPI_OK(mp_read_unsigned_octets(&k, kb, modLen));
     /* k < n */
     CHECK_MPI_OK(mp_mod(&k, n, &k));
     /* f = k**e mod n */
     CHECK_MPI_OK(mp_exptmod(&k, &e, n, f));
     /* g = k**-1 mod n */
     CHECK_MPI_OK(mp_invmod(&k, n, g));
 +    /* g in montgomery form.. */
 +    CHECK_MPI_OK(mp_to_mont(g, n, g));
 cleanup:
     if (kb)
         PORT_ZFree(kb, modLen);
     mp_clear(&k);
     mp_clear(&e);
     if (err) {
         MP_TO_SEC_ERROR(err);
         rv = SECFailure;
@@ -1246,23 +1250,26 @@ init_blinding_params(RSABlindingParams *
      * of rsabp->array pointer and must be set to NULL
      */
     rsabp->array[RSA_BLINDING_PARAMS_MAX_CACHE_SIZE - 1].next = NULL;
     bp = rsabp->array;
     rsabp->bp = NULL;
     rsabp->free = bp;
 +    /* precalculate montgomery reduction parameter */
 +    rsabp->n0i = mp_calculate_mont_n0i(n);
 +
     /* List elements are keyed using the modulus */
     return SECITEM_CopyItem(NULL, &rsabp->modulus, &key->modulus);
 }
 static SECStatus
 get_blinding_params(RSAPrivateKey *key, mp_int *n, unsigned int modLen,
 -                    mp_int *f, mp_int *g)
 +                    mp_int *f, mp_int *g, mp_digit *n0i)
 {
     RSABlindingParams *rsabp = NULL;
     blindingParams *bpUnlinked = NULL;
     blindingParams *bp;
     PRCList *el;
     SECStatus rv = SECSuccess;
     mp_err err = MP_OKAY;
     int cmp = -1;
@@ -1312,16 +1319,17 @@ get_blinding_params(RSAPrivateKey *key, 
             ** head (since el would have looped back to the head).
             */
             PR_INSERT_BEFORE(&rsabp->link, el);
         }
         /* We've found (or created) the RSAblindingParams struct for this key.
          * Now, search its list of ready blinding params for a usable one.
          */
 +        *n0i = rsabp->n0i;
         while (0 != (bp = rsabp->bp)) {
 #ifdef UNSAFE_FUZZER_MODE
             /* Found a match and there are still remaining uses left */
             /* Return the parameters */
             CHECK_MPI_OK(mp_copy(&bp->f, f));
             CHECK_MPI_OK(mp_copy(&bp->g, g));
             PZ_Unlock(blindingParamsList.lock);
@@ -1426,16 +1434,17 @@ cleanup:
         rsabp->free = bp;
     }
     if (holdingLock) {
         PZ_Unlock(blindingParamsList.lock);
     }
     if (err) {
         MP_TO_SEC_ERROR(err);
     }
 +    *n0i = 0;
     return SECFailure;
 }
 /*
 ** Perform a raw private-key operation
 **  Length of input and output buffers are equal to key's modulus len.
 */
 static SECStatus
@@ -1445,16 +1454,17 @@ rsa_PrivateKeyOp(RSAPrivateKey *key,
                  PRBool check)
 {
     unsigned int modLen;
     unsigned int offset;
     SECStatus rv = SECSuccess;
     mp_err err;
     mp_int n, c, m;
     mp_int f, g;
 +    mp_digit n0i;
     if (!key || !output || !input) {
         PORT_SetError(SEC_ERROR_INVALID_ARGS);
         return SECFailure;
     }
     /* check input out of range (needs to be in range [0..n-1]) */
     modLen = rsa_modulusLen(&key->modulus);
     if (modLen == 0) {
         PORT_SetError(SEC_ERROR_INVALID_ARGS);
@@ -1476,17 +1486,17 @@ rsa_PrivateKeyOp(RSAPrivateKey *key,
     CHECK_MPI_OK(mp_init(&f));
     CHECK_MPI_OK(mp_init(&g));
     SECITEM_TO_MPINT(key->modulus, &n);
     OCTETS_TO_MPINT(input, &c, modLen);
     /* If blinding, compute pre-image of ciphertext by multiplying by
     ** blinding factor
     */
     if (nssRSAUseBlinding) {
 -        CHECK_SEC_OK(get_blinding_params(key, &n, modLen, &f, &g));
 +        CHECK_SEC_OK(get_blinding_params(key, &n, modLen, &f, &g, &n0i));
         /* c' = c*f mod n */
         CHECK_MPI_OK(mp_mulmod(&c, &f, &n, &c));
     }
     /* Do the private key operation m = c**d mod n */
     if (key->prime1.len == 0 ||
         key->prime2.len == 0 ||
         key->exponent1.len == 0 ||
         key->exponent2.len == 0 ||
@@ -1497,17 +1507,17 @@ rsa_PrivateKeyOp(RSAPrivateKey *key,
     } else {
         CHECK_SEC_OK(rsa_PrivateKeyOpCRTNoCheck(key, &m, &c));
     }
     /* If blinding, compute post-image of plaintext by multiplying by
     ** blinding factor
     */
     if (nssRSAUseBlinding) {
         /* m = m'*g mod n */
 -        CHECK_MPI_OK(mp_mulmod(&m, &g, &n, &m));
 +        CHECK_MPI_OK(mp_mulmontmodCT(&m, &g, &n, n0i, &m));
     }
     err = mp_to_fixlen_octets(&m, output, modLen);
     if (err >= 0)
         err = MP_OKAY;
 cleanup:
     mp_clear(&n);
     mp_clear(&c);
     mp_clear(&m);
--- a/nss.spec
+++ b/nss.spec
@ -63,7 +63,7 @@ print(string.sub(hash, 0, 16))
 Summary:          Network Security Services
 Name:             nss
 Version:          %{nss_version}
-Release:          3%{?dist}
+Release:          3.1%{?dist}
 License:          MPLv2.0
 URL:              http://www.mozilla.org/projects/security/pki/nss/
 Requires:         nspr >= %{nspr_version}%{nspr_release}
@ -113,6 +113,8 @@ Source28:         nss-p11-kit.config
 # will have their own validation
 Source30:         fips_algorithms.h
 Source50:         NameConstraints_Certs.tar
 # To inject hardening flags for DSO
 Patch1:           nss-dso-ldflags.patch
 # This patch uses the GCC -iquote option documented at
@ -132,7 +134,7 @@ Patch4:           iquote.patch
 Patch9:		  nss-sysinit-userdb.patch
 # Disable nss-sysinit test which is solely to test the above change
 Patch10:	  nss-skip-sysinit-gtests.patch
-
+Patch15:          nss-3.90-extend-db-dump-time.patch
 # For compatibility reasons, we stick with the old PKCS #11 2.40
 # definition of CK_GCM_PARAMS:
 %if 0%{?fedora} < 34
@ -174,6 +176,8 @@ Patch64:          nss-3.90-pbkdf2-indicator.patch
 #ems policy. needs to upstream
 Patch70:          nss-3.90-add-ems-policy.patch
 Patch80:          blinding_ct.patch
 %description
 Network Security Services (NSS) is a set of libraries designed to
 support cross-platform development of security-enabled client and
@ -310,6 +314,11 @@ popd
 # each vendors claim in their own FIPS certification
 cp %{SOURCE30} nss/lib/softoken/
 #update expired test certs
 pushd nss
 tar xvf %{SOURCE50}
 popd
 # https://bugzilla.redhat.com/show_bug.cgi?id=1247353
 find nss/lib/libpkix -perm /u+x -type f -exec chmod -x {} \;
@ -509,6 +518,10 @@ export USE_64=1
 # disabled by the system policy.
 export NSS_IGNORE_SYSTEM_POLICY=1
 %ifarch i686 ppcle64
 export NSS_DB_DUMP_TIME=10
 %endif
 # enable the following line to force a test failure
 # find ./nss -name \*.chk | xargs rm -f
@ -955,6 +968,10 @@ update-crypto-policies --no-reload &> /dev/null || :
 %changelog
 * Tue Nov 21 2023 Bob Relyea <rrelyea@redhat.com> - 3.90.0-3.1
 - Fix expired certs in tests
 - Fix CVE-2023-5388
 * Thu Aug 3 2023 Bob Relyea <rrelyea@redhat.com> - 3.90.0-3
 - add indicators for pbkdf2
 - add camellia to pkcs12 doc files