/src/openssl/crypto/bn/bn_mont.c

Source
/*
 * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

/*
 * Details about Montgomery multiplication algorithms can be found in
 * https://www.microsoft.com/en-us/research/wp-content/uploads/1996/01/j37acmon.pdf
 * and https://cetinkayakoc.net/docs/r01.pdf
 */

#include "internal/cryptlib.h"
#include "bn_local.h"

#define MONT_WORD /* use the faster word-based algorithm */

#ifdef MONT_WORD
static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont);
#endif

/*
 * Public Montgomery multiplication entry point.
 *
 * Delegates the work to bn_mul_mont_fixed_top() and then applies
 * bn_correct_top() to |r|.  The delegate's status is returned unchanged;
 * note that |r| is post-processed whether or not the delegate succeeded.
 */
int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
    BN_MONT_CTX *mont, BN_CTX *ctx)
{
    const int status = bn_mul_mont_fixed_top(r, a, b, mont, ctx);

    bn_correct_top(r);
    bn_check_top(r);
    return status;
}

/*
 * Montgomery-multiply |a| by |b| for the modulus stored in |mont| and place
 * the result in |r|.  Returns 1 on success and 0 on failure.
 *
 * When the assembly routine is compiled in and both operands have exactly as
 * many words as the modulus, bn_mul_mont() is tried first.  If that path is
 * unavailable or declines, the plain product is formed in a BN_CTX temporary
 * and then reduced.  Operands whose combined word count exceeds twice the
 * modulus word count are rejected.
 */
int bn_mul_mont_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
    BN_MONT_CTX *mont, BN_CTX *ctx)
{
    const int nwords = mont->N.top;
    BIGNUM *prod;
    int ok = 0;

#if defined(OPENSSL_BN_ASM_MONT) && defined(MONT_WORD)
    if (nwords > 1 && nwords <= BN_SOFT_LIMIT
        && a->top == nwords && b->top == nwords) {
        if (bn_wexpand(r, nwords) == NULL)
            return 0;
        if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, nwords)) {
            r->neg = a->neg ^ b->neg;
            r->top = nwords;
            r->flags |= BN_FLG_FIXED_TOP;
            return 1;
        }
    }
#endif

    if (a->top + b->top > 2 * nwords)
        return 0;

    BN_CTX_start(ctx);
    prod = BN_CTX_get(ctx);
    if (prod == NULL)
        goto done;

    bn_check_top(prod);
    /* Squaring is used when both operands are the very same object. */
    if (!(a == b ? bn_sqr_fixed_top(prod, a, ctx)
                 : bn_mul_fixed_top(prod, a, b, ctx)))
        goto done;

    /* reduce from aRR to aR */
#ifdef MONT_WORD
    ok = bn_from_montgomery_word(r, prod, mont) != 0;
#else
    ok = BN_from_montgomery(r, prod, mont, ctx) != 0;
#endif

done:
    BN_CTX_end(ctx);
    return ok;
}

#ifdef MONT_WORD
/*
 * Word-based Montgomery reduction.
 *
 * Treats |r| as a value of up to 2 * mont->N.top words, reduces it using
 * mont->N and mont->n0[0], and writes the mont->N.top-word result to |ret|
 * with BN_FLG_FIXED_TOP set.  |r| is used as scratch space: it is expanded
 * to 2 * mont->N.top words, overwritten during the reduction, and its upper
 * half is zeroed before returning.
 *
 * Returns 1 on success, or 0 if either bn_wexpand() call fails.
 */
static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
{
    BIGNUM *n;
    BN_ULONG *ap, *np, *rp, n0, v, carry;
    int nl, max, i;
    unsigned int rtop;

    n = &(mont->N);
    nl = n->top;
    /* A modulus with no words yields an empty result; |r| is not touched. */
    if (nl == 0) {
        ret->top = 0;
        return 1;
    }

    max = (2 * nl); /* carry is stored separately */
    if (bn_wexpand(r, max) == NULL)
        return 0;

    r->neg ^= n->neg;
    np = n->d;
    rp = r->d;

    /*
     * Clear the top words of T: every word of |r| at index >= r->top is
     * zeroed.  The mask is computed arithmetically rather than with a
     * comparison: |i - rtop| is evaluated as unsigned, so its top bit is set
     * when i < rtop, which makes |v| all-ones (word kept); otherwise |v| is
     * zero (word cleared).
     */
    for (rtop = r->top, i = 0; i < max; i++) {
        v = (BN_ULONG)0 - ((i - rtop) >> (8 * sizeof(rtop) - 1));
        rp[i] &= v;
    }

    r->top = max;
    r->flags |= BN_FLG_FIXED_TOP;
    n0 = mont->n0[0];

    /*
     * Add multiples of |n| to |r| until R = 2^(nl * BN_BITS2) divides it. On
     * input, we had |r| < |n| * R, so now |r| < 2 * |n| * R. Note that |r|
     * includes |carry| which is stored separately.
     *
     * Each iteration adds (rp[0] * n0) * |n| at the current word offset and
     * folds the word returned by bn_mul_add_words() plus the running |carry|
     * into rp[nl].  |carry| is then updated without branching: it becomes 1
     * if the new word is smaller than the old one, 0 if it is larger, and is
     * left unchanged if they are equal.
     */
    for (carry = 0, i = 0; i < nl; i++, rp++) {
        v = bn_mul_add_words(rp, np, nl, (rp[0] * n0) & BN_MASK2);
        v = (v + carry + rp[nl]) & BN_MASK2;
        carry |= (v != rp[nl]);
        carry &= (v <= rp[nl]);
        rp[nl] = v;
    }

    if (bn_wexpand(ret, nl) == NULL)
        return 0;
    ret->top = nl;
    ret->flags |= BN_FLG_FIXED_TOP;
    ret->neg = r->neg;

    rp = ret->d;

    /*
     * Shift |nl| words to divide by R. We have |ap| < 2 * |n|. Note that |ap|
     * includes |carry| which is stored separately.
     */
    ap = &(r->d[nl]);

    /* Tentatively store |ap| - |np| in |ret|; the borrow adjusts |carry|. */
    carry -= bn_sub_words(rp, ap, np, nl);
    /*
     * |carry| is -1 if |ap| - |np| underflowed or zero if it did not. Note
     * |carry| cannot be 1. That would imply the subtraction did not fit in
     * |nl| words, and we know at most one subtraction is needed.
     */
    for (i = 0; i < nl; i++) {
        /* Mask-select: keep ap[i] on underflow, else the difference. */
        rp[i] = (carry & ap[i]) | (~carry & rp[i]);
        /* Wipe the upper half of the scratch value as it is consumed. */
        ap[i] = 0;
    }

    return 1;
}
#endif /* MONT_WORD */

/*
 * Public wrapper around bn_from_mont_fixed_top().
 *
 * Runs the fixed-top conversion of |a| into |ret|, applies bn_correct_top()
 * to |ret| regardless of the outcome, and returns the conversion's status.
 */
int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
    BN_CTX *ctx)
{
    const int status = bn_from_mont_fixed_top(ret, a, mont, ctx);

    bn_correct_top(ret);
    bn_check_top(ret);
    return status;
}

/*
 * Montgomery-reduce |a| into |ret| using the parameters in |mont|.
 * Returns 1 on success and 0 on failure.
 *
 * MONT_WORD builds copy |a| into a BN_CTX temporary (the word-based reducer
 * overwrites its input) and hand it to bn_from_montgomery_word().  Other
 * builds perform the reduction with general BIGNUM operations and mont->Ni.
 */
int bn_from_mont_fixed_top(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
    BN_CTX *ctx)
{
    int ok = 0;
#ifdef MONT_WORD
    BIGNUM *scratch;

    BN_CTX_start(ctx);
    scratch = BN_CTX_get(ctx);
    if (scratch != NULL && BN_copy(scratch, a) != NULL)
        ok = bn_from_montgomery_word(ret, scratch, mont);
    BN_CTX_end(ctx);
#else /* !MONT_WORD */
    BIGNUM *u, *w;

    BN_CTX_start(ctx);
    u = BN_CTX_get(ctx);
    w = BN_CTX_get(ctx);
    /* Only the last BN_CTX_get() result is checked. */
    if (w == NULL)
        goto done;

    /* u = a masked to ri bits */
    if (BN_copy(u, a) == NULL)
        goto done;
    BN_mask_bits(u, mont->ri);

    /* w = (u * Ni) masked to ri bits */
    if (!BN_mul(w, u, &mont->Ni, ctx))
        goto done;
    BN_mask_bits(w, mont->ri);

    /* ret = (a + w * N) >> ri */
    if (!BN_mul(u, w, &mont->N, ctx)
        || !BN_add(w, a, u)
        || !BN_rshift(ret, w, mont->ri))
        goto done;

    /* One conditional subtraction when the magnitude is not below N. */
    if (BN_ucmp(ret, &mont->N) >= 0 && !BN_usub(ret, ret, &mont->N))
        goto done;

    ok = 1;
    bn_check_top(ret);
done:
    BN_CTX_end(ctx);
#endif /* MONT_WORD */
    return ok;
}

/*
 * Montgomery-multiply |a| by mont->RR and store the product in |r|.
 * Returns whatever bn_mul_mont_fixed_top() returns.
 */
int bn_to_mont_fixed_top(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont,
    BN_CTX *ctx)
{
    const BIGNUM *rr = &mont->RR;

    return bn_mul_mont_fixed_top(r, a, rr, mont, ctx);
}

/*
 * Allocate a BN_MONT_CTX with OPENSSL_malloc(), initialise it with
 * BN_MONT_CTX_init() and mark it BN_FLG_MALLOCED so that
 * BN_MONT_CTX_free() releases the structure itself.
 * Returns NULL if the allocation fails.
 */
BN_MONT_CTX *BN_MONT_CTX_new(void)
{
    BN_MONT_CTX *mont = OPENSSL_malloc(sizeof(*mont));

    if (mont == NULL)
        return NULL;

    BN_MONT_CTX_init(mont);
    /* Set after init, which resets flags to 0. */
    mont->flags = BN_FLG_MALLOCED;
    return mont;
}

/*
 * Put |ctx| into its empty state: the three embedded BIGNUMs are passed to
 * bn_init() and ri, both n0 words and flags are set to zero.
 */
void BN_MONT_CTX_init(BN_MONT_CTX *ctx)
{
    bn_init(&ctx->RR);
    bn_init(&ctx->N);
    bn_init(&ctx->Ni);

    ctx->n0[0] = 0;
    ctx->n0[1] = 0;
    ctx->ri = 0;
    ctx->flags = 0;
}

/*
 * Release |mont|.  NULL is accepted and ignored.  The embedded BIGNUMs are
 * passed to BN_clear_free(); the structure itself is only freed when it
 * carries BN_FLG_MALLOCED.
 */
void BN_MONT_CTX_free(BN_MONT_CTX *mont)
{
    if (mont != NULL) {
        BN_clear_free(&mont->RR);
        BN_clear_free(&mont->N);
        BN_clear_free(&mont->Ni);
        if ((mont->flags & BN_FLG_MALLOCED) != 0)
            OPENSSL_free(mont);
    }
}

/*
 * Initialise |mont| for Montgomery arithmetic modulo |mod|.
 *
 * Copies |mod| into mont->N (sign cleared, BN_FLG_CONSTTIME propagated),
 * sets mont->ri, fills mont->n0[] (MONT_WORD builds) or mont->Ni (bignum
 * builds), and finally sets mont->RR to 2^(2 * ri) mod N, zero-padded to
 * N.top words and flagged BN_FLG_FIXED_TOP.
 *
 * Returns 1 on success and 0 on failure; a zero |mod| is rejected up front.
 * Temporaries are taken from |ctx|.
 */
int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
{
    int i, ret = 0;
    BIGNUM *Ri, *R;

    if (BN_is_zero(mod))
        return 0;

    BN_CTX_start(ctx);
    if ((Ri = BN_CTX_get(ctx)) == NULL)
        goto err;
    R = &(mont->RR); /* grab RR as a temp */
    if (BN_copy(&(mont->N), mod) == NULL)
        goto err; /* Set N */
    if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0)
        BN_set_flags(&(mont->N), BN_FLG_CONSTTIME);
    mont->N.neg = 0;

#ifdef MONT_WORD
    {
        /* tmod is a stack BIGNUM backed by buf[]: the low word(s) of mod. */
        BIGNUM tmod;
        BN_ULONG buf[2];

        bn_init(&tmod);
        tmod.d = buf;
        tmod.dmax = 2;
        tmod.neg = 0;

        if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0)
            BN_set_flags(&tmod, BN_FLG_CONSTTIME);

        /* ri = bit length of mod rounded up to a whole number of words */
        mont->ri = (BN_num_bits(mod) + (BN_BITS2 - 1)) / BN_BITS2 * BN_BITS2;

#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2 <= 32)
        /*
         * Only certain BN_BITS2<=32 platforms actually make use of n0[1],
         * and we could use the #else case (with a shorter R value) for the
         * others.  However, currently only the assembler files do know which
         * is which.
         */

        BN_zero(R);
        if (!(BN_set_bit(R, 2 * BN_BITS2)))
            goto err;

        /* tmod = N mod double word size */
        tmod.top = 0;
        if ((buf[0] = mod->d[0]))
            tmod.top = 1;
        if ((buf[1] = mod->top > 1 ? mod->d[1] : 0))
            tmod.top = 2;

        if (BN_is_one(&tmod))
            BN_zero(Ri);
        else if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL)
            goto err;
        if (!BN_lshift(Ri, Ri, 2 * BN_BITS2))
            goto err; /* R*Ri */
        if (!BN_is_zero(Ri)) {
            if (!BN_sub_word(Ri, 1))
                goto err;
        } else { /* if N mod word size == 1 */

            /*
             * Two words are written below, so request capacity in words.
             * bn_expand() counts bits, which made the former
             * sizeof(BN_ULONG) * 2 request too small to guarantee d[1].
             */
            if (bn_wexpand(Ri, 2) == NULL)
                goto err;
            /* Ri-- (mod double word size) */
            Ri->neg = 0;
            Ri->d[0] = BN_MASK2;
            Ri->d[1] = BN_MASK2;
            Ri->top = 2;
        }
        if (!BN_div(Ri, NULL, Ri, &tmod, ctx))
            goto err;
        /*
         * Ni = (R*Ri-1)/N, keep only couple of least significant words:
         */
        mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
        mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0;
#else
        BN_zero(R);
        if (!(BN_set_bit(R, BN_BITS2)))
            goto err; /* R */

        buf[0] = mod->d[0]; /* tmod = N mod word size */
        buf[1] = 0;
        tmod.top = buf[0] != 0 ? 1 : 0;
        /* Ri = R^-1 mod N */
        if (BN_is_one(&tmod))
            BN_zero(Ri);
        else if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL)
            goto err;
        if (!BN_lshift(Ri, Ri, BN_BITS2))
            goto err; /* R*Ri */
        if (!BN_is_zero(Ri)) {
            if (!BN_sub_word(Ri, 1))
                goto err;
        } else { /* if N mod word size == 1 */

            if (!BN_set_word(Ri, BN_MASK2))
                goto err; /* Ri-- (mod word size) */
        }
        if (!BN_div(Ri, NULL, Ri, &tmod, ctx))
            goto err;
        /*
         * Ni = (R*Ri-1)/N, keep only least significant word:
         */
        mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
        mont->n0[1] = 0;
#endif
    }
#else /* !MONT_WORD */
    { /* bignum version */
        mont->ri = BN_num_bits(&mont->N);
        BN_zero(R);
        if (!BN_set_bit(R, mont->ri))
            goto err; /* R = 2^ri */
        /* Ri = R^-1 mod N */
        if ((BN_mod_inverse(Ri, R, &mont->N, ctx)) == NULL)
            goto err;
        if (!BN_lshift(Ri, Ri, mont->ri))
            goto err; /* R*Ri */
        if (!BN_sub_word(Ri, 1))
            goto err;
        /*
         * Ni = (R*Ri-1) / N
         */
        if (!BN_div(&(mont->Ni), NULL, Ri, &mont->N, ctx))
            goto err;
    }
#endif

    /* setup RR for conversions: RR = 2^(2 * ri) mod N */
    BN_zero(&(mont->RR));
    if (!BN_set_bit(&(mont->RR), mont->ri * 2))
        goto err;
    if (!BN_mod(&(mont->RR), &(mont->RR), &(mont->N), ctx))
        goto err;

    /*
     * Zero-pad RR up to N.top words.  |ret| doubles as the loop bound here
     * and only becomes the success status afterwards.
     */
    for (i = mont->RR.top, ret = mont->N.top; i < ret; i++)
        mont->RR.d[i] = 0;
    mont->RR.top = ret;
    mont->RR.flags |= BN_FLG_FIXED_TOP;

    ret = 1;
err:
    BN_CTX_end(ctx);
    return ret;
}

/*
 * Copy RR, N, Ni, ri and both n0 words from |from| into |to|.
 * to->flags is left untouched.  Copying an object onto itself is a no-op.
 *
 * Returns |to| on success, or NULL as soon as one BN_copy() fails; in that
 * case the BIGNUMs copied before the failure stay modified.
 */
BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
{
    if (to == from)
        return to;

    if (BN_copy(&to->RR, &from->RR) == NULL
        || BN_copy(&to->N, &from->N) == NULL
        || BN_copy(&to->Ni, &from->Ni) == NULL)
        return NULL;

    to->n0[0] = from->n0[0];
    to->n0[1] = from->n0[1];
    to->ri = from->ri;
    return to;
}

/*
 * Return the BN_MONT_CTX stored in |*pmont|, creating it for |mod| first if
 * the slot is still empty.  |lock| guards every access to |*pmont|.
 *
 * Returns the context on success, or NULL if a lock cannot be taken or the
 * new context cannot be allocated or set up.
 */
BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, CRYPTO_RWLOCK *lock,
    const BIGNUM *mod, BN_CTX *ctx)
{
    BN_MONT_CTX *cached, *fresh;

    /* Fast path: somebody already published a context. */
    if (!CRYPTO_THREAD_read_lock(lock))
        return NULL;
    cached = *pmont;
    CRYPTO_THREAD_unlock(lock);
    if (cached != NULL)
        return cached;

    /*
     * The setup math is deliberately done with no lock held, so unrelated
     * threads are not serialised behind it.  If several threads race to
     * initialise the same 'pmont', each builds its own context and only the
     * first one to take the write lock gets to publish; the others discard
     * their work.
     */
    fresh = BN_MONT_CTX_new();
    if (fresh == NULL)
        return NULL;
    if (!BN_MONT_CTX_set(fresh, mod, ctx))
        goto discard;

    /* The locked compare-and-set, after the local work is done. */
    if (!CRYPTO_THREAD_write_lock(lock))
        goto discard;

    cached = *pmont;
    if (cached != NULL) {
        /* Lost the race: drop ours and hand back the published one. */
        BN_MONT_CTX_free(fresh);
        fresh = cached;
    } else {
        *pmont = fresh;
    }
    CRYPTO_THREAD_unlock(lock);
    return fresh;

discard:
    BN_MONT_CTX_free(fresh);
    return NULL;
}

/*
 * Fill |ctx| from caller-supplied values instead of computing them.
 *
 * N is copied from |modulus|, RR is decoded from the |rrlen| bytes at |rr|
 * with BN_bin2bn(), and ri is taken as given.  |nlo| and |nhi| supply n0:
 * with 32-bit (or smaller) words n0[0] = nlo, and n0[1] = nhi only when
 * OPENSSL_BN_ASM_MONT is defined (otherwise 0); with wider words the two
 * halves are combined into n0[0] and n0[1] is 0.
 *
 * Returns 1 on success, 0 if copying N or decoding RR fails.
 */
int ossl_bn_mont_ctx_set(BN_MONT_CTX *ctx, const BIGNUM *modulus, int ri, const unsigned char *rr,
    int rrlen, uint32_t nlo, uint32_t nhi)
{
    if (BN_copy(&ctx->N, modulus) == NULL
        || BN_bin2bn(rr, rrlen, &ctx->RR) == NULL)
        return 0;

    ctx->ri = ri;

#if BN_BITS2 <= 32
    ctx->n0[0] = nlo;
# if defined(OPENSSL_BN_ASM_MONT)
    ctx->n0[1] = nhi;
# else
    ctx->n0[1] = 0;
# endif
#else
    ctx->n0[0] = ((BN_ULONG)nhi << 32) | nlo;
    ctx->n0[1] = 0;
#endif

    return 1;
}

/*
 * Compare two Montgomery contexts.  Returns 1 when ri, RR, flags and the
 * build-specific inverse (n0[0..1] with MONT_WORD, Ni otherwise) all match,
 * and 0 otherwise.  The modulus N itself is not compared here.
 */
int ossl_bn_mont_ctx_eq(const BN_MONT_CTX *m1, const BN_MONT_CTX *m2)
{
    if (m1->ri != m2->ri
        || BN_cmp(&m1->RR, &m2->RR) != 0
        || m1->flags != m2->flags)
        return 0;

#ifdef MONT_WORD
    return m1->n0[0] == m2->n0[0] && m1->n0[1] == m2->n0[1];
#else
    return BN_cmp(&m1->Ni, &m2->Ni) == 0;
#endif
}

Coverage Report

Created: 2026-04-08 06:20

Line	Count	Source
1		/*
2		* Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved.
3		*
4		* Licensed under the Apache License 2.0 (the "License"). You may not use
5		* this file except in compliance with the License. You can obtain a copy
6		* in the file LICENSE in the source distribution or at
7		* https://www.openssl.org/source/license.html
8		*/
9
10		/*
11		* Details about Montgomery multiplication algorithms can be found in
12		* https://www.microsoft.com/en-us/research/wp-content/uploads/1996/01/j37acmon.pdf
13		* and https://cetinkayakoc.net/docs/r01.pdf
14		*/
15
16		#include "internal/cryptlib.h"
17		#include "bn_local.h"
18
19		#define MONT_WORD /* use the faster word-based algorithm */
20
21		#ifdef MONT_WORD
22		static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont);
23		#endif
24
25		int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
26		BN_MONT_CTX *mont, BN_CTX *ctx)
27	0	{
28	0	int ret = bn_mul_mont_fixed_top(r, a, b, mont, ctx);
29
30	0	bn_correct_top(r);
31	0	bn_check_top(r);
32
33	0	return ret;
34	0	}
35
36		int bn_mul_mont_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
37		BN_MONT_CTX *mont, BN_CTX *ctx)
38	0	{
39	0	BIGNUM *tmp;
40	0	int ret = 0;
41	0	int num = mont->N.top;
42
43	0	#if defined(OPENSSL_BN_ASM_MONT) && defined(MONT_WORD)
44	0	if (num > 1 && num <= BN_SOFT_LIMIT && a->top == num && b->top == num) {
45	0	if (bn_wexpand(r, num) == NULL)
46	0	return 0;
47	0	if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) {
48	0	r->neg = a->neg ^ b->neg;
49	0	r->top = num;
50	0	r->flags \|= BN_FLG_FIXED_TOP;
51	0	return 1;
52	0	}
53	0	}
54	0	#endif
55
56	0	if ((a->top + b->top) > 2 * num)
57	0	return 0;
58
59	0	BN_CTX_start(ctx);
60	0	tmp = BN_CTX_get(ctx);
61	0	if (tmp == NULL)
62	0	goto err;
63
64	0	bn_check_top(tmp);
65	0	if (a == b) {
66	0	if (!bn_sqr_fixed_top(tmp, a, ctx))
67	0	goto err;
68	0	} else {
69	0	if (!bn_mul_fixed_top(tmp, a, b, ctx))
70	0	goto err;
71	0	}
72		/* reduce from aRR to aR */
73	0	#ifdef MONT_WORD
74	0	if (!bn_from_montgomery_word(r, tmp, mont))
75	0	goto err;
76		#else
77		if (!BN_from_montgomery(r, tmp, mont, ctx))
78		goto err;
79		#endif
80	0	ret = 1;
81	0	err:
82	0	BN_CTX_end(ctx);
83	0	return ret;
84	0	}
85
86		#ifdef MONT_WORD
87		static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
88	0	{
89	0	BIGNUM *n;
90	0	BN_ULONG *ap, *np, *rp, n0, v, carry;
91	0	int nl, max, i;
92	0	unsigned int rtop;
93
94	0	n = &(mont->N);
95	0	nl = n->top;
96	0	if (nl == 0) {
97	0	ret->top = 0;
98	0	return 1;
99	0	}
100
101	0	max = (2 * nl); /* carry is stored separately */
102	0	if (bn_wexpand(r, max) == NULL)
103	0	return 0;
104
105	0	r->neg ^= n->neg;
106	0	np = n->d;
107	0	rp = r->d;
108
109		/* clear the top words of T */
110	0	for (rtop = r->top, i = 0; i < max; i++) {
111	0	v = (BN_ULONG)0 - ((i - rtop) >> (8 * sizeof(rtop) - 1));
112	0	rp[i] &= v;
113	0	}
114
115	0	r->top = max;
116	0	r->flags \|= BN_FLG_FIXED_TOP;
117	0	n0 = mont->n0[0];
118
119		/*
120		* Add multiples of \|n\| to \|r\| until R = 2^(nl * BN_BITS2) divides it. On
121		* input, we had \|r\| < \|n\| * R, so now \|r\| < 2 * \|n\| * R. Note that \|r\|
122		* includes \|carry\| which is stored separately.
123		*/
124	0	for (carry = 0, i = 0; i < nl; i++, rp++) {
125	0	v = bn_mul_add_words(rp, np, nl, (rp[0] * n0) & BN_MASK2);
126	0	v = (v + carry + rp[nl]) & BN_MASK2;
127	0	carry \|= (v != rp[nl]);
128	0	carry &= (v <= rp[nl]);
129	0	rp[nl] = v;
130	0	}
131
132	0	if (bn_wexpand(ret, nl) == NULL)
133	0	return 0;
134	0	ret->top = nl;
135	0	ret->flags \|= BN_FLG_FIXED_TOP;
136	0	ret->neg = r->neg;
137
138	0	rp = ret->d;
139
140		/*
141		* Shift \|nl\| words to divide by R. We have \|ap\| < 2 * \|n\|. Note that \|ap\|
142		* includes \|carry\| which is stored separately.
143		*/
144	0	ap = &(r->d[nl]);
145
146	0	carry -= bn_sub_words(rp, ap, np, nl);
147		/*
148		* \|carry\| is -1 if \|ap\| - \|np\| underflowed or zero if it did not. Note
149		* \|carry\| cannot be 1. That would imply the subtraction did not fit in
150		* \|nl\| words, and we know at most one subtraction is needed.
151		*/
152	0	for (i = 0; i < nl; i++) {
153	0	rp[i] = (carry & ap[i]) \| (~carry & rp[i]);
154	0	ap[i] = 0;
155	0	}
156
157	0	return 1;
158	0	}
159		#endif /* MONT_WORD */
160
161		int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
162		BN_CTX *ctx)
163	0	{
164	0	int retn;
165
166	0	retn = bn_from_mont_fixed_top(ret, a, mont, ctx);
167	0	bn_correct_top(ret);
168	0	bn_check_top(ret);
169
170	0	return retn;
171	0	}
172
173		int bn_from_mont_fixed_top(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
174		BN_CTX *ctx)
175	0	{
176	0	int retn = 0;
177	0	#ifdef MONT_WORD
178	0	BIGNUM *t;
179
180	0	BN_CTX_start(ctx);
181	0	if ((t = BN_CTX_get(ctx)) && BN_copy(t, a)) {
182	0	retn = bn_from_montgomery_word(ret, t, mont);
183	0	}
184	0	BN_CTX_end(ctx);
185		#else /* !MONT_WORD */
186		BIGNUM *t1, *t2;
187
188		BN_CTX_start(ctx);
189		t1 = BN_CTX_get(ctx);
190		t2 = BN_CTX_get(ctx);
191		if (t2 == NULL)
192		goto err;
193
194		if (BN_copy(t1, a) == NULL)
195		goto err;
196		BN_mask_bits(t1, mont->ri);
197
198		if (!BN_mul(t2, t1, &mont->Ni, ctx))
199		goto err;
200		BN_mask_bits(t2, mont->ri);
201
202		if (!BN_mul(t1, t2, &mont->N, ctx))
203		goto err;
204		if (!BN_add(t2, a, t1))
205		goto err;
206		if (!BN_rshift(ret, t2, mont->ri))
207		goto err;
208
209		if (BN_ucmp(ret, &(mont->N)) >= 0) {
210		if (!BN_usub(ret, ret, &(mont->N)))
211		goto err;
212		}
213		retn = 1;
214		bn_check_top(ret);
215		err:
216		BN_CTX_end(ctx);
217		#endif /* MONT_WORD */
218	0	return retn;
219	0	}
220
221		int bn_to_mont_fixed_top(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont,
222		BN_CTX *ctx)
223	0	{
224	0	return bn_mul_mont_fixed_top(r, a, &(mont->RR), mont, ctx);
225	0	}
226
227		BN_MONT_CTX *BN_MONT_CTX_new(void)
228	0	{
229	0	BN_MONT_CTX *ret;
230
231	0	if ((ret = OPENSSL_malloc(sizeof(*ret))) == NULL)
232	0	return NULL;
233
234	0	BN_MONT_CTX_init(ret);
235	0	ret->flags = BN_FLG_MALLOCED;
236	0	return ret;
237	0	}
238
239		void BN_MONT_CTX_init(BN_MONT_CTX *ctx)
240	0	{
241	0	ctx->ri = 0;
242	0	bn_init(&ctx->RR);
243	0	bn_init(&ctx->N);
244	0	bn_init(&ctx->Ni);
245	0	ctx->n0[0] = ctx->n0[1] = 0;
246	0	ctx->flags = 0;
247	0	}
248
249		void BN_MONT_CTX_free(BN_MONT_CTX *mont)
250	152k	{
251	152k	if (mont == NULL)
252	152k	return;
253	0	BN_clear_free(&mont->RR);
254	0	BN_clear_free(&mont->N);
255	0	BN_clear_free(&mont->Ni);
256	0	if (mont->flags & BN_FLG_MALLOCED)
257	0	OPENSSL_free(mont);
258	0	}
259
260		int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
261	0	{
262	0	int i, ret = 0;
263	0	BIGNUM *Ri, *R;
264
265	0	if (BN_is_zero(mod))
266	0	return 0;
267
268	0	BN_CTX_start(ctx);
269	0	if ((Ri = BN_CTX_get(ctx)) == NULL)
270	0	goto err;
271	0	R = &(mont->RR); /* grab RR as a temp */
272	0	if (BN_copy(&(mont->N), mod) == NULL)
273	0	goto err; /* Set N */
274	0	if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0)
275	0	BN_set_flags(&(mont->N), BN_FLG_CONSTTIME);
276	0	mont->N.neg = 0;
277
278	0	#ifdef MONT_WORD
279	0	{
280	0	BIGNUM tmod;
281	0	BN_ULONG buf[2];
282
283	0	bn_init(&tmod);
284	0	tmod.d = buf;
285	0	tmod.dmax = 2;
286	0	tmod.neg = 0;
287
288	0	if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0)
289	0	BN_set_flags(&tmod, BN_FLG_CONSTTIME);
290
291	0	mont->ri = (BN_num_bits(mod) + (BN_BITS2 - 1)) / BN_BITS2 * BN_BITS2;
292
293		#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2 <= 32)
294		/*
295		* Only certain BN_BITS2<=32 platforms actually make use of n0[1],
296		* and we could use the #else case (with a shorter R value) for the
297		* others. However, currently only the assembler files do know which
298		* is which.
299		*/
300
301		BN_zero(R);
302		if (!(BN_set_bit(R, 2 * BN_BITS2)))
303		goto err;
304
305		tmod.top = 0;
306		if ((buf[0] = mod->d[0]))
307		tmod.top = 1;
308		if ((buf[1] = mod->top > 1 ? mod->d[1] : 0))
309		tmod.top = 2;
310
311		if (BN_is_one(&tmod))
312		BN_zero(Ri);
313		else if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL)
314		goto err;
315		if (!BN_lshift(Ri, Ri, 2 * BN_BITS2))
316		goto err; /* R*Ri */
317		if (!BN_is_zero(Ri)) {
318		if (!BN_sub_word(Ri, 1))
319		goto err;
320		} else { /* if N mod word size == 1 */
321
322		if (bn_expand(Ri, (int)sizeof(BN_ULONG) * 2) == NULL)
323		goto err;
324		/* Ri-- (mod double word size) */
325		Ri->neg = 0;
326		Ri->d[0] = BN_MASK2;
327		Ri->d[1] = BN_MASK2;
328		Ri->top = 2;
329		}
330		if (!BN_div(Ri, NULL, Ri, &tmod, ctx))
331		goto err;
332		/*
333		* Ni = (R*Ri-1)/N, keep only couple of least significant words:
334		*/
335		mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
336		mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0;
337		#else
338	0	BN_zero(R);
339	0	if (!(BN_set_bit(R, BN_BITS2)))
340	0	goto err; /* R */
341
342	0	buf[0] = mod->d[0]; /* tmod = N mod word size */
343	0	buf[1] = 0;
344	0	tmod.top = buf[0] != 0 ? 1 : 0;
345		/* Ri = R^-1 mod N */
346	0	if (BN_is_one(&tmod))
347	0	BN_zero(Ri);
348	0	else if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL)
349	0	goto err;
350	0	if (!BN_lshift(Ri, Ri, BN_BITS2))
351	0	goto err; /* R*Ri */
352	0	if (!BN_is_zero(Ri)) {
353	0	if (!BN_sub_word(Ri, 1))
354	0	goto err;
355	0	} else { /* if N mod word size == 1 */
356
357	0	if (!BN_set_word(Ri, BN_MASK2))
358	0	goto err; /* Ri-- (mod word size) */
359	0	}
360	0	if (!BN_div(Ri, NULL, Ri, &tmod, ctx))
361	0	goto err;
362		/*
363		* Ni = (R*Ri-1)/N, keep only least significant word:
364		*/
365	0	mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
366	0	mont->n0[1] = 0;
367	0	#endif
368	0	}
369		#else /* !MONT_WORD */
370		{ /* bignum version */
371		mont->ri = BN_num_bits(&mont->N);
372		BN_zero(R);
373		if (!BN_set_bit(R, mont->ri))
374		goto err; /* R = 2^ri */
375		/* Ri = R^-1 mod N */
376		if ((BN_mod_inverse(Ri, R, &mont->N, ctx)) == NULL)
377		goto err;
378		if (!BN_lshift(Ri, Ri, mont->ri))
379		goto err; /* R*Ri */
380		if (!BN_sub_word(Ri, 1))
381		goto err;
382		/*
383		* Ni = (R*Ri-1) / N
384		*/
385		if (!BN_div(&(mont->Ni), NULL, Ri, &mont->N, ctx))
386		goto err;
387		}
388		#endif
389
390		/* setup RR for conversions */
391	0	BN_zero(&(mont->RR));
392	0	if (!BN_set_bit(&(mont->RR), mont->ri * 2))
393	0	goto err;
394	0	if (!BN_mod(&(mont->RR), &(mont->RR), &(mont->N), ctx))
395	0	goto err;
396
397	0	for (i = mont->RR.top, ret = mont->N.top; i < ret; i++)
398	0	mont->RR.d[i] = 0;
399	0	mont->RR.top = ret;
400	0	mont->RR.flags \|= BN_FLG_FIXED_TOP;
401
402	0	ret = 1;
403	0	err:
404	0	BN_CTX_end(ctx);
405	0	return ret;
406	0	}
407
408		BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
409	0	{
410	0	if (to == from)
411	0	return to;
412
413	0	if (BN_copy(&(to->RR), &(from->RR)) == NULL)
414	0	return NULL;
415	0	if (BN_copy(&(to->N), &(from->N)) == NULL)
416	0	return NULL;
417	0	if (BN_copy(&(to->Ni), &(from->Ni)) == NULL)
418	0	return NULL;
419	0	to->ri = from->ri;
420	0	to->n0[0] = from->n0[0];
421	0	to->n0[1] = from->n0[1];
422	0	return to;
423	0	}
424
425		BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, CRYPTO_RWLOCK *lock,
426		const BIGNUM *mod, BN_CTX *ctx)
427	0	{
428	0	BN_MONT_CTX *ret;
429
430	0	if (!CRYPTO_THREAD_read_lock(lock))
431	0	return NULL;
432	0	ret = *pmont;
433	0	CRYPTO_THREAD_unlock(lock);
434	0	if (ret)
435	0	return ret;
436
437		/*
438		* We don't want to serialize globally while doing our lazy-init math in
439		* BN_MONT_CTX_set. That punishes threads that are doing independent
440		* things. Instead, punish the case where more than one thread tries to
441		* lazy-init the same 'pmont', by having each do the lazy-init math work
442		* independently and only use the one from the thread that wins the race
443		* (the losers throw away the work they've done).
444		*/
445	0	ret = BN_MONT_CTX_new();
446	0	if (ret == NULL)
447	0	return NULL;
448	0	if (!BN_MONT_CTX_set(ret, mod, ctx)) {
449	0	BN_MONT_CTX_free(ret);
450	0	return NULL;
451	0	}
452
453		/* The locked compare-and-set, after the local work is done. */
454	0	if (!CRYPTO_THREAD_write_lock(lock)) {
455	0	BN_MONT_CTX_free(ret);
456	0	return NULL;
457	0	}
458
459	0	if (*pmont) {
460	0	BN_MONT_CTX_free(ret);
461	0	ret = *pmont;
462	0	} else
463	0	*pmont = ret;
464	0	CRYPTO_THREAD_unlock(lock);
465	0	return ret;
466	0	}
467
468		int ossl_bn_mont_ctx_set(BN_MONT_CTX *ctx, const BIGNUM *modulus, int ri, const unsigned char *rr,
469		int rrlen, uint32_t nlo, uint32_t nhi)
470	0	{
471	0	if (BN_copy(&ctx->N, modulus) == NULL)
472	0	return 0;
473	0	if (BN_bin2bn(rr, rrlen, &ctx->RR) == NULL)
474	0	return 0;
475	0	ctx->ri = ri;
476		#if (BN_BITS2 <= 32) && defined(OPENSSL_BN_ASM_MONT)
477		ctx->n0[0] = nlo;
478		ctx->n0[1] = nhi;
479		#elif BN_BITS2 <= 32
480		ctx->n0[0] = nlo;
481		ctx->n0[1] = 0;
482		#else
483	0	ctx->n0[0] = ((BN_ULONG)nhi << 32) \| nlo;
484	0	ctx->n0[1] = 0;
485	0	#endif
486
487	0	return 1;
488	0	}
489
490		int ossl_bn_mont_ctx_eq(const BN_MONT_CTX *m1, const BN_MONT_CTX *m2)
491	0	{
492	0	if (m1->ri != m2->ri)
493	0	return 0;
494	0	if (BN_cmp(&m1->RR, &m2->RR) != 0)
495	0	return 0;
496	0	if (m1->flags != m2->flags)
497	0	return 0;
498	0	#ifdef MONT_WORD
499	0	if (m1->n0[0] != m2->n0[0])
500	0	return 0;
501	0	if (m1->n0[1] != m2->n0[1])
502	0	return 0;
503		#else
504		if (BN_cmp(&m1->Ni, &m2->Ni) != 0)
505		return 0;
506		#endif
507	0	return 1;
508	0	}