/src/botan/src/lib/math/mp/mp_monty.cpp

Source
/*
* Montgomery Reduction
* (C) 1999-2011,2025 Jack Lloyd
*     2006 Luca Piccarreta
*     2016 Matthias Gierlings
*
* Botan is released under the Simplified BSD License (see license.txt)
*/

#include <botan/internal/mp_core.h>

#include <botan/assert.h>

namespace Botan {

namespace {

BOTAN_FORCE_INLINE void mul_rev_range(word3<word>& accum, const word ws[], const word p[], size_t bound) {
   /*
   Unrolled version of:

   for(size_t i = 0; i < bound; ++i) {
      accum.mul(ws[i], p[bound - i]);
   }
   */

   size_t lower = 0;
   while(lower < bound) {
      const size_t upper = bound - lower;

      if(upper >= 16) {
         accum.mul(ws[lower], p[upper]);
         accum.mul(ws[lower + 1], p[upper - 1]);
         accum.mul(ws[lower + 2], p[upper - 2]);
         accum.mul(ws[lower + 3], p[upper - 3]);
         accum.mul(ws[lower + 4], p[upper - 4]);
         accum.mul(ws[lower + 5], p[upper - 5]);
         accum.mul(ws[lower + 6], p[upper - 6]);
         accum.mul(ws[lower + 7], p[upper - 7]);
         accum.mul(ws[lower + 8], p[upper - 8]);
         accum.mul(ws[lower + 9], p[upper - 9]);
         accum.mul(ws[lower + 10], p[upper - 10]);
         accum.mul(ws[lower + 11], p[upper - 11]);
         accum.mul(ws[lower + 12], p[upper - 12]);
         accum.mul(ws[lower + 13], p[upper - 13]);
         accum.mul(ws[lower + 14], p[upper - 14]);
         accum.mul(ws[lower + 15], p[upper - 15]);
         lower += 16;
      } else if(upper >= 8) {
         accum.mul(ws[lower], p[upper]);
         accum.mul(ws[lower + 1], p[upper - 1]);
         accum.mul(ws[lower + 2], p[upper - 2]);
         accum.mul(ws[lower + 3], p[upper - 3]);
         accum.mul(ws[lower + 4], p[upper - 4]);
         accum.mul(ws[lower + 5], p[upper - 5]);
         accum.mul(ws[lower + 6], p[upper - 6]);
         accum.mul(ws[lower + 7], p[upper - 7]);
         lower += 8;
      } else if(upper >= 4) {
         accum.mul(ws[lower], p[upper]);
         accum.mul(ws[lower + 1], p[upper - 1]);
         accum.mul(ws[lower + 2], p[upper - 2]);
         accum.mul(ws[lower + 3], p[upper - 3]);
         lower += 4;
      } else if(upper >= 2) {
         accum.mul(ws[lower], p[upper]);
         accum.mul(ws[lower + 1], p[upper - 1]);
         lower += 2;
      } else {
         accum.mul(ws[lower], p[upper]);
         lower += 1;
      }
   }
}

}  // namespace

/*
* Montgomery reduction - product scanning form
*
* Algorithm 5 from "Energy-Efficient Software Implementation of Long
* Integer Modular Arithmetic"
* (https://www.iacr.org/archive/ches2005/006.pdf)
*
* See also
*
* https://eprint.iacr.org/2013/882.pdf
* https://www.microsoft.com/en-us/research/wp-content/uploads/1996/01/j37acmon.pdf
*/
void bigint_monty_redc_generic(
   word r[], const word z[], size_t z_size, const word p[], size_t p_size, word p_dash, word ws[]) {
   BOTAN_ARG_CHECK(z_size >= 2 * p_size && p_size > 0, "Invalid sizes for bigint_monty_redc_generic");

   word3<word> accum;

   accum.add(z[0]);

   ws[0] = accum.monty_step(p[0], p_dash);

   for(size_t i = 1; i != p_size; ++i) {
      mul_rev_range(accum, ws, p, i);
      accum.add(z[i]);
      ws[i] = accum.monty_step(p[0], p_dash);
   }

   for(size_t i = 0; i != p_size - 1; ++i) {
      mul_rev_range(accum, &ws[i + 1], &p[i], p_size - (i + 1));
      accum.add(z[p_size + i]);
      ws[i] = accum.extract();
   }

   accum.add(z[2 * p_size - 1]);

   ws[p_size - 1] = accum.extract();
   // w1 is the final part, which is not stored in the workspace
   const word w1 = accum.extract();

   /*
   * The result might need to be reduced mod p. To avoid a timing
   * channel, always perform the subtraction. If in the compution
   * of x - p a borrow is required then x was already < p.
   *
   * x starts at ws[0] and is p_size bytes long plus a possible high
   * digit left over in w1.
   *
   * x - p starts at z[0] and is also p_size bytes long
   *
   * If borrow was set after the subtraction, then x was already less
   * than p and the subtraction was not needed. In that case overwrite
   * z[0:p_size] with the original x in ws[0:p_size].
   *
   * We only copy out p_size in the final step because we know
   * the Montgomery result is < P
   */

   bigint_monty_maybe_sub(p_size, r, w1, ws, p);
}

}  // namespace Botan

Line	Count	Source
1		/*
2		* Montgomery Reduction
3		* (C) 1999-2011,2025 Jack Lloyd
4		* 2006 Luca Piccarreta
5		* 2016 Matthias Gierlings
6		*
7		* Botan is released under the Simplified BSD License (see license.txt)
8		*/
9
10		#include <botan/internal/mp_core.h>
11
12		#include <botan/assert.h>
13
14		namespace Botan {
15
16		namespace {
17
18	2.05G	BOTAN_FORCE_INLINE void mul_rev_range(word3<word>& accum, const word ws[], const word p[], size_t bound) {
19		/*
20		Unrolled version of:
21
22		for(size_t i = 0; i < bound; ++i) {
23		accum.mul(ws[i], p[bound - i]);
24		}
25		*/
26
27	2.05G	size_t lower = 0;
28	7.74G	while(lower < bound) {
29	5.69G	const size_t upper = bound - lower;
30
31	5.69G	if(upper >= 16) {
32	1.88G	accum.mul(ws[lower], p[upper]);
33	1.88G	accum.mul(ws[lower + 1], p[upper - 1]);
34	1.88G	accum.mul(ws[lower + 2], p[upper - 2]);
35	1.88G	accum.mul(ws[lower + 3], p[upper - 3]);
36	1.88G	accum.mul(ws[lower + 4], p[upper - 4]);
37	1.88G	accum.mul(ws[lower + 5], p[upper - 5]);
38	1.88G	accum.mul(ws[lower + 6], p[upper - 6]);
39	1.88G	accum.mul(ws[lower + 7], p[upper - 7]);
40	1.88G	accum.mul(ws[lower + 8], p[upper - 8]);
41	1.88G	accum.mul(ws[lower + 9], p[upper - 9]);
42	1.88G	accum.mul(ws[lower + 10], p[upper - 10]);
43	1.88G	accum.mul(ws[lower + 11], p[upper - 11]);
44	1.88G	accum.mul(ws[lower + 12], p[upper - 12]);
45	1.88G	accum.mul(ws[lower + 13], p[upper - 13]);
46	1.88G	accum.mul(ws[lower + 14], p[upper - 14]);
47	1.88G	accum.mul(ws[lower + 15], p[upper - 15]);
48	1.88G	lower += 16;
49	3.80G	} else if(upper >= 8) {
50	847M	accum.mul(ws[lower], p[upper]);
51	847M	accum.mul(ws[lower + 1], p[upper - 1]);
52	847M	accum.mul(ws[lower + 2], p[upper - 2]);
53	847M	accum.mul(ws[lower + 3], p[upper - 3]);
54	847M	accum.mul(ws[lower + 4], p[upper - 4]);
55	847M	accum.mul(ws[lower + 5], p[upper - 5]);
56	847M	accum.mul(ws[lower + 6], p[upper - 6]);
57	847M	accum.mul(ws[lower + 7], p[upper - 7]);
58	847M	lower += 8;
59	2.96G	} else if(upper >= 4) {
60	897M	accum.mul(ws[lower], p[upper]);
61	897M	accum.mul(ws[lower + 1], p[upper - 1]);
62	897M	accum.mul(ws[lower + 2], p[upper - 2]);
63	897M	accum.mul(ws[lower + 3], p[upper - 3]);
64	897M	lower += 4;
65	2.06G	} else if(upper >= 2) {
66	1.02G	accum.mul(ws[lower], p[upper]);
67	1.02G	accum.mul(ws[lower + 1], p[upper - 1]);
68	1.02G	lower += 2;
69	1.03G	} else {
70	1.03G	accum.mul(ws[lower], p[upper]);
71	1.03G	lower += 1;
72	1.03G	}
73	5.69G	}
74	2.05G	}
75
76		} // namespace
77
78		/*
79		* Montgomery reduction - product scanning form
80		*
81		* Algorithm 5 from "Energy-Efficient Software Implementation of Long
82		* Integer Modular Arithmetic"
83		* (https://www.iacr.org/archive/ches2005/006.pdf)
84		*
85		* See also
86		*
87		* https://eprint.iacr.org/2013/882.pdf
88		* https://www.microsoft.com/en-us/research/wp-content/uploads/1996/01/j37acmon.pdf
89		*/
90		void bigint_monty_redc_generic(
91	76.0M	word r[], const word z[], size_t z_size, const word p[], size_t p_size, word p_dash, word ws[]) {
92	76.0M	BOTAN_ARG_CHECK(z_size >= 2 * p_size && p_size > 0, "Invalid sizes for bigint_monty_redc_generic");
93
94	76.0M	word3<word> accum;
95
96	76.0M	accum.add(z[0]);
97
98	76.0M	ws[0] = accum.monty_step(p[0], p_dash);
99
100	1.10G	for(size_t i = 1; i != p_size; ++i) {
101	1.02G	mul_rev_range(accum, ws, p, i);
102	1.02G	accum.add(z[i]);
103	1.02G	ws[i] = accum.monty_step(p[0], p_dash);
104	1.02G	}
105
106	1.10G	for(size_t i = 0; i != p_size - 1; ++i) {
107	1.02G	mul_rev_range(accum, &ws[i + 1], &p[i], p_size - (i + 1));
108	1.02G	accum.add(z[p_size + i]);
109	1.02G	ws[i] = accum.extract();
110	1.02G	}
111
112	76.0M	accum.add(z[2 * p_size - 1]);
113
114	76.0M	ws[p_size - 1] = accum.extract();
115		// w1 is the final part, which is not stored in the workspace
116	76.0M	const word w1 = accum.extract();
117
118		/*
119		* The result might need to be reduced mod p. To avoid a timing
120		* channel, always perform the subtraction. If in the compution
121		* of x - p a borrow is required then x was already < p.
122		*
123		* x starts at ws[0] and is p_size bytes long plus a possible high
124		* digit left over in w1.
125		*
126		* x - p starts at z[0] and is also p_size bytes long
127		*
128		* If borrow was set after the subtraction, then x was already less
129		* than p and the subtraction was not needed. In that case overwrite
130		* z[0:p_size] with the original x in ws[0:p_size].
131		*
132		* We only copy out p_size in the final step because we know
133		* the Montgomery result is < P
134		*/
135
136	76.0M	bigint_monty_maybe_sub(p_size, r, w1, ws, p);
137	76.0M	}
138
139		} // namespace Botan

Coverage Report

Created: 2026-01-16 06:20