/src/openssl/crypto/ml_dsa/ml_dsa_ntt.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  |  * Copyright 2024-2025 The OpenSSL Project Authors. All Rights Reserved.  | 
3  |  |  *  | 
4  |  |  * Licensed under the Apache License 2.0 (the "License").  You may not use  | 
5  |  |  * this file except in compliance with the License.  You can obtain a copy  | 
6  |  |  * in the file LICENSE in the source distribution or at  | 
7  |  |  * https://www.openssl.org/source/license.html  | 
8  |  |  */  | 
9  |  |  | 
10  |  | #include "ml_dsa_local.h"  | 
11  |  | #include "ml_dsa_poly.h"  | 
12  |  |  | 
13  |  | /*  | 
14  |  |  * This file has multiple parts required for fast matrix multiplication,  | 
15  |  |  * 1) NTT (See https://eprint.iacr.org/2024/585.pdf)  | 
16  |  |  * NTT and NTT inverse transformations are Discrete Fourier Transforms in a  | 
17  |  |  * polynomial ring. Fast-Fourier Transformations can then be applied to make  | 
18  |  |  * multiplications n log(n). This uses the symmetry of the transformation to  | 
19  |  |  * reduce computations.  | 
20  |  |  *  | 
21  |  |  * 2) Montgomery multiplication  | 
22  |  |  * The multiplication of a.b mod q requires division by q which is a slow operation.  | 
23  |  |  *  | 
24  |  |  * When many multiplications mod q are required montgomery multiplication  | 
25  |  |  * can be used. This requires a number R > q such that R & q are coprime  | 
26  |  |  * (i.e. GCD(R, q) = 1), so that division happens using R instead of q.  | 
27  |  |  * If r is a power of 2 then this division can be done as a bit shift.  | 
28  |  |  *  | 
29  |  |  * Given that q = 2^23 - 2^13 + 1  | 
30  |  |  * We can chose a Montgomery multiplier of R = 2^32.  | 
31  |  |  *  | 
32  |  |  * To transform |a| into Montgomery form |m| we use  | 
33  |  |  *   m = a mod q * ((2^32)*(2^32) mod q)  | 
34  |  |  * which is then Montgomery reduced, removing the excess factor of R = 2^32.  | 
35  |  |  */  | 
36  |  |  | 
37  |  | /*  | 
38  |  |  * The table in FIPS 204 Appendix B uses the following formula  | 
39  |  |  * zeta[k]= 1753^bitrev(k) mod q for (k = 1..255) (The first value is not used).  | 
40  |  |  *  | 
41  |  |  * As this implementation uses montgomery form with a multiplier of 2^32.  | 
42  |  |  * The values need to be transformed i.e.  | 
43  |  |  *  | 
44  |  |  * zetasMontgomery[k] = reduce_montgomery(zeta[k] * (2^32 * 2^32 mod(q)))  | 
45  |  |  * reduce_montgomery() is defined below..  | 
46  |  |  */  | 
47  |  | static const uint32_t zetas_montgomery[256] = { | 
48  |  |     4193792, 25847,   5771523, 7861508, 237124,  7602457, 7504169, 466468,  | 
49  |  |     1826347, 2353451, 8021166, 6288512, 3119733, 5495562, 3111497, 2680103,  | 
50  |  |     2725464, 1024112, 7300517, 3585928, 7830929, 7260833, 2619752, 6271868,  | 
51  |  |     6262231, 4520680, 6980856, 5102745, 1757237, 8360995, 4010497, 280005,  | 
52  |  |     2706023, 95776,   3077325, 3530437, 6718724, 4788269, 5842901, 3915439,  | 
53  |  |     4519302, 5336701, 3574422, 5512770, 3539968, 8079950, 2348700, 7841118,  | 
54  |  |     6681150, 6736599, 3505694, 4558682, 3507263, 6239768, 6779997, 3699596,  | 
55  |  |     811944,  531354,  954230,  3881043, 3900724, 5823537, 2071892, 5582638,  | 
56  |  |     4450022, 6851714, 4702672, 5339162, 6927966, 3475950, 2176455, 6795196,  | 
57  |  |     7122806, 1939314, 4296819, 7380215, 5190273, 5223087, 4747489, 126922,  | 
58  |  |     3412210, 7396998, 2147896, 2715295, 5412772, 4686924, 7969390, 5903370,  | 
59  |  |     7709315, 7151892, 8357436, 7072248, 7998430, 1349076, 1852771, 6949987,  | 
60  |  |     5037034, 264944,  508951,  3097992, 44288,   7280319, 904516,  3958618,  | 
61  |  |     4656075, 8371839, 1653064, 5130689, 2389356, 8169440, 759969,  7063561,  | 
62  |  |     189548,  4827145, 3159746, 6529015, 5971092, 8202977, 1315589, 1341330,  | 
63  |  |     1285669, 6795489, 7567685, 6940675, 5361315, 4499357, 4751448, 3839961,  | 
64  |  |     2091667, 3407706, 2316500, 3817976, 5037939, 2244091, 5933984, 4817955,  | 
65  |  |     266997,  2434439, 7144689, 3513181, 4860065, 4621053, 7183191, 5187039,  | 
66  |  |     900702,  1859098, 909542,  819034,  495491,  6767243, 8337157, 7857917,  | 
67  |  |     7725090, 5257975, 2031748, 3207046, 4823422, 7855319, 7611795, 4784579,  | 
68  |  |     342297,  286988,  5942594, 4108315, 3437287, 5038140, 1735879, 203044,  | 
69  |  |     2842341, 2691481, 5790267, 1265009, 4055324, 1247620, 2486353, 1595974,  | 
70  |  |     4613401, 1250494, 2635921, 4832145, 5386378, 1869119, 1903435, 7329447,  | 
71  |  |     7047359, 1237275, 5062207, 6950192, 7929317, 1312455, 3306115, 6417775,  | 
72  |  |     7100756, 1917081, 5834105, 7005614, 1500165, 777191,  2235880, 3406031,  | 
73  |  |     7838005, 5548557, 6709241, 6533464, 5796124, 4656147, 594136,  4603424,  | 
74  |  |     6366809, 2432395, 2454455, 8215696, 1957272, 3369112, 185531,  7173032,  | 
75  |  |     5196991, 162844,  1616392, 3014001, 810149,  1652634, 4686184, 6581310,  | 
76  |  |     5341501, 3523897, 3866901, 269760,  2213111, 7404533, 1717735, 472078,  | 
77  |  |     7953734, 1723600, 6577327, 1910376, 6712985, 7276084, 8119771, 4546524,  | 
78  |  |     5441381, 6144432, 7959518, 6094090, 183443,  7403526, 1612842, 4834730,  | 
79  |  |     7826001, 3919660, 8332111, 7018208, 3937738, 1400424, 7534263, 1976782  | 
80  |  | };  | 
81  |  |  | 
82  |  | /*  | 
83  |  |  * @brief When multiplying 2 numbers mod q that are in montgomery form, the  | 
84  |  |  * product mod q needs to be multiplied by 2^-32 to be in montgomery form.  | 
85  |  |  * See FIPS 204, Algorithm 49, MontgomeryReduce()  | 
86  |  |  * Note it is slightly different due to the input range being positive  | 
87  |  |  *  | 
88  |  |  * @param a is the result of a multiply of 2 numbers in montgomery form,  | 
89  |  |  *          in the range 0...(2^32)*q  | 
90  |  |  * @returns The Montgomery form of 'a' with multiplier 2^32 in the range 0..q-1  | 
91  |  |  *          The result is congruent to x * 2^-32 mod q  | 
92  |  |  */  | 
93  |  | static uint32_t reduce_montgomery(uint64_t a)  | 
94  | 0  | { | 
95  | 0  |     uint64_t t = (uint32_t)a * (uint32_t)ML_DSA_Q_NEG_INV; /* a * -qinv */  | 
96  | 0  |     uint64_t b = a + t * ML_DSA_Q; /* a - t * q */  | 
97  | 0  |     uint32_t c = b >> 32; /* /2^32  = 0..2q */  | 
98  |  | 
  | 
99  | 0  |     return reduce_once(c); /* 0..q */  | 
100  | 0  | }  | 
101  |  |  | 
102  |  | /*  | 
103  |  |  * @brief Multiply two polynomials in the number theoretically transformed state.  | 
104  |  |  * See FIPS 204, Algorithm 45, MultiplyNTT()  | 
105  |  |  * This function has been modified to use montgomery multiplication  | 
106  |  |  *  | 
107  |  |  * @param lhs A polynomial multiplicand  | 
108  |  |  * @param rhs A polynomial multiplier  | 
109  |  |  * @param out The returned result of the polynomial multiply  | 
110  |  |  */  | 
111  |  | void ossl_ml_dsa_poly_ntt_mult(const POLY *lhs, const POLY *rhs, POLY *out)  | 
112  | 0  | { | 
113  | 0  |     int i;  | 
114  |  | 
  | 
115  | 0  |     for (i = 0; i < ML_DSA_NUM_POLY_COEFFICIENTS; i++)  | 
116  | 0  |         out->coeff[i] =  | 
117  | 0  |             reduce_montgomery((uint64_t)lhs->coeff[i] * (uint64_t)rhs->coeff[i]);  | 
118  | 0  | }  | 
119  |  |  | 
120  |  | /*  | 
121  |  |  * In place number theoretic transform of a given polynomial.  | 
122  |  |  *  | 
123  |  |  * See FIPS 204, Algorithm 41, NTT()  | 
124  |  |  * This function uses montgomery multiplication.  | 
125  |  |  *  | 
126  |  |  * @param p a polynomial that is used as the input, that is replaced with  | 
127  |  |  *        the NTT of the polynomial  | 
128  |  |  */  | 
129  |  | void ossl_ml_dsa_poly_ntt(POLY *p)  | 
130  | 0  | { | 
131  | 0  |     int i, j, k;  | 
132  | 0  |     int step;  | 
133  | 0  |     int offset = ML_DSA_NUM_POLY_COEFFICIENTS;  | 
134  |  |  | 
135  |  |     /* Step: 1, 2, 4, 8, ..., 128 */  | 
136  | 0  |     for (step = 1; step < ML_DSA_NUM_POLY_COEFFICIENTS; step <<= 1) { | 
137  | 0  |         k = 0;  | 
138  | 0  |         offset >>= 1; /* Offset: 128, 64, 32, 16, ..., 1 */  | 
139  | 0  |         for (i = 0; i < step; i++) { | 
140  | 0  |             const uint32_t z_step_root = zetas_montgomery[step + i];  | 
141  |  | 
  | 
142  | 0  |             for (j = k; j < k + offset; j++) { | 
143  | 0  |                 uint32_t w_even = p->coeff[j];  | 
144  | 0  |                 uint32_t t_odd =  | 
145  | 0  |                     reduce_montgomery((uint64_t)z_step_root  | 
146  | 0  |                                       * (uint64_t)p->coeff[j + offset]);  | 
147  |  | 
  | 
148  | 0  |                 p->coeff[j] = reduce_once(w_even + t_odd);  | 
149  | 0  |                 p->coeff[j + offset] = mod_sub(w_even, t_odd);  | 
150  | 0  |             }  | 
151  | 0  |             k += 2 * offset;  | 
152  | 0  |         }  | 
153  | 0  |     }  | 
154  | 0  | }  | 
155  |  |  | 
156  |  | /*  | 
157  |  |  * @brief In place inverse number theoretic transform of a given polynomial.  | 
158  |  |  * See FIPS 204, Algorithm 42,  NTT^-1()  | 
159  |  |  *  | 
160  |  |  * @param p a polynomial that is used as the input, that is overwritten with  | 
161  |  |  *          the inverse of the NTT.  | 
162  |  |  */  | 
163  |  | void ossl_ml_dsa_poly_ntt_inverse(POLY *p)  | 
164  | 0  | { | 
165  |  |     /*  | 
166  |  |      * Step: 128, 64, 32, 16, ..., 1  | 
167  |  |      * Offset: 1, 2, 4, 8, ..., 128  | 
168  |  |      */  | 
169  | 0  |     int i, j, k, offset, step = ML_DSA_NUM_POLY_COEFFICIENTS;  | 
170  |  |     /*  | 
171  |  |      * The multiplicative inverse of 256 mod q, in Montgomery form is  | 
172  |  |      * ((256^-1 mod q) * ((2^32 * 2^32) mod q)) mod q = (8347681 * 2365951) mod 8380417  | 
173  |  |      */  | 
174  | 0  |     static const uint32_t inverse_degree_montgomery = 41978;  | 
175  |  | 
  | 
176  | 0  |     for (offset = 1; offset < ML_DSA_NUM_POLY_COEFFICIENTS; offset <<= 1) { | 
177  | 0  |         step >>= 1;  | 
178  | 0  |         k = 0;  | 
179  | 0  |         for (i = 0; i < step; i++) { | 
180  | 0  |             const uint32_t step_root =  | 
181  | 0  |                 ML_DSA_Q - zetas_montgomery[step + (step - 1 - i)];  | 
182  |  | 
  | 
183  | 0  |             for (j = k; j < k + offset; j++) { | 
184  | 0  |                 uint32_t even = p->coeff[j];  | 
185  | 0  |                 uint32_t odd = p->coeff[j + offset];  | 
186  |  | 
  | 
187  | 0  |                 p->coeff[j] = reduce_once(odd + even);  | 
188  | 0  |                 p->coeff[j + offset] =  | 
189  | 0  |                     reduce_montgomery((uint64_t)step_root  | 
190  | 0  |                                       * (uint64_t)(ML_DSA_Q + even - odd));  | 
191  | 0  |             }  | 
192  | 0  |             k += 2 * offset;  | 
193  | 0  |         }  | 
194  | 0  |     }  | 
195  | 0  |     for (i = 0; i < ML_DSA_NUM_POLY_COEFFICIENTS; i++)  | 
196  | 0  |         p->coeff[i] = reduce_montgomery((uint64_t)p->coeff[i] *  | 
197  | 0  |                                         (uint64_t)inverse_degree_montgomery);  | 
198  | 0  | }  |