Coverage Report

Created: 2026-04-11 06:29

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openssl/crypto/ml_dsa/ml_dsa_ntt.c
Line
Count
Source
1
/*
2
 * Copyright 2024-2026 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
#include "ml_dsa_local.h"
11
#include "ml_dsa_poly.h"
12
#include <openssl/crypto.h>
13
14
/* Assembly function declarations for AVX2 implementations */
15
#if !defined(OPENSSL_NO_ASM) && (defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64))
16
#define ML_DSA_NTT_ASM
17
int ml_dsa_ntt_avx2_capable(void);
18
void ml_dsa_poly_ntt_avx2(uint32_t *p_coeff, const uint32_t *p_zetas);
19
void ml_dsa_poly_ntt_inverse_avx2(uint32_t *p_coeff);
20
void ml_dsa_poly_ntt_mult_avx2(const uint32_t *a, const uint32_t *b, uint32_t *out);
21
#endif
22
23
/*
24
 * Function pointer types for NTT operations.
25
 * These allow selecting AVX2 or scalar implementations at initialization time.
26
 */
27
typedef void (*ml_dsa_poly_ntt_fn)(POLY *p);
28
typedef void (*ml_dsa_poly_ntt_inverse_fn)(POLY *p);
29
typedef void (*ml_dsa_poly_ntt_mult_fn)(const POLY *lhs, const POLY *rhs,
30
    POLY *out);
31
32
/* Forward declarations of scalar NTT functions */
33
static void poly_ntt_scalar(POLY *p);
34
static void poly_ntt_inverse_scalar(POLY *p);
35
static void poly_ntt_mult_scalar(const POLY *lhs, const POLY *rhs, POLY *out);
36
37
/*
38
 * NTT function pointers - initialized to scalar implementations by default.
39
 */
40
static ml_dsa_poly_ntt_fn poly_ntt_impl = poly_ntt_scalar;
41
static ml_dsa_poly_ntt_inverse_fn poly_ntt_inverse_impl = poly_ntt_inverse_scalar;
42
static ml_dsa_poly_ntt_mult_fn poly_ntt_mult_impl = poly_ntt_mult_scalar;
43
44
static CRYPTO_ONCE ml_dsa_ntt_once = CRYPTO_ONCE_STATIC_INIT;
45
46
/*
47
 * This file has multiple parts required for fast matrix multiplication,
48
 * 1) NTT (See https://eprint.iacr.org/2024/585.pdf)
49
 * NTT and NTT inverse transformations are Discrete Fourier Transforms in a
50
 * polynomial ring. Fast-Fourier Transformations can then be applied to make
51
 * multiplications n log(n). This uses the symmetry of the transformation to
52
 * reduce computations.
53
 *
54
 * 2) Montgomery multiplication
55
 * The multiplication of a.b mod q requires division by q which is a slow operation.
56
 *
57
 * When many multiplications mod q are required montgomery multiplication
58
 * can be used. This requires a number R > q such that R & q are coprime
59
 * (i.e. GCD(R, q) = 1), so that division happens using R instead of q.
60
 * If r is a power of 2 then this division can be done as a bit shift.
61
 *
62
 * Given that q = 2^23 - 2^13 + 1
63
 * We can chose a Montgomery multiplier of R = 2^32.
64
 *
65
 * To transform |a| into Montgomery form |m| we use
66
 *   m = a mod q * ((2^32)*(2^32) mod q)
67
 * which is then Montgomery reduced, removing the excess factor of R = 2^32.
68
 */
69
70
/*
71
 * The table in FIPS 204 Appendix B uses the following formula
72
 * zeta[k]= 1753^bitrev(k) mod q for (k = 1..255) (The first value is not used).
73
 *
74
 * As this implementation uses montgomery form with a multiplier of 2^32.
75
 * The values need to be transformed i.e.
76
 *
77
 * zetasMontgomery[k] = reduce_montgomery(zeta[k] * (2^32 * 2^32 mod(q)))
78
 * reduce_montgomery() is defined below..
79
 */
80
static const uint32_t zetas_montgomery[256] = {
81
    4193792, 25847, 5771523, 7861508, 237124, 7602457, 7504169, 466468,
82
    1826347, 2353451, 8021166, 6288512, 3119733, 5495562, 3111497, 2680103,
83
    2725464, 1024112, 7300517, 3585928, 7830929, 7260833, 2619752, 6271868,
84
    6262231, 4520680, 6980856, 5102745, 1757237, 8360995, 4010497, 280005,
85
    2706023, 95776, 3077325, 3530437, 6718724, 4788269, 5842901, 3915439,
86
    4519302, 5336701, 3574422, 5512770, 3539968, 8079950, 2348700, 7841118,
87
    6681150, 6736599, 3505694, 4558682, 3507263, 6239768, 6779997, 3699596,
88
    811944, 531354, 954230, 3881043, 3900724, 5823537, 2071892, 5582638,
89
    4450022, 6851714, 4702672, 5339162, 6927966, 3475950, 2176455, 6795196,
90
    7122806, 1939314, 4296819, 7380215, 5190273, 5223087, 4747489, 126922,
91
    3412210, 7396998, 2147896, 2715295, 5412772, 4686924, 7969390, 5903370,
92
    7709315, 7151892, 8357436, 7072248, 7998430, 1349076, 1852771, 6949987,
93
    5037034, 264944, 508951, 3097992, 44288, 7280319, 904516, 3958618,
94
    4656075, 8371839, 1653064, 5130689, 2389356, 8169440, 759969, 7063561,
95
    189548, 4827145, 3159746, 6529015, 5971092, 8202977, 1315589, 1341330,
96
    1285669, 6795489, 7567685, 6940675, 5361315, 4499357, 4751448, 3839961,
97
    2091667, 3407706, 2316500, 3817976, 5037939, 2244091, 5933984, 4817955,
98
    266997, 2434439, 7144689, 3513181, 4860065, 4621053, 7183191, 5187039,
99
    900702, 1859098, 909542, 819034, 495491, 6767243, 8337157, 7857917,
100
    7725090, 5257975, 2031748, 3207046, 4823422, 7855319, 7611795, 4784579,
101
    342297, 286988, 5942594, 4108315, 3437287, 5038140, 1735879, 203044,
102
    2842341, 2691481, 5790267, 1265009, 4055324, 1247620, 2486353, 1595974,
103
    4613401, 1250494, 2635921, 4832145, 5386378, 1869119, 1903435, 7329447,
104
    7047359, 1237275, 5062207, 6950192, 7929317, 1312455, 3306115, 6417775,
105
    7100756, 1917081, 5834105, 7005614, 1500165, 777191, 2235880, 3406031,
106
    7838005, 5548557, 6709241, 6533464, 5796124, 4656147, 594136, 4603424,
107
    6366809, 2432395, 2454455, 8215696, 1957272, 3369112, 185531, 7173032,
108
    5196991, 162844, 1616392, 3014001, 810149, 1652634, 4686184, 6581310,
109
    5341501, 3523897, 3866901, 269760, 2213111, 7404533, 1717735, 472078,
110
    7953734, 1723600, 6577327, 1910376, 6712985, 7276084, 8119771, 4546524,
111
    5441381, 6144432, 7959518, 6094090, 183443, 7403526, 1612842, 4834730,
112
    7826001, 3919660, 8332111, 7018208, 3937738, 1400424, 7534263, 1976782
113
};
114
115
/*
116
 * @brief When multiplying 2 numbers mod q that are in montgomery form, the
117
 * product mod q needs to be multiplied by 2^-32 to be in montgomery form.
118
 * See FIPS 204, Algorithm 49, MontgomeryReduce()
119
 * Note it is slightly different due to the input range being positive
120
 *
121
 * @param a is the result of a multiply of 2 numbers in montgomery form,
122
 *          in the range 0...(2^32)*q
123
 * @returns The Montgomery form of 'a' with multiplier 2^32 in the range 0..q-1
124
 *          The result is congruent to x * 2^-32 mod q
125
 */
126
static uint32_t reduce_montgomery(uint64_t a)
127
0
{
128
0
    uint64_t t = (uint32_t)a * (uint32_t)ML_DSA_Q_NEG_INV; /* a * -qinv */
129
0
    uint64_t b = a + t * ML_DSA_Q; /* a - t * q */
130
0
    uint32_t c = b >> 32; /* /2^32  = 0..2q */
131
132
0
    return reduce_once(c); /* 0..q */
133
0
}
134
135
/*
136
 * Scalar (fallback) implementations of NTT operations.
137
 * These are used when AVX2 is not available.
138
 */
139
static void poly_ntt_mult_scalar(const POLY *lhs, const POLY *rhs, POLY *out)
140
0
{
141
0
    int i;
142
143
0
    for (i = 0; i < ML_DSA_NUM_POLY_COEFFICIENTS; i++)
144
0
        out->coeff[i] = reduce_montgomery((uint64_t)lhs->coeff[i]
145
0
            * (uint64_t)rhs->coeff[i]);
146
0
}
147
148
static void poly_ntt_scalar(POLY *p)
149
0
{
150
0
    int i, j, k;
151
0
    int step;
152
0
    int offset = ML_DSA_NUM_POLY_COEFFICIENTS;
153
154
    /* Step: 1, 2, 4, 8, ..., 128 */
155
0
    for (step = 1; step < ML_DSA_NUM_POLY_COEFFICIENTS; step <<= 1) {
156
0
        k = 0;
157
0
        offset >>= 1; /* Offset: 128, 64, 32, 16, ..., 1 */
158
0
        for (i = 0; i < step; i++) {
159
0
            const uint32_t z_step_root = zetas_montgomery[step + i];
160
161
0
            for (j = k; j < k + offset; j++) {
162
0
                uint32_t w_even = p->coeff[j];
163
0
                uint32_t t_odd = reduce_montgomery((uint64_t)z_step_root
164
0
                    * (uint64_t)p->coeff[j + offset]);
165
166
0
                p->coeff[j] = reduce_once(w_even + t_odd);
167
0
                p->coeff[j + offset] = mod_sub(w_even, t_odd);
168
0
            }
169
0
            k += 2 * offset;
170
0
        }
171
0
    }
172
0
}
173
174
static void poly_ntt_inverse_scalar(POLY *p)
175
0
{
176
    /*
177
     * Step: 128, 64, 32, 16, ..., 1
178
     * Offset: 1, 2, 4, 8, ..., 128
179
     */
180
0
    int i, j, k, offset, step = ML_DSA_NUM_POLY_COEFFICIENTS;
181
    /*
182
     * The multiplicative inverse of 256 mod q, in Montgomery form is
183
     * ((256^-1 mod q) * ((2^32 * 2^32) mod q)) mod q = (8347681 * 2365951) mod 8380417
184
     */
185
0
    static const uint32_t inverse_degree_montgomery = 41978;
186
187
0
    for (offset = 1; offset < ML_DSA_NUM_POLY_COEFFICIENTS; offset <<= 1) {
188
0
        step >>= 1;
189
0
        k = 0;
190
0
        for (i = 0; i < step; i++) {
191
0
            const uint32_t step_root = ML_DSA_Q - zetas_montgomery[step + (step - 1 - i)];
192
193
0
            for (j = k; j < k + offset; j++) {
194
0
                uint32_t even = p->coeff[j];
195
0
                uint32_t odd = p->coeff[j + offset];
196
197
0
                p->coeff[j] = reduce_once(odd + even);
198
0
                p->coeff[j + offset] = reduce_montgomery((uint64_t)step_root
199
0
                    * (uint64_t)(ML_DSA_Q + even - odd));
200
0
            }
201
0
            k += 2 * offset;
202
0
        }
203
0
    }
204
0
    for (i = 0; i < ML_DSA_NUM_POLY_COEFFICIENTS; i++)
205
0
        p->coeff[i] = reduce_montgomery((uint64_t)p->coeff[i]
206
0
            * (uint64_t)inverse_degree_montgomery);
207
0
}
208
209
/*
210
 * AVX2 wrapper functions
211
 */
212
#ifdef ML_DSA_NTT_ASM
213
static void poly_ntt_mult_avx2_wrapper(const POLY *lhs, const POLY *rhs,
214
    POLY *out)
215
{
216
    ml_dsa_poly_ntt_mult_avx2(&lhs->coeff[0], &rhs->coeff[0], &out->coeff[0]);
217
}
218
219
static void poly_ntt_avx2_wrapper(POLY *p)
220
{
221
    ml_dsa_poly_ntt_avx2(&p->coeff[0], zetas_montgomery);
222
}
223
224
static void poly_ntt_inverse_avx2_wrapper(POLY *p)
225
{
226
    ml_dsa_poly_ntt_inverse_avx2(&p->coeff[0]);
227
}
228
#endif
229
230
/*
231
 * Initialize NTT function pointers to AVX2 implementations if available.
232
 * Scalar implementations are used by default.
233
 */
234
static void ml_dsa_ntt_init(void)
235
0
{
236
#ifdef ML_DSA_NTT_ASM
237
    if (ml_dsa_ntt_avx2_capable()) {
238
        poly_ntt_impl = poly_ntt_avx2_wrapper;
239
        poly_ntt_inverse_impl = poly_ntt_inverse_avx2_wrapper;
240
        poly_ntt_mult_impl = poly_ntt_mult_avx2_wrapper;
241
    }
242
#endif
243
0
}
244
245
/*
246
 * @brief Multiply two polynomials in the number theoretically transformed state.
247
 * See FIPS 204, Algorithm 45, MultiplyNTT()
248
 * This function has been modified to use montgomery multiplication
249
 *
250
 * @param lhs A polynomial multiplicand
251
 * @param rhs A polynomial multiplier
252
 * @param out The returned result of the polynomial multiply
253
 */
254
void ossl_ml_dsa_poly_ntt_mult(const POLY *lhs, const POLY *rhs, POLY *out)
255
0
{
256
0
    (void)CRYPTO_THREAD_run_once(&ml_dsa_ntt_once, ml_dsa_ntt_init);
257
0
    poly_ntt_mult_impl(lhs, rhs, out);
258
0
}
259
260
/*
261
 * In place number theoretic transform of a given polynomial.
262
 *
263
 * See FIPS 204, Algorithm 41, NTT()
264
 * This function uses montgomery multiplication.
265
 *
266
 * @param p a polynomial that is used as the input, that is replaced with
267
 *        the NTT of the polynomial
268
 */
269
void ossl_ml_dsa_poly_ntt(POLY *p)
270
0
{
271
0
    (void)CRYPTO_THREAD_run_once(&ml_dsa_ntt_once, ml_dsa_ntt_init);
272
0
    poly_ntt_impl(p);
273
0
}
274
275
/*
276
 * @brief In place inverse number theoretic transform of a given polynomial.
277
 * See FIPS 204, Algorithm 42,  NTT^-1()
278
 *
279
 * @param p a polynomial that is used as the input, that is overwritten with
280
 *          the inverse of the NTT.
281
 */
282
void ossl_ml_dsa_poly_ntt_inverse(POLY *p)
283
0
{
284
0
    (void)CRYPTO_THREAD_run_once(&ml_dsa_ntt_once, ml_dsa_ntt_init);
285
0
    poly_ntt_inverse_impl(p);
286
0
}