/src/openssl/crypto/ml_dsa/ml_dsa_sample.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  |  * Copyright 2024-2025 The OpenSSL Project Authors. All Rights Reserved.  | 
3  |  |  *  | 
4  |  |  * Licensed under the Apache License 2.0 (the "License").  You may not use  | 
5  |  |  * this file except in compliance with the License.  You can obtain a copy  | 
6  |  |  * in the file LICENSE in the source distribution or at  | 
7  |  |  * https://www.openssl.org/source/license.html  | 
8  |  |  */  | 
9  |  |  | 
10  |  | #include <openssl/byteorder.h>  | 
11  |  | #include "ml_dsa_local.h"  | 
12  |  | #include "ml_dsa_vector.h"  | 
13  |  | #include "ml_dsa_matrix.h"  | 
14  |  | #include "ml_dsa_hash.h"  | 
15  |  | #include "internal/sha3.h"  | 
16  |  | #include "internal/packet.h"  | 
17  |  |  | 
18  |  | #define SHAKE128_BLOCKSIZE SHA3_BLOCKSIZE(128)  | 
19  |  | #define SHAKE256_BLOCKSIZE SHA3_BLOCKSIZE(256)  | 
20  |  |  | 
21  |  | /*  | 
22  |  |  * This is a constant time version of n % 5  | 
23  |  |  * Note that 0xFFFF / 5 = 0x3333, 2 is added to make an over-estimate of 1/5  | 
24  |  |  * and then we divide by (0xFFFF + 1)  | 
25  |  |  */  | 
26  | 0  | #define MOD5(n) ((n) - 5 * (0x3335 * (n) >> 16))  | 
27  |  |  | 
28  |  | #if SHAKE128_BLOCKSIZE % 3 != 0  | 
29  |  | # error "rej_ntt_poly() requires SHAKE128_BLOCKSIZE to be a multiple of 3"  | 
30  |  | #endif  | 
31  |  |  | 
32  |  | typedef int (COEFF_FROM_NIBBLE_FUNC)(uint32_t nibble, uint32_t *out);  | 
33  |  |  | 
34  |  | static COEFF_FROM_NIBBLE_FUNC coeff_from_nibble_4;  | 
35  |  | static COEFF_FROM_NIBBLE_FUNC coeff_from_nibble_2;  | 
36  |  |  | 
37  |  | /**  | 
38  |  |  * @brief Combine 3 bytes to form an coefficient.  | 
39  |  |  * See FIPS 204, Algorithm 14, CoeffFromThreeBytes()  | 
40  |  |  *  | 
41  |  |  * This is not constant time as it is used to generate the matrix A which is public.  | 
42  |  |  *  | 
43  |  |  * @param s A byte array of 3 uniformly distributed bytes.  | 
44  |  |  * @param out The returned coefficient in the range 0..q-1.  | 
45  |  |  * @returns 1 if the value is less than q or 0 otherwise.  | 
46  |  |  *          This is used for rejection sampling.  | 
47  |  |  */  | 
48  |  | static ossl_inline int coeff_from_three_bytes(const uint8_t *s, uint32_t *out)  | 
49  | 0  | { | 
50  |  |     /* Zero out the top bit of the 3rd byte to get a value in the range 0..2^23-1) */  | 
51  | 0  |     *out = (uint32_t)s[0] | ((uint32_t)s[1] << 8) | (((uint32_t)s[2] & 0x7f) << 16);  | 
52  | 0  |     return *out < ML_DSA_Q;  | 
53  | 0  | }  | 
54  |  |  | 
55  |  | /**  | 
56  |  |  * @brief Generate a value in the range (q-4..0..4)  | 
57  |  |  * See FIPS 204, Algorithm 15, CoeffFromHalfByte() where eta = 4  | 
58  |  |  * Note the FIPS 204 code uses the range -4..4 (whereas this code adds q to the  | 
59  |  |  * negative numbers).  | 
60  |  |  *  | 
61  |  |  * @param nibble A value in the range 0..15  | 
62  |  |  * @param out The returned value if the range (q-4)..0..4 if nibble is < 9  | 
63  |  |  * @returns 1 nibble was in range, or 0 if the nibble was rejected.  | 
64  |  |  */  | 
65  |  | static ossl_inline int coeff_from_nibble_4(uint32_t nibble, uint32_t *out)  | 
66  | 0  | { | 
67  |  |     /*  | 
68  |  |      * This is not constant time but will not leak any important info since  | 
69  |  |      * the value is either chosen or thrown away.  | 
70  |  |      */  | 
71  | 0  |     if (value_barrier_32(nibble < 9)) { | 
72  | 0  |         *out = mod_sub(4, nibble);  | 
73  | 0  |         return 1;  | 
74  | 0  |     }  | 
75  | 0  |     return 0;  | 
76  | 0  | }  | 
77  |  |  | 
78  |  | /**  | 
79  |  |  * @brief Generate a value in the range (q-2..0..2)  | 
80  |  |  * See FIPS 204, Algorithm 15, CoeffFromHalfByte() where eta = 2  | 
81  |  |  * Note the FIPS 204 code uses the range -2..2 (whereas this code adds q to the  | 
82  |  |  * negative numbers).  | 
83  |  |  *  | 
84  |  |  * @param nibble A value in the range 0..15  | 
85  |  |  * @param out The returned value if the range (q-2)..0..2 if nibble is < 15  | 
86  |  |  * @returns 1 nibble was in range, or 0 if the nibble was rejected.  | 
87  |  |  */  | 
88  |  | static ossl_inline int coeff_from_nibble_2(uint32_t nibble, uint32_t *out)  | 
89  | 0  | { | 
90  | 0  |     if (value_barrier_32(nibble < 15)) { | 
91  | 0  |         *out = mod_sub(2, MOD5(nibble));  | 
92  | 0  |         return 1;  | 
93  | 0  |     }  | 
94  | 0  |     return 0;  | 
95  | 0  | }  | 
96  |  |  | 
97  |  | /**  | 
98  |  |  * @brief Use a seed value to generate a polynomial with coefficients in the  | 
99  |  |  * range of 0..q-1 using rejection sampling.  | 
100  |  |  * SHAKE128 is used to absorb the seed, and then sequences of 3 sample bytes are  | 
101  |  |  * squeezed to try to produce coefficients.  | 
102  |  |  * The SHAKE128 stream is used to get uniformly distributed elements.  | 
103  |  |  * This algorithm is used for matrix expansion and only operates on public inputs.  | 
104  |  |  *  | 
105  |  |  * See FIPS 204, Algorithm 30, RejNTTPoly()  | 
106  |  |  *  | 
107  |  |  * @param g_ctx A EVP_MD_CTX object used for sampling the seed.  | 
108  |  |  * @param md A pre-fetched SHAKE128 object.  | 
109  |  |  * @param seed The seed to use for sampling.  | 
110  |  |  * @param seed_len The size of |seed|  | 
111  |  |  * @param out The returned polynomial with coefficients in the range of  | 
112  |  |  *            0..q-1. This range is required for NTT.  | 
113  |  |  * @returns 1 if the polynomial was successfully generated, or 0 if any of the  | 
114  |  |  *            digest operations failed.  | 
115  |  |  */  | 
116  |  | static int rej_ntt_poly(EVP_MD_CTX *g_ctx, const EVP_MD *md,  | 
117  |  |                         const uint8_t *seed, size_t seed_len, POLY *out)  | 
118  | 0  | { | 
119  | 0  |     int j = 0;  | 
120  | 0  |     uint8_t blocks[SHAKE128_BLOCKSIZE], *b, *end = blocks + sizeof(blocks);  | 
121  |  |  | 
122  |  |     /*  | 
123  |  |      * Instead of just squeezing 3 bytes at a time, we grab a whole block  | 
124  |  |      * Note that the shake128 blocksize of 168 is divisible by 3.  | 
125  |  |      */  | 
126  | 0  |     if (!shake_xof(g_ctx, md, seed, seed_len, blocks, sizeof(blocks)))  | 
127  | 0  |         return 0;  | 
128  |  |  | 
129  | 0  |     while (1) { | 
130  | 0  |         for (b = blocks; b < end; b += 3) { | 
131  | 0  |             if (coeff_from_three_bytes(b, &(out->coeff[j]))) { | 
132  | 0  |                 if (++j >= ML_DSA_NUM_POLY_COEFFICIENTS)  | 
133  | 0  |                     return 1;   /* finished */  | 
134  | 0  |             }  | 
135  | 0  |         }  | 
136  | 0  |         if (!EVP_DigestSqueeze(g_ctx, blocks, sizeof(blocks)))  | 
137  | 0  |             return 0;  | 
138  | 0  |     }  | 
139  | 0  | }  | 
140  |  |  | 
141  |  | /**  | 
142  |  |  * @brief Use a seed value to generate a polynomial with coefficients in the  | 
143  |  |  * range of ((q-eta)..0..eta) using rejection sampling. eta is either 2 or 4.  | 
144  |  |  * SHAKE256 is used to absorb the seed, and then samples are squeezed.  | 
145  |  |  * See FIPS 204, Algorithm 31, RejBoundedPoly()  | 
146  |  |  *  | 
147  |  |  * @param h_ctx A EVP_MD_CTX object context used to sample the seed.  | 
148  |  |  * @param md A pre-fetched SHAKE256 object.  | 
149  |  |  * @param coef_from_nibble A function that is dependent on eta, which takes a  | 
150  |  |  *                         nibble and tries to see if it is in the correct range.  | 
151  |  |  * @param seed The seed to use for sampling.  | 
152  |  |  * @param seed_len The size of |seed|  | 
153  |  |  * @param out The returned polynomial with coefficients in the range of  | 
154  |  |  *            ((q-eta)..0..eta)  | 
155  |  |  * @returns 1 if the polynomial was successfully generated, or 0 if any of the  | 
156  |  |  *            digest operations failed.  | 
157  |  |  */  | 
158  |  | static int rej_bounded_poly(EVP_MD_CTX *h_ctx, const EVP_MD *md,  | 
159  |  |                             COEFF_FROM_NIBBLE_FUNC *coef_from_nibble,  | 
160  |  |                             const uint8_t *seed, size_t seed_len, POLY *out)  | 
161  | 0  | { | 
162  | 0  |     int j = 0;  | 
163  | 0  |     uint32_t z0, z1;  | 
164  | 0  |     uint8_t blocks[SHAKE256_BLOCKSIZE], *b, *end = blocks + sizeof(blocks);  | 
165  |  |  | 
166  |  |     /* Instead of just squeezing 1 byte at a time, we grab a whole block */  | 
167  | 0  |     if (!shake_xof(h_ctx, md, seed, seed_len, blocks, sizeof(blocks)))  | 
168  | 0  |         return 0;  | 
169  |  |  | 
170  | 0  |     while (1) { | 
171  | 0  |         for (b = blocks; b < end; b++) { | 
172  | 0  |             z0 = *b & 0x0F; /* lower nibble of byte */  | 
173  | 0  |             z1 = *b >> 4;   /* high nibble of byte */  | 
174  |  | 
  | 
175  | 0  |             if (coef_from_nibble(z0, &out->coeff[j])  | 
176  | 0  |                     && ++j >= ML_DSA_NUM_POLY_COEFFICIENTS)  | 
177  | 0  |                 return 1;  | 
178  | 0  |             if (coef_from_nibble(z1, &out->coeff[j])  | 
179  | 0  |                     && ++j >= ML_DSA_NUM_POLY_COEFFICIENTS)  | 
180  | 0  |                 return 1;  | 
181  | 0  |         }  | 
182  | 0  |         if (!EVP_DigestSqueeze(h_ctx, blocks, sizeof(blocks)))  | 
183  | 0  |             return 0;  | 
184  | 0  |     }  | 
185  | 0  | }  | 
186  |  |  | 
187  |  | /**  | 
188  |  |  * @brief Generate a k * l matrix that has uniformly distributed polynomial  | 
189  |  |  *        elements using rejection sampling.  | 
190  |  |  * See FIPS 204, Algorithm 32, ExpandA()  | 
191  |  |  *  | 
192  |  |  * @param g_ctx A EVP_MD_CTX context used for rejection sampling  | 
193  |  |  *              seed values generated from the seed rho.  | 
194  |  |  * @param md A pre-fetched SHAKE128 object  | 
195  |  |  * @param rho A 32 byte seed to generated the matrix from.  | 
196  |  |  * @param out The generated k * l matrix of polynomials with coefficients  | 
197  |  |  *            in the range of 0..q-1.  | 
198  |  |  * @returns 1 if the matrix was generated, or 0 on error.  | 
199  |  |  */  | 
200  |  | int ossl_ml_dsa_matrix_expand_A(EVP_MD_CTX *g_ctx, const EVP_MD *md,  | 
201  |  |                                 const uint8_t *rho, MATRIX *out)  | 
202  | 0  | { | 
203  | 0  |     int ret = 0;  | 
204  | 0  |     size_t i, j;  | 
205  | 0  |     uint8_t derived_seed[ML_DSA_RHO_BYTES + 2];  | 
206  | 0  |     POLY *poly = out->m_poly;  | 
207  |  |  | 
208  |  |     /* The seed used for each matrix element is rho + column_index + row_index */  | 
209  | 0  |     memcpy(derived_seed, rho, ML_DSA_RHO_BYTES);  | 
210  |  | 
  | 
211  | 0  |     for (i = 0; i < out->k; i++) { | 
212  | 0  |         for (j = 0; j < out->l; j++) { | 
213  | 0  |             derived_seed[ML_DSA_RHO_BYTES + 1] = (uint8_t)i;  | 
214  | 0  |             derived_seed[ML_DSA_RHO_BYTES] = (uint8_t)j;  | 
215  |  |             /* Generate the polynomial for each matrix element using a unique seed */  | 
216  | 0  |             if (!rej_ntt_poly(g_ctx, md, derived_seed, sizeof(derived_seed), poly++))  | 
217  | 0  |                 goto err;  | 
218  | 0  |         }  | 
219  | 0  |     }  | 
220  | 0  |     ret = 1;  | 
221  | 0  | err:  | 
222  | 0  |     return ret;  | 
223  | 0  | }  | 
224  |  |  | 
225  |  | /**  | 
226  |  |  * @brief Generates 2 vectors using rejection sampling whose polynomial  | 
227  |  |  * coefficients are in the interval [q-eta..0..eta]  | 
228  |  |  *  | 
229  |  |  * See FIPS 204, Algorithm 33, ExpandS().  | 
230  |  |  * Note that in FIPS 204 the range -eta..eta is used.  | 
231  |  |  *  | 
232  |  |  * @param h_ctx A EVP_MD_CTX context to use to sample the seed.  | 
233  |  |  * @param md A pre-fetched SHAKE256 object.  | 
234  |  |  * @param eta Is either 2 or 4, and determines the range of the coefficients for  | 
235  |  |  *            s1 and s2.  | 
236  |  |  * @param seed A 64 byte seed to use for sampling.  | 
237  |  |  * @param s1 A 1 * l column vector containing polynomials with coefficients in  | 
238  |  |  *           the range (q-eta)..0..eta  | 
239  |  |  * @param s2 A 1 * k column vector containing polynomials with coefficients in  | 
240  |  |  *           the range (q-eta)..0..eta  | 
241  |  |  * @returns 1 if s1 and s2 were successfully generated, or 0 otherwise.  | 
242  |  |  */  | 
243  |  | int ossl_ml_dsa_vector_expand_S(EVP_MD_CTX *h_ctx, const EVP_MD *md, int eta,  | 
244  |  |                                 const uint8_t *seed, VECTOR *s1, VECTOR *s2)  | 
245  | 0  | { | 
246  | 0  |     int ret = 0;  | 
247  | 0  |     size_t i;  | 
248  | 0  |     size_t l = s1->num_poly;  | 
249  | 0  |     size_t k = s2->num_poly;  | 
250  | 0  |     uint8_t derived_seed[ML_DSA_PRIV_SEED_BYTES + 2];  | 
251  | 0  |     COEFF_FROM_NIBBLE_FUNC *coef_from_nibble_fn;  | 
252  |  | 
  | 
253  | 0  |     coef_from_nibble_fn = (eta == ML_DSA_ETA_4) ? coeff_from_nibble_4 : coeff_from_nibble_2;  | 
254  |  |  | 
255  |  |     /*  | 
256  |  |      * Each polynomial generated uses a unique seed that consists of  | 
257  |  |      * seed + counter (where the counter is 2 bytes starting at 0)  | 
258  |  |      */  | 
259  | 0  |     memcpy(derived_seed, seed, ML_DSA_PRIV_SEED_BYTES);  | 
260  | 0  |     derived_seed[ML_DSA_PRIV_SEED_BYTES] = 0;  | 
261  | 0  |     derived_seed[ML_DSA_PRIV_SEED_BYTES + 1] = 0;  | 
262  |  | 
  | 
263  | 0  |     for (i = 0; i < l; i++) { | 
264  | 0  |         if (!rej_bounded_poly(h_ctx, md, coef_from_nibble_fn,  | 
265  | 0  |                               derived_seed, sizeof(derived_seed), &s1->poly[i]))  | 
266  | 0  |             goto err;  | 
267  | 0  |         ++derived_seed[ML_DSA_PRIV_SEED_BYTES];  | 
268  | 0  |     }  | 
269  | 0  |     for (i = 0; i < k; i++) { | 
270  | 0  |         if (!rej_bounded_poly(h_ctx, md, coef_from_nibble_fn,  | 
271  | 0  |                               derived_seed, sizeof(derived_seed), &s2->poly[i]))  | 
272  | 0  |             goto err;  | 
273  | 0  |         ++derived_seed[ML_DSA_PRIV_SEED_BYTES];  | 
274  | 0  |     }  | 
275  | 0  |     ret = 1;  | 
276  | 0  | err:  | 
277  | 0  |     return ret;  | 
278  | 0  | }  | 
279  |  |  | 
280  |  | /* See FIPS 204, Algorithm 34, ExpandMask(), Step 4 & 5 */  | 
281  |  | int ossl_ml_dsa_poly_expand_mask(POLY *out, const uint8_t *seed, size_t seed_len,  | 
282  |  |                                  uint32_t gamma1,  | 
283  |  |                                  EVP_MD_CTX *h_ctx, const EVP_MD *md)  | 
284  | 0  | { | 
285  | 0  |     uint8_t buf[32 * 20];  | 
286  | 0  |     size_t buf_len = 32 * (gamma1 == ML_DSA_GAMMA1_TWO_POWER_19 ? 20 : 18);  | 
287  |  | 
  | 
288  | 0  |     return shake_xof(h_ctx, md, seed, seed_len, buf, buf_len)  | 
289  | 0  |         && ossl_ml_dsa_poly_decode_expand_mask(out, buf, buf_len, gamma1);  | 
290  | 0  | }  | 
291  |  |  | 
292  |  | /*  | 
293  |  |  * @brief Sample a polynomial with coefficients in the range {-1..1}. | 
294  |  |  * The number of non zero values (hamming weight) is given by tau  | 
295  |  |  *  | 
296  |  |  * See FIPS 204, Algorithm 29, SampleInBall()  | 
297  |  |  * This function is assumed to not be constant time.  | 
298  |  |  * The algorithm is based on Durstenfeld's version of the Fisher-Yates shuffle.  | 
299  |  |  *  | 
300  |  |  * Note that the coefficients returned by this implementation are positive  | 
301  |  |  * i.e one of q-1, 0, or 1.  | 
302  |  |  *  | 
303  |  |  * @param tau is the number of +1 or -1's in the polynomial 'out_c' (39, 49 or 60)  | 
304  |  |  *            that is less than or equal to 64  | 
305  |  |  */  | 
306  |  | int ossl_ml_dsa_poly_sample_in_ball(POLY *out_c, const uint8_t *seed, int seed_len,  | 
307  |  |                                     EVP_MD_CTX *h_ctx, const EVP_MD *md,  | 
308  |  |                                     uint32_t tau)  | 
309  | 0  | { | 
310  | 0  |     uint8_t block[SHAKE256_BLOCKSIZE];  | 
311  | 0  |     uint64_t signs;  | 
312  | 0  |     int offset = 8;  | 
313  | 0  |     size_t end;  | 
314  |  |  | 
315  |  |     /*  | 
316  |  |      * Rather than squeeze 8 bytes followed by lots of 1 byte squeezes  | 
317  |  |      * the SHAKE blocksize is squeezed each time and buffered into 'block'.  | 
318  |  |      */  | 
319  | 0  |     if (!shake_xof(h_ctx, md, seed, seed_len, block, sizeof(block)))  | 
320  | 0  |         return 0;  | 
321  |  |  | 
322  |  |     /*  | 
323  |  |      * grab the first 64 bits - since tau < 64  | 
324  |  |      * Each bit gives a +1 or -1 value.  | 
325  |  |      */  | 
326  | 0  |     OPENSSL_load_u64_le(&signs, block);  | 
327  |  | 
  | 
328  | 0  |     poly_zero(out_c);  | 
329  |  |  | 
330  |  |     /* Loop tau times */  | 
331  | 0  |     for (end = 256 - tau; end < 256; end++) { | 
332  | 0  |         size_t index; /* index is a random offset to write +1 or -1 */  | 
333  |  |  | 
334  |  |         /* rejection sample in {0..end} to choose an index to place -1 or 1 into */ | 
335  | 0  |         for (;;) { | 
336  | 0  |             if (offset == sizeof(block)) { | 
337  |  |                 /* squeeze another block if the bytes from block have been used */  | 
338  | 0  |                 if (!EVP_DigestSqueeze(h_ctx, block, sizeof(block)))  | 
339  | 0  |                     return 0;  | 
340  | 0  |                 offset = 0;  | 
341  | 0  |             }  | 
342  |  |  | 
343  | 0  |             index = block[offset++];  | 
344  | 0  |             if (index <= end)  | 
345  | 0  |                 break;  | 
346  | 0  |         }  | 
347  |  |  | 
348  |  |         /*  | 
349  |  |          * In-place swap the coefficient we are about to replace to the end so  | 
350  |  |          * we don't lose any values that have been already written.  | 
351  |  |          */  | 
352  | 0  |         out_c->coeff[end] = out_c->coeff[index];  | 
353  |  |         /* set the random coefficient value to either 1 or q-1 */  | 
354  | 0  |         out_c->coeff[index] = mod_sub(1, 2 * (signs & 1));  | 
355  | 0  |         signs >>= 1; /* grab the next random bit */  | 
356  | 0  |     }  | 
357  | 0  |     return 1;  | 
358  | 0  | }  |