Coverage Report

Created: 2025-08-28 07:07

/src/openssl35/crypto/ml_dsa/ml_dsa_sample.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2024-2025 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
#include <openssl/byteorder.h>
11
#include "ml_dsa_local.h"
12
#include "ml_dsa_vector.h"
13
#include "ml_dsa_matrix.h"
14
#include "ml_dsa_hash.h"
15
#include "internal/sha3.h"
16
#include "internal/packet.h"
17
18
#define SHAKE128_BLOCKSIZE SHA3_BLOCKSIZE(128)
19
#define SHAKE256_BLOCKSIZE SHA3_BLOCKSIZE(256)
20
21
/*
22
 * This is a constant time version of n % 5
23
 * Note that 0xFFFF / 5 = 0x3333, 2 is added to make an over-estimate of 1/5
24
 * and then we divide by (0xFFFF + 1)
25
 */
26
1.75M
#define MOD5(n) ((n) - 5 * (0x3335 * (n) >> 16))
27
28
#if SHAKE128_BLOCKSIZE % 3 != 0
29
# error "rej_ntt_poly() requires SHAKE128_BLOCKSIZE to be a multiple of 3"
30
#endif
31
32
typedef int (COEFF_FROM_NIBBLE_FUNC)(uint32_t nibble, uint32_t *out);
33
34
static COEFF_FROM_NIBBLE_FUNC coeff_from_nibble_4;
35
static COEFF_FROM_NIBBLE_FUNC coeff_from_nibble_2;
36
37
/**
38
 * @brief Combine 3 bytes to form an coefficient.
39
 * See FIPS 204, Algorithm 14, CoeffFromThreeBytes()
40
 *
41
 * This is not constant time as it is used to generate the matrix A which is public.
42
 *
43
 * @param s A byte array of 3 uniformly distributed bytes.
44
 * @param out The returned coefficient in the range 0..q-1.
45
 * @returns 1 if the value is less than q or 0 otherwise.
46
 *          This is used for rejection sampling.
47
 */
48
static ossl_inline int coeff_from_three_bytes(const uint8_t *s, uint32_t *out)
49
14.1M
{
50
    /* Zero out the top bit of the 3rd byte to get a value in the range 0..2^23-1) */
51
14.1M
    *out = (uint32_t)s[0] | ((uint32_t)s[1] << 8) | (((uint32_t)s[2] & 0x7f) << 16);
52
14.1M
    return *out < ML_DSA_Q;
53
14.1M
}
54
55
/**
56
 * @brief Generate a value in the range (q-4..0..4)
57
 * See FIPS 204, Algorithm 15, CoeffFromHalfByte() where eta = 4
58
 * Note the FIPS 204 code uses the range -4..4 (whereas this code adds q to the
59
 * negative numbers).
60
 *
61
 * @param nibble A value in the range 0..15
62
 * @param out The returned value if the range (q-4)..0..4 if nibble is < 9
63
 * @returns 1 nibble was in range, or 0 if the nibble was rejected.
64
 */
65
static ossl_inline int coeff_from_nibble_4(uint32_t nibble, uint32_t *out)
66
1.44M
{
67
    /*
68
     * This is not constant time but will not leak any important info since
69
     * the value is either chosen or thrown away.
70
     */
71
1.44M
    if (value_barrier_32(nibble < 9)) {
72
813k
        *out = mod_sub(4, nibble);
73
813k
        return 1;
74
813k
    }
75
632k
    return 0;
76
1.44M
}
77
78
/**
79
 * @brief Generate a value in the range (q-2..0..2)
80
 * See FIPS 204, Algorithm 15, CoeffFromHalfByte() where eta = 2
81
 * Note the FIPS 204 code uses the range -2..2 (whereas this code adds q to the
82
 * negative numbers).
83
 *
84
 * @param nibble A value in the range 0..15
85
 * @param out The returned value if the range (q-2)..0..2 if nibble is < 15
86
 * @returns 1 nibble was in range, or 0 if the nibble was rejected.
87
 */
88
static ossl_inline int coeff_from_nibble_2(uint32_t nibble, uint32_t *out)
89
1.86M
{
90
1.86M
    if (value_barrier_32(nibble < 15)) {
91
1.75M
        *out = mod_sub(2, MOD5(nibble));
92
1.75M
        return 1;
93
1.75M
    }
94
116k
    return 0;
95
1.86M
}
96
97
/**
98
 * @brief Use a seed value to generate a polynomial with coefficients in the
99
 * range of 0..q-1 using rejection sampling.
100
 * SHAKE128 is used to absorb the seed, and then sequences of 3 sample bytes are
101
 * squeezed to try to produce coefficients.
102
 * The SHAKE128 stream is used to get uniformly distributed elements.
103
 * This algorithm is used for matrix expansion and only operates on public inputs.
104
 *
105
 * See FIPS 204, Algorithm 30, RejNTTPoly()
106
 *
107
 * @param g_ctx A EVP_MD_CTX object used for sampling the seed.
108
 * @param md A pre-fetched SHAKE128 object.
109
 * @param seed The seed to use for sampling.
110
 * @param seed_len The size of |seed|
111
 * @param out The returned polynomial with coefficients in the range of
112
 *            0..q-1. This range is required for NTT.
113
 * @returns 1 if the polynomial was successfully generated, or 0 if any of the
114
 *            digest operations failed.
115
 */
116
static int rej_ntt_poly(EVP_MD_CTX *g_ctx, const EVP_MD *md,
117
                        const uint8_t *seed, size_t seed_len, POLY *out)
118
55.3k
{
119
55.3k
    int j = 0;
120
55.3k
    uint8_t blocks[SHAKE128_BLOCKSIZE], *b, *end = blocks + sizeof(blocks);
121
122
    /*
123
     * Instead of just squeezing 3 bytes at a time, we grab a whole block
124
     * Note that the shake128 blocksize of 168 is divisible by 3.
125
     */
126
55.3k
    if (!shake_xof(g_ctx, md, seed, seed_len, blocks, sizeof(blocks)))
127
0
        return 0;
128
129
276k
    while (1) {
130
14.4M
        for (b = blocks; b < end; b += 3) {
131
14.1M
            if (coeff_from_three_bytes(b, &(out->coeff[j]))) {
132
14.1M
                if (++j >= ML_DSA_NUM_POLY_COEFFICIENTS)
133
55.3k
                    return 1;   /* finished */
134
14.1M
            }
135
14.1M
        }
136
221k
        if (!EVP_DigestSqueeze(g_ctx, blocks, sizeof(blocks)))
137
0
            return 0;
138
221k
    }
139
55.3k
}
140
141
/**
142
 * @brief Use a seed value to generate a polynomial with coefficients in the
143
 * range of ((q-eta)..0..eta) using rejection sampling. eta is either 2 or 4.
144
 * SHAKE256 is used to absorb the seed, and then samples are squeezed.
145
 * See FIPS 204, Algorithm 31, RejBoundedPoly()
146
 *
147
 * @param h_ctx A EVP_MD_CTX object context used to sample the seed.
148
 * @param md A pre-fetched SHAKE256 object.
149
 * @param coef_from_nibble A function that is dependent on eta, which takes a
150
 *                         nibble and tries to see if it is in the correct range.
151
 * @param seed The seed to use for sampling.
152
 * @param seed_len The size of |seed|
153
 * @param out The returned polynomial with coefficients in the range of
154
 *            ((q-eta)..0..eta)
155
 * @returns 1 if the polynomial was successfully generated, or 0 if any of the
156
 *            digest operations failed.
157
 */
158
static int rej_bounded_poly(EVP_MD_CTX *h_ctx, const EVP_MD *md,
159
                            COEFF_FROM_NIBBLE_FUNC *coef_from_nibble,
160
                            const uint8_t *seed, size_t seed_len, POLY *out)
161
10.0k
{
162
10.0k
    int j = 0;
163
10.0k
    uint32_t z0, z1;
164
10.0k
    uint8_t blocks[SHAKE256_BLOCKSIZE], *b, *end = blocks + sizeof(blocks);
165
166
    /* Instead of just squeezing 1 byte at a time, we grab a whole block */
167
10.0k
    if (!shake_xof(h_ctx, md, seed, seed_len, blocks, sizeof(blocks)))
168
0
        return 0;
169
170
16.8k
    while (1) {
171
1.66M
        for (b = blocks; b < end; b++) {
172
1.66M
            z0 = *b & 0x0F; /* lower nibble of byte */
173
1.66M
            z1 = *b >> 4;   /* high nibble of byte */
174
175
1.66M
            if (coef_from_nibble(z0, &out->coeff[j])
176
1.66M
                    && ++j >= ML_DSA_NUM_POLY_COEFFICIENTS)
177
5.00k
                return 1;
178
1.65M
            if (coef_from_nibble(z1, &out->coeff[j])
179
1.65M
                    && ++j >= ML_DSA_NUM_POLY_COEFFICIENTS)
180
5.02k
                return 1;
181
1.65M
        }
182
6.85k
        if (!EVP_DigestSqueeze(h_ctx, blocks, sizeof(blocks)))
183
0
            return 0;
184
6.85k
    }
185
10.0k
}
186
187
/**
188
 * @brief Generate a k * l matrix that has uniformly distributed polynomial
189
 *        elements using rejection sampling.
190
 * See FIPS 204, Algorithm 32, ExpandA()
191
 *
192
 * @param g_ctx A EVP_MD_CTX context used for rejection sampling
193
 *              seed values generated from the seed rho.
194
 * @param md A pre-fetched SHAKE128 object
195
 * @param rho A 32 byte seed to generated the matrix from.
196
 * @param out The generated k * l matrix of polynomials with coefficients
197
 *            in the range of 0..q-1.
198
 * @returns 1 if the matrix was generated, or 0 on error.
199
 */
200
int ossl_ml_dsa_matrix_expand_A(EVP_MD_CTX *g_ctx, const EVP_MD *md,
201
                                const uint8_t *rho, MATRIX *out)
202
1.69k
{
203
1.69k
    int ret = 0;
204
1.69k
    size_t i, j;
205
1.69k
    uint8_t derived_seed[ML_DSA_RHO_BYTES + 2];
206
1.69k
    POLY *poly = out->m_poly;
207
208
    /* The seed used for each matrix element is rho + column_index + row_index */
209
1.69k
    memcpy(derived_seed, rho, ML_DSA_RHO_BYTES);
210
211
11.6k
    for (i = 0; i < out->k; i++) {
212
65.3k
        for (j = 0; j < out->l; j++) {
213
55.3k
            derived_seed[ML_DSA_RHO_BYTES + 1] = (uint8_t)i;
214
55.3k
            derived_seed[ML_DSA_RHO_BYTES] = (uint8_t)j;
215
            /* Generate the polynomial for each matrix element using a unique seed */
216
55.3k
            if (!rej_ntt_poly(g_ctx, md, derived_seed, sizeof(derived_seed), poly++))
217
0
                goto err;
218
55.3k
        }
219
9.95k
    }
220
1.69k
    ret = 1;
221
1.69k
err:
222
1.69k
    return ret;
223
1.69k
}
224
225
/**
226
 * @brief Generates 2 vectors using rejection sampling whose polynomial
227
 * coefficients are in the interval [q-eta..0..eta]
228
 *
229
 * See FIPS 204, Algorithm 33, ExpandS().
230
 * Note that in FIPS 204 the range -eta..eta is used.
231
 *
232
 * @param h_ctx A EVP_MD_CTX context to use to sample the seed.
233
 * @param md A pre-fetched SHAKE256 object.
234
 * @param eta Is either 2 or 4, and determines the range of the coefficients for
235
 *            s1 and s2.
236
 * @param seed A 64 byte seed to use for sampling.
237
 * @param s1 A 1 * l column vector containing polynomials with coefficients in
238
 *           the range (q-eta)..0..eta
239
 * @param s2 A 1 * k column vector containing polynomials with coefficients in
240
 *           the range (q-eta)..0..eta
241
 * @returns 1 if s1 and s2 were successfully generated, or 0 otherwise.
242
 */
243
int ossl_ml_dsa_vector_expand_S(EVP_MD_CTX *h_ctx, const EVP_MD *md, int eta,
244
                                const uint8_t *seed, VECTOR *s1, VECTOR *s2)
245
898
{
246
898
    int ret = 0;
247
898
    size_t i;
248
898
    size_t l = s1->num_poly;
249
898
    size_t k = s2->num_poly;
250
898
    uint8_t derived_seed[ML_DSA_PRIV_SEED_BYTES + 2];
251
898
    COEFF_FROM_NIBBLE_FUNC *coef_from_nibble_fn;
252
253
898
    coef_from_nibble_fn = (eta == ML_DSA_ETA_4) ? coeff_from_nibble_4 : coeff_from_nibble_2;
254
255
    /*
256
     * Each polynomial generated uses a unique seed that consists of
257
     * seed + counter (where the counter is 2 bytes starting at 0)
258
     */
259
898
    memcpy(derived_seed, seed, ML_DSA_PRIV_SEED_BYTES);
260
898
    derived_seed[ML_DSA_PRIV_SEED_BYTES] = 0;
261
898
    derived_seed[ML_DSA_PRIV_SEED_BYTES + 1] = 0;
262
263
5.62k
    for (i = 0; i < l; i++) {
264
4.72k
        if (!rej_bounded_poly(h_ctx, md, coef_from_nibble_fn,
265
4.72k
                              derived_seed, sizeof(derived_seed), &s1->poly[i]))
266
0
            goto err;
267
4.72k
        ++derived_seed[ML_DSA_PRIV_SEED_BYTES];
268
4.72k
    }
269
6.19k
    for (i = 0; i < k; i++) {
270
5.29k
        if (!rej_bounded_poly(h_ctx, md, coef_from_nibble_fn,
271
5.29k
                              derived_seed, sizeof(derived_seed), &s2->poly[i]))
272
0
            goto err;
273
5.29k
        ++derived_seed[ML_DSA_PRIV_SEED_BYTES];
274
5.29k
    }
275
898
    ret = 1;
276
898
err:
277
898
    return ret;
278
898
}
279
280
/* See FIPS 204, Algorithm 34, ExpandMask(), Step 4 & 5 */
281
int ossl_ml_dsa_poly_expand_mask(POLY *out, const uint8_t *seed, size_t seed_len,
282
                                 uint32_t gamma1,
283
                                 EVP_MD_CTX *h_ctx, const EVP_MD *md)
284
9.06k
{
285
9.06k
    uint8_t buf[32 * 20];
286
9.06k
    size_t buf_len = 32 * (gamma1 == ML_DSA_GAMMA1_TWO_POWER_19 ? 20 : 18);
287
288
9.06k
    return shake_xof(h_ctx, md, seed, seed_len, buf, buf_len)
289
9.06k
        && ossl_ml_dsa_poly_decode_expand_mask(out, buf, buf_len, gamma1);
290
9.06k
}
291
292
/*
293
 * @brief Sample a polynomial with coefficients in the range {-1..1}.
294
 * The number of non zero values (hamming weight) is given by tau
295
 *
296
 * See FIPS 204, Algorithm 29, SampleInBall()
297
 * This function is assumed to not be constant time.
298
 * The algorithm is based on Durstenfeld's version of the Fisher-Yates shuffle.
299
 *
300
 * Note that the coefficients returned by this implementation are positive
301
 * i.e one of q-1, 0, or 1.
302
 *
303
 * @param tau is the number of +1 or -1's in the polynomial 'out_c' (39, 49 or 60)
304
 *            that is less than or equal to 64
305
 */
306
int ossl_ml_dsa_poly_sample_in_ball(POLY *out_c, const uint8_t *seed, int seed_len,
307
                                    EVP_MD_CTX *h_ctx, const EVP_MD *md,
308
                                    uint32_t tau)
309
2.15k
{
310
2.15k
    uint8_t block[SHAKE256_BLOCKSIZE];
311
2.15k
    uint64_t signs;
312
2.15k
    int offset = 8;
313
2.15k
    size_t end;
314
315
    /*
316
     * Rather than squeeze 8 bytes followed by lots of 1 byte squeezes
317
     * the SHAKE blocksize is squeezed each time and buffered into 'block'.
318
     */
319
2.15k
    if (!shake_xof(h_ctx, md, seed, seed_len, block, sizeof(block)))
320
0
        return 0;
321
322
    /*
323
     * grab the first 64 bits - since tau < 64
324
     * Each bit gives a +1 or -1 value.
325
     */
326
2.15k
    OPENSSL_load_u64_le(&signs, block);
327
328
2.15k
    poly_zero(out_c);
329
330
    /* Loop tau times */
331
106k
    for (end = 256 - tau; end < 256; end++) {
332
104k
        size_t index; /* index is a random offset to write +1 or -1 */
333
334
        /* rejection sample in {0..end} to choose an index to place -1 or 1 into */
335
115k
        for (;;) {
336
115k
            if (offset == sizeof(block)) {
337
                /* squeeze another block if the bytes from block have been used */
338
0
                if (!EVP_DigestSqueeze(h_ctx, block, sizeof(block)))
339
0
                    return 0;
340
0
                offset = 0;
341
0
            }
342
343
115k
            index = block[offset++];
344
115k
            if (index <= end)
345
104k
                break;
346
115k
        }
347
348
        /*
349
         * In-place swap the coefficient we are about to replace to the end so
350
         * we don't lose any values that have been already written.
351
         */
352
104k
        out_c->coeff[end] = out_c->coeff[index];
353
        /* set the random coefficient value to either 1 or q-1 */
354
104k
        out_c->coeff[index] = mod_sub(1, 2 * (signs & 1));
355
104k
        signs >>= 1; /* grab the next random bit */
356
104k
    }
357
2.15k
    return 1;
358
2.15k
}