Coverage Report

Created: 2026-04-28 06:29

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openssl/crypto/ml_dsa/ml_dsa_sample.c
Line
Count
Source
1
/*
2
 * Copyright 2024-2025 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
#include <openssl/byteorder.h>
11
#include "ml_dsa_local.h"
12
#include "ml_dsa_vector.h"
13
#include "ml_dsa_matrix.h"
14
#include "ml_dsa_hash.h"
15
#include "internal/constant_time.h"
16
#include "internal/sha3.h"
17
#include "internal/packet.h"
18
19
/* Block size of SHAKE128; rej_ntt_poly() squeezes output in chunks of this size */
#define SHAKE128_BLOCKSIZE SHA3_BLOCKSIZE(128)
20
/* Block size of SHAKE256; used to size the sampling buffers that SHAKE256 output is squeezed into */
#define SHAKE256_BLOCKSIZE SHA3_BLOCKSIZE(256)
21
22
/*
23
 * This is a constant time version of n % 5
24
 * Note that 0xFFFF / 5 = 0x3333, 2 is added to make an over-estimate of 1/5
25
 * and then we divide by (0xFFFF + 1)
26
 */
27
0
/* Branch-free n % 5: the quotient n / 5 is estimated as (n * 0x3335) >> 16 (exact for nibbles 0..15) */
#define MOD5(n) ((n) - (((n) * 0x3335) >> 16) * 5)
28
29
/*
 * rej_ntt_poly() walks a squeezed block in 3 byte steps, so the block must
 * split evenly into 3 byte groups.
 */
#if (SHAKE128_BLOCKSIZE % 3) != 0
#error "rej_ntt_poly() requires SHAKE128_BLOCKSIZE to be a multiple of 3"
#endif
32
33
/*
 * Signature shared by the eta specific nibble samplers: writes a coefficient
 * to |out| and returns 1 when |nibble| is accepted, or returns 0 when it is
 * rejected.
 */
typedef int COEFF_FROM_NIBBLE_FUNC(uint32_t nibble, uint32_t *out);

/* Forward declarations of the samplers for eta = 4 and eta = 2 */
static COEFF_FROM_NIBBLE_FUNC coeff_from_nibble_4;
static COEFF_FROM_NIBBLE_FUNC coeff_from_nibble_2;
37
38
/**
39
 * @brief Combine 3 bytes to form an coefficient.
40
 * See FIPS 204, Algorithm 14, CoeffFromThreeBytes()
41
 *
42
 * This is not constant time as it is used to generate the matrix A which is public.
43
 *
44
 * @param s A byte array of 3 uniformly distributed bytes.
45
 * @param out The returned coefficient in the range 0..q-1.
46
 * @returns 1 if the value is less than q or 0 otherwise.
47
 *          This is used for rejection sampling.
48
 */
49
static ossl_inline int coeff_from_three_bytes(const uint8_t *s, uint32_t *out)
50
0
{
51
    /* Zero out the top bit of the 3rd byte to get a value in the range 0..2^23-1) */
52
0
    *out = (uint32_t)s[0] | ((uint32_t)s[1] << 8) | (((uint32_t)s[2] & 0x7f) << 16);
53
0
    return *out < ML_DSA_Q;
54
0
}
55
56
/**
57
 * @brief Generate a value in the range (q-4..0..4)
58
 * See FIPS 204, Algorithm 15, CoeffFromHalfByte() where eta = 4
59
 * Note the FIPS 204 code uses the range -4..4 (whereas this code adds q to the
60
 * negative numbers).
61
 *
62
 * @param nibble A value in the range 0..15
63
 * @param out The returned value if the range (q-4)..0..4 if nibble is < 9
64
 * @returns 1 nibble was in range, or 0 if the nibble was rejected.
65
 */
66
static ossl_inline int coeff_from_nibble_4(uint32_t nibble, uint32_t *out)
67
0
{
68
    /*
69
     * This is not constant time but will not leak any important info since
70
     * the value is either chosen or thrown away.
71
     */
72
0
    if (value_barrier_32(nibble < 9)) {
73
0
        *out = mod_sub(4, nibble);
74
0
        return 1;
75
0
    }
76
0
    return 0;
77
0
}
78
79
/**
80
 * @brief Generate a value in the range (q-2..0..2)
81
 * See FIPS 204, Algorithm 15, CoeffFromHalfByte() where eta = 2
82
 * Note the FIPS 204 code uses the range -2..2 (whereas this code adds q to the
83
 * negative numbers).
84
 *
85
 * @param nibble A value in the range 0..15
86
 * @param out The returned value if the range (q-2)..0..2 if nibble is < 15
87
 * @returns 1 nibble was in range, or 0 if the nibble was rejected.
88
 */
89
static ossl_inline int coeff_from_nibble_2(uint32_t nibble, uint32_t *out)
90
0
{
91
0
    if (value_barrier_32(nibble < 15)) {
92
0
        *out = mod_sub(2, MOD5(nibble));
93
0
        return 1;
94
0
    }
95
0
    return 0;
96
0
}
97
98
/**
99
 * @brief Use a seed value to generate a polynomial with coefficients in the
100
 * range of 0..q-1 using rejection sampling.
101
 * SHAKE128 is used to absorb the seed, and then sequences of 3 sample bytes are
102
 * squeezed to try to produce coefficients.
103
 * The SHAKE128 stream is used to get uniformly distributed elements.
104
 * This algorithm is used for matrix expansion and only operates on public inputs.
105
 *
106
 * See FIPS 204, Algorithm 30, RejNTTPoly()
107
 *
108
 * @param g_ctx A EVP_MD_CTX object used for sampling the seed.
109
 * @param md A pre-fetched SHAKE128 object.
110
 * @param seed The seed to use for sampling.
111
 * @param seed_len The size of |seed|
112
 * @param out The returned polynomial with coefficients in the range of
113
 *            0..q-1. This range is required for NTT.
114
 * @returns 1 if the polynomial was successfully generated, or 0 if any of the
115
 *            digest operations failed.
116
 */
117
static int rej_ntt_poly(EVP_MD_CTX *g_ctx, const EVP_MD *md,
118
    const uint8_t *seed, size_t seed_len, POLY *out)
119
0
{
120
0
    int j = 0;
121
0
    uint8_t blocks[SHAKE128_BLOCKSIZE], *b, *end = blocks + sizeof(blocks);
122
123
    /*
124
     * Instead of just squeezing 3 bytes at a time, we grab a whole block
125
     * Note that the shake128 blocksize of 168 is divisible by 3.
126
     */
127
0
    if (!shake_xof(g_ctx, md, seed, seed_len, blocks, sizeof(blocks)))
128
0
        return 0;
129
130
0
    while (1) {
131
0
        for (b = blocks; b < end; b += 3) {
132
0
            if (coeff_from_three_bytes(b, &(out->coeff[j]))) {
133
0
                if (++j >= ML_DSA_NUM_POLY_COEFFICIENTS)
134
0
                    return 1; /* finished */
135
0
            }
136
0
        }
137
0
        if (!EVP_DigestSqueeze(g_ctx, blocks, sizeof(blocks)))
138
0
            return 0;
139
0
    }
140
0
}
141
142
/**
143
 * @brief Use a seed value to generate a polynomial with coefficients in the
144
 * range of ((q-eta)..0..eta) using rejection sampling. eta is either 2 or 4.
145
 * SHAKE256 is used to absorb the seed, and then samples are squeezed.
146
 * See FIPS 204, Algorithm 31, RejBoundedPoly()
147
 *
148
 * @param h_ctx A EVP_MD_CTX object context used to sample the seed.
149
 * @param md A pre-fetched SHAKE256 object.
150
 * @param coef_from_nibble A function that is dependent on eta, which takes a
151
 *                         nibble and tries to see if it is in the correct range.
152
 * @param seed The seed to use for sampling.
153
 * @param seed_len The size of |seed|
154
 * @param out The returned polynomial with coefficients in the range of
155
 *            ((q-eta)..0..eta)
156
 * @returns 1 if the polynomial was successfully generated, or 0 if any of the
157
 *            digest operations failed.
158
 */
159
static int rej_bounded_poly(EVP_MD_CTX *h_ctx, const EVP_MD *md,
160
    COEFF_FROM_NIBBLE_FUNC *coef_from_nibble,
161
    const uint8_t *seed, size_t seed_len, POLY *out)
162
0
{
163
0
    int j = 0;
164
0
    uint32_t z0, z1;
165
0
    uint8_t blocks[SHAKE256_BLOCKSIZE], *b, *end = blocks + sizeof(blocks);
166
167
    /* Instead of just squeezing 1 byte at a time, we grab a whole block */
168
0
    if (!shake_xof(h_ctx, md, seed, seed_len, blocks, sizeof(blocks)))
169
0
        return 0;
170
171
0
    while (1) {
172
0
        for (b = blocks; b < end; b++) {
173
0
            z0 = *b & 0x0F; /* lower nibble of byte */
174
0
            z1 = *b >> 4; /* high nibble of byte */
175
176
0
            if (coef_from_nibble(z0, &out->coeff[j])
177
0
                && ++j >= ML_DSA_NUM_POLY_COEFFICIENTS)
178
0
                return 1;
179
0
            if (coef_from_nibble(z1, &out->coeff[j])
180
0
                && ++j >= ML_DSA_NUM_POLY_COEFFICIENTS)
181
0
                return 1;
182
0
        }
183
0
        if (!EVP_DigestSqueeze(h_ctx, blocks, sizeof(blocks)))
184
0
            return 0;
185
0
    }
186
0
}
187
188
/**
189
 * @brief Generate a k * l matrix that has uniformly distributed polynomial
190
 *        elements using rejection sampling.
191
 * See FIPS 204, Algorithm 32, ExpandA()
192
 *
193
 * @param g_ctx A EVP_MD_CTX context used for rejection sampling
194
 *              seed values generated from the seed rho.
195
 * @param md A pre-fetched SHAKE128 object
196
 * @param rho A 32 byte seed to generated the matrix from.
197
 * @param out The generated k * l matrix of polynomials with coefficients
198
 *            in the range of 0..q-1.
199
 * @returns 1 if the matrix was generated, or 0 on error.
200
 */
201
int ossl_ml_dsa_matrix_expand_A(EVP_MD_CTX *g_ctx, const EVP_MD *md,
202
    const uint8_t *rho, MATRIX *out)
203
0
{
204
0
    int ret = 0;
205
0
    size_t i, j;
206
0
    uint8_t derived_seed[ML_DSA_RHO_BYTES + 2];
207
0
    POLY *poly = out->m_poly;
208
209
    /* The seed used for each matrix element is rho + column_index + row_index */
210
0
    memcpy(derived_seed, rho, ML_DSA_RHO_BYTES);
211
212
0
    for (i = 0; i < out->k; i++) {
213
0
        for (j = 0; j < out->l; j++) {
214
0
            derived_seed[ML_DSA_RHO_BYTES + 1] = (uint8_t)i;
215
0
            derived_seed[ML_DSA_RHO_BYTES] = (uint8_t)j;
216
            /* Generate the polynomial for each matrix element using a unique seed */
217
0
            if (!rej_ntt_poly(g_ctx, md, derived_seed, sizeof(derived_seed), poly++))
218
0
                goto err;
219
0
        }
220
0
    }
221
0
    ret = 1;
222
0
err:
223
0
    return ret;
224
0
}
225
226
/**
227
 * @brief Generates 2 vectors using rejection sampling whose polynomial
228
 * coefficients are in the interval [q-eta..0..eta]
229
 *
230
 * See FIPS 204, Algorithm 33, ExpandS().
231
 * Note that in FIPS 204 the range -eta..eta is used.
232
 *
233
 * @param h_ctx A EVP_MD_CTX context to use to sample the seed.
234
 * @param md A pre-fetched SHAKE256 object.
235
 * @param eta Is either 2 or 4, and determines the range of the coefficients for
236
 *            s1 and s2.
237
 * @param seed A 64 byte seed to use for sampling.
238
 * @param s1 A 1 * l column vector containing polynomials with coefficients in
239
 *           the range (q-eta)..0..eta
240
 * @param s2 A 1 * k column vector containing polynomials with coefficients in
241
 *           the range (q-eta)..0..eta
242
 * @returns 1 if s1 and s2 were successfully generated, or 0 otherwise.
243
 */
244
int ossl_ml_dsa_vector_expand_S(EVP_MD_CTX *h_ctx, const EVP_MD *md, int eta,
245
    const uint8_t *seed, VECTOR *s1, VECTOR *s2)
246
0
{
247
0
    int ret = 0;
248
0
    size_t i;
249
0
    size_t l = s1->num_poly;
250
0
    size_t k = s2->num_poly;
251
0
    uint8_t derived_seed[ML_DSA_PRIV_SEED_BYTES + 2];
252
0
    COEFF_FROM_NIBBLE_FUNC *coef_from_nibble_fn;
253
254
0
    coef_from_nibble_fn = (eta == ML_DSA_ETA_4) ? coeff_from_nibble_4 : coeff_from_nibble_2;
255
256
    /*
257
     * Each polynomial generated uses a unique seed that consists of
258
     * seed + counter (where the counter is 2 bytes starting at 0)
259
     */
260
0
    memcpy(derived_seed, seed, ML_DSA_PRIV_SEED_BYTES);
261
0
    derived_seed[ML_DSA_PRIV_SEED_BYTES] = 0;
262
0
    derived_seed[ML_DSA_PRIV_SEED_BYTES + 1] = 0;
263
264
0
    for (i = 0; i < l; i++) {
265
0
        if (!rej_bounded_poly(h_ctx, md, coef_from_nibble_fn,
266
0
                derived_seed, sizeof(derived_seed), &s1->poly[i]))
267
0
            goto err;
268
0
        ++derived_seed[ML_DSA_PRIV_SEED_BYTES];
269
0
    }
270
0
    for (i = 0; i < k; i++) {
271
0
        if (!rej_bounded_poly(h_ctx, md, coef_from_nibble_fn,
272
0
                derived_seed, sizeof(derived_seed), &s2->poly[i]))
273
0
            goto err;
274
0
        ++derived_seed[ML_DSA_PRIV_SEED_BYTES];
275
0
    }
276
0
    ret = 1;
277
0
err:
278
0
    return ret;
279
0
}
280
281
/* See FIPS 204, Algorithm 34, ExpandMask(), Step 4 & 5 */
282
int ossl_ml_dsa_poly_expand_mask(POLY *out, const uint8_t *seed, size_t seed_len,
283
    uint32_t gamma1,
284
    EVP_MD_CTX *h_ctx, const EVP_MD *md)
285
0
{
286
0
    uint8_t buf[32 * 20];
287
0
    size_t buf_len = 32 * (gamma1 == ML_DSA_GAMMA1_TWO_POWER_19 ? 20 : 18);
288
289
0
    return shake_xof(h_ctx, md, seed, seed_len, buf, buf_len)
290
0
        && ossl_ml_dsa_poly_decode_expand_mask(out, buf, buf_len, gamma1);
291
0
}
292
293
/*
294
 * @brief Sample a polynomial with coefficients in the range {-1..1}.
295
 * The number of non zero values (hamming weight) is given by tau
296
 *
297
 * See FIPS 204, Algorithm 29, SampleInBall()
298
 * This function is assumed to not be constant time.
299
 * The algorithm is based on Durstenfeld's version of the Fisher-Yates shuffle.
300
 *
301
 * Note that the coefficients returned by this implementation are positive
302
 * i.e one of q-1, 0, or 1.
303
 *
304
 * @param tau is the number of +1 or -1's in the polynomial 'out_c' (39, 49 or 60)
305
 *            that is less than or equal to 64
306
 */
307
int ossl_ml_dsa_poly_sample_in_ball(POLY *out_c, const uint8_t *seed, int seed_len,
308
    EVP_MD_CTX *h_ctx, const EVP_MD *md,
309
    uint32_t tau)
310
0
{
311
0
    uint8_t block[SHAKE256_BLOCKSIZE];
312
0
    uint64_t signs;
313
0
    int offset = 8;
314
0
    size_t end;
315
316
    /*
317
     * Rather than squeeze 8 bytes followed by lots of 1 byte squeezes
318
     * the SHAKE blocksize is squeezed each time and buffered into 'block'.
319
     */
320
0
    if (!shake_xof(h_ctx, md, seed, seed_len, block, sizeof(block)))
321
0
        return 0;
322
323
    /*
324
     * grab the first 64 bits - since tau < 64
325
     * Each bit gives a +1 or -1 value.
326
     */
327
0
    OPENSSL_load_u64_le(&signs, block);
328
329
    /*
330
     * SampleInBall implements a Fisher-Yates shuffle whose rejection-sampling
331
     * inner loop and data-dependent array index unavoidably leak the structure
332
     * of the challenge polynomial via memory-access pattern and branch timing.
333
     * This is safe: c_tilde = H(mu ‖ w1) is the Fiat-Shamir commitment and is
334
     * published in the accepted signature, so the SHAKE bytes that build c are
335
     * effectively public.  See the BoringSSL design discussion at
336
     * https://boringssl-review.googlesource.com/c/boringssl/+/67747/comment/8d8f01ac_70af3f21/
337
     *
338
     * The first 8 bytes (the sign bits loaded into |signs| above) are left
339
     * tainted: they determine only the ±1 values written into c, which flow
340
     * into the CT arithmetic of cs1/cs2/ct0 alongside the already-tainted
341
     * secret polynomials and cause no spurious violations there.
342
     * Only the rejection-sampling bytes need to be declassified.
343
     */
344
0
    CONSTTIME_DECLASSIFY(block + offset, sizeof(block) - offset);
345
346
0
    poly_zero(out_c);
347
348
    /* Loop tau times */
349
0
    for (end = 256 - tau; end < 256; end++) {
350
0
        size_t index; /* index is a random offset to write +1 or -1 */
351
352
        /* rejection sample in {0..end} to choose an index to place -1 or 1 into */
353
0
        for (;;) {
354
0
            if (offset == sizeof(block)) {
355
                /* squeeze another block if the bytes from block have been used */
356
0
                if (!EVP_DigestSqueeze(h_ctx, block, sizeof(block)))
357
0
                    return 0;
358
                /* See comment above for why the block is declassified. */
359
0
                CONSTTIME_DECLASSIFY(block, sizeof(block));
360
0
                offset = 0;
361
0
            }
362
363
0
            index = block[offset++];
364
0
            if (index <= end)
365
0
                break;
366
0
        }
367
368
        /*
369
         * In-place swap the coefficient we are about to replace to the end so
370
         * we don't lose any values that have been already written.
371
         */
372
0
        out_c->coeff[end] = out_c->coeff[index];
373
        /* set the random coefficient value to either 1 or q-1 */
374
0
        out_c->coeff[index] = mod_sub(1, 2 * (signs & 1));
375
0
        signs >>= 1; /* grab the next random bit */
376
0
    }
377
0
    return 1;
378
0
}