Coverage Report

Created: 2025-06-13 06:58

/src/openssl30/crypto/bn/rsaz_exp_x2.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2020-2025 The OpenSSL Project Authors. All Rights Reserved.
3
 * Copyright (c) 2020, Intel Corporation. All Rights Reserved.
4
 *
5
 * Licensed under the Apache License 2.0 (the "License").  You may not use
6
 * this file except in compliance with the License.  You can obtain a copy
7
 * in the file LICENSE in the source distribution or at
8
 * https://www.openssl.org/source/license.html
9
 *
10
 *
11
 * Originally written by Ilya Albrekht, Sergey Kirillov and Andrey Matyukov
12
 * Intel Corporation
13
 *
14
 */
15
16
#include <openssl/opensslconf.h>
17
#include <openssl/crypto.h>
18
#include "rsaz_exp.h"
19
20
#ifndef RSAZ_ENABLED
21
NON_EMPTY_TRANSLATION_UNIT
22
#else
23
# include <assert.h>
24
# include <string.h>
25
26
# if defined(__GNUC__)
27
0
#  define ALIGN64 __attribute__((aligned(64)))
28
# elif defined(_MSC_VER)
29
#  define ALIGN64 __declspec(align(64))
30
# else
31
#  define ALIGN64
32
# endif
33
34
/*
 * Advance |ptr| to the next address that is a multiple of |boundary|
 * (|boundary| must be a power of two).  A pointer that is already aligned
 * is still moved forward by a full |boundary| bytes, so the caller has to
 * over-allocate by |boundary| bytes.  Both arguments are evaluated more than
 * once; do not pass expressions with side effects.
 */
# define ALIGN_OF(ptr, boundary) \
    ((unsigned char *)(ptr) \
     + ((size_t)(boundary) - (((size_t)(ptr)) & ((size_t)(boundary) - 1))))
36
37
/* Internal radix */
38
0
# define DIGIT_SIZE (52)
39
/* 52-bit mask */
40
0
# define DIGIT_MASK ((uint64_t)0xFFFFFFFFFFFFF)
41
42
0
# define BITS2WORD8_SIZE(x)  (((x) + 7) >> 3)
43
0
# define BITS2WORD64_SIZE(x) (((x) + 63) >> 6)
44
45
static ossl_inline uint64_t get_digit52(const uint8_t *in, int in_len);
46
static ossl_inline void put_digit52(uint8_t *out, int out_len, uint64_t digit);
47
static void to_words52(BN_ULONG *out, int out_len, const BN_ULONG *in,
48
                       int in_bitsize);
49
static void from_words52(BN_ULONG *bn_out, int out_bitsize, const BN_ULONG *in);
50
static ossl_inline void set_bit(BN_ULONG *a, int idx);
51
52
/* Number of |digit_size|-bit digits in |bitsize|-bit value */
53
static ossl_inline int number_of_digits(int bitsize, int digit_size)
54
0
{
55
0
    return (bitsize + digit_size - 1) / digit_size;
56
0
}
57
58
typedef void (*AMM52)(BN_ULONG *res, const BN_ULONG *base,
59
                      const BN_ULONG *exp, const BN_ULONG *m, BN_ULONG k0);
60
typedef void (*EXP52_x2)(BN_ULONG *res, const BN_ULONG *base,
61
                         const BN_ULONG *exp[2], const BN_ULONG *m,
62
                         const BN_ULONG *rr, const BN_ULONG k0[2]);
63
64
/*
65
 * For details of the methods declared below please refer to
66
 *    crypto/bn/asm/rsaz-avx512.pl
67
 *
68
 * Naming notes:
69
 *  amm = Almost Montgomery Multiplication
70
 *  ams = Almost Montgomery Squaring
71
 *  52x20 - data represented as array of 20 digits in 52-bit radix
72
 *  _x1_/_x2_ - 1 or 2 independent inputs/outputs
73
 *  _256 suffix - uses 256-bit (AVX512VL) registers
74
 */
75
76
/* AMM = Almost Montgomery Multiplication. */
77
void ossl_rsaz_amm52x20_x1_256(BN_ULONG *res, const BN_ULONG *base,
78
                               const BN_ULONG *exp, const BN_ULONG *m,
79
                               BN_ULONG k0);
80
static void RSAZ_exp52x20_x2_256(BN_ULONG *res, const BN_ULONG *base,
81
                                 const BN_ULONG *exp[2], const BN_ULONG *m,
82
                                 const BN_ULONG *rr, const BN_ULONG k0[2]);
83
void ossl_rsaz_amm52x20_x2_256(BN_ULONG *out, const BN_ULONG *a,
84
                               const BN_ULONG *b, const BN_ULONG *m,
85
                               const BN_ULONG k0[2]);
86
void ossl_extract_multiplier_2x20_win5(BN_ULONG *red_Y,
87
                                       const BN_ULONG *red_table,
88
                                       int red_table_idx, int tbl_idx);
89
90
/*
91
 * Dual Montgomery modular exponentiation using prime moduli of the
92
 * same bit size, optimized with AVX512 ISA.
93
 *
94
 * Input and output parameters for each exponentiation are independent and
95
 * denoted here by index |i|, i = 1..2.
96
 *
97
 * Input and output are all in regular 2^64 radix.
98
 *
99
 * Each moduli shall be |factor_size| bit size.
100
 *
101
 * NOTE: currently only 2x1024 case is supported.
102
 *
103
 *  [out] res|i|      - result of modular exponentiation: array of qword values
104
 *                      in regular (2^64) radix. Size of array shall be enough
105
 *                      to hold |factor_size| bits.
106
 *  [in]  base|i|     - base
107
 *  [in]  exp|i|      - exponent
108
 *  [in]  m|i|        - moduli
109
 *  [in]  rr|i|       - Montgomery parameter RR = R^2 mod m|i|
110
 *  [in]  k0_|i|      - Montgomery parameter k0 = -1/m|i| mod 2^64
111
 *  [in]  factor_size - moduli bit size
112
 *
113
 * \return 0 in case of failure,
114
 *         1 in case of success.
115
 */
116
int ossl_rsaz_mod_exp_avx512_x2(BN_ULONG *res1,
117
                                const BN_ULONG *base1,
118
                                const BN_ULONG *exp1,
119
                                const BN_ULONG *m1,
120
                                const BN_ULONG *rr1,
121
                                BN_ULONG k0_1,
122
                                BN_ULONG *res2,
123
                                const BN_ULONG *base2,
124
                                const BN_ULONG *exp2,
125
                                const BN_ULONG *m2,
126
                                const BN_ULONG *rr2,
127
                                BN_ULONG k0_2,
128
                                int factor_size)
129
0
{
130
0
    int ret = 0;
131
132
    /*
133
     * Number of word-size (BN_ULONG) digits to store exponent in redundant
134
     * representation.
135
     */
136
0
    int exp_digits = number_of_digits(factor_size + 2, DIGIT_SIZE);
137
0
    int coeff_pow = 4 * (DIGIT_SIZE * exp_digits - factor_size);
138
0
    BN_ULONG *base1_red, *m1_red, *rr1_red;
139
0
    BN_ULONG *base2_red, *m2_red, *rr2_red;
140
0
    BN_ULONG *coeff_red;
141
0
    BN_ULONG *storage = NULL;
142
0
    BN_ULONG *storage_aligned = NULL;
143
0
    BN_ULONG storage_len_bytes = 7 * exp_digits * sizeof(BN_ULONG);
144
145
    /* AMM = Almost Montgomery Multiplication */
146
0
    AMM52 amm = NULL;
147
    /* Dual (2-exps in parallel) exponentiation */
148
0
    EXP52_x2 exp_x2 = NULL;
149
150
0
    const BN_ULONG *exp[2] = {0};
151
0
    BN_ULONG k0[2] = {0};
152
153
    /* Only 1024-bit factor size is supported now */
154
0
    switch (factor_size) {
155
0
    case 1024:
156
0
        amm = ossl_rsaz_amm52x20_x1_256;
157
0
        exp_x2 = RSAZ_exp52x20_x2_256;
158
0
        break;
159
0
    default:
160
0
        goto err;
161
0
    }
162
163
0
    storage = (BN_ULONG *)OPENSSL_malloc(storage_len_bytes + 64);
164
0
    if (storage == NULL)
165
0
        goto err;
166
0
    storage_aligned = (BN_ULONG *)ALIGN_OF(storage, 64);
167
168
    /* Memory layout for red(undant) representations */
169
0
    base1_red = storage_aligned;
170
0
    base2_red = storage_aligned + 1 * exp_digits;
171
0
    m1_red    = storage_aligned + 2 * exp_digits;
172
0
    m2_red    = storage_aligned + 3 * exp_digits;
173
0
    rr1_red   = storage_aligned + 4 * exp_digits;
174
0
    rr2_red   = storage_aligned + 5 * exp_digits;
175
0
    coeff_red = storage_aligned + 6 * exp_digits;
176
177
    /* Convert base_i, m_i, rr_i, from regular to 52-bit radix */
178
0
    to_words52(base1_red, exp_digits, base1, factor_size);
179
0
    to_words52(base2_red, exp_digits, base2, factor_size);
180
0
    to_words52(m1_red, exp_digits, m1, factor_size);
181
0
    to_words52(m2_red, exp_digits, m2, factor_size);
182
0
    to_words52(rr1_red, exp_digits, rr1, factor_size);
183
0
    to_words52(rr2_red, exp_digits, rr2, factor_size);
184
185
    /*
186
     * Compute target domain Montgomery converters RR' for each modulus
187
     * based on precomputed original domain's RR.
188
     *
189
     * RR -> RR' transformation steps:
190
     *  (1) coeff = 2^k
191
     *  (2) t = AMM(RR,RR) = RR^2 / R' mod m
192
     *  (3) RR' = AMM(t, coeff) = RR^2 * 2^k / R'^2 mod m
193
     * where
194
     *  k = 4 * (52 * digits52 - modlen)
195
     *  R  = 2^(64 * ceil(modlen/64)) mod m
196
     *  RR = R^2 mod M
197
     *  R' = 2^(52 * ceil(modlen/52)) mod m
198
     *
199
     *  modlen = 1024: k = 64, RR = 2^2048 mod m, RR' = 2^2080 mod m
200
     */
201
0
    memset(coeff_red, 0, exp_digits * sizeof(BN_ULONG));
202
    /* (1) in reduced domain representation */
203
0
    set_bit(coeff_red, 64 * (int)(coeff_pow / 52) + coeff_pow % 52);
204
205
0
    amm(rr1_red, rr1_red, rr1_red, m1_red, k0_1);     /* (2) for m1 */
206
0
    amm(rr1_red, rr1_red, coeff_red, m1_red, k0_1);   /* (3) for m1 */
207
208
0
    amm(rr2_red, rr2_red, rr2_red, m2_red, k0_2);     /* (2) for m2 */
209
0
    amm(rr2_red, rr2_red, coeff_red, m2_red, k0_2);   /* (3) for m2 */
210
211
0
    exp[0] = exp1;
212
0
    exp[1] = exp2;
213
214
0
    k0[0] = k0_1;
215
0
    k0[1] = k0_2;
216
217
0
    exp_x2(rr1_red, base1_red, exp, m1_red, rr1_red, k0);
218
219
    /* Convert rr_i back to regular radix */
220
0
    from_words52(res1, factor_size, rr1_red);
221
0
    from_words52(res2, factor_size, rr2_red);
222
223
    /* bn_reduce_once_in_place expects number of BN_ULONG, not bit size */
224
0
    factor_size /= sizeof(BN_ULONG) * 8;
225
226
0
    bn_reduce_once_in_place(res1, /*carry=*/0, m1, storage, factor_size);
227
0
    bn_reduce_once_in_place(res2, /*carry=*/0, m2, storage, factor_size);
228
229
0
    ret = 1;
230
0
err:
231
0
    if (storage != NULL) {
232
0
        OPENSSL_cleanse(storage, storage_len_bytes);
233
0
        OPENSSL_free(storage);
234
0
    }
235
0
    return ret;
236
0
}
237
238
/*
239
 * Dual 1024-bit w-ary modular exponentiation using prime moduli of the same
240
 * bit size using Almost Montgomery Multiplication, optimized with AVX512_IFMA
241
 * ISA.
242
 *
243
 * The parameter w (window size) = 5.
244
 *
245
 *  [out] res      - result of modular exponentiation: 2x20 qword
246
 *                   values in 2^52 radix.
247
 *  [in]  base     - base (2x20 qword values in 2^52 radix)
248
 *  [in]  exp      - array of 2 pointers to 16 qword values in 2^64 radix.
249
 *                   Exponent is not converted to redundant representation.
250
 *  [in]  m        - moduli (2x20 qword values in 2^52 radix)
251
 *  [in]  rr       - Montgomery parameter for 2 moduli: RR = 2^2080 mod m.
252
 *                   (2x20 qword values in 2^52 radix)
253
 *  [in]  k0       - Montgomery parameter for 2 moduli: k0 = -1/m mod 2^64
254
 *
255
 * \return (void).
256
 */
257
static void RSAZ_exp52x20_x2_256(BN_ULONG *out,          /* [2][20] */
258
                                 const BN_ULONG *base,   /* [2][20] */
259
                                 const BN_ULONG *exp[2], /* 2x16    */
260
                                 const BN_ULONG *m,      /* [2][20] */
261
                                 const BN_ULONG *rr,     /* [2][20] */
262
                                 const BN_ULONG k0[2])
263
0
{
264
0
# define BITSIZE_MODULUS (1024)
265
0
# define EXP_WIN_SIZE (5)
266
0
# define EXP_WIN_MASK ((1U << EXP_WIN_SIZE) - 1)
267
/*
268
 * Number of digits (64-bit words) in redundant representation to handle
269
 * modulus bits
270
 */
271
0
# define RED_DIGITS (20)
272
0
# define EXP_DIGITS (16)
273
0
# define DAMM ossl_rsaz_amm52x20_x2_256
274
/*
275
 * Squaring is done using multiplication now. That can be a subject of
276
 * optimization in future.
277
 */
278
0
# define DAMS(r,a,m,k0) \
279
0
              ossl_rsaz_amm52x20_x2_256((r),(a),(a),(m),(k0))
280
281
    /* Allocate stack for red(undant) result Y and multiplier X */
282
0
    ALIGN64 BN_ULONG red_Y[2][RED_DIGITS];
283
0
    ALIGN64 BN_ULONG red_X[2][RED_DIGITS];
284
285
    /* Allocate expanded exponent */
286
0
    ALIGN64 BN_ULONG expz[2][EXP_DIGITS + 1];
287
288
    /* Pre-computed table of base powers */
289
0
    ALIGN64 BN_ULONG red_table[1U << EXP_WIN_SIZE][2][RED_DIGITS];
290
291
0
    int idx;
292
293
0
    memset(red_Y, 0, sizeof(red_Y));
294
0
    memset(red_table, 0, sizeof(red_table));
295
0
    memset(red_X, 0, sizeof(red_X));
296
297
    /*
298
     * Compute table of powers base^i, i = 0, ..., (2^EXP_WIN_SIZE) - 1
299
     *   table[0] = mont(x^0) = mont(1)
300
     *   table[1] = mont(x^1) = mont(x)
301
     */
302
0
    red_X[0][0] = 1;
303
0
    red_X[1][0] = 1;
304
0
    DAMM(red_table[0][0], (const BN_ULONG*)red_X, rr, m, k0);
305
0
    DAMM(red_table[1][0], base,  rr, m, k0);
306
307
0
    for (idx = 1; idx < (int)((1U << EXP_WIN_SIZE) / 2); idx++) {
308
0
        DAMS(red_table[2 * idx + 0][0], red_table[1 * idx][0], m, k0);
309
0
        DAMM(red_table[2 * idx + 1][0], red_table[2 * idx][0], red_table[1][0], m, k0);
310
0
    }
311
312
    /* Copy and expand exponents */
313
0
    memcpy(expz[0], exp[0], EXP_DIGITS * sizeof(BN_ULONG));
314
0
    expz[0][EXP_DIGITS] = 0;
315
0
    memcpy(expz[1], exp[1], EXP_DIGITS * sizeof(BN_ULONG));
316
0
    expz[1][EXP_DIGITS] = 0;
317
318
    /* Exponentiation */
319
0
    {
320
0
        const int rem = BITSIZE_MODULUS % EXP_WIN_SIZE;
321
0
        BN_ULONG table_idx_mask = EXP_WIN_MASK;
322
323
0
        int exp_bit_no = BITSIZE_MODULUS - rem;
324
0
        int exp_chunk_no = exp_bit_no / 64;
325
0
        int exp_chunk_shift = exp_bit_no % 64;
326
327
0
        BN_ULONG red_table_idx_0, red_table_idx_1;
328
329
        /*
330
         * If rem == 0, then
331
         *      exp_bit_no = modulus_bitsize - exp_win_size
332
         * However, this isn't possible because rem is { 1024, 1536, 2048 } % 5
333
         * which is { 4, 1, 3 } respectively.
334
         *
335
         * If this assertion ever fails the fix above is easy.
336
         */
337
0
        OPENSSL_assert(rem != 0);
338
339
        /* Process 1-st exp window - just init result */
340
0
        red_table_idx_0 = expz[0][exp_chunk_no];
341
0
        red_table_idx_1 = expz[1][exp_chunk_no];
342
        /*
343
         * The function operates with fixed moduli sizes divisible by 64,
344
         * thus table index here is always in supported range [0, EXP_WIN_SIZE).
345
         */
346
0
        red_table_idx_0 >>= exp_chunk_shift;
347
0
        red_table_idx_1 >>= exp_chunk_shift;
348
349
0
        ossl_extract_multiplier_2x20_win5(red_Y[0], (const BN_ULONG*)red_table,
350
0
                                          (int)red_table_idx_0, 0);
351
0
        ossl_extract_multiplier_2x20_win5(red_Y[1], (const BN_ULONG*)red_table,
352
0
                                          (int)red_table_idx_1, 1);
353
354
        /* Process other exp windows */
355
0
        for (exp_bit_no -= EXP_WIN_SIZE; exp_bit_no >= 0; exp_bit_no -= EXP_WIN_SIZE) {
356
            /* Extract pre-computed multiplier from the table */
357
0
            {
358
0
                BN_ULONG T;
359
360
0
                exp_chunk_no = exp_bit_no / 64;
361
0
                exp_chunk_shift = exp_bit_no % 64;
362
0
                {
363
0
                    red_table_idx_0 = expz[0][exp_chunk_no];
364
0
                    T = expz[0][exp_chunk_no + 1];
365
366
0
                    red_table_idx_0 >>= exp_chunk_shift;
367
                    /*
368
                     * Get additional bits from then next quadword
369
                     * when 64-bit boundaries are crossed.
370
                     */
371
0
                    if (exp_chunk_shift > 64 - EXP_WIN_SIZE) {
372
0
                        T <<= (64 - exp_chunk_shift);
373
0
                        red_table_idx_0 ^= T;
374
0
                    }
375
0
                    red_table_idx_0 &= table_idx_mask;
376
377
0
                    ossl_extract_multiplier_2x20_win5(red_X[0],
378
0
                                                      (const BN_ULONG*)red_table,
379
0
                                                      (int)red_table_idx_0, 0);
380
0
                }
381
0
                {
382
0
                    red_table_idx_1 = expz[1][exp_chunk_no];
383
0
                    T = expz[1][exp_chunk_no + 1];
384
385
0
                    red_table_idx_1 >>= exp_chunk_shift;
386
                    /*
387
                     * Get additional bits from then next quadword
388
                     * when 64-bit boundaries are crossed.
389
                     */
390
0
                    if (exp_chunk_shift > 64 - EXP_WIN_SIZE) {
391
0
                        T <<= (64 - exp_chunk_shift);
392
0
                        red_table_idx_1 ^= T;
393
0
                    }
394
0
                    red_table_idx_1 &= table_idx_mask;
395
396
0
                    ossl_extract_multiplier_2x20_win5(red_X[1],
397
0
                                                      (const BN_ULONG*)red_table,
398
0
                                                      (int)red_table_idx_1, 1);
399
0
                }
400
0
            }
401
402
            /* Series of squaring */
403
0
            DAMS((BN_ULONG*)red_Y, (const BN_ULONG*)red_Y, m, k0);
404
0
            DAMS((BN_ULONG*)red_Y, (const BN_ULONG*)red_Y, m, k0);
405
0
            DAMS((BN_ULONG*)red_Y, (const BN_ULONG*)red_Y, m, k0);
406
0
            DAMS((BN_ULONG*)red_Y, (const BN_ULONG*)red_Y, m, k0);
407
0
            DAMS((BN_ULONG*)red_Y, (const BN_ULONG*)red_Y, m, k0);
408
409
0
            DAMM((BN_ULONG*)red_Y, (const BN_ULONG*)red_Y, (const BN_ULONG*)red_X, m, k0);
410
0
        }
411
0
    }
412
413
    /*
414
     *
415
     * NB: After the last AMM of exponentiation in Montgomery domain, the result
416
     * may be 1025-bit, but the conversion out of Montgomery domain performs an
417
     * AMM(x,1) which guarantees that the final result is less than |m|, so no
418
     * conditional subtraction is needed here. See "Efficient Software
419
     * Implementations of Modular Exponentiation" (by Shay Gueron) paper for details.
420
     */
421
422
    /* Convert result back in regular 2^52 domain */
423
0
    memset(red_X, 0, sizeof(red_X));
424
0
    red_X[0][0] = 1;
425
0
    red_X[1][0] = 1;
426
0
    DAMM(out, (const BN_ULONG*)red_Y, (const BN_ULONG*)red_X, m, k0);
427
428
    /* Clear exponents */
429
0
    OPENSSL_cleanse(expz, sizeof(expz));
430
0
    OPENSSL_cleanse(red_Y, sizeof(red_Y));
431
432
0
# undef DAMS
433
0
# undef DAMM
434
0
# undef EXP_DIGITS
435
0
# undef RED_DIGITS
436
0
# undef EXP_WIN_MASK
437
0
# undef EXP_WIN_SIZE
438
0
# undef BITSIZE_MODULUS
439
0
}
440
441
/*
 * Assemble the first |in_len| bytes of |in| into a 64-bit value, with in[0]
 * as the least significant byte.
 */
static ossl_inline uint64_t get_digit52(const uint8_t *in, int in_len)
{
    uint64_t acc = 0;
    int remaining = in_len;

    assert(in != NULL);

    /* Walk from the most significant byte down to in[0] */
    while (remaining > 0) {
        remaining--;
        acc = (acc << 8) | (uint64_t)in[remaining];
    }

    return acc;
}
453
454
/*
455
 * Convert array of words in regular (base=2^64) representation to array of
456
 * words in redundant (base=2^52) one.
457
 */
458
static void to_words52(BN_ULONG *out, int out_len,
459
                       const BN_ULONG *in, int in_bitsize)
460
0
{
461
0
    uint8_t *in_str = NULL;
462
463
0
    assert(out != NULL);
464
0
    assert(in != NULL);
465
    /* Check destination buffer capacity */
466
0
    assert(out_len >= number_of_digits(in_bitsize, DIGIT_SIZE));
467
468
0
    in_str = (uint8_t *)in;
469
470
0
    for (; in_bitsize >= (2 * DIGIT_SIZE); in_bitsize -= (2 * DIGIT_SIZE), out += 2) {
471
0
        uint64_t digit;
472
473
0
        memcpy(&digit, in_str, sizeof(digit));
474
0
        out[0] = digit & DIGIT_MASK;
475
0
        in_str += 6;
476
0
        memcpy(&digit, in_str, sizeof(digit));
477
0
        out[1] = (digit >> 4) & DIGIT_MASK;
478
0
        in_str += 7;
479
0
        out_len -= 2;
480
0
    }
481
482
0
    if (in_bitsize > DIGIT_SIZE) {
483
0
        uint64_t digit = get_digit52(in_str, 7);
484
485
0
        out[0] = digit & DIGIT_MASK;
486
0
        in_str += 6;
487
0
        in_bitsize -= DIGIT_SIZE;
488
0
        digit = get_digit52(in_str, BITS2WORD8_SIZE(in_bitsize));
489
0
        out[1] = digit >> 4;
490
0
        out += 2;
491
0
        out_len -= 2;
492
0
    } else if (in_bitsize > 0) {
493
0
        out[0] = get_digit52(in_str, BITS2WORD8_SIZE(in_bitsize));
494
0
        out++;
495
0
        out_len--;
496
0
    }
497
498
0
    memset(out, 0, out_len * sizeof(BN_ULONG));
499
0
}
500
501
static ossl_inline void put_digit52(uint8_t *pStr, int strLen, uint64_t digit)
502
0
{
503
0
    assert(pStr != NULL);
504
505
0
    for (; strLen > 0; strLen--) {
506
0
        *pStr++ = (uint8_t)(digit & 0xFF);
507
0
        digit >>= 8;
508
0
    }
509
0
}
510
511
/*
512
 * Convert array of words in redundant (base=2^52) representation to array of
513
 * words in regular (base=2^64) one.
514
 */
515
static void from_words52(BN_ULONG *out, int out_bitsize, const BN_ULONG *in)
516
0
{
517
0
    int i;
518
0
    int out_len = BITS2WORD64_SIZE(out_bitsize);
519
520
0
    assert(out != NULL);
521
0
    assert(in != NULL);
522
523
0
    for (i = 0; i < out_len; i++)
524
0
        out[i] = 0;
525
526
0
    {
527
0
        uint8_t *out_str = (uint8_t *)out;
528
529
0
        for (; out_bitsize >= (2 * DIGIT_SIZE);
530
0
               out_bitsize -= (2 * DIGIT_SIZE), in += 2) {
531
0
            uint64_t digit;
532
533
0
            digit = in[0];
534
0
            memcpy(out_str, &digit, sizeof(digit));
535
0
            out_str += 6;
536
0
            digit = digit >> 48 | in[1] << 4;
537
0
            memcpy(out_str, &digit, sizeof(digit));
538
0
            out_str += 7;
539
0
        }
540
541
0
        if (out_bitsize > DIGIT_SIZE) {
542
0
            put_digit52(out_str, 7, in[0]);
543
0
            out_str += 6;
544
0
            out_bitsize -= DIGIT_SIZE;
545
0
            put_digit52(out_str, BITS2WORD8_SIZE(out_bitsize),
546
0
                        (in[1] << 4 | in[0] >> 48));
547
0
        } else if (out_bitsize) {
548
0
            put_digit52(out_str, BITS2WORD8_SIZE(out_bitsize), in[0]);
549
0
        }
550
0
    }
551
0
}
552
553
/*
554
 * Set bit at index |idx| in the words array |a|.
555
 * It does not do any boundaries checks, make sure the index is valid before
556
 * calling the function.
557
 */
558
static ossl_inline void set_bit(BN_ULONG *a, int idx)
559
0
{
560
0
    assert(a != NULL);
561
562
0
    {
563
0
        int i, j;
564
565
0
        i = idx / BN_BITS2;
566
0
        j = idx % BN_BITS2;
567
0
        a[i] |= (((BN_ULONG)1) << j);
568
0
    }
569
0
}
570
571
#endif