Coverage Report

Created: 2025-11-16 06:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/blst/src/vect.h
Line
Count
Source
1
/*
2
 * Copyright Supranational LLC
3
 * Licensed under the Apache License, Version 2.0, see LICENSE for details.
4
 * SPDX-License-Identifier: Apache-2.0
5
 */
6
#ifndef __BLS12_381_ASM_VECT_H__
7
#define __BLS12_381_ASM_VECT_H__
8
9
#include <stddef.h>
10
11
#if defined(__x86_64__) || defined(__aarch64__)
12
/* These are available even in ILP32 flavours, but even then they are
13
 * capable of performing 64-bit operations as efficiently as in *P64. */
14
typedef unsigned long long limb_t;
15
1.22M
# define LIMB_T_BITS    64
16
17
#elif defined(_WIN64)   /* Win64 is P64 */
18
typedef unsigned __int64 limb_t;
19
# define LIMB_T_BITS    64
20
21
#elif defined(__BLST_NO_ASM__) || defined(__wasm64__)
22
typedef unsigned int limb_t;
23
# define LIMB_T_BITS    32
24
# ifndef __BLST_NO_ASM__
25
#  define __BLST_NO_ASM__
26
# endif
27
28
#else                   /* 32 bits on 32-bit platforms, 64 - on 64-bit */
29
typedef unsigned long limb_t;
30
#  ifdef _LP64
31
#   define LIMB_T_BITS   64
32
#  else
33
#   define LIMB_T_BITS   32
34
#   define __BLST_NO_ASM__
35
#  endif
36
#endif
37
38
/*
39
 * Why isn't LIMB_T_BITS defined as 8*sizeof(limb_t)? Because the pre-processor
40
 * knows nothing about sizeof(anything)...
41
 */
42
#if LIMB_T_BITS == 64
43
153k
# define TO_LIMB_T(limb64)     limb64
44
#else
45
# define TO_LIMB_T(limb64)     (limb_t)limb64,(limb_t)(limb64>>32)
46
#endif
47
48
110
#define NLIMBS(bits)   (bits/LIMB_T_BITS)
49
50
typedef limb_t vec256[NLIMBS(256)];
51
typedef limb_t vec512[NLIMBS(512)];
52
typedef limb_t vec384[NLIMBS(384)];
53
typedef limb_t vec768[NLIMBS(768)];
54
typedef vec384 vec384x[2];      /* 0 is "real" part, 1 is "imaginary" */
55
56
typedef unsigned char byte;
57
#define TO_BYTES(limb64)    (byte)limb64,(byte)(limb64>>8),\
58
                            (byte)(limb64>>16),(byte)(limb64>>24),\
59
                            (byte)(limb64>>32),(byte)(limb64>>40),\
60
                            (byte)(limb64>>48),(byte)(limb64>>56)
61
typedef byte pow256[256/8];
62
63
/*
64
 * Internal Boolean type, Boolean by value, hence safe to cast to or
65
 * reinterpret as 'bool'.
66
 */
67
typedef limb_t bool_t;
68
69
/*
70
 * Assembly subroutines...
71
 */
72
#if defined(__ADX__) /* e.g. -march=broadwell */ && !defined(__BLST_PORTABLE__)\
73
                                                 && !defined(__BLST_NO_ASM__)
74
1.21k
# define mul_mont_sparse_256 mulx_mont_sparse_256
75
14
# define sqr_mont_sparse_256 sqrx_mont_sparse_256
76
203
# define from_mont_256 fromx_mont_256
77
10
# define redc_mont_256 redcx_mont_256
78
1.32M
# define mul_mont_384 mulx_mont_384
79
1.69M
# define sqr_mont_384 sqrx_mont_384
80
# define sqr_n_mul_mont_384 sqrx_n_mul_mont_384
81
45.2k
# define sqr_n_mul_mont_383 sqrx_n_mul_mont_383
82
# define mul_384 mulx_384
83
# define sqr_384 sqrx_384
84
138k
# define redc_mont_384 redcx_mont_384
85
4.64k
# define from_mont_384 fromx_mont_384
86
178
# define sgn0_pty_mont_384 sgn0x_pty_mont_384
87
212
# define sgn0_pty_mont_384x sgn0x_pty_mont_384x
88
1.44k
# define ct_inverse_mod_384 ctx_inverse_mod_384
89
#endif
90
91
void mul_mont_sparse_256(vec256 ret, const vec256 a, const vec256 b,
92
                         const vec256 p, limb_t n0);
93
void sqr_mont_sparse_256(vec256 ret, const vec256 a, const vec256 p, limb_t n0);
94
void redc_mont_256(vec256 ret, const vec512 a, const vec256 p, limb_t n0);
95
void from_mont_256(vec256 ret, const vec256 a, const vec256 p, limb_t n0);
96
97
void add_mod_256(vec256 ret, const vec256 a, const vec256 b, const vec256 p);
98
void sub_mod_256(vec256 ret, const vec256 a, const vec256 b, const vec256 p);
99
void mul_by_3_mod_256(vec256 ret, const vec256 a, const vec256 p);
100
void cneg_mod_256(vec256 ret, const vec256 a, bool_t flag, const vec256 p);
101
void lshift_mod_256(vec256 ret, const vec256 a, size_t count, const vec256 p);
102
void rshift_mod_256(vec256 ret, const vec256 a, size_t count, const vec256 p);
103
bool_t eucl_inverse_mod_256(vec256 ret, const vec256 a, const vec256 p,
104
                            const vec256 one);
105
limb_t check_mod_256(const pow256 a, const vec256 p);
106
limb_t add_n_check_mod_256(pow256 ret, const pow256 a, const pow256 b,
107
                                       const vec256 p);
108
limb_t sub_n_check_mod_256(pow256 ret, const pow256 a, const pow256 b,
109
                                       const vec256 p);
110
111
void vec_prefetch(const void *ptr, size_t len);
112
113
void mul_mont_384(vec384 ret, const vec384 a, const vec384 b,
114
                  const vec384 p, limb_t n0);
115
void sqr_mont_384(vec384 ret, const vec384 a, const vec384 p, limb_t n0);
116
void sqr_n_mul_mont_384(vec384 ret, const vec384 a, size_t count,
117
                        const vec384 p, limb_t n0, const vec384 b);
118
void sqr_n_mul_mont_383(vec384 ret, const vec384 a, size_t count,
119
                        const vec384 p, limb_t n0, const vec384 b);
120
121
void mul_384(vec768 ret, const vec384 a, const vec384 b);
122
void sqr_384(vec768 ret, const vec384 a);
123
void redc_mont_384(vec384 ret, const vec768 a, const vec384 p, limb_t n0);
124
void from_mont_384(vec384 ret, const vec384 a, const vec384 p, limb_t n0);
125
limb_t sgn0_pty_mont_384(const vec384 a, const vec384 p, limb_t n0);
126
limb_t sgn0_pty_mont_384x(const vec384x a, const vec384 p, limb_t n0);
127
limb_t sgn0_pty_mod_384(const vec384 a, const vec384 p);
128
limb_t sgn0_pty_mod_384x(const vec384x a, const vec384 p);
129
130
void add_mod_384(vec384 ret, const vec384 a, const vec384 b, const vec384 p);
131
void sub_mod_384(vec384 ret, const vec384 a, const vec384 b, const vec384 p);
132
void mul_by_8_mod_384(vec384 ret, const vec384 a, const vec384 p);
133
void mul_by_3_mod_384(vec384 ret, const vec384 a, const vec384 p);
134
void cneg_mod_384(vec384 ret, const vec384 a, bool_t flag, const vec384 p);
135
void lshift_mod_384(vec384 ret, const vec384 a, size_t count, const vec384 p);
136
void rshift_mod_384(vec384 ret, const vec384 a, size_t count, const vec384 p);
137
void div_by_2_mod_384(vec384 ret, const vec384 a, const vec384 p);
138
void ct_inverse_mod_384(vec768 ret, const vec384 inp, const vec384 mod,
139
                                                      const vec384 modx);
140
void ct_inverse_mod_256(vec512 ret, const vec256 inp, const vec256 mod,
141
                                                      const vec256 modx);
142
bool_t ct_is_square_mod_384(const vec384 inp, const vec384 mod);
143
144
#if defined(__ADX__) /* e.g. -march=broadwell */ && !defined(__BLST_PORTABLE__)
145
590k
# define mul_mont_384x mulx_mont_384x
146
718k
# define sqr_mont_384x sqrx_mont_384x
147
# define sqr_mont_382x sqrx_mont_382x
148
33.2k
# define mul_382x mulx_382x
149
82.3k
# define sqr_382x sqrx_382x
150
#endif
151
152
void mul_mont_384x(vec384x ret, const vec384x a, const vec384x b,
153
                   const vec384 p, limb_t n0);
154
void sqr_mont_384x(vec384x ret, const vec384x a, const vec384 p, limb_t n0);
155
void sqr_mont_382x(vec384x ret, const vec384x a, const vec384 p, limb_t n0);
156
void mul_382x(vec768 ret[2], const vec384x a, const vec384x b, const vec384 p);
157
void sqr_382x(vec768 ret[2], const vec384x a, const vec384 p);
158
159
void add_mod_384x(vec384x ret, const vec384x a, const vec384x b,
160
                  const vec384 p);
161
void sub_mod_384x(vec384x ret, const vec384x a, const vec384x b,
162
                  const vec384 p);
163
void mul_by_8_mod_384x(vec384x ret, const vec384x a, const vec384 p);
164
void mul_by_3_mod_384x(vec384x ret, const vec384x a, const vec384 p);
165
void mul_by_1_plus_i_mod_384x(vec384x ret, const vec384x a, const vec384 p);
166
void add_mod_384x384(vec768 ret, const vec768 a, const vec768 b,
167
                     const vec384 p);
168
void sub_mod_384x384(vec768 ret, const vec768 a, const vec768 b,
169
                     const vec384 p);
170
171
/*
172
 * C subroutines
173
 */
174
static void exp_mont_384(vec384 out, const vec384 inp, const byte *pow,
175
                         size_t pow_bits, const vec384 p, limb_t n0);
176
static void exp_mont_384x(vec384x out, const vec384x inp, const byte *pow,
177
                          size_t pow_bits, const vec384 p, limb_t n0);
178
static void div_by_zz(limb_t val[]);
179
static void div_by_z(limb_t val[]);
180
181
#ifdef __UINTPTR_TYPE__
182
typedef __UINTPTR_TYPE__ uptr_t;
183
#else
184
typedef const void *uptr_t;
185
#endif
186
187
#if !defined(restrict)
188
# if !defined(__STDC_VERSION__) || __STDC_VERSION__<199901
189
#  if defined(__GNUC__) && __GNUC__>=2
190
#   define restrict __restrict__
191
#  elif defined(_MSC_VER)
192
#   define restrict __restrict
193
#  else
194
#   define restrict
195
#  endif
196
# endif
197
#endif
198
199
#if !defined(inline) && !defined(__cplusplus)
200
# if !defined(__STDC_VERSION__) || __STDC_VERSION__<199901
201
#  if defined(__GNUC__) && __GNUC__>=2
202
#   define inline __inline__
203
#  elif defined(_MSC_VER)
204
#   define inline __inline
205
#  else
206
#   define inline
207
#  endif
208
# endif
209
#endif
210
211
/*
 * launder(var): an empty inline-asm statement that lists |var| as a
 * read-write register operand ("+r"), so the compiler has to assume the
 * value may have been changed at this point. When neither __GNUC__ nor
 * __clang__ is defined the macro expands to nothing.
 */
#if defined(__GNUC__) || defined(__clang__)
212
2.92M
# define launder(var) __asm__ __volatile__("" : "+r"(var))
213
#else
214
# define launder(var)
215
#endif
216
217
/*
 * Return bit |i| of the byte string |v| as 0 or 1. Byte i/8 is read and
 * bit i%8 of it is extracted, counting from the least significant bit.
 * The result goes through launder() before it is returned.
 */
static inline bool_t is_bit_set(const byte *v, size_t i)
218
0
{
219
0
    bool_t ret = (v[i/8] >> (i%8)) & 1;
220
0
    launder(ret);
221
0
    return ret;
222
0
}
223
224
/*
 * Return 1 if |c| is zero and 0 otherwise, computed without a comparison
 * or branch.
 */
static inline bool_t byte_is_zero(unsigned char c)
225
1.22M
{
226
1.22M
    /* (limb_t)c - 1 wraps around to all-ones only for c == 0; for any
     * other byte value the top bit of the difference stays clear. */
    limb_t ret = ((limb_t)(c) - 1) >> (LIMB_T_BITS - 1);
227
1.22M
    launder(ret);
228
1.22M
    return ret;
229
1.22M
}
230
231
/*
 * Return 1 if all |num| bytes at |a| are zero, 0 otherwise. Every byte
 * is OR-ed into an accumulator (no early exit) and the accumulator is
 * then tested with byte_is_zero(). With num == 0 the result is 1.
 */
static inline bool_t bytes_are_zero(const unsigned char *a, size_t num)
232
4
{
233
4
    unsigned char acc;
234
4
    size_t i;
235
236
192
    for (acc = 0, i = 0; i < num; i++)
237
188
        acc |= a[i];
238
239
4
    return byte_is_zero(acc);
240
4
}
241
242
/*
 * Conditionally swap the contents of |a| and |b|, limb by limb.
 *
 * |num| is a size in bytes; it is divided by sizeof(limb_t), so only
 * whole limbs are processed. Every limb of both buffers is read and
 * written back regardless of |cbit|.
 *
 * NOTE(review): the mask arithmetic assumes |cbit| is exactly 0 or 1
 * (mask becomes all-zeros or all-ones) - confirm against callers.
 */
static inline void vec_cswap(void *restrict a, void *restrict b, size_t num,
243
                             bool_t cbit)
244
0
{
245
0
    limb_t ai, *ap = (limb_t *)a;
246
0
    limb_t bi, *bp = (limb_t *)b;
247
0
    limb_t xorm, mask;
248
0
    size_t i;
249
0
250
0
    launder(cbit);
251
0
    mask = (limb_t)0 - cbit;    /* 0 -> 0, 1 -> all-ones */
252
0
253
0
    num /= sizeof(limb_t);      /* bytes -> limbs */
254
0
255
0
    for (i = 0; i < num; i++) {
256
0
        /* xorm is a^b when swapping and 0 otherwise, so the two XORs
         * below either exchange the limbs or leave them unchanged */
        xorm = ((ai = ap[i]) ^ (bi = bp[i])) & mask;
257
0
        ap[i] = ai ^ xorm;
258
0
        bp[i] = bi ^ xorm;
259
0
    }
260
0
}
261
262
/* ret = sel_a ? a : b */
263
void vec_select_32(void *ret, const void *a, const void *b, bool_t sel_a);
264
void vec_select_48(void *ret, const void *a, const void *b, bool_t sel_a);
265
void vec_select_96(void *ret, const void *a, const void *b, bool_t sel_a);
266
void vec_select_144(void *ret, const void *a, const void *b, bool_t sel_a);
267
void vec_select_192(void *ret, const void *a, const void *b, bool_t sel_a);
268
void vec_select_288(void *ret, const void *a, const void *b, bool_t sel_a);
269
/*
 * ret = sel_a ? a : b, for buffers of |num| bytes.
 *
 * Unless __BLST_NO_ASM__ is defined, sizes 32, 48, 96, 144, 192 and 288
 * are dispatched to the matching vec_select_<num> routine declared
 * above. Any other size, and every size when __BLST_NO_ASM__ is defined,
 * goes through the generic limb loop in the final "else" branch, which
 * processes num/sizeof(limb_t) whole limbs.
 *
 * NOTE(review): the generic path assumes |sel_a| is exactly 0 or 1 so
 * that the mask is all-zeros or all-ones - confirm against callers.
 */
static inline void vec_select(void *ret, const void *a, const void *b,
270
                              size_t num, bool_t sel_a)
271
1.62M
{
272
1.62M
    launder(sel_a);
273
1.62M
#ifndef __BLST_NO_ASM__
274
1.62M
    if (num == 32)          vec_select_32(ret, a, b, sel_a);
275
1.62M
    else if (num == 48)     vec_select_48(ret, a, b, sel_a);
276
1.62M
    else if (num == 96)     vec_select_96(ret, a, b, sel_a);
277
1.62M
    else if (num == 144)    vec_select_144(ret, a, b, sel_a);
278
491k
    else if (num == 192)    vec_select_192(ret, a, b, sel_a);
279
491k
    else if (num == 288)    vec_select_288(ret, a, b, sel_a);
280
#else
281
    /* never-taken "if" so that the "else" below has something to attach
     * to in this configuration as well */
    if (0) ;
282
#endif
283
0
    else {
284
0
        limb_t bi;
285
0
        volatile limb_t *rp = (limb_t *)ret;
286
0
        const limb_t *ap = (const limb_t *)a;
287
0
        const limb_t *bp = (const limb_t *)b;
288
0
        limb_t xorm, mask = (limb_t)0 - sel_a;  /* 0 or all-ones */
289
0
        size_t i;
290
291
0
        num /= sizeof(limb_t);                  /* bytes -> limbs */
292
293
0
        for (i = 0; i < num; i++) {
294
0
            /* b ^ ((a ^ b) & mask) yields a under an all-ones mask and
             * b under a zero mask */
            xorm = (ap[i] ^ (bi = bp[i])) & mask;
295
0
            rp[i] = bi ^ xorm;
296
0
        }
297
0
    }
298
1.62M
}
299
300
/*
 * Return 1 if limb |l| is zero and 0 otherwise, computed without a
 * comparison or branch.
 */
static inline bool_t is_zero(limb_t l)
301
1.23k
{
302
1.23k
    /* the top bit of ~l & (l - 1) is set only when l's own top bit is
     * clear and l - 1 has it set, i.e. when l - 1 wrapped around from 0 */
    limb_t ret = (~l & (l - 1)) >> (LIMB_T_BITS - 1);
303
1.23k
    launder(ret);
304
1.23k
    return ret;
305
1.23k
}
306
307
/*
 * Return 1 if the |num|-byte buffer at |a| is all zeros, 0 otherwise.
 *
 * Unless __BLST_NO_ASM__ is defined, sizes that are a multiple of 16
 * bytes are handed to vec_is_zero_16x(), which is only declared here
 * (its definition is not part of this header). Otherwise
 * num/sizeof(limb_t) whole limbs are OR-ed together without early exit
 * and the accumulator is tested with is_zero().
 */
static inline bool_t vec_is_zero(const void *a, size_t num)
308
326k
{
309
326k
    const limb_t *ap = (const limb_t *)a;
310
326k
    limb_t acc;
311
326k
    size_t i;
312
313
326k
#ifndef __BLST_NO_ASM__
314
326k
    bool_t vec_is_zero_16x(const void *a, size_t num);
315
326k
    if ((num & 15) == 0)
316
326k
        return vec_is_zero_16x(a, num);
317
0
#endif
318
319
0
    num /= sizeof(limb_t);      /* bytes -> limbs */
320
321
0
    for (acc = 0, i = 0; i < num; i++)
322
0
        acc |= ap[i];
323
324
0
    return is_zero(acc);
325
326k
}
326
327
/*
 * Return 1 if the |num|-byte buffers at |a| and |b| hold the same data,
 * 0 otherwise.
 *
 * Unless __BLST_NO_ASM__ is defined, sizes that are a multiple of 16
 * bytes are handed to vec_is_equal_16x(), which is only declared here
 * (its definition is not part of this header). Otherwise the XOR of
 * each pair of limbs (num/sizeof(limb_t) of them) is OR-ed into an
 * accumulator without early exit, and the accumulator is tested with
 * is_zero().
 */
static inline bool_t vec_is_equal(const void *a, const void *b, size_t num)
328
3.62k
{
329
3.62k
    const limb_t *ap = (const limb_t *)a;
330
3.62k
    const limb_t *bp = (const limb_t *)b;
331
3.62k
    limb_t acc;
332
3.62k
    size_t i;
333
334
3.62k
#ifndef __BLST_NO_ASM__
335
3.62k
    bool_t vec_is_equal_16x(const void *a, const void *b, size_t num);
336
3.62k
    if ((num & 15) == 0)
337
3.62k
        return vec_is_equal_16x(a, b, num);
338
0
#endif
339
340
0
    num /= sizeof(limb_t);      /* bytes -> limbs */
341
342
0
    for (acc = 0, i = 0; i < num; i++)
343
0
        acc |= ap[i] ^ bp[i];   /* stays 0 only if every limb matches */
344
345
0
    return is_zero(acc);
346
3.62k
}
347
348
/*
 * Apply cneg_mod_384() with the same |flag| and modulus |p| to both
 * components of |a| (index 0 is the "real" part, index 1 the
 * "imaginary" part), storing the results in the matching components of
 * |ret|.
 */
static inline void cneg_mod_384x(vec384x ret, const vec384x a, bool_t flag,
349
                                 const vec384 p)
350
0
{
351
0
    cneg_mod_384(ret[0], a[0], flag, p);
352
0
    cneg_mod_384(ret[1], a[1], flag, p);
353
0
}
354
355
/*
 * Copy |num| bytes from |a| to |ret|, one limb at a time. |num| is
 * divided by sizeof(limb_t), so only whole limbs are copied. |ret| is
 * declared restrict.
 */
static inline void vec_copy(void *restrict ret, const void *a, size_t num)
356
90.3k
{
357
90.3k
    limb_t *rp = (limb_t *)ret;
358
90.3k
    const limb_t *ap = (const limb_t *)a;
359
90.3k
    size_t i;
360
361
90.3k
    num /= sizeof(limb_t);      /* bytes -> limbs */
362
363
2.18M
    for (i = 0; i < num; i++)
364
2.09M
        rp[i] = ap[i];
365
90.3k
}
366
367
/*
 * Overwrite |num| bytes at |ret| with zeros, one limb at a time. |num|
 * is divided by sizeof(limb_t), so only whole limbs are cleared. The
 * stores go through a volatile-qualified pointer.
 */
static inline void vec_zero(void *ret, size_t num)
368
549
{
369
549
    volatile limb_t *rp = (volatile limb_t *)ret;
370
549
    size_t i;
371
372
549
    num /= sizeof(limb_t);      /* bytes -> limbs */
373
374
5.55k
    for (i = 0; i < num; i++)
375
5.01k
        rp[i] = 0;
376
377
549
#if defined(__GNUC__) || defined(__clang__)
378
549
    /* empty asm taking |ret| as input and declaring a "memory" clobber */
    __asm__ __volatile__("" : : "r"(ret) : "memory");
379
549
#endif
380
549
}
381
382
/*
 * Conditionally clear the |num|-byte buffer at |ret|: every limb is
 * AND-ed with a mask derived from |cbit|. |num| is divided by
 * sizeof(limb_t), so only whole limbs are processed, and each limb is
 * rewritten regardless of |cbit|.
 *
 * NOTE(review): the mask arithmetic assumes |cbit| is exactly 0 or 1 -
 * confirm against callers.
 */
static inline void vec_czero(void *ret, size_t num, bool_t cbit)
383
332
{
384
332
    limb_t *rp = (limb_t *)ret;
385
332
    size_t i;
386
332
    limb_t mask;
387
388
332
    launder(cbit);
389
332
    mask = (limb_t)0 - (cbit^1);    /* cbit 1 -> 0 (clear), 0 -> all-ones (keep) */
390
391
332
    num /= sizeof(limb_t);          /* bytes -> limbs */
392
393
3.37k
    for (i = 0; i < num; i++)
394
3.04k
        rp[i] &= mask;
395
332
}
396
397
/*
398
 * Some compilers get arguably overzealous(*) when passing pointer to
399
 * multi-dimensional array [such as vec384x] as 'const' argument.
400
 * General direction seems to be to legitimize such constification,
401
 * so it's argued that suppressing the warning is appropriate.
402
 *
403
 * (*)  http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1923.htm
404
 */
405
#if defined(__INTEL_COMPILER)
406
# pragma warning(disable:167)
407
# pragma warning(disable:556)
408
#elif defined(__GNUC__) && !defined(__clang__) && (__STDC_VERSION__-0) < 202311
409
# pragma GCC diagnostic ignored "-Wpedantic"
410
#elif defined(_MSC_VER)
411
# pragma warning(disable: 4127 4189)
412
#endif
413
414
#if !defined(__wasm__) && __STDC_HOSTED__-0 != 0
415
# include <stdlib.h>
416
#endif
417
418
#if defined(__GNUC__)
419
# ifndef alloca
420
#  define alloca(s) __builtin_alloca(s)
421
# endif
422
#elif defined(__sun)
423
# include <alloca.h>
424
#elif defined(_WIN32)
425
# include <malloc.h>
426
# ifndef alloca
427
#  define alloca(s) _alloca(s)
428
# endif
429
#endif
430
431
#endif /* __BLS12_381_ASM_VECT_H__ */