Coverage Report

Created: 2026-01-22 06:19

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c
Line
Count
Source
1
/* MIT License
2
 *
3
 * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
4
 * Copyright (c) 2022-2023 HACL* Contributors
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in all
14
 * copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
 * SOFTWARE.
23
 */
24
25
#include "Hacl_Chacha20Poly1305_256.h"
26
27
#include "internal/Hacl_Poly1305_256.h"
28
#include "internal/Hacl_Krmllib.h"
29
#include "libintvector.h"
30
31
static inline void
32
poly1305_padded_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint32_t len, uint8_t *text)
33
27.6k
{
34
27.6k
    uint32_t n = len / (uint32_t)16U;
35
27.6k
    uint32_t r = len % (uint32_t)16U;
36
27.6k
    uint8_t *blocks = text;
37
27.6k
    uint8_t *rem = text + n * (uint32_t)16U;
38
27.6k
    Lib_IntVector_Intrinsics_vec256 *pre0 = ctx + (uint32_t)5U;
39
27.6k
    Lib_IntVector_Intrinsics_vec256 *acc0 = ctx;
40
27.6k
    uint32_t sz_block = (uint32_t)64U;
41
27.6k
    uint32_t len0 = n * (uint32_t)16U / sz_block * sz_block;
42
27.6k
    uint8_t *t00 = blocks;
43
27.6k
    if (len0 > (uint32_t)0U) {
44
8.85k
        uint32_t bs = (uint32_t)64U;
45
8.85k
        uint8_t *text0 = t00;
46
8.85k
        Hacl_Impl_Poly1305_Field32xN_256_load_acc4(acc0, text0);
47
8.85k
        uint32_t len1 = len0 - bs;
48
8.85k
        uint8_t *text1 = t00 + bs;
49
8.85k
        uint32_t nb = len1 / bs;
50
21.8k
        for (uint32_t i = (uint32_t)0U; i < nb; i++) {
51
12.9k
            uint8_t *block = text1 + i * bs;
52
12.9k
            KRML_PRE_ALIGN(32)
53
12.9k
            Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
54
12.9k
            Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load64_le(block);
55
12.9k
            Lib_IntVector_Intrinsics_vec256
56
12.9k
                hi = Lib_IntVector_Intrinsics_vec256_load64_le(block + (uint32_t)32U);
57
12.9k
            Lib_IntVector_Intrinsics_vec256
58
12.9k
                mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
59
12.9k
            Lib_IntVector_Intrinsics_vec256
60
12.9k
                m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi);
61
12.9k
            Lib_IntVector_Intrinsics_vec256
62
12.9k
                m1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(lo, hi);
63
12.9k
            Lib_IntVector_Intrinsics_vec256
64
12.9k
                m2 = Lib_IntVector_Intrinsics_vec256_shift_right(m0, (uint32_t)48U);
65
12.9k
            Lib_IntVector_Intrinsics_vec256
66
12.9k
                m3 = Lib_IntVector_Intrinsics_vec256_shift_right(m1, (uint32_t)48U);
67
12.9k
            Lib_IntVector_Intrinsics_vec256
68
12.9k
                m4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(m0, m1);
69
12.9k
            Lib_IntVector_Intrinsics_vec256
70
12.9k
                t010 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m0, m1);
71
12.9k
            Lib_IntVector_Intrinsics_vec256
72
12.9k
                t30 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m2, m3);
73
12.9k
            Lib_IntVector_Intrinsics_vec256
74
12.9k
                t20 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)4U);
75
12.9k
            Lib_IntVector_Intrinsics_vec256 o20 = Lib_IntVector_Intrinsics_vec256_and(t20, mask260);
76
12.9k
            Lib_IntVector_Intrinsics_vec256
77
12.9k
                t10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t010, (uint32_t)26U);
78
12.9k
            Lib_IntVector_Intrinsics_vec256 o10 = Lib_IntVector_Intrinsics_vec256_and(t10, mask260);
79
12.9k
            Lib_IntVector_Intrinsics_vec256 o5 = Lib_IntVector_Intrinsics_vec256_and(t010, mask260);
80
12.9k
            Lib_IntVector_Intrinsics_vec256
81
12.9k
                t31 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)30U);
82
12.9k
            Lib_IntVector_Intrinsics_vec256 o30 = Lib_IntVector_Intrinsics_vec256_and(t31, mask260);
83
12.9k
            Lib_IntVector_Intrinsics_vec256
84
12.9k
                o40 = Lib_IntVector_Intrinsics_vec256_shift_right64(m4, (uint32_t)40U);
85
12.9k
            Lib_IntVector_Intrinsics_vec256 o00 = o5;
86
12.9k
            Lib_IntVector_Intrinsics_vec256 o11 = o10;
87
12.9k
            Lib_IntVector_Intrinsics_vec256 o21 = o20;
88
12.9k
            Lib_IntVector_Intrinsics_vec256 o31 = o30;
89
12.9k
            Lib_IntVector_Intrinsics_vec256 o41 = o40;
90
12.9k
            e[0U] = o00;
91
12.9k
            e[1U] = o11;
92
12.9k
            e[2U] = o21;
93
12.9k
            e[3U] = o31;
94
12.9k
            e[4U] = o41;
95
12.9k
            uint64_t b = (uint64_t)0x1000000U;
96
12.9k
            Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
97
12.9k
            Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
98
12.9k
            e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
99
12.9k
            Lib_IntVector_Intrinsics_vec256 *rn = pre0 + (uint32_t)10U;
100
12.9k
            Lib_IntVector_Intrinsics_vec256 *rn5 = pre0 + (uint32_t)15U;
101
12.9k
            Lib_IntVector_Intrinsics_vec256 r0 = rn[0U];
102
12.9k
            Lib_IntVector_Intrinsics_vec256 r1 = rn[1U];
103
12.9k
            Lib_IntVector_Intrinsics_vec256 r2 = rn[2U];
104
12.9k
            Lib_IntVector_Intrinsics_vec256 r3 = rn[3U];
105
12.9k
            Lib_IntVector_Intrinsics_vec256 r4 = rn[4U];
106
12.9k
            Lib_IntVector_Intrinsics_vec256 r51 = rn5[1U];
107
12.9k
            Lib_IntVector_Intrinsics_vec256 r52 = rn5[2U];
108
12.9k
            Lib_IntVector_Intrinsics_vec256 r53 = rn5[3U];
109
12.9k
            Lib_IntVector_Intrinsics_vec256 r54 = rn5[4U];
110
12.9k
            Lib_IntVector_Intrinsics_vec256 f10 = acc0[0U];
111
12.9k
            Lib_IntVector_Intrinsics_vec256 f110 = acc0[1U];
112
12.9k
            Lib_IntVector_Intrinsics_vec256 f120 = acc0[2U];
113
12.9k
            Lib_IntVector_Intrinsics_vec256 f130 = acc0[3U];
114
12.9k
            Lib_IntVector_Intrinsics_vec256 f140 = acc0[4U];
115
12.9k
            Lib_IntVector_Intrinsics_vec256 a0 = Lib_IntVector_Intrinsics_vec256_mul64(r0, f10);
116
12.9k
            Lib_IntVector_Intrinsics_vec256 a1 = Lib_IntVector_Intrinsics_vec256_mul64(r1, f10);
117
12.9k
            Lib_IntVector_Intrinsics_vec256 a2 = Lib_IntVector_Intrinsics_vec256_mul64(r2, f10);
118
12.9k
            Lib_IntVector_Intrinsics_vec256 a3 = Lib_IntVector_Intrinsics_vec256_mul64(r3, f10);
119
12.9k
            Lib_IntVector_Intrinsics_vec256 a4 = Lib_IntVector_Intrinsics_vec256_mul64(r4, f10);
120
12.9k
            Lib_IntVector_Intrinsics_vec256
121
12.9k
                a01 =
122
12.9k
                    Lib_IntVector_Intrinsics_vec256_add64(a0,
123
12.9k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r54, f110));
124
12.9k
            Lib_IntVector_Intrinsics_vec256
125
12.9k
                a11 =
126
12.9k
                    Lib_IntVector_Intrinsics_vec256_add64(a1,
127
12.9k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r0, f110));
128
12.9k
            Lib_IntVector_Intrinsics_vec256
129
12.9k
                a21 =
130
12.9k
                    Lib_IntVector_Intrinsics_vec256_add64(a2,
131
12.9k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r1, f110));
132
12.9k
            Lib_IntVector_Intrinsics_vec256
133
12.9k
                a31 =
134
12.9k
                    Lib_IntVector_Intrinsics_vec256_add64(a3,
135
12.9k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r2, f110));
136
12.9k
            Lib_IntVector_Intrinsics_vec256
137
12.9k
                a41 =
138
12.9k
                    Lib_IntVector_Intrinsics_vec256_add64(a4,
139
12.9k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r3, f110));
140
12.9k
            Lib_IntVector_Intrinsics_vec256
141
12.9k
                a02 =
142
12.9k
                    Lib_IntVector_Intrinsics_vec256_add64(a01,
143
12.9k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r53, f120));
144
12.9k
            Lib_IntVector_Intrinsics_vec256
145
12.9k
                a12 =
146
12.9k
                    Lib_IntVector_Intrinsics_vec256_add64(a11,
147
12.9k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r54, f120));
148
12.9k
            Lib_IntVector_Intrinsics_vec256
149
12.9k
                a22 =
150
12.9k
                    Lib_IntVector_Intrinsics_vec256_add64(a21,
151
12.9k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r0, f120));
152
12.9k
            Lib_IntVector_Intrinsics_vec256
153
12.9k
                a32 =
154
12.9k
                    Lib_IntVector_Intrinsics_vec256_add64(a31,
155
12.9k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r1, f120));
156
12.9k
            Lib_IntVector_Intrinsics_vec256
157
12.9k
                a42 =
158
12.9k
                    Lib_IntVector_Intrinsics_vec256_add64(a41,
159
12.9k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r2, f120));
160
12.9k
            Lib_IntVector_Intrinsics_vec256
161
12.9k
                a03 =
162
12.9k
                    Lib_IntVector_Intrinsics_vec256_add64(a02,
163
12.9k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r52, f130));
164
12.9k
            Lib_IntVector_Intrinsics_vec256
165
12.9k
                a13 =
166
12.9k
                    Lib_IntVector_Intrinsics_vec256_add64(a12,
167
12.9k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r53, f130));
168
12.9k
            Lib_IntVector_Intrinsics_vec256
169
12.9k
                a23 =
170
12.9k
                    Lib_IntVector_Intrinsics_vec256_add64(a22,
171
12.9k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r54, f130));
172
12.9k
            Lib_IntVector_Intrinsics_vec256
173
12.9k
                a33 =
174
12.9k
                    Lib_IntVector_Intrinsics_vec256_add64(a32,
175
12.9k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r0, f130));
176
12.9k
            Lib_IntVector_Intrinsics_vec256
177
12.9k
                a43 =
178
12.9k
                    Lib_IntVector_Intrinsics_vec256_add64(a42,
179
12.9k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r1, f130));
180
12.9k
            Lib_IntVector_Intrinsics_vec256
181
12.9k
                a04 =
182
12.9k
                    Lib_IntVector_Intrinsics_vec256_add64(a03,
183
12.9k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r51, f140));
184
12.9k
            Lib_IntVector_Intrinsics_vec256
185
12.9k
                a14 =
186
12.9k
                    Lib_IntVector_Intrinsics_vec256_add64(a13,
187
12.9k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r52, f140));
188
12.9k
            Lib_IntVector_Intrinsics_vec256
189
12.9k
                a24 =
190
12.9k
                    Lib_IntVector_Intrinsics_vec256_add64(a23,
191
12.9k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r53, f140));
192
12.9k
            Lib_IntVector_Intrinsics_vec256
193
12.9k
                a34 =
194
12.9k
                    Lib_IntVector_Intrinsics_vec256_add64(a33,
195
12.9k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r54, f140));
196
12.9k
            Lib_IntVector_Intrinsics_vec256
197
12.9k
                a44 =
198
12.9k
                    Lib_IntVector_Intrinsics_vec256_add64(a43,
199
12.9k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r0, f140));
200
12.9k
            Lib_IntVector_Intrinsics_vec256 t01 = a04;
201
12.9k
            Lib_IntVector_Intrinsics_vec256 t1 = a14;
202
12.9k
            Lib_IntVector_Intrinsics_vec256 t2 = a24;
203
12.9k
            Lib_IntVector_Intrinsics_vec256 t3 = a34;
204
12.9k
            Lib_IntVector_Intrinsics_vec256 t4 = a44;
205
12.9k
            Lib_IntVector_Intrinsics_vec256
206
12.9k
                mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
207
12.9k
            Lib_IntVector_Intrinsics_vec256
208
12.9k
                z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
209
12.9k
            Lib_IntVector_Intrinsics_vec256
210
12.9k
                z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
211
12.9k
            Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
212
12.9k
            Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
213
12.9k
            Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
214
12.9k
            Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
215
12.9k
            Lib_IntVector_Intrinsics_vec256
216
12.9k
                z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
217
12.9k
            Lib_IntVector_Intrinsics_vec256
218
12.9k
                z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
219
12.9k
            Lib_IntVector_Intrinsics_vec256
220
12.9k
                t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
221
12.9k
            Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
222
12.9k
            Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
223
12.9k
            Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
224
12.9k
            Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
225
12.9k
            Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
226
12.9k
            Lib_IntVector_Intrinsics_vec256
227
12.9k
                z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
228
12.9k
            Lib_IntVector_Intrinsics_vec256
229
12.9k
                z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
230
12.9k
            Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
231
12.9k
            Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
232
12.9k
            Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
233
12.9k
            Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
234
12.9k
            Lib_IntVector_Intrinsics_vec256
235
12.9k
                z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
236
12.9k
            Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
237
12.9k
            Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
238
12.9k
            Lib_IntVector_Intrinsics_vec256 o01 = x02;
239
12.9k
            Lib_IntVector_Intrinsics_vec256 o12 = x12;
240
12.9k
            Lib_IntVector_Intrinsics_vec256 o22 = x21;
241
12.9k
            Lib_IntVector_Intrinsics_vec256 o32 = x32;
242
12.9k
            Lib_IntVector_Intrinsics_vec256 o42 = x42;
243
12.9k
            acc0[0U] = o01;
244
12.9k
            acc0[1U] = o12;
245
12.9k
            acc0[2U] = o22;
246
12.9k
            acc0[3U] = o32;
247
12.9k
            acc0[4U] = o42;
248
12.9k
            Lib_IntVector_Intrinsics_vec256 f100 = acc0[0U];
249
12.9k
            Lib_IntVector_Intrinsics_vec256 f11 = acc0[1U];
250
12.9k
            Lib_IntVector_Intrinsics_vec256 f12 = acc0[2U];
251
12.9k
            Lib_IntVector_Intrinsics_vec256 f13 = acc0[3U];
252
12.9k
            Lib_IntVector_Intrinsics_vec256 f14 = acc0[4U];
253
12.9k
            Lib_IntVector_Intrinsics_vec256 f20 = e[0U];
254
12.9k
            Lib_IntVector_Intrinsics_vec256 f21 = e[1U];
255
12.9k
            Lib_IntVector_Intrinsics_vec256 f22 = e[2U];
256
12.9k
            Lib_IntVector_Intrinsics_vec256 f23 = e[3U];
257
12.9k
            Lib_IntVector_Intrinsics_vec256 f24 = e[4U];
258
12.9k
            Lib_IntVector_Intrinsics_vec256 o0 = Lib_IntVector_Intrinsics_vec256_add64(f100, f20);
259
12.9k
            Lib_IntVector_Intrinsics_vec256 o1 = Lib_IntVector_Intrinsics_vec256_add64(f11, f21);
260
12.9k
            Lib_IntVector_Intrinsics_vec256 o2 = Lib_IntVector_Intrinsics_vec256_add64(f12, f22);
261
12.9k
            Lib_IntVector_Intrinsics_vec256 o3 = Lib_IntVector_Intrinsics_vec256_add64(f13, f23);
262
12.9k
            Lib_IntVector_Intrinsics_vec256 o4 = Lib_IntVector_Intrinsics_vec256_add64(f14, f24);
263
12.9k
            acc0[0U] = o0;
264
12.9k
            acc0[1U] = o1;
265
12.9k
            acc0[2U] = o2;
266
12.9k
            acc0[3U] = o3;
267
12.9k
            acc0[4U] = o4;
268
12.9k
        }
269
8.85k
        Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize(acc0, pre0);
270
8.85k
    }
271
27.6k
    uint32_t len1 = n * (uint32_t)16U - len0;
272
27.6k
    uint8_t *t10 = blocks + len0;
273
27.6k
    uint32_t nb = len1 / (uint32_t)16U;
274
27.6k
    uint32_t rem1 = len1 % (uint32_t)16U;
275
37.9k
    for (uint32_t i = (uint32_t)0U; i < nb; i++) {
276
10.2k
        uint8_t *block = t10 + i * (uint32_t)16U;
277
10.2k
        KRML_PRE_ALIGN(32)
278
10.2k
        Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
279
10.2k
        uint64_t u0 = load64_le(block);
280
10.2k
        uint64_t lo = u0;
281
10.2k
        uint64_t u = load64_le(block + (uint32_t)8U);
282
10.2k
        uint64_t hi = u;
283
10.2k
        Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
284
10.2k
        Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
285
10.2k
        Lib_IntVector_Intrinsics_vec256
286
10.2k
            f010 =
287
10.2k
                Lib_IntVector_Intrinsics_vec256_and(f0,
288
10.2k
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
289
10.2k
        Lib_IntVector_Intrinsics_vec256
290
10.2k
            f110 =
291
10.2k
                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
292
10.2k
                                                                                                  (uint32_t)26U),
293
10.2k
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
294
10.2k
        Lib_IntVector_Intrinsics_vec256
295
10.2k
            f20 =
296
10.2k
                Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
297
10.2k
                                                                                                 (uint32_t)52U),
298
10.2k
                                                   Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
299
10.2k
                                                                                                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
300
10.2k
                                                                                                (uint32_t)12U));
301
10.2k
        Lib_IntVector_Intrinsics_vec256
302
10.2k
            f30 =
303
10.2k
                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
304
10.2k
                                                                                                  (uint32_t)14U),
305
10.2k
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
306
10.2k
        Lib_IntVector_Intrinsics_vec256
307
10.2k
            f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
308
10.2k
        Lib_IntVector_Intrinsics_vec256 f01 = f010;
309
10.2k
        Lib_IntVector_Intrinsics_vec256 f111 = f110;
310
10.2k
        Lib_IntVector_Intrinsics_vec256 f2 = f20;
311
10.2k
        Lib_IntVector_Intrinsics_vec256 f3 = f30;
312
10.2k
        Lib_IntVector_Intrinsics_vec256 f41 = f40;
313
10.2k
        e[0U] = f01;
314
10.2k
        e[1U] = f111;
315
10.2k
        e[2U] = f2;
316
10.2k
        e[3U] = f3;
317
10.2k
        e[4U] = f41;
318
10.2k
        uint64_t b = (uint64_t)0x1000000U;
319
10.2k
        Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
320
10.2k
        Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
321
10.2k
        e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
322
10.2k
        Lib_IntVector_Intrinsics_vec256 *r1 = pre0;
323
10.2k
        Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U;
324
10.2k
        Lib_IntVector_Intrinsics_vec256 r0 = r1[0U];
325
10.2k
        Lib_IntVector_Intrinsics_vec256 r11 = r1[1U];
326
10.2k
        Lib_IntVector_Intrinsics_vec256 r2 = r1[2U];
327
10.2k
        Lib_IntVector_Intrinsics_vec256 r3 = r1[3U];
328
10.2k
        Lib_IntVector_Intrinsics_vec256 r4 = r1[4U];
329
10.2k
        Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
330
10.2k
        Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
331
10.2k
        Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
332
10.2k
        Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
333
10.2k
        Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
334
10.2k
        Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
335
10.2k
        Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
336
10.2k
        Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
337
10.2k
        Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
338
10.2k
        Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U];
339
10.2k
        Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U];
340
10.2k
        Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U];
341
10.2k
        Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U];
342
10.2k
        Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U];
343
10.2k
        Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
344
10.2k
        Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
345
10.2k
        Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
346
10.2k
        Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
347
10.2k
        Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
348
10.2k
        Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
349
10.2k
        Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01);
350
10.2k
        Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
351
10.2k
        Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
352
10.2k
        Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
353
10.2k
        Lib_IntVector_Intrinsics_vec256
354
10.2k
            a03 =
355
10.2k
                Lib_IntVector_Intrinsics_vec256_add64(a02,
356
10.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
357
10.2k
        Lib_IntVector_Intrinsics_vec256
358
10.2k
            a13 =
359
10.2k
                Lib_IntVector_Intrinsics_vec256_add64(a12,
360
10.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
361
10.2k
        Lib_IntVector_Intrinsics_vec256
362
10.2k
            a23 =
363
10.2k
                Lib_IntVector_Intrinsics_vec256_add64(a22,
364
10.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a11));
365
10.2k
        Lib_IntVector_Intrinsics_vec256
366
10.2k
            a33 =
367
10.2k
                Lib_IntVector_Intrinsics_vec256_add64(a32,
368
10.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
369
10.2k
        Lib_IntVector_Intrinsics_vec256
370
10.2k
            a43 =
371
10.2k
                Lib_IntVector_Intrinsics_vec256_add64(a42,
372
10.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
373
10.2k
        Lib_IntVector_Intrinsics_vec256
374
10.2k
            a04 =
375
10.2k
                Lib_IntVector_Intrinsics_vec256_add64(a03,
376
10.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
377
10.2k
        Lib_IntVector_Intrinsics_vec256
378
10.2k
            a14 =
379
10.2k
                Lib_IntVector_Intrinsics_vec256_add64(a13,
380
10.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
381
10.2k
        Lib_IntVector_Intrinsics_vec256
382
10.2k
            a24 =
383
10.2k
                Lib_IntVector_Intrinsics_vec256_add64(a23,
384
10.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
385
10.2k
        Lib_IntVector_Intrinsics_vec256
386
10.2k
            a34 =
387
10.2k
                Lib_IntVector_Intrinsics_vec256_add64(a33,
388
10.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a21));
389
10.2k
        Lib_IntVector_Intrinsics_vec256
390
10.2k
            a44 =
391
10.2k
                Lib_IntVector_Intrinsics_vec256_add64(a43,
392
10.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
393
10.2k
        Lib_IntVector_Intrinsics_vec256
394
10.2k
            a05 =
395
10.2k
                Lib_IntVector_Intrinsics_vec256_add64(a04,
396
10.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
397
10.2k
        Lib_IntVector_Intrinsics_vec256
398
10.2k
            a15 =
399
10.2k
                Lib_IntVector_Intrinsics_vec256_add64(a14,
400
10.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
401
10.2k
        Lib_IntVector_Intrinsics_vec256
402
10.2k
            a25 =
403
10.2k
                Lib_IntVector_Intrinsics_vec256_add64(a24,
404
10.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
405
10.2k
        Lib_IntVector_Intrinsics_vec256
406
10.2k
            a35 =
407
10.2k
                Lib_IntVector_Intrinsics_vec256_add64(a34,
408
10.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
409
10.2k
        Lib_IntVector_Intrinsics_vec256
410
10.2k
            a45 =
411
10.2k
                Lib_IntVector_Intrinsics_vec256_add64(a44,
412
10.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a31));
413
10.2k
        Lib_IntVector_Intrinsics_vec256
414
10.2k
            a06 =
415
10.2k
                Lib_IntVector_Intrinsics_vec256_add64(a05,
416
10.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
417
10.2k
        Lib_IntVector_Intrinsics_vec256
418
10.2k
            a16 =
419
10.2k
                Lib_IntVector_Intrinsics_vec256_add64(a15,
420
10.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
421
10.2k
        Lib_IntVector_Intrinsics_vec256
422
10.2k
            a26 =
423
10.2k
                Lib_IntVector_Intrinsics_vec256_add64(a25,
424
10.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
425
10.2k
        Lib_IntVector_Intrinsics_vec256
426
10.2k
            a36 =
427
10.2k
                Lib_IntVector_Intrinsics_vec256_add64(a35,
428
10.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
429
10.2k
        Lib_IntVector_Intrinsics_vec256
430
10.2k
            a46 =
431
10.2k
                Lib_IntVector_Intrinsics_vec256_add64(a45,
432
10.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
433
10.2k
        Lib_IntVector_Intrinsics_vec256 t01 = a06;
434
10.2k
        Lib_IntVector_Intrinsics_vec256 t11 = a16;
435
10.2k
        Lib_IntVector_Intrinsics_vec256 t2 = a26;
436
10.2k
        Lib_IntVector_Intrinsics_vec256 t3 = a36;
437
10.2k
        Lib_IntVector_Intrinsics_vec256 t4 = a46;
438
10.2k
        Lib_IntVector_Intrinsics_vec256
439
10.2k
            mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
440
10.2k
        Lib_IntVector_Intrinsics_vec256
441
10.2k
            z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
442
10.2k
        Lib_IntVector_Intrinsics_vec256
443
10.2k
            z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
444
10.2k
        Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
445
10.2k
        Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
446
10.2k
        Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0);
447
10.2k
        Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
448
10.2k
        Lib_IntVector_Intrinsics_vec256
449
10.2k
            z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
450
10.2k
        Lib_IntVector_Intrinsics_vec256
451
10.2k
            z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
452
10.2k
        Lib_IntVector_Intrinsics_vec256
453
10.2k
            t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
454
10.2k
        Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
455
10.2k
        Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
456
10.2k
        Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
457
10.2k
        Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
458
10.2k
        Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
459
10.2k
        Lib_IntVector_Intrinsics_vec256
460
10.2k
            z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
461
10.2k
        Lib_IntVector_Intrinsics_vec256
462
10.2k
            z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
463
10.2k
        Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
464
10.2k
        Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
465
10.2k
        Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
466
10.2k
        Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
467
10.2k
        Lib_IntVector_Intrinsics_vec256
468
10.2k
            z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
469
10.2k
        Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
470
10.2k
        Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
471
10.2k
        Lib_IntVector_Intrinsics_vec256 o0 = x02;
472
10.2k
        Lib_IntVector_Intrinsics_vec256 o1 = x12;
473
10.2k
        Lib_IntVector_Intrinsics_vec256 o2 = x21;
474
10.2k
        Lib_IntVector_Intrinsics_vec256 o3 = x32;
475
10.2k
        Lib_IntVector_Intrinsics_vec256 o4 = x42;
476
10.2k
        acc0[0U] = o0;
477
10.2k
        acc0[1U] = o1;
478
10.2k
        acc0[2U] = o2;
479
10.2k
        acc0[3U] = o3;
480
10.2k
        acc0[4U] = o4;
481
10.2k
    }
482
27.6k
    if (rem1 > (uint32_t)0U) {
483
0
        uint8_t *last = t10 + nb * (uint32_t)16U;
484
0
        KRML_PRE_ALIGN(32)
485
0
        Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
486
0
        uint8_t tmp[16U] = { 0U };
487
0
        memcpy(tmp, last, rem1 * sizeof(uint8_t));
488
0
        uint64_t u0 = load64_le(tmp);
489
0
        uint64_t lo = u0;
490
0
        uint64_t u = load64_le(tmp + (uint32_t)8U);
491
0
        uint64_t hi = u;
492
0
        Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
493
0
        Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
494
0
        Lib_IntVector_Intrinsics_vec256
495
0
            f010 =
496
0
                Lib_IntVector_Intrinsics_vec256_and(f0,
497
0
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
498
0
        Lib_IntVector_Intrinsics_vec256
499
0
            f110 =
500
0
                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
501
0
                                                                                                  (uint32_t)26U),
502
0
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
503
0
        Lib_IntVector_Intrinsics_vec256
504
0
            f20 =
505
0
                Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
506
0
                                                                                                 (uint32_t)52U),
507
0
                                                   Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
508
0
                                                                                                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
509
0
                                                                                                (uint32_t)12U));
510
0
        Lib_IntVector_Intrinsics_vec256
511
0
            f30 =
512
0
                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
513
0
                                                                                                  (uint32_t)14U),
514
0
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
515
0
        Lib_IntVector_Intrinsics_vec256
516
0
            f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
517
0
        Lib_IntVector_Intrinsics_vec256 f01 = f010;
518
0
        Lib_IntVector_Intrinsics_vec256 f111 = f110;
519
0
        Lib_IntVector_Intrinsics_vec256 f2 = f20;
520
0
        Lib_IntVector_Intrinsics_vec256 f3 = f30;
521
0
        Lib_IntVector_Intrinsics_vec256 f4 = f40;
522
0
        e[0U] = f01;
523
0
        e[1U] = f111;
524
0
        e[2U] = f2;
525
0
        e[3U] = f3;
526
0
        e[4U] = f4;
527
0
        uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U;
528
0
        Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
529
0
        Lib_IntVector_Intrinsics_vec256 fi = e[rem1 * (uint32_t)8U / (uint32_t)26U];
530
0
        e[rem1 * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec256_or(fi, mask);
531
0
        Lib_IntVector_Intrinsics_vec256 *r1 = pre0;
532
0
        Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U;
533
0
        Lib_IntVector_Intrinsics_vec256 r0 = r1[0U];
534
0
        Lib_IntVector_Intrinsics_vec256 r11 = r1[1U];
535
0
        Lib_IntVector_Intrinsics_vec256 r2 = r1[2U];
536
0
        Lib_IntVector_Intrinsics_vec256 r3 = r1[3U];
537
0
        Lib_IntVector_Intrinsics_vec256 r4 = r1[4U];
538
0
        Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
539
0
        Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
540
0
        Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
541
0
        Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
542
0
        Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
543
0
        Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
544
0
        Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
545
0
        Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
546
0
        Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
547
0
        Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U];
548
0
        Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U];
549
0
        Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U];
550
0
        Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U];
551
0
        Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U];
552
0
        Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
553
0
        Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
554
0
        Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
555
0
        Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
556
0
        Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
557
0
        Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
558
0
        Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01);
559
0
        Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
560
0
        Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
561
0
        Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
562
0
        Lib_IntVector_Intrinsics_vec256
563
0
            a03 =
564
0
                Lib_IntVector_Intrinsics_vec256_add64(a02,
565
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
566
0
        Lib_IntVector_Intrinsics_vec256
567
0
            a13 =
568
0
                Lib_IntVector_Intrinsics_vec256_add64(a12,
569
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
570
0
        Lib_IntVector_Intrinsics_vec256
571
0
            a23 =
572
0
                Lib_IntVector_Intrinsics_vec256_add64(a22,
573
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a11));
574
0
        Lib_IntVector_Intrinsics_vec256
575
0
            a33 =
576
0
                Lib_IntVector_Intrinsics_vec256_add64(a32,
577
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
578
0
        Lib_IntVector_Intrinsics_vec256
579
0
            a43 =
580
0
                Lib_IntVector_Intrinsics_vec256_add64(a42,
581
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
582
0
        Lib_IntVector_Intrinsics_vec256
583
0
            a04 =
584
0
                Lib_IntVector_Intrinsics_vec256_add64(a03,
585
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
586
0
        Lib_IntVector_Intrinsics_vec256
587
0
            a14 =
588
0
                Lib_IntVector_Intrinsics_vec256_add64(a13,
589
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
590
0
        Lib_IntVector_Intrinsics_vec256
591
0
            a24 =
592
0
                Lib_IntVector_Intrinsics_vec256_add64(a23,
593
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
594
0
        Lib_IntVector_Intrinsics_vec256
595
0
            a34 =
596
0
                Lib_IntVector_Intrinsics_vec256_add64(a33,
597
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a21));
598
0
        Lib_IntVector_Intrinsics_vec256
599
0
            a44 =
600
0
                Lib_IntVector_Intrinsics_vec256_add64(a43,
601
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
602
0
        Lib_IntVector_Intrinsics_vec256
603
0
            a05 =
604
0
                Lib_IntVector_Intrinsics_vec256_add64(a04,
605
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
606
0
        Lib_IntVector_Intrinsics_vec256
607
0
            a15 =
608
0
                Lib_IntVector_Intrinsics_vec256_add64(a14,
609
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
610
0
        Lib_IntVector_Intrinsics_vec256
611
0
            a25 =
612
0
                Lib_IntVector_Intrinsics_vec256_add64(a24,
613
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
614
0
        Lib_IntVector_Intrinsics_vec256
615
0
            a35 =
616
0
                Lib_IntVector_Intrinsics_vec256_add64(a34,
617
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
618
0
        Lib_IntVector_Intrinsics_vec256
619
0
            a45 =
620
0
                Lib_IntVector_Intrinsics_vec256_add64(a44,
621
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a31));
622
0
        Lib_IntVector_Intrinsics_vec256
623
0
            a06 =
624
0
                Lib_IntVector_Intrinsics_vec256_add64(a05,
625
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
626
0
        Lib_IntVector_Intrinsics_vec256
627
0
            a16 =
628
0
                Lib_IntVector_Intrinsics_vec256_add64(a15,
629
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
630
0
        Lib_IntVector_Intrinsics_vec256
631
0
            a26 =
632
0
                Lib_IntVector_Intrinsics_vec256_add64(a25,
633
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
634
0
        Lib_IntVector_Intrinsics_vec256
635
0
            a36 =
636
0
                Lib_IntVector_Intrinsics_vec256_add64(a35,
637
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
638
0
        Lib_IntVector_Intrinsics_vec256
639
0
            a46 =
640
0
                Lib_IntVector_Intrinsics_vec256_add64(a45,
641
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
642
0
        Lib_IntVector_Intrinsics_vec256 t01 = a06;
643
0
        Lib_IntVector_Intrinsics_vec256 t11 = a16;
644
0
        Lib_IntVector_Intrinsics_vec256 t2 = a26;
645
0
        Lib_IntVector_Intrinsics_vec256 t3 = a36;
646
0
        Lib_IntVector_Intrinsics_vec256 t4 = a46;
647
0
        Lib_IntVector_Intrinsics_vec256
648
0
            mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
649
0
        Lib_IntVector_Intrinsics_vec256
650
0
            z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
651
0
        Lib_IntVector_Intrinsics_vec256
652
0
            z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
653
0
        Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
654
0
        Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
655
0
        Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0);
656
0
        Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
657
0
        Lib_IntVector_Intrinsics_vec256
658
0
            z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
659
0
        Lib_IntVector_Intrinsics_vec256
660
0
            z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
661
0
        Lib_IntVector_Intrinsics_vec256
662
0
            t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
663
0
        Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
664
0
        Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
665
0
        Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
666
0
        Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
667
0
        Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
668
0
        Lib_IntVector_Intrinsics_vec256
669
0
            z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
670
0
        Lib_IntVector_Intrinsics_vec256
671
0
            z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
672
0
        Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
673
0
        Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
674
0
        Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
675
0
        Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
676
0
        Lib_IntVector_Intrinsics_vec256
677
0
            z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
678
0
        Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
679
0
        Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
680
0
        Lib_IntVector_Intrinsics_vec256 o0 = x02;
681
0
        Lib_IntVector_Intrinsics_vec256 o1 = x12;
682
0
        Lib_IntVector_Intrinsics_vec256 o2 = x21;
683
0
        Lib_IntVector_Intrinsics_vec256 o3 = x32;
684
0
        Lib_IntVector_Intrinsics_vec256 o4 = x42;
685
0
        acc0[0U] = o0;
686
0
        acc0[1U] = o1;
687
0
        acc0[2U] = o2;
688
0
        acc0[3U] = o3;
689
0
        acc0[4U] = o4;
690
0
    }
691
27.6k
    uint8_t tmp[16U] = { 0U };
692
27.6k
    memcpy(tmp, rem, r * sizeof(uint8_t));
693
27.6k
    if (r > (uint32_t)0U) {
694
22.9k
        Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U;
695
22.9k
        Lib_IntVector_Intrinsics_vec256 *acc = ctx;
696
22.9k
        KRML_PRE_ALIGN(32)
697
22.9k
        Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
698
22.9k
        uint64_t u0 = load64_le(tmp);
699
22.9k
        uint64_t lo = u0;
700
22.9k
        uint64_t u = load64_le(tmp + (uint32_t)8U);
701
22.9k
        uint64_t hi = u;
702
22.9k
        Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
703
22.9k
        Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
704
22.9k
        Lib_IntVector_Intrinsics_vec256
705
22.9k
            f010 =
706
22.9k
                Lib_IntVector_Intrinsics_vec256_and(f0,
707
22.9k
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
708
22.9k
        Lib_IntVector_Intrinsics_vec256
709
22.9k
            f110 =
710
22.9k
                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
711
22.9k
                                                                                                  (uint32_t)26U),
712
22.9k
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
713
22.9k
        Lib_IntVector_Intrinsics_vec256
714
22.9k
            f20 =
715
22.9k
                Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
716
22.9k
                                                                                                 (uint32_t)52U),
717
22.9k
                                                   Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
718
22.9k
                                                                                                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
719
22.9k
                                                                                                (uint32_t)12U));
720
22.9k
        Lib_IntVector_Intrinsics_vec256
721
22.9k
            f30 =
722
22.9k
                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
723
22.9k
                                                                                                  (uint32_t)14U),
724
22.9k
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
725
22.9k
        Lib_IntVector_Intrinsics_vec256
726
22.9k
            f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
727
22.9k
        Lib_IntVector_Intrinsics_vec256 f01 = f010;
728
22.9k
        Lib_IntVector_Intrinsics_vec256 f111 = f110;
729
22.9k
        Lib_IntVector_Intrinsics_vec256 f2 = f20;
730
22.9k
        Lib_IntVector_Intrinsics_vec256 f3 = f30;
731
22.9k
        Lib_IntVector_Intrinsics_vec256 f41 = f40;
732
22.9k
        e[0U] = f01;
733
22.9k
        e[1U] = f111;
734
22.9k
        e[2U] = f2;
735
22.9k
        e[3U] = f3;
736
22.9k
        e[4U] = f41;
737
22.9k
        uint64_t b = (uint64_t)0x1000000U;
738
22.9k
        Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
739
22.9k
        Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
740
22.9k
        e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
741
22.9k
        Lib_IntVector_Intrinsics_vec256 *r1 = pre;
742
22.9k
        Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U;
743
22.9k
        Lib_IntVector_Intrinsics_vec256 r0 = r1[0U];
744
22.9k
        Lib_IntVector_Intrinsics_vec256 r11 = r1[1U];
745
22.9k
        Lib_IntVector_Intrinsics_vec256 r2 = r1[2U];
746
22.9k
        Lib_IntVector_Intrinsics_vec256 r3 = r1[3U];
747
22.9k
        Lib_IntVector_Intrinsics_vec256 r4 = r1[4U];
748
22.9k
        Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
749
22.9k
        Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
750
22.9k
        Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
751
22.9k
        Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
752
22.9k
        Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
753
22.9k
        Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
754
22.9k
        Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
755
22.9k
        Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
756
22.9k
        Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
757
22.9k
        Lib_IntVector_Intrinsics_vec256 a0 = acc[0U];
758
22.9k
        Lib_IntVector_Intrinsics_vec256 a1 = acc[1U];
759
22.9k
        Lib_IntVector_Intrinsics_vec256 a2 = acc[2U];
760
22.9k
        Lib_IntVector_Intrinsics_vec256 a3 = acc[3U];
761
22.9k
        Lib_IntVector_Intrinsics_vec256 a4 = acc[4U];
762
22.9k
        Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
763
22.9k
        Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
764
22.9k
        Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
765
22.9k
        Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
766
22.9k
        Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
767
22.9k
        Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
768
22.9k
        Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01);
769
22.9k
        Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
770
22.9k
        Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
771
22.9k
        Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
772
22.9k
        Lib_IntVector_Intrinsics_vec256
773
22.9k
            a03 =
774
22.9k
                Lib_IntVector_Intrinsics_vec256_add64(a02,
775
22.9k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
776
22.9k
        Lib_IntVector_Intrinsics_vec256
777
22.9k
            a13 =
778
22.9k
                Lib_IntVector_Intrinsics_vec256_add64(a12,
779
22.9k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
780
22.9k
        Lib_IntVector_Intrinsics_vec256
781
22.9k
            a23 =
782
22.9k
                Lib_IntVector_Intrinsics_vec256_add64(a22,
783
22.9k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a11));
784
22.9k
        Lib_IntVector_Intrinsics_vec256
785
22.9k
            a33 =
786
22.9k
                Lib_IntVector_Intrinsics_vec256_add64(a32,
787
22.9k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
788
22.9k
        Lib_IntVector_Intrinsics_vec256
789
22.9k
            a43 =
790
22.9k
                Lib_IntVector_Intrinsics_vec256_add64(a42,
791
22.9k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
792
22.9k
        Lib_IntVector_Intrinsics_vec256
793
22.9k
            a04 =
794
22.9k
                Lib_IntVector_Intrinsics_vec256_add64(a03,
795
22.9k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
796
22.9k
        Lib_IntVector_Intrinsics_vec256
797
22.9k
            a14 =
798
22.9k
                Lib_IntVector_Intrinsics_vec256_add64(a13,
799
22.9k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
800
22.9k
        Lib_IntVector_Intrinsics_vec256
801
22.9k
            a24 =
802
22.9k
                Lib_IntVector_Intrinsics_vec256_add64(a23,
803
22.9k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
804
22.9k
        Lib_IntVector_Intrinsics_vec256
805
22.9k
            a34 =
806
22.9k
                Lib_IntVector_Intrinsics_vec256_add64(a33,
807
22.9k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a21));
808
22.9k
        Lib_IntVector_Intrinsics_vec256
809
22.9k
            a44 =
810
22.9k
                Lib_IntVector_Intrinsics_vec256_add64(a43,
811
22.9k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
812
22.9k
        Lib_IntVector_Intrinsics_vec256
813
22.9k
            a05 =
814
22.9k
                Lib_IntVector_Intrinsics_vec256_add64(a04,
815
22.9k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
816
22.9k
        Lib_IntVector_Intrinsics_vec256
817
22.9k
            a15 =
818
22.9k
                Lib_IntVector_Intrinsics_vec256_add64(a14,
819
22.9k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
820
22.9k
        Lib_IntVector_Intrinsics_vec256
821
22.9k
            a25 =
822
22.9k
                Lib_IntVector_Intrinsics_vec256_add64(a24,
823
22.9k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
824
22.9k
        Lib_IntVector_Intrinsics_vec256
825
22.9k
            a35 =
826
22.9k
                Lib_IntVector_Intrinsics_vec256_add64(a34,
827
22.9k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
828
22.9k
        Lib_IntVector_Intrinsics_vec256
829
22.9k
            a45 =
830
22.9k
                Lib_IntVector_Intrinsics_vec256_add64(a44,
831
22.9k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a31));
832
22.9k
        Lib_IntVector_Intrinsics_vec256
833
22.9k
            a06 =
834
22.9k
                Lib_IntVector_Intrinsics_vec256_add64(a05,
835
22.9k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
836
22.9k
        Lib_IntVector_Intrinsics_vec256
837
22.9k
            a16 =
838
22.9k
                Lib_IntVector_Intrinsics_vec256_add64(a15,
839
22.9k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
840
22.9k
        Lib_IntVector_Intrinsics_vec256
841
22.9k
            a26 =
842
22.9k
                Lib_IntVector_Intrinsics_vec256_add64(a25,
843
22.9k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
844
22.9k
        Lib_IntVector_Intrinsics_vec256
845
22.9k
            a36 =
846
22.9k
                Lib_IntVector_Intrinsics_vec256_add64(a35,
847
22.9k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
848
22.9k
        Lib_IntVector_Intrinsics_vec256
849
22.9k
            a46 =
850
22.9k
                Lib_IntVector_Intrinsics_vec256_add64(a45,
851
22.9k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
852
22.9k
        Lib_IntVector_Intrinsics_vec256 t0 = a06;
853
22.9k
        Lib_IntVector_Intrinsics_vec256 t1 = a16;
854
22.9k
        Lib_IntVector_Intrinsics_vec256 t2 = a26;
855
22.9k
        Lib_IntVector_Intrinsics_vec256 t3 = a36;
856
22.9k
        Lib_IntVector_Intrinsics_vec256 t4 = a46;
857
22.9k
        Lib_IntVector_Intrinsics_vec256
858
22.9k
            mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
859
22.9k
        Lib_IntVector_Intrinsics_vec256
860
22.9k
            z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U);
861
22.9k
        Lib_IntVector_Intrinsics_vec256
862
22.9k
            z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
863
22.9k
        Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26);
864
22.9k
        Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
865
22.9k
        Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
866
22.9k
        Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
867
22.9k
        Lib_IntVector_Intrinsics_vec256
868
22.9k
            z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
869
22.9k
        Lib_IntVector_Intrinsics_vec256
870
22.9k
            z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
871
22.9k
        Lib_IntVector_Intrinsics_vec256
872
22.9k
            t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
873
22.9k
        Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
874
22.9k
        Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
875
22.9k
        Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
876
22.9k
        Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
877
22.9k
        Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
878
22.9k
        Lib_IntVector_Intrinsics_vec256
879
22.9k
            z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
880
22.9k
        Lib_IntVector_Intrinsics_vec256
881
22.9k
            z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
882
22.9k
        Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
883
22.9k
        Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
884
22.9k
        Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
885
22.9k
        Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
886
22.9k
        Lib_IntVector_Intrinsics_vec256
887
22.9k
            z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
888
22.9k
        Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
889
22.9k
        Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
890
22.9k
        Lib_IntVector_Intrinsics_vec256 o0 = x02;
891
22.9k
        Lib_IntVector_Intrinsics_vec256 o1 = x12;
892
22.9k
        Lib_IntVector_Intrinsics_vec256 o2 = x21;
893
22.9k
        Lib_IntVector_Intrinsics_vec256 o3 = x32;
894
22.9k
        Lib_IntVector_Intrinsics_vec256 o4 = x42;
895
22.9k
        acc[0U] = o0;
896
22.9k
        acc[1U] = o1;
897
22.9k
        acc[2U] = o2;
898
22.9k
        acc[3U] = o3;
899
22.9k
        acc[4U] = o4;
900
22.9k
        return;
901
22.9k
    }
902
27.6k
}
903
904
static inline void
905
poly1305_do_256(
906
    uint8_t *k,
907
    uint32_t aadlen,
908
    uint8_t *aad,
909
    uint32_t mlen,
910
    uint8_t *m,
911
    uint8_t *out)
912
14.7k
{
913
14.7k
    KRML_PRE_ALIGN(32)
914
14.7k
    Lib_IntVector_Intrinsics_vec256 ctx[25U] KRML_POST_ALIGN(32) = { 0U };
915
14.7k
    uint8_t block[16U] = { 0U };
916
14.7k
    Hacl_Poly1305_256_poly1305_init(ctx, k);
917
14.7k
    if (aadlen != (uint32_t)0U) {
918
14.5k
        poly1305_padded_256(ctx, aadlen, aad);
919
14.5k
    }
920
14.7k
    if (mlen != (uint32_t)0U) {
921
13.1k
        poly1305_padded_256(ctx, mlen, m);
922
13.1k
    }
923
14.7k
    store64_le(block, (uint64_t)aadlen);
924
14.7k
    store64_le(block + (uint32_t)8U, (uint64_t)mlen);
925
14.7k
    Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U;
926
14.7k
    Lib_IntVector_Intrinsics_vec256 *acc = ctx;
927
14.7k
    KRML_PRE_ALIGN(32)
928
14.7k
    Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
929
14.7k
    uint64_t u0 = load64_le(block);
930
14.7k
    uint64_t lo = u0;
931
14.7k
    uint64_t u = load64_le(block + (uint32_t)8U);
932
14.7k
    uint64_t hi = u;
933
14.7k
    Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
934
14.7k
    Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
935
14.7k
    Lib_IntVector_Intrinsics_vec256
936
14.7k
        f010 =
937
14.7k
            Lib_IntVector_Intrinsics_vec256_and(f0,
938
14.7k
                                                Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
939
14.7k
    Lib_IntVector_Intrinsics_vec256
940
14.7k
        f110 =
941
14.7k
            Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
942
14.7k
                                                                                              (uint32_t)26U),
943
14.7k
                                                Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
944
14.7k
    Lib_IntVector_Intrinsics_vec256
945
14.7k
        f20 =
946
14.7k
            Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
947
14.7k
                                                                                             (uint32_t)52U),
948
14.7k
                                               Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
949
14.7k
                                                                                                                                Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
950
14.7k
                                                                                            (uint32_t)12U));
951
14.7k
    Lib_IntVector_Intrinsics_vec256
952
14.7k
        f30 =
953
14.7k
            Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
954
14.7k
                                                                                              (uint32_t)14U),
955
14.7k
                                                Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
956
14.7k
    Lib_IntVector_Intrinsics_vec256
957
14.7k
        f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
958
14.7k
    Lib_IntVector_Intrinsics_vec256 f01 = f010;
959
14.7k
    Lib_IntVector_Intrinsics_vec256 f111 = f110;
960
14.7k
    Lib_IntVector_Intrinsics_vec256 f2 = f20;
961
14.7k
    Lib_IntVector_Intrinsics_vec256 f3 = f30;
962
14.7k
    Lib_IntVector_Intrinsics_vec256 f41 = f40;
963
14.7k
    e[0U] = f01;
964
14.7k
    e[1U] = f111;
965
14.7k
    e[2U] = f2;
966
14.7k
    e[3U] = f3;
967
14.7k
    e[4U] = f41;
968
14.7k
    uint64_t b = (uint64_t)0x1000000U;
969
14.7k
    Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
970
14.7k
    Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
971
14.7k
    e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
972
14.7k
    Lib_IntVector_Intrinsics_vec256 *r = pre;
973
14.7k
    Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U;
974
14.7k
    Lib_IntVector_Intrinsics_vec256 r0 = r[0U];
975
14.7k
    Lib_IntVector_Intrinsics_vec256 r1 = r[1U];
976
14.7k
    Lib_IntVector_Intrinsics_vec256 r2 = r[2U];
977
14.7k
    Lib_IntVector_Intrinsics_vec256 r3 = r[3U];
978
14.7k
    Lib_IntVector_Intrinsics_vec256 r4 = r[4U];
979
14.7k
    Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
980
14.7k
    Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
981
14.7k
    Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
982
14.7k
    Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
983
14.7k
    Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
984
14.7k
    Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
985
14.7k
    Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
986
14.7k
    Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
987
14.7k
    Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
988
14.7k
    Lib_IntVector_Intrinsics_vec256 a0 = acc[0U];
989
14.7k
    Lib_IntVector_Intrinsics_vec256 a1 = acc[1U];
990
14.7k
    Lib_IntVector_Intrinsics_vec256 a2 = acc[2U];
991
14.7k
    Lib_IntVector_Intrinsics_vec256 a3 = acc[3U];
992
14.7k
    Lib_IntVector_Intrinsics_vec256 a4 = acc[4U];
993
14.7k
    Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
994
14.7k
    Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
995
14.7k
    Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
996
14.7k
    Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
997
14.7k
    Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
998
14.7k
    Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
999
14.7k
    Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01);
1000
14.7k
    Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
1001
14.7k
    Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
1002
14.7k
    Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
1003
14.7k
    Lib_IntVector_Intrinsics_vec256
1004
14.7k
        a03 =
1005
14.7k
            Lib_IntVector_Intrinsics_vec256_add64(a02,
1006
14.7k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
1007
14.7k
    Lib_IntVector_Intrinsics_vec256
1008
14.7k
        a13 =
1009
14.7k
            Lib_IntVector_Intrinsics_vec256_add64(a12,
1010
14.7k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
1011
14.7k
    Lib_IntVector_Intrinsics_vec256
1012
14.7k
        a23 =
1013
14.7k
            Lib_IntVector_Intrinsics_vec256_add64(a22,
1014
14.7k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r1, a11));
1015
14.7k
    Lib_IntVector_Intrinsics_vec256
1016
14.7k
        a33 =
1017
14.7k
            Lib_IntVector_Intrinsics_vec256_add64(a32,
1018
14.7k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
1019
14.7k
    Lib_IntVector_Intrinsics_vec256
1020
14.7k
        a43 =
1021
14.7k
            Lib_IntVector_Intrinsics_vec256_add64(a42,
1022
14.7k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
1023
14.7k
    Lib_IntVector_Intrinsics_vec256
1024
14.7k
        a04 =
1025
14.7k
            Lib_IntVector_Intrinsics_vec256_add64(a03,
1026
14.7k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
1027
14.7k
    Lib_IntVector_Intrinsics_vec256
1028
14.7k
        a14 =
1029
14.7k
            Lib_IntVector_Intrinsics_vec256_add64(a13,
1030
14.7k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
1031
14.7k
    Lib_IntVector_Intrinsics_vec256
1032
14.7k
        a24 =
1033
14.7k
            Lib_IntVector_Intrinsics_vec256_add64(a23,
1034
14.7k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
1035
14.7k
    Lib_IntVector_Intrinsics_vec256
1036
14.7k
        a34 =
1037
14.7k
            Lib_IntVector_Intrinsics_vec256_add64(a33,
1038
14.7k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r1, a21));
1039
14.7k
    Lib_IntVector_Intrinsics_vec256
1040
14.7k
        a44 =
1041
14.7k
            Lib_IntVector_Intrinsics_vec256_add64(a43,
1042
14.7k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
1043
14.7k
    Lib_IntVector_Intrinsics_vec256
1044
14.7k
        a05 =
1045
14.7k
            Lib_IntVector_Intrinsics_vec256_add64(a04,
1046
14.7k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
1047
14.7k
    Lib_IntVector_Intrinsics_vec256
1048
14.7k
        a15 =
1049
14.7k
            Lib_IntVector_Intrinsics_vec256_add64(a14,
1050
14.7k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
1051
14.7k
    Lib_IntVector_Intrinsics_vec256
1052
14.7k
        a25 =
1053
14.7k
            Lib_IntVector_Intrinsics_vec256_add64(a24,
1054
14.7k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
1055
14.7k
    Lib_IntVector_Intrinsics_vec256
1056
14.7k
        a35 =
1057
14.7k
            Lib_IntVector_Intrinsics_vec256_add64(a34,
1058
14.7k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
1059
14.7k
    Lib_IntVector_Intrinsics_vec256
1060
14.7k
        a45 =
1061
14.7k
            Lib_IntVector_Intrinsics_vec256_add64(a44,
1062
14.7k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r1, a31));
1063
14.7k
    Lib_IntVector_Intrinsics_vec256
1064
14.7k
        a06 =
1065
14.7k
            Lib_IntVector_Intrinsics_vec256_add64(a05,
1066
14.7k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
1067
14.7k
    Lib_IntVector_Intrinsics_vec256
1068
14.7k
        a16 =
1069
14.7k
            Lib_IntVector_Intrinsics_vec256_add64(a15,
1070
14.7k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
1071
14.7k
    Lib_IntVector_Intrinsics_vec256
1072
14.7k
        a26 =
1073
14.7k
            Lib_IntVector_Intrinsics_vec256_add64(a25,
1074
14.7k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
1075
14.7k
    Lib_IntVector_Intrinsics_vec256
1076
14.7k
        a36 =
1077
14.7k
            Lib_IntVector_Intrinsics_vec256_add64(a35,
1078
14.7k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
1079
14.7k
    Lib_IntVector_Intrinsics_vec256
1080
14.7k
        a46 =
1081
14.7k
            Lib_IntVector_Intrinsics_vec256_add64(a45,
1082
14.7k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
1083
14.7k
    Lib_IntVector_Intrinsics_vec256 t0 = a06;
1084
14.7k
    Lib_IntVector_Intrinsics_vec256 t1 = a16;
1085
14.7k
    Lib_IntVector_Intrinsics_vec256 t2 = a26;
1086
14.7k
    Lib_IntVector_Intrinsics_vec256 t3 = a36;
1087
14.7k
    Lib_IntVector_Intrinsics_vec256 t4 = a46;
1088
14.7k
    Lib_IntVector_Intrinsics_vec256
1089
14.7k
        mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
1090
14.7k
    Lib_IntVector_Intrinsics_vec256
1091
14.7k
        z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U);
1092
14.7k
    Lib_IntVector_Intrinsics_vec256
1093
14.7k
        z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
1094
14.7k
    Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26);
1095
14.7k
    Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
1096
14.7k
    Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
1097
14.7k
    Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
1098
14.7k
    Lib_IntVector_Intrinsics_vec256
1099
14.7k
        z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
1100
14.7k
    Lib_IntVector_Intrinsics_vec256
1101
14.7k
        z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
1102
14.7k
    Lib_IntVector_Intrinsics_vec256
1103
14.7k
        t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
1104
14.7k
    Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
1105
14.7k
    Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
1106
14.7k
    Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
1107
14.7k
    Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
1108
14.7k
    Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
1109
14.7k
    Lib_IntVector_Intrinsics_vec256
1110
14.7k
        z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
1111
14.7k
    Lib_IntVector_Intrinsics_vec256
1112
14.7k
        z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
1113
14.7k
    Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
1114
14.7k
    Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
1115
14.7k
    Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
1116
14.7k
    Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
1117
14.7k
    Lib_IntVector_Intrinsics_vec256
1118
14.7k
        z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
1119
14.7k
    Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
1120
14.7k
    Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
1121
14.7k
    Lib_IntVector_Intrinsics_vec256 o0 = x02;
1122
14.7k
    Lib_IntVector_Intrinsics_vec256 o1 = x12;
1123
14.7k
    Lib_IntVector_Intrinsics_vec256 o2 = x21;
1124
14.7k
    Lib_IntVector_Intrinsics_vec256 o3 = x32;
1125
14.7k
    Lib_IntVector_Intrinsics_vec256 o4 = x42;
1126
14.7k
    acc[0U] = o0;
1127
14.7k
    acc[1U] = o1;
1128
14.7k
    acc[2U] = o2;
1129
14.7k
    acc[3U] = o3;
1130
14.7k
    acc[4U] = o4;
1131
14.7k
    Hacl_Poly1305_256_poly1305_finish(out, k, ctx);
1132
14.7k
}
1133
1134
/**
1135
Encrypt a message `m` with key `k`.
1136
1137
The arguments `k`, `n`, `aadlen`, and `aad` are same in encryption/decryption.
1138
Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory.
1139
1140
@param k Pointer to 32 bytes of memory where the AEAD key is read from.
1141
@param n Pointer to 12 bytes of memory where the AEAD nonce is read from.
1142
@param aadlen Length of the associated data.
1143
@param aad Pointer to `aadlen` bytes of memory where the associated data is read from.
1144
1145
@param mlen Length of the message.
1146
@param m Pointer to `mlen` bytes of memory where the message is read from.
1147
@param cipher Pointer to `mlen` bytes of memory where the ciphertext is written to.
1148
@param mac Pointer to 16 bytes of memory where the mac is written to.
1149
*/
1150
void
1151
Hacl_Chacha20Poly1305_256_aead_encrypt(
1152
    uint8_t *k,
1153
    uint8_t *n,
1154
    uint32_t aadlen,
1155
    uint8_t *aad,
1156
    uint32_t mlen,
1157
    uint8_t *m,
1158
    uint8_t *cipher,
1159
    uint8_t *mac)
1160
10.2k
{
1161
10.2k
    Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, cipher, m, k, n, (uint32_t)1U);
1162
10.2k
    uint8_t tmp[64U] = { 0U };
1163
10.2k
    Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U);
1164
10.2k
    uint8_t *key = tmp;
1165
10.2k
    poly1305_do_256(key, aadlen, aad, mlen, cipher, mac);
1166
10.2k
}
1167
1168
/**
1169
Decrypt a ciphertext `cipher` with key `k`.
1170
1171
The arguments `k`, `n`, `aadlen`, and `aad` are same in encryption/decryption.
1172
Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory.
1173
1174
If decryption succeeds, the resulting plaintext is stored in `m` and the function returns the success code 0.
1175
If decryption fails, the array `m` remains unchanged and the function returns the error code 1.
1176
1177
@param k Pointer to 32 bytes of memory where the AEAD key is read from.
1178
@param n Pointer to 12 bytes of memory where the AEAD nonce is read from.
1179
@param aadlen Length of the associated data.
1180
@param aad Pointer to `aadlen` bytes of memory where the associated data is read from.
1181
1182
@param mlen Length of the ciphertext.
1183
@param m Pointer to `mlen` bytes of memory where the message is written to.
1184
@param cipher Pointer to `mlen` bytes of memory where the ciphertext is read from.
1185
@param mac Pointer to 16 bytes of memory where the mac is read from.
1186
1187
@returns 0 on succeess; 1 on failure.
1188
*/
1189
uint32_t
1190
Hacl_Chacha20Poly1305_256_aead_decrypt(
1191
    uint8_t *k,
1192
    uint8_t *n,
1193
    uint32_t aadlen,
1194
    uint8_t *aad,
1195
    uint32_t mlen,
1196
    uint8_t *m,
1197
    uint8_t *cipher,
1198
    uint8_t *mac)
1199
4.56k
{
1200
4.56k
    uint8_t computed_mac[16U] = { 0U };
1201
4.56k
    uint8_t tmp[64U] = { 0U };
1202
4.56k
    Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U);
1203
4.56k
    uint8_t *key = tmp;
1204
4.56k
    poly1305_do_256(key, aadlen, aad, mlen, cipher, computed_mac);
1205
4.56k
    uint8_t res = (uint8_t)255U;
1206
4.56k
    KRML_MAYBE_FOR16(i,
1207
4.56k
                     (uint32_t)0U,
1208
4.56k
                     (uint32_t)16U,
1209
4.56k
                     (uint32_t)1U,
1210
4.56k
                     uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]);
1211
4.56k
                     res = uu____0 & res;);
1212
4.56k
    uint8_t z = res;
1213
4.56k
    if (z == (uint8_t)255U) {
1214
110
        Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, m, cipher, k, n, (uint32_t)1U);
1215
110
        return (uint32_t)0U;
1216
110
    }
1217
4.45k
    return (uint32_t)1U;
1218
4.56k
}