Coverage Report

Created: 2025-06-24 06:49

/src/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c
Line
Count
Source (jump to first uncovered line)
1
/* MIT License
2
 *
3
 * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
4
 * Copyright (c) 2022-2023 HACL* Contributors
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in all
14
 * copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
 * SOFTWARE.
23
 */
24
25
#include "Hacl_Chacha20Poly1305_256.h"
26
27
#include "internal/Hacl_Poly1305_256.h"
28
#include "internal/Hacl_Krmllib.h"
29
#include "libintvector.h"
30
31
static inline void
32
poly1305_padded_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint32_t len, uint8_t *text)
33
29.3k
{
34
29.3k
    uint32_t n = len / (uint32_t)16U;
35
29.3k
    uint32_t r = len % (uint32_t)16U;
36
29.3k
    uint8_t *blocks = text;
37
29.3k
    uint8_t *rem = text + n * (uint32_t)16U;
38
29.3k
    Lib_IntVector_Intrinsics_vec256 *pre0 = ctx + (uint32_t)5U;
39
29.3k
    Lib_IntVector_Intrinsics_vec256 *acc0 = ctx;
40
29.3k
    uint32_t sz_block = (uint32_t)64U;
41
29.3k
    uint32_t len0 = n * (uint32_t)16U / sz_block * sz_block;
42
29.3k
    uint8_t *t00 = blocks;
43
29.3k
    if (len0 > (uint32_t)0U) {
44
7.82k
        uint32_t bs = (uint32_t)64U;
45
7.82k
        uint8_t *text0 = t00;
46
7.82k
        Hacl_Impl_Poly1305_Field32xN_256_load_acc4(acc0, text0);
47
7.82k
        uint32_t len1 = len0 - bs;
48
7.82k
        uint8_t *text1 = t00 + bs;
49
7.82k
        uint32_t nb = len1 / bs;
50
42.1k
        for (uint32_t i = (uint32_t)0U; i < nb; i++) {
51
34.3k
            uint8_t *block = text1 + i * bs;
52
34.3k
            KRML_PRE_ALIGN(32)
53
34.3k
            Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
54
34.3k
            Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load64_le(block);
55
34.3k
            Lib_IntVector_Intrinsics_vec256
56
34.3k
                hi = Lib_IntVector_Intrinsics_vec256_load64_le(block + (uint32_t)32U);
57
34.3k
            Lib_IntVector_Intrinsics_vec256
58
34.3k
                mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
59
34.3k
            Lib_IntVector_Intrinsics_vec256
60
34.3k
                m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi);
61
34.3k
            Lib_IntVector_Intrinsics_vec256
62
34.3k
                m1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(lo, hi);
63
34.3k
            Lib_IntVector_Intrinsics_vec256
64
34.3k
                m2 = Lib_IntVector_Intrinsics_vec256_shift_right(m0, (uint32_t)48U);
65
34.3k
            Lib_IntVector_Intrinsics_vec256
66
34.3k
                m3 = Lib_IntVector_Intrinsics_vec256_shift_right(m1, (uint32_t)48U);
67
34.3k
            Lib_IntVector_Intrinsics_vec256
68
34.3k
                m4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(m0, m1);
69
34.3k
            Lib_IntVector_Intrinsics_vec256
70
34.3k
                t010 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m0, m1);
71
34.3k
            Lib_IntVector_Intrinsics_vec256
72
34.3k
                t30 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m2, m3);
73
34.3k
            Lib_IntVector_Intrinsics_vec256
74
34.3k
                t20 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)4U);
75
34.3k
            Lib_IntVector_Intrinsics_vec256 o20 = Lib_IntVector_Intrinsics_vec256_and(t20, mask260);
76
34.3k
            Lib_IntVector_Intrinsics_vec256
77
34.3k
                t10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t010, (uint32_t)26U);
78
34.3k
            Lib_IntVector_Intrinsics_vec256 o10 = Lib_IntVector_Intrinsics_vec256_and(t10, mask260);
79
34.3k
            Lib_IntVector_Intrinsics_vec256 o5 = Lib_IntVector_Intrinsics_vec256_and(t010, mask260);
80
34.3k
            Lib_IntVector_Intrinsics_vec256
81
34.3k
                t31 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)30U);
82
34.3k
            Lib_IntVector_Intrinsics_vec256 o30 = Lib_IntVector_Intrinsics_vec256_and(t31, mask260);
83
34.3k
            Lib_IntVector_Intrinsics_vec256
84
34.3k
                o40 = Lib_IntVector_Intrinsics_vec256_shift_right64(m4, (uint32_t)40U);
85
34.3k
            Lib_IntVector_Intrinsics_vec256 o00 = o5;
86
34.3k
            Lib_IntVector_Intrinsics_vec256 o11 = o10;
87
34.3k
            Lib_IntVector_Intrinsics_vec256 o21 = o20;
88
34.3k
            Lib_IntVector_Intrinsics_vec256 o31 = o30;
89
34.3k
            Lib_IntVector_Intrinsics_vec256 o41 = o40;
90
34.3k
            e[0U] = o00;
91
34.3k
            e[1U] = o11;
92
34.3k
            e[2U] = o21;
93
34.3k
            e[3U] = o31;
94
34.3k
            e[4U] = o41;
95
34.3k
            uint64_t b = (uint64_t)0x1000000U;
96
34.3k
            Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
97
34.3k
            Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
98
34.3k
            e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
99
34.3k
            Lib_IntVector_Intrinsics_vec256 *rn = pre0 + (uint32_t)10U;
100
34.3k
            Lib_IntVector_Intrinsics_vec256 *rn5 = pre0 + (uint32_t)15U;
101
34.3k
            Lib_IntVector_Intrinsics_vec256 r0 = rn[0U];
102
34.3k
            Lib_IntVector_Intrinsics_vec256 r1 = rn[1U];
103
34.3k
            Lib_IntVector_Intrinsics_vec256 r2 = rn[2U];
104
34.3k
            Lib_IntVector_Intrinsics_vec256 r3 = rn[3U];
105
34.3k
            Lib_IntVector_Intrinsics_vec256 r4 = rn[4U];
106
34.3k
            Lib_IntVector_Intrinsics_vec256 r51 = rn5[1U];
107
34.3k
            Lib_IntVector_Intrinsics_vec256 r52 = rn5[2U];
108
34.3k
            Lib_IntVector_Intrinsics_vec256 r53 = rn5[3U];
109
34.3k
            Lib_IntVector_Intrinsics_vec256 r54 = rn5[4U];
110
34.3k
            Lib_IntVector_Intrinsics_vec256 f10 = acc0[0U];
111
34.3k
            Lib_IntVector_Intrinsics_vec256 f110 = acc0[1U];
112
34.3k
            Lib_IntVector_Intrinsics_vec256 f120 = acc0[2U];
113
34.3k
            Lib_IntVector_Intrinsics_vec256 f130 = acc0[3U];
114
34.3k
            Lib_IntVector_Intrinsics_vec256 f140 = acc0[4U];
115
34.3k
            Lib_IntVector_Intrinsics_vec256 a0 = Lib_IntVector_Intrinsics_vec256_mul64(r0, f10);
116
34.3k
            Lib_IntVector_Intrinsics_vec256 a1 = Lib_IntVector_Intrinsics_vec256_mul64(r1, f10);
117
34.3k
            Lib_IntVector_Intrinsics_vec256 a2 = Lib_IntVector_Intrinsics_vec256_mul64(r2, f10);
118
34.3k
            Lib_IntVector_Intrinsics_vec256 a3 = Lib_IntVector_Intrinsics_vec256_mul64(r3, f10);
119
34.3k
            Lib_IntVector_Intrinsics_vec256 a4 = Lib_IntVector_Intrinsics_vec256_mul64(r4, f10);
120
34.3k
            Lib_IntVector_Intrinsics_vec256
121
34.3k
                a01 =
122
34.3k
                    Lib_IntVector_Intrinsics_vec256_add64(a0,
123
34.3k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r54, f110));
124
34.3k
            Lib_IntVector_Intrinsics_vec256
125
34.3k
                a11 =
126
34.3k
                    Lib_IntVector_Intrinsics_vec256_add64(a1,
127
34.3k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r0, f110));
128
34.3k
            Lib_IntVector_Intrinsics_vec256
129
34.3k
                a21 =
130
34.3k
                    Lib_IntVector_Intrinsics_vec256_add64(a2,
131
34.3k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r1, f110));
132
34.3k
            Lib_IntVector_Intrinsics_vec256
133
34.3k
                a31 =
134
34.3k
                    Lib_IntVector_Intrinsics_vec256_add64(a3,
135
34.3k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r2, f110));
136
34.3k
            Lib_IntVector_Intrinsics_vec256
137
34.3k
                a41 =
138
34.3k
                    Lib_IntVector_Intrinsics_vec256_add64(a4,
139
34.3k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r3, f110));
140
34.3k
            Lib_IntVector_Intrinsics_vec256
141
34.3k
                a02 =
142
34.3k
                    Lib_IntVector_Intrinsics_vec256_add64(a01,
143
34.3k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r53, f120));
144
34.3k
            Lib_IntVector_Intrinsics_vec256
145
34.3k
                a12 =
146
34.3k
                    Lib_IntVector_Intrinsics_vec256_add64(a11,
147
34.3k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r54, f120));
148
34.3k
            Lib_IntVector_Intrinsics_vec256
149
34.3k
                a22 =
150
34.3k
                    Lib_IntVector_Intrinsics_vec256_add64(a21,
151
34.3k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r0, f120));
152
34.3k
            Lib_IntVector_Intrinsics_vec256
153
34.3k
                a32 =
154
34.3k
                    Lib_IntVector_Intrinsics_vec256_add64(a31,
155
34.3k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r1, f120));
156
34.3k
            Lib_IntVector_Intrinsics_vec256
157
34.3k
                a42 =
158
34.3k
                    Lib_IntVector_Intrinsics_vec256_add64(a41,
159
34.3k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r2, f120));
160
34.3k
            Lib_IntVector_Intrinsics_vec256
161
34.3k
                a03 =
162
34.3k
                    Lib_IntVector_Intrinsics_vec256_add64(a02,
163
34.3k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r52, f130));
164
34.3k
            Lib_IntVector_Intrinsics_vec256
165
34.3k
                a13 =
166
34.3k
                    Lib_IntVector_Intrinsics_vec256_add64(a12,
167
34.3k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r53, f130));
168
34.3k
            Lib_IntVector_Intrinsics_vec256
169
34.3k
                a23 =
170
34.3k
                    Lib_IntVector_Intrinsics_vec256_add64(a22,
171
34.3k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r54, f130));
172
34.3k
            Lib_IntVector_Intrinsics_vec256
173
34.3k
                a33 =
174
34.3k
                    Lib_IntVector_Intrinsics_vec256_add64(a32,
175
34.3k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r0, f130));
176
34.3k
            Lib_IntVector_Intrinsics_vec256
177
34.3k
                a43 =
178
34.3k
                    Lib_IntVector_Intrinsics_vec256_add64(a42,
179
34.3k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r1, f130));
180
34.3k
            Lib_IntVector_Intrinsics_vec256
181
34.3k
                a04 =
182
34.3k
                    Lib_IntVector_Intrinsics_vec256_add64(a03,
183
34.3k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r51, f140));
184
34.3k
            Lib_IntVector_Intrinsics_vec256
185
34.3k
                a14 =
186
34.3k
                    Lib_IntVector_Intrinsics_vec256_add64(a13,
187
34.3k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r52, f140));
188
34.3k
            Lib_IntVector_Intrinsics_vec256
189
34.3k
                a24 =
190
34.3k
                    Lib_IntVector_Intrinsics_vec256_add64(a23,
191
34.3k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r53, f140));
192
34.3k
            Lib_IntVector_Intrinsics_vec256
193
34.3k
                a34 =
194
34.3k
                    Lib_IntVector_Intrinsics_vec256_add64(a33,
195
34.3k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r54, f140));
196
34.3k
            Lib_IntVector_Intrinsics_vec256
197
34.3k
                a44 =
198
34.3k
                    Lib_IntVector_Intrinsics_vec256_add64(a43,
199
34.3k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r0, f140));
200
34.3k
            Lib_IntVector_Intrinsics_vec256 t01 = a04;
201
34.3k
            Lib_IntVector_Intrinsics_vec256 t1 = a14;
202
34.3k
            Lib_IntVector_Intrinsics_vec256 t2 = a24;
203
34.3k
            Lib_IntVector_Intrinsics_vec256 t3 = a34;
204
34.3k
            Lib_IntVector_Intrinsics_vec256 t4 = a44;
205
34.3k
            Lib_IntVector_Intrinsics_vec256
206
34.3k
                mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
207
34.3k
            Lib_IntVector_Intrinsics_vec256
208
34.3k
                z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
209
34.3k
            Lib_IntVector_Intrinsics_vec256
210
34.3k
                z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
211
34.3k
            Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
212
34.3k
            Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
213
34.3k
            Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
214
34.3k
            Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
215
34.3k
            Lib_IntVector_Intrinsics_vec256
216
34.3k
                z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
217
34.3k
            Lib_IntVector_Intrinsics_vec256
218
34.3k
                z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
219
34.3k
            Lib_IntVector_Intrinsics_vec256
220
34.3k
                t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
221
34.3k
            Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
222
34.3k
            Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
223
34.3k
            Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
224
34.3k
            Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
225
34.3k
            Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
226
34.3k
            Lib_IntVector_Intrinsics_vec256
227
34.3k
                z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
228
34.3k
            Lib_IntVector_Intrinsics_vec256
229
34.3k
                z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
230
34.3k
            Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
231
34.3k
            Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
232
34.3k
            Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
233
34.3k
            Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
234
34.3k
            Lib_IntVector_Intrinsics_vec256
235
34.3k
                z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
236
34.3k
            Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
237
34.3k
            Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
238
34.3k
            Lib_IntVector_Intrinsics_vec256 o01 = x02;
239
34.3k
            Lib_IntVector_Intrinsics_vec256 o12 = x12;
240
34.3k
            Lib_IntVector_Intrinsics_vec256 o22 = x21;
241
34.3k
            Lib_IntVector_Intrinsics_vec256 o32 = x32;
242
34.3k
            Lib_IntVector_Intrinsics_vec256 o42 = x42;
243
34.3k
            acc0[0U] = o01;
244
34.3k
            acc0[1U] = o12;
245
34.3k
            acc0[2U] = o22;
246
34.3k
            acc0[3U] = o32;
247
34.3k
            acc0[4U] = o42;
248
34.3k
            Lib_IntVector_Intrinsics_vec256 f100 = acc0[0U];
249
34.3k
            Lib_IntVector_Intrinsics_vec256 f11 = acc0[1U];
250
34.3k
            Lib_IntVector_Intrinsics_vec256 f12 = acc0[2U];
251
34.3k
            Lib_IntVector_Intrinsics_vec256 f13 = acc0[3U];
252
34.3k
            Lib_IntVector_Intrinsics_vec256 f14 = acc0[4U];
253
34.3k
            Lib_IntVector_Intrinsics_vec256 f20 = e[0U];
254
34.3k
            Lib_IntVector_Intrinsics_vec256 f21 = e[1U];
255
34.3k
            Lib_IntVector_Intrinsics_vec256 f22 = e[2U];
256
34.3k
            Lib_IntVector_Intrinsics_vec256 f23 = e[3U];
257
34.3k
            Lib_IntVector_Intrinsics_vec256 f24 = e[4U];
258
34.3k
            Lib_IntVector_Intrinsics_vec256 o0 = Lib_IntVector_Intrinsics_vec256_add64(f100, f20);
259
34.3k
            Lib_IntVector_Intrinsics_vec256 o1 = Lib_IntVector_Intrinsics_vec256_add64(f11, f21);
260
34.3k
            Lib_IntVector_Intrinsics_vec256 o2 = Lib_IntVector_Intrinsics_vec256_add64(f12, f22);
261
34.3k
            Lib_IntVector_Intrinsics_vec256 o3 = Lib_IntVector_Intrinsics_vec256_add64(f13, f23);
262
34.3k
            Lib_IntVector_Intrinsics_vec256 o4 = Lib_IntVector_Intrinsics_vec256_add64(f14, f24);
263
34.3k
            acc0[0U] = o0;
264
34.3k
            acc0[1U] = o1;
265
34.3k
            acc0[2U] = o2;
266
34.3k
            acc0[3U] = o3;
267
34.3k
            acc0[4U] = o4;
268
34.3k
        }
269
7.82k
        Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize(acc0, pre0);
270
7.82k
    }
271
29.3k
    uint32_t len1 = n * (uint32_t)16U - len0;
272
29.3k
    uint8_t *t10 = blocks + len0;
273
29.3k
    uint32_t nb = len1 / (uint32_t)16U;
274
29.3k
    uint32_t rem1 = len1 % (uint32_t)16U;
275
37.7k
    for (uint32_t i = (uint32_t)0U; i < nb; i++) {
276
8.33k
        uint8_t *block = t10 + i * (uint32_t)16U;
277
8.33k
        KRML_PRE_ALIGN(32)
278
8.33k
        Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
279
8.33k
        uint64_t u0 = load64_le(block);
280
8.33k
        uint64_t lo = u0;
281
8.33k
        uint64_t u = load64_le(block + (uint32_t)8U);
282
8.33k
        uint64_t hi = u;
283
8.33k
        Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
284
8.33k
        Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
285
8.33k
        Lib_IntVector_Intrinsics_vec256
286
8.33k
            f010 =
287
8.33k
                Lib_IntVector_Intrinsics_vec256_and(f0,
288
8.33k
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
289
8.33k
        Lib_IntVector_Intrinsics_vec256
290
8.33k
            f110 =
291
8.33k
                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
292
8.33k
                                                                                                  (uint32_t)26U),
293
8.33k
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
294
8.33k
        Lib_IntVector_Intrinsics_vec256
295
8.33k
            f20 =
296
8.33k
                Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
297
8.33k
                                                                                                 (uint32_t)52U),
298
8.33k
                                                   Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
299
8.33k
                                                                                                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
300
8.33k
                                                                                                (uint32_t)12U));
301
8.33k
        Lib_IntVector_Intrinsics_vec256
302
8.33k
            f30 =
303
8.33k
                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
304
8.33k
                                                                                                  (uint32_t)14U),
305
8.33k
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
306
8.33k
        Lib_IntVector_Intrinsics_vec256
307
8.33k
            f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
308
8.33k
        Lib_IntVector_Intrinsics_vec256 f01 = f010;
309
8.33k
        Lib_IntVector_Intrinsics_vec256 f111 = f110;
310
8.33k
        Lib_IntVector_Intrinsics_vec256 f2 = f20;
311
8.33k
        Lib_IntVector_Intrinsics_vec256 f3 = f30;
312
8.33k
        Lib_IntVector_Intrinsics_vec256 f41 = f40;
313
8.33k
        e[0U] = f01;
314
8.33k
        e[1U] = f111;
315
8.33k
        e[2U] = f2;
316
8.33k
        e[3U] = f3;
317
8.33k
        e[4U] = f41;
318
8.33k
        uint64_t b = (uint64_t)0x1000000U;
319
8.33k
        Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
320
8.33k
        Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
321
8.33k
        e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
322
8.33k
        Lib_IntVector_Intrinsics_vec256 *r1 = pre0;
323
8.33k
        Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U;
324
8.33k
        Lib_IntVector_Intrinsics_vec256 r0 = r1[0U];
325
8.33k
        Lib_IntVector_Intrinsics_vec256 r11 = r1[1U];
326
8.33k
        Lib_IntVector_Intrinsics_vec256 r2 = r1[2U];
327
8.33k
        Lib_IntVector_Intrinsics_vec256 r3 = r1[3U];
328
8.33k
        Lib_IntVector_Intrinsics_vec256 r4 = r1[4U];
329
8.33k
        Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
330
8.33k
        Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
331
8.33k
        Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
332
8.33k
        Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
333
8.33k
        Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
334
8.33k
        Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
335
8.33k
        Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
336
8.33k
        Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
337
8.33k
        Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
338
8.33k
        Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U];
339
8.33k
        Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U];
340
8.33k
        Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U];
341
8.33k
        Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U];
342
8.33k
        Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U];
343
8.33k
        Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
344
8.33k
        Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
345
8.33k
        Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
346
8.33k
        Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
347
8.33k
        Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
348
8.33k
        Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
349
8.33k
        Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01);
350
8.33k
        Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
351
8.33k
        Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
352
8.33k
        Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
353
8.33k
        Lib_IntVector_Intrinsics_vec256
354
8.33k
            a03 =
355
8.33k
                Lib_IntVector_Intrinsics_vec256_add64(a02,
356
8.33k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
357
8.33k
        Lib_IntVector_Intrinsics_vec256
358
8.33k
            a13 =
359
8.33k
                Lib_IntVector_Intrinsics_vec256_add64(a12,
360
8.33k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
361
8.33k
        Lib_IntVector_Intrinsics_vec256
362
8.33k
            a23 =
363
8.33k
                Lib_IntVector_Intrinsics_vec256_add64(a22,
364
8.33k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a11));
365
8.33k
        Lib_IntVector_Intrinsics_vec256
366
8.33k
            a33 =
367
8.33k
                Lib_IntVector_Intrinsics_vec256_add64(a32,
368
8.33k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
369
8.33k
        Lib_IntVector_Intrinsics_vec256
370
8.33k
            a43 =
371
8.33k
                Lib_IntVector_Intrinsics_vec256_add64(a42,
372
8.33k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
373
8.33k
        Lib_IntVector_Intrinsics_vec256
374
8.33k
            a04 =
375
8.33k
                Lib_IntVector_Intrinsics_vec256_add64(a03,
376
8.33k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
377
8.33k
        Lib_IntVector_Intrinsics_vec256
378
8.33k
            a14 =
379
8.33k
                Lib_IntVector_Intrinsics_vec256_add64(a13,
380
8.33k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
381
8.33k
        Lib_IntVector_Intrinsics_vec256
382
8.33k
            a24 =
383
8.33k
                Lib_IntVector_Intrinsics_vec256_add64(a23,
384
8.33k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
385
8.33k
        Lib_IntVector_Intrinsics_vec256
386
8.33k
            a34 =
387
8.33k
                Lib_IntVector_Intrinsics_vec256_add64(a33,
388
8.33k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a21));
389
8.33k
        Lib_IntVector_Intrinsics_vec256
390
8.33k
            a44 =
391
8.33k
                Lib_IntVector_Intrinsics_vec256_add64(a43,
392
8.33k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
393
8.33k
        Lib_IntVector_Intrinsics_vec256
394
8.33k
            a05 =
395
8.33k
                Lib_IntVector_Intrinsics_vec256_add64(a04,
396
8.33k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
397
8.33k
        Lib_IntVector_Intrinsics_vec256
398
8.33k
            a15 =
399
8.33k
                Lib_IntVector_Intrinsics_vec256_add64(a14,
400
8.33k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
401
8.33k
        Lib_IntVector_Intrinsics_vec256
402
8.33k
            a25 =
403
8.33k
                Lib_IntVector_Intrinsics_vec256_add64(a24,
404
8.33k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
405
8.33k
        Lib_IntVector_Intrinsics_vec256
406
8.33k
            a35 =
407
8.33k
                Lib_IntVector_Intrinsics_vec256_add64(a34,
408
8.33k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
409
8.33k
        Lib_IntVector_Intrinsics_vec256
410
8.33k
            a45 =
411
8.33k
                Lib_IntVector_Intrinsics_vec256_add64(a44,
412
8.33k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a31));
413
8.33k
        Lib_IntVector_Intrinsics_vec256
414
8.33k
            a06 =
415
8.33k
                Lib_IntVector_Intrinsics_vec256_add64(a05,
416
8.33k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
417
8.33k
        Lib_IntVector_Intrinsics_vec256
418
8.33k
            a16 =
419
8.33k
                Lib_IntVector_Intrinsics_vec256_add64(a15,
420
8.33k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
421
8.33k
        Lib_IntVector_Intrinsics_vec256
422
8.33k
            a26 =
423
8.33k
                Lib_IntVector_Intrinsics_vec256_add64(a25,
424
8.33k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
425
8.33k
        Lib_IntVector_Intrinsics_vec256
426
8.33k
            a36 =
427
8.33k
                Lib_IntVector_Intrinsics_vec256_add64(a35,
428
8.33k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
429
8.33k
        Lib_IntVector_Intrinsics_vec256
430
8.33k
            a46 =
431
8.33k
                Lib_IntVector_Intrinsics_vec256_add64(a45,
432
8.33k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
433
8.33k
        Lib_IntVector_Intrinsics_vec256 t01 = a06;
434
8.33k
        Lib_IntVector_Intrinsics_vec256 t11 = a16;
435
8.33k
        Lib_IntVector_Intrinsics_vec256 t2 = a26;
436
8.33k
        Lib_IntVector_Intrinsics_vec256 t3 = a36;
437
8.33k
        Lib_IntVector_Intrinsics_vec256 t4 = a46;
438
8.33k
        Lib_IntVector_Intrinsics_vec256
439
8.33k
            mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
440
8.33k
        Lib_IntVector_Intrinsics_vec256
441
8.33k
            z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
442
8.33k
        Lib_IntVector_Intrinsics_vec256
443
8.33k
            z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
444
8.33k
        Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
445
8.33k
        Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
446
8.33k
        Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0);
447
8.33k
        Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
448
8.33k
        Lib_IntVector_Intrinsics_vec256
449
8.33k
            z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
450
8.33k
        Lib_IntVector_Intrinsics_vec256
451
8.33k
            z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
452
8.33k
        Lib_IntVector_Intrinsics_vec256
453
8.33k
            t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
454
8.33k
        Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
455
8.33k
        Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
456
8.33k
        Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
457
8.33k
        Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
458
8.33k
        Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
459
8.33k
        Lib_IntVector_Intrinsics_vec256
460
8.33k
            z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
461
8.33k
        Lib_IntVector_Intrinsics_vec256
462
8.33k
            z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
463
8.33k
        Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
464
8.33k
        Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
465
8.33k
        Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
466
8.33k
        Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
467
8.33k
        Lib_IntVector_Intrinsics_vec256
468
8.33k
            z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
469
8.33k
        Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
470
8.33k
        Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
471
8.33k
        Lib_IntVector_Intrinsics_vec256 o0 = x02;
472
8.33k
        Lib_IntVector_Intrinsics_vec256 o1 = x12;
473
8.33k
        Lib_IntVector_Intrinsics_vec256 o2 = x21;
474
8.33k
        Lib_IntVector_Intrinsics_vec256 o3 = x32;
475
8.33k
        Lib_IntVector_Intrinsics_vec256 o4 = x42;
476
8.33k
        acc0[0U] = o0;
477
8.33k
        acc0[1U] = o1;
478
8.33k
        acc0[2U] = o2;
479
8.33k
        acc0[3U] = o3;
480
8.33k
        acc0[4U] = o4;
481
8.33k
    }
482
29.3k
    if (rem1 > (uint32_t)0U) {
483
0
        uint8_t *last = t10 + nb * (uint32_t)16U;
484
0
        KRML_PRE_ALIGN(32)
485
0
        Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
486
0
        uint8_t tmp[16U] = { 0U };
487
0
        memcpy(tmp, last, rem1 * sizeof(uint8_t));
488
0
        uint64_t u0 = load64_le(tmp);
489
0
        uint64_t lo = u0;
490
0
        uint64_t u = load64_le(tmp + (uint32_t)8U);
491
0
        uint64_t hi = u;
492
0
        Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
493
0
        Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
494
0
        Lib_IntVector_Intrinsics_vec256
495
0
            f010 =
496
0
                Lib_IntVector_Intrinsics_vec256_and(f0,
497
0
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
498
0
        Lib_IntVector_Intrinsics_vec256
499
0
            f110 =
500
0
                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
501
0
                                                                                                  (uint32_t)26U),
502
0
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
503
0
        Lib_IntVector_Intrinsics_vec256
504
0
            f20 =
505
0
                Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
506
0
                                                                                                 (uint32_t)52U),
507
0
                                                   Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
508
0
                                                                                                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
509
0
                                                                                                (uint32_t)12U));
510
0
        Lib_IntVector_Intrinsics_vec256
511
0
            f30 =
512
0
                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
513
0
                                                                                                  (uint32_t)14U),
514
0
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
515
0
        Lib_IntVector_Intrinsics_vec256
516
0
            f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
517
0
        Lib_IntVector_Intrinsics_vec256 f01 = f010;
518
0
        Lib_IntVector_Intrinsics_vec256 f111 = f110;
519
0
        Lib_IntVector_Intrinsics_vec256 f2 = f20;
520
0
        Lib_IntVector_Intrinsics_vec256 f3 = f30;
521
0
        Lib_IntVector_Intrinsics_vec256 f4 = f40;
522
0
        e[0U] = f01;
523
0
        e[1U] = f111;
524
0
        e[2U] = f2;
525
0
        e[3U] = f3;
526
0
        e[4U] = f4;
527
0
        uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U;
528
0
        Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
529
0
        Lib_IntVector_Intrinsics_vec256 fi = e[rem1 * (uint32_t)8U / (uint32_t)26U];
530
0
        e[rem1 * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec256_or(fi, mask);
531
0
        Lib_IntVector_Intrinsics_vec256 *r1 = pre0;
532
0
        Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U;
533
0
        Lib_IntVector_Intrinsics_vec256 r0 = r1[0U];
534
0
        Lib_IntVector_Intrinsics_vec256 r11 = r1[1U];
535
0
        Lib_IntVector_Intrinsics_vec256 r2 = r1[2U];
536
0
        Lib_IntVector_Intrinsics_vec256 r3 = r1[3U];
537
0
        Lib_IntVector_Intrinsics_vec256 r4 = r1[4U];
538
0
        Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
539
0
        Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
540
0
        Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
541
0
        Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
542
0
        Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
543
0
        Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
544
0
        Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
545
0
        Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
546
0
        Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
547
0
        Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U];
548
0
        Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U];
549
0
        Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U];
550
0
        Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U];
551
0
        Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U];
552
0
        Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
553
0
        Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
554
0
        Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
555
0
        Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
556
0
        Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
557
0
        Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
558
0
        Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01);
559
0
        Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
560
0
        Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
561
0
        Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
562
0
        Lib_IntVector_Intrinsics_vec256
563
0
            a03 =
564
0
                Lib_IntVector_Intrinsics_vec256_add64(a02,
565
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
566
0
        Lib_IntVector_Intrinsics_vec256
567
0
            a13 =
568
0
                Lib_IntVector_Intrinsics_vec256_add64(a12,
569
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
570
0
        Lib_IntVector_Intrinsics_vec256
571
0
            a23 =
572
0
                Lib_IntVector_Intrinsics_vec256_add64(a22,
573
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a11));
574
0
        Lib_IntVector_Intrinsics_vec256
575
0
            a33 =
576
0
                Lib_IntVector_Intrinsics_vec256_add64(a32,
577
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
578
0
        Lib_IntVector_Intrinsics_vec256
579
0
            a43 =
580
0
                Lib_IntVector_Intrinsics_vec256_add64(a42,
581
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
582
0
        Lib_IntVector_Intrinsics_vec256
583
0
            a04 =
584
0
                Lib_IntVector_Intrinsics_vec256_add64(a03,
585
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
586
0
        Lib_IntVector_Intrinsics_vec256
587
0
            a14 =
588
0
                Lib_IntVector_Intrinsics_vec256_add64(a13,
589
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
590
0
        Lib_IntVector_Intrinsics_vec256
591
0
            a24 =
592
0
                Lib_IntVector_Intrinsics_vec256_add64(a23,
593
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
594
0
        Lib_IntVector_Intrinsics_vec256
595
0
            a34 =
596
0
                Lib_IntVector_Intrinsics_vec256_add64(a33,
597
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a21));
598
0
        Lib_IntVector_Intrinsics_vec256
599
0
            a44 =
600
0
                Lib_IntVector_Intrinsics_vec256_add64(a43,
601
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
602
0
        Lib_IntVector_Intrinsics_vec256
603
0
            a05 =
604
0
                Lib_IntVector_Intrinsics_vec256_add64(a04,
605
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
606
0
        Lib_IntVector_Intrinsics_vec256
607
0
            a15 =
608
0
                Lib_IntVector_Intrinsics_vec256_add64(a14,
609
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
610
0
        Lib_IntVector_Intrinsics_vec256
611
0
            a25 =
612
0
                Lib_IntVector_Intrinsics_vec256_add64(a24,
613
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
614
0
        Lib_IntVector_Intrinsics_vec256
615
0
            a35 =
616
0
                Lib_IntVector_Intrinsics_vec256_add64(a34,
617
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
618
0
        Lib_IntVector_Intrinsics_vec256
619
0
            a45 =
620
0
                Lib_IntVector_Intrinsics_vec256_add64(a44,
621
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a31));
622
0
        Lib_IntVector_Intrinsics_vec256
623
0
            a06 =
624
0
                Lib_IntVector_Intrinsics_vec256_add64(a05,
625
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
626
0
        Lib_IntVector_Intrinsics_vec256
627
0
            a16 =
628
0
                Lib_IntVector_Intrinsics_vec256_add64(a15,
629
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
630
0
        Lib_IntVector_Intrinsics_vec256
631
0
            a26 =
632
0
                Lib_IntVector_Intrinsics_vec256_add64(a25,
633
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
634
0
        Lib_IntVector_Intrinsics_vec256
635
0
            a36 =
636
0
                Lib_IntVector_Intrinsics_vec256_add64(a35,
637
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
638
0
        Lib_IntVector_Intrinsics_vec256
639
0
            a46 =
640
0
                Lib_IntVector_Intrinsics_vec256_add64(a45,
641
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
642
0
        Lib_IntVector_Intrinsics_vec256 t01 = a06;
643
0
        Lib_IntVector_Intrinsics_vec256 t11 = a16;
644
0
        Lib_IntVector_Intrinsics_vec256 t2 = a26;
645
0
        Lib_IntVector_Intrinsics_vec256 t3 = a36;
646
0
        Lib_IntVector_Intrinsics_vec256 t4 = a46;
647
0
        Lib_IntVector_Intrinsics_vec256
648
0
            mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
649
0
        Lib_IntVector_Intrinsics_vec256
650
0
            z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
651
0
        Lib_IntVector_Intrinsics_vec256
652
0
            z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
653
0
        Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
654
0
        Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
655
0
        Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0);
656
0
        Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
657
0
        Lib_IntVector_Intrinsics_vec256
658
0
            z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
659
0
        Lib_IntVector_Intrinsics_vec256
660
0
            z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
661
0
        Lib_IntVector_Intrinsics_vec256
662
0
            t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
663
0
        Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
664
0
        Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
665
0
        Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
666
0
        Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
667
0
        Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
668
0
        Lib_IntVector_Intrinsics_vec256
669
0
            z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
670
0
        Lib_IntVector_Intrinsics_vec256
671
0
            z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
672
0
        Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
673
0
        Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
674
0
        Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
675
0
        Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
676
0
        Lib_IntVector_Intrinsics_vec256
677
0
            z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
678
0
        Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
679
0
        Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
680
0
        Lib_IntVector_Intrinsics_vec256 o0 = x02;
681
0
        Lib_IntVector_Intrinsics_vec256 o1 = x12;
682
0
        Lib_IntVector_Intrinsics_vec256 o2 = x21;
683
0
        Lib_IntVector_Intrinsics_vec256 o3 = x32;
684
0
        Lib_IntVector_Intrinsics_vec256 o4 = x42;
685
0
        acc0[0U] = o0;
686
0
        acc0[1U] = o1;
687
0
        acc0[2U] = o2;
688
0
        acc0[3U] = o3;
689
0
        acc0[4U] = o4;
690
0
    }
691
29.3k
    uint8_t tmp[16U] = { 0U };
692
29.3k
    memcpy(tmp, rem, r * sizeof(uint8_t));
693
29.3k
    if (r > (uint32_t)0U) {
694
25.0k
        Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U;
695
25.0k
        Lib_IntVector_Intrinsics_vec256 *acc = ctx;
696
25.0k
        KRML_PRE_ALIGN(32)
697
25.0k
        Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
698
25.0k
        uint64_t u0 = load64_le(tmp);
699
25.0k
        uint64_t lo = u0;
700
25.0k
        uint64_t u = load64_le(tmp + (uint32_t)8U);
701
25.0k
        uint64_t hi = u;
702
25.0k
        Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
703
25.0k
        Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
704
25.0k
        Lib_IntVector_Intrinsics_vec256
705
25.0k
            f010 =
706
25.0k
                Lib_IntVector_Intrinsics_vec256_and(f0,
707
25.0k
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
708
25.0k
        Lib_IntVector_Intrinsics_vec256
709
25.0k
            f110 =
710
25.0k
                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
711
25.0k
                                                                                                  (uint32_t)26U),
712
25.0k
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
713
25.0k
        Lib_IntVector_Intrinsics_vec256
714
25.0k
            f20 =
715
25.0k
                Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
716
25.0k
                                                                                                 (uint32_t)52U),
717
25.0k
                                                   Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
718
25.0k
                                                                                                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
719
25.0k
                                                                                                (uint32_t)12U));
720
25.0k
        Lib_IntVector_Intrinsics_vec256
721
25.0k
            f30 =
722
25.0k
                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
723
25.0k
                                                                                                  (uint32_t)14U),
724
25.0k
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
725
25.0k
        Lib_IntVector_Intrinsics_vec256
726
25.0k
            f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
727
25.0k
        Lib_IntVector_Intrinsics_vec256 f01 = f010;
728
25.0k
        Lib_IntVector_Intrinsics_vec256 f111 = f110;
729
25.0k
        Lib_IntVector_Intrinsics_vec256 f2 = f20;
730
25.0k
        Lib_IntVector_Intrinsics_vec256 f3 = f30;
731
25.0k
        Lib_IntVector_Intrinsics_vec256 f41 = f40;
732
25.0k
        e[0U] = f01;
733
25.0k
        e[1U] = f111;
734
25.0k
        e[2U] = f2;
735
25.0k
        e[3U] = f3;
736
25.0k
        e[4U] = f41;
737
25.0k
        uint64_t b = (uint64_t)0x1000000U;
738
25.0k
        Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
739
25.0k
        Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
740
25.0k
        e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
741
25.0k
        Lib_IntVector_Intrinsics_vec256 *r1 = pre;
742
25.0k
        Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U;
743
25.0k
        Lib_IntVector_Intrinsics_vec256 r0 = r1[0U];
744
25.0k
        Lib_IntVector_Intrinsics_vec256 r11 = r1[1U];
745
25.0k
        Lib_IntVector_Intrinsics_vec256 r2 = r1[2U];
746
25.0k
        Lib_IntVector_Intrinsics_vec256 r3 = r1[3U];
747
25.0k
        Lib_IntVector_Intrinsics_vec256 r4 = r1[4U];
748
25.0k
        Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
749
25.0k
        Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
750
25.0k
        Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
751
25.0k
        Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
752
25.0k
        Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
753
25.0k
        Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
754
25.0k
        Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
755
25.0k
        Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
756
25.0k
        Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
757
25.0k
        Lib_IntVector_Intrinsics_vec256 a0 = acc[0U];
758
25.0k
        Lib_IntVector_Intrinsics_vec256 a1 = acc[1U];
759
25.0k
        Lib_IntVector_Intrinsics_vec256 a2 = acc[2U];
760
25.0k
        Lib_IntVector_Intrinsics_vec256 a3 = acc[3U];
761
25.0k
        Lib_IntVector_Intrinsics_vec256 a4 = acc[4U];
762
25.0k
        Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
763
25.0k
        Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
764
25.0k
        Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
765
25.0k
        Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
766
25.0k
        Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
767
25.0k
        Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
768
25.0k
        Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01);
769
25.0k
        Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
770
25.0k
        Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
771
25.0k
        Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
772
25.0k
        Lib_IntVector_Intrinsics_vec256
773
25.0k
            a03 =
774
25.0k
                Lib_IntVector_Intrinsics_vec256_add64(a02,
775
25.0k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
776
25.0k
        Lib_IntVector_Intrinsics_vec256
777
25.0k
            a13 =
778
25.0k
                Lib_IntVector_Intrinsics_vec256_add64(a12,
779
25.0k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
780
25.0k
        Lib_IntVector_Intrinsics_vec256
781
25.0k
            a23 =
782
25.0k
                Lib_IntVector_Intrinsics_vec256_add64(a22,
783
25.0k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a11));
784
25.0k
        Lib_IntVector_Intrinsics_vec256
785
25.0k
            a33 =
786
25.0k
                Lib_IntVector_Intrinsics_vec256_add64(a32,
787
25.0k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
788
25.0k
        Lib_IntVector_Intrinsics_vec256
789
25.0k
            a43 =
790
25.0k
                Lib_IntVector_Intrinsics_vec256_add64(a42,
791
25.0k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
792
25.0k
        Lib_IntVector_Intrinsics_vec256
793
25.0k
            a04 =
794
25.0k
                Lib_IntVector_Intrinsics_vec256_add64(a03,
795
25.0k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
796
25.0k
        Lib_IntVector_Intrinsics_vec256
797
25.0k
            a14 =
798
25.0k
                Lib_IntVector_Intrinsics_vec256_add64(a13,
799
25.0k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
800
25.0k
        Lib_IntVector_Intrinsics_vec256
801
25.0k
            a24 =
802
25.0k
                Lib_IntVector_Intrinsics_vec256_add64(a23,
803
25.0k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
804
25.0k
        Lib_IntVector_Intrinsics_vec256
805
25.0k
            a34 =
806
25.0k
                Lib_IntVector_Intrinsics_vec256_add64(a33,
807
25.0k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a21));
808
25.0k
        Lib_IntVector_Intrinsics_vec256
809
25.0k
            a44 =
810
25.0k
                Lib_IntVector_Intrinsics_vec256_add64(a43,
811
25.0k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
812
25.0k
        Lib_IntVector_Intrinsics_vec256
813
25.0k
            a05 =
814
25.0k
                Lib_IntVector_Intrinsics_vec256_add64(a04,
815
25.0k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
816
25.0k
        Lib_IntVector_Intrinsics_vec256
817
25.0k
            a15 =
818
25.0k
                Lib_IntVector_Intrinsics_vec256_add64(a14,
819
25.0k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
820
25.0k
        Lib_IntVector_Intrinsics_vec256
821
25.0k
            a25 =
822
25.0k
                Lib_IntVector_Intrinsics_vec256_add64(a24,
823
25.0k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
824
25.0k
        Lib_IntVector_Intrinsics_vec256
825
25.0k
            a35 =
826
25.0k
                Lib_IntVector_Intrinsics_vec256_add64(a34,
827
25.0k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
828
25.0k
        Lib_IntVector_Intrinsics_vec256
829
25.0k
            a45 =
830
25.0k
                Lib_IntVector_Intrinsics_vec256_add64(a44,
831
25.0k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a31));
832
25.0k
        Lib_IntVector_Intrinsics_vec256
833
25.0k
            a06 =
834
25.0k
                Lib_IntVector_Intrinsics_vec256_add64(a05,
835
25.0k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
836
25.0k
        Lib_IntVector_Intrinsics_vec256
837
25.0k
            a16 =
838
25.0k
                Lib_IntVector_Intrinsics_vec256_add64(a15,
839
25.0k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
840
25.0k
        Lib_IntVector_Intrinsics_vec256
841
25.0k
            a26 =
842
25.0k
                Lib_IntVector_Intrinsics_vec256_add64(a25,
843
25.0k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
844
25.0k
        Lib_IntVector_Intrinsics_vec256
845
25.0k
            a36 =
846
25.0k
                Lib_IntVector_Intrinsics_vec256_add64(a35,
847
25.0k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
848
25.0k
        Lib_IntVector_Intrinsics_vec256
849
25.0k
            a46 =
850
25.0k
                Lib_IntVector_Intrinsics_vec256_add64(a45,
851
25.0k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
852
25.0k
        Lib_IntVector_Intrinsics_vec256 t0 = a06;
853
25.0k
        Lib_IntVector_Intrinsics_vec256 t1 = a16;
854
25.0k
        Lib_IntVector_Intrinsics_vec256 t2 = a26;
855
25.0k
        Lib_IntVector_Intrinsics_vec256 t3 = a36;
856
25.0k
        Lib_IntVector_Intrinsics_vec256 t4 = a46;
857
25.0k
        Lib_IntVector_Intrinsics_vec256
858
25.0k
            mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
859
25.0k
        Lib_IntVector_Intrinsics_vec256
860
25.0k
            z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U);
861
25.0k
        Lib_IntVector_Intrinsics_vec256
862
25.0k
            z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
863
25.0k
        Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26);
864
25.0k
        Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
865
25.0k
        Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
866
25.0k
        Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
867
25.0k
        Lib_IntVector_Intrinsics_vec256
868
25.0k
            z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
869
25.0k
        Lib_IntVector_Intrinsics_vec256
870
25.0k
            z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
871
25.0k
        Lib_IntVector_Intrinsics_vec256
872
25.0k
            t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
873
25.0k
        Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
874
25.0k
        Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
875
25.0k
        Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
876
25.0k
        Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
877
25.0k
        Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
878
25.0k
        Lib_IntVector_Intrinsics_vec256
879
25.0k
            z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
880
25.0k
        Lib_IntVector_Intrinsics_vec256
881
25.0k
            z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
882
25.0k
        Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
883
25.0k
        Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
884
25.0k
        Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
885
25.0k
        Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
886
25.0k
        Lib_IntVector_Intrinsics_vec256
887
25.0k
            z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
888
25.0k
        Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
889
25.0k
        Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
890
25.0k
        Lib_IntVector_Intrinsics_vec256 o0 = x02;
891
25.0k
        Lib_IntVector_Intrinsics_vec256 o1 = x12;
892
25.0k
        Lib_IntVector_Intrinsics_vec256 o2 = x21;
893
25.0k
        Lib_IntVector_Intrinsics_vec256 o3 = x32;
894
25.0k
        Lib_IntVector_Intrinsics_vec256 o4 = x42;
895
25.0k
        acc[0U] = o0;
896
25.0k
        acc[1U] = o1;
897
25.0k
        acc[2U] = o2;
898
25.0k
        acc[3U] = o3;
899
25.0k
        acc[4U] = o4;
900
25.0k
        return;
901
25.0k
    }
902
29.3k
}
903
904
static inline void
905
poly1305_do_256(
906
    uint8_t *k,
907
    uint32_t aadlen,
908
    uint8_t *aad,
909
    uint32_t mlen,
910
    uint8_t *m,
911
    uint8_t *out)
912
17.5k
{
913
17.5k
    KRML_PRE_ALIGN(32)
914
17.5k
    Lib_IntVector_Intrinsics_vec256 ctx[25U] KRML_POST_ALIGN(32) = { 0U };
915
17.5k
    uint8_t block[16U] = { 0U };
916
17.5k
    Hacl_Poly1305_256_poly1305_init(ctx, k);
917
17.5k
    if (aadlen != (uint32_t)0U) {
918
17.5k
        poly1305_padded_256(ctx, aadlen, aad);
919
17.5k
    }
920
17.5k
    if (mlen != (uint32_t)0U) {
921
11.7k
        poly1305_padded_256(ctx, mlen, m);
922
11.7k
    }
923
17.5k
    store64_le(block, (uint64_t)aadlen);
924
17.5k
    store64_le(block + (uint32_t)8U, (uint64_t)mlen);
925
17.5k
    Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U;
926
17.5k
    Lib_IntVector_Intrinsics_vec256 *acc = ctx;
927
17.5k
    KRML_PRE_ALIGN(32)
928
17.5k
    Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
929
17.5k
    uint64_t u0 = load64_le(block);
930
17.5k
    uint64_t lo = u0;
931
17.5k
    uint64_t u = load64_le(block + (uint32_t)8U);
932
17.5k
    uint64_t hi = u;
933
17.5k
    Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
934
17.5k
    Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
935
17.5k
    Lib_IntVector_Intrinsics_vec256
936
17.5k
        f010 =
937
17.5k
            Lib_IntVector_Intrinsics_vec256_and(f0,
938
17.5k
                                                Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
939
17.5k
    Lib_IntVector_Intrinsics_vec256
940
17.5k
        f110 =
941
17.5k
            Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
942
17.5k
                                                                                              (uint32_t)26U),
943
17.5k
                                                Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
944
17.5k
    Lib_IntVector_Intrinsics_vec256
945
17.5k
        f20 =
946
17.5k
            Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
947
17.5k
                                                                                             (uint32_t)52U),
948
17.5k
                                               Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
949
17.5k
                                                                                                                                Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
950
17.5k
                                                                                            (uint32_t)12U));
951
17.5k
    Lib_IntVector_Intrinsics_vec256
952
17.5k
        f30 =
953
17.5k
            Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
954
17.5k
                                                                                              (uint32_t)14U),
955
17.5k
                                                Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
956
17.5k
    Lib_IntVector_Intrinsics_vec256
957
17.5k
        f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
958
17.5k
    Lib_IntVector_Intrinsics_vec256 f01 = f010;
959
17.5k
    Lib_IntVector_Intrinsics_vec256 f111 = f110;
960
17.5k
    Lib_IntVector_Intrinsics_vec256 f2 = f20;
961
17.5k
    Lib_IntVector_Intrinsics_vec256 f3 = f30;
962
17.5k
    Lib_IntVector_Intrinsics_vec256 f41 = f40;
963
17.5k
    e[0U] = f01;
964
17.5k
    e[1U] = f111;
965
17.5k
    e[2U] = f2;
966
17.5k
    e[3U] = f3;
967
17.5k
    e[4U] = f41;
968
17.5k
    uint64_t b = (uint64_t)0x1000000U;
969
17.5k
    Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
970
17.5k
    Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
971
17.5k
    e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
972
17.5k
    Lib_IntVector_Intrinsics_vec256 *r = pre;
973
17.5k
    Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U;
974
17.5k
    Lib_IntVector_Intrinsics_vec256 r0 = r[0U];
975
17.5k
    Lib_IntVector_Intrinsics_vec256 r1 = r[1U];
976
17.5k
    Lib_IntVector_Intrinsics_vec256 r2 = r[2U];
977
17.5k
    Lib_IntVector_Intrinsics_vec256 r3 = r[3U];
978
17.5k
    Lib_IntVector_Intrinsics_vec256 r4 = r[4U];
979
17.5k
    Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
980
17.5k
    Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
981
17.5k
    Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
982
17.5k
    Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
983
17.5k
    Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
984
17.5k
    Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
985
17.5k
    Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
986
17.5k
    Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
987
17.5k
    Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
988
17.5k
    Lib_IntVector_Intrinsics_vec256 a0 = acc[0U];
989
17.5k
    Lib_IntVector_Intrinsics_vec256 a1 = acc[1U];
990
17.5k
    Lib_IntVector_Intrinsics_vec256 a2 = acc[2U];
991
17.5k
    Lib_IntVector_Intrinsics_vec256 a3 = acc[3U];
992
17.5k
    Lib_IntVector_Intrinsics_vec256 a4 = acc[4U];
993
17.5k
    Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
994
17.5k
    Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
995
17.5k
    Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
996
17.5k
    Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
997
17.5k
    Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
998
17.5k
    Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
999
17.5k
    Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01);
1000
17.5k
    Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
1001
17.5k
    Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
1002
17.5k
    Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
1003
17.5k
    Lib_IntVector_Intrinsics_vec256
1004
17.5k
        a03 =
1005
17.5k
            Lib_IntVector_Intrinsics_vec256_add64(a02,
1006
17.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
1007
17.5k
    Lib_IntVector_Intrinsics_vec256
1008
17.5k
        a13 =
1009
17.5k
            Lib_IntVector_Intrinsics_vec256_add64(a12,
1010
17.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
1011
17.5k
    Lib_IntVector_Intrinsics_vec256
1012
17.5k
        a23 =
1013
17.5k
            Lib_IntVector_Intrinsics_vec256_add64(a22,
1014
17.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r1, a11));
1015
17.5k
    Lib_IntVector_Intrinsics_vec256
1016
17.5k
        a33 =
1017
17.5k
            Lib_IntVector_Intrinsics_vec256_add64(a32,
1018
17.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
1019
17.5k
    Lib_IntVector_Intrinsics_vec256
1020
17.5k
        a43 =
1021
17.5k
            Lib_IntVector_Intrinsics_vec256_add64(a42,
1022
17.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
1023
17.5k
    Lib_IntVector_Intrinsics_vec256
1024
17.5k
        a04 =
1025
17.5k
            Lib_IntVector_Intrinsics_vec256_add64(a03,
1026
17.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
1027
17.5k
    Lib_IntVector_Intrinsics_vec256
1028
17.5k
        a14 =
1029
17.5k
            Lib_IntVector_Intrinsics_vec256_add64(a13,
1030
17.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
1031
17.5k
    Lib_IntVector_Intrinsics_vec256
1032
17.5k
        a24 =
1033
17.5k
            Lib_IntVector_Intrinsics_vec256_add64(a23,
1034
17.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
1035
17.5k
    Lib_IntVector_Intrinsics_vec256
1036
17.5k
        a34 =
1037
17.5k
            Lib_IntVector_Intrinsics_vec256_add64(a33,
1038
17.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r1, a21));
1039
17.5k
    Lib_IntVector_Intrinsics_vec256
1040
17.5k
        a44 =
1041
17.5k
            Lib_IntVector_Intrinsics_vec256_add64(a43,
1042
17.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
1043
17.5k
    Lib_IntVector_Intrinsics_vec256
1044
17.5k
        a05 =
1045
17.5k
            Lib_IntVector_Intrinsics_vec256_add64(a04,
1046
17.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
1047
17.5k
    Lib_IntVector_Intrinsics_vec256
1048
17.5k
        a15 =
1049
17.5k
            Lib_IntVector_Intrinsics_vec256_add64(a14,
1050
17.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
1051
17.5k
    Lib_IntVector_Intrinsics_vec256
1052
17.5k
        a25 =
1053
17.5k
            Lib_IntVector_Intrinsics_vec256_add64(a24,
1054
17.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
1055
17.5k
    Lib_IntVector_Intrinsics_vec256
1056
17.5k
        a35 =
1057
17.5k
            Lib_IntVector_Intrinsics_vec256_add64(a34,
1058
17.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
1059
17.5k
    Lib_IntVector_Intrinsics_vec256
1060
17.5k
        a45 =
1061
17.5k
            Lib_IntVector_Intrinsics_vec256_add64(a44,
1062
17.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r1, a31));
1063
17.5k
    Lib_IntVector_Intrinsics_vec256
1064
17.5k
        a06 =
1065
17.5k
            Lib_IntVector_Intrinsics_vec256_add64(a05,
1066
17.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
1067
17.5k
    Lib_IntVector_Intrinsics_vec256
1068
17.5k
        a16 =
1069
17.5k
            Lib_IntVector_Intrinsics_vec256_add64(a15,
1070
17.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
1071
17.5k
    Lib_IntVector_Intrinsics_vec256
1072
17.5k
        a26 =
1073
17.5k
            Lib_IntVector_Intrinsics_vec256_add64(a25,
1074
17.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
1075
17.5k
    Lib_IntVector_Intrinsics_vec256
1076
17.5k
        a36 =
1077
17.5k
            Lib_IntVector_Intrinsics_vec256_add64(a35,
1078
17.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
1079
17.5k
    Lib_IntVector_Intrinsics_vec256
1080
17.5k
        a46 =
1081
17.5k
            Lib_IntVector_Intrinsics_vec256_add64(a45,
1082
17.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
1083
17.5k
    Lib_IntVector_Intrinsics_vec256 t0 = a06;
1084
17.5k
    Lib_IntVector_Intrinsics_vec256 t1 = a16;
1085
17.5k
    Lib_IntVector_Intrinsics_vec256 t2 = a26;
1086
17.5k
    Lib_IntVector_Intrinsics_vec256 t3 = a36;
1087
17.5k
    Lib_IntVector_Intrinsics_vec256 t4 = a46;
1088
17.5k
    Lib_IntVector_Intrinsics_vec256
1089
17.5k
        mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
1090
17.5k
    Lib_IntVector_Intrinsics_vec256
1091
17.5k
        z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U);
1092
17.5k
    Lib_IntVector_Intrinsics_vec256
1093
17.5k
        z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
1094
17.5k
    Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26);
1095
17.5k
    Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
1096
17.5k
    Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
1097
17.5k
    Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
1098
17.5k
    Lib_IntVector_Intrinsics_vec256
1099
17.5k
        z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
1100
17.5k
    Lib_IntVector_Intrinsics_vec256
1101
17.5k
        z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
1102
17.5k
    Lib_IntVector_Intrinsics_vec256
1103
17.5k
        t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
1104
17.5k
    Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
1105
17.5k
    Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
1106
17.5k
    Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
1107
17.5k
    Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
1108
17.5k
    Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
1109
17.5k
    Lib_IntVector_Intrinsics_vec256
1110
17.5k
        z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
1111
17.5k
    Lib_IntVector_Intrinsics_vec256
1112
17.5k
        z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
1113
17.5k
    Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
1114
17.5k
    Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
1115
17.5k
    Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
1116
17.5k
    Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
1117
17.5k
    Lib_IntVector_Intrinsics_vec256
1118
17.5k
        z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
1119
17.5k
    Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
1120
17.5k
    Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
1121
17.5k
    Lib_IntVector_Intrinsics_vec256 o0 = x02;
1122
17.5k
    Lib_IntVector_Intrinsics_vec256 o1 = x12;
1123
17.5k
    Lib_IntVector_Intrinsics_vec256 o2 = x21;
1124
17.5k
    Lib_IntVector_Intrinsics_vec256 o3 = x32;
1125
17.5k
    Lib_IntVector_Intrinsics_vec256 o4 = x42;
1126
17.5k
    acc[0U] = o0;
1127
17.5k
    acc[1U] = o1;
1128
17.5k
    acc[2U] = o2;
1129
17.5k
    acc[3U] = o3;
1130
17.5k
    acc[4U] = o4;
1131
17.5k
    Hacl_Poly1305_256_poly1305_finish(out, k, ctx);
1132
17.5k
}
1133
1134
/**
1135
Encrypt a message `m` with key `k`.
1136
1137
The arguments `k`, `n`, `aadlen`, and `aad` are same in encryption/decryption.
1138
Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory.
1139
1140
@param k Pointer to 32 bytes of memory where the AEAD key is read from.
1141
@param n Pointer to 12 bytes of memory where the AEAD nonce is read from.
1142
@param aadlen Length of the associated data.
1143
@param aad Pointer to `aadlen` bytes of memory where the associated data is read from.
1144
1145
@param mlen Length of the message.
1146
@param m Pointer to `mlen` bytes of memory where the message is read from.
1147
@param cipher Pointer to `mlen` bytes of memory where the ciphertext is written to.
1148
@param mac Pointer to 16 bytes of memory where the mac is written to.
1149
*/
1150
void
1151
Hacl_Chacha20Poly1305_256_aead_encrypt(
1152
    uint8_t *k,
1153
    uint8_t *n,
1154
    uint32_t aadlen,
1155
    uint8_t *aad,
1156
    uint32_t mlen,
1157
    uint8_t *m,
1158
    uint8_t *cipher,
1159
    uint8_t *mac)
1160
8.91k
{
1161
8.91k
    Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, cipher, m, k, n, (uint32_t)1U);
1162
8.91k
    uint8_t tmp[64U] = { 0U };
1163
8.91k
    Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U);
1164
8.91k
    uint8_t *key = tmp;
1165
8.91k
    poly1305_do_256(key, aadlen, aad, mlen, cipher, mac);
1166
8.91k
}
1167
1168
/**
1169
Decrypt a ciphertext `cipher` with key `k`.
1170
1171
The arguments `k`, `n`, `aadlen`, and `aad` are same in encryption/decryption.
1172
Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory.
1173
1174
If decryption succeeds, the resulting plaintext is stored in `m` and the function returns the success code 0.
1175
If decryption fails, the array `m` remains unchanged and the function returns the error code 1.
1176
1177
@param k Pointer to 32 bytes of memory where the AEAD key is read from.
1178
@param n Pointer to 12 bytes of memory where the AEAD nonce is read from.
1179
@param aadlen Length of the associated data.
1180
@param aad Pointer to `aadlen` bytes of memory where the associated data is read from.
1181
1182
@param mlen Length of the ciphertext.
1183
@param m Pointer to `mlen` bytes of memory where the message is written to.
1184
@param cipher Pointer to `mlen` bytes of memory where the ciphertext is read from.
1185
@param mac Pointer to 16 bytes of memory where the mac is read from.
1186
1187
@returns 0 on succeess; 1 on failure.
1188
*/
1189
uint32_t
1190
Hacl_Chacha20Poly1305_256_aead_decrypt(
1191
    uint8_t *k,
1192
    uint8_t *n,
1193
    uint32_t aadlen,
1194
    uint8_t *aad,
1195
    uint32_t mlen,
1196
    uint8_t *m,
1197
    uint8_t *cipher,
1198
    uint8_t *mac)
1199
8.66k
{
1200
8.66k
    uint8_t computed_mac[16U] = { 0U };
1201
8.66k
    uint8_t tmp[64U] = { 0U };
1202
8.66k
    Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U);
1203
8.66k
    uint8_t *key = tmp;
1204
8.66k
    poly1305_do_256(key, aadlen, aad, mlen, cipher, computed_mac);
1205
8.66k
    uint8_t res = (uint8_t)255U;
1206
8.66k
    KRML_MAYBE_FOR16(i,
1207
8.66k
                     (uint32_t)0U,
1208
8.66k
                     (uint32_t)16U,
1209
8.66k
                     (uint32_t)1U,
1210
8.66k
                     uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]);
1211
8.66k
                     res = uu____0 & res;);
1212
8.66k
    uint8_t z = res;
1213
8.66k
    if (z == (uint8_t)255U) {
1214
0
        Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, m, cipher, k, n, (uint32_t)1U);
1215
0
        return (uint32_t)0U;
1216
0
    }
1217
8.66k
    return (uint32_t)1U;
1218
8.66k
}