Coverage Report

Created: 2025-11-05 06:16

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c
Line
Count
Source
1
/* MIT License
2
 *
3
 * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
4
 * Copyright (c) 2022-2023 HACL* Contributors
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in all
14
 * copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
 * SOFTWARE.
23
 */
24
25
#include "Hacl_Chacha20Poly1305_256.h"
26
27
#include "internal/Hacl_Poly1305_256.h"
28
#include "internal/Hacl_Krmllib.h"
29
#include "libintvector.h"
30
31
static inline void
32
poly1305_padded_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint32_t len, uint8_t *text)
33
26.7k
{
34
26.7k
    uint32_t n = len / (uint32_t)16U;
35
26.7k
    uint32_t r = len % (uint32_t)16U;
36
26.7k
    uint8_t *blocks = text;
37
26.7k
    uint8_t *rem = text + n * (uint32_t)16U;
38
26.7k
    Lib_IntVector_Intrinsics_vec256 *pre0 = ctx + (uint32_t)5U;
39
26.7k
    Lib_IntVector_Intrinsics_vec256 *acc0 = ctx;
40
26.7k
    uint32_t sz_block = (uint32_t)64U;
41
26.7k
    uint32_t len0 = n * (uint32_t)16U / sz_block * sz_block;
42
26.7k
    uint8_t *t00 = blocks;
43
26.7k
    if (len0 > (uint32_t)0U) {
44
7.90k
        uint32_t bs = (uint32_t)64U;
45
7.90k
        uint8_t *text0 = t00;
46
7.90k
        Hacl_Impl_Poly1305_Field32xN_256_load_acc4(acc0, text0);
47
7.90k
        uint32_t len1 = len0 - bs;
48
7.90k
        uint8_t *text1 = t00 + bs;
49
7.90k
        uint32_t nb = len1 / bs;
50
21.5k
        for (uint32_t i = (uint32_t)0U; i < nb; i++) {
51
13.6k
            uint8_t *block = text1 + i * bs;
52
13.6k
            KRML_PRE_ALIGN(32)
53
13.6k
            Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
54
13.6k
            Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load64_le(block);
55
13.6k
            Lib_IntVector_Intrinsics_vec256
56
13.6k
                hi = Lib_IntVector_Intrinsics_vec256_load64_le(block + (uint32_t)32U);
57
13.6k
            Lib_IntVector_Intrinsics_vec256
58
13.6k
                mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
59
13.6k
            Lib_IntVector_Intrinsics_vec256
60
13.6k
                m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi);
61
13.6k
            Lib_IntVector_Intrinsics_vec256
62
13.6k
                m1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(lo, hi);
63
13.6k
            Lib_IntVector_Intrinsics_vec256
64
13.6k
                m2 = Lib_IntVector_Intrinsics_vec256_shift_right(m0, (uint32_t)48U);
65
13.6k
            Lib_IntVector_Intrinsics_vec256
66
13.6k
                m3 = Lib_IntVector_Intrinsics_vec256_shift_right(m1, (uint32_t)48U);
67
13.6k
            Lib_IntVector_Intrinsics_vec256
68
13.6k
                m4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(m0, m1);
69
13.6k
            Lib_IntVector_Intrinsics_vec256
70
13.6k
                t010 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m0, m1);
71
13.6k
            Lib_IntVector_Intrinsics_vec256
72
13.6k
                t30 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m2, m3);
73
13.6k
            Lib_IntVector_Intrinsics_vec256
74
13.6k
                t20 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)4U);
75
13.6k
            Lib_IntVector_Intrinsics_vec256 o20 = Lib_IntVector_Intrinsics_vec256_and(t20, mask260);
76
13.6k
            Lib_IntVector_Intrinsics_vec256
77
13.6k
                t10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t010, (uint32_t)26U);
78
13.6k
            Lib_IntVector_Intrinsics_vec256 o10 = Lib_IntVector_Intrinsics_vec256_and(t10, mask260);
79
13.6k
            Lib_IntVector_Intrinsics_vec256 o5 = Lib_IntVector_Intrinsics_vec256_and(t010, mask260);
80
13.6k
            Lib_IntVector_Intrinsics_vec256
81
13.6k
                t31 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)30U);
82
13.6k
            Lib_IntVector_Intrinsics_vec256 o30 = Lib_IntVector_Intrinsics_vec256_and(t31, mask260);
83
13.6k
            Lib_IntVector_Intrinsics_vec256
84
13.6k
                o40 = Lib_IntVector_Intrinsics_vec256_shift_right64(m4, (uint32_t)40U);
85
13.6k
            Lib_IntVector_Intrinsics_vec256 o00 = o5;
86
13.6k
            Lib_IntVector_Intrinsics_vec256 o11 = o10;
87
13.6k
            Lib_IntVector_Intrinsics_vec256 o21 = o20;
88
13.6k
            Lib_IntVector_Intrinsics_vec256 o31 = o30;
89
13.6k
            Lib_IntVector_Intrinsics_vec256 o41 = o40;
90
13.6k
            e[0U] = o00;
91
13.6k
            e[1U] = o11;
92
13.6k
            e[2U] = o21;
93
13.6k
            e[3U] = o31;
94
13.6k
            e[4U] = o41;
95
13.6k
            uint64_t b = (uint64_t)0x1000000U;
96
13.6k
            Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
97
13.6k
            Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
98
13.6k
            e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
99
13.6k
            Lib_IntVector_Intrinsics_vec256 *rn = pre0 + (uint32_t)10U;
100
13.6k
            Lib_IntVector_Intrinsics_vec256 *rn5 = pre0 + (uint32_t)15U;
101
13.6k
            Lib_IntVector_Intrinsics_vec256 r0 = rn[0U];
102
13.6k
            Lib_IntVector_Intrinsics_vec256 r1 = rn[1U];
103
13.6k
            Lib_IntVector_Intrinsics_vec256 r2 = rn[2U];
104
13.6k
            Lib_IntVector_Intrinsics_vec256 r3 = rn[3U];
105
13.6k
            Lib_IntVector_Intrinsics_vec256 r4 = rn[4U];
106
13.6k
            Lib_IntVector_Intrinsics_vec256 r51 = rn5[1U];
107
13.6k
            Lib_IntVector_Intrinsics_vec256 r52 = rn5[2U];
108
13.6k
            Lib_IntVector_Intrinsics_vec256 r53 = rn5[3U];
109
13.6k
            Lib_IntVector_Intrinsics_vec256 r54 = rn5[4U];
110
13.6k
            Lib_IntVector_Intrinsics_vec256 f10 = acc0[0U];
111
13.6k
            Lib_IntVector_Intrinsics_vec256 f110 = acc0[1U];
112
13.6k
            Lib_IntVector_Intrinsics_vec256 f120 = acc0[2U];
113
13.6k
            Lib_IntVector_Intrinsics_vec256 f130 = acc0[3U];
114
13.6k
            Lib_IntVector_Intrinsics_vec256 f140 = acc0[4U];
115
13.6k
            Lib_IntVector_Intrinsics_vec256 a0 = Lib_IntVector_Intrinsics_vec256_mul64(r0, f10);
116
13.6k
            Lib_IntVector_Intrinsics_vec256 a1 = Lib_IntVector_Intrinsics_vec256_mul64(r1, f10);
117
13.6k
            Lib_IntVector_Intrinsics_vec256 a2 = Lib_IntVector_Intrinsics_vec256_mul64(r2, f10);
118
13.6k
            Lib_IntVector_Intrinsics_vec256 a3 = Lib_IntVector_Intrinsics_vec256_mul64(r3, f10);
119
13.6k
            Lib_IntVector_Intrinsics_vec256 a4 = Lib_IntVector_Intrinsics_vec256_mul64(r4, f10);
120
13.6k
            Lib_IntVector_Intrinsics_vec256
121
13.6k
                a01 =
122
13.6k
                    Lib_IntVector_Intrinsics_vec256_add64(a0,
123
13.6k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r54, f110));
124
13.6k
            Lib_IntVector_Intrinsics_vec256
125
13.6k
                a11 =
126
13.6k
                    Lib_IntVector_Intrinsics_vec256_add64(a1,
127
13.6k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r0, f110));
128
13.6k
            Lib_IntVector_Intrinsics_vec256
129
13.6k
                a21 =
130
13.6k
                    Lib_IntVector_Intrinsics_vec256_add64(a2,
131
13.6k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r1, f110));
132
13.6k
            Lib_IntVector_Intrinsics_vec256
133
13.6k
                a31 =
134
13.6k
                    Lib_IntVector_Intrinsics_vec256_add64(a3,
135
13.6k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r2, f110));
136
13.6k
            Lib_IntVector_Intrinsics_vec256
137
13.6k
                a41 =
138
13.6k
                    Lib_IntVector_Intrinsics_vec256_add64(a4,
139
13.6k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r3, f110));
140
13.6k
            Lib_IntVector_Intrinsics_vec256
141
13.6k
                a02 =
142
13.6k
                    Lib_IntVector_Intrinsics_vec256_add64(a01,
143
13.6k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r53, f120));
144
13.6k
            Lib_IntVector_Intrinsics_vec256
145
13.6k
                a12 =
146
13.6k
                    Lib_IntVector_Intrinsics_vec256_add64(a11,
147
13.6k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r54, f120));
148
13.6k
            Lib_IntVector_Intrinsics_vec256
149
13.6k
                a22 =
150
13.6k
                    Lib_IntVector_Intrinsics_vec256_add64(a21,
151
13.6k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r0, f120));
152
13.6k
            Lib_IntVector_Intrinsics_vec256
153
13.6k
                a32 =
154
13.6k
                    Lib_IntVector_Intrinsics_vec256_add64(a31,
155
13.6k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r1, f120));
156
13.6k
            Lib_IntVector_Intrinsics_vec256
157
13.6k
                a42 =
158
13.6k
                    Lib_IntVector_Intrinsics_vec256_add64(a41,
159
13.6k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r2, f120));
160
13.6k
            Lib_IntVector_Intrinsics_vec256
161
13.6k
                a03 =
162
13.6k
                    Lib_IntVector_Intrinsics_vec256_add64(a02,
163
13.6k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r52, f130));
164
13.6k
            Lib_IntVector_Intrinsics_vec256
165
13.6k
                a13 =
166
13.6k
                    Lib_IntVector_Intrinsics_vec256_add64(a12,
167
13.6k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r53, f130));
168
13.6k
            Lib_IntVector_Intrinsics_vec256
169
13.6k
                a23 =
170
13.6k
                    Lib_IntVector_Intrinsics_vec256_add64(a22,
171
13.6k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r54, f130));
172
13.6k
            Lib_IntVector_Intrinsics_vec256
173
13.6k
                a33 =
174
13.6k
                    Lib_IntVector_Intrinsics_vec256_add64(a32,
175
13.6k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r0, f130));
176
13.6k
            Lib_IntVector_Intrinsics_vec256
177
13.6k
                a43 =
178
13.6k
                    Lib_IntVector_Intrinsics_vec256_add64(a42,
179
13.6k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r1, f130));
180
13.6k
            Lib_IntVector_Intrinsics_vec256
181
13.6k
                a04 =
182
13.6k
                    Lib_IntVector_Intrinsics_vec256_add64(a03,
183
13.6k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r51, f140));
184
13.6k
            Lib_IntVector_Intrinsics_vec256
185
13.6k
                a14 =
186
13.6k
                    Lib_IntVector_Intrinsics_vec256_add64(a13,
187
13.6k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r52, f140));
188
13.6k
            Lib_IntVector_Intrinsics_vec256
189
13.6k
                a24 =
190
13.6k
                    Lib_IntVector_Intrinsics_vec256_add64(a23,
191
13.6k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r53, f140));
192
13.6k
            Lib_IntVector_Intrinsics_vec256
193
13.6k
                a34 =
194
13.6k
                    Lib_IntVector_Intrinsics_vec256_add64(a33,
195
13.6k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r54, f140));
196
13.6k
            Lib_IntVector_Intrinsics_vec256
197
13.6k
                a44 =
198
13.6k
                    Lib_IntVector_Intrinsics_vec256_add64(a43,
199
13.6k
                                                          Lib_IntVector_Intrinsics_vec256_mul64(r0, f140));
200
13.6k
            Lib_IntVector_Intrinsics_vec256 t01 = a04;
201
13.6k
            Lib_IntVector_Intrinsics_vec256 t1 = a14;
202
13.6k
            Lib_IntVector_Intrinsics_vec256 t2 = a24;
203
13.6k
            Lib_IntVector_Intrinsics_vec256 t3 = a34;
204
13.6k
            Lib_IntVector_Intrinsics_vec256 t4 = a44;
205
13.6k
            Lib_IntVector_Intrinsics_vec256
206
13.6k
                mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
207
13.6k
            Lib_IntVector_Intrinsics_vec256
208
13.6k
                z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
209
13.6k
            Lib_IntVector_Intrinsics_vec256
210
13.6k
                z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
211
13.6k
            Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
212
13.6k
            Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
213
13.6k
            Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
214
13.6k
            Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
215
13.6k
            Lib_IntVector_Intrinsics_vec256
216
13.6k
                z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
217
13.6k
            Lib_IntVector_Intrinsics_vec256
218
13.6k
                z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
219
13.6k
            Lib_IntVector_Intrinsics_vec256
220
13.6k
                t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
221
13.6k
            Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
222
13.6k
            Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
223
13.6k
            Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
224
13.6k
            Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
225
13.6k
            Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
226
13.6k
            Lib_IntVector_Intrinsics_vec256
227
13.6k
                z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
228
13.6k
            Lib_IntVector_Intrinsics_vec256
229
13.6k
                z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
230
13.6k
            Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
231
13.6k
            Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
232
13.6k
            Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
233
13.6k
            Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
234
13.6k
            Lib_IntVector_Intrinsics_vec256
235
13.6k
                z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
236
13.6k
            Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
237
13.6k
            Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
238
13.6k
            Lib_IntVector_Intrinsics_vec256 o01 = x02;
239
13.6k
            Lib_IntVector_Intrinsics_vec256 o12 = x12;
240
13.6k
            Lib_IntVector_Intrinsics_vec256 o22 = x21;
241
13.6k
            Lib_IntVector_Intrinsics_vec256 o32 = x32;
242
13.6k
            Lib_IntVector_Intrinsics_vec256 o42 = x42;
243
13.6k
            acc0[0U] = o01;
244
13.6k
            acc0[1U] = o12;
245
13.6k
            acc0[2U] = o22;
246
13.6k
            acc0[3U] = o32;
247
13.6k
            acc0[4U] = o42;
248
13.6k
            Lib_IntVector_Intrinsics_vec256 f100 = acc0[0U];
249
13.6k
            Lib_IntVector_Intrinsics_vec256 f11 = acc0[1U];
250
13.6k
            Lib_IntVector_Intrinsics_vec256 f12 = acc0[2U];
251
13.6k
            Lib_IntVector_Intrinsics_vec256 f13 = acc0[3U];
252
13.6k
            Lib_IntVector_Intrinsics_vec256 f14 = acc0[4U];
253
13.6k
            Lib_IntVector_Intrinsics_vec256 f20 = e[0U];
254
13.6k
            Lib_IntVector_Intrinsics_vec256 f21 = e[1U];
255
13.6k
            Lib_IntVector_Intrinsics_vec256 f22 = e[2U];
256
13.6k
            Lib_IntVector_Intrinsics_vec256 f23 = e[3U];
257
13.6k
            Lib_IntVector_Intrinsics_vec256 f24 = e[4U];
258
13.6k
            Lib_IntVector_Intrinsics_vec256 o0 = Lib_IntVector_Intrinsics_vec256_add64(f100, f20);
259
13.6k
            Lib_IntVector_Intrinsics_vec256 o1 = Lib_IntVector_Intrinsics_vec256_add64(f11, f21);
260
13.6k
            Lib_IntVector_Intrinsics_vec256 o2 = Lib_IntVector_Intrinsics_vec256_add64(f12, f22);
261
13.6k
            Lib_IntVector_Intrinsics_vec256 o3 = Lib_IntVector_Intrinsics_vec256_add64(f13, f23);
262
13.6k
            Lib_IntVector_Intrinsics_vec256 o4 = Lib_IntVector_Intrinsics_vec256_add64(f14, f24);
263
13.6k
            acc0[0U] = o0;
264
13.6k
            acc0[1U] = o1;
265
13.6k
            acc0[2U] = o2;
266
13.6k
            acc0[3U] = o3;
267
13.6k
            acc0[4U] = o4;
268
13.6k
        }
269
7.90k
        Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize(acc0, pre0);
270
7.90k
    }
271
26.7k
    uint32_t len1 = n * (uint32_t)16U - len0;
272
26.7k
    uint8_t *t10 = blocks + len0;
273
26.7k
    uint32_t nb = len1 / (uint32_t)16U;
274
26.7k
    uint32_t rem1 = len1 % (uint32_t)16U;
275
35.9k
    for (uint32_t i = (uint32_t)0U; i < nb; i++) {
276
9.28k
        uint8_t *block = t10 + i * (uint32_t)16U;
277
9.28k
        KRML_PRE_ALIGN(32)
278
9.28k
        Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
279
9.28k
        uint64_t u0 = load64_le(block);
280
9.28k
        uint64_t lo = u0;
281
9.28k
        uint64_t u = load64_le(block + (uint32_t)8U);
282
9.28k
        uint64_t hi = u;
283
9.28k
        Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
284
9.28k
        Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
285
9.28k
        Lib_IntVector_Intrinsics_vec256
286
9.28k
            f010 =
287
9.28k
                Lib_IntVector_Intrinsics_vec256_and(f0,
288
9.28k
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
289
9.28k
        Lib_IntVector_Intrinsics_vec256
290
9.28k
            f110 =
291
9.28k
                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
292
9.28k
                                                                                                  (uint32_t)26U),
293
9.28k
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
294
9.28k
        Lib_IntVector_Intrinsics_vec256
295
9.28k
            f20 =
296
9.28k
                Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
297
9.28k
                                                                                                 (uint32_t)52U),
298
9.28k
                                                   Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
299
9.28k
                                                                                                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
300
9.28k
                                                                                                (uint32_t)12U));
301
9.28k
        Lib_IntVector_Intrinsics_vec256
302
9.28k
            f30 =
303
9.28k
                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
304
9.28k
                                                                                                  (uint32_t)14U),
305
9.28k
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
306
9.28k
        Lib_IntVector_Intrinsics_vec256
307
9.28k
            f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
308
9.28k
        Lib_IntVector_Intrinsics_vec256 f01 = f010;
309
9.28k
        Lib_IntVector_Intrinsics_vec256 f111 = f110;
310
9.28k
        Lib_IntVector_Intrinsics_vec256 f2 = f20;
311
9.28k
        Lib_IntVector_Intrinsics_vec256 f3 = f30;
312
9.28k
        Lib_IntVector_Intrinsics_vec256 f41 = f40;
313
9.28k
        e[0U] = f01;
314
9.28k
        e[1U] = f111;
315
9.28k
        e[2U] = f2;
316
9.28k
        e[3U] = f3;
317
9.28k
        e[4U] = f41;
318
9.28k
        uint64_t b = (uint64_t)0x1000000U;
319
9.28k
        Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
320
9.28k
        Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
321
9.28k
        e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
322
9.28k
        Lib_IntVector_Intrinsics_vec256 *r1 = pre0;
323
9.28k
        Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U;
324
9.28k
        Lib_IntVector_Intrinsics_vec256 r0 = r1[0U];
325
9.28k
        Lib_IntVector_Intrinsics_vec256 r11 = r1[1U];
326
9.28k
        Lib_IntVector_Intrinsics_vec256 r2 = r1[2U];
327
9.28k
        Lib_IntVector_Intrinsics_vec256 r3 = r1[3U];
328
9.28k
        Lib_IntVector_Intrinsics_vec256 r4 = r1[4U];
329
9.28k
        Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
330
9.28k
        Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
331
9.28k
        Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
332
9.28k
        Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
333
9.28k
        Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
334
9.28k
        Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
335
9.28k
        Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
336
9.28k
        Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
337
9.28k
        Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
338
9.28k
        Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U];
339
9.28k
        Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U];
340
9.28k
        Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U];
341
9.28k
        Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U];
342
9.28k
        Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U];
343
9.28k
        Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
344
9.28k
        Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
345
9.28k
        Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
346
9.28k
        Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
347
9.28k
        Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
348
9.28k
        Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
349
9.28k
        Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01);
350
9.28k
        Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
351
9.28k
        Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
352
9.28k
        Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
353
9.28k
        Lib_IntVector_Intrinsics_vec256
354
9.28k
            a03 =
355
9.28k
                Lib_IntVector_Intrinsics_vec256_add64(a02,
356
9.28k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
357
9.28k
        Lib_IntVector_Intrinsics_vec256
358
9.28k
            a13 =
359
9.28k
                Lib_IntVector_Intrinsics_vec256_add64(a12,
360
9.28k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
361
9.28k
        Lib_IntVector_Intrinsics_vec256
362
9.28k
            a23 =
363
9.28k
                Lib_IntVector_Intrinsics_vec256_add64(a22,
364
9.28k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a11));
365
9.28k
        Lib_IntVector_Intrinsics_vec256
366
9.28k
            a33 =
367
9.28k
                Lib_IntVector_Intrinsics_vec256_add64(a32,
368
9.28k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
369
9.28k
        Lib_IntVector_Intrinsics_vec256
370
9.28k
            a43 =
371
9.28k
                Lib_IntVector_Intrinsics_vec256_add64(a42,
372
9.28k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
373
9.28k
        Lib_IntVector_Intrinsics_vec256
374
9.28k
            a04 =
375
9.28k
                Lib_IntVector_Intrinsics_vec256_add64(a03,
376
9.28k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
377
9.28k
        Lib_IntVector_Intrinsics_vec256
378
9.28k
            a14 =
379
9.28k
                Lib_IntVector_Intrinsics_vec256_add64(a13,
380
9.28k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
381
9.28k
        Lib_IntVector_Intrinsics_vec256
382
9.28k
            a24 =
383
9.28k
                Lib_IntVector_Intrinsics_vec256_add64(a23,
384
9.28k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
385
9.28k
        Lib_IntVector_Intrinsics_vec256
386
9.28k
            a34 =
387
9.28k
                Lib_IntVector_Intrinsics_vec256_add64(a33,
388
9.28k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a21));
389
9.28k
        Lib_IntVector_Intrinsics_vec256
390
9.28k
            a44 =
391
9.28k
                Lib_IntVector_Intrinsics_vec256_add64(a43,
392
9.28k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
393
9.28k
        Lib_IntVector_Intrinsics_vec256
394
9.28k
            a05 =
395
9.28k
                Lib_IntVector_Intrinsics_vec256_add64(a04,
396
9.28k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
397
9.28k
        Lib_IntVector_Intrinsics_vec256
398
9.28k
            a15 =
399
9.28k
                Lib_IntVector_Intrinsics_vec256_add64(a14,
400
9.28k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
401
9.28k
        Lib_IntVector_Intrinsics_vec256
402
9.28k
            a25 =
403
9.28k
                Lib_IntVector_Intrinsics_vec256_add64(a24,
404
9.28k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
405
9.28k
        Lib_IntVector_Intrinsics_vec256
406
9.28k
            a35 =
407
9.28k
                Lib_IntVector_Intrinsics_vec256_add64(a34,
408
9.28k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
409
9.28k
        Lib_IntVector_Intrinsics_vec256
410
9.28k
            a45 =
411
9.28k
                Lib_IntVector_Intrinsics_vec256_add64(a44,
412
9.28k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a31));
413
9.28k
        Lib_IntVector_Intrinsics_vec256
414
9.28k
            a06 =
415
9.28k
                Lib_IntVector_Intrinsics_vec256_add64(a05,
416
9.28k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
417
9.28k
        Lib_IntVector_Intrinsics_vec256
418
9.28k
            a16 =
419
9.28k
                Lib_IntVector_Intrinsics_vec256_add64(a15,
420
9.28k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
421
9.28k
        Lib_IntVector_Intrinsics_vec256
422
9.28k
            a26 =
423
9.28k
                Lib_IntVector_Intrinsics_vec256_add64(a25,
424
9.28k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
425
9.28k
        Lib_IntVector_Intrinsics_vec256
426
9.28k
            a36 =
427
9.28k
                Lib_IntVector_Intrinsics_vec256_add64(a35,
428
9.28k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
429
9.28k
        Lib_IntVector_Intrinsics_vec256
430
9.28k
            a46 =
431
9.28k
                Lib_IntVector_Intrinsics_vec256_add64(a45,
432
9.28k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
433
9.28k
        Lib_IntVector_Intrinsics_vec256 t01 = a06;
434
9.28k
        Lib_IntVector_Intrinsics_vec256 t11 = a16;
435
9.28k
        Lib_IntVector_Intrinsics_vec256 t2 = a26;
436
9.28k
        Lib_IntVector_Intrinsics_vec256 t3 = a36;
437
9.28k
        Lib_IntVector_Intrinsics_vec256 t4 = a46;
438
9.28k
        Lib_IntVector_Intrinsics_vec256
439
9.28k
            mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
440
9.28k
        Lib_IntVector_Intrinsics_vec256
441
9.28k
            z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
442
9.28k
        Lib_IntVector_Intrinsics_vec256
443
9.28k
            z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
444
9.28k
        Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
445
9.28k
        Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
446
9.28k
        Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0);
447
9.28k
        Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
448
9.28k
        Lib_IntVector_Intrinsics_vec256
449
9.28k
            z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
450
9.28k
        Lib_IntVector_Intrinsics_vec256
451
9.28k
            z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
452
9.28k
        Lib_IntVector_Intrinsics_vec256
453
9.28k
            t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
454
9.28k
        Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
455
9.28k
        Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
456
9.28k
        Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
457
9.28k
        Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
458
9.28k
        Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
459
9.28k
        Lib_IntVector_Intrinsics_vec256
460
9.28k
            z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
461
9.28k
        Lib_IntVector_Intrinsics_vec256
462
9.28k
            z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
463
9.28k
        Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
464
9.28k
        Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
465
9.28k
        Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
466
9.28k
        Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
467
9.28k
        Lib_IntVector_Intrinsics_vec256
468
9.28k
            z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
469
9.28k
        Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
470
9.28k
        Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
471
9.28k
        Lib_IntVector_Intrinsics_vec256 o0 = x02;
472
9.28k
        Lib_IntVector_Intrinsics_vec256 o1 = x12;
473
9.28k
        Lib_IntVector_Intrinsics_vec256 o2 = x21;
474
9.28k
        Lib_IntVector_Intrinsics_vec256 o3 = x32;
475
9.28k
        Lib_IntVector_Intrinsics_vec256 o4 = x42;
476
9.28k
        acc0[0U] = o0;
477
9.28k
        acc0[1U] = o1;
478
9.28k
        acc0[2U] = o2;
479
9.28k
        acc0[3U] = o3;
480
9.28k
        acc0[4U] = o4;
481
9.28k
    }
482
26.7k
    if (rem1 > (uint32_t)0U) {
483
0
        uint8_t *last = t10 + nb * (uint32_t)16U;
484
0
        KRML_PRE_ALIGN(32)
485
0
        Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
486
0
        uint8_t tmp[16U] = { 0U };
487
0
        memcpy(tmp, last, rem1 * sizeof(uint8_t));
488
0
        uint64_t u0 = load64_le(tmp);
489
0
        uint64_t lo = u0;
490
0
        uint64_t u = load64_le(tmp + (uint32_t)8U);
491
0
        uint64_t hi = u;
492
0
        Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
493
0
        Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
494
0
        Lib_IntVector_Intrinsics_vec256
495
0
            f010 =
496
0
                Lib_IntVector_Intrinsics_vec256_and(f0,
497
0
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
498
0
        Lib_IntVector_Intrinsics_vec256
499
0
            f110 =
500
0
                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
501
0
                                                                                                  (uint32_t)26U),
502
0
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
503
0
        Lib_IntVector_Intrinsics_vec256
504
0
            f20 =
505
0
                Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
506
0
                                                                                                 (uint32_t)52U),
507
0
                                                   Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
508
0
                                                                                                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
509
0
                                                                                                (uint32_t)12U));
510
0
        Lib_IntVector_Intrinsics_vec256
511
0
            f30 =
512
0
                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
513
0
                                                                                                  (uint32_t)14U),
514
0
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
515
0
        Lib_IntVector_Intrinsics_vec256
516
0
            f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
517
0
        Lib_IntVector_Intrinsics_vec256 f01 = f010;
518
0
        Lib_IntVector_Intrinsics_vec256 f111 = f110;
519
0
        Lib_IntVector_Intrinsics_vec256 f2 = f20;
520
0
        Lib_IntVector_Intrinsics_vec256 f3 = f30;
521
0
        Lib_IntVector_Intrinsics_vec256 f4 = f40;
522
0
        e[0U] = f01;
523
0
        e[1U] = f111;
524
0
        e[2U] = f2;
525
0
        e[3U] = f3;
526
0
        e[4U] = f4;
527
0
        uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U;
528
0
        Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
529
0
        Lib_IntVector_Intrinsics_vec256 fi = e[rem1 * (uint32_t)8U / (uint32_t)26U];
530
0
        e[rem1 * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec256_or(fi, mask);
531
0
        Lib_IntVector_Intrinsics_vec256 *r1 = pre0;
532
0
        Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U;
533
0
        Lib_IntVector_Intrinsics_vec256 r0 = r1[0U];
534
0
        Lib_IntVector_Intrinsics_vec256 r11 = r1[1U];
535
0
        Lib_IntVector_Intrinsics_vec256 r2 = r1[2U];
536
0
        Lib_IntVector_Intrinsics_vec256 r3 = r1[3U];
537
0
        Lib_IntVector_Intrinsics_vec256 r4 = r1[4U];
538
0
        Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
539
0
        Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
540
0
        Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
541
0
        Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
542
0
        Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
543
0
        Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
544
0
        Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
545
0
        Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
546
0
        Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
547
0
        Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U];
548
0
        Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U];
549
0
        Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U];
550
0
        Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U];
551
0
        Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U];
552
0
        Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
553
0
        Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
554
0
        Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
555
0
        Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
556
0
        Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
557
0
        Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
558
0
        Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01);
559
0
        Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
560
0
        Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
561
0
        Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
562
0
        Lib_IntVector_Intrinsics_vec256
563
0
            a03 =
564
0
                Lib_IntVector_Intrinsics_vec256_add64(a02,
565
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
566
0
        Lib_IntVector_Intrinsics_vec256
567
0
            a13 =
568
0
                Lib_IntVector_Intrinsics_vec256_add64(a12,
569
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
570
0
        Lib_IntVector_Intrinsics_vec256
571
0
            a23 =
572
0
                Lib_IntVector_Intrinsics_vec256_add64(a22,
573
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a11));
574
0
        Lib_IntVector_Intrinsics_vec256
575
0
            a33 =
576
0
                Lib_IntVector_Intrinsics_vec256_add64(a32,
577
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
578
0
        Lib_IntVector_Intrinsics_vec256
579
0
            a43 =
580
0
                Lib_IntVector_Intrinsics_vec256_add64(a42,
581
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
582
0
        Lib_IntVector_Intrinsics_vec256
583
0
            a04 =
584
0
                Lib_IntVector_Intrinsics_vec256_add64(a03,
585
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
586
0
        Lib_IntVector_Intrinsics_vec256
587
0
            a14 =
588
0
                Lib_IntVector_Intrinsics_vec256_add64(a13,
589
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
590
0
        Lib_IntVector_Intrinsics_vec256
591
0
            a24 =
592
0
                Lib_IntVector_Intrinsics_vec256_add64(a23,
593
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
594
0
        Lib_IntVector_Intrinsics_vec256
595
0
            a34 =
596
0
                Lib_IntVector_Intrinsics_vec256_add64(a33,
597
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a21));
598
0
        Lib_IntVector_Intrinsics_vec256
599
0
            a44 =
600
0
                Lib_IntVector_Intrinsics_vec256_add64(a43,
601
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
602
0
        Lib_IntVector_Intrinsics_vec256
603
0
            a05 =
604
0
                Lib_IntVector_Intrinsics_vec256_add64(a04,
605
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
606
0
        Lib_IntVector_Intrinsics_vec256
607
0
            a15 =
608
0
                Lib_IntVector_Intrinsics_vec256_add64(a14,
609
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
610
0
        Lib_IntVector_Intrinsics_vec256
611
0
            a25 =
612
0
                Lib_IntVector_Intrinsics_vec256_add64(a24,
613
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
614
0
        Lib_IntVector_Intrinsics_vec256
615
0
            a35 =
616
0
                Lib_IntVector_Intrinsics_vec256_add64(a34,
617
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
618
0
        Lib_IntVector_Intrinsics_vec256
619
0
            a45 =
620
0
                Lib_IntVector_Intrinsics_vec256_add64(a44,
621
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a31));
622
0
        Lib_IntVector_Intrinsics_vec256
623
0
            a06 =
624
0
                Lib_IntVector_Intrinsics_vec256_add64(a05,
625
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
626
0
        Lib_IntVector_Intrinsics_vec256
627
0
            a16 =
628
0
                Lib_IntVector_Intrinsics_vec256_add64(a15,
629
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
630
0
        Lib_IntVector_Intrinsics_vec256
631
0
            a26 =
632
0
                Lib_IntVector_Intrinsics_vec256_add64(a25,
633
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
634
0
        Lib_IntVector_Intrinsics_vec256
635
0
            a36 =
636
0
                Lib_IntVector_Intrinsics_vec256_add64(a35,
637
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
638
0
        Lib_IntVector_Intrinsics_vec256
639
0
            a46 =
640
0
                Lib_IntVector_Intrinsics_vec256_add64(a45,
641
0
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
642
0
        Lib_IntVector_Intrinsics_vec256 t01 = a06;
643
0
        Lib_IntVector_Intrinsics_vec256 t11 = a16;
644
0
        Lib_IntVector_Intrinsics_vec256 t2 = a26;
645
0
        Lib_IntVector_Intrinsics_vec256 t3 = a36;
646
0
        Lib_IntVector_Intrinsics_vec256 t4 = a46;
647
0
        Lib_IntVector_Intrinsics_vec256
648
0
            mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
649
0
        Lib_IntVector_Intrinsics_vec256
650
0
            z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
651
0
        Lib_IntVector_Intrinsics_vec256
652
0
            z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
653
0
        Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
654
0
        Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
655
0
        Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0);
656
0
        Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
657
0
        Lib_IntVector_Intrinsics_vec256
658
0
            z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
659
0
        Lib_IntVector_Intrinsics_vec256
660
0
            z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
661
0
        Lib_IntVector_Intrinsics_vec256
662
0
            t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
663
0
        Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
664
0
        Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
665
0
        Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
666
0
        Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
667
0
        Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
668
0
        Lib_IntVector_Intrinsics_vec256
669
0
            z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
670
0
        Lib_IntVector_Intrinsics_vec256
671
0
            z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
672
0
        Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
673
0
        Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
674
0
        Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
675
0
        Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
676
0
        Lib_IntVector_Intrinsics_vec256
677
0
            z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
678
0
        Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
679
0
        Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
680
0
        Lib_IntVector_Intrinsics_vec256 o0 = x02;
681
0
        Lib_IntVector_Intrinsics_vec256 o1 = x12;
682
0
        Lib_IntVector_Intrinsics_vec256 o2 = x21;
683
0
        Lib_IntVector_Intrinsics_vec256 o3 = x32;
684
0
        Lib_IntVector_Intrinsics_vec256 o4 = x42;
685
0
        acc0[0U] = o0;
686
0
        acc0[1U] = o1;
687
0
        acc0[2U] = o2;
688
0
        acc0[3U] = o3;
689
0
        acc0[4U] = o4;
690
0
    }
691
26.7k
    uint8_t tmp[16U] = { 0U };
692
26.7k
    memcpy(tmp, rem, r * sizeof(uint8_t));
693
26.7k
    if (r > (uint32_t)0U) {
694
22.2k
        Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U;
695
22.2k
        Lib_IntVector_Intrinsics_vec256 *acc = ctx;
696
22.2k
        KRML_PRE_ALIGN(32)
697
22.2k
        Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
698
22.2k
        uint64_t u0 = load64_le(tmp);
699
22.2k
        uint64_t lo = u0;
700
22.2k
        uint64_t u = load64_le(tmp + (uint32_t)8U);
701
22.2k
        uint64_t hi = u;
702
22.2k
        Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
703
22.2k
        Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
704
22.2k
        Lib_IntVector_Intrinsics_vec256
705
22.2k
            f010 =
706
22.2k
                Lib_IntVector_Intrinsics_vec256_and(f0,
707
22.2k
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
708
22.2k
        Lib_IntVector_Intrinsics_vec256
709
22.2k
            f110 =
710
22.2k
                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
711
22.2k
                                                                                                  (uint32_t)26U),
712
22.2k
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
713
22.2k
        Lib_IntVector_Intrinsics_vec256
714
22.2k
            f20 =
715
22.2k
                Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
716
22.2k
                                                                                                 (uint32_t)52U),
717
22.2k
                                                   Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
718
22.2k
                                                                                                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
719
22.2k
                                                                                                (uint32_t)12U));
720
22.2k
        Lib_IntVector_Intrinsics_vec256
721
22.2k
            f30 =
722
22.2k
                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
723
22.2k
                                                                                                  (uint32_t)14U),
724
22.2k
                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
725
22.2k
        Lib_IntVector_Intrinsics_vec256
726
22.2k
            f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
727
22.2k
        Lib_IntVector_Intrinsics_vec256 f01 = f010;
728
22.2k
        Lib_IntVector_Intrinsics_vec256 f111 = f110;
729
22.2k
        Lib_IntVector_Intrinsics_vec256 f2 = f20;
730
22.2k
        Lib_IntVector_Intrinsics_vec256 f3 = f30;
731
22.2k
        Lib_IntVector_Intrinsics_vec256 f41 = f40;
732
22.2k
        e[0U] = f01;
733
22.2k
        e[1U] = f111;
734
22.2k
        e[2U] = f2;
735
22.2k
        e[3U] = f3;
736
22.2k
        e[4U] = f41;
737
22.2k
        uint64_t b = (uint64_t)0x1000000U;
738
22.2k
        Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
739
22.2k
        Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
740
22.2k
        e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
741
22.2k
        Lib_IntVector_Intrinsics_vec256 *r1 = pre;
742
22.2k
        Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U;
743
22.2k
        Lib_IntVector_Intrinsics_vec256 r0 = r1[0U];
744
22.2k
        Lib_IntVector_Intrinsics_vec256 r11 = r1[1U];
745
22.2k
        Lib_IntVector_Intrinsics_vec256 r2 = r1[2U];
746
22.2k
        Lib_IntVector_Intrinsics_vec256 r3 = r1[3U];
747
22.2k
        Lib_IntVector_Intrinsics_vec256 r4 = r1[4U];
748
22.2k
        Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
749
22.2k
        Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
750
22.2k
        Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
751
22.2k
        Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
752
22.2k
        Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
753
22.2k
        Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
754
22.2k
        Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
755
22.2k
        Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
756
22.2k
        Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
757
22.2k
        Lib_IntVector_Intrinsics_vec256 a0 = acc[0U];
758
22.2k
        Lib_IntVector_Intrinsics_vec256 a1 = acc[1U];
759
22.2k
        Lib_IntVector_Intrinsics_vec256 a2 = acc[2U];
760
22.2k
        Lib_IntVector_Intrinsics_vec256 a3 = acc[3U];
761
22.2k
        Lib_IntVector_Intrinsics_vec256 a4 = acc[4U];
762
22.2k
        Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
763
22.2k
        Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
764
22.2k
        Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
765
22.2k
        Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
766
22.2k
        Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
767
22.2k
        Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
768
22.2k
        Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01);
769
22.2k
        Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
770
22.2k
        Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
771
22.2k
        Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
772
22.2k
        Lib_IntVector_Intrinsics_vec256
773
22.2k
            a03 =
774
22.2k
                Lib_IntVector_Intrinsics_vec256_add64(a02,
775
22.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
776
22.2k
        Lib_IntVector_Intrinsics_vec256
777
22.2k
            a13 =
778
22.2k
                Lib_IntVector_Intrinsics_vec256_add64(a12,
779
22.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
780
22.2k
        Lib_IntVector_Intrinsics_vec256
781
22.2k
            a23 =
782
22.2k
                Lib_IntVector_Intrinsics_vec256_add64(a22,
783
22.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a11));
784
22.2k
        Lib_IntVector_Intrinsics_vec256
785
22.2k
            a33 =
786
22.2k
                Lib_IntVector_Intrinsics_vec256_add64(a32,
787
22.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
788
22.2k
        Lib_IntVector_Intrinsics_vec256
789
22.2k
            a43 =
790
22.2k
                Lib_IntVector_Intrinsics_vec256_add64(a42,
791
22.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
792
22.2k
        Lib_IntVector_Intrinsics_vec256
793
22.2k
            a04 =
794
22.2k
                Lib_IntVector_Intrinsics_vec256_add64(a03,
795
22.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
796
22.2k
        Lib_IntVector_Intrinsics_vec256
797
22.2k
            a14 =
798
22.2k
                Lib_IntVector_Intrinsics_vec256_add64(a13,
799
22.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
800
22.2k
        Lib_IntVector_Intrinsics_vec256
801
22.2k
            a24 =
802
22.2k
                Lib_IntVector_Intrinsics_vec256_add64(a23,
803
22.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
804
22.2k
        Lib_IntVector_Intrinsics_vec256
805
22.2k
            a34 =
806
22.2k
                Lib_IntVector_Intrinsics_vec256_add64(a33,
807
22.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a21));
808
22.2k
        Lib_IntVector_Intrinsics_vec256
809
22.2k
            a44 =
810
22.2k
                Lib_IntVector_Intrinsics_vec256_add64(a43,
811
22.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
812
22.2k
        Lib_IntVector_Intrinsics_vec256
813
22.2k
            a05 =
814
22.2k
                Lib_IntVector_Intrinsics_vec256_add64(a04,
815
22.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
816
22.2k
        Lib_IntVector_Intrinsics_vec256
817
22.2k
            a15 =
818
22.2k
                Lib_IntVector_Intrinsics_vec256_add64(a14,
819
22.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
820
22.2k
        Lib_IntVector_Intrinsics_vec256
821
22.2k
            a25 =
822
22.2k
                Lib_IntVector_Intrinsics_vec256_add64(a24,
823
22.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
824
22.2k
        Lib_IntVector_Intrinsics_vec256
825
22.2k
            a35 =
826
22.2k
                Lib_IntVector_Intrinsics_vec256_add64(a34,
827
22.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
828
22.2k
        Lib_IntVector_Intrinsics_vec256
829
22.2k
            a45 =
830
22.2k
                Lib_IntVector_Intrinsics_vec256_add64(a44,
831
22.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a31));
832
22.2k
        Lib_IntVector_Intrinsics_vec256
833
22.2k
            a06 =
834
22.2k
                Lib_IntVector_Intrinsics_vec256_add64(a05,
835
22.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
836
22.2k
        Lib_IntVector_Intrinsics_vec256
837
22.2k
            a16 =
838
22.2k
                Lib_IntVector_Intrinsics_vec256_add64(a15,
839
22.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
840
22.2k
        Lib_IntVector_Intrinsics_vec256
841
22.2k
            a26 =
842
22.2k
                Lib_IntVector_Intrinsics_vec256_add64(a25,
843
22.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
844
22.2k
        Lib_IntVector_Intrinsics_vec256
845
22.2k
            a36 =
846
22.2k
                Lib_IntVector_Intrinsics_vec256_add64(a35,
847
22.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
848
22.2k
        Lib_IntVector_Intrinsics_vec256
849
22.2k
            a46 =
850
22.2k
                Lib_IntVector_Intrinsics_vec256_add64(a45,
851
22.2k
                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
852
22.2k
        Lib_IntVector_Intrinsics_vec256 t0 = a06;
853
22.2k
        Lib_IntVector_Intrinsics_vec256 t1 = a16;
854
22.2k
        Lib_IntVector_Intrinsics_vec256 t2 = a26;
855
22.2k
        Lib_IntVector_Intrinsics_vec256 t3 = a36;
856
22.2k
        Lib_IntVector_Intrinsics_vec256 t4 = a46;
857
22.2k
        Lib_IntVector_Intrinsics_vec256
858
22.2k
            mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
859
22.2k
        Lib_IntVector_Intrinsics_vec256
860
22.2k
            z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U);
861
22.2k
        Lib_IntVector_Intrinsics_vec256
862
22.2k
            z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
863
22.2k
        Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26);
864
22.2k
        Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
865
22.2k
        Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
866
22.2k
        Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
867
22.2k
        Lib_IntVector_Intrinsics_vec256
868
22.2k
            z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
869
22.2k
        Lib_IntVector_Intrinsics_vec256
870
22.2k
            z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
871
22.2k
        Lib_IntVector_Intrinsics_vec256
872
22.2k
            t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
873
22.2k
        Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
874
22.2k
        Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
875
22.2k
        Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
876
22.2k
        Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
877
22.2k
        Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
878
22.2k
        Lib_IntVector_Intrinsics_vec256
879
22.2k
            z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
880
22.2k
        Lib_IntVector_Intrinsics_vec256
881
22.2k
            z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
882
22.2k
        Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
883
22.2k
        Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
884
22.2k
        Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
885
22.2k
        Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
886
22.2k
        Lib_IntVector_Intrinsics_vec256
887
22.2k
            z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
888
22.2k
        Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
889
22.2k
        Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
890
22.2k
        Lib_IntVector_Intrinsics_vec256 o0 = x02;
891
22.2k
        Lib_IntVector_Intrinsics_vec256 o1 = x12;
892
22.2k
        Lib_IntVector_Intrinsics_vec256 o2 = x21;
893
22.2k
        Lib_IntVector_Intrinsics_vec256 o3 = x32;
894
22.2k
        Lib_IntVector_Intrinsics_vec256 o4 = x42;
895
22.2k
        acc[0U] = o0;
896
22.2k
        acc[1U] = o1;
897
22.2k
        acc[2U] = o2;
898
22.2k
        acc[3U] = o3;
899
22.2k
        acc[4U] = o4;
900
22.2k
        return;
901
22.2k
    }
902
26.7k
}
903
904
static inline void
905
poly1305_do_256(
906
    uint8_t *k,
907
    uint32_t aadlen,
908
    uint8_t *aad,
909
    uint32_t mlen,
910
    uint8_t *m,
911
    uint8_t *out)
912
15.5k
{
913
15.5k
    KRML_PRE_ALIGN(32)
914
15.5k
    Lib_IntVector_Intrinsics_vec256 ctx[25U] KRML_POST_ALIGN(32) = { 0U };
915
15.5k
    uint8_t block[16U] = { 0U };
916
15.5k
    Hacl_Poly1305_256_poly1305_init(ctx, k);
917
15.5k
    if (aadlen != (uint32_t)0U) {
918
15.4k
        poly1305_padded_256(ctx, aadlen, aad);
919
15.4k
    }
920
15.5k
    if (mlen != (uint32_t)0U) {
921
11.2k
        poly1305_padded_256(ctx, mlen, m);
922
11.2k
    }
923
15.5k
    store64_le(block, (uint64_t)aadlen);
924
15.5k
    store64_le(block + (uint32_t)8U, (uint64_t)mlen);
925
15.5k
    Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U;
926
15.5k
    Lib_IntVector_Intrinsics_vec256 *acc = ctx;
927
15.5k
    KRML_PRE_ALIGN(32)
928
15.5k
    Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
929
15.5k
    uint64_t u0 = load64_le(block);
930
15.5k
    uint64_t lo = u0;
931
15.5k
    uint64_t u = load64_le(block + (uint32_t)8U);
932
15.5k
    uint64_t hi = u;
933
15.5k
    Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
934
15.5k
    Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
935
15.5k
    Lib_IntVector_Intrinsics_vec256
936
15.5k
        f010 =
937
15.5k
            Lib_IntVector_Intrinsics_vec256_and(f0,
938
15.5k
                                                Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
939
15.5k
    Lib_IntVector_Intrinsics_vec256
940
15.5k
        f110 =
941
15.5k
            Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
942
15.5k
                                                                                              (uint32_t)26U),
943
15.5k
                                                Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
944
15.5k
    Lib_IntVector_Intrinsics_vec256
945
15.5k
        f20 =
946
15.5k
            Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
947
15.5k
                                                                                             (uint32_t)52U),
948
15.5k
                                               Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
949
15.5k
                                                                                                                                Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
950
15.5k
                                                                                            (uint32_t)12U));
951
15.5k
    Lib_IntVector_Intrinsics_vec256
952
15.5k
        f30 =
953
15.5k
            Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
954
15.5k
                                                                                              (uint32_t)14U),
955
15.5k
                                                Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
956
15.5k
    Lib_IntVector_Intrinsics_vec256
957
15.5k
        f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
958
15.5k
    Lib_IntVector_Intrinsics_vec256 f01 = f010;
959
15.5k
    Lib_IntVector_Intrinsics_vec256 f111 = f110;
960
15.5k
    Lib_IntVector_Intrinsics_vec256 f2 = f20;
961
15.5k
    Lib_IntVector_Intrinsics_vec256 f3 = f30;
962
15.5k
    Lib_IntVector_Intrinsics_vec256 f41 = f40;
963
15.5k
    e[0U] = f01;
964
15.5k
    e[1U] = f111;
965
15.5k
    e[2U] = f2;
966
15.5k
    e[3U] = f3;
967
15.5k
    e[4U] = f41;
968
15.5k
    uint64_t b = (uint64_t)0x1000000U;
969
15.5k
    Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
970
15.5k
    Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
971
15.5k
    e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
972
15.5k
    Lib_IntVector_Intrinsics_vec256 *r = pre;
973
15.5k
    Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U;
974
15.5k
    Lib_IntVector_Intrinsics_vec256 r0 = r[0U];
975
15.5k
    Lib_IntVector_Intrinsics_vec256 r1 = r[1U];
976
15.5k
    Lib_IntVector_Intrinsics_vec256 r2 = r[2U];
977
15.5k
    Lib_IntVector_Intrinsics_vec256 r3 = r[3U];
978
15.5k
    Lib_IntVector_Intrinsics_vec256 r4 = r[4U];
979
15.5k
    Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
980
15.5k
    Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
981
15.5k
    Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
982
15.5k
    Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
983
15.5k
    Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
984
15.5k
    Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
985
15.5k
    Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
986
15.5k
    Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
987
15.5k
    Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
988
15.5k
    Lib_IntVector_Intrinsics_vec256 a0 = acc[0U];
989
15.5k
    Lib_IntVector_Intrinsics_vec256 a1 = acc[1U];
990
15.5k
    Lib_IntVector_Intrinsics_vec256 a2 = acc[2U];
991
15.5k
    Lib_IntVector_Intrinsics_vec256 a3 = acc[3U];
992
15.5k
    Lib_IntVector_Intrinsics_vec256 a4 = acc[4U];
993
15.5k
    Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
994
15.5k
    Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
995
15.5k
    Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
996
15.5k
    Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
997
15.5k
    Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
998
15.5k
    Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
999
15.5k
    Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01);
1000
15.5k
    Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
1001
15.5k
    Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
1002
15.5k
    Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
1003
15.5k
    Lib_IntVector_Intrinsics_vec256
1004
15.5k
        a03 =
1005
15.5k
            Lib_IntVector_Intrinsics_vec256_add64(a02,
1006
15.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
1007
15.5k
    Lib_IntVector_Intrinsics_vec256
1008
15.5k
        a13 =
1009
15.5k
            Lib_IntVector_Intrinsics_vec256_add64(a12,
1010
15.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
1011
15.5k
    Lib_IntVector_Intrinsics_vec256
1012
15.5k
        a23 =
1013
15.5k
            Lib_IntVector_Intrinsics_vec256_add64(a22,
1014
15.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r1, a11));
1015
15.5k
    Lib_IntVector_Intrinsics_vec256
1016
15.5k
        a33 =
1017
15.5k
            Lib_IntVector_Intrinsics_vec256_add64(a32,
1018
15.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
1019
15.5k
    Lib_IntVector_Intrinsics_vec256
1020
15.5k
        a43 =
1021
15.5k
            Lib_IntVector_Intrinsics_vec256_add64(a42,
1022
15.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
1023
15.5k
    Lib_IntVector_Intrinsics_vec256
1024
15.5k
        a04 =
1025
15.5k
            Lib_IntVector_Intrinsics_vec256_add64(a03,
1026
15.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
1027
15.5k
    Lib_IntVector_Intrinsics_vec256
1028
15.5k
        a14 =
1029
15.5k
            Lib_IntVector_Intrinsics_vec256_add64(a13,
1030
15.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
1031
15.5k
    Lib_IntVector_Intrinsics_vec256
1032
15.5k
        a24 =
1033
15.5k
            Lib_IntVector_Intrinsics_vec256_add64(a23,
1034
15.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
1035
15.5k
    Lib_IntVector_Intrinsics_vec256
1036
15.5k
        a34 =
1037
15.5k
            Lib_IntVector_Intrinsics_vec256_add64(a33,
1038
15.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r1, a21));
1039
15.5k
    Lib_IntVector_Intrinsics_vec256
1040
15.5k
        a44 =
1041
15.5k
            Lib_IntVector_Intrinsics_vec256_add64(a43,
1042
15.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
1043
15.5k
    Lib_IntVector_Intrinsics_vec256
1044
15.5k
        a05 =
1045
15.5k
            Lib_IntVector_Intrinsics_vec256_add64(a04,
1046
15.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
1047
15.5k
    Lib_IntVector_Intrinsics_vec256
1048
15.5k
        a15 =
1049
15.5k
            Lib_IntVector_Intrinsics_vec256_add64(a14,
1050
15.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
1051
15.5k
    Lib_IntVector_Intrinsics_vec256
1052
15.5k
        a25 =
1053
15.5k
            Lib_IntVector_Intrinsics_vec256_add64(a24,
1054
15.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
1055
15.5k
    Lib_IntVector_Intrinsics_vec256
1056
15.5k
        a35 =
1057
15.5k
            Lib_IntVector_Intrinsics_vec256_add64(a34,
1058
15.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
1059
15.5k
    Lib_IntVector_Intrinsics_vec256
1060
15.5k
        a45 =
1061
15.5k
            Lib_IntVector_Intrinsics_vec256_add64(a44,
1062
15.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r1, a31));
1063
15.5k
    Lib_IntVector_Intrinsics_vec256
1064
15.5k
        a06 =
1065
15.5k
            Lib_IntVector_Intrinsics_vec256_add64(a05,
1066
15.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
1067
15.5k
    Lib_IntVector_Intrinsics_vec256
1068
15.5k
        a16 =
1069
15.5k
            Lib_IntVector_Intrinsics_vec256_add64(a15,
1070
15.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
1071
15.5k
    Lib_IntVector_Intrinsics_vec256
1072
15.5k
        a26 =
1073
15.5k
            Lib_IntVector_Intrinsics_vec256_add64(a25,
1074
15.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
1075
15.5k
    Lib_IntVector_Intrinsics_vec256
1076
15.5k
        a36 =
1077
15.5k
            Lib_IntVector_Intrinsics_vec256_add64(a35,
1078
15.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
1079
15.5k
    Lib_IntVector_Intrinsics_vec256
1080
15.5k
        a46 =
1081
15.5k
            Lib_IntVector_Intrinsics_vec256_add64(a45,
1082
15.5k
                                                  Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
1083
15.5k
    Lib_IntVector_Intrinsics_vec256 t0 = a06;
1084
15.5k
    Lib_IntVector_Intrinsics_vec256 t1 = a16;
1085
15.5k
    Lib_IntVector_Intrinsics_vec256 t2 = a26;
1086
15.5k
    Lib_IntVector_Intrinsics_vec256 t3 = a36;
1087
15.5k
    Lib_IntVector_Intrinsics_vec256 t4 = a46;
1088
15.5k
    Lib_IntVector_Intrinsics_vec256
1089
15.5k
        mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
1090
15.5k
    Lib_IntVector_Intrinsics_vec256
1091
15.5k
        z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U);
1092
15.5k
    Lib_IntVector_Intrinsics_vec256
1093
15.5k
        z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
1094
15.5k
    Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26);
1095
15.5k
    Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
1096
15.5k
    Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
1097
15.5k
    Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
1098
15.5k
    Lib_IntVector_Intrinsics_vec256
1099
15.5k
        z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
1100
15.5k
    Lib_IntVector_Intrinsics_vec256
1101
15.5k
        z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
1102
15.5k
    Lib_IntVector_Intrinsics_vec256
1103
15.5k
        t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
1104
15.5k
    Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
1105
15.5k
    Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
1106
15.5k
    Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
1107
15.5k
    Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
1108
15.5k
    Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
1109
15.5k
    Lib_IntVector_Intrinsics_vec256
1110
15.5k
        z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
1111
15.5k
    Lib_IntVector_Intrinsics_vec256
1112
15.5k
        z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
1113
15.5k
    Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
1114
15.5k
    Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
1115
15.5k
    Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
1116
15.5k
    Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
1117
15.5k
    Lib_IntVector_Intrinsics_vec256
1118
15.5k
        z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
1119
15.5k
    Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
1120
15.5k
    Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
1121
15.5k
    Lib_IntVector_Intrinsics_vec256 o0 = x02;
1122
15.5k
    Lib_IntVector_Intrinsics_vec256 o1 = x12;
1123
15.5k
    Lib_IntVector_Intrinsics_vec256 o2 = x21;
1124
15.5k
    Lib_IntVector_Intrinsics_vec256 o3 = x32;
1125
15.5k
    Lib_IntVector_Intrinsics_vec256 o4 = x42;
1126
15.5k
    acc[0U] = o0;
1127
15.5k
    acc[1U] = o1;
1128
15.5k
    acc[2U] = o2;
1129
15.5k
    acc[3U] = o3;
1130
15.5k
    acc[4U] = o4;
1131
15.5k
    Hacl_Poly1305_256_poly1305_finish(out, k, ctx);
1132
15.5k
}
1133
1134
/**
1135
Encrypt a message `m` with key `k`.
1136
1137
The arguments `k`, `n`, `aadlen`, and `aad` are same in encryption/decryption.
1138
Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory.
1139
1140
@param k Pointer to 32 bytes of memory where the AEAD key is read from.
1141
@param n Pointer to 12 bytes of memory where the AEAD nonce is read from.
1142
@param aadlen Length of the associated data.
1143
@param aad Pointer to `aadlen` bytes of memory where the associated data is read from.
1144
1145
@param mlen Length of the message.
1146
@param m Pointer to `mlen` bytes of memory where the message is read from.
1147
@param cipher Pointer to `mlen` bytes of memory where the ciphertext is written to.
1148
@param mac Pointer to 16 bytes of memory where the mac is written to.
1149
*/
1150
void
1151
Hacl_Chacha20Poly1305_256_aead_encrypt(
1152
    uint8_t *k,
1153
    uint8_t *n,
1154
    uint32_t aadlen,
1155
    uint8_t *aad,
1156
    uint32_t mlen,
1157
    uint8_t *m,
1158
    uint8_t *cipher,
1159
    uint8_t *mac)
1160
8.59k
{
1161
8.59k
    Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, cipher, m, k, n, (uint32_t)1U);
1162
8.59k
    uint8_t tmp[64U] = { 0U };
1163
8.59k
    Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U);
1164
8.59k
    uint8_t *key = tmp;
1165
8.59k
    poly1305_do_256(key, aadlen, aad, mlen, cipher, mac);
1166
8.59k
}
1167
1168
/**
1169
Decrypt a ciphertext `cipher` with key `k`.
1170
1171
The arguments `k`, `n`, `aadlen`, and `aad` are same in encryption/decryption.
1172
Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory.
1173
1174
If decryption succeeds, the resulting plaintext is stored in `m` and the function returns the success code 0.
1175
If decryption fails, the array `m` remains unchanged and the function returns the error code 1.
1176
1177
@param k Pointer to 32 bytes of memory where the AEAD key is read from.
1178
@param n Pointer to 12 bytes of memory where the AEAD nonce is read from.
1179
@param aadlen Length of the associated data.
1180
@param aad Pointer to `aadlen` bytes of memory where the associated data is read from.
1181
1182
@param mlen Length of the ciphertext.
1183
@param m Pointer to `mlen` bytes of memory where the message is written to.
1184
@param cipher Pointer to `mlen` bytes of memory where the ciphertext is read from.
1185
@param mac Pointer to 16 bytes of memory where the mac is read from.
1186
1187
@returns 0 on succeess; 1 on failure.
1188
*/
1189
uint32_t
1190
Hacl_Chacha20Poly1305_256_aead_decrypt(
1191
    uint8_t *k,
1192
    uint8_t *n,
1193
    uint32_t aadlen,
1194
    uint8_t *aad,
1195
    uint32_t mlen,
1196
    uint8_t *m,
1197
    uint8_t *cipher,
1198
    uint8_t *mac)
1199
6.92k
{
1200
6.92k
    uint8_t computed_mac[16U] = { 0U };
1201
6.92k
    uint8_t tmp[64U] = { 0U };
1202
6.92k
    Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U);
1203
6.92k
    uint8_t *key = tmp;
1204
6.92k
    poly1305_do_256(key, aadlen, aad, mlen, cipher, computed_mac);
1205
6.92k
    uint8_t res = (uint8_t)255U;
1206
6.92k
    KRML_MAYBE_FOR16(i,
1207
6.92k
                     (uint32_t)0U,
1208
6.92k
                     (uint32_t)16U,
1209
6.92k
                     (uint32_t)1U,
1210
6.92k
                     uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]);
1211
6.92k
                     res = uu____0 & res;);
1212
6.92k
    uint8_t z = res;
1213
6.92k
    if (z == (uint8_t)255U) {
1214
10
        Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, m, cipher, k, n, (uint32_t)1U);
1215
10
        return (uint32_t)0U;
1216
10
    }
1217
6.91k
    return (uint32_t)1U;
1218
6.92k
}