/src/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* MIT License |
2 | | * |
3 | | * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation |
4 | | * Copyright (c) 2022-2023 HACL* Contributors |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to deal |
8 | | * in the Software without restriction, including without limitation the rights |
9 | | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | * copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in all |
14 | | * copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | |
25 | | #include "Hacl_Chacha20Poly1305_256.h" |
26 | | |
27 | | #include "internal/Hacl_Poly1305_256.h" |
28 | | #include "internal/Hacl_Krmllib.h" |
29 | | #include "libintvector.h" |
30 | | |
31 | | static inline void |
32 | | poly1305_padded_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint32_t len, uint8_t *text) |
33 | 29.3k | { |
34 | 29.3k | uint32_t n = len / (uint32_t)16U; |
35 | 29.3k | uint32_t r = len % (uint32_t)16U; |
36 | 29.3k | uint8_t *blocks = text; |
37 | 29.3k | uint8_t *rem = text + n * (uint32_t)16U; |
38 | 29.3k | Lib_IntVector_Intrinsics_vec256 *pre0 = ctx + (uint32_t)5U; |
39 | 29.3k | Lib_IntVector_Intrinsics_vec256 *acc0 = ctx; |
40 | 29.3k | uint32_t sz_block = (uint32_t)64U; |
41 | 29.3k | uint32_t len0 = n * (uint32_t)16U / sz_block * sz_block; |
42 | 29.3k | uint8_t *t00 = blocks; |
43 | 29.3k | if (len0 > (uint32_t)0U) { |
44 | 7.82k | uint32_t bs = (uint32_t)64U; |
45 | 7.82k | uint8_t *text0 = t00; |
46 | 7.82k | Hacl_Impl_Poly1305_Field32xN_256_load_acc4(acc0, text0); |
47 | 7.82k | uint32_t len1 = len0 - bs; |
48 | 7.82k | uint8_t *text1 = t00 + bs; |
49 | 7.82k | uint32_t nb = len1 / bs; |
50 | 42.1k | for (uint32_t i = (uint32_t)0U; i < nb; i++) { |
51 | 34.3k | uint8_t *block = text1 + i * bs; |
52 | 34.3k | KRML_PRE_ALIGN(32) |
53 | 34.3k | Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; |
54 | 34.3k | Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load64_le(block); |
55 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
56 | 34.3k | hi = Lib_IntVector_Intrinsics_vec256_load64_le(block + (uint32_t)32U); |
57 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
58 | 34.3k | mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); |
59 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
60 | 34.3k | m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi); |
61 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
62 | 34.3k | m1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(lo, hi); |
63 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
64 | 34.3k | m2 = Lib_IntVector_Intrinsics_vec256_shift_right(m0, (uint32_t)48U); |
65 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
66 | 34.3k | m3 = Lib_IntVector_Intrinsics_vec256_shift_right(m1, (uint32_t)48U); |
67 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
68 | 34.3k | m4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(m0, m1); |
69 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
70 | 34.3k | t010 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m0, m1); |
71 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
72 | 34.3k | t30 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m2, m3); |
73 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
74 | 34.3k | t20 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)4U); |
75 | 34.3k | Lib_IntVector_Intrinsics_vec256 o20 = Lib_IntVector_Intrinsics_vec256_and(t20, mask260); |
76 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
77 | 34.3k | t10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t010, (uint32_t)26U); |
78 | 34.3k | Lib_IntVector_Intrinsics_vec256 o10 = Lib_IntVector_Intrinsics_vec256_and(t10, mask260); |
79 | 34.3k | Lib_IntVector_Intrinsics_vec256 o5 = Lib_IntVector_Intrinsics_vec256_and(t010, mask260); |
80 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
81 | 34.3k | t31 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)30U); |
82 | 34.3k | Lib_IntVector_Intrinsics_vec256 o30 = Lib_IntVector_Intrinsics_vec256_and(t31, mask260); |
83 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
84 | 34.3k | o40 = Lib_IntVector_Intrinsics_vec256_shift_right64(m4, (uint32_t)40U); |
85 | 34.3k | Lib_IntVector_Intrinsics_vec256 o00 = o5; |
86 | 34.3k | Lib_IntVector_Intrinsics_vec256 o11 = o10; |
87 | 34.3k | Lib_IntVector_Intrinsics_vec256 o21 = o20; |
88 | 34.3k | Lib_IntVector_Intrinsics_vec256 o31 = o30; |
89 | 34.3k | Lib_IntVector_Intrinsics_vec256 o41 = o40; |
90 | 34.3k | e[0U] = o00; |
91 | 34.3k | e[1U] = o11; |
92 | 34.3k | e[2U] = o21; |
93 | 34.3k | e[3U] = o31; |
94 | 34.3k | e[4U] = o41; |
95 | 34.3k | uint64_t b = (uint64_t)0x1000000U; |
96 | 34.3k | Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); |
97 | 34.3k | Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; |
98 | 34.3k | e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); |
99 | 34.3k | Lib_IntVector_Intrinsics_vec256 *rn = pre0 + (uint32_t)10U; |
100 | 34.3k | Lib_IntVector_Intrinsics_vec256 *rn5 = pre0 + (uint32_t)15U; |
101 | 34.3k | Lib_IntVector_Intrinsics_vec256 r0 = rn[0U]; |
102 | 34.3k | Lib_IntVector_Intrinsics_vec256 r1 = rn[1U]; |
103 | 34.3k | Lib_IntVector_Intrinsics_vec256 r2 = rn[2U]; |
104 | 34.3k | Lib_IntVector_Intrinsics_vec256 r3 = rn[3U]; |
105 | 34.3k | Lib_IntVector_Intrinsics_vec256 r4 = rn[4U]; |
106 | 34.3k | Lib_IntVector_Intrinsics_vec256 r51 = rn5[1U]; |
107 | 34.3k | Lib_IntVector_Intrinsics_vec256 r52 = rn5[2U]; |
108 | 34.3k | Lib_IntVector_Intrinsics_vec256 r53 = rn5[3U]; |
109 | 34.3k | Lib_IntVector_Intrinsics_vec256 r54 = rn5[4U]; |
110 | 34.3k | Lib_IntVector_Intrinsics_vec256 f10 = acc0[0U]; |
111 | 34.3k | Lib_IntVector_Intrinsics_vec256 f110 = acc0[1U]; |
112 | 34.3k | Lib_IntVector_Intrinsics_vec256 f120 = acc0[2U]; |
113 | 34.3k | Lib_IntVector_Intrinsics_vec256 f130 = acc0[3U]; |
114 | 34.3k | Lib_IntVector_Intrinsics_vec256 f140 = acc0[4U]; |
115 | 34.3k | Lib_IntVector_Intrinsics_vec256 a0 = Lib_IntVector_Intrinsics_vec256_mul64(r0, f10); |
116 | 34.3k | Lib_IntVector_Intrinsics_vec256 a1 = Lib_IntVector_Intrinsics_vec256_mul64(r1, f10); |
117 | 34.3k | Lib_IntVector_Intrinsics_vec256 a2 = Lib_IntVector_Intrinsics_vec256_mul64(r2, f10); |
118 | 34.3k | Lib_IntVector_Intrinsics_vec256 a3 = Lib_IntVector_Intrinsics_vec256_mul64(r3, f10); |
119 | 34.3k | Lib_IntVector_Intrinsics_vec256 a4 = Lib_IntVector_Intrinsics_vec256_mul64(r4, f10); |
120 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
121 | 34.3k | a01 = |
122 | 34.3k | Lib_IntVector_Intrinsics_vec256_add64(a0, |
123 | 34.3k | Lib_IntVector_Intrinsics_vec256_mul64(r54, f110)); |
124 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
125 | 34.3k | a11 = |
126 | 34.3k | Lib_IntVector_Intrinsics_vec256_add64(a1, |
127 | 34.3k | Lib_IntVector_Intrinsics_vec256_mul64(r0, f110)); |
128 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
129 | 34.3k | a21 = |
130 | 34.3k | Lib_IntVector_Intrinsics_vec256_add64(a2, |
131 | 34.3k | Lib_IntVector_Intrinsics_vec256_mul64(r1, f110)); |
132 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
133 | 34.3k | a31 = |
134 | 34.3k | Lib_IntVector_Intrinsics_vec256_add64(a3, |
135 | 34.3k | Lib_IntVector_Intrinsics_vec256_mul64(r2, f110)); |
136 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
137 | 34.3k | a41 = |
138 | 34.3k | Lib_IntVector_Intrinsics_vec256_add64(a4, |
139 | 34.3k | Lib_IntVector_Intrinsics_vec256_mul64(r3, f110)); |
140 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
141 | 34.3k | a02 = |
142 | 34.3k | Lib_IntVector_Intrinsics_vec256_add64(a01, |
143 | 34.3k | Lib_IntVector_Intrinsics_vec256_mul64(r53, f120)); |
144 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
145 | 34.3k | a12 = |
146 | 34.3k | Lib_IntVector_Intrinsics_vec256_add64(a11, |
147 | 34.3k | Lib_IntVector_Intrinsics_vec256_mul64(r54, f120)); |
148 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
149 | 34.3k | a22 = |
150 | 34.3k | Lib_IntVector_Intrinsics_vec256_add64(a21, |
151 | 34.3k | Lib_IntVector_Intrinsics_vec256_mul64(r0, f120)); |
152 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
153 | 34.3k | a32 = |
154 | 34.3k | Lib_IntVector_Intrinsics_vec256_add64(a31, |
155 | 34.3k | Lib_IntVector_Intrinsics_vec256_mul64(r1, f120)); |
156 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
157 | 34.3k | a42 = |
158 | 34.3k | Lib_IntVector_Intrinsics_vec256_add64(a41, |
159 | 34.3k | Lib_IntVector_Intrinsics_vec256_mul64(r2, f120)); |
160 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
161 | 34.3k | a03 = |
162 | 34.3k | Lib_IntVector_Intrinsics_vec256_add64(a02, |
163 | 34.3k | Lib_IntVector_Intrinsics_vec256_mul64(r52, f130)); |
164 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
165 | 34.3k | a13 = |
166 | 34.3k | Lib_IntVector_Intrinsics_vec256_add64(a12, |
167 | 34.3k | Lib_IntVector_Intrinsics_vec256_mul64(r53, f130)); |
168 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
169 | 34.3k | a23 = |
170 | 34.3k | Lib_IntVector_Intrinsics_vec256_add64(a22, |
171 | 34.3k | Lib_IntVector_Intrinsics_vec256_mul64(r54, f130)); |
172 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
173 | 34.3k | a33 = |
174 | 34.3k | Lib_IntVector_Intrinsics_vec256_add64(a32, |
175 | 34.3k | Lib_IntVector_Intrinsics_vec256_mul64(r0, f130)); |
176 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
177 | 34.3k | a43 = |
178 | 34.3k | Lib_IntVector_Intrinsics_vec256_add64(a42, |
179 | 34.3k | Lib_IntVector_Intrinsics_vec256_mul64(r1, f130)); |
180 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
181 | 34.3k | a04 = |
182 | 34.3k | Lib_IntVector_Intrinsics_vec256_add64(a03, |
183 | 34.3k | Lib_IntVector_Intrinsics_vec256_mul64(r51, f140)); |
184 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
185 | 34.3k | a14 = |
186 | 34.3k | Lib_IntVector_Intrinsics_vec256_add64(a13, |
187 | 34.3k | Lib_IntVector_Intrinsics_vec256_mul64(r52, f140)); |
188 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
189 | 34.3k | a24 = |
190 | 34.3k | Lib_IntVector_Intrinsics_vec256_add64(a23, |
191 | 34.3k | Lib_IntVector_Intrinsics_vec256_mul64(r53, f140)); |
192 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
193 | 34.3k | a34 = |
194 | 34.3k | Lib_IntVector_Intrinsics_vec256_add64(a33, |
195 | 34.3k | Lib_IntVector_Intrinsics_vec256_mul64(r54, f140)); |
196 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
197 | 34.3k | a44 = |
198 | 34.3k | Lib_IntVector_Intrinsics_vec256_add64(a43, |
199 | 34.3k | Lib_IntVector_Intrinsics_vec256_mul64(r0, f140)); |
200 | 34.3k | Lib_IntVector_Intrinsics_vec256 t01 = a04; |
201 | 34.3k | Lib_IntVector_Intrinsics_vec256 t1 = a14; |
202 | 34.3k | Lib_IntVector_Intrinsics_vec256 t2 = a24; |
203 | 34.3k | Lib_IntVector_Intrinsics_vec256 t3 = a34; |
204 | 34.3k | Lib_IntVector_Intrinsics_vec256 t4 = a44; |
205 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
206 | 34.3k | mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); |
207 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
208 | 34.3k | z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); |
209 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
210 | 34.3k | z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); |
211 | 34.3k | Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26); |
212 | 34.3k | Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); |
213 | 34.3k | Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); |
214 | 34.3k | Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); |
215 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
216 | 34.3k | z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); |
217 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
218 | 34.3k | z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); |
219 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
220 | 34.3k | t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); |
221 | 34.3k | Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); |
222 | 34.3k | Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); |
223 | 34.3k | Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); |
224 | 34.3k | Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); |
225 | 34.3k | Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); |
226 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
227 | 34.3k | z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); |
228 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
229 | 34.3k | z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); |
230 | 34.3k | Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); |
231 | 34.3k | Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); |
232 | 34.3k | Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); |
233 | 34.3k | Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); |
234 | 34.3k | Lib_IntVector_Intrinsics_vec256 |
235 | 34.3k | z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); |
236 | 34.3k | Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); |
237 | 34.3k | Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); |
238 | 34.3k | Lib_IntVector_Intrinsics_vec256 o01 = x02; |
239 | 34.3k | Lib_IntVector_Intrinsics_vec256 o12 = x12; |
240 | 34.3k | Lib_IntVector_Intrinsics_vec256 o22 = x21; |
241 | 34.3k | Lib_IntVector_Intrinsics_vec256 o32 = x32; |
242 | 34.3k | Lib_IntVector_Intrinsics_vec256 o42 = x42; |
243 | 34.3k | acc0[0U] = o01; |
244 | 34.3k | acc0[1U] = o12; |
245 | 34.3k | acc0[2U] = o22; |
246 | 34.3k | acc0[3U] = o32; |
247 | 34.3k | acc0[4U] = o42; |
248 | 34.3k | Lib_IntVector_Intrinsics_vec256 f100 = acc0[0U]; |
249 | 34.3k | Lib_IntVector_Intrinsics_vec256 f11 = acc0[1U]; |
250 | 34.3k | Lib_IntVector_Intrinsics_vec256 f12 = acc0[2U]; |
251 | 34.3k | Lib_IntVector_Intrinsics_vec256 f13 = acc0[3U]; |
252 | 34.3k | Lib_IntVector_Intrinsics_vec256 f14 = acc0[4U]; |
253 | 34.3k | Lib_IntVector_Intrinsics_vec256 f20 = e[0U]; |
254 | 34.3k | Lib_IntVector_Intrinsics_vec256 f21 = e[1U]; |
255 | 34.3k | Lib_IntVector_Intrinsics_vec256 f22 = e[2U]; |
256 | 34.3k | Lib_IntVector_Intrinsics_vec256 f23 = e[3U]; |
257 | 34.3k | Lib_IntVector_Intrinsics_vec256 f24 = e[4U]; |
258 | 34.3k | Lib_IntVector_Intrinsics_vec256 o0 = Lib_IntVector_Intrinsics_vec256_add64(f100, f20); |
259 | 34.3k | Lib_IntVector_Intrinsics_vec256 o1 = Lib_IntVector_Intrinsics_vec256_add64(f11, f21); |
260 | 34.3k | Lib_IntVector_Intrinsics_vec256 o2 = Lib_IntVector_Intrinsics_vec256_add64(f12, f22); |
261 | 34.3k | Lib_IntVector_Intrinsics_vec256 o3 = Lib_IntVector_Intrinsics_vec256_add64(f13, f23); |
262 | 34.3k | Lib_IntVector_Intrinsics_vec256 o4 = Lib_IntVector_Intrinsics_vec256_add64(f14, f24); |
263 | 34.3k | acc0[0U] = o0; |
264 | 34.3k | acc0[1U] = o1; |
265 | 34.3k | acc0[2U] = o2; |
266 | 34.3k | acc0[3U] = o3; |
267 | 34.3k | acc0[4U] = o4; |
268 | 34.3k | } |
269 | 7.82k | Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize(acc0, pre0); |
270 | 7.82k | } |
271 | 29.3k | uint32_t len1 = n * (uint32_t)16U - len0; |
272 | 29.3k | uint8_t *t10 = blocks + len0; |
273 | 29.3k | uint32_t nb = len1 / (uint32_t)16U; |
274 | 29.3k | uint32_t rem1 = len1 % (uint32_t)16U; |
275 | 37.7k | for (uint32_t i = (uint32_t)0U; i < nb; i++) { |
276 | 8.33k | uint8_t *block = t10 + i * (uint32_t)16U; |
277 | 8.33k | KRML_PRE_ALIGN(32) |
278 | 8.33k | Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; |
279 | 8.33k | uint64_t u0 = load64_le(block); |
280 | 8.33k | uint64_t lo = u0; |
281 | 8.33k | uint64_t u = load64_le(block + (uint32_t)8U); |
282 | 8.33k | uint64_t hi = u; |
283 | 8.33k | Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); |
284 | 8.33k | Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); |
285 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
286 | 8.33k | f010 = |
287 | 8.33k | Lib_IntVector_Intrinsics_vec256_and(f0, |
288 | 8.33k | Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); |
289 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
290 | 8.33k | f110 = |
291 | 8.33k | Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, |
292 | 8.33k | (uint32_t)26U), |
293 | 8.33k | Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); |
294 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
295 | 8.33k | f20 = |
296 | 8.33k | Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, |
297 | 8.33k | (uint32_t)52U), |
298 | 8.33k | Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, |
299 | 8.33k | Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), |
300 | 8.33k | (uint32_t)12U)); |
301 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
302 | 8.33k | f30 = |
303 | 8.33k | Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, |
304 | 8.33k | (uint32_t)14U), |
305 | 8.33k | Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); |
306 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
307 | 8.33k | f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); |
308 | 8.33k | Lib_IntVector_Intrinsics_vec256 f01 = f010; |
309 | 8.33k | Lib_IntVector_Intrinsics_vec256 f111 = f110; |
310 | 8.33k | Lib_IntVector_Intrinsics_vec256 f2 = f20; |
311 | 8.33k | Lib_IntVector_Intrinsics_vec256 f3 = f30; |
312 | 8.33k | Lib_IntVector_Intrinsics_vec256 f41 = f40; |
313 | 8.33k | e[0U] = f01; |
314 | 8.33k | e[1U] = f111; |
315 | 8.33k | e[2U] = f2; |
316 | 8.33k | e[3U] = f3; |
317 | 8.33k | e[4U] = f41; |
318 | 8.33k | uint64_t b = (uint64_t)0x1000000U; |
319 | 8.33k | Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); |
320 | 8.33k | Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; |
321 | 8.33k | e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); |
322 | 8.33k | Lib_IntVector_Intrinsics_vec256 *r1 = pre0; |
323 | 8.33k | Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U; |
324 | 8.33k | Lib_IntVector_Intrinsics_vec256 r0 = r1[0U]; |
325 | 8.33k | Lib_IntVector_Intrinsics_vec256 r11 = r1[1U]; |
326 | 8.33k | Lib_IntVector_Intrinsics_vec256 r2 = r1[2U]; |
327 | 8.33k | Lib_IntVector_Intrinsics_vec256 r3 = r1[3U]; |
328 | 8.33k | Lib_IntVector_Intrinsics_vec256 r4 = r1[4U]; |
329 | 8.33k | Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; |
330 | 8.33k | Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; |
331 | 8.33k | Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; |
332 | 8.33k | Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; |
333 | 8.33k | Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; |
334 | 8.33k | Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; |
335 | 8.33k | Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; |
336 | 8.33k | Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; |
337 | 8.33k | Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; |
338 | 8.33k | Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U]; |
339 | 8.33k | Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U]; |
340 | 8.33k | Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U]; |
341 | 8.33k | Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U]; |
342 | 8.33k | Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U]; |
343 | 8.33k | Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); |
344 | 8.33k | Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); |
345 | 8.33k | Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); |
346 | 8.33k | Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); |
347 | 8.33k | Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); |
348 | 8.33k | Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); |
349 | 8.33k | Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01); |
350 | 8.33k | Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); |
351 | 8.33k | Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); |
352 | 8.33k | Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); |
353 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
354 | 8.33k | a03 = |
355 | 8.33k | Lib_IntVector_Intrinsics_vec256_add64(a02, |
356 | 8.33k | Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); |
357 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
358 | 8.33k | a13 = |
359 | 8.33k | Lib_IntVector_Intrinsics_vec256_add64(a12, |
360 | 8.33k | Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); |
361 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
362 | 8.33k | a23 = |
363 | 8.33k | Lib_IntVector_Intrinsics_vec256_add64(a22, |
364 | 8.33k | Lib_IntVector_Intrinsics_vec256_mul64(r11, a11)); |
365 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
366 | 8.33k | a33 = |
367 | 8.33k | Lib_IntVector_Intrinsics_vec256_add64(a32, |
368 | 8.33k | Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); |
369 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
370 | 8.33k | a43 = |
371 | 8.33k | Lib_IntVector_Intrinsics_vec256_add64(a42, |
372 | 8.33k | Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); |
373 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
374 | 8.33k | a04 = |
375 | 8.33k | Lib_IntVector_Intrinsics_vec256_add64(a03, |
376 | 8.33k | Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); |
377 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
378 | 8.33k | a14 = |
379 | 8.33k | Lib_IntVector_Intrinsics_vec256_add64(a13, |
380 | 8.33k | Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); |
381 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
382 | 8.33k | a24 = |
383 | 8.33k | Lib_IntVector_Intrinsics_vec256_add64(a23, |
384 | 8.33k | Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); |
385 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
386 | 8.33k | a34 = |
387 | 8.33k | Lib_IntVector_Intrinsics_vec256_add64(a33, |
388 | 8.33k | Lib_IntVector_Intrinsics_vec256_mul64(r11, a21)); |
389 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
390 | 8.33k | a44 = |
391 | 8.33k | Lib_IntVector_Intrinsics_vec256_add64(a43, |
392 | 8.33k | Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); |
393 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
394 | 8.33k | a05 = |
395 | 8.33k | Lib_IntVector_Intrinsics_vec256_add64(a04, |
396 | 8.33k | Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); |
397 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
398 | 8.33k | a15 = |
399 | 8.33k | Lib_IntVector_Intrinsics_vec256_add64(a14, |
400 | 8.33k | Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); |
401 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
402 | 8.33k | a25 = |
403 | 8.33k | Lib_IntVector_Intrinsics_vec256_add64(a24, |
404 | 8.33k | Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); |
405 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
406 | 8.33k | a35 = |
407 | 8.33k | Lib_IntVector_Intrinsics_vec256_add64(a34, |
408 | 8.33k | Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); |
409 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
410 | 8.33k | a45 = |
411 | 8.33k | Lib_IntVector_Intrinsics_vec256_add64(a44, |
412 | 8.33k | Lib_IntVector_Intrinsics_vec256_mul64(r11, a31)); |
413 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
414 | 8.33k | a06 = |
415 | 8.33k | Lib_IntVector_Intrinsics_vec256_add64(a05, |
416 | 8.33k | Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); |
417 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
418 | 8.33k | a16 = |
419 | 8.33k | Lib_IntVector_Intrinsics_vec256_add64(a15, |
420 | 8.33k | Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); |
421 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
422 | 8.33k | a26 = |
423 | 8.33k | Lib_IntVector_Intrinsics_vec256_add64(a25, |
424 | 8.33k | Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); |
425 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
426 | 8.33k | a36 = |
427 | 8.33k | Lib_IntVector_Intrinsics_vec256_add64(a35, |
428 | 8.33k | Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); |
429 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
430 | 8.33k | a46 = |
431 | 8.33k | Lib_IntVector_Intrinsics_vec256_add64(a45, |
432 | 8.33k | Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); |
433 | 8.33k | Lib_IntVector_Intrinsics_vec256 t01 = a06; |
434 | 8.33k | Lib_IntVector_Intrinsics_vec256 t11 = a16; |
435 | 8.33k | Lib_IntVector_Intrinsics_vec256 t2 = a26; |
436 | 8.33k | Lib_IntVector_Intrinsics_vec256 t3 = a36; |
437 | 8.33k | Lib_IntVector_Intrinsics_vec256 t4 = a46; |
438 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
439 | 8.33k | mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); |
440 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
441 | 8.33k | z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); |
442 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
443 | 8.33k | z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); |
444 | 8.33k | Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26); |
445 | 8.33k | Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); |
446 | 8.33k | Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0); |
447 | 8.33k | Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); |
448 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
449 | 8.33k | z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); |
450 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
451 | 8.33k | z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); |
452 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
453 | 8.33k | t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); |
454 | 8.33k | Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); |
455 | 8.33k | Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); |
456 | 8.33k | Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); |
457 | 8.33k | Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); |
458 | 8.33k | Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); |
459 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
460 | 8.33k | z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); |
461 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
462 | 8.33k | z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); |
463 | 8.33k | Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); |
464 | 8.33k | Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); |
465 | 8.33k | Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); |
466 | 8.33k | Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); |
467 | 8.33k | Lib_IntVector_Intrinsics_vec256 |
468 | 8.33k | z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); |
469 | 8.33k | Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); |
470 | 8.33k | Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); |
471 | 8.33k | Lib_IntVector_Intrinsics_vec256 o0 = x02; |
472 | 8.33k | Lib_IntVector_Intrinsics_vec256 o1 = x12; |
473 | 8.33k | Lib_IntVector_Intrinsics_vec256 o2 = x21; |
474 | 8.33k | Lib_IntVector_Intrinsics_vec256 o3 = x32; |
475 | 8.33k | Lib_IntVector_Intrinsics_vec256 o4 = x42; |
476 | 8.33k | acc0[0U] = o0; |
477 | 8.33k | acc0[1U] = o1; |
478 | 8.33k | acc0[2U] = o2; |
479 | 8.33k | acc0[3U] = o3; |
480 | 8.33k | acc0[4U] = o4; |
481 | 8.33k | } |
482 | 29.3k | if (rem1 > (uint32_t)0U) { |
483 | 0 | uint8_t *last = t10 + nb * (uint32_t)16U; |
484 | 0 | KRML_PRE_ALIGN(32) |
485 | 0 | Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; |
486 | 0 | uint8_t tmp[16U] = { 0U }; |
487 | 0 | memcpy(tmp, last, rem1 * sizeof(uint8_t)); |
488 | 0 | uint64_t u0 = load64_le(tmp); |
489 | 0 | uint64_t lo = u0; |
490 | 0 | uint64_t u = load64_le(tmp + (uint32_t)8U); |
491 | 0 | uint64_t hi = u; |
492 | 0 | Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); |
493 | 0 | Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); |
494 | 0 | Lib_IntVector_Intrinsics_vec256 |
495 | 0 | f010 = |
496 | 0 | Lib_IntVector_Intrinsics_vec256_and(f0, |
497 | 0 | Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); |
498 | 0 | Lib_IntVector_Intrinsics_vec256 |
499 | 0 | f110 = |
500 | 0 | Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, |
501 | 0 | (uint32_t)26U), |
502 | 0 | Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); |
503 | 0 | Lib_IntVector_Intrinsics_vec256 |
504 | 0 | f20 = |
505 | 0 | Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, |
506 | 0 | (uint32_t)52U), |
507 | 0 | Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, |
508 | 0 | Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), |
509 | 0 | (uint32_t)12U)); |
510 | 0 | Lib_IntVector_Intrinsics_vec256 |
511 | 0 | f30 = |
512 | 0 | Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, |
513 | 0 | (uint32_t)14U), |
514 | 0 | Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); |
515 | 0 | Lib_IntVector_Intrinsics_vec256 |
516 | 0 | f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); |
517 | 0 | Lib_IntVector_Intrinsics_vec256 f01 = f010; |
518 | 0 | Lib_IntVector_Intrinsics_vec256 f111 = f110; |
519 | 0 | Lib_IntVector_Intrinsics_vec256 f2 = f20; |
520 | 0 | Lib_IntVector_Intrinsics_vec256 f3 = f30; |
521 | 0 | Lib_IntVector_Intrinsics_vec256 f4 = f40; |
522 | 0 | e[0U] = f01; |
523 | 0 | e[1U] = f111; |
524 | 0 | e[2U] = f2; |
525 | 0 | e[3U] = f3; |
526 | 0 | e[4U] = f4; |
527 | 0 | uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U; |
528 | 0 | Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); |
529 | 0 | Lib_IntVector_Intrinsics_vec256 fi = e[rem1 * (uint32_t)8U / (uint32_t)26U]; |
530 | 0 | e[rem1 * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec256_or(fi, mask); |
531 | 0 | Lib_IntVector_Intrinsics_vec256 *r1 = pre0; |
532 | 0 | Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U; |
533 | 0 | Lib_IntVector_Intrinsics_vec256 r0 = r1[0U]; |
534 | 0 | Lib_IntVector_Intrinsics_vec256 r11 = r1[1U]; |
535 | 0 | Lib_IntVector_Intrinsics_vec256 r2 = r1[2U]; |
536 | 0 | Lib_IntVector_Intrinsics_vec256 r3 = r1[3U]; |
537 | 0 | Lib_IntVector_Intrinsics_vec256 r4 = r1[4U]; |
538 | 0 | Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; |
539 | 0 | Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; |
540 | 0 | Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; |
541 | 0 | Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; |
542 | 0 | Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; |
543 | 0 | Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; |
544 | 0 | Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; |
545 | 0 | Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; |
546 | 0 | Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; |
547 | 0 | Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U]; |
548 | 0 | Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U]; |
549 | 0 | Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U]; |
550 | 0 | Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U]; |
551 | 0 | Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U]; |
552 | 0 | Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); |
553 | 0 | Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); |
554 | 0 | Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); |
555 | 0 | Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); |
556 | 0 | Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); |
557 | 0 | Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); |
558 | 0 | Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01); |
559 | 0 | Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); |
560 | 0 | Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); |
561 | 0 | Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); |
562 | 0 | Lib_IntVector_Intrinsics_vec256 |
563 | 0 | a03 = |
564 | 0 | Lib_IntVector_Intrinsics_vec256_add64(a02, |
565 | 0 | Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); |
566 | 0 | Lib_IntVector_Intrinsics_vec256 |
567 | 0 | a13 = |
568 | 0 | Lib_IntVector_Intrinsics_vec256_add64(a12, |
569 | 0 | Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); |
570 | 0 | Lib_IntVector_Intrinsics_vec256 |
571 | 0 | a23 = |
572 | 0 | Lib_IntVector_Intrinsics_vec256_add64(a22, |
573 | 0 | Lib_IntVector_Intrinsics_vec256_mul64(r11, a11)); |
574 | 0 | Lib_IntVector_Intrinsics_vec256 |
575 | 0 | a33 = |
576 | 0 | Lib_IntVector_Intrinsics_vec256_add64(a32, |
577 | 0 | Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); |
578 | 0 | Lib_IntVector_Intrinsics_vec256 |
579 | 0 | a43 = |
580 | 0 | Lib_IntVector_Intrinsics_vec256_add64(a42, |
581 | 0 | Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); |
582 | 0 | Lib_IntVector_Intrinsics_vec256 |
583 | 0 | a04 = |
584 | 0 | Lib_IntVector_Intrinsics_vec256_add64(a03, |
585 | 0 | Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); |
586 | 0 | Lib_IntVector_Intrinsics_vec256 |
587 | 0 | a14 = |
588 | 0 | Lib_IntVector_Intrinsics_vec256_add64(a13, |
589 | 0 | Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); |
590 | 0 | Lib_IntVector_Intrinsics_vec256 |
591 | 0 | a24 = |
592 | 0 | Lib_IntVector_Intrinsics_vec256_add64(a23, |
593 | 0 | Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); |
594 | 0 | Lib_IntVector_Intrinsics_vec256 |
595 | 0 | a34 = |
596 | 0 | Lib_IntVector_Intrinsics_vec256_add64(a33, |
597 | 0 | Lib_IntVector_Intrinsics_vec256_mul64(r11, a21)); |
598 | 0 | Lib_IntVector_Intrinsics_vec256 |
599 | 0 | a44 = |
600 | 0 | Lib_IntVector_Intrinsics_vec256_add64(a43, |
601 | 0 | Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); |
602 | 0 | Lib_IntVector_Intrinsics_vec256 |
603 | 0 | a05 = |
604 | 0 | Lib_IntVector_Intrinsics_vec256_add64(a04, |
605 | 0 | Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); |
606 | 0 | Lib_IntVector_Intrinsics_vec256 |
607 | 0 | a15 = |
608 | 0 | Lib_IntVector_Intrinsics_vec256_add64(a14, |
609 | 0 | Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); |
610 | 0 | Lib_IntVector_Intrinsics_vec256 |
611 | 0 | a25 = |
612 | 0 | Lib_IntVector_Intrinsics_vec256_add64(a24, |
613 | 0 | Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); |
614 | 0 | Lib_IntVector_Intrinsics_vec256 |
615 | 0 | a35 = |
616 | 0 | Lib_IntVector_Intrinsics_vec256_add64(a34, |
617 | 0 | Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); |
618 | 0 | Lib_IntVector_Intrinsics_vec256 |
619 | 0 | a45 = |
620 | 0 | Lib_IntVector_Intrinsics_vec256_add64(a44, |
621 | 0 | Lib_IntVector_Intrinsics_vec256_mul64(r11, a31)); |
622 | 0 | Lib_IntVector_Intrinsics_vec256 |
623 | 0 | a06 = |
624 | 0 | Lib_IntVector_Intrinsics_vec256_add64(a05, |
625 | 0 | Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); |
626 | 0 | Lib_IntVector_Intrinsics_vec256 |
627 | 0 | a16 = |
628 | 0 | Lib_IntVector_Intrinsics_vec256_add64(a15, |
629 | 0 | Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); |
630 | 0 | Lib_IntVector_Intrinsics_vec256 |
631 | 0 | a26 = |
632 | 0 | Lib_IntVector_Intrinsics_vec256_add64(a25, |
633 | 0 | Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); |
634 | 0 | Lib_IntVector_Intrinsics_vec256 |
635 | 0 | a36 = |
636 | 0 | Lib_IntVector_Intrinsics_vec256_add64(a35, |
637 | 0 | Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); |
638 | 0 | Lib_IntVector_Intrinsics_vec256 |
639 | 0 | a46 = |
640 | 0 | Lib_IntVector_Intrinsics_vec256_add64(a45, |
641 | 0 | Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); |
642 | 0 | Lib_IntVector_Intrinsics_vec256 t01 = a06; |
643 | 0 | Lib_IntVector_Intrinsics_vec256 t11 = a16; |
644 | 0 | Lib_IntVector_Intrinsics_vec256 t2 = a26; |
645 | 0 | Lib_IntVector_Intrinsics_vec256 t3 = a36; |
646 | 0 | Lib_IntVector_Intrinsics_vec256 t4 = a46; |
647 | 0 | Lib_IntVector_Intrinsics_vec256 |
648 | 0 | mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); |
649 | 0 | Lib_IntVector_Intrinsics_vec256 |
650 | 0 | z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); |
651 | 0 | Lib_IntVector_Intrinsics_vec256 |
652 | 0 | z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); |
653 | 0 | Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26); |
654 | 0 | Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); |
655 | 0 | Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0); |
656 | 0 | Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); |
657 | 0 | Lib_IntVector_Intrinsics_vec256 |
658 | 0 | z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); |
659 | 0 | Lib_IntVector_Intrinsics_vec256 |
660 | 0 | z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); |
661 | 0 | Lib_IntVector_Intrinsics_vec256 |
662 | 0 | t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); |
663 | 0 | Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); |
664 | 0 | Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); |
665 | 0 | Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); |
666 | 0 | Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); |
667 | 0 | Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); |
668 | 0 | Lib_IntVector_Intrinsics_vec256 |
669 | 0 | z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); |
670 | 0 | Lib_IntVector_Intrinsics_vec256 |
671 | 0 | z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); |
672 | 0 | Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); |
673 | 0 | Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); |
674 | 0 | Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); |
675 | 0 | Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); |
676 | 0 | Lib_IntVector_Intrinsics_vec256 |
677 | 0 | z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); |
678 | 0 | Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); |
679 | 0 | Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); |
680 | 0 | Lib_IntVector_Intrinsics_vec256 o0 = x02; |
681 | 0 | Lib_IntVector_Intrinsics_vec256 o1 = x12; |
682 | 0 | Lib_IntVector_Intrinsics_vec256 o2 = x21; |
683 | 0 | Lib_IntVector_Intrinsics_vec256 o3 = x32; |
684 | 0 | Lib_IntVector_Intrinsics_vec256 o4 = x42; |
685 | 0 | acc0[0U] = o0; |
686 | 0 | acc0[1U] = o1; |
687 | 0 | acc0[2U] = o2; |
688 | 0 | acc0[3U] = o3; |
689 | 0 | acc0[4U] = o4; |
690 | 0 | } |
691 | 29.3k | uint8_t tmp[16U] = { 0U }; |
692 | 29.3k | memcpy(tmp, rem, r * sizeof(uint8_t)); |
693 | 29.3k | if (r > (uint32_t)0U) { |
694 | 25.0k | Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; |
695 | 25.0k | Lib_IntVector_Intrinsics_vec256 *acc = ctx; |
696 | 25.0k | KRML_PRE_ALIGN(32) |
697 | 25.0k | Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; |
698 | 25.0k | uint64_t u0 = load64_le(tmp); |
699 | 25.0k | uint64_t lo = u0; |
700 | 25.0k | uint64_t u = load64_le(tmp + (uint32_t)8U); |
701 | 25.0k | uint64_t hi = u; |
702 | 25.0k | Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); |
703 | 25.0k | Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); |
704 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
705 | 25.0k | f010 = |
706 | 25.0k | Lib_IntVector_Intrinsics_vec256_and(f0, |
707 | 25.0k | Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); |
708 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
709 | 25.0k | f110 = |
710 | 25.0k | Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, |
711 | 25.0k | (uint32_t)26U), |
712 | 25.0k | Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); |
713 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
714 | 25.0k | f20 = |
715 | 25.0k | Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, |
716 | 25.0k | (uint32_t)52U), |
717 | 25.0k | Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, |
718 | 25.0k | Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), |
719 | 25.0k | (uint32_t)12U)); |
720 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
721 | 25.0k | f30 = |
722 | 25.0k | Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, |
723 | 25.0k | (uint32_t)14U), |
724 | 25.0k | Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); |
725 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
726 | 25.0k | f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); |
727 | 25.0k | Lib_IntVector_Intrinsics_vec256 f01 = f010; |
728 | 25.0k | Lib_IntVector_Intrinsics_vec256 f111 = f110; |
729 | 25.0k | Lib_IntVector_Intrinsics_vec256 f2 = f20; |
730 | 25.0k | Lib_IntVector_Intrinsics_vec256 f3 = f30; |
731 | 25.0k | Lib_IntVector_Intrinsics_vec256 f41 = f40; |
732 | 25.0k | e[0U] = f01; |
733 | 25.0k | e[1U] = f111; |
734 | 25.0k | e[2U] = f2; |
735 | 25.0k | e[3U] = f3; |
736 | 25.0k | e[4U] = f41; |
737 | 25.0k | uint64_t b = (uint64_t)0x1000000U; |
738 | 25.0k | Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); |
739 | 25.0k | Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; |
740 | 25.0k | e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); |
741 | 25.0k | Lib_IntVector_Intrinsics_vec256 *r1 = pre; |
742 | 25.0k | Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; |
743 | 25.0k | Lib_IntVector_Intrinsics_vec256 r0 = r1[0U]; |
744 | 25.0k | Lib_IntVector_Intrinsics_vec256 r11 = r1[1U]; |
745 | 25.0k | Lib_IntVector_Intrinsics_vec256 r2 = r1[2U]; |
746 | 25.0k | Lib_IntVector_Intrinsics_vec256 r3 = r1[3U]; |
747 | 25.0k | Lib_IntVector_Intrinsics_vec256 r4 = r1[4U]; |
748 | 25.0k | Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; |
749 | 25.0k | Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; |
750 | 25.0k | Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; |
751 | 25.0k | Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; |
752 | 25.0k | Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; |
753 | 25.0k | Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; |
754 | 25.0k | Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; |
755 | 25.0k | Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; |
756 | 25.0k | Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; |
757 | 25.0k | Lib_IntVector_Intrinsics_vec256 a0 = acc[0U]; |
758 | 25.0k | Lib_IntVector_Intrinsics_vec256 a1 = acc[1U]; |
759 | 25.0k | Lib_IntVector_Intrinsics_vec256 a2 = acc[2U]; |
760 | 25.0k | Lib_IntVector_Intrinsics_vec256 a3 = acc[3U]; |
761 | 25.0k | Lib_IntVector_Intrinsics_vec256 a4 = acc[4U]; |
762 | 25.0k | Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); |
763 | 25.0k | Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); |
764 | 25.0k | Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); |
765 | 25.0k | Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); |
766 | 25.0k | Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); |
767 | 25.0k | Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); |
768 | 25.0k | Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01); |
769 | 25.0k | Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); |
770 | 25.0k | Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); |
771 | 25.0k | Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); |
772 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
773 | 25.0k | a03 = |
774 | 25.0k | Lib_IntVector_Intrinsics_vec256_add64(a02, |
775 | 25.0k | Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); |
776 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
777 | 25.0k | a13 = |
778 | 25.0k | Lib_IntVector_Intrinsics_vec256_add64(a12, |
779 | 25.0k | Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); |
780 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
781 | 25.0k | a23 = |
782 | 25.0k | Lib_IntVector_Intrinsics_vec256_add64(a22, |
783 | 25.0k | Lib_IntVector_Intrinsics_vec256_mul64(r11, a11)); |
784 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
785 | 25.0k | a33 = |
786 | 25.0k | Lib_IntVector_Intrinsics_vec256_add64(a32, |
787 | 25.0k | Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); |
788 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
789 | 25.0k | a43 = |
790 | 25.0k | Lib_IntVector_Intrinsics_vec256_add64(a42, |
791 | 25.0k | Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); |
792 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
793 | 25.0k | a04 = |
794 | 25.0k | Lib_IntVector_Intrinsics_vec256_add64(a03, |
795 | 25.0k | Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); |
796 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
797 | 25.0k | a14 = |
798 | 25.0k | Lib_IntVector_Intrinsics_vec256_add64(a13, |
799 | 25.0k | Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); |
800 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
801 | 25.0k | a24 = |
802 | 25.0k | Lib_IntVector_Intrinsics_vec256_add64(a23, |
803 | 25.0k | Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); |
804 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
805 | 25.0k | a34 = |
806 | 25.0k | Lib_IntVector_Intrinsics_vec256_add64(a33, |
807 | 25.0k | Lib_IntVector_Intrinsics_vec256_mul64(r11, a21)); |
808 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
809 | 25.0k | a44 = |
810 | 25.0k | Lib_IntVector_Intrinsics_vec256_add64(a43, |
811 | 25.0k | Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); |
812 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
813 | 25.0k | a05 = |
814 | 25.0k | Lib_IntVector_Intrinsics_vec256_add64(a04, |
815 | 25.0k | Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); |
816 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
817 | 25.0k | a15 = |
818 | 25.0k | Lib_IntVector_Intrinsics_vec256_add64(a14, |
819 | 25.0k | Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); |
820 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
821 | 25.0k | a25 = |
822 | 25.0k | Lib_IntVector_Intrinsics_vec256_add64(a24, |
823 | 25.0k | Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); |
824 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
825 | 25.0k | a35 = |
826 | 25.0k | Lib_IntVector_Intrinsics_vec256_add64(a34, |
827 | 25.0k | Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); |
828 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
829 | 25.0k | a45 = |
830 | 25.0k | Lib_IntVector_Intrinsics_vec256_add64(a44, |
831 | 25.0k | Lib_IntVector_Intrinsics_vec256_mul64(r11, a31)); |
832 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
833 | 25.0k | a06 = |
834 | 25.0k | Lib_IntVector_Intrinsics_vec256_add64(a05, |
835 | 25.0k | Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); |
836 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
837 | 25.0k | a16 = |
838 | 25.0k | Lib_IntVector_Intrinsics_vec256_add64(a15, |
839 | 25.0k | Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); |
840 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
841 | 25.0k | a26 = |
842 | 25.0k | Lib_IntVector_Intrinsics_vec256_add64(a25, |
843 | 25.0k | Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); |
844 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
845 | 25.0k | a36 = |
846 | 25.0k | Lib_IntVector_Intrinsics_vec256_add64(a35, |
847 | 25.0k | Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); |
848 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
849 | 25.0k | a46 = |
850 | 25.0k | Lib_IntVector_Intrinsics_vec256_add64(a45, |
851 | 25.0k | Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); |
852 | 25.0k | Lib_IntVector_Intrinsics_vec256 t0 = a06; |
853 | 25.0k | Lib_IntVector_Intrinsics_vec256 t1 = a16; |
854 | 25.0k | Lib_IntVector_Intrinsics_vec256 t2 = a26; |
855 | 25.0k | Lib_IntVector_Intrinsics_vec256 t3 = a36; |
856 | 25.0k | Lib_IntVector_Intrinsics_vec256 t4 = a46; |
857 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
858 | 25.0k | mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); |
859 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
860 | 25.0k | z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U); |
861 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
862 | 25.0k | z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); |
863 | 25.0k | Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26); |
864 | 25.0k | Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); |
865 | 25.0k | Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); |
866 | 25.0k | Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); |
867 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
868 | 25.0k | z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); |
869 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
870 | 25.0k | z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); |
871 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
872 | 25.0k | t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); |
873 | 25.0k | Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); |
874 | 25.0k | Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); |
875 | 25.0k | Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); |
876 | 25.0k | Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); |
877 | 25.0k | Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); |
878 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
879 | 25.0k | z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); |
880 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
881 | 25.0k | z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); |
882 | 25.0k | Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); |
883 | 25.0k | Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); |
884 | 25.0k | Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); |
885 | 25.0k | Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); |
886 | 25.0k | Lib_IntVector_Intrinsics_vec256 |
887 | 25.0k | z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); |
888 | 25.0k | Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); |
889 | 25.0k | Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); |
890 | 25.0k | Lib_IntVector_Intrinsics_vec256 o0 = x02; |
891 | 25.0k | Lib_IntVector_Intrinsics_vec256 o1 = x12; |
892 | 25.0k | Lib_IntVector_Intrinsics_vec256 o2 = x21; |
893 | 25.0k | Lib_IntVector_Intrinsics_vec256 o3 = x32; |
894 | 25.0k | Lib_IntVector_Intrinsics_vec256 o4 = x42; |
895 | 25.0k | acc[0U] = o0; |
896 | 25.0k | acc[1U] = o1; |
897 | 25.0k | acc[2U] = o2; |
898 | 25.0k | acc[3U] = o3; |
899 | 25.0k | acc[4U] = o4; |
900 | 25.0k | return; |
901 | 25.0k | } |
902 | 29.3k | } |
903 | | |
904 | | static inline void |
905 | | poly1305_do_256( |
906 | | uint8_t *k, |
907 | | uint32_t aadlen, |
908 | | uint8_t *aad, |
909 | | uint32_t mlen, |
910 | | uint8_t *m, |
911 | | uint8_t *out) |
912 | 17.5k | { |
913 | 17.5k | KRML_PRE_ALIGN(32) |
914 | 17.5k | Lib_IntVector_Intrinsics_vec256 ctx[25U] KRML_POST_ALIGN(32) = { 0U }; |
915 | 17.5k | uint8_t block[16U] = { 0U }; |
916 | 17.5k | Hacl_Poly1305_256_poly1305_init(ctx, k); |
917 | 17.5k | if (aadlen != (uint32_t)0U) { |
918 | 17.5k | poly1305_padded_256(ctx, aadlen, aad); |
919 | 17.5k | } |
920 | 17.5k | if (mlen != (uint32_t)0U) { |
921 | 11.7k | poly1305_padded_256(ctx, mlen, m); |
922 | 11.7k | } |
923 | 17.5k | store64_le(block, (uint64_t)aadlen); |
924 | 17.5k | store64_le(block + (uint32_t)8U, (uint64_t)mlen); |
925 | 17.5k | Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; |
926 | 17.5k | Lib_IntVector_Intrinsics_vec256 *acc = ctx; |
927 | 17.5k | KRML_PRE_ALIGN(32) |
928 | 17.5k | Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; |
929 | 17.5k | uint64_t u0 = load64_le(block); |
930 | 17.5k | uint64_t lo = u0; |
931 | 17.5k | uint64_t u = load64_le(block + (uint32_t)8U); |
932 | 17.5k | uint64_t hi = u; |
933 | 17.5k | Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); |
934 | 17.5k | Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); |
935 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
936 | 17.5k | f010 = |
937 | 17.5k | Lib_IntVector_Intrinsics_vec256_and(f0, |
938 | 17.5k | Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); |
939 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
940 | 17.5k | f110 = |
941 | 17.5k | Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, |
942 | 17.5k | (uint32_t)26U), |
943 | 17.5k | Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); |
944 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
945 | 17.5k | f20 = |
946 | 17.5k | Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, |
947 | 17.5k | (uint32_t)52U), |
948 | 17.5k | Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, |
949 | 17.5k | Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), |
950 | 17.5k | (uint32_t)12U)); |
951 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
952 | 17.5k | f30 = |
953 | 17.5k | Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, |
954 | 17.5k | (uint32_t)14U), |
955 | 17.5k | Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); |
956 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
957 | 17.5k | f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); |
958 | 17.5k | Lib_IntVector_Intrinsics_vec256 f01 = f010; |
959 | 17.5k | Lib_IntVector_Intrinsics_vec256 f111 = f110; |
960 | 17.5k | Lib_IntVector_Intrinsics_vec256 f2 = f20; |
961 | 17.5k | Lib_IntVector_Intrinsics_vec256 f3 = f30; |
962 | 17.5k | Lib_IntVector_Intrinsics_vec256 f41 = f40; |
963 | 17.5k | e[0U] = f01; |
964 | 17.5k | e[1U] = f111; |
965 | 17.5k | e[2U] = f2; |
966 | 17.5k | e[3U] = f3; |
967 | 17.5k | e[4U] = f41; |
968 | 17.5k | uint64_t b = (uint64_t)0x1000000U; |
969 | 17.5k | Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); |
970 | 17.5k | Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; |
971 | 17.5k | e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); |
972 | 17.5k | Lib_IntVector_Intrinsics_vec256 *r = pre; |
973 | 17.5k | Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; |
974 | 17.5k | Lib_IntVector_Intrinsics_vec256 r0 = r[0U]; |
975 | 17.5k | Lib_IntVector_Intrinsics_vec256 r1 = r[1U]; |
976 | 17.5k | Lib_IntVector_Intrinsics_vec256 r2 = r[2U]; |
977 | 17.5k | Lib_IntVector_Intrinsics_vec256 r3 = r[3U]; |
978 | 17.5k | Lib_IntVector_Intrinsics_vec256 r4 = r[4U]; |
979 | 17.5k | Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; |
980 | 17.5k | Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; |
981 | 17.5k | Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; |
982 | 17.5k | Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; |
983 | 17.5k | Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; |
984 | 17.5k | Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; |
985 | 17.5k | Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; |
986 | 17.5k | Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; |
987 | 17.5k | Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; |
988 | 17.5k | Lib_IntVector_Intrinsics_vec256 a0 = acc[0U]; |
989 | 17.5k | Lib_IntVector_Intrinsics_vec256 a1 = acc[1U]; |
990 | 17.5k | Lib_IntVector_Intrinsics_vec256 a2 = acc[2U]; |
991 | 17.5k | Lib_IntVector_Intrinsics_vec256 a3 = acc[3U]; |
992 | 17.5k | Lib_IntVector_Intrinsics_vec256 a4 = acc[4U]; |
993 | 17.5k | Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); |
994 | 17.5k | Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); |
995 | 17.5k | Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); |
996 | 17.5k | Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); |
997 | 17.5k | Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); |
998 | 17.5k | Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); |
999 | 17.5k | Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01); |
1000 | 17.5k | Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); |
1001 | 17.5k | Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); |
1002 | 17.5k | Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); |
1003 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1004 | 17.5k | a03 = |
1005 | 17.5k | Lib_IntVector_Intrinsics_vec256_add64(a02, |
1006 | 17.5k | Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); |
1007 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1008 | 17.5k | a13 = |
1009 | 17.5k | Lib_IntVector_Intrinsics_vec256_add64(a12, |
1010 | 17.5k | Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); |
1011 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1012 | 17.5k | a23 = |
1013 | 17.5k | Lib_IntVector_Intrinsics_vec256_add64(a22, |
1014 | 17.5k | Lib_IntVector_Intrinsics_vec256_mul64(r1, a11)); |
1015 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1016 | 17.5k | a33 = |
1017 | 17.5k | Lib_IntVector_Intrinsics_vec256_add64(a32, |
1018 | 17.5k | Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); |
1019 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1020 | 17.5k | a43 = |
1021 | 17.5k | Lib_IntVector_Intrinsics_vec256_add64(a42, |
1022 | 17.5k | Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); |
1023 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1024 | 17.5k | a04 = |
1025 | 17.5k | Lib_IntVector_Intrinsics_vec256_add64(a03, |
1026 | 17.5k | Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); |
1027 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1028 | 17.5k | a14 = |
1029 | 17.5k | Lib_IntVector_Intrinsics_vec256_add64(a13, |
1030 | 17.5k | Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); |
1031 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1032 | 17.5k | a24 = |
1033 | 17.5k | Lib_IntVector_Intrinsics_vec256_add64(a23, |
1034 | 17.5k | Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); |
1035 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1036 | 17.5k | a34 = |
1037 | 17.5k | Lib_IntVector_Intrinsics_vec256_add64(a33, |
1038 | 17.5k | Lib_IntVector_Intrinsics_vec256_mul64(r1, a21)); |
1039 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1040 | 17.5k | a44 = |
1041 | 17.5k | Lib_IntVector_Intrinsics_vec256_add64(a43, |
1042 | 17.5k | Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); |
1043 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1044 | 17.5k | a05 = |
1045 | 17.5k | Lib_IntVector_Intrinsics_vec256_add64(a04, |
1046 | 17.5k | Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); |
1047 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1048 | 17.5k | a15 = |
1049 | 17.5k | Lib_IntVector_Intrinsics_vec256_add64(a14, |
1050 | 17.5k | Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); |
1051 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1052 | 17.5k | a25 = |
1053 | 17.5k | Lib_IntVector_Intrinsics_vec256_add64(a24, |
1054 | 17.5k | Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); |
1055 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1056 | 17.5k | a35 = |
1057 | 17.5k | Lib_IntVector_Intrinsics_vec256_add64(a34, |
1058 | 17.5k | Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); |
1059 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1060 | 17.5k | a45 = |
1061 | 17.5k | Lib_IntVector_Intrinsics_vec256_add64(a44, |
1062 | 17.5k | Lib_IntVector_Intrinsics_vec256_mul64(r1, a31)); |
1063 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1064 | 17.5k | a06 = |
1065 | 17.5k | Lib_IntVector_Intrinsics_vec256_add64(a05, |
1066 | 17.5k | Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); |
1067 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1068 | 17.5k | a16 = |
1069 | 17.5k | Lib_IntVector_Intrinsics_vec256_add64(a15, |
1070 | 17.5k | Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); |
1071 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1072 | 17.5k | a26 = |
1073 | 17.5k | Lib_IntVector_Intrinsics_vec256_add64(a25, |
1074 | 17.5k | Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); |
1075 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1076 | 17.5k | a36 = |
1077 | 17.5k | Lib_IntVector_Intrinsics_vec256_add64(a35, |
1078 | 17.5k | Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); |
1079 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1080 | 17.5k | a46 = |
1081 | 17.5k | Lib_IntVector_Intrinsics_vec256_add64(a45, |
1082 | 17.5k | Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); |
1083 | 17.5k | Lib_IntVector_Intrinsics_vec256 t0 = a06; |
1084 | 17.5k | Lib_IntVector_Intrinsics_vec256 t1 = a16; |
1085 | 17.5k | Lib_IntVector_Intrinsics_vec256 t2 = a26; |
1086 | 17.5k | Lib_IntVector_Intrinsics_vec256 t3 = a36; |
1087 | 17.5k | Lib_IntVector_Intrinsics_vec256 t4 = a46; |
1088 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1089 | 17.5k | mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); |
1090 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1091 | 17.5k | z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U); |
1092 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1093 | 17.5k | z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); |
1094 | 17.5k | Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26); |
1095 | 17.5k | Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); |
1096 | 17.5k | Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); |
1097 | 17.5k | Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); |
1098 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1099 | 17.5k | z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); |
1100 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1101 | 17.5k | z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); |
1102 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1103 | 17.5k | t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); |
1104 | 17.5k | Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); |
1105 | 17.5k | Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); |
1106 | 17.5k | Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); |
1107 | 17.5k | Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); |
1108 | 17.5k | Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); |
1109 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1110 | 17.5k | z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); |
1111 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1112 | 17.5k | z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); |
1113 | 17.5k | Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); |
1114 | 17.5k | Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); |
1115 | 17.5k | Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); |
1116 | 17.5k | Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); |
1117 | 17.5k | Lib_IntVector_Intrinsics_vec256 |
1118 | 17.5k | z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); |
1119 | 17.5k | Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); |
1120 | 17.5k | Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); |
1121 | 17.5k | Lib_IntVector_Intrinsics_vec256 o0 = x02; |
1122 | 17.5k | Lib_IntVector_Intrinsics_vec256 o1 = x12; |
1123 | 17.5k | Lib_IntVector_Intrinsics_vec256 o2 = x21; |
1124 | 17.5k | Lib_IntVector_Intrinsics_vec256 o3 = x32; |
1125 | 17.5k | Lib_IntVector_Intrinsics_vec256 o4 = x42; |
1126 | 17.5k | acc[0U] = o0; |
1127 | 17.5k | acc[1U] = o1; |
1128 | 17.5k | acc[2U] = o2; |
1129 | 17.5k | acc[3U] = o3; |
1130 | 17.5k | acc[4U] = o4; |
1131 | 17.5k | Hacl_Poly1305_256_poly1305_finish(out, k, ctx); |
1132 | 17.5k | } |
1133 | | |
1134 | | /** |
1135 | | Encrypt a message `m` with key `k`. |
1136 | | |
1137 | | The arguments `k`, `n`, `aadlen`, and `aad` are same in encryption/decryption. |
1138 | | Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory. |
1139 | | |
1140 | | @param k Pointer to 32 bytes of memory where the AEAD key is read from. |
1141 | | @param n Pointer to 12 bytes of memory where the AEAD nonce is read from. |
1142 | | @param aadlen Length of the associated data. |
1143 | | @param aad Pointer to `aadlen` bytes of memory where the associated data is read from. |
1144 | | |
1145 | | @param mlen Length of the message. |
1146 | | @param m Pointer to `mlen` bytes of memory where the message is read from. |
1147 | | @param cipher Pointer to `mlen` bytes of memory where the ciphertext is written to. |
1148 | | @param mac Pointer to 16 bytes of memory where the mac is written to. |
1149 | | */ |
void
Hacl_Chacha20Poly1305_256_aead_encrypt(
    uint8_t *k,
    uint8_t *n,
    uint32_t aadlen,
    uint8_t *aad,
    uint32_t mlen,
    uint8_t *m,
    uint8_t *cipher,
    uint8_t *mac)
{
    /* RFC 8439 AEAD encryption. Encrypt the plaintext with ChaCha20 starting
     * at block counter 1 — counter 0 is reserved below for deriving the
     * one-time Poly1305 key. In-place operation (m == cipher) is supported. */
    Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, cipher, m, k, n, (uint32_t)1U);
    /* Derive the one-time MAC key: encrypt 64 zero bytes with counter 0,
     * i.e. tmp = ChaCha20 keystream block 0. poly1305_do_256 presumably
     * consumes only the first 32 bytes of this block as the Poly1305 key
     * (per RFC 8439) — the key-derivation detail is outside this view. */
    uint8_t tmp[64U] = { 0U };
    Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U);
    uint8_t *key = tmp;
    /* MAC the AAD and the ciphertext and write the 16-byte tag to mac. */
    poly1305_do_256(key, aadlen, aad, mlen, cipher, mac);
}
1167 | | |
1168 | | /** |
1169 | | Decrypt a ciphertext `cipher` with key `k`. |
1170 | | |
1171 | | The arguments `k`, `n`, `aadlen`, and `aad` are same in encryption/decryption. |
1172 | | Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory. |
1173 | | |
1174 | | If decryption succeeds, the resulting plaintext is stored in `m` and the function returns the success code 0. |
1175 | | If decryption fails, the array `m` remains unchanged and the function returns the error code 1. |
1176 | | |
1177 | | @param k Pointer to 32 bytes of memory where the AEAD key is read from. |
1178 | | @param n Pointer to 12 bytes of memory where the AEAD nonce is read from. |
1179 | | @param aadlen Length of the associated data. |
1180 | | @param aad Pointer to `aadlen` bytes of memory where the associated data is read from. |
1181 | | |
1182 | | @param mlen Length of the ciphertext. |
1183 | | @param m Pointer to `mlen` bytes of memory where the message is written to. |
1184 | | @param cipher Pointer to `mlen` bytes of memory where the ciphertext is read from. |
1185 | | @param mac Pointer to 16 bytes of memory where the mac is read from. |
1186 | | |
1187 | |   @returns 0 on success; 1 on failure.
1188 | | */ |
uint32_t
Hacl_Chacha20Poly1305_256_aead_decrypt(
    uint8_t *k,
    uint8_t *n,
    uint32_t aadlen,
    uint8_t *aad,
    uint32_t mlen,
    uint8_t *m,
    uint8_t *cipher,
    uint8_t *mac)
{
    /* RFC 8439 AEAD decryption: recompute the tag over aad || cipher,
     * compare it to mac in constant time, and decrypt only on a match. */
    uint8_t computed_mac[16U] = { 0U };
    /* Derive the one-time Poly1305 key from ChaCha20 keystream block 0
     * (counter 0), exactly as in encryption. */
    uint8_t tmp[64U] = { 0U };
    Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U);
    uint8_t *key = tmp;
    poly1305_do_256(key, aadlen, aad, mlen, cipher, computed_mac);
    /* Constant-time tag comparison: FStar_UInt8_eq_mask yields 0xFF when the
     * bytes are equal and 0x00 otherwise; AND-folding over all 16 bytes
     * leaves res == 0xFF iff every byte matched. No data-dependent branches
     * or early exit — do not "simplify" this to memcmp. */
    uint8_t res = (uint8_t)255U;
    KRML_MAYBE_FOR16(i,
                     (uint32_t)0U,
                     (uint32_t)16U,
                     (uint32_t)1U,
                     uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]);
                     res = uu____0 & res;);
    uint8_t z = res;
    if (z == (uint8_t)255U) {
        /* Tag verified: decrypt the ciphertext (counter starts at 1). */
        Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, m, cipher, k, n, (uint32_t)1U);
        return (uint32_t)0U;
    }
    /* Authentication failure: m is left unchanged. */
    return (uint32_t)1U;
}