/src/dropbear/src/sntrup761.c
Line | Count | Source |
1 | | /* |
2 | | * Public Domain, Authors: |
3 | | * - Daniel J. Bernstein |
4 | | * - Chitchanok Chuengsatiansup |
5 | | * - Tanja Lange |
6 | | * - Christine van Vredendaal |
7 | | */ |
8 | | |
9 | | #include <string.h> |
10 | | #include "sntrup761_compat.h" |
11 | | |
12 | | #if DROPBEAR_SNTRUP761 |
13 | | |
14 | 3.48k | #define crypto_declassify(x, y) do {} while (0) |
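/* Editor's note: crypto_declassify is deliberately a no-op in this build. In
 * SUPERCOP it is used to mark values whose secrecy no longer matters (for
 * example a rejected candidate during rejection sampling) so constant-time
 * checkers such as TIMECOP/valgrind stop tracking them; no run-time work is
 * needed here. */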
15 | | |
16 | | #define int8 crypto_int8 |
17 | | #define uint8 crypto_uint8 |
18 | | #define int16 crypto_int16 |
19 | | #define uint16 crypto_uint16 |
20 | 10.6M | #define int32 crypto_int32 |
21 | | #define uint32 crypto_uint32 |
22 | | #define int64 crypto_int64 |
23 | | #define uint64 crypto_uint64 |
24 | | extern volatile crypto_int16 crypto_int16_optblocker; |
25 | | extern volatile crypto_int32 crypto_int32_optblocker; |
26 | | extern volatile crypto_int64 crypto_int64_optblocker; |
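/* Editor's note: the *_optblocker variables are declared extern here and
 * defined elsewhere in the build; they are volatile and expected to hold zero
 * at run time. Folding them into the portable fallbacks below keeps the
 * compiler from recognising the arithmetic as a plain comparison and
 * re-introducing secret-dependent branches. */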
27 | | |
28 | | /* from supercop-20241022/cryptoint/crypto_int16.h */ |
29 | | /* auto-generated: cd cryptoint; ./autogen */ |
30 | | /* cryptoint 20241003 */ |
31 | | |
32 | | #ifndef crypto_int16_h |
33 | | #define crypto_int16_h |
34 | | |
35 | 10.6M | #define crypto_int16 int16_t |
36 | | #define crypto_int16_unsigned uint16_t |
37 | | |
38 | | |
39 | | |
40 | | __attribute__((unused)) |
41 | | static inline |
42 | 0 | crypto_int16 crypto_int16_load(const unsigned char *crypto_int16_s) { |
43 | 0 | crypto_int16 crypto_int16_z = 0; |
44 | 0 | crypto_int16_z |= ((crypto_int16) (*crypto_int16_s++)) << 0; |
45 | 0 | crypto_int16_z |= ((crypto_int16) (*crypto_int16_s++)) << 8; |
46 | 0 | return crypto_int16_z; |
47 | 0 | } |
48 | | |
49 | | __attribute__((unused)) |
50 | | static inline |
51 | 0 | crypto_int16 crypto_int16_load_bigendian(const unsigned char *crypto_int16_s) { |
52 | 0 | crypto_int16 crypto_int16_z = 0; |
53 | 0 | crypto_int16_z |= ((crypto_int16) (*crypto_int16_s++)) << 8; |
54 | 0 | crypto_int16_z |= ((crypto_int16) (*crypto_int16_s++)) << 0; |
55 | 0 | return crypto_int16_z; |
56 | 0 | } |
57 | | |
58 | | __attribute__((unused)) |
59 | | static inline |
60 | 0 | void crypto_int16_store(unsigned char *crypto_int16_s,crypto_int16 crypto_int16_x) { |
61 | 0 | *crypto_int16_s++ = crypto_int16_x >> 0; |
62 | 0 | *crypto_int16_s++ = crypto_int16_x >> 8; |
63 | 0 | } |
64 | | |
65 | | __attribute__((unused)) |
66 | | static inline |
67 | 0 | void crypto_int16_store_bigendian(unsigned char *crypto_int16_s,crypto_int16 crypto_int16_x) { |
68 | 0 | *crypto_int16_s++ = crypto_int16_x >> 8; |
69 | 0 | *crypto_int16_s++ = crypto_int16_x >> 0; |
70 | 0 | } |
71 | | |
72 | | __attribute__((unused)) |
73 | | static inline |
74 | 10.6M | crypto_int16 crypto_int16_negative_mask(crypto_int16 crypto_int16_x) { |
75 | 10.6M | #if defined(__GNUC__) && defined(__x86_64__) |
76 | 10.6M | __asm__ ("sarw $15,%0" : "+r"(crypto_int16_x) : : "cc"); |
77 | 10.6M | return crypto_int16_x; |
78 | | #elif defined(__GNUC__) && defined(__aarch64__) |
79 | | crypto_int16 crypto_int16_y; |
80 | | __asm__ ("sbfx %w0,%w1,15,1" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : ); |
81 | | return crypto_int16_y; |
82 | | #else |
83 | | crypto_int16_x >>= 16-6; |
84 | | crypto_int16_x += crypto_int16_optblocker; |
85 | | crypto_int16_x >>= 5; |
86 | | return crypto_int16_x; |
87 | | #endif |
88 | 10.6M | } |
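/* Editor's note: a minimal illustrative sketch, not part of the original file,
 * of how an all-ones/all-zeros mask such as the one returned by
 * crypto_int16_negative_mask drives branch-free selection. The helper name
 * ct_select16 is hypothetical; the caller is assumed to supply the mask. */
static inline crypto_int16 ct_select16(crypto_int16 mask, crypto_int16 a, crypto_int16 b) {
  /* returns a when mask == -1, returns b when mask == 0, with no branch */
  return (crypto_int16) (b ^ (mask & (a ^ b)));
}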
89 | | |
90 | | __attribute__((unused)) |
91 | | static inline |
92 | 0 | crypto_int16_unsigned crypto_int16_unsigned_topbit_01(crypto_int16_unsigned crypto_int16_x) { |
93 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
94 | 0 | __asm__ ("shrw $15,%0" : "+r"(crypto_int16_x) : : "cc"); |
95 | 0 | return crypto_int16_x; |
96 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
97 | 0 | crypto_int16 crypto_int16_y; |
98 | 0 | __asm__ ("ubfx %w0,%w1,15,1" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : ); |
99 | 0 | return crypto_int16_y; |
100 | 0 | #else |
101 | 0 | crypto_int16_x >>= 16-6; |
102 | 0 | crypto_int16_x += crypto_int16_optblocker; |
103 | 0 | crypto_int16_x >>= 5; |
104 | 0 | return crypto_int16_x; |
105 | 0 | #endif |
106 | 0 | } |
107 | | |
108 | | __attribute__((unused)) |
109 | | static inline |
110 | 0 | crypto_int16 crypto_int16_negative_01(crypto_int16 crypto_int16_x) { |
111 | 0 | return crypto_int16_unsigned_topbit_01(crypto_int16_x); |
112 | 0 | } |
113 | | |
114 | | __attribute__((unused)) |
115 | | static inline |
116 | 0 | crypto_int16 crypto_int16_topbit_mask(crypto_int16 crypto_int16_x) { |
117 | 0 | return crypto_int16_negative_mask(crypto_int16_x); |
118 | 0 | } |
119 | | |
120 | | __attribute__((unused)) |
121 | | static inline |
122 | 0 | crypto_int16 crypto_int16_topbit_01(crypto_int16 crypto_int16_x) { |
123 | 0 | return crypto_int16_unsigned_topbit_01(crypto_int16_x); |
124 | 0 | } |
125 | | |
126 | | __attribute__((unused)) |
127 | | static inline |
128 | 0 | crypto_int16 crypto_int16_bottombit_mask(crypto_int16 crypto_int16_x) { |
129 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
130 | 0 | __asm__ ("andw $1,%0" : "+r"(crypto_int16_x) : : "cc"); |
131 | 0 | return -crypto_int16_x; |
132 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
133 | 0 | crypto_int16 crypto_int16_y; |
134 | 0 | __asm__ ("sbfx %w0,%w1,0,1" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : ); |
135 | 0 | return crypto_int16_y; |
136 | 0 | #else |
137 | 0 | crypto_int16_x &= 1 + crypto_int16_optblocker; |
138 | 0 | return -crypto_int16_x; |
139 | 0 | #endif |
140 | 0 | } |
141 | | |
142 | | __attribute__((unused)) |
143 | | static inline |
144 | 0 | crypto_int16 crypto_int16_bottombit_01(crypto_int16 crypto_int16_x) { |
145 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
146 | 0 | __asm__ ("andw $1,%0" : "+r"(crypto_int16_x) : : "cc"); |
147 | 0 | return crypto_int16_x; |
148 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
149 | 0 | crypto_int16 crypto_int16_y; |
150 | 0 | __asm__ ("ubfx %w0,%w1,0,1" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : ); |
151 | 0 | return crypto_int16_y; |
152 | 0 | #else |
153 | 0 | crypto_int16_x &= 1 + crypto_int16_optblocker; |
154 | 0 | return crypto_int16_x; |
155 | 0 | #endif |
156 | 0 | } |
157 | | |
158 | | __attribute__((unused)) |
159 | | static inline |
160 | 0 | crypto_int16 crypto_int16_bitinrangepublicpos_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { |
161 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
162 | 0 | __asm__ ("sarw %%cl,%0" : "+r"(crypto_int16_x) : "c"(crypto_int16_s) : "cc"); |
163 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
164 | 0 | __asm__ ("sxth %w0,%w0\n asr %w0,%w0,%w1" : "+&r"(crypto_int16_x) : "r"(crypto_int16_s) : ); |
165 | 0 | #else |
166 | 0 | crypto_int16_x >>= crypto_int16_s ^ crypto_int16_optblocker; |
167 | 0 | #endif |
168 | 0 | return crypto_int16_bottombit_mask(crypto_int16_x); |
169 | 0 | } |
170 | | |
171 | | __attribute__((unused)) |
172 | | static inline |
173 | 0 | crypto_int16 crypto_int16_bitinrangepublicpos_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { |
174 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
175 | 0 | __asm__ ("sarw %%cl,%0" : "+r"(crypto_int16_x) : "c"(crypto_int16_s) : "cc"); |
176 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
177 | 0 | __asm__ ("sxth %w0,%w0\n asr %w0,%w0,%w1" : "+&r"(crypto_int16_x) : "r"(crypto_int16_s) : ); |
178 | 0 | #else |
179 | 0 | crypto_int16_x >>= crypto_int16_s ^ crypto_int16_optblocker; |
180 | 0 | #endif |
181 | 0 | return crypto_int16_bottombit_01(crypto_int16_x); |
182 | 0 | } |
183 | | |
184 | | __attribute__((unused)) |
185 | | static inline |
186 | 0 | crypto_int16 crypto_int16_shlmod(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { |
187 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
188 | 0 | crypto_int16_s &= 15; |
189 | 0 | __asm__ ("shlw %%cl,%0" : "+r"(crypto_int16_x) : "c"(crypto_int16_s) : "cc"); |
190 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
191 | 0 | __asm__ ("and %w0,%w0,15\n and %w1,%w1,65535\n lsl %w1,%w1,%w0" : "+&r"(crypto_int16_s), "+r"(crypto_int16_x) : : ); |
192 | 0 | #else |
193 | 0 | int crypto_int16_k, crypto_int16_l; |
194 | 0 | for (crypto_int16_l = 0,crypto_int16_k = 1;crypto_int16_k < 16;++crypto_int16_l,crypto_int16_k *= 2) |
195 | 0 | crypto_int16_x ^= (crypto_int16_x ^ (crypto_int16_x << crypto_int16_k)) & crypto_int16_bitinrangepublicpos_mask(crypto_int16_s,crypto_int16_l); |
196 | 0 | #endif |
197 | 0 | return crypto_int16_x; |
198 | 0 | } |
199 | | |
200 | | __attribute__((unused)) |
201 | | static inline |
202 | 0 | crypto_int16 crypto_int16_shrmod(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { |
203 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
204 | 0 | crypto_int16_s &= 15; |
205 | 0 | __asm__ ("sarw %%cl,%0" : "+r"(crypto_int16_x) : "c"(crypto_int16_s) : "cc"); |
206 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
207 | 0 | __asm__ ("and %w0,%w0,15\n sxth %w1,%w1\n asr %w1,%w1,%w0" : "+&r"(crypto_int16_s), "+r"(crypto_int16_x) : : ); |
208 | 0 | #else |
209 | 0 | int crypto_int16_k, crypto_int16_l; |
210 | 0 | for (crypto_int16_l = 0,crypto_int16_k = 1;crypto_int16_k < 16;++crypto_int16_l,crypto_int16_k *= 2) |
211 | 0 | crypto_int16_x ^= (crypto_int16_x ^ (crypto_int16_x >> crypto_int16_k)) & crypto_int16_bitinrangepublicpos_mask(crypto_int16_s,crypto_int16_l); |
212 | 0 | #endif |
213 | 0 | return crypto_int16_x; |
214 | 0 | } |
215 | | |
216 | | __attribute__((unused)) |
217 | | static inline |
218 | 0 | crypto_int16 crypto_int16_bitmod_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { |
219 | 0 | crypto_int16_x = crypto_int16_shrmod(crypto_int16_x,crypto_int16_s); |
220 | 0 | return crypto_int16_bottombit_mask(crypto_int16_x); |
221 | 0 | } |
222 | | |
223 | | __attribute__((unused)) |
224 | | static inline |
225 | 0 | crypto_int16 crypto_int16_bitmod_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { |
226 | 0 | crypto_int16_x = crypto_int16_shrmod(crypto_int16_x,crypto_int16_s); |
227 | 0 | return crypto_int16_bottombit_01(crypto_int16_x); |
228 | 0 | } |
229 | | |
230 | | __attribute__((unused)) |
231 | | static inline |
232 | 10.6M | crypto_int16 crypto_int16_nonzero_mask(crypto_int16 crypto_int16_x) { |
233 | 10.6M | #if defined(__GNUC__) && defined(__x86_64__) |
234 | 10.6M | crypto_int16 crypto_int16_q,crypto_int16_z; |
235 | 10.6M | __asm__ ("xorw %0,%0\n movw $-1,%1\n testw %2,%2\n cmovnew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); |
236 | 10.6M | return crypto_int16_z; |
237 | | #elif defined(__GNUC__) && defined(__aarch64__) |
238 | | crypto_int16 crypto_int16_z; |
239 | | __asm__ ("tst %w1,65535\n csetm %w0,ne" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); |
240 | | return crypto_int16_z; |
241 | | #else |
242 | | crypto_int16_x |= -crypto_int16_x; |
243 | | return crypto_int16_negative_mask(crypto_int16_x); |
244 | | #endif |
245 | 10.6M | } |
246 | | |
247 | | __attribute__((unused)) |
248 | | static inline |
249 | 0 | crypto_int16 crypto_int16_nonzero_01(crypto_int16 crypto_int16_x) { |
250 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
251 | 0 | crypto_int16 crypto_int16_q,crypto_int16_z; |
252 | 0 | __asm__ ("xorw %0,%0\n movw $1,%1\n testw %2,%2\n cmovnew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); |
253 | 0 | return crypto_int16_z; |
254 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
255 | 0 | crypto_int16 crypto_int16_z; |
256 | 0 | __asm__ ("tst %w1,65535\n cset %w0,ne" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); |
257 | 0 | return crypto_int16_z; |
258 | 0 | #else |
259 | 0 | crypto_int16_x |= -crypto_int16_x; |
260 | 0 | return crypto_int16_unsigned_topbit_01(crypto_int16_x); |
261 | 0 | #endif |
262 | 0 | } |
263 | | |
264 | | __attribute__((unused)) |
265 | | static inline |
266 | 0 | crypto_int16 crypto_int16_positive_mask(crypto_int16 crypto_int16_x) { |
267 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
268 | 0 | crypto_int16 crypto_int16_q,crypto_int16_z; |
269 | 0 | __asm__ ("xorw %0,%0\n movw $-1,%1\n testw %2,%2\n cmovgw %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); |
270 | 0 | return crypto_int16_z; |
271 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
272 | 0 | crypto_int16 crypto_int16_z; |
273 | 0 | __asm__ ("sxth %w0,%w1\n cmp %w0,0\n csetm %w0,gt" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); |
274 | 0 | return crypto_int16_z; |
275 | 0 | #else |
276 | 0 | crypto_int16 crypto_int16_z = -crypto_int16_x; |
277 | 0 | crypto_int16_z ^= crypto_int16_x & crypto_int16_z; |
278 | 0 | return crypto_int16_negative_mask(crypto_int16_z); |
279 | 0 | #endif |
280 | 0 | } |
281 | | |
282 | | __attribute__((unused)) |
283 | | static inline |
284 | 0 | crypto_int16 crypto_int16_positive_01(crypto_int16 crypto_int16_x) { |
285 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
286 | 0 | crypto_int16 crypto_int16_q,crypto_int16_z; |
287 | 0 | __asm__ ("xorw %0,%0\n movw $1,%1\n testw %2,%2\n cmovgw %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); |
288 | 0 | return crypto_int16_z; |
289 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
290 | 0 | crypto_int16 crypto_int16_z; |
291 | 0 | __asm__ ("sxth %w0,%w1\n cmp %w0,0\n cset %w0,gt" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); |
292 | 0 | return crypto_int16_z; |
293 | 0 | #else |
294 | 0 | crypto_int16 crypto_int16_z = -crypto_int16_x; |
295 | 0 | crypto_int16_z ^= crypto_int16_x & crypto_int16_z; |
296 | 0 | return crypto_int16_unsigned_topbit_01(crypto_int16_z); |
297 | 0 | #endif |
298 | 0 | } |
299 | | |
300 | | __attribute__((unused)) |
301 | | static inline |
302 | 0 | crypto_int16 crypto_int16_zero_mask(crypto_int16 crypto_int16_x) { |
303 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
304 | 0 | crypto_int16 crypto_int16_q,crypto_int16_z; |
305 | 0 | __asm__ ("xorw %0,%0\n movw $-1,%1\n testw %2,%2\n cmovew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); |
306 | 0 | return crypto_int16_z; |
307 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
308 | 0 | crypto_int16 crypto_int16_z; |
309 | 0 | __asm__ ("tst %w1,65535\n csetm %w0,eq" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); |
310 | 0 | return crypto_int16_z; |
311 | 0 | #else |
312 | 0 | return ~crypto_int16_nonzero_mask(crypto_int16_x); |
313 | 0 | #endif |
314 | 0 | } |
315 | | |
316 | | __attribute__((unused)) |
317 | | static inline |
318 | 0 | crypto_int16 crypto_int16_zero_01(crypto_int16 crypto_int16_x) { |
319 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
320 | 0 | crypto_int16 crypto_int16_q,crypto_int16_z; |
321 | 0 | __asm__ ("xorw %0,%0\n movw $1,%1\n testw %2,%2\n cmovew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); |
322 | 0 | return crypto_int16_z; |
323 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
324 | 0 | crypto_int16 crypto_int16_z; |
325 | 0 | __asm__ ("tst %w1,65535\n cset %w0,eq" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); |
326 | 0 | return crypto_int16_z; |
327 | 0 | #else |
328 | 0 | return 1-crypto_int16_nonzero_01(crypto_int16_x); |
329 | 0 | #endif |
330 | 0 | } |
331 | | |
332 | | __attribute__((unused)) |
333 | | static inline |
334 | 0 | crypto_int16 crypto_int16_unequal_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { |
335 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
336 | 0 | crypto_int16 crypto_int16_q,crypto_int16_z; |
337 | 0 | __asm__ ("xorw %0,%0\n movw $-1,%1\n cmpw %3,%2\n cmovnew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); |
338 | 0 | return crypto_int16_z; |
339 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
340 | 0 | crypto_int16 crypto_int16_z; |
341 | 0 | __asm__ ("and %w0,%w1,65535\n cmp %w0,%w2,uxth\n csetm %w0,ne" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); |
342 | 0 | return crypto_int16_z; |
343 | 0 | #else |
344 | 0 | return crypto_int16_nonzero_mask(crypto_int16_x ^ crypto_int16_y); |
345 | 0 | #endif |
346 | 0 | } |
347 | | |
348 | | __attribute__((unused)) |
349 | | static inline |
350 | 0 | crypto_int16 crypto_int16_unequal_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { |
351 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
352 | 0 | crypto_int16 crypto_int16_q,crypto_int16_z; |
353 | 0 | __asm__ ("xorw %0,%0\n movw $1,%1\n cmpw %3,%2\n cmovnew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); |
354 | 0 | return crypto_int16_z; |
355 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
356 | 0 | crypto_int16 crypto_int16_z; |
357 | 0 | __asm__ ("and %w0,%w1,65535\n cmp %w0,%w2,uxth\n cset %w0,ne" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); |
358 | 0 | return crypto_int16_z; |
359 | 0 | #else |
360 | 0 | return crypto_int16_nonzero_01(crypto_int16_x ^ crypto_int16_y); |
361 | 0 | #endif |
362 | 0 | } |
363 | | |
364 | | __attribute__((unused)) |
365 | | static inline |
366 | 0 | crypto_int16 crypto_int16_equal_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { |
367 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
368 | 0 | crypto_int16 crypto_int16_q,crypto_int16_z; |
369 | 0 | __asm__ ("xorw %0,%0\n movw $-1,%1\n cmpw %3,%2\n cmovew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); |
370 | 0 | return crypto_int16_z; |
371 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
372 | 0 | crypto_int16 crypto_int16_z; |
373 | 0 | __asm__ ("and %w0,%w1,65535\n cmp %w0,%w2,uxth\n csetm %w0,eq" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); |
374 | 0 | return crypto_int16_z; |
375 | 0 | #else |
376 | 0 | return ~crypto_int16_unequal_mask(crypto_int16_x,crypto_int16_y); |
377 | 0 | #endif |
378 | 0 | } |
379 | | |
380 | | __attribute__((unused)) |
381 | | static inline |
382 | 0 | crypto_int16 crypto_int16_equal_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { |
383 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
384 | 0 | crypto_int16 crypto_int16_q,crypto_int16_z; |
385 | 0 | __asm__ ("xorw %0,%0\n movw $1,%1\n cmpw %3,%2\n cmovew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); |
386 | 0 | return crypto_int16_z; |
387 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
388 | 0 | crypto_int16 crypto_int16_z; |
389 | 0 | __asm__ ("and %w0,%w1,65535\n cmp %w0,%w2,uxth\n cset %w0,eq" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); |
390 | 0 | return crypto_int16_z; |
391 | 0 | #else |
392 | 0 | return 1-crypto_int16_unequal_01(crypto_int16_x,crypto_int16_y); |
393 | 0 | #endif |
394 | 0 | } |
395 | | |
396 | | __attribute__((unused)) |
397 | | static inline |
398 | 0 | crypto_int16 crypto_int16_min(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { |
399 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
400 | 0 | __asm__ ("cmpw %1,%0\n cmovgw %1,%0" : "+r"(crypto_int16_x) : "r"(crypto_int16_y) : "cc"); |
401 | 0 | return crypto_int16_x; |
402 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
403 | 0 | __asm__ ("sxth %w0,%w0\n cmp %w0,%w1,sxth\n csel %w0,%w0,%w1,lt" : "+&r"(crypto_int16_x) : "r"(crypto_int16_y) : "cc"); |
404 | 0 | return crypto_int16_x; |
405 | 0 | #else |
406 | 0 | crypto_int16 crypto_int16_r = crypto_int16_y ^ crypto_int16_x; |
407 | 0 | crypto_int16 crypto_int16_z = crypto_int16_y - crypto_int16_x; |
408 | 0 | crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_y); |
409 | 0 | crypto_int16_z = crypto_int16_negative_mask(crypto_int16_z); |
410 | 0 | crypto_int16_z &= crypto_int16_r; |
411 | 0 | return crypto_int16_x ^ crypto_int16_z; |
412 | 0 | #endif |
413 | 0 | } |
414 | | |
415 | | __attribute__((unused)) |
416 | | static inline |
417 | 0 | crypto_int16 crypto_int16_max(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { |
418 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
419 | 0 | __asm__ ("cmpw %1,%0\n cmovlw %1,%0" : "+r"(crypto_int16_x) : "r"(crypto_int16_y) : "cc"); |
420 | 0 | return crypto_int16_x; |
421 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
422 | 0 | __asm__ ("sxth %w0,%w0\n cmp %w0,%w1,sxth\n csel %w0,%w1,%w0,lt" : "+&r"(crypto_int16_x) : "r"(crypto_int16_y) : "cc"); |
423 | 0 | return crypto_int16_x; |
424 | 0 | #else |
425 | 0 | crypto_int16 crypto_int16_r = crypto_int16_y ^ crypto_int16_x; |
426 | 0 | crypto_int16 crypto_int16_z = crypto_int16_y - crypto_int16_x; |
427 | 0 | crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_y); |
428 | 0 | crypto_int16_z = crypto_int16_negative_mask(crypto_int16_z); |
429 | 0 | crypto_int16_z &= crypto_int16_r; |
430 | 0 | return crypto_int16_y ^ crypto_int16_z; |
431 | 0 | #endif |
432 | 0 | } |
433 | | |
434 | | __attribute__((unused)) |
435 | | static inline |
436 | 0 | void crypto_int16_minmax(crypto_int16 *crypto_int16_p,crypto_int16 *crypto_int16_q) { |
437 | 0 | crypto_int16 crypto_int16_x = *crypto_int16_p; |
438 | 0 | crypto_int16 crypto_int16_y = *crypto_int16_q; |
439 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
440 | 0 | crypto_int16 crypto_int16_z; |
441 | 0 | __asm__ ("cmpw %2,%1\n movw %1,%0\n cmovgw %2,%1\n cmovgw %0,%2" : "=&r"(crypto_int16_z), "+&r"(crypto_int16_x), "+r"(crypto_int16_y) : : "cc"); |
442 | 0 | *crypto_int16_p = crypto_int16_x; |
443 | 0 | *crypto_int16_q = crypto_int16_y; |
444 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
445 | 0 | crypto_int16 crypto_int16_r, crypto_int16_s; |
446 | 0 | __asm__ ("sxth %w0,%w0\n cmp %w0,%w3,sxth\n csel %w1,%w0,%w3,lt\n csel %w2,%w3,%w0,lt" : "+&r"(crypto_int16_x), "=&r"(crypto_int16_r), "=r"(crypto_int16_s) : "r"(crypto_int16_y) : "cc"); |
447 | 0 | *crypto_int16_p = crypto_int16_r; |
448 | 0 | *crypto_int16_q = crypto_int16_s; |
449 | 0 | #else |
450 | 0 | crypto_int16 crypto_int16_r = crypto_int16_y ^ crypto_int16_x; |
451 | 0 | crypto_int16 crypto_int16_z = crypto_int16_y - crypto_int16_x; |
452 | 0 | crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_y); |
453 | 0 | crypto_int16_z = crypto_int16_negative_mask(crypto_int16_z); |
454 | 0 | crypto_int16_z &= crypto_int16_r; |
455 | 0 | crypto_int16_x ^= crypto_int16_z; |
456 | 0 | crypto_int16_y ^= crypto_int16_z; |
457 | 0 | *crypto_int16_p = crypto_int16_x; |
458 | 0 | *crypto_int16_q = crypto_int16_y; |
459 | 0 | #endif |
460 | 0 | } |
461 | | |
462 | | __attribute__((unused)) |
463 | | static inline |
464 | 0 | crypto_int16 crypto_int16_smaller_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { |
465 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
466 | 0 | crypto_int16 crypto_int16_q,crypto_int16_z; |
467 | 0 | __asm__ ("xorw %0,%0\n movw $-1,%1\n cmpw %3,%2\n cmovlw %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); |
468 | 0 | return crypto_int16_z; |
469 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
470 | 0 | crypto_int16 crypto_int16_z; |
471 | 0 | __asm__ ("sxth %w0,%w1\n cmp %w0,%w2,sxth\n csetm %w0,lt" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); |
472 | 0 | return crypto_int16_z; |
473 | 0 | #else |
474 | 0 | crypto_int16 crypto_int16_r = crypto_int16_x ^ crypto_int16_y; |
475 | 0 | crypto_int16 crypto_int16_z = crypto_int16_x - crypto_int16_y; |
476 | 0 | crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_x); |
477 | 0 | return crypto_int16_negative_mask(crypto_int16_z); |
478 | 0 | #endif |
479 | 0 | } |
480 | | |
481 | | __attribute__((unused)) |
482 | | static inline |
483 | 0 | crypto_int16 crypto_int16_smaller_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { |
484 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
485 | 0 | crypto_int16 crypto_int16_q,crypto_int16_z; |
486 | 0 | __asm__ ("xorw %0,%0\n movw $1,%1\n cmpw %3,%2\n cmovlw %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); |
487 | 0 | return crypto_int16_z; |
488 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
489 | 0 | crypto_int16 crypto_int16_z; |
490 | 0 | __asm__ ("sxth %w0,%w1\n cmp %w0,%w2,sxth\n cset %w0,lt" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); |
491 | 0 | return crypto_int16_z; |
492 | 0 | #else |
493 | 0 | crypto_int16 crypto_int16_r = crypto_int16_x ^ crypto_int16_y; |
494 | 0 | crypto_int16 crypto_int16_z = crypto_int16_x - crypto_int16_y; |
495 | 0 | crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_x); |
496 | 0 | return crypto_int16_unsigned_topbit_01(crypto_int16_z); |
497 | 0 | #endif |
498 | 0 | } |
499 | | |
500 | | __attribute__((unused)) |
501 | | static inline |
502 | 0 | crypto_int16 crypto_int16_leq_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { |
503 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
504 | 0 | crypto_int16 crypto_int16_q,crypto_int16_z; |
505 | 0 | __asm__ ("xorw %0,%0\n movw $-1,%1\n cmpw %3,%2\n cmovlew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); |
506 | 0 | return crypto_int16_z; |
507 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
508 | 0 | crypto_int16 crypto_int16_z; |
509 | 0 | __asm__ ("sxth %w0,%w1\n cmp %w0,%w2,sxth\n csetm %w0,le" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); |
510 | 0 | return crypto_int16_z; |
511 | 0 | #else |
512 | 0 | return ~crypto_int16_smaller_mask(crypto_int16_y,crypto_int16_x); |
513 | 0 | #endif |
514 | 0 | } |
515 | | |
516 | | __attribute__((unused)) |
517 | | static inline |
518 | 0 | crypto_int16 crypto_int16_leq_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { |
519 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
520 | 0 | crypto_int16 crypto_int16_q,crypto_int16_z; |
521 | 0 | __asm__ ("xorw %0,%0\n movw $1,%1\n cmpw %3,%2\n cmovlew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); |
522 | 0 | return crypto_int16_z; |
523 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
524 | 0 | crypto_int16 crypto_int16_z; |
525 | 0 | __asm__ ("sxth %w0,%w1\n cmp %w0,%w2,sxth\n cset %w0,le" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); |
526 | 0 | return crypto_int16_z; |
527 | 0 | #else |
528 | 0 | return 1-crypto_int16_smaller_01(crypto_int16_y,crypto_int16_x); |
529 | 0 | #endif |
530 | 0 | } |
531 | | |
532 | | __attribute__((unused)) |
533 | | static inline |
534 | 0 | int crypto_int16_ones_num(crypto_int16 crypto_int16_x) { |
535 | 0 | crypto_int16_unsigned crypto_int16_y = crypto_int16_x; |
536 | 0 | const crypto_int16 C0 = 0x5555; |
537 | 0 | const crypto_int16 C1 = 0x3333; |
538 | 0 | const crypto_int16 C2 = 0x0f0f; |
539 | 0 | crypto_int16_y -= ((crypto_int16_y >> 1) & C0); |
540 | 0 | crypto_int16_y = (crypto_int16_y & C1) + ((crypto_int16_y >> 2) & C1); |
541 | 0 | crypto_int16_y = (crypto_int16_y + (crypto_int16_y >> 4)) & C2; |
542 | 0 | crypto_int16_y = (crypto_int16_y + (crypto_int16_y >> 8)) & 0xff; |
543 | 0 | return crypto_int16_y; |
544 | 0 | } |
545 | | |
546 | | __attribute__((unused)) |
547 | | static inline |
548 | 0 | int crypto_int16_bottomzeros_num(crypto_int16 crypto_int16_x) { |
549 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
550 | 0 | crypto_int16 fallback = 16; |
551 | 0 | __asm__ ("bsfw %0,%0\n cmovew %1,%0" : "+&r"(crypto_int16_x) : "r"(fallback) : "cc"); |
552 | 0 | return crypto_int16_x; |
553 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
554 | 0 | int64_t crypto_int16_y; |
555 | 0 | __asm__ ("orr %w0,%w1,-65536\n rbit %w0,%w0\n clz %w0,%w0" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : ); |
556 | 0 | return crypto_int16_y; |
557 | 0 | #else |
558 | 0 | crypto_int16 crypto_int16_y = crypto_int16_x ^ (crypto_int16_x-1); |
559 | 0 | crypto_int16_y = ((crypto_int16) crypto_int16_y) >> 1; |
560 | 0 | crypto_int16_y &= ~(crypto_int16_x & (((crypto_int16) 1) << (16-1))); |
561 | 0 | return crypto_int16_ones_num(crypto_int16_y); |
562 | 0 | #endif |
563 | 0 | } |
564 | | |
565 | | #endif |
566 | | |
567 | | /* from supercop-20241022/cryptoint/crypto_int32.h */ |
568 | | /* auto-generated: cd cryptoint; ./autogen */ |
569 | | /* cryptoint 20241003 */ |
570 | | |
571 | | #ifndef crypto_int32_h |
572 | | #define crypto_int32_h |
573 | | |
574 | 186M | #define crypto_int32 int32_t |
575 | | #define crypto_int32_unsigned uint32_t |
576 | | |
577 | | |
578 | | |
579 | | __attribute__((unused)) |
580 | | static inline |
581 | 0 | crypto_int32 crypto_int32_load(const unsigned char *crypto_int32_s) { |
582 | 0 | crypto_int32 crypto_int32_z = 0; |
583 | 0 | crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 0; |
584 | 0 | crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 8; |
585 | 0 | crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 16; |
586 | 0 | crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 24; |
587 | 0 | return crypto_int32_z; |
588 | 0 | } |
589 | | |
590 | | __attribute__((unused)) |
591 | | static inline |
592 | 0 | crypto_int32 crypto_int32_load_bigendian(const unsigned char *crypto_int32_s) { |
593 | 0 | crypto_int32 crypto_int32_z = 0; |
594 | 0 | crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 24; |
595 | 0 | crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 16; |
596 | 0 | crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 8; |
597 | 0 | crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 0; |
598 | 0 | return crypto_int32_z; |
599 | 0 | } |
600 | | |
601 | | __attribute__((unused)) |
602 | | static inline |
603 | 0 | void crypto_int32_store(unsigned char *crypto_int32_s,crypto_int32 crypto_int32_x) { |
604 | 0 | *crypto_int32_s++ = crypto_int32_x >> 0; |
605 | 0 | *crypto_int32_s++ = crypto_int32_x >> 8; |
606 | 0 | *crypto_int32_s++ = crypto_int32_x >> 16; |
607 | 0 | *crypto_int32_s++ = crypto_int32_x >> 24; |
608 | 0 | } |
609 | | |
610 | | __attribute__((unused)) |
611 | | static inline |
612 | 0 | void crypto_int32_store_bigendian(unsigned char *crypto_int32_s,crypto_int32 crypto_int32_x) { |
613 | 0 | *crypto_int32_s++ = crypto_int32_x >> 24; |
614 | 0 | *crypto_int32_s++ = crypto_int32_x >> 16; |
615 | 0 | *crypto_int32_s++ = crypto_int32_x >> 8; |
616 | 0 | *crypto_int32_s++ = crypto_int32_x >> 0; |
617 | 0 | } |
618 | | |
619 | | __attribute__((unused)) |
620 | | static inline |
621 | 121k | crypto_int32 crypto_int32_negative_mask(crypto_int32 crypto_int32_x) { |
622 | 121k | #if defined(__GNUC__) && defined(__x86_64__) |
623 | 121k | __asm__ ("sarl $31,%0" : "+r"(crypto_int32_x) : : "cc"); |
624 | 121k | return crypto_int32_x; |
625 | | #elif defined(__GNUC__) && defined(__aarch64__) |
626 | | crypto_int32 crypto_int32_y; |
627 | | __asm__ ("asr %w0,%w1,31" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : ); |
628 | | return crypto_int32_y; |
629 | | #else |
630 | | crypto_int32_x >>= 32-6; |
631 | | crypto_int32_x += crypto_int32_optblocker; |
632 | | crypto_int32_x >>= 5; |
633 | | return crypto_int32_x; |
634 | | #endif |
635 | 121k | } |
636 | | |
637 | | __attribute__((unused)) |
638 | | static inline |
639 | 0 | crypto_int32_unsigned crypto_int32_unsigned_topbit_01(crypto_int32_unsigned crypto_int32_x) { |
640 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
641 | 0 | __asm__ ("shrl $31,%0" : "+r"(crypto_int32_x) : : "cc"); |
642 | 0 | return crypto_int32_x; |
643 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
644 | 0 | crypto_int32 crypto_int32_y; |
645 | 0 | __asm__ ("lsr %w0,%w1,31" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : ); |
646 | 0 | return crypto_int32_y; |
647 | 0 | #else |
648 | 0 | crypto_int32_x >>= 32-6; |
649 | 0 | crypto_int32_x += crypto_int32_optblocker; |
650 | 0 | crypto_int32_x >>= 5; |
651 | 0 | return crypto_int32_x; |
652 | 0 | #endif |
653 | 0 | } |
654 | | |
655 | | __attribute__((unused)) |
656 | | static inline |
657 | 0 | crypto_int32 crypto_int32_negative_01(crypto_int32 crypto_int32_x) { |
658 | 0 | return crypto_int32_unsigned_topbit_01(crypto_int32_x); |
659 | 0 | } |
660 | | |
661 | | __attribute__((unused)) |
662 | | static inline |
663 | 0 | crypto_int32 crypto_int32_topbit_mask(crypto_int32 crypto_int32_x) { |
664 | 0 | return crypto_int32_negative_mask(crypto_int32_x); |
665 | 0 | } |
666 | | |
667 | | __attribute__((unused)) |
668 | | static inline |
669 | 0 | crypto_int32 crypto_int32_topbit_01(crypto_int32 crypto_int32_x) { |
670 | 0 | return crypto_int32_unsigned_topbit_01(crypto_int32_x); |
671 | 0 | } |
672 | | |
673 | | __attribute__((unused)) |
674 | | static inline |
675 | 0 | crypto_int32 crypto_int32_bottombit_mask(crypto_int32 crypto_int32_x) { |
676 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
677 | 0 | __asm__ ("andl $1,%0" : "+r"(crypto_int32_x) : : "cc"); |
678 | 0 | return -crypto_int32_x; |
679 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
680 | 0 | crypto_int32 crypto_int32_y; |
681 | 0 | __asm__ ("sbfx %w0,%w1,0,1" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : ); |
682 | 0 | return crypto_int32_y; |
683 | 0 | #else |
684 | 0 | crypto_int32_x &= 1 + crypto_int32_optblocker; |
685 | 0 | return -crypto_int32_x; |
686 | 0 | #endif |
687 | 0 | } |
688 | | |
689 | | __attribute__((unused)) |
690 | | static inline |
691 | 0 | crypto_int32 crypto_int32_bottombit_01(crypto_int32 crypto_int32_x) { |
692 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
693 | 0 | __asm__ ("andl $1,%0" : "+r"(crypto_int32_x) : : "cc"); |
694 | 0 | return crypto_int32_x; |
695 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
696 | 0 | crypto_int32 crypto_int32_y; |
697 | 0 | __asm__ ("ubfx %w0,%w1,0,1" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : ); |
698 | 0 | return crypto_int32_y; |
699 | 0 | #else |
700 | 0 | crypto_int32_x &= 1 + crypto_int32_optblocker; |
701 | 0 | return crypto_int32_x; |
702 | 0 | #endif |
703 | 0 | } |
704 | | |
705 | | __attribute__((unused)) |
706 | | static inline |
707 | 0 | crypto_int32 crypto_int32_bitinrangepublicpos_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { |
708 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
709 | 0 | __asm__ ("sarl %%cl,%0" : "+r"(crypto_int32_x) : "c"(crypto_int32_s) : "cc"); |
710 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
711 | 0 | __asm__ ("asr %w0,%w0,%w1" : "+r"(crypto_int32_x) : "r"(crypto_int32_s) : ); |
712 | 0 | #else |
713 | 0 | crypto_int32_x >>= crypto_int32_s ^ crypto_int32_optblocker; |
714 | 0 | #endif |
715 | 0 | return crypto_int32_bottombit_mask(crypto_int32_x); |
716 | 0 | } |
717 | | |
718 | | __attribute__((unused)) |
719 | | static inline |
720 | 0 | crypto_int32 crypto_int32_bitinrangepublicpos_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { |
721 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
722 | 0 | __asm__ ("sarl %%cl,%0" : "+r"(crypto_int32_x) : "c"(crypto_int32_s) : "cc"); |
723 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
724 | 0 | __asm__ ("asr %w0,%w0,%w1" : "+r"(crypto_int32_x) : "r"(crypto_int32_s) : ); |
725 | 0 | #else |
726 | 0 | crypto_int32_x >>= crypto_int32_s ^ crypto_int32_optblocker; |
727 | 0 | #endif |
728 | 0 | return crypto_int32_bottombit_01(crypto_int32_x); |
729 | 0 | } |
730 | | |
731 | | __attribute__((unused)) |
732 | | static inline |
733 | 0 | crypto_int32 crypto_int32_shlmod(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { |
734 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
735 | 0 | __asm__ ("shll %%cl,%0" : "+r"(crypto_int32_x) : "c"(crypto_int32_s) : "cc"); |
736 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
737 | 0 | __asm__ ("lsl %w0,%w0,%w1" : "+r"(crypto_int32_x) : "r"(crypto_int32_s) : ); |
738 | 0 | #else |
739 | 0 | int crypto_int32_k, crypto_int32_l; |
740 | 0 | for (crypto_int32_l = 0,crypto_int32_k = 1;crypto_int32_k < 32;++crypto_int32_l,crypto_int32_k *= 2) |
741 | 0 | crypto_int32_x ^= (crypto_int32_x ^ (crypto_int32_x << crypto_int32_k)) & crypto_int32_bitinrangepublicpos_mask(crypto_int32_s,crypto_int32_l); |
742 | 0 | #endif |
743 | 0 | return crypto_int32_x; |
744 | 0 | } |
745 | | |
746 | | __attribute__((unused)) |
747 | | static inline |
748 | 0 | crypto_int32 crypto_int32_shrmod(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { |
749 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
750 | 0 | __asm__ ("sarl %%cl,%0" : "+r"(crypto_int32_x) : "c"(crypto_int32_s) : "cc"); |
751 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
752 | 0 | __asm__ ("asr %w0,%w0,%w1" : "+r"(crypto_int32_x) : "r"(crypto_int32_s) : ); |
753 | 0 | #else |
754 | 0 | int crypto_int32_k, crypto_int32_l; |
755 | 0 | for (crypto_int32_l = 0,crypto_int32_k = 1;crypto_int32_k < 32;++crypto_int32_l,crypto_int32_k *= 2) |
756 | 0 | crypto_int32_x ^= (crypto_int32_x ^ (crypto_int32_x >> crypto_int32_k)) & crypto_int32_bitinrangepublicpos_mask(crypto_int32_s,crypto_int32_l); |
757 | 0 | #endif |
758 | 0 | return crypto_int32_x; |
759 | 0 | } |
760 | | |
761 | | __attribute__((unused)) |
762 | | static inline |
763 | 0 | crypto_int32 crypto_int32_bitmod_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { |
764 | 0 | crypto_int32_x = crypto_int32_shrmod(crypto_int32_x,crypto_int32_s); |
765 | 0 | return crypto_int32_bottombit_mask(crypto_int32_x); |
766 | 0 | } |
767 | | |
768 | | __attribute__((unused)) |
769 | | static inline |
770 | 0 | crypto_int32 crypto_int32_bitmod_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { |
771 | 0 | crypto_int32_x = crypto_int32_shrmod(crypto_int32_x,crypto_int32_s); |
772 | 0 | return crypto_int32_bottombit_01(crypto_int32_x); |
773 | 0 | } |
774 | | |
775 | | __attribute__((unused)) |
776 | | static inline |
777 | 0 | crypto_int32 crypto_int32_nonzero_mask(crypto_int32 crypto_int32_x) { |
778 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
779 | 0 | crypto_int32 crypto_int32_q,crypto_int32_z; |
780 | 0 | __asm__ ("xorl %0,%0\n movl $-1,%1\n testl %2,%2\n cmovnel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); |
781 | 0 | return crypto_int32_z; |
782 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
783 | 0 | crypto_int32 crypto_int32_z; |
784 | 0 | __asm__ ("cmp %w1,0\n csetm %w0,ne" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); |
785 | 0 | return crypto_int32_z; |
786 | 0 | #else |
787 | 0 | crypto_int32_x |= -crypto_int32_x; |
788 | 0 | return crypto_int32_negative_mask(crypto_int32_x); |
789 | 0 | #endif |
790 | 0 | } |
791 | | |
792 | | __attribute__((unused)) |
793 | | static inline |
794 | 0 | crypto_int32 crypto_int32_nonzero_01(crypto_int32 crypto_int32_x) { |
795 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
796 | 0 | crypto_int32 crypto_int32_q,crypto_int32_z; |
797 | 0 | __asm__ ("xorl %0,%0\n movl $1,%1\n testl %2,%2\n cmovnel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); |
798 | 0 | return crypto_int32_z; |
799 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
800 | 0 | crypto_int32 crypto_int32_z; |
801 | 0 | __asm__ ("cmp %w1,0\n cset %w0,ne" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); |
802 | 0 | return crypto_int32_z; |
803 | 0 | #else |
804 | 0 | crypto_int32_x |= -crypto_int32_x; |
805 | 0 | return crypto_int32_unsigned_topbit_01(crypto_int32_x); |
806 | 0 | #endif |
807 | 0 | } |
808 | | |
809 | | __attribute__((unused)) |
810 | | static inline |
811 | 0 | crypto_int32 crypto_int32_positive_mask(crypto_int32 crypto_int32_x) { |
812 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
813 | 0 | crypto_int32 crypto_int32_q,crypto_int32_z; |
814 | 0 | __asm__ ("xorl %0,%0\n movl $-1,%1\n testl %2,%2\n cmovgl %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); |
815 | 0 | return crypto_int32_z; |
816 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
817 | 0 | crypto_int32 crypto_int32_z; |
818 | 0 | __asm__ ("cmp %w1,0\n csetm %w0,gt" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); |
819 | 0 | return crypto_int32_z; |
820 | 0 | #else |
821 | 0 | crypto_int32 crypto_int32_z = -crypto_int32_x; |
822 | 0 | crypto_int32_z ^= crypto_int32_x & crypto_int32_z; |
823 | 0 | return crypto_int32_negative_mask(crypto_int32_z); |
824 | 0 | #endif |
825 | 0 | } |
826 | | |
827 | | __attribute__((unused)) |
828 | | static inline |
829 | 0 | crypto_int32 crypto_int32_positive_01(crypto_int32 crypto_int32_x) { |
830 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
831 | 0 | crypto_int32 crypto_int32_q,crypto_int32_z; |
832 | 0 | __asm__ ("xorl %0,%0\n movl $1,%1\n testl %2,%2\n cmovgl %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); |
833 | 0 | return crypto_int32_z; |
834 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
835 | 0 | crypto_int32 crypto_int32_z; |
836 | 0 | __asm__ ("cmp %w1,0\n cset %w0,gt" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); |
837 | 0 | return crypto_int32_z; |
838 | 0 | #else |
839 | 0 | crypto_int32 crypto_int32_z = -crypto_int32_x; |
840 | 0 | crypto_int32_z ^= crypto_int32_x & crypto_int32_z; |
841 | 0 | return crypto_int32_unsigned_topbit_01(crypto_int32_z); |
842 | 0 | #endif |
843 | 0 | } |
844 | | |
845 | | __attribute__((unused)) |
846 | | static inline |
847 | 0 | crypto_int32 crypto_int32_zero_mask(crypto_int32 crypto_int32_x) { |
848 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
849 | 0 | crypto_int32 crypto_int32_q,crypto_int32_z; |
850 | 0 | __asm__ ("xorl %0,%0\n movl $-1,%1\n testl %2,%2\n cmovel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); |
851 | 0 | return crypto_int32_z; |
852 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
853 | 0 | crypto_int32 crypto_int32_z; |
854 | 0 | __asm__ ("cmp %w1,0\n csetm %w0,eq" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); |
855 | 0 | return crypto_int32_z; |
856 | 0 | #else |
857 | 0 | return ~crypto_int32_nonzero_mask(crypto_int32_x); |
858 | 0 | #endif |
859 | 0 | } |
860 | | |
861 | | __attribute__((unused)) |
862 | | static inline |
863 | 0 | crypto_int32 crypto_int32_zero_01(crypto_int32 crypto_int32_x) { |
864 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
865 | 0 | crypto_int32 crypto_int32_q,crypto_int32_z; |
866 | 0 | __asm__ ("xorl %0,%0\n movl $1,%1\n testl %2,%2\n cmovel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); |
867 | 0 | return crypto_int32_z; |
868 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
869 | 0 | crypto_int32 crypto_int32_z; |
870 | 0 | __asm__ ("cmp %w1,0\n cset %w0,eq" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); |
871 | 0 | return crypto_int32_z; |
872 | 0 | #else |
873 | 0 | return 1-crypto_int32_nonzero_01(crypto_int32_x); |
874 | 0 | #endif |
875 | 0 | } |
876 | | |
877 | | __attribute__((unused)) |
878 | | static inline |
879 | 0 | crypto_int32 crypto_int32_unequal_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { |
880 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
881 | 0 | crypto_int32 crypto_int32_q,crypto_int32_z; |
882 | 0 | __asm__ ("xorl %0,%0\n movl $-1,%1\n cmpl %3,%2\n cmovnel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); |
883 | 0 | return crypto_int32_z; |
884 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
885 | 0 | crypto_int32 crypto_int32_z; |
886 | 0 | __asm__ ("cmp %w1,%w2\n csetm %w0,ne" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); |
887 | 0 | return crypto_int32_z; |
888 | 0 | #else |
889 | 0 | return crypto_int32_nonzero_mask(crypto_int32_x ^ crypto_int32_y); |
890 | 0 | #endif |
891 | 0 | } |
892 | | |
893 | | __attribute__((unused)) |
894 | | static inline |
895 | 0 | crypto_int32 crypto_int32_unequal_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { |
896 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
897 | 0 | crypto_int32 crypto_int32_q,crypto_int32_z; |
898 | 0 | __asm__ ("xorl %0,%0\n movl $1,%1\n cmpl %3,%2\n cmovnel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); |
899 | 0 | return crypto_int32_z; |
900 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
901 | 0 | crypto_int32 crypto_int32_z; |
902 | 0 | __asm__ ("cmp %w1,%w2\n cset %w0,ne" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); |
903 | 0 | return crypto_int32_z; |
904 | 0 | #else |
905 | 0 | return crypto_int32_nonzero_01(crypto_int32_x ^ crypto_int32_y); |
906 | 0 | #endif |
907 | 0 | } |
908 | | |
909 | | __attribute__((unused)) |
910 | | static inline |
911 | 0 | crypto_int32 crypto_int32_equal_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { |
912 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
913 | 0 | crypto_int32 crypto_int32_q,crypto_int32_z; |
914 | 0 | __asm__ ("xorl %0,%0\n movl $-1,%1\n cmpl %3,%2\n cmovel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); |
915 | 0 | return crypto_int32_z; |
916 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
917 | 0 | crypto_int32 crypto_int32_z; |
918 | 0 | __asm__ ("cmp %w1,%w2\n csetm %w0,eq" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); |
919 | 0 | return crypto_int32_z; |
920 | 0 | #else |
921 | 0 | return ~crypto_int32_unequal_mask(crypto_int32_x,crypto_int32_y); |
922 | 0 | #endif |
923 | 0 | } |
924 | | |
925 | | __attribute__((unused)) |
926 | | static inline |
927 | 0 | crypto_int32 crypto_int32_equal_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { |
928 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
929 | 0 | crypto_int32 crypto_int32_q,crypto_int32_z; |
930 | 0 | __asm__ ("xorl %0,%0\n movl $1,%1\n cmpl %3,%2\n cmovel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); |
931 | 0 | return crypto_int32_z; |
932 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
933 | 0 | crypto_int32 crypto_int32_z; |
934 | 0 | __asm__ ("cmp %w1,%w2\n cset %w0,eq" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); |
935 | 0 | return crypto_int32_z; |
936 | 0 | #else |
937 | 0 | return 1-crypto_int32_unequal_01(crypto_int32_x,crypto_int32_y); |
938 | 0 | #endif |
939 | 0 | } |
940 | | |
941 | | __attribute__((unused)) |
942 | | static inline |
943 | 0 | crypto_int32 crypto_int32_min(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { |
944 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
945 | 0 | __asm__ ("cmpl %1,%0\n cmovgl %1,%0" : "+r"(crypto_int32_x) : "r"(crypto_int32_y) : "cc"); |
946 | 0 | return crypto_int32_x; |
947 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
948 | 0 | __asm__ ("cmp %w0,%w1\n csel %w0,%w0,%w1,lt" : "+r"(crypto_int32_x) : "r"(crypto_int32_y) : "cc"); |
949 | 0 | return crypto_int32_x; |
950 | 0 | #else |
951 | 0 | crypto_int64 crypto_int32_r = (crypto_int64)crypto_int32_y ^ (crypto_int64)crypto_int32_x; |
952 | 0 | crypto_int64 crypto_int32_z = (crypto_int64)crypto_int32_y - (crypto_int64)crypto_int32_x; |
953 | 0 | crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_y); |
954 | 0 | crypto_int32_z = crypto_int32_negative_mask(crypto_int32_z); |
955 | 0 | crypto_int32_z &= crypto_int32_r; |
956 | 0 | return crypto_int32_x ^ crypto_int32_z; |
957 | 0 | #endif |
958 | 0 | } |
959 | | |
960 | | __attribute__((unused)) |
961 | | static inline |
962 | 0 | crypto_int32 crypto_int32_max(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { |
963 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
964 | 0 | __asm__ ("cmpl %1,%0\n cmovll %1,%0" : "+r"(crypto_int32_x) : "r"(crypto_int32_y) : "cc"); |
965 | 0 | return crypto_int32_x; |
966 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
967 | 0 | __asm__ ("cmp %w0,%w1\n csel %w0,%w1,%w0,lt" : "+r"(crypto_int32_x) : "r"(crypto_int32_y) : "cc"); |
968 | 0 | return crypto_int32_x; |
969 | 0 | #else |
970 | 0 | crypto_int64 crypto_int32_r = (crypto_int64)crypto_int32_y ^ (crypto_int64)crypto_int32_x; |
971 | 0 | crypto_int64 crypto_int32_z = (crypto_int64)crypto_int32_y - (crypto_int64)crypto_int32_x; |
972 | 0 | crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_y); |
973 | 0 | crypto_int32_z = crypto_int32_negative_mask(crypto_int32_z); |
974 | 0 | crypto_int32_z &= crypto_int32_r; |
975 | 0 | return crypto_int32_y ^ crypto_int32_z; |
976 | 0 | #endif |
977 | 0 | } |
978 | | |
979 | | __attribute__((unused)) |
980 | | static inline |
981 | 58.4M | void crypto_int32_minmax(crypto_int32 *crypto_int32_p,crypto_int32 *crypto_int32_q) { |
982 | 58.4M | crypto_int32 crypto_int32_x = *crypto_int32_p; |
983 | 58.4M | crypto_int32 crypto_int32_y = *crypto_int32_q; |
984 | 58.4M | #if defined(__GNUC__) && defined(__x86_64__) |
985 | 58.4M | crypto_int32 crypto_int32_z; |
986 | 58.4M | __asm__ ("cmpl %2,%1\n movl %1,%0\n cmovgl %2,%1\n cmovgl %0,%2" : "=&r"(crypto_int32_z), "+&r"(crypto_int32_x), "+r"(crypto_int32_y) : : "cc"); |
987 | 58.4M | *crypto_int32_p = crypto_int32_x; |
988 | 58.4M | *crypto_int32_q = crypto_int32_y; |
989 | | #elif defined(__GNUC__) && defined(__aarch64__) |
990 | | crypto_int32 crypto_int32_r, crypto_int32_s; |
991 | | __asm__ ("cmp %w2,%w3\n csel %w0,%w2,%w3,lt\n csel %w1,%w3,%w2,lt" : "=&r"(crypto_int32_r), "=r"(crypto_int32_s) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); |
992 | | *crypto_int32_p = crypto_int32_r; |
993 | | *crypto_int32_q = crypto_int32_s; |
994 | | #else |
995 | | crypto_int64 crypto_int32_r = (crypto_int64)crypto_int32_y ^ (crypto_int64)crypto_int32_x; |
996 | | crypto_int64 crypto_int32_z = (crypto_int64)crypto_int32_y - (crypto_int64)crypto_int32_x; |
997 | | crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_y); |
998 | | crypto_int32_z = crypto_int32_negative_mask(crypto_int32_z); |
999 | | crypto_int32_z &= crypto_int32_r; |
1000 | | crypto_int32_x ^= crypto_int32_z; |
1001 | | crypto_int32_y ^= crypto_int32_z; |
1002 | | *crypto_int32_p = crypto_int32_x; |
1003 | | *crypto_int32_q = crypto_int32_y; |
1004 | | #endif |
1005 | 58.4M | } |
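/* Editor's note: crypto_int32_minmax is a constant-time compare-and-swap; the
 * high execution count is consistent with its use as the core step of a
 * branch-free sorting network (djbsort-style crypto_sort_int32) further down
 * in this file. Illustrative use, not taken from the original source:
 *     crypto_int32 lo = 5, hi = 3;
 *     crypto_int32_minmax(&lo, &hi);    afterwards lo == 3 and hi == 5
 */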
1006 | | |
1007 | | __attribute__((unused)) |
1008 | | static inline |
1009 | 0 | crypto_int32 crypto_int32_smaller_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { |
1010 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1011 | 0 | crypto_int32 crypto_int32_q,crypto_int32_z; |
1012 | 0 | __asm__ ("xorl %0,%0\n movl $-1,%1\n cmpl %3,%2\n cmovll %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); |
1013 | 0 | return crypto_int32_z; |
1014 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1015 | 0 | crypto_int32 crypto_int32_z; |
1016 | 0 | __asm__ ("cmp %w1,%w2\n csetm %w0,lt" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); |
1017 | 0 | return crypto_int32_z; |
1018 | 0 | #else |
1019 | 0 | crypto_int32 crypto_int32_r = crypto_int32_x ^ crypto_int32_y; |
1020 | 0 | crypto_int32 crypto_int32_z = crypto_int32_x - crypto_int32_y; |
1021 | 0 | crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_x); |
1022 | 0 | return crypto_int32_negative_mask(crypto_int32_z); |
1023 | 0 | #endif |
1024 | 0 | } |
1025 | | |
1026 | | __attribute__((unused)) |
1027 | | static inline |
1028 | 0 | crypto_int32 crypto_int32_smaller_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { |
1029 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1030 | 0 | crypto_int32 crypto_int32_q,crypto_int32_z; |
1031 | 0 | __asm__ ("xorl %0,%0\n movl $1,%1\n cmpl %3,%2\n cmovll %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); |
1032 | 0 | return crypto_int32_z; |
1033 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1034 | 0 | crypto_int32 crypto_int32_z; |
1035 | 0 | __asm__ ("cmp %w1,%w2\n cset %w0,lt" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); |
1036 | 0 | return crypto_int32_z; |
1037 | 0 | #else |
1038 | 0 | crypto_int32 crypto_int32_r = crypto_int32_x ^ crypto_int32_y; |
1039 | 0 | crypto_int32 crypto_int32_z = crypto_int32_x - crypto_int32_y; |
1040 | 0 | crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_x); |
1041 | 0 | return crypto_int32_unsigned_topbit_01(crypto_int32_z); |
1042 | 0 | #endif |
1043 | 0 | } |
1044 | | |
1045 | | __attribute__((unused)) |
1046 | | static inline |
1047 | 0 | crypto_int32 crypto_int32_leq_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { |
1048 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1049 | 0 | crypto_int32 crypto_int32_q,crypto_int32_z; |
1050 | 0 | __asm__ ("xorl %0,%0\n movl $-1,%1\n cmpl %3,%2\n cmovlel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); |
1051 | 0 | return crypto_int32_z; |
1052 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1053 | 0 | crypto_int32 crypto_int32_z; |
1054 | 0 | __asm__ ("cmp %w1,%w2\n csetm %w0,le" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); |
1055 | 0 | return crypto_int32_z; |
1056 | 0 | #else |
1057 | 0 | return ~crypto_int32_smaller_mask(crypto_int32_y,crypto_int32_x); |
1058 | 0 | #endif |
1059 | 0 | } |
1060 | | |
1061 | | __attribute__((unused)) |
1062 | | static inline |
1063 | 0 | crypto_int32 crypto_int32_leq_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { |
1064 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1065 | 0 | crypto_int32 crypto_int32_q,crypto_int32_z; |
1066 | 0 | __asm__ ("xorl %0,%0\n movl $1,%1\n cmpl %3,%2\n cmovlel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); |
1067 | 0 | return crypto_int32_z; |
1068 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1069 | 0 | crypto_int32 crypto_int32_z; |
1070 | 0 | __asm__ ("cmp %w1,%w2\n cset %w0,le" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); |
1071 | 0 | return crypto_int32_z; |
1072 | 0 | #else |
1073 | 0 | return 1-crypto_int32_smaller_01(crypto_int32_y,crypto_int32_x); |
1074 | 0 | #endif |
1075 | 0 | } |
1076 | | |
1077 | | __attribute__((unused)) |
1078 | | static inline |
1079 | 0 | int crypto_int32_ones_num(crypto_int32 crypto_int32_x) { |
1080 | 0 | crypto_int32_unsigned crypto_int32_y = crypto_int32_x; |
1081 | 0 | const crypto_int32 C0 = 0x55555555; |
1082 | 0 | const crypto_int32 C1 = 0x33333333; |
1083 | 0 | const crypto_int32 C2 = 0x0f0f0f0f; |
1084 | 0 | crypto_int32_y -= ((crypto_int32_y >> 1) & C0); |
1085 | 0 | crypto_int32_y = (crypto_int32_y & C1) + ((crypto_int32_y >> 2) & C1); |
1086 | 0 | crypto_int32_y = (crypto_int32_y + (crypto_int32_y >> 4)) & C2; |
1087 | 0 | crypto_int32_y += crypto_int32_y >> 8; |
1088 | 0 | crypto_int32_y = (crypto_int32_y + (crypto_int32_y >> 16)) & 0xff; |
1089 | 0 | return crypto_int32_y; |
1090 | 0 | } |
1091 | | |
1092 | | __attribute__((unused)) |
1093 | | static inline |
1094 | 0 | int crypto_int32_bottomzeros_num(crypto_int32 crypto_int32_x) { |
1095 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1096 | 0 | crypto_int32 fallback = 32; |
1097 | 0 | __asm__ ("bsfl %0,%0\n cmovel %1,%0" : "+&r"(crypto_int32_x) : "r"(fallback) : "cc"); |
1098 | 0 | return crypto_int32_x; |
1099 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1100 | 0 | int64_t crypto_int32_y; |
1101 | 0 | __asm__ ("rbit %w0,%w1\n clz %w0,%w0" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : ); |
1102 | 0 | return crypto_int32_y; |
1103 | 0 | #else |
1104 | 0 | crypto_int32 crypto_int32_y = crypto_int32_x ^ (crypto_int32_x-1); |
1105 | 0 | crypto_int32_y = ((crypto_int32) crypto_int32_y) >> 1; |
1106 | 0 | crypto_int32_y &= ~(crypto_int32_x & (((crypto_int32) 1) << (32-1))); |
1107 | 0 | return crypto_int32_ones_num(crypto_int32_y); |
1108 | 0 | #endif |
1109 | 0 | } |
1110 | | |
1111 | | #endif |
1112 | | |
1113 | | /* from supercop-20241022/cryptoint/crypto_int64.h */ |
1114 | | /* auto-generated: cd cryptoint; ./autogen */ |
1115 | | /* cryptoint 20241003 */ |
1116 | | |
1117 | | #ifndef crypto_int64_h |
1118 | | #define crypto_int64_h |
1119 | | |
1120 | | #define crypto_int64 int64_t |
1121 | | #define crypto_int64_unsigned uint64_t |
1122 | | |
1123 | | |
1124 | | |
1125 | | __attribute__((unused)) |
1126 | | static inline |
1127 | 0 | crypto_int64 crypto_int64_load(const unsigned char *crypto_int64_s) { |
1128 | 0 | crypto_int64 crypto_int64_z = 0; |
1129 | 0 | crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 0; |
1130 | 0 | crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 8; |
1131 | 0 | crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 16; |
1132 | 0 | crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 24; |
1133 | 0 | crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 32; |
1134 | 0 | crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 40; |
1135 | 0 | crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 48; |
1136 | 0 | crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 56; |
1137 | 0 | return crypto_int64_z; |
1138 | 0 | } |
1139 | | |
1140 | | __attribute__((unused)) |
1141 | | static inline |
1142 | 0 | crypto_int64 crypto_int64_load_bigendian(const unsigned char *crypto_int64_s) { |
1143 | 0 | crypto_int64 crypto_int64_z = 0; |
1144 | 0 | crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 56; |
1145 | 0 | crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 48; |
1146 | 0 | crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 40; |
1147 | 0 | crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 32; |
1148 | 0 | crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 24; |
1149 | 0 | crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 16; |
1150 | 0 | crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 8; |
1151 | 0 | crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 0; |
1152 | 0 | return crypto_int64_z; |
1153 | 0 | } |
1154 | | |
1155 | | __attribute__((unused)) |
1156 | | static inline |
1157 | 0 | void crypto_int64_store(unsigned char *crypto_int64_s,crypto_int64 crypto_int64_x) { |
1158 | 0 | *crypto_int64_s++ = crypto_int64_x >> 0; |
1159 | 0 | *crypto_int64_s++ = crypto_int64_x >> 8; |
1160 | 0 | *crypto_int64_s++ = crypto_int64_x >> 16; |
1161 | 0 | *crypto_int64_s++ = crypto_int64_x >> 24; |
1162 | 0 | *crypto_int64_s++ = crypto_int64_x >> 32; |
1163 | 0 | *crypto_int64_s++ = crypto_int64_x >> 40; |
1164 | 0 | *crypto_int64_s++ = crypto_int64_x >> 48; |
1165 | 0 | *crypto_int64_s++ = crypto_int64_x >> 56; |
1166 | 0 | } |
1167 | | |
1168 | | __attribute__((unused)) |
1169 | | static inline |
1170 | 0 | void crypto_int64_store_bigendian(unsigned char *crypto_int64_s,crypto_int64 crypto_int64_x) { |
1171 | 0 | *crypto_int64_s++ = crypto_int64_x >> 56; |
1172 | 0 | *crypto_int64_s++ = crypto_int64_x >> 48; |
1173 | 0 | *crypto_int64_s++ = crypto_int64_x >> 40; |
1174 | 0 | *crypto_int64_s++ = crypto_int64_x >> 32; |
1175 | 0 | *crypto_int64_s++ = crypto_int64_x >> 24; |
1176 | 0 | *crypto_int64_s++ = crypto_int64_x >> 16; |
1177 | 0 | *crypto_int64_s++ = crypto_int64_x >> 8; |
1178 | 0 | *crypto_int64_s++ = crypto_int64_x >> 0; |
1179 | 0 | } |
1180 | | |
1181 | | __attribute__((unused)) |
1182 | | static inline |
1183 | 0 | crypto_int64 crypto_int64_negative_mask(crypto_int64 crypto_int64_x) { |
1184 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1185 | 0 | __asm__ ("sarq $63,%0" : "+r"(crypto_int64_x) : : "cc"); |
1186 | 0 | return crypto_int64_x; |
1187 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1188 | 0 | crypto_int64 crypto_int64_y; |
1189 | 0 | __asm__ ("asr %0,%1,63" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : ); |
1190 | 0 | return crypto_int64_y; |
1191 | 0 | #else |
1192 | 0 | crypto_int64_x >>= 64-6; |
1193 | 0 | crypto_int64_x += crypto_int64_optblocker; |
1194 | 0 | crypto_int64_x >>= 5; |
1195 | 0 | return crypto_int64_x; |
1196 | 0 | #endif |
1197 | 0 | } |
1198 | | |
1199 | | __attribute__((unused)) |
1200 | | static inline |
1201 | 0 | crypto_int64_unsigned crypto_int64_unsigned_topbit_01(crypto_int64_unsigned crypto_int64_x) { |
1202 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1203 | 0 | __asm__ ("shrq $63,%0" : "+r"(crypto_int64_x) : : "cc"); |
1204 | 0 | return crypto_int64_x; |
1205 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1206 | 0 | crypto_int64 crypto_int64_y; |
1207 | 0 | __asm__ ("lsr %0,%1,63" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : ); |
1208 | 0 | return crypto_int64_y; |
1209 | 0 | #else |
1210 | 0 | crypto_int64_x >>= 64-6; |
1211 | 0 | crypto_int64_x += crypto_int64_optblocker; |
1212 | 0 | crypto_int64_x >>= 5; |
1213 | 0 | return crypto_int64_x; |
1214 | 0 | #endif |
1215 | 0 | } |
1216 | | |
1217 | | __attribute__((unused)) |
1218 | | static inline |
1219 | 0 | crypto_int64 crypto_int64_negative_01(crypto_int64 crypto_int64_x) { |
1220 | 0 | return crypto_int64_unsigned_topbit_01(crypto_int64_x); |
1221 | 0 | } |
1222 | | |
1223 | | __attribute__((unused)) |
1224 | | static inline |
1225 | 0 | crypto_int64 crypto_int64_topbit_mask(crypto_int64 crypto_int64_x) { |
1226 | 0 | return crypto_int64_negative_mask(crypto_int64_x); |
1227 | 0 | } |
1228 | | |
1229 | | __attribute__((unused)) |
1230 | | static inline |
1231 | 0 | crypto_int64 crypto_int64_topbit_01(crypto_int64 crypto_int64_x) { |
1232 | 0 | return crypto_int64_unsigned_topbit_01(crypto_int64_x); |
1233 | 0 | } |
1234 | | |
1235 | | __attribute__((unused)) |
1236 | | static inline |
1237 | 0 | crypto_int64 crypto_int64_bottombit_mask(crypto_int64 crypto_int64_x) { |
1238 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1239 | 0 | __asm__ ("andq $1,%0" : "+r"(crypto_int64_x) : : "cc"); |
1240 | 0 | return -crypto_int64_x; |
1241 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1242 | 0 | crypto_int64 crypto_int64_y; |
1243 | 0 | __asm__ ("sbfx %0,%1,0,1" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : ); |
1244 | 0 | return crypto_int64_y; |
1245 | 0 | #else |
1246 | 0 | crypto_int64_x &= 1 + crypto_int64_optblocker; |
1247 | 0 | return -crypto_int64_x; |
1248 | 0 | #endif |
1249 | 0 | } |
1250 | | |
1251 | | __attribute__((unused)) |
1252 | | static inline |
1253 | 30.4k | crypto_int64 crypto_int64_bottombit_01(crypto_int64 crypto_int64_x) { |
1254 | 30.4k | #if defined(__GNUC__) && defined(__x86_64__) |
1255 | 30.4k | __asm__ ("andq $1,%0" : "+r"(crypto_int64_x) : : "cc"); |
1256 | 30.4k | return crypto_int64_x; |
1257 | | #elif defined(__GNUC__) && defined(__aarch64__) |
1258 | | crypto_int64 crypto_int64_y; |
1259 | | __asm__ ("ubfx %0,%1,0,1" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : ); |
1260 | | return crypto_int64_y; |
1261 | | #else |
1262 | | crypto_int64_x &= 1 + crypto_int64_optblocker; |
1263 | | return crypto_int64_x; |
1264 | | #endif |
1265 | 30.4k | } |
1266 | | |
1267 | | __attribute__((unused)) |
1268 | | static inline |
1269 | 0 | crypto_int64 crypto_int64_bitinrangepublicpos_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { |
1270 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1271 | 0 | __asm__ ("sarq %%cl,%0" : "+r"(crypto_int64_x) : "c"(crypto_int64_s) : "cc"); |
1272 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1273 | 0 | __asm__ ("asr %0,%0,%1" : "+r"(crypto_int64_x) : "r"(crypto_int64_s) : ); |
1274 | 0 | #else |
1275 | 0 | crypto_int64_x >>= crypto_int64_s ^ crypto_int64_optblocker; |
1276 | 0 | #endif |
1277 | 0 | return crypto_int64_bottombit_mask(crypto_int64_x); |
1278 | 0 | } |
1279 | | |
1280 | | __attribute__((unused)) |
1281 | | static inline |
1282 | 0 | crypto_int64 crypto_int64_bitinrangepublicpos_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { |
1283 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1284 | 0 | __asm__ ("sarq %%cl,%0" : "+r"(crypto_int64_x) : "c"(crypto_int64_s) : "cc"); |
1285 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1286 | 0 | __asm__ ("asr %0,%0,%1" : "+r"(crypto_int64_x) : "r"(crypto_int64_s) : ); |
1287 | 0 | #else |
1288 | 0 | crypto_int64_x >>= crypto_int64_s ^ crypto_int64_optblocker; |
1289 | 0 | #endif |
1290 | 0 | return crypto_int64_bottombit_01(crypto_int64_x); |
1291 | 0 | } |
1292 | | |
1293 | | __attribute__((unused)) |
1294 | | static inline |
1295 | 0 | crypto_int64 crypto_int64_shlmod(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { |
1296 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1297 | 0 | __asm__ ("shlq %%cl,%0" : "+r"(crypto_int64_x) : "c"(crypto_int64_s) : "cc"); |
1298 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1299 | 0 | __asm__ ("lsl %0,%0,%1" : "+r"(crypto_int64_x) : "r"(crypto_int64_s) : ); |
1300 | 0 | #else |
1301 | 0 | int crypto_int64_k, crypto_int64_l; |
1302 | 0 | for (crypto_int64_l = 0,crypto_int64_k = 1;crypto_int64_k < 64;++crypto_int64_l,crypto_int64_k *= 2) |
1303 | 0 | crypto_int64_x ^= (crypto_int64_x ^ (crypto_int64_x << crypto_int64_k)) & crypto_int64_bitinrangepublicpos_mask(crypto_int64_s,crypto_int64_l); |
1304 | 0 | #endif |
1305 | 0 | return crypto_int64_x; |
1306 | 0 | } |
1307 | | |
1308 | | __attribute__((unused)) |
1309 | | static inline |
1310 | 40 | crypto_int64 crypto_int64_shrmod(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { |
1311 | 40 | #if defined(__GNUC__) && defined(__x86_64__) |
1312 | 40 | __asm__ ("sarq %%cl,%0" : "+r"(crypto_int64_x) : "c"(crypto_int64_s) : "cc"); |
1313 | | #elif defined(__GNUC__) && defined(__aarch64__) |
1314 | | __asm__ ("asr %0,%0,%1" : "+r"(crypto_int64_x) : "r"(crypto_int64_s) : ); |
1315 | | #else |
1316 | | int crypto_int64_k, crypto_int64_l; |
1317 | | for (crypto_int64_l = 0,crypto_int64_k = 1;crypto_int64_k < 64;++crypto_int64_l,crypto_int64_k *= 2) |
1318 | | crypto_int64_x ^= (crypto_int64_x ^ (crypto_int64_x >> crypto_int64_k)) & crypto_int64_bitinrangepublicpos_mask(crypto_int64_s,crypto_int64_l); |
1319 | | #endif |
1320 | 40 | return crypto_int64_x; |
1321 | 40 | } |
1322 | | |
1323 | | __attribute__((unused)) |
1324 | | static inline |
1325 | 0 | crypto_int64 crypto_int64_bitmod_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { |
1326 | 0 | crypto_int64_x = crypto_int64_shrmod(crypto_int64_x,crypto_int64_s); |
1327 | 0 | return crypto_int64_bottombit_mask(crypto_int64_x); |
1328 | 0 | } |
1329 | | |
1330 | | __attribute__((unused)) |
1331 | | static inline |
1332 | 40 | crypto_int64 crypto_int64_bitmod_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { |
1333 | 40 | crypto_int64_x = crypto_int64_shrmod(crypto_int64_x,crypto_int64_s); |
1334 | 40 | return crypto_int64_bottombit_01(crypto_int64_x); |
1335 | 40 | } |
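/* crypto_int64_shlmod/shrmod shift by a possibly secret amount s (taken mod
 * 64): the portable fallback applies fixed shifts of 1,2,4,...,32 and uses
 * crypto_int64_bitinrangepublicpos_mask on the bits of s to select each one
 * without branching.  crypto_int64_bitmod_mask/_01 combine shrmod with the
 * bottom-bit helpers to read bit (s mod 64) of x in constant time. */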
1336 | | |
1337 | | __attribute__((unused)) |
1338 | | static inline |
1339 | 0 | crypto_int64 crypto_int64_nonzero_mask(crypto_int64 crypto_int64_x) { |
1340 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1341 | 0 | crypto_int64 crypto_int64_q,crypto_int64_z; |
1342 | 0 | __asm__ ("xorq %0,%0\n movq $-1,%1\n testq %2,%2\n cmovneq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); |
1343 | 0 | return crypto_int64_z; |
1344 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1345 | 0 | crypto_int64 crypto_int64_z; |
1346 | 0 | __asm__ ("cmp %1,0\n csetm %0,ne" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); |
1347 | 0 | return crypto_int64_z; |
1348 | 0 | #else |
1349 | 0 | crypto_int64_x |= -crypto_int64_x; |
1350 | 0 | return crypto_int64_negative_mask(crypto_int64_x); |
1351 | 0 | #endif |
1352 | 0 | } |
1353 | | |
1354 | | __attribute__((unused)) |
1355 | | static inline |
1356 | 0 | crypto_int64 crypto_int64_nonzero_01(crypto_int64 crypto_int64_x) { |
1357 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1358 | 0 | crypto_int64 crypto_int64_q,crypto_int64_z; |
1359 | 0 | __asm__ ("xorq %0,%0\n movq $1,%1\n testq %2,%2\n cmovneq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); |
1360 | 0 | return crypto_int64_z; |
1361 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1362 | 0 | crypto_int64 crypto_int64_z; |
1363 | 0 | __asm__ ("cmp %1,0\n cset %0,ne" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); |
1364 | 0 | return crypto_int64_z; |
1365 | 0 | #else |
1366 | 0 | crypto_int64_x |= -crypto_int64_x; |
1367 | 0 | return crypto_int64_unsigned_topbit_01(crypto_int64_x); |
1368 | 0 | #endif |
1369 | 0 | } |
1370 | | |
1371 | | __attribute__((unused)) |
1372 | | static inline |
1373 | 0 | crypto_int64 crypto_int64_positive_mask(crypto_int64 crypto_int64_x) { |
1374 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1375 | 0 | crypto_int64 crypto_int64_q,crypto_int64_z; |
1376 | 0 | __asm__ ("xorq %0,%0\n movq $-1,%1\n testq %2,%2\n cmovgq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); |
1377 | 0 | return crypto_int64_z; |
1378 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1379 | 0 | crypto_int64 crypto_int64_z; |
1380 | 0 | __asm__ ("cmp %1,0\n csetm %0,gt" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); |
1381 | 0 | return crypto_int64_z; |
1382 | 0 | #else |
1383 | 0 | crypto_int64 crypto_int64_z = -crypto_int64_x; |
1384 | 0 | crypto_int64_z ^= crypto_int64_x & crypto_int64_z; |
1385 | 0 | return crypto_int64_negative_mask(crypto_int64_z); |
1386 | 0 | #endif |
1387 | 0 | } |
1388 | | |
1389 | | __attribute__((unused)) |
1390 | | static inline |
1391 | 0 | crypto_int64 crypto_int64_positive_01(crypto_int64 crypto_int64_x) { |
1392 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1393 | 0 | crypto_int64 crypto_int64_q,crypto_int64_z; |
1394 | 0 | __asm__ ("xorq %0,%0\n movq $1,%1\n testq %2,%2\n cmovgq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); |
1395 | 0 | return crypto_int64_z; |
1396 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1397 | 0 | crypto_int64 crypto_int64_z; |
1398 | 0 | __asm__ ("cmp %1,0\n cset %0,gt" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); |
1399 | 0 | return crypto_int64_z; |
1400 | 0 | #else |
1401 | 0 | crypto_int64 crypto_int64_z = -crypto_int64_x; |
1402 | 0 | crypto_int64_z ^= crypto_int64_x & crypto_int64_z; |
1403 | 0 | return crypto_int64_unsigned_topbit_01(crypto_int64_z); |
1404 | 0 | #endif |
1405 | 0 | } |
1406 | | |
1407 | | __attribute__((unused)) |
1408 | | static inline |
1409 | 0 | crypto_int64 crypto_int64_zero_mask(crypto_int64 crypto_int64_x) { |
1410 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1411 | 0 | crypto_int64 crypto_int64_q,crypto_int64_z; |
1412 | 0 | __asm__ ("xorq %0,%0\n movq $-1,%1\n testq %2,%2\n cmoveq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); |
1413 | 0 | return crypto_int64_z; |
1414 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1415 | 0 | crypto_int64 crypto_int64_z; |
1416 | 0 | __asm__ ("cmp %1,0\n csetm %0,eq" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); |
1417 | 0 | return crypto_int64_z; |
1418 | 0 | #else |
1419 | 0 | return ~crypto_int64_nonzero_mask(crypto_int64_x); |
1420 | 0 | #endif |
1421 | 0 | } |
1422 | | |
1423 | | __attribute__((unused)) |
1424 | | static inline |
1425 | 0 | crypto_int64 crypto_int64_zero_01(crypto_int64 crypto_int64_x) { |
1426 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1427 | 0 | crypto_int64 crypto_int64_q,crypto_int64_z; |
1428 | 0 | __asm__ ("xorq %0,%0\n movq $1,%1\n testq %2,%2\n cmoveq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); |
1429 | 0 | return crypto_int64_z; |
1430 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1431 | 0 | crypto_int64 crypto_int64_z; |
1432 | 0 | __asm__ ("cmp %1,0\n cset %0,eq" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); |
1433 | 0 | return crypto_int64_z; |
1434 | 0 | #else |
1435 | 0 | return 1-crypto_int64_nonzero_01(crypto_int64_x); |
1436 | 0 | #endif |
1437 | 0 | } |
1438 | | |
1439 | | __attribute__((unused)) |
1440 | | static inline |
1441 | 0 | crypto_int64 crypto_int64_unequal_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { |
1442 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1443 | 0 | crypto_int64 crypto_int64_q,crypto_int64_z; |
1444 | 0 | __asm__ ("xorq %0,%0\n movq $-1,%1\n cmpq %3,%2\n cmovneq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); |
1445 | 0 | return crypto_int64_z; |
1446 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1447 | 0 | crypto_int64 crypto_int64_z; |
1448 | 0 | __asm__ ("cmp %1,%2\n csetm %0,ne" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); |
1449 | 0 | return crypto_int64_z; |
1450 | 0 | #else |
1451 | 0 | return crypto_int64_nonzero_mask(crypto_int64_x ^ crypto_int64_y); |
1452 | 0 | #endif |
1453 | 0 | } |
1454 | | |
1455 | | __attribute__((unused)) |
1456 | | static inline |
1457 | 0 | crypto_int64 crypto_int64_unequal_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { |
1458 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1459 | 0 | crypto_int64 crypto_int64_q,crypto_int64_z; |
1460 | 0 | __asm__ ("xorq %0,%0\n movq $1,%1\n cmpq %3,%2\n cmovneq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); |
1461 | 0 | return crypto_int64_z; |
1462 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1463 | 0 | crypto_int64 crypto_int64_z; |
1464 | 0 | __asm__ ("cmp %1,%2\n cset %0,ne" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); |
1465 | 0 | return crypto_int64_z; |
1466 | 0 | #else |
1467 | 0 | return crypto_int64_nonzero_01(crypto_int64_x ^ crypto_int64_y); |
1468 | 0 | #endif |
1469 | 0 | } |
1470 | | |
1471 | | __attribute__((unused)) |
1472 | | static inline |
1473 | 0 | crypto_int64 crypto_int64_equal_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { |
1474 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1475 | 0 | crypto_int64 crypto_int64_q,crypto_int64_z; |
1476 | 0 | __asm__ ("xorq %0,%0\n movq $-1,%1\n cmpq %3,%2\n cmoveq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); |
1477 | 0 | return crypto_int64_z; |
1478 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1479 | 0 | crypto_int64 crypto_int64_z; |
1480 | 0 | __asm__ ("cmp %1,%2\n csetm %0,eq" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); |
1481 | 0 | return crypto_int64_z; |
1482 | 0 | #else |
1483 | 0 | return ~crypto_int64_unequal_mask(crypto_int64_x,crypto_int64_y); |
1484 | 0 | #endif |
1485 | 0 | } |
1486 | | |
1487 | | __attribute__((unused)) |
1488 | | static inline |
1489 | 0 | crypto_int64 crypto_int64_equal_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { |
1490 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1491 | 0 | crypto_int64 crypto_int64_q,crypto_int64_z; |
1492 | 0 | __asm__ ("xorq %0,%0\n movq $1,%1\n cmpq %3,%2\n cmoveq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); |
1493 | 0 | return crypto_int64_z; |
1494 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1495 | 0 | crypto_int64 crypto_int64_z; |
1496 | 0 | __asm__ ("cmp %1,%2\n cset %0,eq" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); |
1497 | 0 | return crypto_int64_z; |
1498 | 0 | #else |
1499 | 0 | return 1-crypto_int64_unequal_01(crypto_int64_x,crypto_int64_y); |
1500 | 0 | #endif |
1501 | 0 | } |
1502 | | |
1503 | | __attribute__((unused)) |
1504 | | static inline |
1505 | 0 | crypto_int64 crypto_int64_min(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { |
1506 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1507 | 0 | __asm__ ("cmpq %1,%0\n cmovgq %1,%0" : "+r"(crypto_int64_x) : "r"(crypto_int64_y) : "cc"); |
1508 | 0 | return crypto_int64_x; |
1509 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1510 | 0 | __asm__ ("cmp %0,%1\n csel %0,%0,%1,lt" : "+r"(crypto_int64_x) : "r"(crypto_int64_y) : "cc"); |
1511 | 0 | return crypto_int64_x; |
1512 | 0 | #else |
1513 | 0 | crypto_int64 crypto_int64_r = crypto_int64_y ^ crypto_int64_x; |
1514 | 0 | crypto_int64 crypto_int64_z = crypto_int64_y - crypto_int64_x; |
1515 | 0 | crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_y); |
1516 | 0 | crypto_int64_z = crypto_int64_negative_mask(crypto_int64_z); |
1517 | 0 | crypto_int64_z &= crypto_int64_r; |
1518 | 0 | return crypto_int64_x ^ crypto_int64_z; |
1519 | 0 | #endif |
1520 | 0 | } |
1521 | | |
1522 | | __attribute__((unused)) |
1523 | | static inline |
1524 | 0 | crypto_int64 crypto_int64_max(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { |
1525 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1526 | 0 | __asm__ ("cmpq %1,%0\n cmovlq %1,%0" : "+r"(crypto_int64_x) : "r"(crypto_int64_y) : "cc"); |
1527 | 0 | return crypto_int64_x; |
1528 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1529 | 0 | __asm__ ("cmp %0,%1\n csel %0,%1,%0,lt" : "+r"(crypto_int64_x) : "r"(crypto_int64_y) : "cc"); |
1530 | 0 | return crypto_int64_x; |
1531 | 0 | #else |
1532 | 0 | crypto_int64 crypto_int64_r = crypto_int64_y ^ crypto_int64_x; |
1533 | 0 | crypto_int64 crypto_int64_z = crypto_int64_y - crypto_int64_x; |
1534 | 0 | crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_y); |
1535 | 0 | crypto_int64_z = crypto_int64_negative_mask(crypto_int64_z); |
1536 | 0 | crypto_int64_z &= crypto_int64_r; |
1537 | 0 | return crypto_int64_y ^ crypto_int64_z; |
1538 | 0 | #endif |
1539 | 0 | } |
1540 | | |
1541 | | __attribute__((unused)) |
1542 | | static inline |
1543 | 0 | void crypto_int64_minmax(crypto_int64 *crypto_int64_p,crypto_int64 *crypto_int64_q) { |
1544 | 0 | crypto_int64 crypto_int64_x = *crypto_int64_p; |
1545 | 0 | crypto_int64 crypto_int64_y = *crypto_int64_q; |
1546 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1547 | 0 | crypto_int64 crypto_int64_z; |
1548 | 0 | __asm__ ("cmpq %2,%1\n movq %1,%0\n cmovgq %2,%1\n cmovgq %0,%2" : "=&r"(crypto_int64_z), "+&r"(crypto_int64_x), "+r"(crypto_int64_y) : : "cc"); |
1549 | 0 | *crypto_int64_p = crypto_int64_x; |
1550 | 0 | *crypto_int64_q = crypto_int64_y; |
1551 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1552 | 0 | crypto_int64 crypto_int64_r, crypto_int64_s; |
1553 | 0 | __asm__ ("cmp %2,%3\n csel %0,%2,%3,lt\n csel %1,%3,%2,lt" : "=&r"(crypto_int64_r), "=r"(crypto_int64_s) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); |
1554 | 0 | *crypto_int64_p = crypto_int64_r; |
1555 | 0 | *crypto_int64_q = crypto_int64_s; |
1556 | 0 | #else |
1557 | 0 | crypto_int64 crypto_int64_r = crypto_int64_y ^ crypto_int64_x; |
1558 | 0 | crypto_int64 crypto_int64_z = crypto_int64_y - crypto_int64_x; |
1559 | 0 | crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_y); |
1560 | 0 | crypto_int64_z = crypto_int64_negative_mask(crypto_int64_z); |
1561 | 0 | crypto_int64_z &= crypto_int64_r; |
1562 | 0 | crypto_int64_x ^= crypto_int64_z; |
1563 | 0 | crypto_int64_y ^= crypto_int64_z; |
1564 | 0 | *crypto_int64_p = crypto_int64_x; |
1565 | 0 | *crypto_int64_q = crypto_int64_y; |
1566 | 0 | #endif |
1567 | 0 | } |
1568 | | |
1569 | | __attribute__((unused)) |
1570 | | static inline |
1571 | 0 | crypto_int64 crypto_int64_smaller_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { |
1572 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1573 | 0 | crypto_int64 crypto_int64_q,crypto_int64_z; |
1574 | 0 | __asm__ ("xorq %0,%0\n movq $-1,%1\n cmpq %3,%2\n cmovlq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); |
1575 | 0 | return crypto_int64_z; |
1576 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1577 | 0 | crypto_int64 crypto_int64_z; |
1578 | 0 | __asm__ ("cmp %1,%2\n csetm %0,lt" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); |
1579 | 0 | return crypto_int64_z; |
1580 | 0 | #else |
1581 | 0 | crypto_int64 crypto_int64_r = crypto_int64_x ^ crypto_int64_y; |
1582 | 0 | crypto_int64 crypto_int64_z = crypto_int64_x - crypto_int64_y; |
1583 | 0 | crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_x); |
1584 | 0 | return crypto_int64_negative_mask(crypto_int64_z); |
1585 | 0 | #endif |
1586 | 0 | } |
1587 | | |
1588 | | __attribute__((unused)) |
1589 | | static inline |
1590 | 0 | crypto_int64 crypto_int64_smaller_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { |
1591 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1592 | 0 | crypto_int64 crypto_int64_q,crypto_int64_z; |
1593 | 0 | __asm__ ("xorq %0,%0\n movq $1,%1\n cmpq %3,%2\n cmovlq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); |
1594 | 0 | return crypto_int64_z; |
1595 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1596 | 0 | crypto_int64 crypto_int64_z; |
1597 | 0 | __asm__ ("cmp %1,%2\n cset %0,lt" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); |
1598 | 0 | return crypto_int64_z; |
1599 | 0 | #else |
1600 | 0 | crypto_int64 crypto_int64_r = crypto_int64_x ^ crypto_int64_y; |
1601 | 0 | crypto_int64 crypto_int64_z = crypto_int64_x - crypto_int64_y; |
1602 | 0 | crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_x); |
1603 | 0 | return crypto_int64_unsigned_topbit_01(crypto_int64_z); |
1604 | 0 | #endif |
1605 | 0 | } |
1606 | | |
1607 | | __attribute__((unused)) |
1608 | | static inline |
1609 | 0 | crypto_int64 crypto_int64_leq_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { |
1610 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1611 | 0 | crypto_int64 crypto_int64_q,crypto_int64_z; |
1612 | 0 | __asm__ ("xorq %0,%0\n movq $-1,%1\n cmpq %3,%2\n cmovleq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); |
1613 | 0 | return crypto_int64_z; |
1614 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1615 | 0 | crypto_int64 crypto_int64_z; |
1616 | 0 | __asm__ ("cmp %1,%2\n csetm %0,le" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); |
1617 | 0 | return crypto_int64_z; |
1618 | 0 | #else |
1619 | 0 | return ~crypto_int64_smaller_mask(crypto_int64_y,crypto_int64_x); |
1620 | 0 | #endif |
1621 | 0 | } |
1622 | | |
1623 | | __attribute__((unused)) |
1624 | | static inline |
1625 | 0 | crypto_int64 crypto_int64_leq_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { |
1626 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1627 | 0 | crypto_int64 crypto_int64_q,crypto_int64_z; |
1628 | 0 | __asm__ ("xorq %0,%0\n movq $1,%1\n cmpq %3,%2\n cmovleq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); |
1629 | 0 | return crypto_int64_z; |
1630 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1631 | 0 | crypto_int64 crypto_int64_z; |
1632 | 0 | __asm__ ("cmp %1,%2\n cset %0,le" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); |
1633 | 0 | return crypto_int64_z; |
1634 | 0 | #else |
1635 | 0 | return 1-crypto_int64_smaller_01(crypto_int64_y,crypto_int64_x); |
1636 | 0 | #endif |
1637 | 0 | } |
1638 | | |
1639 | | __attribute__((unused)) |
1640 | | static inline |
1641 | 0 | int crypto_int64_ones_num(crypto_int64 crypto_int64_x) { |
1642 | 0 | crypto_int64_unsigned crypto_int64_y = crypto_int64_x; |
1643 | 0 | const crypto_int64 C0 = 0x5555555555555555; |
1644 | 0 | const crypto_int64 C1 = 0x3333333333333333; |
1645 | 0 | const crypto_int64 C2 = 0x0f0f0f0f0f0f0f0f; |
1646 | 0 | crypto_int64_y -= ((crypto_int64_y >> 1) & C0); |
1647 | 0 | crypto_int64_y = (crypto_int64_y & C1) + ((crypto_int64_y >> 2) & C1); |
1648 | 0 | crypto_int64_y = (crypto_int64_y + (crypto_int64_y >> 4)) & C2; |
1649 | 0 | crypto_int64_y += crypto_int64_y >> 8; |
1650 | 0 | crypto_int64_y += crypto_int64_y >> 16; |
1651 | 0 | crypto_int64_y = (crypto_int64_y + (crypto_int64_y >> 32)) & 0xff; |
1652 | 0 | return crypto_int64_y; |
1653 | 0 | } |
1654 | | |
1655 | | __attribute__((unused)) |
1656 | | static inline |
1657 | 0 | int crypto_int64_bottomzeros_num(crypto_int64 crypto_int64_x) { |
1658 | 0 | #if defined(__GNUC__) && defined(__x86_64__) |
1659 | 0 | crypto_int64 fallback = 64; |
1660 | 0 | __asm__ ("bsfq %0,%0\n cmoveq %1,%0" : "+&r"(crypto_int64_x) : "r"(fallback) : "cc"); |
1661 | 0 | return crypto_int64_x; |
1662 | 0 | #elif defined(__GNUC__) && defined(__aarch64__) |
1663 | 0 | int64_t crypto_int64_y; |
1664 | 0 | __asm__ ("rbit %0,%1\n clz %0,%0" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : ); |
1665 | 0 | return crypto_int64_y; |
1666 | 0 | #else |
1667 | 0 | crypto_int64 crypto_int64_y = crypto_int64_x ^ (crypto_int64_x-1); |
1668 | 0 | crypto_int64_y = ((crypto_int64) crypto_int64_y) >> 1; |
1669 | 0 | crypto_int64_y &= ~(crypto_int64_x & (((crypto_int64) 1) << (64-1))); |
1670 | 0 | return crypto_int64_ones_num(crypto_int64_y); |
1671 | 0 | #endif |
1672 | 0 | } |
1673 | | |
1674 | | #endif |
1675 | | |
1676 | | /* from supercop-20241022/crypto_sort/int32/portable4/sort.c */ |
1677 | 58.5M | #define int32_MINMAX(a,b) crypto_int32_minmax(&a,&b) |
1678 | | |
1679 | | static void crypto_sort_int32(void *array,long long n) |
1680 | 3.48k | { |
1681 | 3.48k | long long top,p,q,r,i,j; |
1682 | 3.48k | int32 *x = array; |
1683 | | |
1684 | 3.48k | if (n < 2) return; |
1685 | 3.48k | top = 1; |
1686 | 34.8k | while (top < n - top) top += top; |
1687 | | |
1688 | 38.3k | for (p = top;p >= 1;p >>= 1) { |
1689 | 34.8k | i = 0; |
1690 | 2.66M | while (i + 2 * p <= n) { |
1691 | 13.3M | for (j = i;j < i + p;++j) |
1692 | 10.6M | int32_MINMAX(x[j],x[j+p]); |
1693 | 2.62M | i += 2 * p; |
1694 | 2.62M | } |
1695 | 1.64M | for (j = i;j < n - p;++j) |
1696 | 1.61M | int32_MINMAX(x[j],x[j+p]); |
1697 | | |
1698 | 34.8k | i = 0; |
1699 | 34.8k | j = 0; |
1700 | 191k | for (q = top;q > p;q >>= 1) { |
1701 | 156k | if (j != i) for (;;) { |
1702 | 87.1k | if (j == n - q) goto done; |
1703 | 87.1k | int32 a = x[j + p]; |
1704 | 404k | for (r = q;r > p;r >>= 1) |
1705 | 317k | int32_MINMAX(a,x[j + r]); |
1706 | 87.1k | x[j + p] = a; |
1707 | 87.1k | ++j; |
1708 | 87.1k | if (j == i + p) { |
1709 | 45.3k | i += 2 * p; |
1710 | 45.3k | break; |
1711 | 45.3k | } |
1712 | 87.1k | } |
1713 | 2.73M | while (i + p <= n - q) { |
1714 | 12.2M | for (j = i;j < i + p;++j) { |
1715 | 9.65M | int32 a = x[j + p]; |
1716 | 54.3M | for (r = q;r > p;r >>= 1) |
1717 | 44.7M | int32_MINMAX(a,x[j+r]); |
1718 | 9.65M | x[j + p] = a; |
1719 | 9.65M | } |
1720 | 2.57M | i += 2 * p; |
1721 | 2.57M | } |
1722 | | /* now i + p > n - q */ |
1723 | 156k | j = i; |
1724 | 1.07M | while (j < n - q) { |
1725 | 920k | int32 a = x[j + p]; |
1726 | 2.01M | for (r = q;r > p;r >>= 1) |
1727 | 1.09M | int32_MINMAX(a,x[j+r]); |
1728 | 920k | x[j + p] = a; |
1729 | 920k | ++j; |
1730 | 920k | } |
1731 | | |
1732 | 156k | done: ; |
1733 | 156k | } |
1734 | 34.8k | } |
1735 | 3.48k | } |
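/* This is a branch-free merging sort for int32 values: the sequence of
 * compare-exchange positions depends only on n, never on the data, so
 * together with the constant-time int32_MINMAX the whole sort runs in
 * data-independent time. */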
1736 | | |
1737 | | /* from supercop-20241022/crypto_sort/uint32/useint32/sort.c */ |
1738 | | |
1739 | | /* can save time by vectorizing xor loops */ |
1740 | | /* can save time by integrating xor loops with int32_sort */ |
1741 | | |
1742 | | static void crypto_sort_uint32(void *array,long long n) |
1743 | 3.48k | { |
1744 | 3.48k | crypto_uint32 *x = array; |
1745 | 3.48k | long long j; |
1746 | 2.65M | for (j = 0;j < n;++j) x[j] ^= 0x80000000; |
1747 | 3.48k | crypto_sort_int32(array,n); |
1748 | 2.65M | for (j = 0;j < n;++j) x[j] ^= 0x80000000; |
1749 | 3.48k | } |
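/* Sorting uint32 values reuses the signed sorter: XORing 0x80000000 flips the
 * top bit, which maps unsigned order onto two's-complement signed order
 * (for example 0x00000000 becomes INT32_MIN and 0xffffffff becomes
 * INT32_MAX), and a second XOR afterwards restores the original values. */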
1750 | | |
1751 | | /* from supercop-20241022/crypto_kem/sntrup761/compact/kem.c */ |
1752 | | // 20240806 djb: some automated conversion to cryptoint |
1753 | | |
1754 | 34.5G | #define p 761 |
1755 | 73.1G | #define q 4591 |
1756 | 1.01M | #define w 286 |
1757 | 2.74M | #define q12 ((q - 1) / 2) |
1758 | | typedef int8_t small; |
1759 | | typedef int16_t Fq; |
1760 | 44.1k | #define Hash_bytes 32 |
1761 | 21.8k | #define Small_bytes ((p + 3) / 4) |
1762 | | typedef small Inputs[p]; |
1763 | 3.52k | #define SecretKeys_bytes (2 * Small_bytes) |
1764 | 40 | #define Confirm_bytes 32 |
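/* Streamlined NTRU Prime parameter set sntrup761: polynomials have p = 761
 * coefficients, "small" elements take values in {-1,0,1}, Fq elements are
 * integers mod q = 4591 kept in the centered range [-q12,q12] = [-2295,2295],
 * and short elements have exactly w = 286 nonzero coefficients.
 * Small_bytes = (761+3)/4 = 191 is the packed size of a small element. */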
1765 | | |
1766 | 8.08G | static small F3_freeze(int16_t x) { return x - 3 * ((10923 * x + 16384) >> 15); } |
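/* F3_freeze reduces x mod 3 to the representative in {-1,0,1}: 10923 is
 * roughly 2^15/3, so (10923*x + 16384) >> 15 approximates round(x/3) and the
 * subtraction leaves the centered residue, for example
 * F3_freeze(4) == 1, F3_freeze(5) == -1, F3_freeze(-4) == -1. */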
1767 | | |
1768 | 8.12G | static Fq Fq_freeze(int32_t x) { |
1769 | 8.12G | const int32_t q16 = (0x10000 + q / 2) / q; |
1770 | 8.12G | const int32_t q20 = (0x100000 + q / 2) / q; |
1771 | 8.12G | const int32_t q28 = (0x10000000 + q / 2) / q; |
1772 | 8.12G | x -= q * ((q16 * x) >> 16); |
1773 | 8.12G | x -= q * ((q20 * x) >> 20); |
1774 | 8.12G | return x - q * ((q28 * x + 0x8000000) >> 28); |
1775 | 8.12G | } |
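/* Fq_freeze reduces an int32 to the representative of x mod q in the centered
 * range [-2295,2295] without divisions or branches: q16, q20 and q28 are
 * scaled approximations of 1/q, and the first two steps shrink |x| so that
 * the final rounded multiply can subtract the nearest multiple of q, for
 * example Fq_freeze(4591) == 0 and Fq_freeze(2296) == -2295. */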
1776 | | |
1777 | 40 | static int Weightw_mask(small *r) { |
1778 | 40 | int i, weight = 0; |
1779 | 30.4k | for (i = 0; i < p; ++i) weight += crypto_int64_bottombit_01(r[i]); |
1780 | 40 | return crypto_int16_nonzero_mask(weight - w); |
1781 | 40 | } |
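/* Weightw_mask counts the nonzero entries of r (each entry is in {-1,0,1},
 * so its bottom bit is 1 exactly when it is nonzero) and returns 0 if the
 * weight equals w, or -1 otherwise, all in constant time. */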
1782 | | |
1783 | 121k | static void uint32_divmod_uint14(uint32_t *Q, uint16_t *r, uint32_t x, uint16_t m) { |
1784 | 121k | uint32_t qpart, mask, v = 0x80000000 / m; |
1785 | 121k | qpart = (x * (uint64_t)v) >> 31; |
1786 | 121k | x -= qpart * m; |
1787 | 121k | *Q = qpart; |
1788 | 121k | qpart = (x * (uint64_t)v) >> 31; |
1789 | 121k | x -= qpart * m; |
1790 | 121k | *Q += qpart; |
1791 | 121k | x -= m; |
1792 | 121k | *Q += 1; |
1793 | 121k | mask = crypto_int32_negative_mask(x); |
1794 | 121k | x += mask & (uint32_t)m; |
1795 | 121k | *Q += mask; |
1796 | 121k | *r = x; |
1797 | 121k | } |
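/* Constant-time division of a uint32 by a modulus m < 16384 (hence "uint14"):
 * v = 2^31/m is a truncated reciprocal, two multiply-and-subtract rounds
 * leave a small remainder, and the final masked correction folds the
 * remainder back into [0,m) while fixing up the quotient. */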
1798 | | |
1799 | 60.8k | static uint16_t uint32_mod_uint14(uint32_t x, uint16_t m) { |
1800 | 60.8k | uint32_t Q; |
1801 | 60.8k | uint16_t r; |
1802 | 60.8k | uint32_divmod_uint14(&Q, &r, x, m); |
1803 | 60.8k | return r; |
1804 | 60.8k | } |
1805 | | |
1806 | 38.7k | static void Encode(unsigned char *out, const uint16_t *R, const uint16_t *M, long long len) { |
1807 | 38.7k | if (len == 1) { |
1808 | 3.52k | uint16_t r = R[0], m = M[0]; |
1809 | 10.5k | while (m > 1) { |
1810 | 7.05k | *out++ = r; |
1811 | 7.05k | r >>= 8; |
1812 | 7.05k | m = (m + 255) >> 8; |
1813 | 7.05k | } |
1814 | 3.52k | } |
1815 | 38.7k | if (len > 1) { |
1816 | 35.2k | uint16_t R2[(len + 1) / 2], M2[(len + 1) / 2]; |
1817 | 35.2k | long long i; |
1818 | 2.71M | for (i = 0; i < len - 1; i += 2) { |
1819 | 2.68M | uint32_t m0 = M[i]; |
1820 | 2.68M | uint32_t r = R[i] + R[i + 1] * m0; |
1821 | 2.68M | uint32_t m = M[i + 1] * m0; |
1822 | 6.75M | while (m >= 16384) { |
1823 | 4.07M | *out++ = r; |
1824 | 4.07M | r >>= 8; |
1825 | 4.07M | m = (m + 255) >> 8; |
1826 | 4.07M | } |
1827 | 2.68M | R2[i / 2] = r; |
1828 | 2.68M | M2[i / 2] = m; |
1829 | 2.68M | } |
1830 | 35.2k | if (i < len) { |
1831 | 14.1k | R2[i / 2] = R[i]; |
1832 | 14.1k | M2[i / 2] = M[i]; |
1833 | 14.1k | } |
1834 | 35.2k | Encode(out, R2, M2, (len + 1) / 2); |
1835 | 35.2k | } |
1836 | 38.7k | } |
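/* Encode packs a sequence R[i] of values with R[i] < M[i] into bytes by
 * repeatedly merging adjacent pairs into R[i] + R[i+1]*M[i] with modulus
 * M[i]*M[i+1], emitting low bytes whenever the combined modulus reaches 2^14
 * so the recursion always works on at most 14-bit moduli.  For example, with
 * R = {5,7} and M = {10,12} the pair becomes 5 + 7*10 = 75 with modulus 120,
 * and the final len == 1 step writes the single byte 75. */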
1837 | | |
1838 | 880 | static void Decode(uint16_t *out, const unsigned char *S, const uint16_t *M, long long len) { |
1839 | 880 | if (len == 1) { |
1840 | 80 | if (M[0] == 1) |
1841 | 0 | *out = 0; |
1842 | 80 | else if (M[0] <= 256) |
1843 | 0 | *out = uint32_mod_uint14(S[0], M[0]); |
1844 | 80 | else |
1845 | 80 | *out = uint32_mod_uint14(S[0] + (((uint16_t)S[1]) << 8), M[0]); |
1846 | 80 | } |
1847 | 880 | if (len > 1) { |
1848 | 800 | uint16_t R2[(len + 1) / 2], M2[(len + 1) / 2], bottomr[len / 2]; |
1849 | 800 | uint32_t bottomt[len / 2]; |
1850 | 800 | long long i; |
1851 | 61.6k | for (i = 0; i < len - 1; i += 2) { |
1852 | 60.8k | uint32_t m = M[i] * (uint32_t)M[i + 1]; |
1853 | 60.8k | if (m > 256 * 16383) { |
1854 | 25.6k | bottomt[i / 2] = 256 * 256; |
1855 | 25.6k | bottomr[i / 2] = S[0] + 256 * S[1]; |
1856 | 25.6k | S += 2; |
1857 | 25.6k | M2[i / 2] = (((m + 255) >> 8) + 255) >> 8; |
1858 | 35.1k | } else if (m >= 16384) { |
1859 | 35.1k | bottomt[i / 2] = 256; |
1860 | 35.1k | bottomr[i / 2] = S[0]; |
1861 | 35.1k | S += 1; |
1862 | 35.1k | M2[i / 2] = (m + 255) >> 8; |
1863 | 35.1k | } else { |
1864 | 0 | bottomt[i / 2] = 1; |
1865 | 0 | bottomr[i / 2] = 0; |
1866 | 0 | M2[i / 2] = m; |
1867 | 0 | } |
1868 | 60.8k | } |
1869 | 800 | if (i < len) M2[i / 2] = M[i]; |
1870 | 800 | Decode(R2, S, M2, (len + 1) / 2); |
1871 | 61.6k | for (i = 0; i < len - 1; i += 2) { |
1872 | 60.8k | uint32_t r1, r = bottomr[i / 2]; |
1873 | 60.8k | uint16_t r0; |
1874 | 60.8k | r += bottomt[i / 2] * R2[i / 2]; |
1875 | 60.8k | uint32_divmod_uint14(&r1, &r0, r, M[i]); |
1876 | 60.8k | r1 = uint32_mod_uint14(r1, M[i + 1]); |
1877 | 60.8k | *out++ = r0; |
1878 | 60.8k | *out++ = r1; |
1879 | 60.8k | } |
1880 | 800 | if (i < len) *out++ = R2[i / 2]; |
1881 | 800 | } |
1882 | 880 | } |
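/* Decode is the inverse of Encode: it reconstructs from the moduli alone how
 * many bytes Encode emitted for each pair, decodes the merged values
 * recursively, and then splits each one back into R[i] and R[i+1] with
 * uint32_divmod_uint14. */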
1883 | | |
1884 | 40 | static void R3_fromRq(small *out, const Fq *r) { |
1885 | 40 | int i; |
1886 | 30.4k | for (i = 0; i < p; ++i) out[i] = F3_freeze(r[i]); |
1887 | 40 | } |
1888 | | |
1889 | 40 | static void R3_mult(small *h, const small *f, const small *g) { |
1890 | 40 | int16_t fg[p + p - 1]; |
1891 | 40 | int i, j; |
1892 | 60.8k | for (i = 0; i < p + p - 1; ++i) fg[i] = 0; |
1893 | 30.4k | for (i = 0; i < p; ++i) |
1894 | 23.1M | for (j = 0; j < p; ++j) fg[i + j] += f[i] * (int16_t)g[j]; |
1895 | 30.4k | for (i = p; i < p + p - 1; ++i) fg[i - p] += fg[i]; |
1896 | 30.4k | for (i = p; i < p + p - 1; ++i) fg[i - p + 1] += fg[i]; |
1897 | 30.4k | for (i = 0; i < p; ++i) h[i] = F3_freeze(fg[i]); |
1898 | 40 | } |
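/* R3_mult multiplies in R/3 = (Z/3)[x]/(x^p - x - 1): after the schoolbook
 * product, the two folding loops use x^p = x + 1 to add each coefficient of
 * degree p+k back into positions k and k+1, and F3_freeze renormalizes. */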
1899 | | |
1900 | 3.48k | static int R3_recip(small *out, const small *in) { |
1901 | 3.48k | small f[p + 1], g[p + 1], v[p + 1], r[p + 1]; |
1902 | 3.48k | int sign, swap, t, i, loop, delta = 1; |
1903 | 2.66M | for (i = 0; i < p + 1; ++i) v[i] = 0; |
1904 | 2.66M | for (i = 0; i < p + 1; ++i) r[i] = 0; |
1905 | 3.48k | r[0] = 1; |
1906 | 2.65M | for (i = 0; i < p; ++i) f[i] = 0; |
1907 | 3.48k | f[0] = 1; |
1908 | 3.48k | f[p - 1] = f[p] = -1; |
1909 | 2.65M | for (i = 0; i < p; ++i) g[p - 1 - i] = in[i]; |
1910 | 3.48k | g[p] = 0; |
1911 | 5.30M | for (loop = 0; loop < 2 * p - 1; ++loop) { |
1912 | 4.04G | for (i = p; i > 0; --i) v[i] = v[i - 1]; |
1913 | 5.30M | v[0] = 0; |
1914 | 5.30M | sign = -g[0] * f[0]; |
1915 | 5.30M | swap = crypto_int16_negative_mask(-delta) & crypto_int16_nonzero_mask(g[0]); |
1916 | 5.30M | delta ^= swap & (delta ^ -delta); |
1917 | 5.30M | delta += 1; |
1918 | 4.04G | for (i = 0; i < p + 1; ++i) { |
1919 | 4.04G | t = swap & (f[i] ^ g[i]); |
1920 | 4.04G | f[i] ^= t; |
1921 | 4.04G | g[i] ^= t; |
1922 | 4.04G | t = swap & (v[i] ^ r[i]); |
1923 | 4.04G | v[i] ^= t; |
1924 | 4.04G | r[i] ^= t; |
1925 | 4.04G | } |
1926 | 4.04G | for (i = 0; i < p + 1; ++i) g[i] = F3_freeze(g[i] + sign * f[i]); |
1927 | 4.04G | for (i = 0; i < p + 1; ++i) r[i] = F3_freeze(r[i] + sign * v[i]); |
1928 | 4.04G | for (i = 0; i < p; ++i) g[i] = g[i + 1]; |
1929 | 5.30M | g[p] = 0; |
1930 | 5.30M | } |
1931 | 3.48k | sign = f[0]; |
1932 | 2.65M | for (i = 0; i < p; ++i) out[i] = sign * v[p - 1 - i]; |
1933 | 3.48k | return crypto_int16_nonzero_mask(delta); |
1934 | 3.48k | } |
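/* R3_recip computes 1/in in R/3 with a constant-time extended-GCD loop in the
 * style of divstep iterations: 2*p - 1 fixed rounds, with swaps and updates
 * selected by masks rather than branches.  It returns 0 when the inverse
 * exists (delta reaches 0) and -1 otherwise; the caller below treats -1 as
 * "retry with a new g". */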
1935 | | |
1936 | 3.56k | static void Rq_mult_small(Fq *h, const Fq *f, const small *g) { |
1937 | 3.56k | int32_t fg[p + p - 1]; |
1938 | 3.56k | int i, j; |
1939 | 5.42M | for (i = 0; i < p + p - 1; ++i) fg[i] = 0; |
1940 | 2.71M | for (i = 0; i < p; ++i) |
1941 | 2.06G | for (j = 0; j < p; ++j) fg[i + j] += f[i] * (int32_t)g[j]; |
1942 | 2.71M | for (i = p; i < p + p - 1; ++i) fg[i - p] += fg[i]; |
1943 | 2.71M | for (i = p; i < p + p - 1; ++i) fg[i - p + 1] += fg[i]; |
1944 | 2.71M | for (i = 0; i < p; ++i) h[i] = Fq_freeze(fg[i]); |
1945 | 3.56k | } |
1946 | | |
1947 | 40 | static void Rq_mult3(Fq *h, const Fq *f) { |
1948 | 40 | int i; |
1949 | 30.4k | for (i = 0; i < p; ++i) h[i] = Fq_freeze(3 * f[i]); |
1950 | 40 | } |
1951 | | |
1952 | 6.97k | static Fq Fq_recip(Fq a1) { |
1953 | 6.97k | int i = 1; |
1954 | 6.97k | Fq ai = a1; |
1955 | 32.0M | while (i < q - 2) { |
1956 | 31.9M | ai = Fq_freeze(a1 * (int32_t)ai); |
1957 | 31.9M | i += 1; |
1958 | 31.9M | } |
1959 | 6.97k | return ai; |
1960 | 6.97k | } |
1961 | | |
1962 | 3.48k | static int Rq_recip3(Fq *out, const small *in) { |
1963 | 3.48k | Fq f[p + 1], g[p + 1], v[p + 1], r[p + 1], scale; |
1964 | 3.48k | int swap, t, i, loop, delta = 1; |
1965 | 3.48k | int32_t f0, g0; |
1966 | 2.66M | for (i = 0; i < p + 1; ++i) v[i] = 0; |
1967 | 2.66M | for (i = 0; i < p + 1; ++i) r[i] = 0; |
1968 | 3.48k | r[0] = Fq_recip(3); |
1969 | 2.65M | for (i = 0; i < p; ++i) f[i] = 0; |
1970 | 3.48k | f[0] = 1; |
1971 | 3.48k | f[p - 1] = f[p] = -1; |
1972 | 2.65M | for (i = 0; i < p; ++i) g[p - 1 - i] = in[i]; |
1973 | 3.48k | g[p] = 0; |
1974 | 5.30M | for (loop = 0; loop < 2 * p - 1; ++loop) { |
1975 | 4.04G | for (i = p; i > 0; --i) v[i] = v[i - 1]; |
1976 | 5.30M | v[0] = 0; |
1977 | 5.30M | swap = crypto_int16_negative_mask(-delta) & crypto_int16_nonzero_mask(g[0]); |
1978 | 5.30M | delta ^= swap & (delta ^ -delta); |
1979 | 5.30M | delta += 1; |
1980 | 4.04G | for (i = 0; i < p + 1; ++i) { |
1981 | 4.04G | t = swap & (f[i] ^ g[i]); |
1982 | 4.04G | f[i] ^= t; |
1983 | 4.04G | g[i] ^= t; |
1984 | 4.04G | t = swap & (v[i] ^ r[i]); |
1985 | 4.04G | v[i] ^= t; |
1986 | 4.04G | r[i] ^= t; |
1987 | 4.04G | } |
1988 | 5.30M | f0 = f[0]; |
1989 | 5.30M | g0 = g[0]; |
1990 | 4.04G | for (i = 0; i < p + 1; ++i) g[i] = Fq_freeze(f0 * g[i] - g0 * f[i]); |
1991 | 4.04G | for (i = 0; i < p + 1; ++i) r[i] = Fq_freeze(f0 * r[i] - g0 * v[i]); |
1992 | 4.04G | for (i = 0; i < p; ++i) g[i] = g[i + 1]; |
1993 | 5.30M | g[p] = 0; |
1994 | 5.30M | } |
1995 | 3.48k | scale = Fq_recip(f[0]); |
1996 | 2.65M | for (i = 0; i < p; ++i) out[i] = Fq_freeze(scale * (int32_t)v[p - 1 - i]); |
1997 | 3.48k | return crypto_int16_nonzero_mask(delta); |
1998 | 3.48k | } |
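/* Rq_recip3 computes 1/(3*in) in R/q using the same masked eliminator loop as
 * R3_recip; starting r[0] at Fq_recip(3) folds the extra factor 1/3 into the
 * result, which is what key generation needs for h = g/(3f).  Fq_recip above
 * inverts a scalar mod q by raising it to the power q-2. */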
1999 | | |
2000 | 40 | static void Round(Fq *out, const Fq *a) { |
2001 | 40 | int i; |
2002 | 30.4k | for (i = 0; i < p; ++i) out[i] = a[i] - F3_freeze(a[i]); |
2003 | 40 | } |
2004 | | |
2005 | 3.48k | static void Short_fromlist(small *out, const uint32_t *in) { |
2006 | 3.48k | uint32_t L[p]; |
2007 | 3.48k | int i; |
2008 | 1.00M | for (i = 0; i < w; ++i) L[i] = in[i] & (uint32_t)-2; |
2009 | 1.65M | for (i = w; i < p; ++i) L[i] = (in[i] & (uint32_t)-3) | 1; |
2010 | 3.48k | crypto_sort_uint32(L, p); |
2011 | 2.65M | for (i = 0; i < p; ++i) out[i] = (L[i] & 3) - 1; |
2012 | 3.48k | } |
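/* Short_fromlist turns p random 32-bit words into a uniform short element:
 * the first w words are forced even and the rest are forced to 1 mod 4, the
 * list is sorted (carrying the random high bits along), and the low two bits
 * of each sorted word then decode to exactly w nonzero entries (+1 or -1) and
 * p - w zeros, in positions determined by the random high bits. */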
2013 | | |
2014 | 3.64k | static void Hash_prefix(unsigned char *out, int b, const unsigned char *in, int inlen) { |
2015 | 3.64k | unsigned char x[inlen + 1], h[64]; |
2016 | 3.64k | int i; |
2017 | 3.64k | x[0] = b; |
2018 | 4.10M | for (i = 0; i < inlen; ++i) x[i + 1] = in[i]; |
2019 | 3.64k | crypto_hash_sha512(h, x, inlen + 1); |
2020 | 120k | for (i = 0; i < 32; ++i) out[i] = h[i]; |
2021 | 3.64k | } |
2022 | | |
2023 | 5.30M | static uint32_t urandom32(void) { |
2024 | 5.30M | unsigned char c[4]; |
2025 | 5.30M | uint32_t result = 0; |
2026 | 5.30M | int i; |
2027 | 5.30M | randombytes(c, 4); |
2028 | 26.5M | for (i = 0; i < 4; ++i) result += ((uint32_t)c[i]) << (8 * i); |
2029 | 5.30M | return result; |
2030 | 5.30M | } |
2031 | | |
2032 | 3.48k | static void Short_random(small *out) { |
2033 | 3.48k | uint32_t L[p]; |
2034 | 3.48k | int i; |
2035 | 2.65M | for (i = 0; i < p; ++i) L[i] = urandom32(); |
2036 | 3.48k | Short_fromlist(out, L); |
2037 | 3.48k | } |
2038 | | |
2039 | 3.48k | static void Small_random(small *out) { |
2040 | 3.48k | int i; |
2041 | 2.65M | for (i = 0; i < p; ++i) out[i] = (((urandom32() & 0x3fffffff) * 3) >> 30) - 1; |
2042 | 3.48k | } |
2043 | | |
2044 | 3.48k | static void KeyGen(Fq *h, small *f, small *ginv) { |
2045 | 3.48k | small g[p]; |
2046 | 3.48k | Fq finv[p]; |
2047 | 3.48k | for (;;) { |
2048 | 3.48k | int result; |
2049 | 3.48k | Small_random(g); |
2050 | 3.48k | result = R3_recip(ginv, g); |
2051 | 3.48k | crypto_declassify(&result, sizeof result); |
2052 | 3.48k | if (result == 0) break; |
2053 | 3.48k | } |
2054 | 3.48k | Short_random(f); |
2055 | 3.48k | Rq_recip3(finv, f); |
2056 | 3.48k | Rq_mult_small(h, finv, g); |
2057 | 3.48k | } |
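/* KeyGen: pick a uniform small g until it is invertible in R/3 (the retry
 * decision is declassified since only invertibility is revealed, not g),
 * pick a short secret f, and publish h = g/(3f) in R/q. */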
2058 | | |
2059 | 40 | static void Encrypt(Fq *c, const small *r, const Fq *h) { |
2060 | 40 | Fq hr[p]; |
2061 | 40 | Rq_mult_small(hr, h, r); |
2062 | 40 | Round(c, hr); |
2063 | 40 | } |
2064 | | |
2065 | 40 | static void Decrypt(small *r, const Fq *c, const small *f, const small *ginv) { |
2066 | 40 | Fq cf[p], cf3[p]; |
2067 | 40 | small e[p], ev[p]; |
2068 | 40 | int mask, i; |
2069 | 40 | Rq_mult_small(cf, c, f); |
2070 | 40 | Rq_mult3(cf3, cf); |
2071 | 40 | R3_fromRq(e, cf3); |
2072 | 40 | R3_mult(ev, e, ginv); |
2073 | 40 | mask = Weightw_mask(ev); |
2074 | 11.4k | for (i = 0; i < w; ++i) r[i] = ((ev[i] ^ 1) & ~mask) ^ 1; |
2075 | 19.0k | for (i = w; i < p; ++i) r[i] = ev[i] & ~mask; |
2076 | 40 | } |
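/* Decrypt recovers r from c*f: 3*c*f is reduced mod 3 and multiplied by 1/g
 * to get the candidate ev, and the weight check then forces the output to the
 * fixed weight-w pattern (w ones followed by zeros) whenever ev does not have
 * weight exactly w, so malformed ciphertexts still yield a well-formed r in
 * constant time. */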
2077 | | |
2078 | 7.01k | static void Small_encode(unsigned char *s, const small *f) { |
2079 | 7.01k | int i, j; |
2080 | 1.33M | for (i = 0; i < p / 4; ++i) { |
2081 | 1.33M | small x = 0; |
2082 | 6.66M | for (j = 0;j < 4;++j) x += (*f++ + 1) << (2 * j); |
2083 | 1.33M | *s++ = x; |
2084 | 1.33M | } |
2085 | 7.01k | *s = *f++ + 1; |
2086 | 7.01k | } |
2087 | | |
2088 | 80 | static void Small_decode(small *f, const unsigned char *s) { |
2089 | 80 | int i, j; |
2090 | 15.2k | for (i = 0; i < p / 4; ++i) { |
2091 | 15.2k | unsigned char x = *s++; |
2092 | 76.0k | for (j = 0;j < 4;++j) *f++ = ((small)((x >> (2 * j)) & 3)) - 1; |
2093 | 15.2k | } |
2094 | 80 | *f++ = ((small)(*s & 3)) - 1; |
2095 | 80 | } |
2096 | | |
2097 | 3.48k | static void Rq_encode(unsigned char *s, const Fq *r) { |
2098 | 3.48k | uint16_t R[p], M[p]; |
2099 | 3.48k | int i; |
2100 | 2.65M | for (i = 0; i < p; ++i) R[i] = r[i] + q12; |
2101 | 2.65M | for (i = 0; i < p; ++i) M[i] = q; |
2102 | 3.48k | Encode(s, R, M, p); |
2103 | 3.48k | } |
2104 | | |
2105 | 40 | static void Rq_decode(Fq *r, const unsigned char *s) { |
2106 | 40 | uint16_t R[p], M[p]; |
2107 | 40 | int i; |
2108 | 30.4k | for (i = 0; i < p; ++i) M[i] = q; |
2109 | 40 | Decode(R, s, M, p); |
2110 | 30.4k | for (i = 0; i < p; ++i) r[i] = ((Fq)R[i]) - q12; |
2111 | 40 | } |
2112 | | |
2113 | 40 | static void Rounded_encode(unsigned char *s, const Fq *r) { |
2114 | 40 | uint16_t R[p], M[p]; |
2115 | 40 | int i; |
2116 | 30.4k | for (i = 0; i < p; ++i) R[i] = ((r[i] + q12) * 10923) >> 15; |
2117 | 30.4k | for (i = 0; i < p; ++i) M[i] = (q + 2) / 3; |
2118 | 40 | Encode(s, R, M, p); |
2119 | 40 | } |
2120 | | |
2121 | 40 | static void Rounded_decode(Fq *r, const unsigned char *s) { |
2122 | 40 | uint16_t R[p], M[p]; |
2123 | 40 | int i; |
2124 | 30.4k | for (i = 0; i < p; ++i) M[i] = (q + 2) / 3; |
2125 | 40 | Decode(R, s, M, p); |
2126 | 30.4k | for (i = 0; i < p; ++i) r[i] = R[i] * 3 - q12; |
2127 | 40 | } |
2128 | | |
2129 | 3.48k | static void ZKeyGen(unsigned char *pk, unsigned char *sk) { |
2130 | 3.48k | Fq h[p]; |
2131 | 3.48k | small f[p], v[p]; |
2132 | 3.48k | KeyGen(h, f, v); |
2133 | 3.48k | Rq_encode(pk, h); |
2134 | 3.48k | Small_encode(sk, f); |
2135 | 3.48k | Small_encode(sk + Small_bytes, v); |
2136 | 3.48k | } |
2137 | | |
2138 | 40 | static void ZEncrypt(unsigned char *C, const Inputs r, const unsigned char *pk) { |
2139 | 40 | Fq h[p], c[p]; |
2140 | 40 | Rq_decode(h, pk); |
2141 | 40 | Encrypt(c, r, h); |
2142 | 40 | Rounded_encode(C, c); |
2143 | 40 | } |
2144 | | |
2145 | 40 | static void ZDecrypt(Inputs r, const unsigned char *C, const unsigned char *sk) { |
2146 | 40 | small f[p], v[p]; |
2147 | 40 | Fq c[p]; |
2148 | 40 | Small_decode(f, sk); |
2149 | 40 | Small_decode(v, sk + Small_bytes); |
2150 | 40 | Rounded_decode(c, C); |
2151 | 40 | Decrypt(r, c, f, v); |
2152 | 40 | } |
2153 | | |
2154 | 40 | static void HashConfirm(unsigned char *h, const unsigned char *r, const unsigned char *cache) { |
2155 | 40 | unsigned char x[Hash_bytes * 2]; |
2156 | 40 | int i; |
2157 | 40 | Hash_prefix(x, 3, r, Small_bytes); |
2158 | 1.32k | for (i = 0; i < Hash_bytes; ++i) x[Hash_bytes + i] = cache[i]; |
2159 | 40 | Hash_prefix(h, 2, x, sizeof x); |
2160 | 40 | } |
2161 | | |
2162 | 40 | static void HashSession(unsigned char *k, int b, const unsigned char *y, const unsigned char *z) { |
2163 | 40 | unsigned char x[Hash_bytes + crypto_kem_sntrup761_CIPHERTEXTBYTES]; |
2164 | 40 | int i; |
2165 | 40 | Hash_prefix(x, 3, y, Small_bytes); |
2166 | 41.6k | for (i = 0; i < crypto_kem_sntrup761_CIPHERTEXTBYTES; ++i) x[Hash_bytes + i] = z[i]; |
2167 | 40 | Hash_prefix(k, b, x, sizeof x); |
2168 | 40 | } |
2169 | | |
2170 | 3.48k | int crypto_kem_sntrup761_keypair(unsigned char *pk, unsigned char *sk) { |
2171 | 3.48k | int i; |
2172 | 3.48k | ZKeyGen(pk, sk); |
2173 | 3.48k | sk += SecretKeys_bytes; |
2174 | 4.04M | for (i = 0; i < crypto_kem_sntrup761_PUBLICKEYBYTES; ++i) *sk++ = pk[i]; |
2175 | 3.48k | randombytes(sk, Small_bytes); |
2176 | 3.48k | Hash_prefix(sk + Small_bytes, 4, pk, crypto_kem_sntrup761_PUBLICKEYBYTES); |
2177 | 3.48k | return 0; |
2178 | 3.48k | } |
2179 | | |
2180 | 40 | static void Hide(unsigned char *c, unsigned char *r_enc, const Inputs r, const unsigned char *pk, const unsigned char *cache) { |
2181 | 40 | Small_encode(r_enc, r); |
2182 | 40 | ZEncrypt(c, r, pk); |
2183 | 40 | HashConfirm(c + crypto_kem_sntrup761_CIPHERTEXTBYTES - Confirm_bytes, r_enc, cache); |
2184 | 40 | } |
2185 | | |
2186 | 0 | int crypto_kem_sntrup761_enc(unsigned char *c, unsigned char *k, const unsigned char *pk) { |
2187 | 0 | Inputs r; |
2188 | 0 | unsigned char r_enc[Small_bytes], cache[Hash_bytes]; |
2189 | 0 | Hash_prefix(cache, 4, pk, crypto_kem_sntrup761_PUBLICKEYBYTES); |
2190 | 0 | Short_random(r); |
2191 | 0 | Hide(c, r_enc, r, pk, cache); |
2192 | 0 | HashSession(k, 1, r_enc, c); |
2193 | 0 | return 0; |
2194 | 0 | } |
2195 | | |
2196 | 40 | static int Ciphertexts_diff_mask(const unsigned char *c, const unsigned char *c2) { |
2197 | 40 | uint16_t differentbits = 0; |
2198 | 40 | int len = crypto_kem_sntrup761_CIPHERTEXTBYTES; |
2199 | 41.6k | while (len-- > 0) differentbits |= (*c++) ^ (*c2++); |
2200 | 40 | return (crypto_int64_bitmod_01((differentbits - 1),8)) - 1; |
2201 | 40 | } |
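/* Ciphertexts_diff_mask ORs together the XOR of every byte pair and returns
 * 0 if the two ciphertexts are identical or -1 if not: differentbits stays in
 * 0..255, so bit 8 of (differentbits - 1) is set exactly when differentbits
 * is 0. */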
2202 | | |
2203 | 40 | int crypto_kem_sntrup761_dec(unsigned char *k, const unsigned char *c, const unsigned char *sk) { |
2204 | 40 | const unsigned char *pk = sk + SecretKeys_bytes; |
2205 | 40 | const unsigned char *rho = pk + crypto_kem_sntrup761_PUBLICKEYBYTES; |
2206 | 40 | const unsigned char *cache = rho + Small_bytes; |
2207 | 40 | Inputs r; |
2208 | 40 | unsigned char r_enc[Small_bytes], cnew[crypto_kem_sntrup761_CIPHERTEXTBYTES]; |
2209 | 40 | int mask, i; |
2210 | 40 | ZDecrypt(r, c, sk); |
2211 | 40 | Hide(cnew, r_enc, r, pk, cache); |
2212 | 40 | mask = Ciphertexts_diff_mask(c, cnew); |
2213 | 7.68k | for (i = 0; i < Small_bytes; ++i) r_enc[i] ^= mask & (r_enc[i] ^ rho[i]); |
2214 | 40 | HashSession(k, 1 + mask, r_enc, c); |
2215 | 40 | return 0; |
2216 | 40 | } |
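/* Decapsulation re-encrypts the decrypted r and compares ciphertexts in
 * constant time; on mismatch the mask swaps r_enc for the secret rho stored
 * in the key and HashSession is called with prefix 0 instead of 1, i.e. the
 * implicit-rejection variant of the Fujisaki-Okamoto transform. */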
2217 | | |
2218 | | #endif /* DROPBEAR_SNTRUP761 */ |