/src/libgcrypt/cipher/sm4.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* sm4.c - SM4 Cipher Algorithm |
2 | | * Copyright (C) 2020 Alibaba Group. |
3 | | * Copyright (C) 2020-2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> |
4 | | * Copyright (C) 2020-2022 Jussi Kivilinna <jussi.kivilinna@iki.fi> |
5 | | * |
6 | | * This file is part of Libgcrypt. |
7 | | * |
8 | | * Libgcrypt is free software; you can redistribute it and/or modify |
9 | | * it under the terms of the GNU Lesser General Public License as |
10 | | * published by the Free Software Foundation; either version 2.1 of |
11 | | * the License, or (at your option) any later version. |
12 | | * |
13 | | * Libgcrypt is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | | * GNU Lesser General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU Lesser General Public |
19 | | * License along with this program; if not, see <http://www.gnu.org/licenses/>. |
20 | | */ |
21 | | |
22 | | #include <config.h> |
23 | | #include <stdio.h> |
24 | | #include <stdlib.h> |
25 | | |
26 | | #include "types.h" /* for byte and u32 typedefs */ |
27 | | #include "bithelp.h" |
28 | | #include "g10lib.h" |
29 | | #include "cipher.h" |
30 | | #include "bufhelp.h" |
31 | | #include "cipher-internal.h" |
32 | | #include "bulkhelp.h" |
33 | | |
34 | | /* Helper macro to force alignment to 64 bytes. */ |
35 | | #ifdef HAVE_GCC_ATTRIBUTE_ALIGNED |
36 | | # define ATTR_ALIGNED_64 __attribute__ ((aligned (64))) |
37 | | #else |
38 | | # define ATTR_ALIGNED_64 |
39 | | #endif |
40 | | |
/* USE_AESNI_AVX indicates whether to compile with Intel AES-NI/AVX code. */
42 | | #undef USE_AESNI_AVX |
43 | | #if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT) |
44 | | # if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ |
45 | | defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) |
46 | | # define USE_AESNI_AVX 1 |
47 | | # endif |
48 | | #endif |
49 | | |
/* USE_AESNI_AVX2 indicates whether to compile with Intel AES-NI/AVX2 code. */
51 | | #undef USE_AESNI_AVX2 |
52 | | #if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT) |
53 | | # if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ |
54 | | defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) |
55 | | # define USE_AESNI_AVX2 1 |
56 | | # endif |
57 | | #endif |
58 | | |
/* USE_GFNI_AVX2 indicates whether to compile with Intel GFNI/AVX2 code. */
60 | | #undef USE_GFNI_AVX2 |
61 | | #if defined(ENABLE_GFNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT) |
62 | | # if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ |
63 | | defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) |
64 | | # define USE_GFNI_AVX2 1 |
65 | | # endif |
66 | | #endif |
67 | | |
/* USE_GFNI_AVX512 indicates whether to compile with Intel GFNI/AVX512 code. */
69 | | #undef USE_GFNI_AVX512 |
70 | | #if defined(ENABLE_GFNI_SUPPORT) && defined(ENABLE_AVX512_SUPPORT) |
71 | | # if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ |
72 | | defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) |
73 | | # define USE_GFNI_AVX512 1 |
74 | | # endif |
75 | | #endif |
76 | | |
77 | | /* Assembly implementations use SystemV ABI, ABI conversion and additional |
78 | | * stack to store XMM6-XMM15 needed on Win64. */ |
79 | | #undef ASM_FUNC_ABI |
80 | | #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) || \ |
81 | | defined(USE_GFNI_AVX2) || defined(USE_GFNI_AVX512) |
82 | | # ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS |
83 | | # define ASM_FUNC_ABI __attribute__((sysv_abi)) |
84 | | # else |
85 | | # define ASM_FUNC_ABI |
86 | | # endif |
87 | | #endif |
88 | | |
89 | | #undef USE_AARCH64_SIMD |
90 | | #ifdef ENABLE_NEON_SUPPORT |
91 | | # if defined(__AARCH64EL__) && \ |
92 | | defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ |
93 | | defined(HAVE_GCC_INLINE_ASM_AARCH64_NEON) |
94 | | # define USE_AARCH64_SIMD 1 |
95 | | # endif |
96 | | #endif |
97 | | |
98 | | #undef USE_ARM_CE |
99 | | #ifdef ENABLE_ARM_CRYPTO_SUPPORT |
100 | | # if defined(__AARCH64EL__) && \ |
101 | | defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ |
102 | | defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) |
103 | | # define USE_ARM_CE 1 |
104 | | # endif |
105 | | #endif |
106 | | |
107 | | #undef USE_ARM_SVE_CE |
108 | | #ifdef ENABLE_SVE_SUPPORT |
109 | | # if defined(__AARCH64EL__) && \ |
110 | | defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ |
111 | | defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) && \ |
112 | | defined(HAVE_GCC_INLINE_ASM_AARCH64_SVE) && \ |
113 | | defined(HAVE_GCC_INLINE_ASM_AARCH64_SVE2) |
114 | | # define USE_ARM_SVE_CE 1 |
115 | | # endif |
116 | | #endif |
117 | | |
118 | | static const char *sm4_selftest (void); |
119 | | |
120 | | static void _gcry_sm4_ctr_enc (void *context, unsigned char *ctr, |
121 | | void *outbuf_arg, const void *inbuf_arg, |
122 | | size_t nblocks); |
123 | | static void _gcry_sm4_cbc_dec (void *context, unsigned char *iv, |
124 | | void *outbuf_arg, const void *inbuf_arg, |
125 | | size_t nblocks); |
126 | | static void _gcry_sm4_cfb_dec (void *context, unsigned char *iv, |
127 | | void *outbuf_arg, const void *inbuf_arg, |
128 | | size_t nblocks); |
129 | | static void _gcry_sm4_xts_crypt (void *context, unsigned char *tweak, |
130 | | void *outbuf_arg, const void *inbuf_arg, |
131 | | size_t nblocks, int encrypt); |
132 | | static void _gcry_sm4_ecb_crypt (void *context, void *outbuf_arg, |
133 | | const void *inbuf_arg, size_t nblocks, |
134 | | int encrypt); |
135 | | static void _gcry_sm4_ctr32le_enc(void *context, unsigned char *ctr, |
136 | | void *outbuf_arg, const void *inbuf_arg, |
137 | | size_t nblocks); |
138 | | static size_t _gcry_sm4_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, |
139 | | const void *inbuf_arg, size_t nblocks, |
140 | | int encrypt); |
141 | | static size_t _gcry_sm4_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, |
142 | | size_t nblocks); |
143 | | |
144 | | typedef unsigned int (*crypt_blk1_16_fn_t) (const void *ctx, byte *out, |
145 | | const byte *in, |
146 | | unsigned int num_blks); |
147 | | |
/* Per-handle cipher context for SM4.  Holds the expanded round keys and
 * the implementation-selection state decided at key-setup time. */
typedef struct
{
  u32 rkey_enc[32];       /* Round keys in encryption order. */
  u32 rkey_dec[32];       /* Same round keys in reversed (decryption) order. */
  /* Bulk 1..16 block crypt routine selected by sm4_get_crypt_blk1_16_fn. */
  crypt_blk1_16_fn_t crypt_blk1_16;
#ifdef USE_AESNI_AVX
  unsigned int use_aesni_avx:1;
#endif
#ifdef USE_AESNI_AVX2
  unsigned int use_aesni_avx2:1;
#endif
#ifdef USE_GFNI_AVX2
  unsigned int use_gfni_avx2:1;
#endif
#ifdef USE_GFNI_AVX512
  unsigned int use_gfni_avx512:1;
#endif
#ifdef USE_AARCH64_SIMD
  unsigned int use_aarch64_simd:1;
#endif
#ifdef USE_ARM_CE
  unsigned int use_arm_ce:1;
#endif
#ifdef USE_ARM_SVE_CE
  unsigned int use_arm_sve_ce:1;
#endif
} SM4_context;
175 | | |
/* SM4 system parameter FK (four big-endian words XORed into the key
 * before key expansion). */
static const u32 fk[4] =
{
  0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
};
180 | | |
/* The SM4 S-box, embedded between two volatile counters.  The counters
 * are incremented by prefetch_sbox_table() so the pages holding the
 * table stay private to this process (copy-on-write / anti-KSM); the
 * cacheline_align padding keeps S[] on its own 64-byte-aligned lines. */
static struct
{
  volatile u32 counter_head;
  u32 cacheline_align[64 / 4 - 1];
  byte S[256];
  volatile u32 counter_tail;
} sbox_table ATTR_ALIGNED_64 =
{
  0,
  { 0, },
  {
    0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
    0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
    0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
    0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
    0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
    0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
    0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
    0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
    0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
    0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
    0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
    0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
    0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
    0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
    0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
    0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
    0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
    0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
    0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
    0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
    0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
    0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
    0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
    0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
    0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
    0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
    0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
    0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
    0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
    0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
    0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
    0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
  },
  0
};
227 | | |
/* SM4 key-schedule constants CK[0..31] (one per round key). */
static const u32 ck[] =
{
  0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
  0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
  0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
  0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
  0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
  0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
  0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
  0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
};
239 | | |
240 | | static inline crypt_blk1_16_fn_t sm4_get_crypt_blk1_16_fn(SM4_context *ctx); |
241 | | |
242 | | #ifdef USE_AESNI_AVX |
243 | | extern void _gcry_sm4_aesni_avx_expand_key(const byte *key, u32 *rk_enc, |
244 | | u32 *rk_dec, const u32 *fk, |
245 | | const u32 *ck) ASM_FUNC_ABI; |
246 | | |
247 | | extern void _gcry_sm4_aesni_avx_ctr_enc(const u32 *rk_enc, byte *out, |
248 | | const byte *in, byte *ctr) ASM_FUNC_ABI; |
249 | | |
250 | | extern void _gcry_sm4_aesni_avx_cbc_dec(const u32 *rk_dec, byte *out, |
251 | | const byte *in, byte *iv) ASM_FUNC_ABI; |
252 | | |
253 | | extern void _gcry_sm4_aesni_avx_cfb_dec(const u32 *rk_enc, byte *out, |
254 | | const byte *in, byte *iv) ASM_FUNC_ABI; |
255 | | |
256 | | extern void _gcry_sm4_aesni_avx_ocb_enc(const u32 *rk_enc, |
257 | | unsigned char *out, |
258 | | const unsigned char *in, |
259 | | unsigned char *offset, |
260 | | unsigned char *checksum, |
261 | | const u64 Ls[8]) ASM_FUNC_ABI; |
262 | | |
263 | | extern void _gcry_sm4_aesni_avx_ocb_dec(const u32 *rk_dec, |
264 | | unsigned char *out, |
265 | | const unsigned char *in, |
266 | | unsigned char *offset, |
267 | | unsigned char *checksum, |
268 | | const u64 Ls[8]) ASM_FUNC_ABI; |
269 | | |
270 | | extern void _gcry_sm4_aesni_avx_ocb_auth(const u32 *rk_enc, |
271 | | const unsigned char *abuf, |
272 | | unsigned char *offset, |
273 | | unsigned char *checksum, |
274 | | const u64 Ls[8]) ASM_FUNC_ABI; |
275 | | |
276 | | extern unsigned int |
277 | | _gcry_sm4_aesni_avx_crypt_blk1_8(const u32 *rk, byte *out, const byte *in, |
278 | | unsigned int num_blks) ASM_FUNC_ABI; |
279 | | |
280 | | static inline unsigned int |
281 | | sm4_aesni_avx_crypt_blk1_16(const void *rk, byte *out, const byte *in, |
282 | | unsigned int num_blks) |
283 | 0 | { |
284 | 0 | if (num_blks > 8) |
285 | 0 | { |
286 | 0 | _gcry_sm4_aesni_avx_crypt_blk1_8(rk, out, in, 8); |
287 | 0 | in += 8 * 16; |
288 | 0 | out += 8 * 16; |
289 | 0 | num_blks -= 8; |
290 | 0 | } |
291 | |
|
292 | 0 | return _gcry_sm4_aesni_avx_crypt_blk1_8(rk, out, in, num_blks); |
293 | 0 | } |
294 | | |
295 | | #endif /* USE_AESNI_AVX */ |
296 | | |
297 | | #ifdef USE_AESNI_AVX2 |
298 | | extern void _gcry_sm4_aesni_avx2_ctr_enc(const u32 *rk_enc, byte *out, |
299 | | const byte *in, |
300 | | byte *ctr) ASM_FUNC_ABI; |
301 | | |
302 | | extern void _gcry_sm4_aesni_avx2_cbc_dec(const u32 *rk_dec, byte *out, |
303 | | const byte *in, |
304 | | byte *iv) ASM_FUNC_ABI; |
305 | | |
306 | | extern void _gcry_sm4_aesni_avx2_cfb_dec(const u32 *rk_enc, byte *out, |
307 | | const byte *in, |
308 | | byte *iv) ASM_FUNC_ABI; |
309 | | |
310 | | extern void _gcry_sm4_aesni_avx2_ocb_enc(const u32 *rk_enc, |
311 | | unsigned char *out, |
312 | | const unsigned char *in, |
313 | | unsigned char *offset, |
314 | | unsigned char *checksum, |
315 | | const u64 Ls[16]) ASM_FUNC_ABI; |
316 | | |
317 | | extern void _gcry_sm4_aesni_avx2_ocb_dec(const u32 *rk_dec, |
318 | | unsigned char *out, |
319 | | const unsigned char *in, |
320 | | unsigned char *offset, |
321 | | unsigned char *checksum, |
322 | | const u64 Ls[16]) ASM_FUNC_ABI; |
323 | | |
324 | | extern void _gcry_sm4_aesni_avx2_ocb_auth(const u32 *rk_enc, |
325 | | const unsigned char *abuf, |
326 | | unsigned char *offset, |
327 | | unsigned char *checksum, |
328 | | const u64 Ls[16]) ASM_FUNC_ABI; |
329 | | |
330 | | extern unsigned int |
331 | | _gcry_sm4_aesni_avx2_crypt_blk1_16(const u32 *rk, byte *out, const byte *in, |
332 | | unsigned int num_blks) ASM_FUNC_ABI; |
333 | | |
334 | | static inline unsigned int |
335 | | sm4_aesni_avx2_crypt_blk1_16(const void *rk, byte *out, const byte *in, |
336 | | unsigned int num_blks) |
337 | 0 | { |
338 | 0 | #ifdef USE_AESNI_AVX |
339 | | /* Use 128-bit register implementation for short input. */ |
340 | 0 | if (num_blks <= 8) |
341 | 0 | return _gcry_sm4_aesni_avx_crypt_blk1_8(rk, out, in, num_blks); |
342 | 0 | #endif |
343 | | |
344 | 0 | return _gcry_sm4_aesni_avx2_crypt_blk1_16(rk, out, in, num_blks); |
345 | 0 | } |
346 | | |
347 | | #endif /* USE_AESNI_AVX2 */ |
348 | | |
349 | | #ifdef USE_GFNI_AVX2 |
350 | | extern void _gcry_sm4_gfni_avx2_expand_key(const byte *key, u32 *rk_enc, |
351 | | u32 *rk_dec, const u32 *fk, |
352 | | const u32 *ck) ASM_FUNC_ABI; |
353 | | |
354 | | extern void _gcry_sm4_gfni_avx2_ctr_enc(const u32 *rk_enc, byte *out, |
355 | | const byte *in, |
356 | | byte *ctr) ASM_FUNC_ABI; |
357 | | |
358 | | extern void _gcry_sm4_gfni_avx2_cbc_dec(const u32 *rk_dec, byte *out, |
359 | | const byte *in, |
360 | | byte *iv) ASM_FUNC_ABI; |
361 | | |
362 | | extern void _gcry_sm4_gfni_avx2_cfb_dec(const u32 *rk_enc, byte *out, |
363 | | const byte *in, |
364 | | byte *iv) ASM_FUNC_ABI; |
365 | | |
366 | | extern void _gcry_sm4_gfni_avx2_ocb_enc(const u32 *rk_enc, |
367 | | unsigned char *out, |
368 | | const unsigned char *in, |
369 | | unsigned char *offset, |
370 | | unsigned char *checksum, |
371 | | const u64 Ls[16]) ASM_FUNC_ABI; |
372 | | |
373 | | extern void _gcry_sm4_gfni_avx2_ocb_dec(const u32 *rk_dec, |
374 | | unsigned char *out, |
375 | | const unsigned char *in, |
376 | | unsigned char *offset, |
377 | | unsigned char *checksum, |
378 | | const u64 Ls[16]) ASM_FUNC_ABI; |
379 | | |
380 | | extern void _gcry_sm4_gfni_avx2_ocb_auth(const u32 *rk_enc, |
381 | | const unsigned char *abuf, |
382 | | unsigned char *offset, |
383 | | unsigned char *checksum, |
384 | | const u64 Ls[16]) ASM_FUNC_ABI; |
385 | | |
386 | | extern unsigned int |
387 | | _gcry_sm4_gfni_avx2_crypt_blk1_16(const u32 *rk, byte *out, const byte *in, |
388 | | unsigned int num_blks) ASM_FUNC_ABI; |
389 | | |
/* Crypt 1..16 blocks with the GFNI/AVX2 implementation.  Thin wrapper
 * giving the assembly routine the crypt_blk1_16_fn_t signature.
 * Returns the stack burn depth reported by the assembly. */
static inline unsigned int
sm4_gfni_avx2_crypt_blk1_16(const void *rk, byte *out, const byte *in,
			    unsigned int num_blks)
{
  return _gcry_sm4_gfni_avx2_crypt_blk1_16(rk, out, in, num_blks);
}
396 | | |
397 | | #endif /* USE_GFNI_AVX2 */ |
398 | | |
399 | | #ifdef USE_GFNI_AVX512 |
400 | | extern void _gcry_sm4_gfni_avx512_expand_key(const byte *key, u32 *rk_enc, |
401 | | u32 *rk_dec, const u32 *fk, |
402 | | const u32 *ck) ASM_FUNC_ABI; |
403 | | |
404 | | extern void _gcry_sm4_gfni_avx512_ctr_enc(const u32 *rk_enc, byte *out, |
405 | | const byte *in, |
406 | | byte *ctr) ASM_FUNC_ABI; |
407 | | |
408 | | extern void _gcry_sm4_gfni_avx512_cbc_dec(const u32 *rk_dec, byte *out, |
409 | | const byte *in, |
410 | | byte *iv) ASM_FUNC_ABI; |
411 | | |
412 | | extern void _gcry_sm4_gfni_avx512_cfb_dec(const u32 *rk_enc, byte *out, |
413 | | const byte *in, |
414 | | byte *iv) ASM_FUNC_ABI; |
415 | | |
416 | | extern void _gcry_sm4_gfni_avx512_ocb_enc(const u32 *rk_enc, |
417 | | unsigned char *out, |
418 | | const unsigned char *in, |
419 | | unsigned char *offset, |
420 | | unsigned char *checksum, |
421 | | const u64 Ls[16]) ASM_FUNC_ABI; |
422 | | |
423 | | extern void _gcry_sm4_gfni_avx512_ocb_dec(const u32 *rk_dec, |
424 | | unsigned char *out, |
425 | | const unsigned char *in, |
426 | | unsigned char *offset, |
427 | | unsigned char *checksum, |
428 | | const u64 Ls[16]) ASM_FUNC_ABI; |
429 | | |
430 | | extern void _gcry_sm4_gfni_avx512_ocb_auth(const u32 *rk_enc, |
431 | | const unsigned char *abuf, |
432 | | unsigned char *offset, |
433 | | unsigned char *checksum, |
434 | | const u64 Ls[16]) ASM_FUNC_ABI; |
435 | | |
436 | | extern void _gcry_sm4_gfni_avx512_ctr_enc_blk32(const u32 *rk_enc, byte *out, |
437 | | const byte *in, |
438 | | byte *ctr) ASM_FUNC_ABI; |
439 | | |
440 | | extern void _gcry_sm4_gfni_avx512_cbc_dec_blk32(const u32 *rk_enc, byte *out, |
441 | | const byte *in, |
442 | | byte *iv) ASM_FUNC_ABI; |
443 | | |
444 | | extern void _gcry_sm4_gfni_avx512_cfb_dec_blk32(const u32 *rk_enc, byte *out, |
445 | | const byte *in, |
446 | | byte *iv) ASM_FUNC_ABI; |
447 | | |
448 | | extern void _gcry_sm4_gfni_avx512_ocb_enc_blk32(const u32 *rk_enc, |
449 | | unsigned char *out, |
450 | | const unsigned char *in, |
451 | | unsigned char *offset, |
452 | | unsigned char *checksum, |
453 | | const u64 Ls[32]) ASM_FUNC_ABI; |
454 | | |
455 | | extern void _gcry_sm4_gfni_avx512_ocb_dec_blk32(const u32 *rk_dec, |
456 | | unsigned char *out, |
457 | | const unsigned char *in, |
458 | | unsigned char *offset, |
459 | | unsigned char *checksum, |
460 | | const u64 Ls[32]) ASM_FUNC_ABI; |
461 | | |
462 | | extern unsigned int |
463 | | _gcry_sm4_gfni_avx512_crypt_blk1_16(const u32 *rk, byte *out, const byte *in, |
464 | | unsigned int num_blks) ASM_FUNC_ABI; |
465 | | |
466 | | extern unsigned int |
467 | | _gcry_sm4_gfni_avx512_crypt_blk32(const u32 *rk, byte *out, |
468 | | const byte *in) ASM_FUNC_ABI; |
469 | | |
/* Crypt 1..16 blocks with the GFNI/AVX512 implementation.  Thin wrapper
 * giving the assembly routine the crypt_blk1_16_fn_t signature.
 * Returns the stack burn depth reported by the assembly. */
static inline unsigned int
sm4_gfni_avx512_crypt_blk1_16(const void *rk, byte *out, const byte *in,
			      unsigned int num_blks)
{
  return _gcry_sm4_gfni_avx512_crypt_blk1_16(rk, out, in, num_blks);
}
476 | | |
#endif /* USE_GFNI_AVX512 */
478 | | |
479 | | #ifdef USE_AARCH64_SIMD |
480 | | extern void _gcry_sm4_aarch64_crypt(const u32 *rk, byte *out, |
481 | | const byte *in, |
482 | | size_t num_blocks); |
483 | | |
484 | | extern void _gcry_sm4_aarch64_ctr_enc(const u32 *rk_enc, byte *out, |
485 | | const byte *in, |
486 | | byte *ctr, |
487 | | size_t nblocks); |
488 | | |
489 | | extern void _gcry_sm4_aarch64_cbc_dec(const u32 *rk_dec, byte *out, |
490 | | const byte *in, |
491 | | byte *iv, |
492 | | size_t nblocks); |
493 | | |
494 | | extern void _gcry_sm4_aarch64_cfb_dec(const u32 *rk_enc, byte *out, |
495 | | const byte *in, |
496 | | byte *iv, |
497 | | size_t nblocks); |
498 | | |
499 | | extern void _gcry_sm4_aarch64_crypt_blk1_8(const u32 *rk, byte *out, |
500 | | const byte *in, |
501 | | size_t num_blocks); |
502 | | |
503 | | static inline unsigned int |
504 | | sm4_aarch64_crypt_blk1_16(const void *rk, byte *out, const byte *in, |
505 | | unsigned int num_blks) |
506 | | { |
507 | | if (num_blks > 8) |
508 | | { |
509 | | _gcry_sm4_aarch64_crypt_blk1_8(rk, out, in, 8); |
510 | | in += 8 * 16; |
511 | | out += 8 * 16; |
512 | | num_blks -= 8; |
513 | | } |
514 | | |
515 | | _gcry_sm4_aarch64_crypt_blk1_8(rk, out, in, num_blks); |
516 | | return 0; |
517 | | } |
518 | | |
519 | | #endif /* USE_AARCH64_SIMD */ |
520 | | |
521 | | #ifdef USE_ARM_CE |
522 | | extern void _gcry_sm4_armv8_ce_expand_key(const byte *key, |
523 | | u32 *rkey_enc, u32 *rkey_dec, |
524 | | const u32 *fk, const u32 *ck); |
525 | | |
526 | | extern void _gcry_sm4_armv8_ce_crypt(const u32 *rk, byte *out, |
527 | | const byte *in, |
528 | | size_t num_blocks); |
529 | | |
530 | | extern void _gcry_sm4_armv8_ce_ctr_enc(const u32 *rk_enc, byte *out, |
531 | | const byte *in, |
532 | | byte *ctr, |
533 | | size_t nblocks); |
534 | | |
535 | | extern void _gcry_sm4_armv8_ce_cbc_dec(const u32 *rk_dec, byte *out, |
536 | | const byte *in, |
537 | | byte *iv, |
538 | | size_t nblocks); |
539 | | |
540 | | extern void _gcry_sm4_armv8_ce_cfb_dec(const u32 *rk_enc, byte *out, |
541 | | const byte *in, |
542 | | byte *iv, |
543 | | size_t nblocks); |
544 | | |
545 | | extern void _gcry_sm4_armv8_ce_xts_crypt(const u32 *rk, byte *out, |
546 | | const byte *in, |
547 | | byte *tweak, |
548 | | size_t nblocks); |
549 | | |
550 | | extern void _gcry_sm4_armv8_ce_crypt_blk1_8(const u32 *rk, byte *out, |
551 | | const byte *in, |
552 | | size_t num_blocks); |
553 | | |
554 | | static inline unsigned int |
555 | | sm4_armv8_ce_crypt_blk1_16(const void *rk, byte *out, const byte *in, |
556 | | unsigned int num_blks) |
557 | | { |
558 | | if (num_blks > 8) |
559 | | { |
560 | | _gcry_sm4_armv8_ce_crypt_blk1_8(rk, out, in, 8); |
561 | | in += 8 * 16; |
562 | | out += 8 * 16; |
563 | | num_blks -= 8; |
564 | | } |
565 | | |
566 | | _gcry_sm4_armv8_ce_crypt_blk1_8(rk, out, in, num_blks); |
567 | | return 0; |
568 | | } |
569 | | |
570 | | #endif /* USE_ARM_CE */ |
571 | | |
572 | | #ifdef USE_ARM_SVE_CE |
573 | | extern void _gcry_sm4_armv9_sve_ce_crypt(const u32 *rk, byte *out, |
574 | | const byte *in, |
575 | | size_t nblocks); |
576 | | |
577 | | extern void _gcry_sm4_armv9_sve_ce_ctr_enc(const u32 *rk_enc, byte *out, |
578 | | const byte *in, |
579 | | byte *ctr, |
580 | | size_t nblocks); |
581 | | |
582 | | extern void _gcry_sm4_armv9_sve_ce_cbc_dec(const u32 *rk_dec, byte *out, |
583 | | const byte *in, |
584 | | byte *iv, |
585 | | size_t nblocks); |
586 | | |
587 | | extern void _gcry_sm4_armv9_sve_ce_cfb_dec(const u32 *rk_enc, byte *out, |
588 | | const byte *in, |
589 | | byte *iv, |
590 | | size_t nblocks); |
591 | | |
/* Crypt 1..16 blocks with the ARMv9 SVE Crypto Extension
 * implementation.  Thin wrapper giving the assembly routine the
 * crypt_blk1_16_fn_t signature.  Returns 0 (no extra stack burn
 * needed). */
static inline unsigned int
sm4_armv9_sve_ce_crypt_blk1_16(const void *rk, byte *out, const byte *in,
			       unsigned int num_blks)
{
  _gcry_sm4_armv9_sve_ce_crypt(rk, out, in, num_blks);
  return 0;
}
599 | | |
600 | | extern unsigned int _gcry_sm4_armv9_sve_get_vl(void); |
601 | | #endif /* USE_ARM_SVE_CE */ |
602 | | |
/* Pull the whole S-box into the data cache before table-based
 * encryption, reducing the cache-timing signal of the look-ups. */
static inline void prefetch_sbox_table(void)
{
  /* Volatile so the reads below cannot be optimized away. */
  const volatile byte *vtab = (void *)&sbox_table;

  /* Modify counters to trigger copy-on-write and unsharing if physical pages
   * of look-up table are shared between processes.  Modifying counters also
   * causes checksums for pages to change and hint same-page merging algorithm
   * that these pages are frequently changing.  */
  sbox_table.counter_head++;
  sbox_table.counter_tail++;

  /* Prefetch look-up table to cache.  One read per 32-byte stride (plus
   * the last byte) touches every cache line of the 256-byte table. */
  (void)vtab[0 * 32];
  (void)vtab[1 * 32];
  (void)vtab[2 * 32];
  (void)vtab[3 * 32];
  (void)vtab[4 * 32];
  (void)vtab[5 * 32];
  (void)vtab[6 * 32];
  (void)vtab[7 * 32];
  (void)vtab[8 * 32 - 1];
}
625 | | |
626 | | static inline u32 sm4_t_non_lin_sub(u32 x) |
627 | 0 | { |
628 | 0 | u32 out; |
629 | |
|
630 | 0 | out = (u32)sbox_table.S[(x >> 0) & 0xff] << 0; |
631 | 0 | out |= (u32)sbox_table.S[(x >> 8) & 0xff] << 8; |
632 | 0 | out |= (u32)sbox_table.S[(x >> 16) & 0xff] << 16; |
633 | 0 | out |= (u32)sbox_table.S[(x >> 24) & 0xff] << 24; |
634 | |
|
635 | 0 | return out; |
636 | 0 | } |
637 | | |
/* Key-schedule linear transform L':
 * L'(x) = x ^ (x <<< 13) ^ (x <<< 23). */
static inline u32 sm4_key_lin_sub(u32 x)
{
  return x ^ rol(x, 13) ^ rol(x, 23);
}
642 | | |
/* Round-function linear transform L:
 * L(x) = x ^ (x <<< 2) ^ (x <<< 10) ^ (x <<< 18) ^ (x <<< 24).
 * (x <<< 10) and (x <<< 18) are derived from the cached (x <<< 2). */
static inline u32 sm4_enc_lin_sub(u32 x)
{
  u32 xrol2 = rol(x, 2);
  return x ^ xrol2 ^ rol(xrol2, 8) ^ rol(xrol2, 16) ^ rol(x, 24);
}
648 | | |
/* Key-schedule T' transform: S-box substitution followed by L'. */
static inline u32 sm4_key_sub(u32 x)
{
  return sm4_key_lin_sub(sm4_t_non_lin_sub(x));
}
653 | | |
/* Round-function T transform: S-box substitution followed by L. */
static inline u32 sm4_enc_sub(u32 x)
{
  return sm4_enc_lin_sub(sm4_t_non_lin_sub(x));
}
658 | | |
/* One SM4 round: F(x0, x1, x2, x3, rk) = x0 ^ T(x1 ^ x2 ^ x3 ^ rk). */
static inline u32
sm4_round(const u32 x0, const u32 x1, const u32 x2, const u32 x3, const u32 rk)
{
  return x0 ^ sm4_enc_sub(x1 ^ x2 ^ x3 ^ rk);
}
664 | | |
/* Expand the 128-bit KEY into the 32 encryption round keys
 * (ctx->rkey_enc) and the same keys in reversed order for decryption
 * (ctx->rkey_dec).  Dispatches to an accelerated key expansion when one
 * was selected at key-setup time, otherwise uses the table-based
 * generic code. */
static void
sm4_expand_key (SM4_context *ctx, const byte *key)
{
  u32 rk[4];
  int i;

#ifdef USE_GFNI_AVX512
  if (ctx->use_gfni_avx512)
    {
      _gcry_sm4_gfni_avx512_expand_key (key, ctx->rkey_enc, ctx->rkey_dec,
                                        fk, ck);
      return;
    }
#endif

#ifdef USE_GFNI_AVX2
  if (ctx->use_gfni_avx2)
    {
      _gcry_sm4_gfni_avx2_expand_key (key, ctx->rkey_enc, ctx->rkey_dec,
                                      fk, ck);
      return;
    }
#endif

#ifdef USE_AESNI_AVX
  if (ctx->use_aesni_avx)
    {
      _gcry_sm4_aesni_avx_expand_key (key, ctx->rkey_enc, ctx->rkey_dec,
                                      fk, ck);
      return;
    }
#endif

#ifdef USE_ARM_CE
  if (ctx->use_arm_ce)
    {
      _gcry_sm4_armv8_ce_expand_key (key, ctx->rkey_enc, ctx->rkey_dec,
                                     fk, ck);
      return;
    }
#endif

  prefetch_sbox_table ();

  /* K0..K3 = big-endian key words XORed with the FK system parameter. */
  rk[0] = buf_get_be32(key + 4 * 0) ^ fk[0];
  rk[1] = buf_get_be32(key + 4 * 1) ^ fk[1];
  rk[2] = buf_get_be32(key + 4 * 2) ^ fk[2];
  rk[3] = buf_get_be32(key + 4 * 3) ^ fk[3];

  /* rk[i+4] = rk[i] ^ T'(rk[i+1] ^ rk[i+2] ^ rk[i+3] ^ CK[i]);
   * four round keys are produced per iteration, stored forward for
   * encryption and backward for decryption. */
  for (i = 0; i < 32; i += 4)
    {
      rk[0] = rk[0] ^ sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i + 0]);
      rk[1] = rk[1] ^ sm4_key_sub(rk[2] ^ rk[3] ^ rk[0] ^ ck[i + 1]);
      rk[2] = rk[2] ^ sm4_key_sub(rk[3] ^ rk[0] ^ rk[1] ^ ck[i + 2]);
      rk[3] = rk[3] ^ sm4_key_sub(rk[0] ^ rk[1] ^ rk[2] ^ ck[i + 3]);
      ctx->rkey_enc[i + 0] = rk[0];
      ctx->rkey_enc[i + 1] = rk[1];
      ctx->rkey_enc[i + 2] = rk[2];
      ctx->rkey_enc[i + 3] = rk[3];
      ctx->rkey_dec[31 - i - 0] = rk[0];
      ctx->rkey_dec[31 - i - 1] = rk[1];
      ctx->rkey_dec[31 - i - 2] = rk[2];
      ctx->rkey_dec[31 - i - 3] = rk[3];
    }

  /* Clear key material from the stack. */
  wipememory (rk, sizeof(rk));
}
732 | | |
/* Set the cipher key (must be 16 bytes).  Runs the one-time selftest,
 * probes hardware features to select accelerated implementations,
 * registers the bulk cipher-mode handlers and expands the round keys.
 * Returns 0 on success or a GPG_ERR_* code. */
static gcry_err_code_t
sm4_setkey (void *context, const byte *key, const unsigned keylen,
            cipher_bulk_ops_t *bulk_ops)
{
  SM4_context *ctx = context;
  static int init = 0;
  static const char *selftest_failed = NULL;
  unsigned int hwf = _gcry_get_hw_features ();

  /* Silence "unused" warning on builds without any accelerated code. */
  (void)hwf;

  /* Run the selftest once per process; remember a failure. */
  if (!init)
    {
      init = 1;
      selftest_failed = sm4_selftest();
      if (selftest_failed)
	log_error("%s\n", selftest_failed);
    }
  if (selftest_failed)
    return GPG_ERR_SELFTEST_FAILED;

  if (keylen != 16)
    return GPG_ERR_INV_KEYLEN;

#ifdef USE_AESNI_AVX
  ctx->use_aesni_avx = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX);
#endif
#ifdef USE_AESNI_AVX2
  ctx->use_aesni_avx2 = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX2);
#endif
#ifdef USE_GFNI_AVX2
  ctx->use_gfni_avx2 = (hwf & HWF_INTEL_GFNI) && (hwf & HWF_INTEL_AVX2);
#endif
#ifdef USE_GFNI_AVX512
  ctx->use_gfni_avx512 = (hwf & HWF_INTEL_GFNI) && (hwf & HWF_INTEL_AVX512);
#endif
#ifdef USE_AARCH64_SIMD
  ctx->use_aarch64_simd = !!(hwf & HWF_ARM_NEON);
#endif
#ifdef USE_ARM_CE
  ctx->use_arm_ce = !!(hwf & HWF_ARM_SM4);
#endif
#ifdef USE_ARM_SVE_CE
  /* Only enabled when the SVE vector length is greater than 128 bits */
  ctx->use_arm_sve_ce = (hwf & HWF_ARM_SVE2) && (hwf & HWF_ARM_SVESM4)
			&& _gcry_sm4_armv9_sve_get_vl() > 16;
#endif

#ifdef USE_GFNI_AVX2
  if (ctx->use_gfni_avx2)
    {
      /* Disable AESNI implementations when GFNI implementation is enabled. */
#ifdef USE_AESNI_AVX
      ctx->use_aesni_avx = 0;
#endif
#ifdef USE_AESNI_AVX2
      ctx->use_aesni_avx2 = 0;
#endif
    }
#endif

  ctx->crypt_blk1_16 = sm4_get_crypt_blk1_16_fn(ctx);

  /* Setup bulk encryption routines.  */
  memset (bulk_ops, 0, sizeof(*bulk_ops));
  bulk_ops->cbc_dec = _gcry_sm4_cbc_dec;
  bulk_ops->cfb_dec = _gcry_sm4_cfb_dec;
  bulk_ops->ctr_enc = _gcry_sm4_ctr_enc;
  bulk_ops->xts_crypt = _gcry_sm4_xts_crypt;
  bulk_ops->ecb_crypt = _gcry_sm4_ecb_crypt;
  bulk_ops->ctr32le_enc = _gcry_sm4_ctr32le_enc;
  bulk_ops->ocb_crypt = _gcry_sm4_ocb_crypt;
  bulk_ops->ocb_auth = _gcry_sm4_ocb_auth;

  sm4_expand_key (ctx, key);
  return 0;
}
810 | | |
811 | | static unsigned int |
812 | | sm4_do_crypt (const u32 *rk, byte *out, const byte *in) |
813 | 0 | { |
814 | 0 | u32 x[4]; |
815 | 0 | int i; |
816 | |
|
817 | 0 | x[0] = buf_get_be32(in + 0 * 4); |
818 | 0 | x[1] = buf_get_be32(in + 1 * 4); |
819 | 0 | x[2] = buf_get_be32(in + 2 * 4); |
820 | 0 | x[3] = buf_get_be32(in + 3 * 4); |
821 | |
|
822 | 0 | for (i = 0; i < 32; i += 4) |
823 | 0 | { |
824 | 0 | x[0] = sm4_round(x[0], x[1], x[2], x[3], rk[i + 0]); |
825 | 0 | x[1] = sm4_round(x[1], x[2], x[3], x[0], rk[i + 1]); |
826 | 0 | x[2] = sm4_round(x[2], x[3], x[0], x[1], rk[i + 2]); |
827 | 0 | x[3] = sm4_round(x[3], x[0], x[1], x[2], rk[i + 3]); |
828 | 0 | } |
829 | |
|
830 | 0 | buf_put_be32(out + 0 * 4, x[3 - 0]); |
831 | 0 | buf_put_be32(out + 1 * 4, x[3 - 1]); |
832 | 0 | buf_put_be32(out + 2 * 4, x[3 - 2]); |
833 | 0 | buf_put_be32(out + 3 * 4, x[3 - 3]); |
834 | |
|
835 | 0 | return /*burn_stack*/ 4*6+sizeof(void*)*4; |
836 | 0 | } |
837 | | |
/* Encrypt one 16-byte block, dispatching to the fastest single-block
 * implementation enabled for this context.  Returns the stack burn
 * depth. */
static unsigned int
sm4_encrypt (void *context, byte *outbuf, const byte *inbuf)
{
  SM4_context *ctx = context;

#ifdef USE_GFNI_AVX512
  if (ctx->use_gfni_avx512)
    return sm4_gfni_avx512_crypt_blk1_16(ctx->rkey_enc, outbuf, inbuf, 1);
#endif

#ifdef USE_GFNI_AVX2
  if (ctx->use_gfni_avx2)
    return sm4_gfni_avx2_crypt_blk1_16(ctx->rkey_enc, outbuf, inbuf, 1);
#endif

#ifdef USE_ARM_CE
  if (ctx->use_arm_ce)
    return sm4_armv8_ce_crypt_blk1_16(ctx->rkey_enc, outbuf, inbuf, 1);
#endif

  /* Table-based fallback; prefetch reduces cache-timing leakage. */
  prefetch_sbox_table ();

  return sm4_do_crypt (ctx->rkey_enc, outbuf, inbuf);
}
862 | | |
/* Decrypt one 16-byte block.  Mirror of sm4_encrypt but using the
 * decryption key schedule (rkey_dec).  Returns the stack burn depth.  */
static unsigned int
sm4_decrypt (void *context, byte *outbuf, const byte *inbuf)
{
  SM4_context *ctx = context;

#ifdef USE_GFNI_AVX512
  if (ctx->use_gfni_avx512)
    return sm4_gfni_avx512_crypt_blk1_16(ctx->rkey_dec, outbuf, inbuf, 1);
#endif

#ifdef USE_GFNI_AVX2
  if (ctx->use_gfni_avx2)
    return sm4_gfni_avx2_crypt_blk1_16(ctx->rkey_dec, outbuf, inbuf, 1);
#endif

#ifdef USE_ARM_CE
  if (ctx->use_arm_ce)
    return sm4_armv8_ce_crypt_blk1_16(ctx->rkey_dec, outbuf, inbuf, 1);
#endif

  /* Generic path; warm the S-box table first (presumably a cache-timing
   * mitigation — see prefetch_sbox_table).  */
  prefetch_sbox_table ();

  return sm4_do_crypt (ctx->rkey_dec, outbuf, inbuf);
}
887 | | |
888 | | static unsigned int |
889 | | sm4_do_crypt_blks2 (const u32 *rk, byte *out, const byte *in) |
890 | 0 | { |
891 | 0 | u32 x[4]; |
892 | 0 | u32 y[4]; |
893 | 0 | u32 k; |
894 | 0 | int i; |
895 | | |
896 | | /* Encrypts/Decrypts two blocks for higher instruction level |
897 | | * parallelism. */ |
898 | |
|
899 | 0 | x[0] = buf_get_be32(in + 0 * 4); |
900 | 0 | x[1] = buf_get_be32(in + 1 * 4); |
901 | 0 | x[2] = buf_get_be32(in + 2 * 4); |
902 | 0 | x[3] = buf_get_be32(in + 3 * 4); |
903 | 0 | y[0] = buf_get_be32(in + 4 * 4); |
904 | 0 | y[1] = buf_get_be32(in + 5 * 4); |
905 | 0 | y[2] = buf_get_be32(in + 6 * 4); |
906 | 0 | y[3] = buf_get_be32(in + 7 * 4); |
907 | |
|
908 | 0 | for (i = 0; i < 32; i += 4) |
909 | 0 | { |
910 | 0 | k = rk[i + 0]; |
911 | 0 | x[0] = sm4_round(x[0], x[1], x[2], x[3], k); |
912 | 0 | y[0] = sm4_round(y[0], y[1], y[2], y[3], k); |
913 | 0 | k = rk[i + 1]; |
914 | 0 | x[1] = sm4_round(x[1], x[2], x[3], x[0], k); |
915 | 0 | y[1] = sm4_round(y[1], y[2], y[3], y[0], k); |
916 | 0 | k = rk[i + 2]; |
917 | 0 | x[2] = sm4_round(x[2], x[3], x[0], x[1], k); |
918 | 0 | y[2] = sm4_round(y[2], y[3], y[0], y[1], k); |
919 | 0 | k = rk[i + 3]; |
920 | 0 | x[3] = sm4_round(x[3], x[0], x[1], x[2], k); |
921 | 0 | y[3] = sm4_round(y[3], y[0], y[1], y[2], k); |
922 | 0 | } |
923 | |
|
924 | 0 | buf_put_be32(out + 0 * 4, x[3 - 0]); |
925 | 0 | buf_put_be32(out + 1 * 4, x[3 - 1]); |
926 | 0 | buf_put_be32(out + 2 * 4, x[3 - 2]); |
927 | 0 | buf_put_be32(out + 3 * 4, x[3 - 3]); |
928 | 0 | buf_put_be32(out + 4 * 4, y[3 - 0]); |
929 | 0 | buf_put_be32(out + 5 * 4, y[3 - 1]); |
930 | 0 | buf_put_be32(out + 6 * 4, y[3 - 2]); |
931 | 0 | buf_put_be32(out + 7 * 4, y[3 - 3]); |
932 | |
|
933 | 0 | return /*burn_stack*/ 4*10+sizeof(void*)*4; |
934 | 0 | } |
935 | | |
936 | | static unsigned int |
937 | | sm4_crypt_blocks (const void *ctx, byte *out, const byte *in, |
938 | | unsigned int num_blks) |
939 | 0 | { |
940 | 0 | const u32 *rk = ctx; |
941 | 0 | unsigned int burn_depth = 0; |
942 | 0 | unsigned int nburn; |
943 | |
|
944 | 0 | while (num_blks >= 2) |
945 | 0 | { |
946 | 0 | nburn = sm4_do_crypt_blks2 (rk, out, in); |
947 | 0 | burn_depth = nburn > burn_depth ? nburn : burn_depth; |
948 | 0 | out += 2 * 16; |
949 | 0 | in += 2 * 16; |
950 | 0 | num_blks -= 2; |
951 | 0 | } |
952 | |
|
953 | 0 | while (num_blks) |
954 | 0 | { |
955 | 0 | nburn = sm4_do_crypt (rk, out, in); |
956 | 0 | burn_depth = nburn > burn_depth ? nburn : burn_depth; |
957 | 0 | out += 16; |
958 | 0 | in += 16; |
959 | 0 | num_blks--; |
960 | 0 | } |
961 | |
|
962 | 0 | if (burn_depth) |
963 | 0 | burn_depth += sizeof(void *) * 5; |
964 | 0 | return burn_depth; |
965 | 0 | } |
966 | | |
/* Select the best available 1..16-block bulk implementation for this
 * context.  The `if (0); else if` chain lets each alternative live in
 * its own #ifdef section while keeping a single else-if cascade, so the
 * priority order below holds for whichever subset is compiled in.
 * Falls back to the generic C implementation.  */
static inline crypt_blk1_16_fn_t
sm4_get_crypt_blk1_16_fn(SM4_context *ctx)
{
  if (0)
    ;
#ifdef USE_GFNI_AVX512
  else if (ctx->use_gfni_avx512)
    {
      return &sm4_gfni_avx512_crypt_blk1_16;
    }
#endif
#ifdef USE_GFNI_AVX2
  else if (ctx->use_gfni_avx2)
    {
      return &sm4_gfni_avx2_crypt_blk1_16;
    }
#endif
#ifdef USE_AESNI_AVX2
  else if (ctx->use_aesni_avx2)
    {
      return &sm4_aesni_avx2_crypt_blk1_16;
    }
#endif
#ifdef USE_AESNI_AVX
  else if (ctx->use_aesni_avx)
    {
      return &sm4_aesni_avx_crypt_blk1_16;
    }
#endif
#ifdef USE_ARM_SVE_CE
  else if (ctx->use_arm_sve_ce)
    {
      return &sm4_armv9_sve_ce_crypt_blk1_16;
    }
#endif
#ifdef USE_ARM_CE
  else if (ctx->use_arm_ce)
    {
      return &sm4_armv8_ce_crypt_blk1_16;
    }
#endif
#ifdef USE_AARCH64_SIMD
  else if (ctx->use_aarch64_simd)
    {
      return &sm4_aarch64_crypt_blk1_16;
    }
#endif
  else
    {
      /* (void)ctx silences "unused parameter" when no accelerated
       * implementation is compiled in.  */
      (void)ctx;
      return &sm4_crypt_blocks;
    }
}
1020 | | |
/* Bulk encryption of complete blocks in CTR mode.  This function is only
   intended for the bulk encryption feature of cipher.c.  CTR is expected to be
   of size 16. */
static void
_gcry_sm4_ctr_enc(void *context, unsigned char *ctr,
		  void *outbuf_arg, const void *inbuf_arg,
		  size_t nblocks)
{
  SM4_context *ctx = context;
  byte *outbuf = outbuf_arg;
  const byte *inbuf = inbuf_arg;
  int burn_stack_depth = 0;

  /* Each accelerated path below consumes as many whole chunks as it can
   * and advances the buffer pointers; leftovers fall through to the
   * generic bulk helper at the end.  */

#ifdef USE_GFNI_AVX512
  if (ctx->use_gfni_avx512)
    {
      /* Process data in 32 block chunks. */
      while (nblocks >= 32)
        {
          _gcry_sm4_gfni_avx512_ctr_enc_blk32(ctx->rkey_enc,
                                              outbuf, inbuf, ctr);

          nblocks -= 32;
          outbuf += 32 * 16;
          inbuf += 32 * 16;
        }

      /* Process data in 16 block chunks. */
      if (nblocks >= 16)
        {
          _gcry_sm4_gfni_avx512_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr);

          nblocks -= 16;
          outbuf += 16 * 16;
          inbuf += 16 * 16;
        }
    }
#endif

#ifdef USE_GFNI_AVX2
  if (ctx->use_gfni_avx2)
    {
      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_sm4_gfni_avx2_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr);

          nblocks -= 16;
          outbuf += 16 * 16;
          inbuf += 16 * 16;
        }
    }
#endif

#ifdef USE_AESNI_AVX2
  if (ctx->use_aesni_avx2)
    {
      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_sm4_aesni_avx2_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr);

          nblocks -= 16;
          outbuf += 16 * 16;
          inbuf += 16 * 16;
        }
    }
#endif

#ifdef USE_AESNI_AVX
  if (ctx->use_aesni_avx)
    {
      /* Process data in 8 block chunks. */
      while (nblocks >= 8)
        {
          _gcry_sm4_aesni_avx_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr);

          nblocks -= 8;
          outbuf += 8 * 16;
          inbuf += 8 * 16;
        }
    }
#endif

#ifdef USE_ARM_SVE_CE
  if (ctx->use_arm_sve_ce)
    {
      /* Process all blocks at a time. */
      _gcry_sm4_armv9_sve_ce_ctr_enc(ctx->rkey_enc, outbuf, inbuf,
                                     ctr, nblocks);
      nblocks = 0;
    }
#endif

#ifdef USE_ARM_CE
  if (ctx->use_arm_ce)
    {
      /* Process multiples of 8 blocks at a time. */
      if (nblocks >= 8)
        {
          size_t nblks = nblocks & ~(8 - 1);

          _gcry_sm4_armv8_ce_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr, nblks);

          nblocks -= nblks;
          outbuf += nblks * 16;
          inbuf += nblks * 16;
        }
    }
#endif

#ifdef USE_AARCH64_SIMD
  if (ctx->use_aarch64_simd)
    {
      /* Process multiples of 8 blocks at a time. */
      if (nblocks >= 8)
        {
          size_t nblks = nblocks & ~(8 - 1);

          _gcry_sm4_aarch64_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr, nblks);

          nblocks -= nblks;
          outbuf += nblks * 16;
          inbuf += nblks * 16;
        }
    }
#endif

  /* Process remaining blocks. */
  if (nblocks)
    {
      crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16;
      byte tmpbuf[16 * 16];
      unsigned int tmp_used = 16;
      size_t nburn;

      if (crypt_blk1_16 == &sm4_crypt_blocks)
        prefetch_sbox_table ();

      /* tmp_used reports back how much of tmpbuf was actually touched,
       * so only that part needs wiping.  */
      nburn = bulk_ctr_enc_128(ctx->rkey_enc, crypt_blk1_16, outbuf, inbuf,
                               nblocks, ctr, tmpbuf, sizeof(tmpbuf) / 16,
                               &tmp_used);
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;

      wipememory(tmpbuf, tmp_used);
    }

  if (burn_stack_depth)
    _gcry_burn_stack(burn_stack_depth);
}
1171 | | |
/* Bulk decryption of complete blocks in CBC mode.  This function is only
   intended for the bulk encryption feature of cipher.c. */
static void
_gcry_sm4_cbc_dec(void *context, unsigned char *iv,
		  void *outbuf_arg, const void *inbuf_arg,
		  size_t nblocks)
{
  SM4_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  int burn_stack_depth = 0;

  /* Accelerated paths consume whole chunks and advance the pointers;
   * leftovers fall through to the generic bulk helper.  CBC decryption
   * uses the decryption key schedule (rkey_dec).  */

#ifdef USE_GFNI_AVX512
  if (ctx->use_gfni_avx512)
    {
      /* Process data in 32 block chunks. */
      while (nblocks >= 32)
        {
          _gcry_sm4_gfni_avx512_cbc_dec_blk32(ctx->rkey_dec, outbuf, inbuf, iv);

          nblocks -= 32;
          outbuf += 32 * 16;
          inbuf += 32 * 16;
        }

      /* Process data in 16 block chunks. */
      if (nblocks >= 16)
        {
          _gcry_sm4_gfni_avx512_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv);

          nblocks -= 16;
          outbuf += 16 * 16;
          inbuf += 16 * 16;
        }
    }
#endif

#ifdef USE_GFNI_AVX2
  if (ctx->use_gfni_avx2)
    {
      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_sm4_gfni_avx2_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv);

          nblocks -= 16;
          outbuf += 16 * 16;
          inbuf += 16 * 16;
        }
    }
#endif

#ifdef USE_AESNI_AVX2
  if (ctx->use_aesni_avx2)
    {
      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_sm4_aesni_avx2_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv);

          nblocks -= 16;
          outbuf += 16 * 16;
          inbuf += 16 * 16;
        }
    }
#endif

#ifdef USE_AESNI_AVX
  if (ctx->use_aesni_avx)
    {
      /* Process data in 8 block chunks. */
      while (nblocks >= 8)
        {
          _gcry_sm4_aesni_avx_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv);

          nblocks -= 8;
          outbuf += 8 * 16;
          inbuf += 8 * 16;
        }
    }
#endif

#ifdef USE_ARM_SVE_CE
  if (ctx->use_arm_sve_ce)
    {
      /* Process all blocks at a time. */
      _gcry_sm4_armv9_sve_ce_cbc_dec(ctx->rkey_dec, outbuf, inbuf,
                                     iv, nblocks);
      nblocks = 0;
    }
#endif

#ifdef USE_ARM_CE
  if (ctx->use_arm_ce)
    {
      /* Process multiples of 8 blocks at a time. */
      if (nblocks >= 8)
        {
          size_t nblks = nblocks & ~(8 - 1);

          _gcry_sm4_armv8_ce_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv, nblks);

          nblocks -= nblks;
          outbuf += nblks * 16;
          inbuf += nblks * 16;
        }
    }
#endif

#ifdef USE_AARCH64_SIMD
  if (ctx->use_aarch64_simd)
    {
      /* Process multiples of 8 blocks at a time. */
      if (nblocks >= 8)
        {
          size_t nblks = nblocks & ~(8 - 1);

          _gcry_sm4_aarch64_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv, nblks);

          nblocks -= nblks;
          outbuf += nblks * 16;
          inbuf += nblks * 16;
        }
    }
#endif

  /* Process remaining blocks. */
  if (nblocks)
    {
      crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16;
      unsigned char tmpbuf[16 * 16];
      unsigned int tmp_used = 16;
      size_t nburn;

      if (crypt_blk1_16 == &sm4_crypt_blocks)
        prefetch_sbox_table ();

      /* tmp_used reports back how much of tmpbuf was actually touched,
       * so only that part needs wiping.  */
      nburn = bulk_cbc_dec_128(ctx->rkey_dec, crypt_blk1_16, outbuf, inbuf,
                               nblocks, iv, tmpbuf, sizeof(tmpbuf) / 16,
                               &tmp_used);
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;

      wipememory(tmpbuf, tmp_used);
    }

  if (burn_stack_depth)
    _gcry_burn_stack(burn_stack_depth);
}
1320 | | |
/* Bulk decryption of complete blocks in CFB mode.  This function is only
   intended for the bulk encryption feature of cipher.c. */
static void
_gcry_sm4_cfb_dec(void *context, unsigned char *iv,
		  void *outbuf_arg, const void *inbuf_arg,
		  size_t nblocks)
{
  SM4_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  int burn_stack_depth = 0;

  /* Accelerated paths consume whole chunks and advance the pointers;
   * leftovers fall through to the generic bulk helper.  CFB decryption
   * runs the block cipher forward, hence rkey_enc throughout.  */

#ifdef USE_GFNI_AVX512
  if (ctx->use_gfni_avx512)
    {
      /* Process data in 32 block chunks. */
      while (nblocks >= 32)
        {
          _gcry_sm4_gfni_avx512_cfb_dec_blk32(ctx->rkey_enc, outbuf, inbuf, iv);

          nblocks -= 32;
          outbuf += 32 * 16;
          inbuf += 32 * 16;
        }

      /* Process data in 16 block chunks. */
      if (nblocks >= 16)
        {
          _gcry_sm4_gfni_avx512_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv);

          nblocks -= 16;
          outbuf += 16 * 16;
          inbuf += 16 * 16;
        }
    }
#endif

#ifdef USE_GFNI_AVX2
  if (ctx->use_gfni_avx2)
    {
      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_sm4_gfni_avx2_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv);

          nblocks -= 16;
          outbuf += 16 * 16;
          inbuf += 16 * 16;
        }
    }
#endif

#ifdef USE_AESNI_AVX2
  if (ctx->use_aesni_avx2)
    {
      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_sm4_aesni_avx2_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv);

          nblocks -= 16;
          outbuf += 16 * 16;
          inbuf += 16 * 16;
        }
    }
#endif

#ifdef USE_AESNI_AVX
  if (ctx->use_aesni_avx)
    {
      /* Process data in 8 block chunks. */
      while (nblocks >= 8)
        {
          _gcry_sm4_aesni_avx_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv);

          nblocks -= 8;
          outbuf += 8 * 16;
          inbuf += 8 * 16;
        }
    }
#endif

#ifdef USE_ARM_SVE_CE
  if (ctx->use_arm_sve_ce)
    {
      /* Process all blocks at a time. */
      _gcry_sm4_armv9_sve_ce_cfb_dec(ctx->rkey_enc, outbuf, inbuf,
                                     iv, nblocks);
      nblocks = 0;
    }
#endif

#ifdef USE_ARM_CE
  if (ctx->use_arm_ce)
    {
      /* Process multiples of 8 blocks at a time. */
      if (nblocks >= 8)
        {
          size_t nblks = nblocks & ~(8 - 1);

          _gcry_sm4_armv8_ce_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv, nblks);

          nblocks -= nblks;
          outbuf += nblks * 16;
          inbuf += nblks * 16;
        }
    }
#endif

#ifdef USE_AARCH64_SIMD
  if (ctx->use_aarch64_simd)
    {
      /* Process multiples of 8 blocks at a time. */
      if (nblocks >= 8)
        {
          size_t nblks = nblocks & ~(8 - 1);

          _gcry_sm4_aarch64_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv, nblks);

          nblocks -= nblks;
          outbuf += nblks * 16;
          inbuf += nblks * 16;
        }
    }
#endif

  /* Process remaining blocks. */
  if (nblocks)
    {
      crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16;
      unsigned char tmpbuf[16 * 16];
      unsigned int tmp_used = 16;
      size_t nburn;

      if (crypt_blk1_16 == &sm4_crypt_blocks)
        prefetch_sbox_table ();

      /* tmp_used reports back how much of tmpbuf was actually touched,
       * so only that part needs wiping.  */
      nburn = bulk_cfb_dec_128(ctx->rkey_enc, crypt_blk1_16, outbuf, inbuf,
                               nblocks, iv, tmpbuf, sizeof(tmpbuf) / 16,
                               &tmp_used);
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;

      wipememory(tmpbuf, tmp_used);
    }

  if (burn_stack_depth)
    _gcry_burn_stack(burn_stack_depth);
}
1469 | | |
/* Process 1..32 blocks with the given round keys RK.  Uses a 32-block
 * fast path where available, otherwise splits the work into at most
 * two calls to the context's 1..16-block implementation.  Returns the
 * stack burn depth.  */
static unsigned int
sm4_crypt_blk1_32 (const SM4_context *ctx, byte *outbuf, const byte *inbuf,
		   unsigned int num_blks, const u32 *rk)
{
  crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16;
  unsigned int stack_burn_size = 0;
  unsigned int nburn;

  gcry_assert (num_blks <= 32);

#ifdef USE_GFNI_AVX512
  /* Full 32-block batches can go straight to the AVX512 kernel.  */
  if (num_blks == 32 && ctx->use_gfni_avx512)
    {
      return _gcry_sm4_gfni_avx512_crypt_blk32 (rk, outbuf, inbuf);
    }
#endif
#ifdef USE_ARM_SVE_CE
  /* The SVE kernel handles any block count directly.  */
  if (ctx->use_arm_sve_ce)
    {
      _gcry_sm4_armv9_sve_ce_crypt (rk, outbuf, inbuf, num_blks);
      return 0;
    }
#endif

  do
    {
      unsigned int curr_blks = num_blks > 16 ? 16 : num_blks;
      nburn = crypt_blk1_16 (rk, outbuf, inbuf, curr_blks);
      stack_burn_size = nburn > stack_burn_size ? nburn : stack_burn_size;
      outbuf += curr_blks * 16;
      inbuf += curr_blks * 16;
      num_blks -= curr_blks;
    }
  while (num_blks > 0);

  return stack_burn_size;
}
1507 | | |
1508 | | static unsigned int |
1509 | | sm4_encrypt_blk1_32 (const void *context, byte *out, const byte *in, |
1510 | | unsigned int num_blks) |
1511 | 0 | { |
1512 | 0 | const SM4_context *ctx = context; |
1513 | 0 | return sm4_crypt_blk1_32 (ctx, out, in, num_blks, ctx->rkey_enc); |
1514 | 0 | } |
1515 | | |
1516 | | static unsigned int |
1517 | | sm4_decrypt_blk1_32 (const void *context, byte *out, const byte *in, |
1518 | | unsigned int num_blks) |
1519 | 0 | { |
1520 | 0 | const SM4_context *ctx = context; |
1521 | 0 | return sm4_crypt_blk1_32 (ctx, out, in, num_blks, ctx->rkey_dec); |
1522 | 0 | } |
1523 | | |
1524 | | /* Bulk encryption/decryption in ECB mode. */ |
1525 | | static void |
1526 | | _gcry_sm4_ecb_crypt (void *context, void *outbuf_arg, |
1527 | | const void *inbuf_arg, size_t nblocks, int encrypt) |
1528 | 0 | { |
1529 | 0 | SM4_context *ctx = context; |
1530 | 0 | unsigned char *outbuf = outbuf_arg; |
1531 | 0 | const unsigned char *inbuf = inbuf_arg; |
1532 | 0 | int burn_stack_depth = 0; |
1533 | | |
1534 | | /* Process remaining blocks. */ |
1535 | 0 | if (nblocks) |
1536 | 0 | { |
1537 | 0 | size_t nburn; |
1538 | |
|
1539 | 0 | if (ctx->crypt_blk1_16 == &sm4_crypt_blocks) |
1540 | 0 | prefetch_sbox_table (); |
1541 | |
|
1542 | 0 | nburn = bulk_ecb_crypt_128(ctx, encrypt ? sm4_encrypt_blk1_32 |
1543 | 0 | : sm4_decrypt_blk1_32, |
1544 | 0 | outbuf, inbuf, nblocks, 32); |
1545 | 0 | burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; |
1546 | 0 | } |
1547 | |
|
1548 | 0 | if (burn_stack_depth) |
1549 | 0 | _gcry_burn_stack(burn_stack_depth); |
1550 | 0 | } |
1551 | | |
/* Bulk encryption/decryption of complete blocks in XTS mode. */
static void
_gcry_sm4_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg,
		     const void *inbuf_arg, size_t nblocks, int encrypt)
{
  SM4_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  int burn_stack_depth = 0;

#ifdef USE_ARM_CE
  if (ctx->use_arm_ce)
    {
      /* Process all blocks at a time. */
      _gcry_sm4_armv8_ce_xts_crypt(encrypt ? ctx->rkey_enc : ctx->rkey_dec,
                                   outbuf, inbuf, tweak, nblocks);

      nblocks = 0;
    }
#endif

  /* Process remaining blocks. */
  if (nblocks)
    {
      /* tmpbuf is sized for a full 32-block chunk; tmp_used reports back
       * how much was actually touched so only that part needs wiping.  */
      unsigned char tmpbuf[32 * 16];
      unsigned int tmp_used = 16;
      size_t nburn;

      if (ctx->crypt_blk1_16 == &sm4_crypt_blocks)
        prefetch_sbox_table ();

      nburn = bulk_xts_crypt_128(ctx, encrypt ? sm4_encrypt_blk1_32
                                              : sm4_decrypt_blk1_32,
                                 outbuf, inbuf, nblocks,
                                 tweak, tmpbuf, sizeof(tmpbuf) / 16,
                                 &tmp_used);
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;

      wipememory(tmpbuf, tmp_used);
    }

  if (burn_stack_depth)
    _gcry_burn_stack(burn_stack_depth);
}
1596 | | |
1597 | | /* Bulk encryption of complete blocks in CTR32LE mode (for GCM-SIV). */ |
1598 | | static void |
1599 | | _gcry_sm4_ctr32le_enc(void *context, unsigned char *ctr, |
1600 | | void *outbuf_arg, const void *inbuf_arg, |
1601 | | size_t nblocks) |
1602 | 0 | { |
1603 | 0 | SM4_context *ctx = context; |
1604 | 0 | byte *outbuf = outbuf_arg; |
1605 | 0 | const byte *inbuf = inbuf_arg; |
1606 | 0 | int burn_stack_depth = 0; |
1607 | | |
1608 | | /* Process remaining blocks. */ |
1609 | 0 | if (nblocks) |
1610 | 0 | { |
1611 | 0 | byte tmpbuf[32 * 16]; |
1612 | 0 | unsigned int tmp_used = 16; |
1613 | 0 | size_t nburn; |
1614 | |
|
1615 | 0 | nburn = bulk_ctr32le_enc_128 (ctx, sm4_encrypt_blk1_32, outbuf, inbuf, |
1616 | 0 | nblocks, ctr, tmpbuf, sizeof(tmpbuf) / 16, |
1617 | 0 | &tmp_used); |
1618 | 0 | burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; |
1619 | |
|
1620 | 0 | wipememory (tmpbuf, tmp_used); |
1621 | 0 | } |
1622 | |
|
1623 | 0 | if (burn_stack_depth) |
1624 | 0 | _gcry_burn_stack (burn_stack_depth); |
1625 | 0 | } |
1626 | | |
/* Bulk encryption/decryption of complete blocks in OCB mode. */
static size_t
_gcry_sm4_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
		     const void *inbuf_arg, size_t nblocks, int encrypt)
{
  SM4_context *ctx = (void *)&c->context.c;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  u64 blkn = c->u_mode.ocb.data_nblocks;
  int burn_stack_depth = 0;

  /* Each accelerated path prepares an array of OCB offset (L) pointers
   * for its chunk size, refreshing the chunk-boundary entry per
   * iteration, then consumes whole chunks.  Leftovers fall through to
   * the generic bulk helper.  blkn tracks the running data block
   * counter and is written back at the end.  */

#ifdef USE_GFNI_AVX512
  if (ctx->use_gfni_avx512)
    {
      u64 Ls[32];
      u64 *l;

      if (nblocks >= 32)
        {
          l = bulk_ocb_prepare_L_pointers_array_blk32 (c, Ls, blkn);

          /* Process data in 32 block chunks. */
          while (nblocks >= 32)
            {
              blkn += 32;
              /* Refresh the L value for the new 32-block boundary.  */
              *l = (uintptr_t)(void *)ocb_get_l (c, blkn - blkn % 32);

              if (encrypt)
                _gcry_sm4_gfni_avx512_ocb_enc_blk32 (ctx->rkey_enc, outbuf,
                                                     inbuf, c->u_iv.iv,
                                                     c->u_ctr.ctr, Ls);
              else
                _gcry_sm4_gfni_avx512_ocb_dec_blk32 (ctx->rkey_dec, outbuf,
                                                     inbuf, c->u_iv.iv,
                                                     c->u_ctr.ctr, Ls);

              nblocks -= 32;
              outbuf += 32 * 16;
              inbuf += 32 * 16;
            }
        }
    }
#endif

#ifdef USE_GFNI_AVX2
  if (ctx->use_gfni_avx2)
    {
      u64 Ls[16];
      u64 *l;

      if (nblocks >= 16)
        {
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);

          /* Process data in 16 block chunks. */
          while (nblocks >= 16)
            {
              blkn += 16;
              *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);

              if (encrypt)
                _gcry_sm4_gfni_avx2_ocb_enc(ctx->rkey_enc, outbuf, inbuf,
                                            c->u_iv.iv, c->u_ctr.ctr, Ls);
              else
                _gcry_sm4_gfni_avx2_ocb_dec(ctx->rkey_dec, outbuf, inbuf,
                                            c->u_iv.iv, c->u_ctr.ctr, Ls);

              nblocks -= 16;
              outbuf += 16 * 16;
              inbuf += 16 * 16;
            }
        }
    }
#endif

#ifdef USE_AESNI_AVX2
  if (ctx->use_aesni_avx2)
    {
      u64 Ls[16];
      u64 *l;

      if (nblocks >= 16)
        {
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);

          /* Process data in 16 block chunks. */
          while (nblocks >= 16)
            {
              blkn += 16;
              *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);

              if (encrypt)
                _gcry_sm4_aesni_avx2_ocb_enc(ctx->rkey_enc, outbuf, inbuf,
                                             c->u_iv.iv, c->u_ctr.ctr, Ls);
              else
                _gcry_sm4_aesni_avx2_ocb_dec(ctx->rkey_dec, outbuf, inbuf,
                                             c->u_iv.iv, c->u_ctr.ctr, Ls);

              nblocks -= 16;
              outbuf += 16 * 16;
              inbuf += 16 * 16;
            }
        }
    }
#endif

#ifdef USE_AESNI_AVX
  if (ctx->use_aesni_avx)
    {
      u64 Ls[8];
      u64 *l;

      if (nblocks >= 8)
        {
          l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);

          /* Process data in 8 block chunks. */
          while (nblocks >= 8)
            {
              blkn += 8;
              *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);

              if (encrypt)
                _gcry_sm4_aesni_avx_ocb_enc(ctx->rkey_enc, outbuf, inbuf,
                                            c->u_iv.iv, c->u_ctr.ctr, Ls);
              else
                _gcry_sm4_aesni_avx_ocb_dec(ctx->rkey_dec, outbuf, inbuf,
                                            c->u_iv.iv, c->u_ctr.ctr, Ls);

              nblocks -= 8;
              outbuf += 8 * 16;
              inbuf += 8 * 16;
            }
        }
    }
#endif

  /* Process remaining blocks. */
  if (nblocks)
    {
      crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16;
      u32 *rk = encrypt ? ctx->rkey_enc : ctx->rkey_dec;
      unsigned char tmpbuf[16 * 16];
      unsigned int tmp_used = 16;
      size_t nburn;

      nburn = bulk_ocb_crypt_128 (c, rk, crypt_blk1_16, outbuf, inbuf, nblocks,
                                  &blkn, encrypt, tmpbuf, sizeof(tmpbuf) / 16,
                                  &tmp_used);
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;

      wipememory(tmpbuf, tmp_used);
    }

  /* Persist the updated data block counter.  */
  c->u_mode.ocb.data_nblocks = blkn;

  if (burn_stack_depth)
    _gcry_burn_stack(burn_stack_depth);

  return 0;
}
1788 | | |
/* Bulk authentication of complete blocks in OCB mode. */
static size_t
_gcry_sm4_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
{
  SM4_context *ctx = (void *)&c->context.c;
  const unsigned char *abuf = abuf_arg;
  u64 blkn = c->u_mode.ocb.aad_nblocks;
  int burn_stack_depth = 0;

  /* Same chunking scheme as _gcry_sm4_ocb_crypt, but folding AAD into
   * aad_offset/aad_sum and tracking the AAD block counter instead.
   * Always uses the encryption key schedule.  */

#ifdef USE_GFNI_AVX512
  if (ctx->use_gfni_avx512)
    {
      u64 Ls[16];
      u64 *l;

      if (nblocks >= 16)
        {
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);

          /* Process data in 16 block chunks. */
          while (nblocks >= 16)
            {
              blkn += 16;
              /* Refresh the L value for the new 16-block boundary.  */
              *l = (uintptr_t)(void *)ocb_get_l (c, blkn - blkn % 16);

              _gcry_sm4_gfni_avx512_ocb_auth (ctx->rkey_enc, abuf,
                                              c->u_mode.ocb.aad_offset,
                                              c->u_mode.ocb.aad_sum, Ls);

              nblocks -= 16;
              abuf += 16 * 16;
            }
        }
    }
#endif

#ifdef USE_GFNI_AVX2
  if (ctx->use_gfni_avx2)
    {
      u64 Ls[16];
      u64 *l;

      if (nblocks >= 16)
        {
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);

          /* Process data in 16 block chunks. */
          while (nblocks >= 16)
            {
              blkn += 16;
              *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);

              _gcry_sm4_gfni_avx2_ocb_auth(ctx->rkey_enc, abuf,
                                           c->u_mode.ocb.aad_offset,
                                           c->u_mode.ocb.aad_sum, Ls);

              nblocks -= 16;
              abuf += 16 * 16;
            }
        }
    }
#endif

#ifdef USE_AESNI_AVX2
  if (ctx->use_aesni_avx2)
    {
      u64 Ls[16];
      u64 *l;

      if (nblocks >= 16)
        {
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);

          /* Process data in 16 block chunks. */
          while (nblocks >= 16)
            {
              blkn += 16;
              *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);

              _gcry_sm4_aesni_avx2_ocb_auth(ctx->rkey_enc, abuf,
                                            c->u_mode.ocb.aad_offset,
                                            c->u_mode.ocb.aad_sum, Ls);

              nblocks -= 16;
              abuf += 16 * 16;
            }
        }
    }
#endif

#ifdef USE_AESNI_AVX
  if (ctx->use_aesni_avx)
    {
      u64 Ls[8];
      u64 *l;

      if (nblocks >= 8)
        {
          l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);

          /* Process data in 8 block chunks. */
          while (nblocks >= 8)
            {
              blkn += 8;
              *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);

              _gcry_sm4_aesni_avx_ocb_auth(ctx->rkey_enc, abuf,
                                           c->u_mode.ocb.aad_offset,
                                           c->u_mode.ocb.aad_sum, Ls);

              nblocks -= 8;
              abuf += 8 * 16;
            }
        }
    }
#endif

  /* Process remaining blocks. */
  if (nblocks)
    {
      crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16;
      unsigned char tmpbuf[16 * 16];
      unsigned int tmp_used = 16;
      size_t nburn;

      nburn = bulk_ocb_auth_128 (c, ctx->rkey_enc, crypt_blk1_16, abuf, nblocks,
                                 &blkn, tmpbuf, sizeof(tmpbuf) / 16, &tmp_used);
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;

      wipememory(tmpbuf, tmp_used);
    }

  /* Persist the updated AAD block counter.  */
  c->u_mode.ocb.aad_nblocks = blkn;

  if (burn_stack_depth)
    _gcry_burn_stack(burn_stack_depth);

  return 0;
}
1928 | | |
1929 | | static const char * |
1930 | | sm4_selftest (void) |
1931 | 0 | { |
1932 | 0 | SM4_context ctx; |
1933 | 0 | byte scratch[16]; |
1934 | |
|
1935 | 0 | static const byte plaintext[16] = { |
1936 | 0 | 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, |
1937 | 0 | 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10, |
1938 | 0 | }; |
1939 | 0 | static const byte key[16] = { |
1940 | 0 | 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, |
1941 | 0 | 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10, |
1942 | 0 | }; |
1943 | 0 | static const byte ciphertext[16] = { |
1944 | 0 | 0x68, 0x1E, 0xDF, 0x34, 0xD2, 0x06, 0x96, 0x5E, |
1945 | 0 | 0x86, 0xB3, 0xE9, 0x4F, 0x53, 0x6E, 0x42, 0x46 |
1946 | 0 | }; |
1947 | |
|
1948 | 0 | memset (&ctx, 0, sizeof(ctx)); |
1949 | |
|
1950 | 0 | sm4_expand_key (&ctx, key); |
1951 | 0 | sm4_encrypt (&ctx, scratch, plaintext); |
1952 | 0 | if (memcmp (scratch, ciphertext, sizeof (ciphertext))) |
1953 | 0 | return "SM4 test encryption failed."; |
1954 | 0 | sm4_decrypt (&ctx, scratch, scratch); |
1955 | 0 | if (memcmp (scratch, plaintext, sizeof (plaintext))) |
1956 | 0 | return "SM4 test decryption failed."; |
1957 | | |
1958 | 0 | return NULL; |
1959 | 0 | } |
1960 | | |
1961 | | static gpg_err_code_t |
1962 | | run_selftests (int algo, int extended, selftest_report_func_t report) |
1963 | 0 | { |
1964 | 0 | const char *what; |
1965 | 0 | const char *errtxt; |
1966 | |
|
1967 | 0 | (void)extended; |
1968 | |
|
1969 | 0 | if (algo != GCRY_CIPHER_SM4) |
1970 | 0 | return GPG_ERR_CIPHER_ALGO; |
1971 | | |
1972 | 0 | what = "selftest"; |
1973 | 0 | errtxt = sm4_selftest (); |
1974 | 0 | if (errtxt) |
1975 | 0 | goto failed; |
1976 | | |
1977 | 0 | return 0; |
1978 | | |
1979 | 0 | failed: |
1980 | 0 | if (report) |
1981 | 0 | report ("cipher", GCRY_CIPHER_SM4, what, errtxt); |
1982 | 0 | return GPG_ERR_SELFTEST_FAILED; |
1983 | 0 | } |
1984 | | |
1985 | | |
/* ASN.1 object identifiers mapping SM4-with-mode OIDs to libgcrypt
   cipher modes (arc 1.2.156.10197.1.104.* — presumably the Chinese
   GM/OSCCA registry; TODO confirm against the registry).  The list is
   terminated by the NULL entry.  */
static const gcry_cipher_oid_spec_t sm4_oids[] =
  {
    { "1.2.156.10197.1.104.1", GCRY_CIPHER_MODE_ECB },
    { "1.2.156.10197.1.104.2", GCRY_CIPHER_MODE_CBC },
    { "1.2.156.10197.1.104.3", GCRY_CIPHER_MODE_OFB },
    { "1.2.156.10197.1.104.4", GCRY_CIPHER_MODE_CFB },
    { "1.2.156.10197.1.104.7", GCRY_CIPHER_MODE_CTR },
    { NULL }
  };
1995 | | |
/* Cipher specification exported to the libgcrypt core.  Positional
   initializer for gcry_cipher_spec_t (declared in cipher.h); the field
   meanings below follow that struct's layout — NOTE(review): verify
   against the gcry_cipher_spec_t declaration if the struct changes.
   The 16 and 128 are presumably block size in bytes and key length in
   bits, respectively; the two NULLs leave the stream-encrypt/decrypt
   hooks unset.  */
gcry_cipher_spec_t _gcry_cipher_spec_sm4 =
  {
    GCRY_CIPHER_SM4, {0, 0},
    "SM4", NULL, sm4_oids, 16, 128,
    sizeof (SM4_context),
    sm4_setkey, sm4_encrypt, sm4_decrypt,
    NULL, NULL,
    run_selftests
  };