/src/libgcrypt/cipher/sm4.c
Line | Count | Source |
1 | | /* sm4.c - SM4 Cipher Algorithm |
2 | | * Copyright (C) 2020 Alibaba Group. |
3 | | * Copyright (C) 2020-2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> |
4 | | * Copyright (C) 2020-2022 Jussi Kivilinna <jussi.kivilinna@iki.fi> |
5 | | * |
6 | | * This file is part of Libgcrypt. |
7 | | * |
8 | | * Libgcrypt is free software; you can redistribute it and/or modify |
9 | | * it under the terms of the GNU Lesser General Public License as |
10 | | * published by the Free Software Foundation; either version 2.1 of |
11 | | * the License, or (at your option) any later version. |
12 | | * |
13 | | * Libgcrypt is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | | * GNU Lesser General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU Lesser General Public |
19 | | * License along with this program; if not, see <http://www.gnu.org/licenses/>. |
20 | | */ |
21 | | |
22 | | #include <config.h> |
23 | | #include <stdio.h> |
24 | | #include <stdlib.h> |
25 | | |
26 | | #include "types.h" /* for byte and u32 typedefs */ |
27 | | #include "bithelp.h" |
28 | | #include "g10lib.h" |
29 | | #include "cipher.h" |
30 | | #include "bufhelp.h" |
31 | | #include "cipher-internal.h" |
32 | | #include "bulkhelp.h" |
33 | | |
34 | | /* Helper macro to force alignment to 64 bytes. */ |
35 | | #ifdef HAVE_GCC_ATTRIBUTE_ALIGNED |
36 | | # define ATTR_ALIGNED_64 __attribute__ ((aligned (64))) |
37 | | #else |
38 | | # define ATTR_ALIGNED_64 |
39 | | #endif |
40 | | |
41 | | /* USE_AESNI_AVX indicates whether to compile with Intel AES-NI/AVX code. */ |
42 | | #undef USE_AESNI_AVX |
43 | | #if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT) |
44 | | # if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ |
45 | | defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) |
46 | | # define USE_AESNI_AVX 1 |
47 | | # endif |
48 | | #endif |
49 | | |
50 | | /* USE_AESNI_AVX2 indicates whether to compile with Intel AES-NI/AVX2 code. */ |
51 | | #undef USE_AESNI_AVX2 |
52 | | #if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT) |
53 | | # if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ |
54 | | defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) |
55 | | # define USE_AESNI_AVX2 1 |
56 | | # endif |
57 | | #endif |
58 | | |
59 | | /* USE_GFNI_AVX2 indicates whether to compile with Intel GFNI/AVX2 code. */ |
60 | | #undef USE_GFNI_AVX2 |
61 | | #if defined(ENABLE_GFNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT) |
62 | | # if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ |
63 | | defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) |
64 | | # define USE_GFNI_AVX2 1 |
65 | | # endif |
66 | | #endif |
67 | | |
68 | | /* USE_GFNI_AVX512 indicates whether to compile with Intel GFNI/AVX512 code. */ |
69 | | #undef USE_GFNI_AVX512 |
70 | | #if defined(ENABLE_GFNI_SUPPORT) && defined(ENABLE_AVX512_SUPPORT) |
71 | | # if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ |
72 | | defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) |
73 | | # define USE_GFNI_AVX512 1 |
74 | | # endif |
75 | | #endif |
76 | | |
77 | | /* Assembly implementations use the SystemV ABI; ABI conversion and additional |
78 | | * stack space to store XMM6-XMM15 are needed on Win64. */ |
79 | | #undef ASM_FUNC_ABI |
80 | | #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) || \ |
81 | | defined(USE_GFNI_AVX2) || defined(USE_GFNI_AVX512) |
82 | | # ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS |
83 | | # define ASM_FUNC_ABI __attribute__((sysv_abi)) |
84 | | # else |
85 | | # define ASM_FUNC_ABI |
86 | | # endif |
87 | | #endif |
88 | | |
89 | | #undef USE_AARCH64_SIMD |
90 | | #ifdef ENABLE_NEON_SUPPORT |
91 | | # if defined(__AARCH64EL__) && \ |
92 | | defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ |
93 | | defined(HAVE_GCC_INLINE_ASM_AARCH64_NEON) |
94 | | # define USE_AARCH64_SIMD 1 |
95 | | # endif |
96 | | #endif |
97 | | |
98 | | #undef USE_ARM_CE |
99 | | #ifdef ENABLE_ARM_CRYPTO_SUPPORT |
100 | | # if defined(__AARCH64EL__) && \ |
101 | | defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ |
102 | | defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) |
103 | | # define USE_ARM_CE 1 |
104 | | # endif |
105 | | #endif |
106 | | |
107 | | #undef USE_ARM_SVE_CE |
108 | | #ifdef ENABLE_SVE_SUPPORT |
109 | | # if defined(__AARCH64EL__) && \ |
110 | | defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ |
111 | | defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) && \ |
112 | | defined(HAVE_GCC_INLINE_ASM_AARCH64_SVE) && \ |
113 | | defined(HAVE_GCC_INLINE_ASM_AARCH64_SVE2) |
114 | | # define USE_ARM_SVE_CE 1 |
115 | | # endif |
116 | | #endif |
117 | | |
118 | | #undef USE_PPC_CRYPTO |
119 | | #if defined(ENABLE_PPC_CRYPTO_SUPPORT) && \ |
120 | | defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \ |
121 | | defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) && \ |
122 | | !defined(WORDS_BIGENDIAN) && (__GNUC__ >= 4) |
123 | | # define USE_PPC_CRYPTO 1 |
124 | | #endif |
125 | | |
126 | | static const char *sm4_selftest (void); |
127 | | |
128 | | static void _gcry_sm4_ctr_enc (void *context, unsigned char *ctr, |
129 | | void *outbuf_arg, const void *inbuf_arg, |
130 | | size_t nblocks); |
131 | | static void _gcry_sm4_cbc_dec (void *context, unsigned char *iv, |
132 | | void *outbuf_arg, const void *inbuf_arg, |
133 | | size_t nblocks); |
134 | | static void _gcry_sm4_cfb_dec (void *context, unsigned char *iv, |
135 | | void *outbuf_arg, const void *inbuf_arg, |
136 | | size_t nblocks); |
137 | | static void _gcry_sm4_xts_crypt (void *context, unsigned char *tweak, |
138 | | void *outbuf_arg, const void *inbuf_arg, |
139 | | size_t nblocks, int encrypt); |
140 | | static void _gcry_sm4_ecb_crypt (void *context, void *outbuf_arg, |
141 | | const void *inbuf_arg, size_t nblocks, |
142 | | int encrypt); |
143 | | static void _gcry_sm4_ctr32le_enc(void *context, unsigned char *ctr, |
144 | | void *outbuf_arg, const void *inbuf_arg, |
145 | | size_t nblocks); |
146 | | static size_t _gcry_sm4_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, |
147 | | const void *inbuf_arg, size_t nblocks, |
148 | | int encrypt); |
149 | | static size_t _gcry_sm4_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, |
150 | | size_t nblocks); |
151 | | |
152 | | typedef bulk_crypt_fn_t crypt_blk1_16_fn_t; |
153 | | |
154 | | typedef struct |
155 | | { |
156 | | u32 rkey_enc[32]; |
157 | | u32 rkey_dec[32]; |
158 | | crypt_blk1_16_fn_t crypt_blk1_16; |
159 | | #ifdef USE_AESNI_AVX |
160 | | unsigned int use_aesni_avx:1; |
161 | | #endif |
162 | | #ifdef USE_AESNI_AVX2 |
163 | | unsigned int use_aesni_avx2:1; |
164 | | #endif |
165 | | #ifdef USE_GFNI_AVX2 |
166 | | unsigned int use_gfni_avx2:1; |
167 | | #endif |
168 | | #ifdef USE_GFNI_AVX512 |
169 | | unsigned int use_gfni_avx512:1; |
170 | | #endif |
171 | | #ifdef USE_AARCH64_SIMD |
172 | | unsigned int use_aarch64_simd:1; |
173 | | #endif |
174 | | #ifdef USE_ARM_CE |
175 | | unsigned int use_arm_ce:1; |
176 | | #endif |
177 | | #ifdef USE_ARM_SVE_CE |
178 | | unsigned int use_arm_sve_ce:1; |
179 | | #endif |
180 | | #ifdef USE_PPC_CRYPTO |
181 | | unsigned int use_ppc8le:1; |
182 | | unsigned int use_ppc9le:1; |
183 | | #endif |
184 | | } SM4_context; |
185 | | |
186 | | static const u32 fk[4] = |
187 | | { |
188 | | 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc |
189 | | }; |
190 | | |
191 | | static struct |
192 | | { |
193 | | volatile u32 counter_head; |
194 | | u32 cacheline_align[64 / 4 - 1]; |
195 | | byte S[256]; |
196 | | volatile u32 counter_tail; |
197 | | } sbox_table ATTR_ALIGNED_64 = |
198 | | { |
199 | | 0, |
200 | | { 0, }, |
201 | | { |
202 | | 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, |
203 | | 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05, |
204 | | 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3, |
205 | | 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, |
206 | | 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a, |
207 | | 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62, |
208 | | 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95, |
209 | | 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6, |
210 | | 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba, |
211 | | 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8, |
212 | | 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b, |
213 | | 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35, |
214 | | 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2, |
215 | | 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87, |
216 | | 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52, |
217 | | 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e, |
218 | | 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5, |
219 | | 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1, |
220 | | 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55, |
221 | | 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3, |
222 | | 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60, |
223 | | 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f, |
224 | | 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f, |
225 | | 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51, |
226 | | 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f, |
227 | | 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8, |
228 | | 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd, |
229 | | 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0, |
230 | | 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e, |
231 | | 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84, |
232 | | 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20, |
233 | | 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48 |
234 | | }, |
235 | | 0 |
236 | | }; |
237 | | |
238 | | static const u32 ck[] = |
239 | | { |
240 | | 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269, |
241 | | 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9, |
242 | | 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249, |
243 | | 0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9, |
244 | | 0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229, |
245 | | 0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299, |
246 | | 0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209, |
247 | | 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279 |
248 | | }; |
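/* Note on the CK table above: the constants are not arbitrary.  In the SM4
 * specification, byte j of CK[i] is defined as (4*i + j) * 7 mod 256, packed
 * big-endian.  A minimal generator sketch (hypothetical helper, shown only to
 * document where the table comes from; it is not used by this file):
 *
 *   static void gen_ck (u32 out[32])
 *   {
 *     int i, j;
 *
 *     for (i = 0; i < 32; i++)
 *       {
 *         u32 w = 0;
 *         for (j = 0; j < 4; j++)
 *           w = (w << 8) | (u32)(((4 * i + j) * 7) & 0xff);
 *         out[i] = w;
 *       }
 *   }
 */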
249 | | |
250 | | static inline crypt_blk1_16_fn_t sm4_get_crypt_blk1_16_fn(SM4_context *ctx); |
251 | | |
252 | | #ifdef USE_AESNI_AVX |
253 | | extern void _gcry_sm4_aesni_avx_expand_key(const byte *key, u32 *rk_enc, |
254 | | u32 *rk_dec, const u32 *fk, |
255 | | const u32 *ck) ASM_FUNC_ABI; |
256 | | |
257 | | extern void _gcry_sm4_aesni_avx_ctr_enc(const u32 *rk_enc, byte *out, |
258 | | const byte *in, byte *ctr) ASM_FUNC_ABI; |
259 | | |
260 | | extern void _gcry_sm4_aesni_avx_cbc_dec(const u32 *rk_dec, byte *out, |
261 | | const byte *in, byte *iv) ASM_FUNC_ABI; |
262 | | |
263 | | extern void _gcry_sm4_aesni_avx_cfb_dec(const u32 *rk_enc, byte *out, |
264 | | const byte *in, byte *iv) ASM_FUNC_ABI; |
265 | | |
266 | | extern void _gcry_sm4_aesni_avx_ocb_enc(const u32 *rk_enc, |
267 | | unsigned char *out, |
268 | | const unsigned char *in, |
269 | | unsigned char *offset, |
270 | | unsigned char *checksum, |
271 | | const u64 Ls[8]) ASM_FUNC_ABI; |
272 | | |
273 | | extern void _gcry_sm4_aesni_avx_ocb_dec(const u32 *rk_dec, |
274 | | unsigned char *out, |
275 | | const unsigned char *in, |
276 | | unsigned char *offset, |
277 | | unsigned char *checksum, |
278 | | const u64 Ls[8]) ASM_FUNC_ABI; |
279 | | |
280 | | extern void _gcry_sm4_aesni_avx_ocb_auth(const u32 *rk_enc, |
281 | | const unsigned char *abuf, |
282 | | unsigned char *offset, |
283 | | unsigned char *checksum, |
284 | | const u64 Ls[8]) ASM_FUNC_ABI; |
285 | | |
286 | | extern unsigned int |
287 | | _gcry_sm4_aesni_avx_crypt_blk1_8(u32 *rk, byte *out, const byte *in, |
288 | | unsigned int num_blks) ASM_FUNC_ABI; |
289 | | |
290 | | static inline unsigned int |
291 | | sm4_aesni_avx_crypt_blk1_16(void *rk, byte *out, const byte *in, |
292 | | size_t num_blks) |
293 | 0 | { |
294 | 0 | if (num_blks > 8) |
295 | 0 | { |
296 | 0 | _gcry_sm4_aesni_avx_crypt_blk1_8(rk, out, in, 8); |
297 | 0 | in += 8 * 16; |
298 | 0 | out += 8 * 16; |
299 | 0 | num_blks -= 8; |
300 | 0 | } |
301 | |
|
302 | 0 | return _gcry_sm4_aesni_avx_crypt_blk1_8(rk, out, in, num_blks); |
303 | 0 | } |
304 | | |
305 | | #endif /* USE_AESNI_AVX */ |
306 | | |
307 | | #ifdef USE_AESNI_AVX2 |
308 | | extern void _gcry_sm4_aesni_avx2_ctr_enc(const u32 *rk_enc, byte *out, |
309 | | const byte *in, |
310 | | byte *ctr) ASM_FUNC_ABI; |
311 | | |
312 | | extern void _gcry_sm4_aesni_avx2_cbc_dec(const u32 *rk_dec, byte *out, |
313 | | const byte *in, |
314 | | byte *iv) ASM_FUNC_ABI; |
315 | | |
316 | | extern void _gcry_sm4_aesni_avx2_cfb_dec(const u32 *rk_enc, byte *out, |
317 | | const byte *in, |
318 | | byte *iv) ASM_FUNC_ABI; |
319 | | |
320 | | extern void _gcry_sm4_aesni_avx2_ocb_enc(const u32 *rk_enc, |
321 | | unsigned char *out, |
322 | | const unsigned char *in, |
323 | | unsigned char *offset, |
324 | | unsigned char *checksum, |
325 | | const u64 Ls[16]) ASM_FUNC_ABI; |
326 | | |
327 | | extern void _gcry_sm4_aesni_avx2_ocb_dec(const u32 *rk_dec, |
328 | | unsigned char *out, |
329 | | const unsigned char *in, |
330 | | unsigned char *offset, |
331 | | unsigned char *checksum, |
332 | | const u64 Ls[16]) ASM_FUNC_ABI; |
333 | | |
334 | | extern void _gcry_sm4_aesni_avx2_ocb_auth(const u32 *rk_enc, |
335 | | const unsigned char *abuf, |
336 | | unsigned char *offset, |
337 | | unsigned char *checksum, |
338 | | const u64 Ls[16]) ASM_FUNC_ABI; |
339 | | |
340 | | extern unsigned int |
341 | | _gcry_sm4_aesni_avx2_crypt_blk1_16(u32 *rk, byte *out, const byte *in, |
342 | | unsigned int num_blks) ASM_FUNC_ABI; |
343 | | |
344 | | static inline unsigned int |
345 | | sm4_aesni_avx2_crypt_blk1_16(void *rk, byte *out, const byte *in, |
346 | | size_t num_blks) |
347 | 0 | { |
348 | 0 | #ifdef USE_AESNI_AVX |
349 | | /* Use 128-bit register implementation for short input. */ |
350 | 0 | if (num_blks <= 8) |
351 | 0 | return _gcry_sm4_aesni_avx_crypt_blk1_8(rk, out, in, num_blks); |
352 | 0 | #endif |
353 | | |
354 | 0 | return _gcry_sm4_aesni_avx2_crypt_blk1_16(rk, out, in, num_blks); |
355 | 0 | } |
356 | | |
357 | | #endif /* USE_AESNI_AVX2 */ |
358 | | |
359 | | #ifdef USE_GFNI_AVX2 |
360 | | extern void _gcry_sm4_gfni_avx2_expand_key(const byte *key, u32 *rk_enc, |
361 | | u32 *rk_dec, const u32 *fk, |
362 | | const u32 *ck) ASM_FUNC_ABI; |
363 | | |
364 | | extern void _gcry_sm4_gfni_avx2_ctr_enc(const u32 *rk_enc, byte *out, |
365 | | const byte *in, |
366 | | byte *ctr) ASM_FUNC_ABI; |
367 | | |
368 | | extern void _gcry_sm4_gfni_avx2_cbc_dec(const u32 *rk_dec, byte *out, |
369 | | const byte *in, |
370 | | byte *iv) ASM_FUNC_ABI; |
371 | | |
372 | | extern void _gcry_sm4_gfni_avx2_cfb_dec(const u32 *rk_enc, byte *out, |
373 | | const byte *in, |
374 | | byte *iv) ASM_FUNC_ABI; |
375 | | |
376 | | extern void _gcry_sm4_gfni_avx2_ocb_enc(const u32 *rk_enc, |
377 | | unsigned char *out, |
378 | | const unsigned char *in, |
379 | | unsigned char *offset, |
380 | | unsigned char *checksum, |
381 | | const u64 Ls[16]) ASM_FUNC_ABI; |
382 | | |
383 | | extern void _gcry_sm4_gfni_avx2_ocb_dec(const u32 *rk_dec, |
384 | | unsigned char *out, |
385 | | const unsigned char *in, |
386 | | unsigned char *offset, |
387 | | unsigned char *checksum, |
388 | | const u64 Ls[16]) ASM_FUNC_ABI; |
389 | | |
390 | | extern void _gcry_sm4_gfni_avx2_ocb_auth(const u32 *rk_enc, |
391 | | const unsigned char *abuf, |
392 | | unsigned char *offset, |
393 | | unsigned char *checksum, |
394 | | const u64 Ls[16]) ASM_FUNC_ABI; |
395 | | |
396 | | extern unsigned int |
397 | | _gcry_sm4_gfni_avx2_crypt_blk1_16(u32 *rk, byte *out, const byte *in, |
398 | | unsigned int num_blks) ASM_FUNC_ABI; |
399 | | |
400 | | static inline unsigned int |
401 | | sm4_gfni_avx2_crypt_blk1_16(void *rk, byte *out, const byte *in, |
402 | | size_t num_blks) |
403 | 0 | { |
404 | 0 | return _gcry_sm4_gfni_avx2_crypt_blk1_16(rk, out, in, num_blks); |
405 | 0 | } |
406 | | |
407 | | #endif /* USE_GFNI_AVX2 */ |
408 | | |
409 | | #ifdef USE_GFNI_AVX512 |
410 | | extern void _gcry_sm4_gfni_avx512_expand_key(const byte *key, u32 *rk_enc, |
411 | | u32 *rk_dec, const u32 *fk, |
412 | | const u32 *ck) ASM_FUNC_ABI; |
413 | | |
414 | | extern void _gcry_sm4_gfni_avx512_ctr_enc(const u32 *rk_enc, byte *out, |
415 | | const byte *in, |
416 | | byte *ctr) ASM_FUNC_ABI; |
417 | | |
418 | | extern void _gcry_sm4_gfni_avx512_cbc_dec(const u32 *rk_dec, byte *out, |
419 | | const byte *in, |
420 | | byte *iv) ASM_FUNC_ABI; |
421 | | |
422 | | extern void _gcry_sm4_gfni_avx512_cfb_dec(const u32 *rk_enc, byte *out, |
423 | | const byte *in, |
424 | | byte *iv) ASM_FUNC_ABI; |
425 | | |
426 | | extern void _gcry_sm4_gfni_avx512_ocb_enc(const u32 *rk_enc, |
427 | | unsigned char *out, |
428 | | const unsigned char *in, |
429 | | unsigned char *offset, |
430 | | unsigned char *checksum, |
431 | | const u64 Ls[16]) ASM_FUNC_ABI; |
432 | | |
433 | | extern void _gcry_sm4_gfni_avx512_ocb_dec(const u32 *rk_dec, |
434 | | unsigned char *out, |
435 | | const unsigned char *in, |
436 | | unsigned char *offset, |
437 | | unsigned char *checksum, |
438 | | const u64 Ls[16]) ASM_FUNC_ABI; |
439 | | |
440 | | extern void _gcry_sm4_gfni_avx512_ocb_auth(const u32 *rk_enc, |
441 | | const unsigned char *abuf, |
442 | | unsigned char *offset, |
443 | | unsigned char *checksum, |
444 | | const u64 Ls[16]) ASM_FUNC_ABI; |
445 | | |
446 | | extern void _gcry_sm4_gfni_avx512_ctr_enc_blk32(const u32 *rk_enc, byte *out, |
447 | | const byte *in, |
448 | | byte *ctr) ASM_FUNC_ABI; |
449 | | |
450 | | extern void _gcry_sm4_gfni_avx512_cbc_dec_blk32(const u32 *rk_enc, byte *out, |
451 | | const byte *in, |
452 | | byte *iv) ASM_FUNC_ABI; |
453 | | |
454 | | extern void _gcry_sm4_gfni_avx512_cfb_dec_blk32(const u32 *rk_enc, byte *out, |
455 | | const byte *in, |
456 | | byte *iv) ASM_FUNC_ABI; |
457 | | |
458 | | extern void _gcry_sm4_gfni_avx512_ocb_enc_blk32(const u32 *rk_enc, |
459 | | unsigned char *out, |
460 | | const unsigned char *in, |
461 | | unsigned char *offset, |
462 | | unsigned char *checksum, |
463 | | const u64 Ls[32]) ASM_FUNC_ABI; |
464 | | |
465 | | extern void _gcry_sm4_gfni_avx512_ocb_dec_blk32(const u32 *rk_dec, |
466 | | unsigned char *out, |
467 | | const unsigned char *in, |
468 | | unsigned char *offset, |
469 | | unsigned char *checksum, |
470 | | const u64 Ls[32]) ASM_FUNC_ABI; |
471 | | |
472 | | extern unsigned int |
473 | | _gcry_sm4_gfni_avx512_crypt_blk1_16(u32 *rk, byte *out, const byte *in, |
474 | | unsigned int num_blks) ASM_FUNC_ABI; |
475 | | |
476 | | extern unsigned int |
477 | | _gcry_sm4_gfni_avx512_crypt_blk32(u32 *rk, byte *out, |
478 | | const byte *in) ASM_FUNC_ABI; |
479 | | |
480 | | static inline unsigned int |
481 | | sm4_gfni_avx512_crypt_blk1_16(void *rk, byte *out, const byte *in, |
482 | | size_t num_blks) |
483 | 0 | { |
484 | 0 | return _gcry_sm4_gfni_avx512_crypt_blk1_16(rk, out, in, num_blks); |
485 | 0 | } |
486 | | |
487 | | #endif /* USE_GFNI_AVX512 */ |
488 | | |
489 | | #ifdef USE_AARCH64_SIMD |
490 | | extern void _gcry_sm4_aarch64_crypt(const u32 *rk, byte *out, |
491 | | const byte *in, |
492 | | size_t num_blocks); |
493 | | |
494 | | extern void _gcry_sm4_aarch64_ctr_enc(const u32 *rk_enc, byte *out, |
495 | | const byte *in, |
496 | | byte *ctr, |
497 | | size_t nblocks); |
498 | | |
499 | | extern void _gcry_sm4_aarch64_cbc_dec(const u32 *rk_dec, byte *out, |
500 | | const byte *in, |
501 | | byte *iv, |
502 | | size_t nblocks); |
503 | | |
504 | | extern void _gcry_sm4_aarch64_cfb_dec(const u32 *rk_enc, byte *out, |
505 | | const byte *in, |
506 | | byte *iv, |
507 | | size_t nblocks); |
508 | | |
509 | | extern void _gcry_sm4_aarch64_crypt_blk1_8(u32 *rk, byte *out, |
510 | | const byte *in, |
511 | | size_t num_blocks); |
512 | | |
513 | | static inline unsigned int |
514 | | sm4_aarch64_crypt_blk1_16(void *rk, byte *out, const byte *in, |
515 | | size_t num_blks) |
516 | | { |
517 | | if (num_blks > 8) |
518 | | { |
519 | | _gcry_sm4_aarch64_crypt_blk1_8(rk, out, in, 8); |
520 | | in += 8 * 16; |
521 | | out += 8 * 16; |
522 | | num_blks -= 8; |
523 | | } |
524 | | |
525 | | _gcry_sm4_aarch64_crypt_blk1_8(rk, out, in, num_blks); |
526 | | return 0; |
527 | | } |
528 | | |
529 | | #endif /* USE_AARCH64_SIMD */ |
530 | | |
531 | | #ifdef USE_ARM_CE |
532 | | extern void _gcry_sm4_armv8_ce_expand_key(const byte *key, |
533 | | u32 *rkey_enc, u32 *rkey_dec, |
534 | | const u32 *fk, const u32 *ck); |
535 | | |
536 | | extern void _gcry_sm4_armv8_ce_crypt(const u32 *rk, byte *out, |
537 | | const byte *in, |
538 | | size_t num_blocks); |
539 | | |
540 | | extern void _gcry_sm4_armv8_ce_ctr_enc(const u32 *rk_enc, byte *out, |
541 | | const byte *in, |
542 | | byte *ctr, |
543 | | size_t nblocks); |
544 | | |
545 | | extern void _gcry_sm4_armv8_ce_cbc_dec(const u32 *rk_dec, byte *out, |
546 | | const byte *in, |
547 | | byte *iv, |
548 | | size_t nblocks); |
549 | | |
550 | | extern void _gcry_sm4_armv8_ce_cfb_dec(const u32 *rk_enc, byte *out, |
551 | | const byte *in, |
552 | | byte *iv, |
553 | | size_t nblocks); |
554 | | |
555 | | extern void _gcry_sm4_armv8_ce_xts_crypt(const u32 *rk, byte *out, |
556 | | const byte *in, |
557 | | byte *tweak, |
558 | | size_t nblocks); |
559 | | |
560 | | extern void _gcry_sm4_armv8_ce_crypt_blk1_8(u32 *rk, byte *out, |
561 | | const byte *in, |
562 | | size_t num_blocks); |
563 | | |
564 | | static inline unsigned int |
565 | | sm4_armv8_ce_crypt_blk1_16(void *rk, byte *out, const byte *in, |
566 | | size_t num_blks) |
567 | | { |
568 | | if (num_blks > 8) |
569 | | { |
570 | | _gcry_sm4_armv8_ce_crypt_blk1_8(rk, out, in, 8); |
571 | | in += 8 * 16; |
572 | | out += 8 * 16; |
573 | | num_blks -= 8; |
574 | | } |
575 | | |
576 | | _gcry_sm4_armv8_ce_crypt_blk1_8(rk, out, in, num_blks); |
577 | | return 0; |
578 | | } |
579 | | |
580 | | #endif /* USE_ARM_CE */ |
581 | | |
582 | | #ifdef USE_ARM_SVE_CE |
583 | | extern void _gcry_sm4_armv9_sve_ce_crypt(u32 *rk, byte *out, |
584 | | const byte *in, |
585 | | size_t nblocks); |
586 | | |
587 | | extern void _gcry_sm4_armv9_sve_ce_ctr_enc(const u32 *rk_enc, byte *out, |
588 | | const byte *in, |
589 | | byte *ctr, |
590 | | size_t nblocks); |
591 | | |
592 | | extern void _gcry_sm4_armv9_sve_ce_cbc_dec(const u32 *rk_dec, byte *out, |
593 | | const byte *in, |
594 | | byte *iv, |
595 | | size_t nblocks); |
596 | | |
597 | | extern void _gcry_sm4_armv9_sve_ce_cfb_dec(const u32 *rk_enc, byte *out, |
598 | | const byte *in, |
599 | | byte *iv, |
600 | | size_t nblocks); |
601 | | |
602 | | static inline unsigned int |
603 | | sm4_armv9_sve_ce_crypt_blk1_16(void *rk, byte *out, const byte *in, |
604 | | size_t num_blks) |
605 | | { |
606 | | _gcry_sm4_armv9_sve_ce_crypt(rk, out, in, num_blks); |
607 | | return 0; |
608 | | } |
609 | | |
610 | | extern unsigned int _gcry_sm4_armv9_sve_get_vl(void); |
611 | | #endif /* USE_ARM_SVE_CE */ |
612 | | |
613 | | #ifdef USE_PPC_CRYPTO |
614 | | extern void _gcry_sm4_ppc8le_crypt_blk1_16(u32 *rk, byte *out, const byte *in, |
615 | | size_t num_blks); |
616 | | |
617 | | extern void _gcry_sm4_ppc9le_crypt_blk1_16(u32 *rk, byte *out, const byte *in, |
618 | | size_t num_blks); |
619 | | |
620 | | static inline unsigned int |
621 | | sm4_ppc8le_crypt_blk1_16(void *rk, byte *out, const byte *in, size_t num_blks) |
622 | | { |
623 | | _gcry_sm4_ppc8le_crypt_blk1_16(rk, out, in, num_blks); |
624 | | return 0; |
625 | | } |
626 | | |
627 | | static inline unsigned int |
628 | | sm4_ppc9le_crypt_blk1_16(void *rk, byte *out, const byte *in, size_t num_blks) |
629 | | { |
630 | | _gcry_sm4_ppc9le_crypt_blk1_16(rk, out, in, num_blks); |
631 | | return 0; |
632 | | } |
633 | | #endif /* USE_PPC_CRYPTO */ |
634 | | |
635 | | static inline void prefetch_sbox_table(void) |
636 | 1.52k | { |
637 | 1.52k | const volatile byte *vtab = (void *)&sbox_table; |
638 | | |
639 | | /* Modify the counters to trigger copy-on-write and unsharing if physical pages |
640 | | * of the look-up table are shared between processes. Modifying the counters |
641 | | * also causes the page checksums to change and hints the same-page merging |
642 | | * algorithm that these pages are frequently changing. */ |
643 | 1.52k | sbox_table.counter_head++; |
644 | 1.52k | sbox_table.counter_tail++; |
645 | | |
646 | | /* Prefetch look-up table to cache. */ |
647 | 1.52k | (void)vtab[0 * 32]; |
648 | 1.52k | (void)vtab[1 * 32]; |
649 | 1.52k | (void)vtab[2 * 32]; |
650 | 1.52k | (void)vtab[3 * 32]; |
651 | 1.52k | (void)vtab[4 * 32]; |
652 | 1.52k | (void)vtab[5 * 32]; |
653 | 1.52k | (void)vtab[6 * 32]; |
654 | 1.52k | (void)vtab[7 * 32]; |
655 | 1.52k | (void)vtab[8 * 32 - 1]; |
656 | 1.52k | } |
657 | | |
658 | | static inline u32 sm4_t_non_lin_sub(u32 x) |
659 | 48.8k | { |
660 | 48.8k | u32 out; |
661 | | |
662 | 48.8k | out = (u32)sbox_table.S[(x >> 0) & 0xff] << 0; |
663 | 48.8k | out |= (u32)sbox_table.S[(x >> 8) & 0xff] << 8; |
664 | 48.8k | out |= (u32)sbox_table.S[(x >> 16) & 0xff] << 16; |
665 | 48.8k | out |= (u32)sbox_table.S[(x >> 24) & 0xff] << 24; |
666 | | |
667 | 48.8k | return out; |
668 | 48.8k | } |
669 | | |
670 | | static inline u32 sm4_key_lin_sub(u32 x) |
671 | 128 | { |
672 | 128 | return x ^ rol(x, 13) ^ rol(x, 23); |
673 | 128 | } |
674 | | |
675 | | static inline u32 sm4_enc_lin_sub(u32 x) |
676 | 48.7k | { |
677 | 48.7k | u32 xrol2 = rol(x, 2); |
678 | 48.7k | return x ^ xrol2 ^ rol(xrol2, 8) ^ rol(xrol2, 16) ^ rol(x, 24); |
679 | 48.7k | } |
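/* For reference: sm4_key_lin_sub() is the key-schedule linear transform
 * L'(B) = B ^ (B <<< 13) ^ (B <<< 23) and sm4_enc_lin_sub() is the data-path
 * transform L(B) = B ^ (B <<< 2) ^ (B <<< 10) ^ (B <<< 18) ^ (B <<< 24); the
 * xrol2 temporary folds the 10- and 18-bit rotations into rotations of
 * rol(x, 2) by 8 and 16, since rol(rol(x, 2), 8) == rol(x, 10) and
 * rol(rol(x, 2), 16) == rol(x, 18). */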
680 | | |
681 | | static inline u32 sm4_key_sub(u32 x) |
682 | 128 | { |
683 | 128 | return sm4_key_lin_sub(sm4_t_non_lin_sub(x)); |
684 | 128 | } |
685 | | |
686 | | static inline u32 sm4_enc_sub(u32 x) |
687 | 48.7k | { |
688 | 48.7k | return sm4_enc_lin_sub(sm4_t_non_lin_sub(x)); |
689 | 48.7k | } |
690 | | |
691 | | static inline u32 |
692 | | sm4_round(const u32 x0, const u32 x1, const u32 x2, const u32 x3, const u32 rk) |
693 | 48.7k | { |
694 | 48.7k | return x0 ^ sm4_enc_sub(x1 ^ x2 ^ x3 ^ rk); |
695 | 48.7k | } |
696 | | |
697 | | static void |
698 | | sm4_expand_key (SM4_context *ctx, const byte *key) |
699 | 14 | { |
700 | 14 | u32 rk[4]; |
701 | 14 | int i; |
702 | | |
703 | 14 | #ifdef USE_GFNI_AVX512 |
704 | 14 | if (ctx->use_gfni_avx512) |
705 | 0 | { |
706 | 0 | _gcry_sm4_gfni_avx512_expand_key (key, ctx->rkey_enc, ctx->rkey_dec, |
707 | 0 | fk, ck); |
708 | 0 | return; |
709 | 0 | } |
710 | 14 | #endif |
711 | | |
712 | 14 | #ifdef USE_GFNI_AVX2 |
713 | 14 | if (ctx->use_gfni_avx2) |
714 | 0 | { |
715 | 0 | _gcry_sm4_gfni_avx2_expand_key (key, ctx->rkey_enc, ctx->rkey_dec, |
716 | 0 | fk, ck); |
717 | 0 | return; |
718 | 0 | } |
719 | 14 | #endif |
720 | | |
721 | 14 | #ifdef USE_AESNI_AVX |
722 | 14 | if (ctx->use_aesni_avx) |
723 | 10 | { |
724 | 10 | _gcry_sm4_aesni_avx_expand_key (key, ctx->rkey_enc, ctx->rkey_dec, |
725 | 10 | fk, ck); |
726 | 10 | return; |
727 | 10 | } |
728 | 4 | #endif |
729 | | |
730 | | #ifdef USE_ARM_CE |
731 | | if (ctx->use_arm_ce) |
732 | | { |
733 | | _gcry_sm4_armv8_ce_expand_key (key, ctx->rkey_enc, ctx->rkey_dec, |
734 | | fk, ck); |
735 | | return; |
736 | | } |
737 | | #endif |
738 | | |
739 | 4 | prefetch_sbox_table (); |
740 | | |
741 | 4 | rk[0] = buf_get_be32(key + 4 * 0) ^ fk[0]; |
742 | 4 | rk[1] = buf_get_be32(key + 4 * 1) ^ fk[1]; |
743 | 4 | rk[2] = buf_get_be32(key + 4 * 2) ^ fk[2]; |
744 | 4 | rk[3] = buf_get_be32(key + 4 * 3) ^ fk[3]; |
745 | | |
746 | 36 | for (i = 0; i < 32; i += 4) |
747 | 32 | { |
748 | 32 | rk[0] = rk[0] ^ sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i + 0]); |
749 | 32 | rk[1] = rk[1] ^ sm4_key_sub(rk[2] ^ rk[3] ^ rk[0] ^ ck[i + 1]); |
750 | 32 | rk[2] = rk[2] ^ sm4_key_sub(rk[3] ^ rk[0] ^ rk[1] ^ ck[i + 2]); |
751 | 32 | rk[3] = rk[3] ^ sm4_key_sub(rk[0] ^ rk[1] ^ rk[2] ^ ck[i + 3]); |
752 | 32 | ctx->rkey_enc[i + 0] = rk[0]; |
753 | 32 | ctx->rkey_enc[i + 1] = rk[1]; |
754 | 32 | ctx->rkey_enc[i + 2] = rk[2]; |
755 | 32 | ctx->rkey_enc[i + 3] = rk[3]; |
756 | 32 | ctx->rkey_dec[31 - i - 0] = rk[0]; |
757 | 32 | ctx->rkey_dec[31 - i - 1] = rk[1]; |
758 | 32 | ctx->rkey_dec[31 - i - 2] = rk[2]; |
759 | 32 | ctx->rkey_dec[31 - i - 3] = rk[3]; |
760 | 32 | } |
761 | | |
762 | 4 | wipememory (rk, sizeof(rk)); |
763 | 4 | } |
764 | | |
765 | | static gcry_err_code_t |
766 | | sm4_setkey (void *context, const byte *key, const unsigned keylen, |
767 | | cipher_bulk_ops_t *bulk_ops) |
768 | 14 | { |
769 | 14 | SM4_context *ctx = context; |
770 | 14 | static int init = 0; |
771 | 14 | static const char *selftest_failed = NULL; |
772 | 14 | unsigned int hwf = _gcry_get_hw_features (); |
773 | | |
774 | 14 | (void)hwf; |
775 | | |
776 | 14 | if (!init) |
777 | 4 | { |
778 | 4 | init = 1; |
779 | 4 | selftest_failed = sm4_selftest(); |
780 | 4 | if (selftest_failed) |
781 | 0 | log_error("%s\n", selftest_failed); |
782 | 4 | } |
783 | 14 | if (selftest_failed) |
784 | 0 | return GPG_ERR_SELFTEST_FAILED; |
785 | | |
786 | 14 | if (keylen != 16) |
787 | 4 | return GPG_ERR_INV_KEYLEN; |
788 | | |
789 | 10 | #ifdef USE_AESNI_AVX |
790 | 10 | ctx->use_aesni_avx = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX); |
791 | 10 | #endif |
792 | 10 | #ifdef USE_AESNI_AVX2 |
793 | 10 | ctx->use_aesni_avx2 = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX2); |
794 | 10 | #endif |
795 | 10 | #ifdef USE_GFNI_AVX2 |
796 | 10 | ctx->use_gfni_avx2 = (hwf & HWF_INTEL_GFNI) && (hwf & HWF_INTEL_AVX2); |
797 | 10 | #endif |
798 | 10 | #ifdef USE_GFNI_AVX512 |
799 | 10 | ctx->use_gfni_avx512 = (hwf & HWF_INTEL_GFNI) && (hwf & HWF_INTEL_AVX512); |
800 | 10 | #endif |
801 | | #ifdef USE_AARCH64_SIMD |
802 | | ctx->use_aarch64_simd = !!(hwf & HWF_ARM_NEON); |
803 | | #endif |
804 | | #ifdef USE_ARM_CE |
805 | | ctx->use_arm_ce = !!(hwf & HWF_ARM_SM4); |
806 | | #endif |
807 | | #ifdef USE_ARM_SVE_CE |
808 | | /* Only enabled when the SVE vector length is greater than 128 bits. */ |
809 | | ctx->use_arm_sve_ce = (hwf & HWF_ARM_SVE2) && (hwf & HWF_ARM_SVESM4) |
810 | | && _gcry_sm4_armv9_sve_get_vl() > 16; |
811 | | #endif |
812 | | #ifdef USE_PPC_CRYPTO |
813 | | ctx->use_ppc8le = (hwf & HWF_PPC_VCRYPTO) != 0; |
814 | | ctx->use_ppc9le = (hwf & HWF_PPC_VCRYPTO) && (hwf & HWF_PPC_ARCH_3_00); |
815 | | #endif |
816 | | |
817 | 10 | #ifdef USE_GFNI_AVX2 |
818 | 10 | if (ctx->use_gfni_avx2) |
819 | 0 | { |
820 | | /* Disable AESNI implementations when GFNI implementation is enabled. */ |
821 | 0 | #ifdef USE_AESNI_AVX |
822 | 0 | ctx->use_aesni_avx = 0; |
823 | 0 | #endif |
824 | 0 | #ifdef USE_AESNI_AVX2 |
825 | 0 | ctx->use_aesni_avx2 = 0; |
826 | 0 | #endif |
827 | 0 | } |
828 | 10 | #endif |
829 | | |
830 | 10 | ctx->crypt_blk1_16 = sm4_get_crypt_blk1_16_fn(ctx); |
831 | | |
832 | | /* Setup bulk encryption routines. */ |
833 | 10 | memset (bulk_ops, 0, sizeof(*bulk_ops)); |
834 | 10 | bulk_ops->cbc_dec = _gcry_sm4_cbc_dec; |
835 | 10 | bulk_ops->cfb_dec = _gcry_sm4_cfb_dec; |
836 | 10 | bulk_ops->ctr_enc = _gcry_sm4_ctr_enc; |
837 | 10 | bulk_ops->xts_crypt = _gcry_sm4_xts_crypt; |
838 | 10 | bulk_ops->ecb_crypt = _gcry_sm4_ecb_crypt; |
839 | 10 | bulk_ops->ctr32le_enc = _gcry_sm4_ctr32le_enc; |
840 | 10 | bulk_ops->ocb_crypt = _gcry_sm4_ocb_crypt; |
841 | 10 | bulk_ops->ocb_auth = _gcry_sm4_ocb_auth; |
842 | | |
843 | 10 | sm4_expand_key (ctx, key); |
844 | 10 | return 0; |
845 | 14 | } |
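/* Caller-side sketch (application code, not part of libgcrypt itself) of the
 * path that reaches sm4_setkey() above; `in', `out' and `inlen' stand for
 * caller-provided buffers and error handling is omitted:
 *
 *   gcry_cipher_hd_t hd;
 *   static const unsigned char key[16] = "0123456789abcdef";
 *   unsigned char iv[16] = { 0 };
 *
 *   gcry_cipher_open (&hd, GCRY_CIPHER_SM4, GCRY_CIPHER_MODE_CBC, 0);
 *   gcry_cipher_setkey (hd, key, 16);
 *   gcry_cipher_setiv (hd, iv, 16);
 *   gcry_cipher_encrypt (hd, out, inlen, in, inlen);
 *   gcry_cipher_close (hd);
 *
 * Any key length other than 16 is rejected by sm4_setkey() with
 * GPG_ERR_INV_KEYLEN, and inlen must be a multiple of the 16-byte block
 * size in CBC mode. */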
846 | | |
847 | | static unsigned int |
848 | | sm4_do_crypt (const u32 *rk, byte *out, const byte *in) |
849 | 1.52k | { |
850 | 1.52k | u32 x[4]; |
851 | 1.52k | int i; |
852 | | |
853 | 1.52k | x[0] = buf_get_be32(in + 0 * 4); |
854 | 1.52k | x[1] = buf_get_be32(in + 1 * 4); |
855 | 1.52k | x[2] = buf_get_be32(in + 2 * 4); |
856 | 1.52k | x[3] = buf_get_be32(in + 3 * 4); |
857 | | |
858 | 13.6k | for (i = 0; i < 32; i += 4) |
859 | 12.1k | { |
860 | 12.1k | x[0] = sm4_round(x[0], x[1], x[2], x[3], rk[i + 0]); |
861 | 12.1k | x[1] = sm4_round(x[1], x[2], x[3], x[0], rk[i + 1]); |
862 | 12.1k | x[2] = sm4_round(x[2], x[3], x[0], x[1], rk[i + 2]); |
863 | 12.1k | x[3] = sm4_round(x[3], x[0], x[1], x[2], rk[i + 3]); |
864 | 12.1k | } |
865 | | |
866 | 1.52k | buf_put_be32(out + 0 * 4, x[3 - 0]); |
867 | 1.52k | buf_put_be32(out + 1 * 4, x[3 - 1]); |
868 | 1.52k | buf_put_be32(out + 2 * 4, x[3 - 2]); |
869 | 1.52k | buf_put_be32(out + 3 * 4, x[3 - 3]); |
870 | | |
871 | 1.52k | return /*burn_stack*/ 4*6+sizeof(void*)*4; |
872 | 1.52k | } |
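/* Known-answer sketch for the generic path above, using the standard test
 * vector from the SM4 specification (plaintext equals the key in that
 * vector).  Illustration only, not used by this file:
 *
 *   static const byte vec_key[16] =
 *     { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
 *       0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10 };
 *   static const byte vec_cipher[16] =
 *     { 0x68, 0x1e, 0xdf, 0x34, 0xd2, 0x06, 0x96, 0x5e,
 *       0x86, 0xb3, 0xe9, 0x4f, 0x53, 0x6e, 0x42, 0x46 };
 *   SM4_context ctx;
 *   byte out[16];
 *
 *   memset (&ctx, 0, sizeof ctx);
 *   sm4_expand_key (&ctx, vec_key);
 *   sm4_do_crypt (ctx.rkey_enc, out, vec_key);
 *
 * Zeroing the context disables every SIMD flag so that sm4_expand_key() and
 * the call above take the table-based code; memcmp (out, vec_cipher, 16)
 * should then return 0. */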
873 | | |
874 | | static unsigned int |
875 | | sm4_encrypt (void *context, byte *outbuf, const byte *inbuf) |
876 | 1.51k | { |
877 | 1.51k | SM4_context *ctx = context; |
878 | | |
879 | 1.51k | #ifdef USE_GFNI_AVX512 |
880 | 1.51k | if (ctx->use_gfni_avx512) |
881 | 0 | return sm4_gfni_avx512_crypt_blk1_16(ctx->rkey_enc, outbuf, inbuf, 1); |
882 | 1.51k | #endif |
883 | | |
884 | 1.51k | #ifdef USE_GFNI_AVX2 |
885 | 1.51k | if (ctx->use_gfni_avx2) |
886 | 0 | return sm4_gfni_avx2_crypt_blk1_16(ctx->rkey_enc, outbuf, inbuf, 1); |
887 | 1.51k | #endif |
888 | | |
889 | | #ifdef USE_ARM_CE |
890 | | if (ctx->use_arm_ce) |
891 | | return sm4_armv8_ce_crypt_blk1_16(ctx->rkey_enc, outbuf, inbuf, 1); |
892 | | #endif |
893 | | |
894 | 1.51k | prefetch_sbox_table (); |
895 | | |
896 | 1.51k | return sm4_do_crypt (ctx->rkey_enc, outbuf, inbuf); |
897 | 1.51k | } |
898 | | |
899 | | static unsigned int |
900 | | sm4_decrypt (void *context, byte *outbuf, const byte *inbuf) |
901 | 4 | { |
902 | 4 | SM4_context *ctx = context; |
903 | | |
904 | 4 | #ifdef USE_GFNI_AVX512 |
905 | 4 | if (ctx->use_gfni_avx512) |
906 | 0 | return sm4_gfni_avx512_crypt_blk1_16(ctx->rkey_dec, outbuf, inbuf, 1); |
907 | 4 | #endif |
908 | | |
909 | 4 | #ifdef USE_GFNI_AVX2 |
910 | 4 | if (ctx->use_gfni_avx2) |
911 | 0 | return sm4_gfni_avx2_crypt_blk1_16(ctx->rkey_dec, outbuf, inbuf, 1); |
912 | 4 | #endif |
913 | | |
914 | | #ifdef USE_ARM_CE |
915 | | if (ctx->use_arm_ce) |
916 | | return sm4_armv8_ce_crypt_blk1_16(ctx->rkey_dec, outbuf, inbuf, 1); |
917 | | #endif |
918 | | |
919 | 4 | prefetch_sbox_table (); |
920 | | |
921 | 4 | return sm4_do_crypt (ctx->rkey_dec, outbuf, inbuf); |
922 | 4 | } |
923 | | |
924 | | static unsigned int |
925 | | sm4_do_crypt_blks2 (const u32 *rk, byte *out, const byte *in) |
926 | 0 | { |
927 | 0 | u32 x[4]; |
928 | 0 | u32 y[4]; |
929 | 0 | u32 k; |
930 | 0 | int i; |
931 | | |
932 | | /* Encrypts/Decrypts two blocks for higher instruction level |
933 | | * parallelism. */ |
934 | |
|
935 | 0 | x[0] = buf_get_be32(in + 0 * 4); |
936 | 0 | x[1] = buf_get_be32(in + 1 * 4); |
937 | 0 | x[2] = buf_get_be32(in + 2 * 4); |
938 | 0 | x[3] = buf_get_be32(in + 3 * 4); |
939 | 0 | y[0] = buf_get_be32(in + 4 * 4); |
940 | 0 | y[1] = buf_get_be32(in + 5 * 4); |
941 | 0 | y[2] = buf_get_be32(in + 6 * 4); |
942 | 0 | y[3] = buf_get_be32(in + 7 * 4); |
943 | |
|
944 | 0 | for (i = 0; i < 32; i += 4) |
945 | 0 | { |
946 | 0 | k = rk[i + 0]; |
947 | 0 | x[0] = sm4_round(x[0], x[1], x[2], x[3], k); |
948 | 0 | y[0] = sm4_round(y[0], y[1], y[2], y[3], k); |
949 | 0 | k = rk[i + 1]; |
950 | 0 | x[1] = sm4_round(x[1], x[2], x[3], x[0], k); |
951 | 0 | y[1] = sm4_round(y[1], y[2], y[3], y[0], k); |
952 | 0 | k = rk[i + 2]; |
953 | 0 | x[2] = sm4_round(x[2], x[3], x[0], x[1], k); |
954 | 0 | y[2] = sm4_round(y[2], y[3], y[0], y[1], k); |
955 | 0 | k = rk[i + 3]; |
956 | 0 | x[3] = sm4_round(x[3], x[0], x[1], x[2], k); |
957 | 0 | y[3] = sm4_round(y[3], y[0], y[1], y[2], k); |
958 | 0 | } |
959 | |
|
960 | 0 | buf_put_be32(out + 0 * 4, x[3 - 0]); |
961 | 0 | buf_put_be32(out + 1 * 4, x[3 - 1]); |
962 | 0 | buf_put_be32(out + 2 * 4, x[3 - 2]); |
963 | 0 | buf_put_be32(out + 3 * 4, x[3 - 3]); |
964 | 0 | buf_put_be32(out + 4 * 4, y[3 - 0]); |
965 | 0 | buf_put_be32(out + 5 * 4, y[3 - 1]); |
966 | 0 | buf_put_be32(out + 6 * 4, y[3 - 2]); |
967 | 0 | buf_put_be32(out + 7 * 4, y[3 - 3]); |
968 | |
|
969 | 0 | return /*burn_stack*/ 4*10+sizeof(void*)*4; |
970 | 0 | } |
971 | | |
972 | | static unsigned int |
973 | | sm4_crypt_blocks (void *ctx, byte *out, const byte *in, |
974 | | size_t num_blks) |
975 | 0 | { |
976 | 0 | const u32 *rk = ctx; |
977 | 0 | unsigned int burn_depth = 0; |
978 | 0 | unsigned int nburn; |
979 | |
|
980 | 0 | while (num_blks >= 2) |
981 | 0 | { |
982 | 0 | nburn = sm4_do_crypt_blks2 (rk, out, in); |
983 | 0 | burn_depth = nburn > burn_depth ? nburn : burn_depth; |
984 | 0 | out += 2 * 16; |
985 | 0 | in += 2 * 16; |
986 | 0 | num_blks -= 2; |
987 | 0 | } |
988 | |
|
989 | 0 | while (num_blks) |
990 | 0 | { |
991 | 0 | nburn = sm4_do_crypt (rk, out, in); |
992 | 0 | burn_depth = nburn > burn_depth ? nburn : burn_depth; |
993 | 0 | out += 16; |
994 | 0 | in += 16; |
995 | 0 | num_blks--; |
996 | 0 | } |
997 | |
|
998 | 0 | if (burn_depth) |
999 | 0 | burn_depth += sizeof(void *) * 5; |
1000 | 0 | return burn_depth; |
1001 | 0 | } |
1002 | | |
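/* Pick the widest implementation available for 1..16 block requests.  The
 * priority order below is GFNI/AVX512, GFNI/AVX2, AES-NI/AVX2, AES-NI/AVX,
 * ARMv9 SVE-CE, ARMv8 CE, AArch64 SIMD, POWER9, POWER8 and finally the
 * generic table-based sm4_crypt_blocks(). */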
1003 | | static inline crypt_blk1_16_fn_t |
1004 | | sm4_get_crypt_blk1_16_fn(SM4_context *ctx) |
1005 | 10 | { |
1006 | 10 | if (0) |
1007 | 0 | ; |
1008 | 10 | #ifdef USE_GFNI_AVX512 |
1009 | 10 | else if (ctx->use_gfni_avx512) |
1010 | 0 | { |
1011 | 0 | return &sm4_gfni_avx512_crypt_blk1_16; |
1012 | 0 | } |
1013 | 10 | #endif |
1014 | 10 | #ifdef USE_GFNI_AVX2 |
1015 | 10 | else if (ctx->use_gfni_avx2) |
1016 | 0 | { |
1017 | 0 | return &sm4_gfni_avx2_crypt_blk1_16; |
1018 | 0 | } |
1019 | 10 | #endif |
1020 | 10 | #ifdef USE_AESNI_AVX2 |
1021 | 10 | else if (ctx->use_aesni_avx2) |
1022 | 10 | { |
1023 | 10 | return &sm4_aesni_avx2_crypt_blk1_16; |
1024 | 10 | } |
1025 | 0 | #endif |
1026 | 0 | #ifdef USE_AESNI_AVX |
1027 | 0 | else if (ctx->use_aesni_avx) |
1028 | 0 | { |
1029 | 0 | return &sm4_aesni_avx_crypt_blk1_16; |
1030 | 0 | } |
1031 | 0 | #endif |
1032 | | #ifdef USE_ARM_SVE_CE |
1033 | | else if (ctx->use_arm_sve_ce) |
1034 | | { |
1035 | | return &sm4_armv9_sve_ce_crypt_blk1_16; |
1036 | | } |
1037 | | #endif |
1038 | | #ifdef USE_ARM_CE |
1039 | | else if (ctx->use_arm_ce) |
1040 | | { |
1041 | | return &sm4_armv8_ce_crypt_blk1_16; |
1042 | | } |
1043 | | #endif |
1044 | | #ifdef USE_AARCH64_SIMD |
1045 | | else if (ctx->use_aarch64_simd) |
1046 | | { |
1047 | | return &sm4_aarch64_crypt_blk1_16; |
1048 | | } |
1049 | | #endif |
1050 | | #ifdef USE_PPC_CRYPTO |
1051 | | else if (ctx->use_ppc9le) |
1052 | | { |
1053 | | return &sm4_ppc9le_crypt_blk1_16; |
1054 | | } |
1055 | | else if (ctx->use_ppc8le) |
1056 | | { |
1057 | | return &sm4_ppc8le_crypt_blk1_16; |
1058 | | } |
1059 | | #endif |
1060 | 0 | else |
1061 | 0 | { |
1062 | 0 | (void)ctx; |
1063 | 0 | return &sm4_crypt_blocks; |
1064 | 0 | } |
1065 | 10 | } |
1066 | | |
1067 | | /* Bulk encryption of complete blocks in CTR mode. This function is only |
1068 | | intended for the bulk encryption feature of cipher.c. CTR is expected to be |
1069 | | of size 16. */ |
1070 | | static void |
1071 | | _gcry_sm4_ctr_enc(void *context, unsigned char *ctr, |
1072 | | void *outbuf_arg, const void *inbuf_arg, |
1073 | | size_t nblocks) |
1074 | 0 | { |
1075 | 0 | SM4_context *ctx = context; |
1076 | 0 | byte *outbuf = outbuf_arg; |
1077 | 0 | const byte *inbuf = inbuf_arg; |
1078 | 0 | int burn_stack_depth = 0; |
1079 | |
|
1080 | 0 | #ifdef USE_GFNI_AVX512 |
1081 | 0 | if (ctx->use_gfni_avx512) |
1082 | 0 | { |
1083 | | /* Process data in 32 block chunks. */ |
1084 | 0 | while (nblocks >= 32) |
1085 | 0 | { |
1086 | 0 | _gcry_sm4_gfni_avx512_ctr_enc_blk32(ctx->rkey_enc, |
1087 | 0 | outbuf, inbuf, ctr); |
1088 | |
|
1089 | 0 | nblocks -= 32; |
1090 | 0 | outbuf += 32 * 16; |
1091 | 0 | inbuf += 32 * 16; |
1092 | 0 | } |
1093 | | |
1094 | | /* Process data in 16 block chunks. */ |
1095 | 0 | if (nblocks >= 16) |
1096 | 0 | { |
1097 | 0 | _gcry_sm4_gfni_avx512_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr); |
1098 | |
|
1099 | 0 | nblocks -= 16; |
1100 | 0 | outbuf += 16 * 16; |
1101 | 0 | inbuf += 16 * 16; |
1102 | 0 | } |
1103 | 0 | } |
1104 | 0 | #endif |
1105 | |
|
1106 | 0 | #ifdef USE_GFNI_AVX2 |
1107 | 0 | if (ctx->use_gfni_avx2) |
1108 | 0 | { |
1109 | | /* Process data in 16 block chunks. */ |
1110 | 0 | while (nblocks >= 16) |
1111 | 0 | { |
1112 | 0 | _gcry_sm4_gfni_avx2_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr); |
1113 | |
|
1114 | 0 | nblocks -= 16; |
1115 | 0 | outbuf += 16 * 16; |
1116 | 0 | inbuf += 16 * 16; |
1117 | 0 | } |
1118 | 0 | } |
1119 | 0 | #endif |
1120 | |
|
1121 | 0 | #ifdef USE_AESNI_AVX2 |
1122 | 0 | if (ctx->use_aesni_avx2) |
1123 | 0 | { |
1124 | | /* Process data in 16 block chunks. */ |
1125 | 0 | while (nblocks >= 16) |
1126 | 0 | { |
1127 | 0 | _gcry_sm4_aesni_avx2_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr); |
1128 | |
|
1129 | 0 | nblocks -= 16; |
1130 | 0 | outbuf += 16 * 16; |
1131 | 0 | inbuf += 16 * 16; |
1132 | 0 | } |
1133 | 0 | } |
1134 | 0 | #endif |
1135 | |
|
1136 | 0 | #ifdef USE_AESNI_AVX |
1137 | 0 | if (ctx->use_aesni_avx) |
1138 | 0 | { |
1139 | | /* Process data in 8 block chunks. */ |
1140 | 0 | while (nblocks >= 8) |
1141 | 0 | { |
1142 | 0 | _gcry_sm4_aesni_avx_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr); |
1143 | |
|
1144 | 0 | nblocks -= 8; |
1145 | 0 | outbuf += 8 * 16; |
1146 | 0 | inbuf += 8 * 16; |
1147 | 0 | } |
1148 | 0 | } |
1149 | 0 | #endif |
1150 | |
|
1151 | | #ifdef USE_ARM_SVE_CE |
1152 | | if (ctx->use_arm_sve_ce) |
1153 | | { |
1154 | | /* Process all blocks in one call. */ |
1155 | | _gcry_sm4_armv9_sve_ce_ctr_enc(ctx->rkey_enc, outbuf, inbuf, |
1156 | | ctr, nblocks); |
1157 | | nblocks = 0; |
1158 | | } |
1159 | | #endif |
1160 | |
|
1161 | | #ifdef USE_ARM_CE |
1162 | | if (ctx->use_arm_ce) |
1163 | | { |
1164 | | /* Process multiples of 8 blocks at a time. */ |
1165 | | if (nblocks >= 8) |
1166 | | { |
1167 | | size_t nblks = nblocks & ~(8 - 1); |
1168 | | |
1169 | | _gcry_sm4_armv8_ce_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr, nblks); |
1170 | | |
1171 | | nblocks -= nblks; |
1172 | | outbuf += nblks * 16; |
1173 | | inbuf += nblks * 16; |
1174 | | } |
1175 | | } |
1176 | | #endif |
1177 | |
|
1178 | | #ifdef USE_AARCH64_SIMD |
1179 | | if (ctx->use_aarch64_simd) |
1180 | | { |
1181 | | /* Process multiples of 8 blocks at a time. */ |
1182 | | if (nblocks >= 8) |
1183 | | { |
1184 | | size_t nblks = nblocks & ~(8 - 1); |
1185 | | |
1186 | | _gcry_sm4_aarch64_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr, nblks); |
1187 | | |
1188 | | nblocks -= nblks; |
1189 | | outbuf += nblks * 16; |
1190 | | inbuf += nblks * 16; |
1191 | | } |
1192 | | } |
1193 | | #endif |
1194 | | |
1195 | | /* Process remaining blocks. */ |
1196 | 0 | if (nblocks) |
1197 | 0 | { |
1198 | 0 | crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16; |
1199 | 0 | byte tmpbuf[16 * 16]; |
1200 | 0 | unsigned int tmp_used = 16; |
1201 | 0 | size_t nburn; |
1202 | |
|
1203 | 0 | if (crypt_blk1_16 == &sm4_crypt_blocks) |
1204 | 0 | prefetch_sbox_table (); |
1205 | |
|
1206 | 0 | nburn = bulk_ctr_enc_128(ctx->rkey_enc, crypt_blk1_16, outbuf, inbuf, |
1207 | 0 | nblocks, ctr, tmpbuf, sizeof(tmpbuf) / 16, |
1208 | 0 | &tmp_used); |
1209 | 0 | burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; |
1210 | |
|
1211 | 0 | wipememory(tmpbuf, tmp_used); |
1212 | 0 | } |
1213 | |
|
1214 | 0 | if (burn_stack_depth) |
1215 | 0 | _gcry_burn_stack(burn_stack_depth); |
1216 | 0 | } |
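/* Caller-side sketch for the CTR bulk path above (application code, not part
 * of this file); `key', `in', `out' and `len' stand for caller-provided
 * values:
 *
 *   gcry_cipher_hd_t hd;
 *   unsigned char ctr[16] = { 0 };
 *
 *   gcry_cipher_open (&hd, GCRY_CIPHER_SM4, GCRY_CIPHER_MODE_CTR, 0);
 *   gcry_cipher_setkey (hd, key, 16);
 *   gcry_cipher_setctr (hd, ctr, 16);
 *   gcry_cipher_encrypt (hd, out, len, in, len);
 *   gcry_cipher_close (hd);
 *
 * The counter block is 16 bytes, matching the expectation documented for
 * _gcry_sm4_ctr_enc(); cipher.c collects full blocks and hands them to this
 * bulk routine via the bulk_ops table set up in sm4_setkey(). */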
1217 | | |
1218 | | /* Bulk decryption of complete blocks in CBC mode. This function is only |
1219 | | intended for the bulk encryption feature of cipher.c. */ |
1220 | | static void |
1221 | | _gcry_sm4_cbc_dec(void *context, unsigned char *iv, |
1222 | | void *outbuf_arg, const void *inbuf_arg, |
1223 | | size_t nblocks) |
1224 | 0 | { |
1225 | 0 | SM4_context *ctx = context; |
1226 | 0 | unsigned char *outbuf = outbuf_arg; |
1227 | 0 | const unsigned char *inbuf = inbuf_arg; |
1228 | 0 | int burn_stack_depth = 0; |
1229 | |
|
1230 | 0 | #ifdef USE_GFNI_AVX512 |
1231 | 0 | if (ctx->use_gfni_avx512) |
1232 | 0 | { |
1233 | | /* Process data in 32 block chunks. */ |
1234 | 0 | while (nblocks >= 32) |
1235 | 0 | { |
1236 | 0 | _gcry_sm4_gfni_avx512_cbc_dec_blk32(ctx->rkey_dec, outbuf, inbuf, iv); |
1237 | |
|
1238 | 0 | nblocks -= 32; |
1239 | 0 | outbuf += 32 * 16; |
1240 | 0 | inbuf += 32 * 16; |
1241 | 0 | } |
1242 | | |
1243 | | /* Process data in 16 block chunks. */ |
1244 | 0 | if (nblocks >= 16) |
1245 | 0 | { |
1246 | 0 | _gcry_sm4_gfni_avx512_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv); |
1247 | |
|
1248 | 0 | nblocks -= 16; |
1249 | 0 | outbuf += 16 * 16; |
1250 | 0 | inbuf += 16 * 16; |
1251 | 0 | } |
1252 | 0 | } |
1253 | 0 | #endif |
1254 | |
|
1255 | 0 | #ifdef USE_GFNI_AVX2 |
1256 | 0 | if (ctx->use_gfni_avx2) |
1257 | 0 | { |
1258 | | /* Process data in 16 block chunks. */ |
1259 | 0 | while (nblocks >= 16) |
1260 | 0 | { |
1261 | 0 | _gcry_sm4_gfni_avx2_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv); |
1262 | |
|
1263 | 0 | nblocks -= 16; |
1264 | 0 | outbuf += 16 * 16; |
1265 | 0 | inbuf += 16 * 16; |
1266 | 0 | } |
1267 | 0 | } |
1268 | 0 | #endif |
1269 | |
|
1270 | 0 | #ifdef USE_AESNI_AVX2 |
1271 | 0 | if (ctx->use_aesni_avx2) |
1272 | 0 | { |
1273 | | /* Process data in 16 block chunks. */ |
1274 | 0 | while (nblocks >= 16) |
1275 | 0 | { |
1276 | 0 | _gcry_sm4_aesni_avx2_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv); |
1277 | |
|
1278 | 0 | nblocks -= 16; |
1279 | 0 | outbuf += 16 * 16; |
1280 | 0 | inbuf += 16 * 16; |
1281 | 0 | } |
1282 | 0 | } |
1283 | 0 | #endif |
1284 | |
|
1285 | 0 | #ifdef USE_AESNI_AVX |
1286 | 0 | if (ctx->use_aesni_avx) |
1287 | 0 | { |
1288 | | /* Process data in 8 block chunks. */ |
1289 | 0 | while (nblocks >= 8) |
1290 | 0 | { |
1291 | 0 | _gcry_sm4_aesni_avx_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv); |
1292 | |
|
1293 | 0 | nblocks -= 8; |
1294 | 0 | outbuf += 8 * 16; |
1295 | 0 | inbuf += 8 * 16; |
1296 | 0 | } |
1297 | 0 | } |
1298 | 0 | #endif |
1299 | |
|
1300 | | #ifdef USE_ARM_SVE_CE |
1301 | | if (ctx->use_arm_sve_ce) |
1302 | | { |
1303 | | /* Process all blocks in one call. */ |
1304 | | _gcry_sm4_armv9_sve_ce_cbc_dec(ctx->rkey_dec, outbuf, inbuf, |
1305 | | iv, nblocks); |
1306 | | nblocks = 0; |
1307 | | } |
1308 | | #endif |
1309 | |
|
1310 | | #ifdef USE_ARM_CE |
1311 | | if (ctx->use_arm_ce) |
1312 | | { |
1313 | | /* Process multiples of 8 blocks at a time. */ |
1314 | | if (nblocks >= 8) |
1315 | | { |
1316 | | size_t nblks = nblocks & ~(8 - 1); |
1317 | | |
1318 | | _gcry_sm4_armv8_ce_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv, nblks); |
1319 | | |
1320 | | nblocks -= nblks; |
1321 | | outbuf += nblks * 16; |
1322 | | inbuf += nblks * 16; |
1323 | | } |
1324 | | } |
1325 | | #endif |
1326 | |
|
1327 | | #ifdef USE_AARCH64_SIMD |
1328 | | if (ctx->use_aarch64_simd) |
1329 | | { |
1330 | | /* Process multiples of 8 blocks at a time. */ |
1331 | | if (nblocks >= 8) |
1332 | | { |
1333 | | size_t nblks = nblocks & ~(8 - 1); |
1334 | | |
1335 | | _gcry_sm4_aarch64_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv, nblks); |
1336 | | |
1337 | | nblocks -= nblks; |
1338 | | outbuf += nblks * 16; |
1339 | | inbuf += nblks * 16; |
1340 | | } |
1341 | | } |
1342 | | #endif |
1343 | | |
1344 | | /* Process remaining blocks. */ |
1345 | 0 | if (nblocks) |
1346 | 0 | { |
1347 | 0 | crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16; |
1348 | 0 | unsigned char tmpbuf[16 * 16]; |
1349 | 0 | unsigned int tmp_used = 16; |
1350 | 0 | size_t nburn; |
1351 | |
|
1352 | 0 | if (crypt_blk1_16 == &sm4_crypt_blocks) |
1353 | 0 | prefetch_sbox_table (); |
1354 | |
|
1355 | 0 | nburn = bulk_cbc_dec_128(ctx->rkey_dec, crypt_blk1_16, outbuf, inbuf, |
1356 | 0 | nblocks, iv, tmpbuf, sizeof(tmpbuf) / 16, |
1357 | 0 | &tmp_used); |
1358 | 0 | burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; |
1359 | |
|
1360 | 0 | wipememory(tmpbuf, tmp_used); |
1361 | 0 | } |
1362 | |
|
1363 | 0 | if (burn_stack_depth) |
1364 | 0 | _gcry_burn_stack(burn_stack_depth); |
1365 | 0 | } |
1366 | | |
1367 | | /* Bulk decryption of complete blocks in CFB mode. This function is only |
1368 | | intended for the bulk encryption feature of cipher.c. */ |
1369 | | static void |
1370 | | _gcry_sm4_cfb_dec(void *context, unsigned char *iv, |
1371 | | void *outbuf_arg, const void *inbuf_arg, |
1372 | | size_t nblocks) |
1373 | 0 | { |
1374 | 0 | SM4_context *ctx = context; |
1375 | 0 | unsigned char *outbuf = outbuf_arg; |
1376 | 0 | const unsigned char *inbuf = inbuf_arg; |
1377 | 0 | int burn_stack_depth = 0; |
1378 | |
|
1379 | 0 | #ifdef USE_GFNI_AVX512 |
1380 | 0 | if (ctx->use_gfni_avx512) |
1381 | 0 | { |
1382 | | /* Process data in 32 block chunks. */ |
1383 | 0 | while (nblocks >= 32) |
1384 | 0 | { |
1385 | 0 | _gcry_sm4_gfni_avx512_cfb_dec_blk32(ctx->rkey_enc, outbuf, inbuf, iv); |
1386 | |
|
1387 | 0 | nblocks -= 32; |
1388 | 0 | outbuf += 32 * 16; |
1389 | 0 | inbuf += 32 * 16; |
1390 | 0 | } |
1391 | | |
1392 | | /* Process data in 16 block chunks. */ |
1393 | 0 | if (nblocks >= 16) |
1394 | 0 | { |
1395 | 0 | _gcry_sm4_gfni_avx512_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv); |
1396 | |
|
1397 | 0 | nblocks -= 16; |
1398 | 0 | outbuf += 16 * 16; |
1399 | 0 | inbuf += 16 * 16; |
1400 | 0 | } |
1401 | 0 | } |
1402 | 0 | #endif |
1403 | |
|
1404 | 0 | #ifdef USE_GFNI_AVX2 |
1405 | 0 | if (ctx->use_gfni_avx2) |
1406 | 0 | { |
1407 | | /* Process data in 16 block chunks. */ |
1408 | 0 | while (nblocks >= 16) |
1409 | 0 | { |
1410 | 0 | _gcry_sm4_gfni_avx2_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv); |
1411 | |
|
1412 | 0 | nblocks -= 16; |
1413 | 0 | outbuf += 16 * 16; |
1414 | 0 | inbuf += 16 * 16; |
1415 | 0 | } |
1416 | 0 | } |
1417 | 0 | #endif |
1418 | |
|
1419 | 0 | #ifdef USE_AESNI_AVX2 |
1420 | 0 | if (ctx->use_aesni_avx2) |
1421 | 0 | { |
1422 | | /* Process data in 16 block chunks. */ |
1423 | 0 | while (nblocks >= 16) |
1424 | 0 | { |
1425 | 0 | _gcry_sm4_aesni_avx2_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv); |
1426 | |
|
1427 | 0 | nblocks -= 16; |
1428 | 0 | outbuf += 16 * 16; |
1429 | 0 | inbuf += 16 * 16; |
1430 | 0 | } |
1431 | 0 | } |
1432 | 0 | #endif |
1433 | |
|
1434 | 0 | #ifdef USE_AESNI_AVX |
1435 | 0 | if (ctx->use_aesni_avx) |
1436 | 0 | { |
1437 | | /* Process data in 8 block chunks. */ |
1438 | 0 | while (nblocks >= 8) |
1439 | 0 | { |
1440 | 0 | _gcry_sm4_aesni_avx_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv); |
1441 | |
|
1442 | 0 | nblocks -= 8; |
1443 | 0 | outbuf += 8 * 16; |
1444 | 0 | inbuf += 8 * 16; |
1445 | 0 | } |
1446 | 0 | } |
1447 | 0 | #endif |
1448 | |
|
1449 | | #ifdef USE_ARM_SVE_CE |
1450 | | if (ctx->use_arm_sve_ce) |
1451 | | { |
1452 | | /* Process all blocks in one call. */ |
1453 | | _gcry_sm4_armv9_sve_ce_cfb_dec(ctx->rkey_enc, outbuf, inbuf, |
1454 | | iv, nblocks); |
1455 | | nblocks = 0; |
1456 | | } |
1457 | | #endif |
1458 | |
|
1459 | | #ifdef USE_ARM_CE |
1460 | | if (ctx->use_arm_ce) |
1461 | | { |
1462 | | /* Process multiples of 8 blocks at a time. */ |
1463 | | if (nblocks >= 8) |
1464 | | { |
1465 | | size_t nblks = nblocks & ~(8 - 1); |
1466 | | |
1467 | | _gcry_sm4_armv8_ce_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv, nblks); |
1468 | | |
1469 | | nblocks -= nblks; |
1470 | | outbuf += nblks * 16; |
1471 | | inbuf += nblks * 16; |
1472 | | } |
1473 | | } |
1474 | | #endif |
1475 | |
|
1476 | | #ifdef USE_AARCH64_SIMD |
1477 | | if (ctx->use_aarch64_simd) |
1478 | | { |
1479 | | /* Process multiples of 8 blocks at a time. */ |
1480 | | if (nblocks >= 8) |
1481 | | { |
1482 | | size_t nblks = nblocks & ~(8 - 1); |
1483 | | |
1484 | | _gcry_sm4_aarch64_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv, nblks); |
1485 | | |
1486 | | nblocks -= nblks; |
1487 | | outbuf += nblks * 16; |
1488 | | inbuf += nblks * 16; |
1489 | | } |
1490 | | } |
1491 | | #endif |
1492 | | |
1493 | | /* Process remaining blocks. */ |
1494 | 0 | if (nblocks) |
1495 | 0 | { |
1496 | 0 | crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16; |
1497 | 0 | unsigned char tmpbuf[16 * 16]; |
1498 | 0 | unsigned int tmp_used = 16; |
1499 | 0 | size_t nburn; |
1500 | |
|
1501 | 0 | if (crypt_blk1_16 == &sm4_crypt_blocks) |
1502 | 0 | prefetch_sbox_table (); |
1503 | |
|
1504 | 0 | nburn = bulk_cfb_dec_128(ctx->rkey_enc, crypt_blk1_16, outbuf, inbuf, |
1505 | 0 | nblocks, iv, tmpbuf, sizeof(tmpbuf) / 16, |
1506 | 0 | &tmp_used); |
1507 | 0 | burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; |
1508 | |
|
1509 | 0 | wipememory(tmpbuf, tmp_used); |
1510 | 0 | } |
1511 | |
|
1512 | 0 | if (burn_stack_depth) |
1513 | 0 | _gcry_burn_stack(burn_stack_depth); |
1514 | 0 | } |
1515 | | |
1516 | | static unsigned int |
1517 | | sm4_crypt_blk1_32 (SM4_context *ctx, byte *outbuf, const byte *inbuf, |
1518 | | size_t num_blks, u32 *rk) |
1519 | 0 | { |
1520 | 0 | crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16; |
1521 | 0 | unsigned int stack_burn_size = 0; |
1522 | 0 | unsigned int nburn; |
1523 | |
|
1524 | 0 | gcry_assert (num_blks <= 32); |
1525 | | |
1526 | 0 | #ifdef USE_GFNI_AVX512 |
1527 | 0 | if (num_blks == 32 && ctx->use_gfni_avx512) |
1528 | 0 | { |
1529 | 0 | return _gcry_sm4_gfni_avx512_crypt_blk32 (rk, outbuf, inbuf); |
1530 | 0 | } |
1531 | 0 | #endif |
1532 | | #ifdef USE_ARM_SVE_CE |
1533 | | if (ctx->use_arm_sve_ce) |
1534 | | { |
1535 | | _gcry_sm4_armv9_sve_ce_crypt (rk, outbuf, inbuf, num_blks); |
1536 | | return 0; |
1537 | | } |
1538 | | #endif |
1539 | | |
1540 | 0 | do |
1541 | 0 | { |
1542 | 0 | unsigned int curr_blks = num_blks > 16 ? 16 : num_blks; |
1543 | 0 | nburn = crypt_blk1_16 (rk, outbuf, inbuf, curr_blks); |
1544 | 0 | stack_burn_size = nburn > stack_burn_size ? nburn : stack_burn_size; |
1545 | 0 | outbuf += curr_blks * 16; |
1546 | 0 | inbuf += curr_blks * 16; |
1547 | 0 | num_blks -= curr_blks; |
1548 | 0 | } |
1549 | 0 | while (num_blks > 0); |
1550 | |
|
1551 | 0 | return stack_burn_size; |
1552 | 0 | } |
1553 | | |
1554 | | static unsigned int |
1555 | | sm4_encrypt_blk1_32 (void *context, byte *out, const byte *in, |
1556 | | size_t num_blks) |
1557 | 0 | { |
1558 | 0 | SM4_context *ctx = context; |
1559 | 0 | return sm4_crypt_blk1_32 (ctx, out, in, num_blks, ctx->rkey_enc); |
1560 | 0 | } |
1561 | | |
1562 | | static unsigned int |
1563 | | sm4_decrypt_blk1_32 (void *context, byte *out, const byte *in, |
1564 | | size_t num_blks) |
1565 | 0 | { |
1566 | 0 | SM4_context *ctx = context; |
1567 | 0 | return sm4_crypt_blk1_32 (ctx, out, in, num_blks, ctx->rkey_dec); |
1568 | 0 | } |
1569 | | |
1570 | | /* Bulk encryption/decryption in ECB mode. */ |
1571 | | static void |
1572 | | _gcry_sm4_ecb_crypt (void *context, void *outbuf_arg, |
1573 | | const void *inbuf_arg, size_t nblocks, int encrypt) |
1574 | 0 | { |
1575 | 0 | SM4_context *ctx = context; |
1576 | 0 | unsigned char *outbuf = outbuf_arg; |
1577 | 0 | const unsigned char *inbuf = inbuf_arg; |
1578 | 0 | int burn_stack_depth = 0; |
1579 | | |
1580 | | /* Process remaining blocks. */ |
1581 | 0 | if (nblocks) |
1582 | 0 | { |
1583 | 0 | size_t nburn; |
1584 | |
|
1585 | 0 | if (ctx->crypt_blk1_16 == &sm4_crypt_blocks) |
1586 | 0 | prefetch_sbox_table (); |
1587 | |
|
1588 | 0 | nburn = bulk_ecb_crypt_128(ctx, encrypt ? sm4_encrypt_blk1_32 |
1589 | 0 | : sm4_decrypt_blk1_32, |
1590 | 0 | outbuf, inbuf, nblocks, 32); |
1591 | 0 | burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; |
1592 | 0 | } |
1593 | |
|
1594 | 0 | if (burn_stack_depth) |
1595 | 0 | _gcry_burn_stack(burn_stack_depth); |
1596 | 0 | } |
1597 | | |
1598 | | /* Bulk encryption/decryption of complete blocks in XTS mode. */ |
1599 | | static void |
1600 | | _gcry_sm4_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg, |
1601 | | const void *inbuf_arg, size_t nblocks, int encrypt) |
1602 | 0 | { |
1603 | 0 | SM4_context *ctx = context; |
1604 | 0 | unsigned char *outbuf = outbuf_arg; |
1605 | 0 | const unsigned char *inbuf = inbuf_arg; |
1606 | 0 | int burn_stack_depth = 0; |
1607 | |
|
1608 | | #ifdef USE_ARM_CE |
1609 | | if (ctx->use_arm_ce) |
1610 | | { |
1611 | | /* Process all blocks in one call. */ |
1612 | | _gcry_sm4_armv8_ce_xts_crypt(encrypt ? ctx->rkey_enc : ctx->rkey_dec, |
1613 | | outbuf, inbuf, tweak, nblocks); |
1614 | | |
1615 | | nblocks = 0; |
1616 | | } |
1617 | | #endif |
1618 | | |
1619 | | /* Process remaining blocks. */ |
1620 | 0 | if (nblocks) |
1621 | 0 | { |
1622 | 0 | unsigned char tmpbuf[32 * 16]; |
1623 | 0 | unsigned int tmp_used = 16; |
1624 | 0 | size_t nburn; |
1625 | |
|
1626 | 0 | if (ctx->crypt_blk1_16 == &sm4_crypt_blocks) |
1627 | 0 | prefetch_sbox_table (); |
1628 | |
|
1629 | 0 | nburn = bulk_xts_crypt_128(ctx, encrypt ? sm4_encrypt_blk1_32 |
1630 | 0 | : sm4_decrypt_blk1_32, |
1631 | 0 | outbuf, inbuf, nblocks, |
1632 | 0 | tweak, tmpbuf, sizeof(tmpbuf) / 16, |
1633 | 0 | &tmp_used); |
1634 | 0 | burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; |
1635 | |
1636 | 0 | wipememory(tmpbuf, tmp_used); |
1637 | 0 | } |
1638 | |
1639 | 0 | if (burn_stack_depth) |
1640 | 0 | _gcry_burn_stack(burn_stack_depth); |
1641 | 0 | } |
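
Through the public API the XTS path is selected with GCRY_CIPHER_MODE_XTS; the key is twice the SM4 key length and the per-unit tweak is installed with gcry_cipher_setiv. A hedged sketch, assuming a 512-byte sector and using a host-endian copy of the sector number as the tweak purely for illustration:

#include <string.h>
#include <gcrypt.h>

/* Encrypt one 512-byte sector with SM4-XTS.  XTS takes a 32-byte key
 * (two SM4 keys) and a 16-byte tweak, typically the sector number. */
static gcry_error_t
sm4_xts_encrypt_sector (const unsigned char key[32],
                        unsigned long long sector,
                        unsigned char *data /* 512 bytes */)
{
  gcry_cipher_hd_t hd;
  unsigned char tweak[16];
  gcry_error_t err;

  memset (tweak, 0, sizeof tweak);
  memcpy (tweak, &sector, sizeof sector); /* illustrative, host endian */

  err = gcry_cipher_open (&hd, GCRY_CIPHER_SM4, GCRY_CIPHER_MODE_XTS, 0);
  if (err)
    return err;
  err = gcry_cipher_setkey (hd, key, 32);
  if (!err)
    err = gcry_cipher_setiv (hd, tweak, sizeof tweak);
  if (!err)
    err = gcry_cipher_encrypt (hd, data, 512, NULL, 0);
  gcry_cipher_close (hd);
  return err;
}
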
1642 | | |
1643 | | /* Bulk encryption of complete blocks in CTR32LE mode (for GCM-SIV). */ |
1644 | | static void |
1645 | | _gcry_sm4_ctr32le_enc(void *context, unsigned char *ctr, |
1646 | | void *outbuf_arg, const void *inbuf_arg, |
1647 | | size_t nblocks) |
1648 | 0 | { |
1649 | 0 | SM4_context *ctx = context; |
1650 | 0 | byte *outbuf = outbuf_arg; |
1651 | 0 | const byte *inbuf = inbuf_arg; |
1652 | 0 | int burn_stack_depth = 0; |
1653 | | |
1654 | | /* Process remaining blocks. */ |
1655 | 0 | if (nblocks) |
1656 | 0 | { |
1657 | 0 | byte tmpbuf[32 * 16]; |
1658 | 0 | unsigned int tmp_used = 16; |
1659 | 0 | size_t nburn; |
1660 | |
1661 | 0 | nburn = bulk_ctr32le_enc_128 (ctx, sm4_encrypt_blk1_32, outbuf, inbuf, |
1662 | 0 | nblocks, ctr, tmpbuf, sizeof(tmpbuf) / 16, |
1663 | 0 | &tmp_used); |
1664 | 0 | burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; |
1665 | |
1666 | 0 | wipememory (tmpbuf, tmp_used); |
1667 | 0 | } |
1668 | |
1669 | 0 | if (burn_stack_depth) |
1670 | 0 | _gcry_burn_stack (burn_stack_depth); |
1671 | 0 | } |
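
CTR32LE is the counter variant required by GCM-SIV (RFC 8452): only the least significant 32 bits of the counter block, stored little-endian in its first four bytes, are incremented, wrapping modulo 2^32, while the rest of the block stays fixed. A standalone sketch of that increment (illustrative; not the bulkhelp implementation):

#include <stdint.h>

/* Increment the 32-bit little-endian counter in the first four bytes of
 * a 16-byte counter block, wrapping modulo 2^32. */
static void
ctr32le_increment (unsigned char block[16])
{
  uint32_t ctr = (uint32_t)block[0]
               | ((uint32_t)block[1] << 8)
               | ((uint32_t)block[2] << 16)
               | ((uint32_t)block[3] << 24);

  ctr += 1;  /* unsigned wrap-around gives the mod 2^32 behaviour */
  block[0] = (unsigned char)(ctr & 0xff);
  block[1] = (unsigned char)((ctr >> 8) & 0xff);
  block[2] = (unsigned char)((ctr >> 16) & 0xff);
  block[3] = (unsigned char)((ctr >> 24) & 0xff);
}
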
1672 | | |
1673 | | /* Bulk encryption/decryption of complete blocks in OCB mode. */ |
1674 | | static size_t |
1675 | | _gcry_sm4_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, |
1676 | | const void *inbuf_arg, size_t nblocks, int encrypt) |
1677 | 0 | { |
1678 | 0 | SM4_context *ctx = (void *)&c->context.c; |
1679 | 0 | unsigned char *outbuf = outbuf_arg; |
1680 | 0 | const unsigned char *inbuf = inbuf_arg; |
1681 | 0 | u64 blkn = c->u_mode.ocb.data_nblocks; |
1682 | 0 | int burn_stack_depth = 0; |
1683 | |
1684 | 0 | #ifdef USE_GFNI_AVX512 |
1685 | 0 | if (ctx->use_gfni_avx512) |
1686 | 0 | { |
1687 | 0 | u64 Ls[32]; |
1688 | 0 | u64 *l; |
1689 | |
1690 | 0 | if (nblocks >= 32) |
1691 | 0 | { |
1692 | 0 | l = bulk_ocb_prepare_L_pointers_array_blk32 (c, Ls, blkn); |
1693 | | |
1694 | | /* Process data in 32 block chunks. */ |
1695 | 0 | while (nblocks >= 32) |
1696 | 0 | { |
1697 | 0 | blkn += 32; |
1698 | 0 | *l = (uintptr_t)(void *)ocb_get_l (c, blkn - blkn % 32); |
1699 | |
1700 | 0 | if (encrypt) |
1701 | 0 | _gcry_sm4_gfni_avx512_ocb_enc_blk32 (ctx->rkey_enc, outbuf, |
1702 | 0 | inbuf, c->u_iv.iv, |
1703 | 0 | c->u_ctr.ctr, Ls); |
1704 | 0 | else |
1705 | 0 | _gcry_sm4_gfni_avx512_ocb_dec_blk32 (ctx->rkey_dec, outbuf, |
1706 | 0 | inbuf, c->u_iv.iv, |
1707 | 0 | c->u_ctr.ctr, Ls); |
1708 | |
1709 | 0 | nblocks -= 32; |
1710 | 0 | outbuf += 32 * 16; |
1711 | 0 | inbuf += 32 * 16; |
1712 | 0 | } |
1713 | 0 | } |
1714 | |
1715 | 0 | if (nblocks >= 16) |
1716 | 0 | { |
1717 | 0 | l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn); |
1718 | | |
1719 | | /* Process data in 16 block chunks. */ |
1720 | 0 | blkn += 16; |
1721 | 0 | *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16); |
1722 | |
1723 | 0 | if (encrypt) |
1724 | 0 | _gcry_sm4_gfni_avx512_ocb_enc(ctx->rkey_enc, outbuf, inbuf, |
1725 | 0 | c->u_iv.iv, c->u_ctr.ctr, Ls); |
1726 | 0 | else |
1727 | 0 | _gcry_sm4_gfni_avx512_ocb_dec(ctx->rkey_dec, outbuf, inbuf, |
1728 | 0 | c->u_iv.iv, c->u_ctr.ctr, Ls); |
1729 | |
1730 | 0 | nblocks -= 16; |
1731 | 0 | outbuf += 16 * 16; |
1732 | 0 | inbuf += 16 * 16; |
1733 | 0 | } |
1734 | 0 | } |
1735 | 0 | #endif |
1736 | |
1737 | 0 | #ifdef USE_GFNI_AVX2 |
1738 | 0 | if (ctx->use_gfni_avx2) |
1739 | 0 | { |
1740 | 0 | u64 Ls[16]; |
1741 | 0 | u64 *l; |
1742 | |
1743 | 0 | if (nblocks >= 16) |
1744 | 0 | { |
1745 | 0 | l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn); |
1746 | | |
1747 | | /* Process data in 16 block chunks. */ |
1748 | 0 | while (nblocks >= 16) |
1749 | 0 | { |
1750 | 0 | blkn += 16; |
1751 | 0 | *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16); |
1752 | |
1753 | 0 | if (encrypt) |
1754 | 0 | _gcry_sm4_gfni_avx2_ocb_enc(ctx->rkey_enc, outbuf, inbuf, |
1755 | 0 | c->u_iv.iv, c->u_ctr.ctr, Ls); |
1756 | 0 | else |
1757 | 0 | _gcry_sm4_gfni_avx2_ocb_dec(ctx->rkey_dec, outbuf, inbuf, |
1758 | 0 | c->u_iv.iv, c->u_ctr.ctr, Ls); |
1759 | |
1760 | 0 | nblocks -= 16; |
1761 | 0 | outbuf += 16 * 16; |
1762 | 0 | inbuf += 16 * 16; |
1763 | 0 | } |
1764 | 0 | } |
1765 | 0 | } |
1766 | 0 | #endif |
1767 | |
1768 | 0 | #ifdef USE_AESNI_AVX2 |
1769 | 0 | if (ctx->use_aesni_avx2) |
1770 | 0 | { |
1771 | 0 | u64 Ls[16]; |
1772 | 0 | u64 *l; |
1773 | |
1774 | 0 | if (nblocks >= 16) |
1775 | 0 | { |
1776 | 0 | l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn); |
1777 | | |
1778 | | /* Process data in 16 block chunks. */ |
1779 | 0 | while (nblocks >= 16) |
1780 | 0 | { |
1781 | 0 | blkn += 16; |
1782 | 0 | *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16); |
1783 | |
1784 | 0 | if (encrypt) |
1785 | 0 | _gcry_sm4_aesni_avx2_ocb_enc(ctx->rkey_enc, outbuf, inbuf, |
1786 | 0 | c->u_iv.iv, c->u_ctr.ctr, Ls); |
1787 | 0 | else |
1788 | 0 | _gcry_sm4_aesni_avx2_ocb_dec(ctx->rkey_dec, outbuf, inbuf, |
1789 | 0 | c->u_iv.iv, c->u_ctr.ctr, Ls); |
1790 | |
1791 | 0 | nblocks -= 16; |
1792 | 0 | outbuf += 16 * 16; |
1793 | 0 | inbuf += 16 * 16; |
1794 | 0 | } |
1795 | 0 | } |
1796 | 0 | } |
1797 | 0 | #endif |
1798 | |
1799 | 0 | #ifdef USE_AESNI_AVX |
1800 | 0 | if (ctx->use_aesni_avx) |
1801 | 0 | { |
1802 | 0 | u64 Ls[8]; |
1803 | 0 | u64 *l; |
1804 | |
1805 | 0 | if (nblocks >= 8) |
1806 | 0 | { |
1807 | 0 | l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn); |
1808 | | |
1809 | | /* Process data in 8 block chunks. */ |
1810 | 0 | while (nblocks >= 8) |
1811 | 0 | { |
1812 | 0 | blkn += 8; |
1813 | 0 | *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8); |
1814 | |
1815 | 0 | if (encrypt) |
1816 | 0 | _gcry_sm4_aesni_avx_ocb_enc(ctx->rkey_enc, outbuf, inbuf, |
1817 | 0 | c->u_iv.iv, c->u_ctr.ctr, Ls); |
1818 | 0 | else |
1819 | 0 | _gcry_sm4_aesni_avx_ocb_dec(ctx->rkey_dec, outbuf, inbuf, |
1820 | 0 | c->u_iv.iv, c->u_ctr.ctr, Ls); |
1821 | |
1822 | 0 | nblocks -= 8; |
1823 | 0 | outbuf += 8 * 16; |
1824 | 0 | inbuf += 8 * 16; |
1825 | 0 | } |
1826 | 0 | } |
1827 | 0 | } |
1828 | 0 | #endif |
1829 | | |
1830 | | /* Process remaining blocks. */ |
1831 | 0 | if (nblocks) |
1832 | 0 | { |
1833 | 0 | crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16; |
1834 | 0 | u32 *rk = encrypt ? ctx->rkey_enc : ctx->rkey_dec; |
1835 | 0 | unsigned char tmpbuf[16 * 16]; |
1836 | 0 | unsigned int tmp_used = 16; |
1837 | 0 | size_t nburn; |
1838 | |
1839 | 0 | nburn = bulk_ocb_crypt_128 (c, rk, crypt_blk1_16, outbuf, inbuf, nblocks, |
1840 | 0 | &blkn, encrypt, tmpbuf, sizeof(tmpbuf) / 16, |
1841 | 0 | &tmp_used); |
1842 | 0 | burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; |
1843 | |
1844 | 0 | wipememory(tmpbuf, tmp_used); |
1845 | 0 | } |
1846 | |
1847 | 0 | c->u_mode.ocb.data_nblocks = blkn; |
1848 | |
1849 | 0 | if (burn_stack_depth) |
1850 | 0 | _gcry_burn_stack(burn_stack_depth); |
1851 | |
1852 | 0 | return 0; |
1853 | 0 | } |
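
The ocb_get_l() calls above fetch the OCB offset increment L_ntz(i), where ntz(i) is the number of trailing zero bits of the 1-based block index i (RFC 7253); the pre-built Ls arrays already hold the entries whose ntz value repeats inside a 16- or 32-block chunk, so only the slot for the chunk-final index needs a fresh lookup. A tiny standalone sketch of the ntz computation that drives the table index:

/* Trailing-zero count of a non-zero block index; OCB uses L_{ntz(i)} as
 * the offset increment for block number i (RFC 7253). */
static unsigned int
ocb_ntz (unsigned long long i)
{
  unsigned int n = 0;

  while ((i & 1) == 0)
    {
      i >>= 1;
      n++;
    }
  return n;
}
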
1854 | | |
1855 | | /* Bulk authentication of complete blocks in OCB mode. */ |
1856 | | static size_t |
1857 | | _gcry_sm4_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) |
1858 | 0 | { |
1859 | 0 | SM4_context *ctx = (void *)&c->context.c; |
1860 | 0 | const unsigned char *abuf = abuf_arg; |
1861 | 0 | u64 blkn = c->u_mode.ocb.aad_nblocks; |
1862 | 0 | int burn_stack_depth = 0; |
1863 | |
1864 | 0 | #ifdef USE_GFNI_AVX512 |
1865 | 0 | if (ctx->use_gfni_avx512) |
1866 | 0 | { |
1867 | 0 | u64 Ls[16]; |
1868 | 0 | u64 *l; |
1869 | |
1870 | 0 | if (nblocks >= 16) |
1871 | 0 | { |
1872 | 0 | l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn); |
1873 | | |
1874 | | /* Process data in 16 block chunks. */ |
1875 | 0 | while (nblocks >= 16) |
1876 | 0 | { |
1877 | 0 | blkn += 16; |
1878 | 0 | *l = (uintptr_t)(void *)ocb_get_l (c, blkn - blkn % 16); |
1879 | |
1880 | 0 | _gcry_sm4_gfni_avx512_ocb_auth (ctx->rkey_enc, abuf, |
1881 | 0 | c->u_mode.ocb.aad_offset, |
1882 | 0 | c->u_mode.ocb.aad_sum, Ls); |
1883 | |
1884 | 0 | nblocks -= 16; |
1885 | 0 | abuf += 16 * 16; |
1886 | 0 | } |
1887 | 0 | } |
1888 | 0 | } |
1889 | 0 | #endif |
1890 | |
1891 | 0 | #ifdef USE_GFNI_AVX2 |
1892 | 0 | if (ctx->use_gfni_avx2) |
1893 | 0 | { |
1894 | 0 | u64 Ls[16]; |
1895 | 0 | u64 *l; |
1896 | |
1897 | 0 | if (nblocks >= 16) |
1898 | 0 | { |
1899 | 0 | l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn); |
1900 | | |
1901 | | /* Process data in 16 block chunks. */ |
1902 | 0 | while (nblocks >= 16) |
1903 | 0 | { |
1904 | 0 | blkn += 16; |
1905 | 0 | *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16); |
1906 | |
1907 | 0 | _gcry_sm4_gfni_avx2_ocb_auth(ctx->rkey_enc, abuf, |
1908 | 0 | c->u_mode.ocb.aad_offset, |
1909 | 0 | c->u_mode.ocb.aad_sum, Ls); |
1910 | |
1911 | 0 | nblocks -= 16; |
1912 | 0 | abuf += 16 * 16; |
1913 | 0 | } |
1914 | 0 | } |
1915 | 0 | } |
1916 | 0 | #endif |
1917 | |
1918 | 0 | #ifdef USE_AESNI_AVX2 |
1919 | 0 | if (ctx->use_aesni_avx2) |
1920 | 0 | { |
1921 | 0 | u64 Ls[16]; |
1922 | 0 | u64 *l; |
1923 | |
1924 | 0 | if (nblocks >= 16) |
1925 | 0 | { |
1926 | 0 | l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn); |
1927 | | |
1928 | | /* Process data in 16 block chunks. */ |
1929 | 0 | while (nblocks >= 16) |
1930 | 0 | { |
1931 | 0 | blkn += 16; |
1932 | 0 | *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16); |
1933 | |
1934 | 0 | _gcry_sm4_aesni_avx2_ocb_auth(ctx->rkey_enc, abuf, |
1935 | 0 | c->u_mode.ocb.aad_offset, |
1936 | 0 | c->u_mode.ocb.aad_sum, Ls); |
1937 | |
1938 | 0 | nblocks -= 16; |
1939 | 0 | abuf += 16 * 16; |
1940 | 0 | } |
1941 | 0 | } |
1942 | 0 | } |
1943 | 0 | #endif |
1944 | |
1945 | 0 | #ifdef USE_AESNI_AVX |
1946 | 0 | if (ctx->use_aesni_avx) |
1947 | 0 | { |
1948 | 0 | u64 Ls[8]; |
1949 | 0 | u64 *l; |
1950 | |
1951 | 0 | if (nblocks >= 8) |
1952 | 0 | { |
1953 | 0 | l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn); |
1954 | | |
1955 | | /* Process data in 8 block chunks. */ |
1956 | 0 | while (nblocks >= 8) |
1957 | 0 | { |
1958 | 0 | blkn += 8; |
1959 | 0 | *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8); |
1960 | |
1961 | 0 | _gcry_sm4_aesni_avx_ocb_auth(ctx->rkey_enc, abuf, |
1962 | 0 | c->u_mode.ocb.aad_offset, |
1963 | 0 | c->u_mode.ocb.aad_sum, Ls); |
1964 | |
1965 | 0 | nblocks -= 8; |
1966 | 0 | abuf += 8 * 16; |
1967 | 0 | } |
1968 | 0 | } |
1969 | 0 | } |
1970 | 0 | #endif |
1971 | | |
1972 | | /* Process remaining blocks. */ |
1973 | 0 | if (nblocks) |
1974 | 0 | { |
1975 | 0 | crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16; |
1976 | 0 | unsigned char tmpbuf[16 * 16]; |
1977 | 0 | unsigned int tmp_used = 16; |
1978 | 0 | size_t nburn; |
1979 | |
1980 | 0 | nburn = bulk_ocb_auth_128 (c, ctx->rkey_enc, crypt_blk1_16, abuf, nblocks, |
1981 | 0 | &blkn, tmpbuf, sizeof(tmpbuf) / 16, &tmp_used); |
1982 | 0 | burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; |
1983 | |
1984 | 0 | wipememory(tmpbuf, tmp_used); |
1985 | 0 | } |
1986 | |
1987 | 0 | c->u_mode.ocb.aad_nblocks = blkn; |
1988 | |
1989 | 0 | if (burn_stack_depth) |
1990 | 0 | _gcry_burn_stack(burn_stack_depth); |
1991 | |
1992 | 0 | return 0; |
1993 | 0 | } |
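
At the API level both the data path and this AAD path are driven by the usual AEAD calls: data passed to gcry_cipher_authenticate ends up in _gcry_sm4_ocb_auth, while the payload goes through _gcry_sm4_ocb_crypt. A hedged usage sketch with a 12-byte nonce and a 16-byte tag (initialisation and error reporting kept minimal):

#include <gcrypt.h>

/* Authenticated encryption with SM4-OCB; encrypts buf in place and
 * writes the authentication tag to tag[]. */
static gcry_error_t
sm4_ocb_seal (const unsigned char key[16], const unsigned char nonce[12],
              const unsigned char *aad, size_t aadlen,
              unsigned char *buf, size_t buflen, unsigned char tag[16])
{
  gcry_cipher_hd_t hd;
  gcry_error_t err;

  err = gcry_cipher_open (&hd, GCRY_CIPHER_SM4, GCRY_CIPHER_MODE_OCB, 0);
  if (err)
    return err;
  err = gcry_cipher_setkey (hd, key, 16);
  if (!err)
    err = gcry_cipher_setiv (hd, nonce, 12);
  if (!err)
    err = gcry_cipher_authenticate (hd, aad, aadlen);
  if (!err)
    err = gcry_cipher_encrypt (hd, buf, buflen, NULL, 0);
  if (!err)
    err = gcry_cipher_gettag (hd, tag, 16);
  gcry_cipher_close (hd);
  return err;
}
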
1994 | | |
1995 | | static const char * |
1996 | | sm4_selftest (void) |
1997 | 4 | { |
1998 | 4 | SM4_context ctx; |
1999 | 4 | byte scratch[16]; |
2000 | | |
2001 | 4 | static const byte plaintext[16] = { |
2002 | 4 | 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, |
2003 | 4 | 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10, |
2004 | 4 | }; |
2005 | 4 | static const byte key[16] = { |
2006 | 4 | 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, |
2007 | 4 | 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10, |
2008 | 4 | }; |
2009 | 4 | static const byte ciphertext[16] = { |
2010 | 4 | 0x68, 0x1E, 0xDF, 0x34, 0xD2, 0x06, 0x96, 0x5E, |
2011 | 4 | 0x86, 0xB3, 0xE9, 0x4F, 0x53, 0x6E, 0x42, 0x46 |
2012 | 4 | }; |
2013 | | |
2014 | 4 | memset (&ctx, 0, sizeof(ctx)); |
2015 | | |
2016 | 4 | sm4_expand_key (&ctx, key); |
2017 | 4 | sm4_encrypt (&ctx, scratch, plaintext); |
2018 | 4 | if (memcmp (scratch, ciphertext, sizeof (ciphertext))) |
2019 | 0 | return "SM4 test encryption failed."; |
2020 | 4 | sm4_decrypt (&ctx, scratch, scratch); |
2021 | 4 | if (memcmp (scratch, plaintext, sizeof (plaintext))) |
2022 | 0 | return "SM4 test decryption failed."; |
2023 | | |
2024 | 4 | return NULL; |
2025 | 4 | } |
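
The vector above is the single-block example from the SM4 standard (GB/T 32907-2016); note that the plaintext equals the key. The same check can be reproduced against the public API as a quick cross-check that the algorithm is wired up correctly; a hedged sketch (gcry_check_version initialisation omitted):

#include <string.h>
#include <gcrypt.h>

/* Reproduce the standard SM4 single-block test vector used by
 * sm4_selftest() through the public API; returns 0 on success. */
static int
check_sm4_standard_vector (void)
{
  static const unsigned char key[16] = {
    0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
    0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10,
  };
  static const unsigned char expected[16] = {
    0x68, 0x1E, 0xDF, 0x34, 0xD2, 0x06, 0x96, 0x5E,
    0x86, 0xB3, 0xE9, 0x4F, 0x53, 0x6E, 0x42, 0x46,
  };
  unsigned char block[16];
  gcry_cipher_hd_t hd;
  int ok = -1;

  memcpy (block, key, 16);  /* plaintext equals the key in this vector */
  if (gcry_cipher_open (&hd, GCRY_CIPHER_SM4, GCRY_CIPHER_MODE_ECB, 0))
    return -1;
  if (!gcry_cipher_setkey (hd, key, 16)
      && !gcry_cipher_encrypt (hd, block, 16, NULL, 0))
    ok = memcmp (block, expected, 16) ? -1 : 0;
  gcry_cipher_close (hd);
  return ok;
}
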
2026 | | |
2027 | | static gpg_err_code_t |
2028 | | run_selftests (int algo, int extended, selftest_report_func_t report) |
2029 | 0 | { |
2030 | 0 | const char *what; |
2031 | 0 | const char *errtxt; |
2032 | |
2033 | 0 | (void)extended; |
2034 | |
2035 | 0 | if (algo != GCRY_CIPHER_SM4) |
2036 | 0 | return GPG_ERR_CIPHER_ALGO; |
2037 | | |
2038 | 0 | what = "selftest"; |
2039 | 0 | errtxt = sm4_selftest (); |
2040 | 0 | if (errtxt) |
2041 | 0 | goto failed; |
2042 | |
2043 | 0 | return 0; |
2044 | | |
2045 | 0 | failed: |
2046 | 0 | if (report) |
2047 | 0 | report ("cipher", GCRY_CIPHER_SM4, what, errtxt); |
2048 | 0 | return GPG_ERR_SELFTEST_FAILED; |
2049 | 0 | } |
2050 | | |
2051 | | |
2052 | | static const gcry_cipher_oid_spec_t sm4_oids[] = |
2053 | | { |
2054 | | { "1.2.156.10197.1.104.1", GCRY_CIPHER_MODE_ECB }, |
2055 | | { "1.2.156.10197.1.104.2", GCRY_CIPHER_MODE_CBC }, |
2056 | | { "1.2.156.10197.1.104.3", GCRY_CIPHER_MODE_OFB }, |
2057 | | { "1.2.156.10197.1.104.4", GCRY_CIPHER_MODE_CFB }, |
2058 | | { "1.2.156.10197.1.104.7", GCRY_CIPHER_MODE_CTR }, |
2059 | | { NULL } |
2060 | | }; |
2061 | | |
2062 | | gcry_cipher_spec_t _gcry_cipher_spec_sm4 = |
2063 | | { |
2064 | | GCRY_CIPHER_SM4, {0, 0}, |
2065 | | "SM4", NULL, sm4_oids, 16, 128, |
2066 | | sizeof (SM4_context), |
2067 | | sm4_setkey, sm4_encrypt, sm4_decrypt, |
2068 | | NULL, NULL, |
2069 | | run_selftests |
2070 | | }; |
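
The OID table above lets ASN.1 consumers resolve both the algorithm and the cipher mode from a single identifier. A hedged lookup example, assuming gcry_cipher_map_name accepts an OID string (as it does for the other algorithm tables) and that gcry_cipher_mode_from_oid consults this table:

#include <stdio.h>
#include <gcrypt.h>

/* Resolve one of the SM4 OIDs above to an algorithm id and a mode. */
int
main (void)
{
  const char *oid = "1.2.156.10197.1.104.2";  /* SM4-CBC */
  int algo = gcry_cipher_map_name (oid);
  int mode = gcry_cipher_mode_from_oid (oid);

  printf ("algo=%d (%s), mode=%d\n",
          algo, gcry_cipher_algo_name (algo), mode);
  /* Expected: algo == GCRY_CIPHER_SM4, mode == GCRY_CIPHER_MODE_CBC. */
  return 0;
}
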