Coverage Report

Created: 2024-11-21 07:03

/src/libgcrypt/cipher/sm4.c
Line
Count
Source
1
/* sm4.c  -  SM4 Cipher Algorithm
2
 * Copyright (C) 2020 Alibaba Group.
3
 * Copyright (C) 2020-2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
4
 * Copyright (C) 2020-2022 Jussi Kivilinna <jussi.kivilinna@iki.fi>
5
 *
6
 * This file is part of Libgcrypt.
7
 *
8
 * Libgcrypt is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU Lesser General Public License as
10
 * published by the Free Software Foundation; either version 2.1 of
11
 * the License, or (at your option) any later version.
12
 *
13
 * Libgcrypt is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
20
 */
21
22
#include <config.h>
23
#include <stdio.h>
24
#include <stdlib.h>
25
26
#include "types.h"  /* for byte and u32 typedefs */
27
#include "bithelp.h"
28
#include "g10lib.h"
29
#include "cipher.h"
30
#include "bufhelp.h"
31
#include "cipher-internal.h"
32
#include "bulkhelp.h"
33
34
/* Helper macro to force alignment to 64 bytes.  */
35
#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
36
# define ATTR_ALIGNED_64  __attribute__ ((aligned (64)))
37
#else
38
# define ATTR_ALIGNED_64
39
#endif
40
41
/* USE_AESNI_AVX indicates whether to compile with Intel AES-NI/AVX code. */
42
#undef USE_AESNI_AVX
43
#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
44
# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
45
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
46
#  define USE_AESNI_AVX 1
47
# endif
48
#endif
49
50
/* USE_AESNI_AVX2 indicates whether to compile with Intel AES-NI/AVX2 code. */
51
#undef USE_AESNI_AVX2
52
#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
53
# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
54
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
55
#  define USE_AESNI_AVX2 1
56
# endif
57
#endif
58
59
/* USE_GFNI_AVX2 indicates whether to compile with Intel GFNI/AVX2 code. */
60
#undef USE_GFNI_AVX2
61
#if defined(ENABLE_GFNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
62
# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
63
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
64
#  define USE_GFNI_AVX2 1
65
# endif
66
#endif
67
68
/* USE_GFNI_AVX512 indicates whether to compile with Intel GFNI/AVX512 code. */
69
#undef USE_GFNI_AVX512
70
#if defined(ENABLE_GFNI_SUPPORT) && defined(ENABLE_AVX512_SUPPORT)
71
# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
72
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
73
#  define USE_GFNI_AVX512 1
74
# endif
75
#endif
76
77
/* Assembly implementations use the SystemV ABI; on Win64, ABI conversion and
78
 * additional stack space to store XMM6-XMM15 are needed. */
79
#undef ASM_FUNC_ABI
80
#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) || \
81
    defined(USE_GFNI_AVX2) || defined(USE_GFNI_AVX512)
82
# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
83
#  define ASM_FUNC_ABI __attribute__((sysv_abi))
84
# else
85
#  define ASM_FUNC_ABI
86
# endif
87
#endif
88
89
#undef USE_AARCH64_SIMD
90
#ifdef ENABLE_NEON_SUPPORT
91
# if defined(__AARCH64EL__) && \
92
     defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
93
     defined(HAVE_GCC_INLINE_ASM_AARCH64_NEON)
94
#   define USE_AARCH64_SIMD 1
95
# endif
96
#endif
97
98
#undef USE_ARM_CE
99
#ifdef ENABLE_ARM_CRYPTO_SUPPORT
100
# if defined(__AARCH64EL__) && \
101
     defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
102
     defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
103
#   define USE_ARM_CE 1
104
# endif
105
#endif
106
107
#undef USE_ARM_SVE_CE
108
#ifdef ENABLE_SVE_SUPPORT
109
# if defined(__AARCH64EL__) && \
110
     defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
111
     defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) && \
112
     defined(HAVE_GCC_INLINE_ASM_AARCH64_SVE) && \
113
     defined(HAVE_GCC_INLINE_ASM_AARCH64_SVE2)
114
#   define USE_ARM_SVE_CE 1
115
# endif
116
#endif
117
118
#undef USE_PPC_CRYPTO
119
#if defined(ENABLE_PPC_CRYPTO_SUPPORT) && \
120
    defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
121
    defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) && \
122
    !defined(WORDS_BIGENDIAN) && (__GNUC__ >= 4)
123
# define USE_PPC_CRYPTO 1
124
#endif
125
126
static const char *sm4_selftest (void);
127
128
static void _gcry_sm4_ctr_enc (void *context, unsigned char *ctr,
129
             void *outbuf_arg, const void *inbuf_arg,
130
             size_t nblocks);
131
static void _gcry_sm4_cbc_dec (void *context, unsigned char *iv,
132
             void *outbuf_arg, const void *inbuf_arg,
133
             size_t nblocks);
134
static void _gcry_sm4_cfb_dec (void *context, unsigned char *iv,
135
             void *outbuf_arg, const void *inbuf_arg,
136
             size_t nblocks);
137
static void _gcry_sm4_xts_crypt (void *context, unsigned char *tweak,
138
                                 void *outbuf_arg, const void *inbuf_arg,
139
                                 size_t nblocks, int encrypt);
140
static void _gcry_sm4_ecb_crypt (void *context, void *outbuf_arg,
141
         const void *inbuf_arg, size_t nblocks,
142
         int encrypt);
143
static void _gcry_sm4_ctr32le_enc(void *context, unsigned char *ctr,
144
                                  void *outbuf_arg, const void *inbuf_arg,
145
                                  size_t nblocks);
146
static size_t _gcry_sm4_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
147
           const void *inbuf_arg, size_t nblocks,
148
           int encrypt);
149
static size_t _gcry_sm4_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
150
          size_t nblocks);
151
152
typedef bulk_crypt_fn_t crypt_blk1_16_fn_t;
153
154
typedef struct
155
{
156
  u32 rkey_enc[32];
157
  u32 rkey_dec[32];
158
  crypt_blk1_16_fn_t crypt_blk1_16;
159
#ifdef USE_AESNI_AVX
160
  unsigned int use_aesni_avx:1;
161
#endif
162
#ifdef USE_AESNI_AVX2
163
  unsigned int use_aesni_avx2:1;
164
#endif
165
#ifdef USE_GFNI_AVX2
166
  unsigned int use_gfni_avx2:1;
167
#endif
168
#ifdef USE_GFNI_AVX512
169
  unsigned int use_gfni_avx512:1;
170
#endif
171
#ifdef USE_AARCH64_SIMD
172
  unsigned int use_aarch64_simd:1;
173
#endif
174
#ifdef USE_ARM_CE
175
  unsigned int use_arm_ce:1;
176
#endif
177
#ifdef USE_ARM_SVE_CE
178
  unsigned int use_arm_sve_ce:1;
179
#endif
180
#ifdef USE_PPC_CRYPTO
181
  unsigned int use_ppc8le:1;
182
  unsigned int use_ppc9le:1;
183
#endif
184
} SM4_context;
185
186
static const u32 fk[4] =
187
{
188
  0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
189
};
190
191
static struct
192
{
193
  volatile u32 counter_head;
194
  u32 cacheline_align[64 / 4 - 1];
195
  byte S[256];
196
  volatile u32 counter_tail;
197
} sbox_table ATTR_ALIGNED_64 =
198
  {
199
    0,
200
    { 0, },
201
    {
202
      0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
203
      0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
204
      0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
205
      0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
206
      0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
207
      0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
208
      0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
209
      0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
210
      0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
211
      0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
212
      0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
213
      0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
214
      0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
215
      0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
216
      0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
217
      0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
218
      0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
219
      0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
220
      0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
221
      0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
222
      0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
223
      0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
224
      0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
225
      0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
226
      0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
227
      0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
228
      0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
229
      0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
230
      0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
231
      0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
232
      0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
233
      0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
234
    },
235
    0
236
  };
237
238
static const u32 ck[] =
239
{
240
  0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
241
  0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
242
  0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
243
  0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
244
  0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
245
  0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
246
  0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
247
  0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
248
};
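The CK constants above are not arbitrary: following the SM4 specification,
byte j of CK[i] equals (4*i + j) * 7 mod 256, packed big-endian.  A small
stand-alone generator (illustrative only, not part of this file) reproduces
the table:

/* Print the 32 SM4 CK constants; byte j of CK[i] is (4*i + j) * 7 mod 256. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
  int i, j;

  for (i = 0; i < 32; i++)
    {
      uint32_t ck_i = 0;

      for (j = 0; j < 4; j++)
        ck_i = (ck_i << 8) | (uint8_t)((4 * i + j) * 7);

      printf("0x%08x%s", ck_i, (i % 4 == 3) ? ",\n" : ", ");
    }

  return 0;
}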
249
250
static inline crypt_blk1_16_fn_t sm4_get_crypt_blk1_16_fn(SM4_context *ctx);
251
252
#ifdef USE_AESNI_AVX
253
extern void _gcry_sm4_aesni_avx_expand_key(const byte *key, u32 *rk_enc,
254
             u32 *rk_dec, const u32 *fk,
255
             const u32 *ck) ASM_FUNC_ABI;
256
257
extern void _gcry_sm4_aesni_avx_ctr_enc(const u32 *rk_enc, byte *out,
258
          const byte *in, byte *ctr) ASM_FUNC_ABI;
259
260
extern void _gcry_sm4_aesni_avx_cbc_dec(const u32 *rk_dec, byte *out,
261
          const byte *in, byte *iv) ASM_FUNC_ABI;
262
263
extern void _gcry_sm4_aesni_avx_cfb_dec(const u32 *rk_enc, byte *out,
264
          const byte *in, byte *iv) ASM_FUNC_ABI;
265
266
extern void _gcry_sm4_aesni_avx_ocb_enc(const u32 *rk_enc,
267
          unsigned char *out,
268
          const unsigned char *in,
269
          unsigned char *offset,
270
          unsigned char *checksum,
271
          const u64 Ls[8]) ASM_FUNC_ABI;
272
273
extern void _gcry_sm4_aesni_avx_ocb_dec(const u32 *rk_dec,
274
          unsigned char *out,
275
          const unsigned char *in,
276
          unsigned char *offset,
277
          unsigned char *checksum,
278
          const u64 Ls[8]) ASM_FUNC_ABI;
279
280
extern void _gcry_sm4_aesni_avx_ocb_auth(const u32 *rk_enc,
281
           const unsigned char *abuf,
282
           unsigned char *offset,
283
           unsigned char *checksum,
284
           const u64 Ls[8]) ASM_FUNC_ABI;
285
286
extern unsigned int
287
_gcry_sm4_aesni_avx_crypt_blk1_8(u32 *rk, byte *out, const byte *in,
288
         unsigned int num_blks) ASM_FUNC_ABI;
289
290
static inline unsigned int
291
sm4_aesni_avx_crypt_blk1_16(void *rk, byte *out, const byte *in,
292
                            size_t num_blks)
293
0
{
294
0
  if (num_blks > 8)
295
0
    {
296
0
      _gcry_sm4_aesni_avx_crypt_blk1_8(rk, out, in, 8);
297
0
      in += 8 * 16;
298
0
      out += 8 * 16;
299
0
      num_blks -= 8;
300
0
    }
301
302
0
  return _gcry_sm4_aesni_avx_crypt_blk1_8(rk, out, in, num_blks);
303
0
}
304
305
#endif /* USE_AESNI_AVX */
306
307
#ifdef USE_AESNI_AVX2
308
extern void _gcry_sm4_aesni_avx2_ctr_enc(const u32 *rk_enc, byte *out,
309
           const byte *in,
310
           byte *ctr) ASM_FUNC_ABI;
311
312
extern void _gcry_sm4_aesni_avx2_cbc_dec(const u32 *rk_dec, byte *out,
313
           const byte *in,
314
           byte *iv) ASM_FUNC_ABI;
315
316
extern void _gcry_sm4_aesni_avx2_cfb_dec(const u32 *rk_enc, byte *out,
317
           const byte *in,
318
           byte *iv) ASM_FUNC_ABI;
319
320
extern void _gcry_sm4_aesni_avx2_ocb_enc(const u32 *rk_enc,
321
           unsigned char *out,
322
           const unsigned char *in,
323
           unsigned char *offset,
324
           unsigned char *checksum,
325
           const u64 Ls[16]) ASM_FUNC_ABI;
326
327
extern void _gcry_sm4_aesni_avx2_ocb_dec(const u32 *rk_dec,
328
           unsigned char *out,
329
           const unsigned char *in,
330
           unsigned char *offset,
331
           unsigned char *checksum,
332
           const u64 Ls[16]) ASM_FUNC_ABI;
333
334
extern void _gcry_sm4_aesni_avx2_ocb_auth(const u32 *rk_enc,
335
            const unsigned char *abuf,
336
            unsigned char *offset,
337
            unsigned char *checksum,
338
            const u64 Ls[16]) ASM_FUNC_ABI;
339
340
extern unsigned int
341
_gcry_sm4_aesni_avx2_crypt_blk1_16(u32 *rk, byte *out, const byte *in,
342
           unsigned int num_blks) ASM_FUNC_ABI;
343
344
static inline unsigned int
345
sm4_aesni_avx2_crypt_blk1_16(void *rk, byte *out, const byte *in,
346
           size_t num_blks)
347
0
{
348
0
#ifdef USE_AESNI_AVX
349
  /* Use 128-bit register implementation for short input. */
350
0
  if (num_blks <= 8)
351
0
    return _gcry_sm4_aesni_avx_crypt_blk1_8(rk, out, in, num_blks);
352
0
#endif
353
354
0
  return _gcry_sm4_aesni_avx2_crypt_blk1_16(rk, out, in, num_blks);
355
0
}
356
357
#endif /* USE_AESNI_AVX2 */
358
359
#ifdef USE_GFNI_AVX2
360
extern void _gcry_sm4_gfni_avx2_expand_key(const byte *key, u32 *rk_enc,
361
                                           u32 *rk_dec, const u32 *fk,
362
                                           const u32 *ck) ASM_FUNC_ABI;
363
364
extern void _gcry_sm4_gfni_avx2_ctr_enc(const u32 *rk_enc, byte *out,
365
          const byte *in,
366
          byte *ctr) ASM_FUNC_ABI;
367
368
extern void _gcry_sm4_gfni_avx2_cbc_dec(const u32 *rk_dec, byte *out,
369
          const byte *in,
370
          byte *iv) ASM_FUNC_ABI;
371
372
extern void _gcry_sm4_gfni_avx2_cfb_dec(const u32 *rk_enc, byte *out,
373
          const byte *in,
374
          byte *iv) ASM_FUNC_ABI;
375
376
extern void _gcry_sm4_gfni_avx2_ocb_enc(const u32 *rk_enc,
377
          unsigned char *out,
378
          const unsigned char *in,
379
          unsigned char *offset,
380
          unsigned char *checksum,
381
          const u64 Ls[16]) ASM_FUNC_ABI;
382
383
extern void _gcry_sm4_gfni_avx2_ocb_dec(const u32 *rk_dec,
384
          unsigned char *out,
385
          const unsigned char *in,
386
          unsigned char *offset,
387
          unsigned char *checksum,
388
          const u64 Ls[16]) ASM_FUNC_ABI;
389
390
extern void _gcry_sm4_gfni_avx2_ocb_auth(const u32 *rk_enc,
391
           const unsigned char *abuf,
392
           unsigned char *offset,
393
           unsigned char *checksum,
394
           const u64 Ls[16]) ASM_FUNC_ABI;
395
396
extern unsigned int
397
_gcry_sm4_gfni_avx2_crypt_blk1_16(u32 *rk, byte *out, const byte *in,
398
          unsigned int num_blks) ASM_FUNC_ABI;
399
400
static inline unsigned int
401
sm4_gfni_avx2_crypt_blk1_16(void *rk, byte *out, const byte *in,
402
          size_t num_blks)
403
0
{
404
0
  return _gcry_sm4_gfni_avx2_crypt_blk1_16(rk, out, in, num_blks);
405
0
}
406
407
#endif /* USE_GFNI_AVX2 */
408
409
#ifdef USE_GFNI_AVX512
410
extern void _gcry_sm4_gfni_avx512_expand_key(const byte *key, u32 *rk_enc,
411
                                             u32 *rk_dec, const u32 *fk,
412
                                             const u32 *ck) ASM_FUNC_ABI;
413
414
extern void _gcry_sm4_gfni_avx512_ctr_enc(const u32 *rk_enc, byte *out,
415
                                          const byte *in,
416
                                          byte *ctr) ASM_FUNC_ABI;
417
418
extern void _gcry_sm4_gfni_avx512_cbc_dec(const u32 *rk_dec, byte *out,
419
                                          const byte *in,
420
                                          byte *iv) ASM_FUNC_ABI;
421
422
extern void _gcry_sm4_gfni_avx512_cfb_dec(const u32 *rk_enc, byte *out,
423
                                          const byte *in,
424
                                          byte *iv) ASM_FUNC_ABI;
425
426
extern void _gcry_sm4_gfni_avx512_ocb_enc(const u32 *rk_enc,
427
                                          unsigned char *out,
428
                                          const unsigned char *in,
429
                                          unsigned char *offset,
430
                                          unsigned char *checksum,
431
                                          const u64 Ls[16]) ASM_FUNC_ABI;
432
433
extern void _gcry_sm4_gfni_avx512_ocb_dec(const u32 *rk_dec,
434
                                          unsigned char *out,
435
                                          const unsigned char *in,
436
                                          unsigned char *offset,
437
                                          unsigned char *checksum,
438
                                          const u64 Ls[16]) ASM_FUNC_ABI;
439
440
extern void _gcry_sm4_gfni_avx512_ocb_auth(const u32 *rk_enc,
441
                                           const unsigned char *abuf,
442
                                           unsigned char *offset,
443
                                           unsigned char *checksum,
444
                                           const u64 Ls[16]) ASM_FUNC_ABI;
445
446
extern void _gcry_sm4_gfni_avx512_ctr_enc_blk32(const u32 *rk_enc, byte *out,
447
                                                const byte *in,
448
                                                byte *ctr) ASM_FUNC_ABI;
449
450
extern void _gcry_sm4_gfni_avx512_cbc_dec_blk32(const u32 *rk_enc, byte *out,
451
                                                const byte *in,
452
                                                byte *iv) ASM_FUNC_ABI;
453
454
extern void _gcry_sm4_gfni_avx512_cfb_dec_blk32(const u32 *rk_enc, byte *out,
455
                                                const byte *in,
456
                                                byte *iv) ASM_FUNC_ABI;
457
458
extern void _gcry_sm4_gfni_avx512_ocb_enc_blk32(const u32 *rk_enc,
459
                                                unsigned char *out,
460
                                                const unsigned char *in,
461
                                                unsigned char *offset,
462
                                                unsigned char *checksum,
463
                                                const u64 Ls[32]) ASM_FUNC_ABI;
464
465
extern void _gcry_sm4_gfni_avx512_ocb_dec_blk32(const u32 *rk_dec,
466
                                                unsigned char *out,
467
                                                const unsigned char *in,
468
                                                unsigned char *offset,
469
                                                unsigned char *checksum,
470
                                                const u64 Ls[32]) ASM_FUNC_ABI;
471
472
extern unsigned int
473
_gcry_sm4_gfni_avx512_crypt_blk1_16(u32 *rk, byte *out, const byte *in,
474
                                    unsigned int num_blks) ASM_FUNC_ABI;
475
476
extern unsigned int
477
_gcry_sm4_gfni_avx512_crypt_blk32(u32 *rk, byte *out,
478
                                  const byte *in) ASM_FUNC_ABI;
479
480
static inline unsigned int
481
sm4_gfni_avx512_crypt_blk1_16(void *rk, byte *out, const byte *in,
482
            size_t num_blks)
483
0
{
484
0
  return _gcry_sm4_gfni_avx512_crypt_blk1_16(rk, out, in, num_blks);
485
0
}
486
487
#endif /* USE_GFNI_AVX512 */
488
489
#ifdef USE_AARCH64_SIMD
490
extern void _gcry_sm4_aarch64_crypt(const u32 *rk, byte *out,
491
            const byte *in,
492
            size_t num_blocks);
493
494
extern void _gcry_sm4_aarch64_ctr_enc(const u32 *rk_enc, byte *out,
495
              const byte *in,
496
              byte *ctr,
497
              size_t nblocks);
498
499
extern void _gcry_sm4_aarch64_cbc_dec(const u32 *rk_dec, byte *out,
500
              const byte *in,
501
              byte *iv,
502
              size_t nblocks);
503
504
extern void _gcry_sm4_aarch64_cfb_dec(const u32 *rk_enc, byte *out,
505
              const byte *in,
506
              byte *iv,
507
              size_t nblocks);
508
509
extern void _gcry_sm4_aarch64_crypt_blk1_8(u32 *rk, byte *out,
510
             const byte *in,
511
             size_t num_blocks);
512
513
static inline unsigned int
514
sm4_aarch64_crypt_blk1_16(void *rk, byte *out, const byte *in,
515
        size_t num_blks)
516
{
517
  if (num_blks > 8)
518
    {
519
      _gcry_sm4_aarch64_crypt_blk1_8(rk, out, in, 8);
520
      in += 8 * 16;
521
      out += 8 * 16;
522
      num_blks -= 8;
523
    }
524
525
  _gcry_sm4_aarch64_crypt_blk1_8(rk, out, in, num_blks);
526
  return 0;
527
}
528
529
#endif /* USE_AARCH64_SIMD */
530
531
#ifdef USE_ARM_CE
532
extern void _gcry_sm4_armv8_ce_expand_key(const byte *key,
533
            u32 *rkey_enc, u32 *rkey_dec,
534
            const u32 *fk, const u32 *ck);
535
536
extern void _gcry_sm4_armv8_ce_crypt(const u32 *rk, byte *out,
537
             const byte *in,
538
             size_t num_blocks);
539
540
extern void _gcry_sm4_armv8_ce_ctr_enc(const u32 *rk_enc, byte *out,
541
               const byte *in,
542
               byte *ctr,
543
               size_t nblocks);
544
545
extern void _gcry_sm4_armv8_ce_cbc_dec(const u32 *rk_dec, byte *out,
546
               const byte *in,
547
               byte *iv,
548
               size_t nblocks);
549
550
extern void _gcry_sm4_armv8_ce_cfb_dec(const u32 *rk_enc, byte *out,
551
               const byte *in,
552
               byte *iv,
553
               size_t nblocks);
554
555
extern void _gcry_sm4_armv8_ce_xts_crypt(const u32 *rk, byte *out,
556
           const byte *in,
557
           byte *tweak,
558
           size_t nblocks);
559
560
extern void _gcry_sm4_armv8_ce_crypt_blk1_8(u32 *rk, byte *out,
561
              const byte *in,
562
              size_t num_blocks);
563
564
static inline unsigned int
565
sm4_armv8_ce_crypt_blk1_16(void *rk, byte *out, const byte *in,
566
         size_t num_blks)
567
{
568
  if (num_blks > 8)
569
    {
570
      _gcry_sm4_armv8_ce_crypt_blk1_8(rk, out, in, 8);
571
      in += 8 * 16;
572
      out += 8 * 16;
573
      num_blks -= 8;
574
    }
575
576
  _gcry_sm4_armv8_ce_crypt_blk1_8(rk, out, in, num_blks);
577
  return 0;
578
}
579
580
#endif /* USE_ARM_CE */
581
582
#ifdef USE_ARM_SVE_CE
583
extern void _gcry_sm4_armv9_sve_ce_crypt(u32 *rk, byte *out,
584
           const byte *in,
585
           size_t nblocks);
586
587
extern void _gcry_sm4_armv9_sve_ce_ctr_enc(const u32 *rk_enc, byte *out,
588
             const byte *in,
589
             byte *ctr,
590
             size_t nblocks);
591
592
extern void _gcry_sm4_armv9_sve_ce_cbc_dec(const u32 *rk_dec, byte *out,
593
             const byte *in,
594
             byte *iv,
595
             size_t nblocks);
596
597
extern void _gcry_sm4_armv9_sve_ce_cfb_dec(const u32 *rk_enc, byte *out,
598
             const byte *in,
599
             byte *iv,
600
             size_t nblocks);
601
602
static inline unsigned int
603
sm4_armv9_sve_ce_crypt_blk1_16(void *rk, byte *out, const byte *in,
604
             size_t num_blks)
605
{
606
  _gcry_sm4_armv9_sve_ce_crypt(rk, out, in, num_blks);
607
  return 0;
608
}
609
610
extern unsigned int _gcry_sm4_armv9_sve_get_vl(void);
611
#endif /* USE_ARM_SVE_CE */
612
613
#ifdef USE_PPC_CRYPTO
614
extern void _gcry_sm4_ppc8le_crypt_blk1_16(u32 *rk, byte *out, const byte *in,
615
             size_t num_blks);
616
617
extern void _gcry_sm4_ppc9le_crypt_blk1_16(u32 *rk, byte *out, const byte *in,
618
             size_t num_blks);
619
620
static inline unsigned int
621
sm4_ppc8le_crypt_blk1_16(void *rk, byte *out, const byte *in, size_t num_blks)
622
{
623
  _gcry_sm4_ppc8le_crypt_blk1_16(rk, out, in, num_blks);
624
  return 0;
625
}
626
627
static inline unsigned int
628
sm4_ppc9le_crypt_blk1_16(void *rk, byte *out, const byte *in, size_t num_blks)
629
{
630
  _gcry_sm4_ppc9le_crypt_blk1_16(rk, out, in, num_blks);
631
  return 0;
632
}
633
#endif /* USE_PPC_CRYPTO */
634
635
static inline void prefetch_sbox_table(void)
636
1.52k
{
637
1.52k
  const volatile byte *vtab = (void *)&sbox_table;
638
639
  /* Modify counters to trigger copy-on-write and unsharing if physical pages
640
   * of the look-up table are shared between processes.  Modifying counters
641
   * also causes page checksums to change, hinting to the same-page merging
642
   * algorithm that these pages are frequently changing.  */
643
1.52k
  sbox_table.counter_head++;
644
1.52k
  sbox_table.counter_tail++;
645
646
  /* Prefetch look-up table to cache.  */
647
1.52k
  (void)vtab[0 * 32];
648
1.52k
  (void)vtab[1 * 32];
649
1.52k
  (void)vtab[2 * 32];
650
1.52k
  (void)vtab[3 * 32];
651
1.52k
  (void)vtab[4 * 32];
652
1.52k
  (void)vtab[5 * 32];
653
1.52k
  (void)vtab[6 * 32];
654
1.52k
  (void)vtab[7 * 32];
655
1.52k
  (void)vtab[8 * 32 - 1];
656
1.52k
}
657
658
static inline u32 sm4_t_non_lin_sub(u32 x)
659
48.8k
{
660
48.8k
  u32 out;
661
662
48.8k
  out  = (u32)sbox_table.S[(x >> 0) & 0xff] << 0;
663
48.8k
  out |= (u32)sbox_table.S[(x >> 8) & 0xff] << 8;
664
48.8k
  out |= (u32)sbox_table.S[(x >> 16) & 0xff] << 16;
665
48.8k
  out |= (u32)sbox_table.S[(x >> 24) & 0xff] << 24;
666
667
48.8k
  return out;
668
48.8k
}
669
670
static inline u32 sm4_key_lin_sub(u32 x)
671
128
{
672
128
  return x ^ rol(x, 13) ^ rol(x, 23);
673
128
}
674
675
static inline u32 sm4_enc_lin_sub(u32 x)
676
48.7k
{
677
48.7k
  u32 xrol2 = rol(x, 2);
678
48.7k
  return x ^ xrol2 ^ rol(xrol2, 8) ^ rol(xrol2, 16) ^ rol(x, 24);
679
48.7k
}
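For reference, sm4_key_lin_sub and sm4_enc_lin_sub are the two linear
transforms L' and L from the SM4 specification (here \oplus is XOR and \lll
is 32-bit left rotation); the xrol2 grouping above is just a
common-subexpression form of the standard rotation sum:

  L'(x) = x \oplus (x \lll 13) \oplus (x \lll 23)
  L(x)  = x \oplus (x \lll 2) \oplus (x \lll 10) \oplus (x \lll 18) \oplus (x \lll 24)

since (x \lll 2) \lll 8 = x \lll 10 and (x \lll 2) \lll 16 = x \lll 18.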
680
681
static inline u32 sm4_key_sub(u32 x)
682
128
{
683
128
  return sm4_key_lin_sub(sm4_t_non_lin_sub(x));
684
128
}
685
686
static inline u32 sm4_enc_sub(u32 x)
687
48.7k
{
688
48.7k
  return sm4_enc_lin_sub(sm4_t_non_lin_sub(x));
689
48.7k
}
690
691
static inline u32
692
sm4_round(const u32 x0, const u32 x1, const u32 x2, const u32 x3, const u32 rk)
693
48.7k
{
694
48.7k
  return x0 ^ sm4_enc_sub(x1 ^ x2 ^ x3 ^ rk);
695
48.7k
}
696
697
static void
698
sm4_expand_key (SM4_context *ctx, const byte *key)
699
14
{
700
14
  u32 rk[4];
701
14
  int i;
702
703
14
#ifdef USE_GFNI_AVX512
704
14
  if (ctx->use_gfni_avx512)
705
0
    {
706
0
      _gcry_sm4_gfni_avx512_expand_key (key, ctx->rkey_enc, ctx->rkey_dec,
707
0
                                        fk, ck);
708
0
      return;
709
0
    }
710
14
#endif
711
712
14
#ifdef USE_GFNI_AVX2
713
14
  if (ctx->use_gfni_avx2)
714
0
    {
715
0
      _gcry_sm4_gfni_avx2_expand_key (key, ctx->rkey_enc, ctx->rkey_dec,
716
0
                                      fk, ck);
717
0
      return;
718
0
    }
719
14
#endif
720
721
14
#ifdef USE_AESNI_AVX
722
14
  if (ctx->use_aesni_avx)
723
10
    {
724
10
      _gcry_sm4_aesni_avx_expand_key (key, ctx->rkey_enc, ctx->rkey_dec,
725
10
              fk, ck);
726
10
      return;
727
10
    }
728
4
#endif
729
730
#ifdef USE_ARM_CE
731
  if (ctx->use_arm_ce)
732
    {
733
      _gcry_sm4_armv8_ce_expand_key (key, ctx->rkey_enc, ctx->rkey_dec,
734
             fk, ck);
735
      return;
736
    }
737
#endif
738
739
4
  prefetch_sbox_table ();
740
741
4
  rk[0] = buf_get_be32(key + 4 * 0) ^ fk[0];
742
4
  rk[1] = buf_get_be32(key + 4 * 1) ^ fk[1];
743
4
  rk[2] = buf_get_be32(key + 4 * 2) ^ fk[2];
744
4
  rk[3] = buf_get_be32(key + 4 * 3) ^ fk[3];
745
746
36
  for (i = 0; i < 32; i += 4)
747
32
    {
748
32
      rk[0] = rk[0] ^ sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i + 0]);
749
32
      rk[1] = rk[1] ^ sm4_key_sub(rk[2] ^ rk[3] ^ rk[0] ^ ck[i + 1]);
750
32
      rk[2] = rk[2] ^ sm4_key_sub(rk[3] ^ rk[0] ^ rk[1] ^ ck[i + 2]);
751
32
      rk[3] = rk[3] ^ sm4_key_sub(rk[0] ^ rk[1] ^ rk[2] ^ ck[i + 3]);
752
32
      ctx->rkey_enc[i + 0] = rk[0];
753
32
      ctx->rkey_enc[i + 1] = rk[1];
754
32
      ctx->rkey_enc[i + 2] = rk[2];
755
32
      ctx->rkey_enc[i + 3] = rk[3];
756
32
      ctx->rkey_dec[31 - i - 0] = rk[0];
757
32
      ctx->rkey_dec[31 - i - 1] = rk[1];
758
32
      ctx->rkey_dec[31 - i - 2] = rk[2];
759
32
      ctx->rkey_dec[31 - i - 3] = rk[3];
760
32
    }
761
762
4
  wipememory (rk, sizeof(rk));
763
4
}
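In equation form, the generic loop above is the SM4 key schedule, with T'
denoting the S-box substitution (sm4_t_non_lin_sub) followed by
sm4_key_lin_sub:

  (K_0, K_1, K_2, K_3) = (MK_0 \oplus FK_0, MK_1 \oplus FK_1,
                          MK_2 \oplus FK_2, MK_3 \oplus FK_3)
  rk_i = K_{i+4} = K_i \oplus T'(K_{i+1} \oplus K_{i+2} \oplus K_{i+3} \oplus CK_i),
         i = 0, \ldots, 31

The decryption key schedule is the same set of round keys in reverse order
(rkey_dec[j] = rkey_enc[31 - j]), which is all SM4 needs for decryption.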
764
765
static gcry_err_code_t
766
sm4_setkey (void *context, const byte *key, const unsigned keylen,
767
            cipher_bulk_ops_t *bulk_ops)
768
14
{
769
14
  SM4_context *ctx = context;
770
14
  static int init = 0;
771
14
  static const char *selftest_failed = NULL;
772
14
  unsigned int hwf = _gcry_get_hw_features ();
773
774
14
  (void)hwf;
775
776
14
  if (!init)
777
4
    {
778
4
      init = 1;
779
4
      selftest_failed = sm4_selftest();
780
4
      if (selftest_failed)
781
0
  log_error("%s\n", selftest_failed);
782
4
    }
783
14
  if (selftest_failed)
784
0
    return GPG_ERR_SELFTEST_FAILED;
785
786
14
  if (keylen != 16)
787
4
    return GPG_ERR_INV_KEYLEN;
788
789
10
#ifdef USE_AESNI_AVX
790
10
  ctx->use_aesni_avx = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX);
791
10
#endif
792
10
#ifdef USE_AESNI_AVX2
793
10
  ctx->use_aesni_avx2 = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX2);
794
10
#endif
795
10
#ifdef USE_GFNI_AVX2
796
10
  ctx->use_gfni_avx2 = (hwf & HWF_INTEL_GFNI) && (hwf & HWF_INTEL_AVX2);
797
10
#endif
798
10
#ifdef USE_GFNI_AVX512
799
10
  ctx->use_gfni_avx512 = (hwf & HWF_INTEL_GFNI) && (hwf & HWF_INTEL_AVX512);
800
10
#endif
801
#ifdef USE_AARCH64_SIMD
802
  ctx->use_aarch64_simd = !!(hwf & HWF_ARM_NEON);
803
#endif
804
#ifdef USE_ARM_CE
805
  ctx->use_arm_ce = !!(hwf & HWF_ARM_SM4);
806
#endif
807
#ifdef USE_ARM_SVE_CE
808
  /* Only enabled when the SVE vector length is greater than 128 bits */
809
  ctx->use_arm_sve_ce = (hwf & HWF_ARM_SVE2) && (hwf & HWF_ARM_SVESM4)
810
    && _gcry_sm4_armv9_sve_get_vl() > 16;
811
#endif
812
#ifdef USE_PPC_CRYPTO
813
  ctx->use_ppc8le = (hwf & HWF_PPC_VCRYPTO) != 0;
814
  ctx->use_ppc9le = (hwf & HWF_PPC_VCRYPTO) && (hwf & HWF_PPC_ARCH_3_00);
815
#endif
816
817
10
#ifdef USE_GFNI_AVX2
818
10
  if (ctx->use_gfni_avx2)
819
0
    {
820
      /* Disable AESNI implementations when GFNI implementation is enabled. */
821
0
#ifdef USE_AESNI_AVX
822
0
      ctx->use_aesni_avx = 0;
823
0
#endif
824
0
#ifdef USE_AESNI_AVX2
825
0
      ctx->use_aesni_avx2 = 0;
826
0
#endif
827
0
    }
828
10
#endif
829
830
10
  ctx->crypt_blk1_16 = sm4_get_crypt_blk1_16_fn(ctx);
831
832
  /* Set up bulk encryption routines.  */
833
10
  memset (bulk_ops, 0, sizeof(*bulk_ops));
834
10
  bulk_ops->cbc_dec = _gcry_sm4_cbc_dec;
835
10
  bulk_ops->cfb_dec = _gcry_sm4_cfb_dec;
836
10
  bulk_ops->ctr_enc = _gcry_sm4_ctr_enc;
837
10
  bulk_ops->xts_crypt = _gcry_sm4_xts_crypt;
838
10
  bulk_ops->ecb_crypt = _gcry_sm4_ecb_crypt;
839
10
  bulk_ops->ctr32le_enc = _gcry_sm4_ctr32le_enc;
840
10
  bulk_ops->ocb_crypt = _gcry_sm4_ocb_crypt;
841
10
  bulk_ops->ocb_auth  = _gcry_sm4_ocb_auth;
842
843
10
  sm4_expand_key (ctx, key);
844
10
  return 0;
845
14
}
846
847
static unsigned int
848
sm4_do_crypt (const u32 *rk, byte *out, const byte *in)
849
1.52k
{
850
1.52k
  u32 x[4];
851
1.52k
  int i;
852
853
1.52k
  x[0] = buf_get_be32(in + 0 * 4);
854
1.52k
  x[1] = buf_get_be32(in + 1 * 4);
855
1.52k
  x[2] = buf_get_be32(in + 2 * 4);
856
1.52k
  x[3] = buf_get_be32(in + 3 * 4);
857
858
13.6k
  for (i = 0; i < 32; i += 4)
859
12.1k
    {
860
12.1k
      x[0] = sm4_round(x[0], x[1], x[2], x[3], rk[i + 0]);
861
12.1k
      x[1] = sm4_round(x[1], x[2], x[3], x[0], rk[i + 1]);
862
12.1k
      x[2] = sm4_round(x[2], x[3], x[0], x[1], rk[i + 2]);
863
12.1k
      x[3] = sm4_round(x[3], x[0], x[1], x[2], rk[i + 3]);
864
12.1k
    }
865
866
1.52k
  buf_put_be32(out + 0 * 4, x[3 - 0]);
867
1.52k
  buf_put_be32(out + 1 * 4, x[3 - 1]);
868
1.52k
  buf_put_be32(out + 2 * 4, x[3 - 2]);
869
1.52k
  buf_put_be32(out + 3 * 4, x[3 - 3]);
870
871
1.52k
  return /*burn_stack*/ 4*6+sizeof(void*)*4;
872
1.52k
}
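The loop above is the plain 32-round SM4 iteration; with T denoting the
S-box substitution followed by sm4_enc_lin_sub (i.e. sm4_enc_sub), each step
computes

  X_{i+4} = X_i \oplus T(X_{i+1} \oplus X_{i+2} \oplus X_{i+3} \oplus rk_i),
            i = 0, \ldots, 31

and the output is the reverse transform applied to the last four state
words, (Y_0, Y_1, Y_2, Y_3) = (X_{35}, X_{34}, X_{33}, X_{32}), which is why
the final stores write x[3 - 0] through x[3 - 3].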
873
874
static unsigned int
875
sm4_encrypt (void *context, byte *outbuf, const byte *inbuf)
876
1.51k
{
877
1.51k
  SM4_context *ctx = context;
878
879
1.51k
#ifdef USE_GFNI_AVX512
880
1.51k
  if (ctx->use_gfni_avx512)
881
0
    return sm4_gfni_avx512_crypt_blk1_16(ctx->rkey_enc, outbuf, inbuf, 1);
882
1.51k
#endif
883
884
1.51k
#ifdef USE_GFNI_AVX2
885
1.51k
  if (ctx->use_gfni_avx2)
886
0
    return sm4_gfni_avx2_crypt_blk1_16(ctx->rkey_enc, outbuf, inbuf, 1);
887
1.51k
#endif
888
889
#ifdef USE_ARM_CE
890
  if (ctx->use_arm_ce)
891
    return sm4_armv8_ce_crypt_blk1_16(ctx->rkey_enc, outbuf, inbuf, 1);
892
#endif
893
894
1.51k
  prefetch_sbox_table ();
895
896
1.51k
  return sm4_do_crypt (ctx->rkey_enc, outbuf, inbuf);
897
1.51k
}
898
899
static unsigned int
900
sm4_decrypt (void *context, byte *outbuf, const byte *inbuf)
901
4
{
902
4
  SM4_context *ctx = context;
903
904
4
#ifdef USE_GFNI_AVX512
905
4
  if (ctx->use_gfni_avx512)
906
0
    return sm4_gfni_avx512_crypt_blk1_16(ctx->rkey_dec, outbuf, inbuf, 1);
907
4
#endif
908
909
4
#ifdef USE_GFNI_AVX2
910
4
  if (ctx->use_gfni_avx2)
911
0
    return sm4_gfni_avx2_crypt_blk1_16(ctx->rkey_dec, outbuf, inbuf, 1);
912
4
#endif
913
914
#ifdef USE_ARM_CE
915
  if (ctx->use_arm_ce)
916
    return sm4_armv8_ce_crypt_blk1_16(ctx->rkey_dec, outbuf, inbuf, 1);
917
#endif
918
919
4
  prefetch_sbox_table ();
920
921
4
  return sm4_do_crypt (ctx->rkey_dec, outbuf, inbuf);
922
4
}
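For an external sanity check of these single-block paths, the reference
vector from the SM4 specification (key and plaintext both
0123456789abcdeffedcba9876543210, ciphertext 681edf34d206965e86b3e94f536e4246)
can be exercised through the public libgcrypt API.  A minimal sketch,
assuming a libgcrypt build with SM4 enabled:

/* Known-answer check for SM4-ECB via the public libgcrypt API. */
#include <stdio.h>
#include <string.h>
#include <gcrypt.h>

int main(void)
{
  static const unsigned char key[16] =
    { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
      0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10 };
  static const unsigned char expect[16] =
    { 0x68, 0x1e, 0xdf, 0x34, 0xd2, 0x06, 0x96, 0x5e,
      0x86, 0xb3, 0xe9, 0x4f, 0x53, 0x6e, 0x42, 0x46 };
  unsigned char out[16];
  gcry_cipher_hd_t hd;

  gcry_check_version (NULL);
  gcry_cipher_open (&hd, GCRY_CIPHER_SM4, GCRY_CIPHER_MODE_ECB, 0);
  gcry_cipher_setkey (hd, key, sizeof key);
  /* The specification reuses the key bytes as the single plaintext block. */
  gcry_cipher_encrypt (hd, out, sizeof out, key, sizeof key);
  gcry_cipher_close (hd);

  printf ("SM4 KAT: %s\n", memcmp (out, expect, 16) == 0 ? "ok" : "FAILED");
  return 0;
}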
923
924
static unsigned int
925
sm4_do_crypt_blks2 (const u32 *rk, byte *out, const byte *in)
926
0
{
927
0
  u32 x[4];
928
0
  u32 y[4];
929
0
  u32 k;
930
0
  int i;
931
932
  /* Encrypt/decrypt two blocks at a time for higher instruction-level
933
   * parallelism. */
934
935
0
  x[0] = buf_get_be32(in + 0 * 4);
936
0
  x[1] = buf_get_be32(in + 1 * 4);
937
0
  x[2] = buf_get_be32(in + 2 * 4);
938
0
  x[3] = buf_get_be32(in + 3 * 4);
939
0
  y[0] = buf_get_be32(in + 4 * 4);
940
0
  y[1] = buf_get_be32(in + 5 * 4);
941
0
  y[2] = buf_get_be32(in + 6 * 4);
942
0
  y[3] = buf_get_be32(in + 7 * 4);
943
944
0
  for (i = 0; i < 32; i += 4)
945
0
    {
946
0
      k = rk[i + 0];
947
0
      x[0] = sm4_round(x[0], x[1], x[2], x[3], k);
948
0
      y[0] = sm4_round(y[0], y[1], y[2], y[3], k);
949
0
      k = rk[i + 1];
950
0
      x[1] = sm4_round(x[1], x[2], x[3], x[0], k);
951
0
      y[1] = sm4_round(y[1], y[2], y[3], y[0], k);
952
0
      k = rk[i + 2];
953
0
      x[2] = sm4_round(x[2], x[3], x[0], x[1], k);
954
0
      y[2] = sm4_round(y[2], y[3], y[0], y[1], k);
955
0
      k = rk[i + 3];
956
0
      x[3] = sm4_round(x[3], x[0], x[1], x[2], k);
957
0
      y[3] = sm4_round(y[3], y[0], y[1], y[2], k);
958
0
    }
959
960
0
  buf_put_be32(out + 0 * 4, x[3 - 0]);
961
0
  buf_put_be32(out + 1 * 4, x[3 - 1]);
962
0
  buf_put_be32(out + 2 * 4, x[3 - 2]);
963
0
  buf_put_be32(out + 3 * 4, x[3 - 3]);
964
0
  buf_put_be32(out + 4 * 4, y[3 - 0]);
965
0
  buf_put_be32(out + 5 * 4, y[3 - 1]);
966
0
  buf_put_be32(out + 6 * 4, y[3 - 2]);
967
0
  buf_put_be32(out + 7 * 4, y[3 - 3]);
968
969
0
  return /*burn_stack*/ 4*10+sizeof(void*)*4;
970
0
}
971
972
static unsigned int
973
sm4_crypt_blocks (void *ctx, byte *out, const byte *in,
974
      size_t num_blks)
975
0
{
976
0
  const u32 *rk = ctx;
977
0
  unsigned int burn_depth = 0;
978
0
  unsigned int nburn;
979
980
0
  while (num_blks >= 2)
981
0
    {
982
0
      nburn = sm4_do_crypt_blks2 (rk, out, in);
983
0
      burn_depth = nburn > burn_depth ? nburn : burn_depth;
984
0
      out += 2 * 16;
985
0
      in += 2 * 16;
986
0
      num_blks -= 2;
987
0
    }
988
989
0
  while (num_blks)
990
0
    {
991
0
      nburn = sm4_do_crypt (rk, out, in);
992
0
      burn_depth = nburn > burn_depth ? nburn : burn_depth;
993
0
      out += 16;
994
0
      in += 16;
995
0
      num_blks--;
996
0
    }
997
998
0
  if (burn_depth)
999
0
    burn_depth += sizeof(void *) * 5;
1000
0
  return burn_depth;
1001
0
}
1002
1003
static inline crypt_blk1_16_fn_t
1004
sm4_get_crypt_blk1_16_fn(SM4_context *ctx)
1005
10
{
1006
10
  if (0)
1007
0
    ;
1008
10
#ifdef USE_GFNI_AVX512
1009
10
  else if (ctx->use_gfni_avx512)
1010
0
    {
1011
0
      return &sm4_gfni_avx512_crypt_blk1_16;
1012
0
    }
1013
10
#endif
1014
10
#ifdef USE_GFNI_AVX2
1015
10
  else if (ctx->use_gfni_avx2)
1016
0
    {
1017
0
      return &sm4_gfni_avx2_crypt_blk1_16;
1018
0
    }
1019
10
#endif
1020
10
#ifdef USE_AESNI_AVX2
1021
10
  else if (ctx->use_aesni_avx2)
1022
10
    {
1023
10
      return &sm4_aesni_avx2_crypt_blk1_16;
1024
10
    }
1025
0
#endif
1026
0
#ifdef USE_AESNI_AVX
1027
0
  else if (ctx->use_aesni_avx)
1028
0
    {
1029
0
      return &sm4_aesni_avx_crypt_blk1_16;
1030
0
    }
1031
0
#endif
1032
#ifdef USE_ARM_SVE_CE
1033
  else if (ctx->use_arm_sve_ce)
1034
    {
1035
      return &sm4_armv9_sve_ce_crypt_blk1_16;
1036
    }
1037
#endif
1038
#ifdef USE_ARM_CE
1039
  else if (ctx->use_arm_ce)
1040
    {
1041
      return &sm4_armv8_ce_crypt_blk1_16;
1042
    }
1043
#endif
1044
#ifdef USE_AARCH64_SIMD
1045
  else if (ctx->use_aarch64_simd)
1046
    {
1047
      return &sm4_aarch64_crypt_blk1_16;
1048
    }
1049
#endif
1050
#ifdef USE_PPC_CRYPTO
1051
  else if (ctx->use_ppc9le)
1052
    {
1053
      return &sm4_ppc9le_crypt_blk1_16;
1054
    }
1055
  else if (ctx->use_ppc8le)
1056
    {
1057
      return &sm4_ppc8le_crypt_blk1_16;
1058
    }
1059
#endif
1060
0
  else
1061
0
    {
1062
0
      (void)ctx;
1063
0
      return &sm4_crypt_blocks;
1064
0
    }
1065
10
}
1066
1067
/* Bulk encryption of complete blocks in CTR mode.  This function is only
1068
   intended for the bulk encryption feature of cipher.c.  CTR is expected to be
1069
   of size 16. */
1070
static void
1071
_gcry_sm4_ctr_enc(void *context, unsigned char *ctr,
1072
                  void *outbuf_arg, const void *inbuf_arg,
1073
                  size_t nblocks)
1074
0
{
1075
0
  SM4_context *ctx = context;
1076
0
  byte *outbuf = outbuf_arg;
1077
0
  const byte *inbuf = inbuf_arg;
1078
0
  int burn_stack_depth = 0;
1079
1080
0
#ifdef USE_GFNI_AVX512
1081
0
  if (ctx->use_gfni_avx512)
1082
0
    {
1083
      /* Process data in 32 block chunks. */
1084
0
      while (nblocks >= 32)
1085
0
        {
1086
0
          _gcry_sm4_gfni_avx512_ctr_enc_blk32(ctx->rkey_enc,
1087
0
                                              outbuf, inbuf, ctr);
1088
1089
0
          nblocks -= 32;
1090
0
          outbuf += 32 * 16;
1091
0
          inbuf += 32 * 16;
1092
0
        }
1093
1094
      /* Process data in 16 block chunks. */
1095
0
      if (nblocks >= 16)
1096
0
        {
1097
0
          _gcry_sm4_gfni_avx512_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr);
1098
1099
0
          nblocks -= 16;
1100
0
          outbuf += 16 * 16;
1101
0
          inbuf += 16 * 16;
1102
0
        }
1103
0
    }
1104
0
#endif
1105
1106
0
#ifdef USE_GFNI_AVX2
1107
0
  if (ctx->use_gfni_avx2)
1108
0
    {
1109
      /* Process data in 16 block chunks. */
1110
0
      while (nblocks >= 16)
1111
0
        {
1112
0
          _gcry_sm4_gfni_avx2_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr);
1113
1114
0
          nblocks -= 16;
1115
0
          outbuf += 16 * 16;
1116
0
          inbuf += 16 * 16;
1117
0
        }
1118
0
    }
1119
0
#endif
1120
1121
0
#ifdef USE_AESNI_AVX2
1122
0
  if (ctx->use_aesni_avx2)
1123
0
    {
1124
      /* Process data in 16 block chunks. */
1125
0
      while (nblocks >= 16)
1126
0
        {
1127
0
          _gcry_sm4_aesni_avx2_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr);
1128
1129
0
          nblocks -= 16;
1130
0
          outbuf += 16 * 16;
1131
0
          inbuf += 16 * 16;
1132
0
        }
1133
0
    }
1134
0
#endif
1135
1136
0
#ifdef USE_AESNI_AVX
1137
0
  if (ctx->use_aesni_avx)
1138
0
    {
1139
      /* Process data in 8 block chunks. */
1140
0
      while (nblocks >= 8)
1141
0
        {
1142
0
          _gcry_sm4_aesni_avx_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr);
1143
1144
0
          nblocks -= 8;
1145
0
          outbuf += 8 * 16;
1146
0
          inbuf += 8 * 16;
1147
0
        }
1148
0
    }
1149
0
#endif
1150
1151
#ifdef USE_ARM_SVE_CE
1152
  if (ctx->use_arm_sve_ce)
1153
    {
1154
      /* Process all blocks in one call. */
1155
      _gcry_sm4_armv9_sve_ce_ctr_enc(ctx->rkey_enc, outbuf, inbuf,
1156
             ctr, nblocks);
1157
      nblocks = 0;
1158
    }
1159
#endif
1160
1161
#ifdef USE_ARM_CE
1162
  if (ctx->use_arm_ce)
1163
    {
1164
      /* Process multiples of 8 blocks at a time. */
1165
      if (nblocks >= 8)
1166
        {
1167
          size_t nblks = nblocks & ~(8 - 1);
1168
1169
          _gcry_sm4_armv8_ce_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr, nblks);
1170
1171
          nblocks -= nblks;
1172
          outbuf += nblks * 16;
1173
          inbuf += nblks * 16;
1174
        }
1175
    }
1176
#endif
1177
1178
#ifdef USE_AARCH64_SIMD
1179
  if (ctx->use_aarch64_simd)
1180
    {
1181
      /* Process multiples of 8 blocks at a time. */
1182
      if (nblocks >= 8)
1183
        {
1184
          size_t nblks = nblocks & ~(8 - 1);
1185
1186
          _gcry_sm4_aarch64_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr, nblks);
1187
1188
          nblocks -= nblks;
1189
          outbuf += nblks * 16;
1190
          inbuf += nblks * 16;
1191
        }
1192
    }
1193
#endif
1194
1195
  /* Process remaining blocks. */
1196
0
  if (nblocks)
1197
0
    {
1198
0
      crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16;
1199
0
      byte tmpbuf[16 * 16];
1200
0
      unsigned int tmp_used = 16;
1201
0
      size_t nburn;
1202
1203
0
      if (crypt_blk1_16 == &sm4_crypt_blocks)
1204
0
  prefetch_sbox_table ();
1205
1206
0
      nburn = bulk_ctr_enc_128(ctx->rkey_enc, crypt_blk1_16, outbuf, inbuf,
1207
0
                               nblocks, ctr, tmpbuf, sizeof(tmpbuf) / 16,
1208
0
                               &tmp_used);
1209
0
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
1210
1211
0
      wipememory(tmpbuf, tmp_used);
1212
0
    }
1213
1214
0
  if (burn_stack_depth)
1215
0
    _gcry_burn_stack(burn_stack_depth);
1216
0
}
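The tail handling above delegates to bulk_ctr_enc_128 from bulkhelp.h.
Conceptually, that generic fallback materializes a batch of counter blocks,
encrypts them with crypt_blk1_16 and XORs the resulting keystream onto the
input.  A simplified illustration of the idea (not the actual bulkhelp.h
code; it reuses the byte and crypt_blk1_16_fn_t types from this file and
ignores the burn-depth return value for brevity):

/* Conceptual CTR fallback: encrypt big-endian counter blocks in batches of
 * up to 16 and XOR the keystream onto the data.  Illustration only. */
static void
ctr_enc_fallback_sketch (void *rk, crypt_blk1_16_fn_t crypt_blk1_16,
                         byte *out, const byte *in, size_t nblocks,
                         byte *ctr, byte *tmp /* >= 16 * 16 bytes */)
{
  while (nblocks)
    {
      size_t curr = nblocks > 16 ? 16 : nblocks;
      size_t i, j;

      /* Expand CURR counter values, incrementing the 128-bit big-endian
       * counter after each block. */
      for (i = 0; i < curr; i++)
        {
          for (j = 0; j < 16; j++)
            tmp[i * 16 + j] = ctr[j];
          for (j = 16; j > 0; j--)
            if (++ctr[j - 1])
              break;
        }

      /* Encrypt the counters in place; XOR the keystream onto the input. */
      crypt_blk1_16 (rk, tmp, tmp, curr);
      for (i = 0; i < curr * 16; i++)
        out[i] = in[i] ^ tmp[i];

      out += curr * 16;
      in += curr * 16;
      nblocks -= curr;
    }
}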
1217
1218
/* Bulk decryption of complete blocks in CBC mode.  This function is only
1219
   intended for the bulk encryption feature of cipher.c. */
1220
static void
1221
_gcry_sm4_cbc_dec(void *context, unsigned char *iv,
1222
                  void *outbuf_arg, const void *inbuf_arg,
1223
                  size_t nblocks)
1224
0
{
1225
0
  SM4_context *ctx = context;
1226
0
  unsigned char *outbuf = outbuf_arg;
1227
0
  const unsigned char *inbuf = inbuf_arg;
1228
0
  int burn_stack_depth = 0;
1229
1230
0
#ifdef USE_GFNI_AVX512
1231
0
  if (ctx->use_gfni_avx512)
1232
0
    {
1233
      /* Process data in 32 block chunks. */
1234
0
      while (nblocks >= 32)
1235
0
        {
1236
0
          _gcry_sm4_gfni_avx512_cbc_dec_blk32(ctx->rkey_dec, outbuf, inbuf, iv);
1237
1238
0
          nblocks -= 32;
1239
0
          outbuf += 32 * 16;
1240
0
          inbuf += 32 * 16;
1241
0
        }
1242
1243
      /* Process data in 16 block chunks. */
1244
0
      if (nblocks >= 16)
1245
0
        {
1246
0
          _gcry_sm4_gfni_avx512_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv);
1247
1248
0
          nblocks -= 16;
1249
0
          outbuf += 16 * 16;
1250
0
          inbuf += 16 * 16;
1251
0
        }
1252
0
    }
1253
0
#endif
1254
1255
0
#ifdef USE_GFNI_AVX2
1256
0
  if (ctx->use_gfni_avx2)
1257
0
    {
1258
      /* Process data in 16 block chunks. */
1259
0
      while (nblocks >= 16)
1260
0
        {
1261
0
          _gcry_sm4_gfni_avx2_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv);
1262
1263
0
          nblocks -= 16;
1264
0
          outbuf += 16 * 16;
1265
0
          inbuf += 16 * 16;
1266
0
        }
1267
0
    }
1268
0
#endif
1269
1270
0
#ifdef USE_AESNI_AVX2
1271
0
  if (ctx->use_aesni_avx2)
1272
0
    {
1273
      /* Process data in 16 block chunks. */
1274
0
      while (nblocks >= 16)
1275
0
        {
1276
0
          _gcry_sm4_aesni_avx2_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv);
1277
1278
0
          nblocks -= 16;
1279
0
          outbuf += 16 * 16;
1280
0
          inbuf += 16 * 16;
1281
0
        }
1282
0
    }
1283
0
#endif
1284
1285
0
#ifdef USE_AESNI_AVX
1286
0
  if (ctx->use_aesni_avx)
1287
0
    {
1288
      /* Process data in 8 block chunks. */
1289
0
      while (nblocks >= 8)
1290
0
        {
1291
0
          _gcry_sm4_aesni_avx_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv);
1292
1293
0
          nblocks -= 8;
1294
0
          outbuf += 8 * 16;
1295
0
          inbuf += 8 * 16;
1296
0
        }
1297
0
    }
1298
0
#endif
1299
1300
#ifdef USE_ARM_SVE_CE
1301
  if (ctx->use_arm_sve_ce)
1302
    {
1303
      /* Process all blocks in one call. */
1304
      _gcry_sm4_armv9_sve_ce_cbc_dec(ctx->rkey_dec, outbuf, inbuf,
1305
             iv, nblocks);
1306
      nblocks = 0;
1307
    }
1308
#endif
1309
1310
#ifdef USE_ARM_CE
1311
  if (ctx->use_arm_ce)
1312
    {
1313
      /* Process multiples of 8 blocks at a time. */
1314
      if (nblocks >= 8)
1315
        {
1316
          size_t nblks = nblocks & ~(8 - 1);
1317
1318
          _gcry_sm4_armv8_ce_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv, nblks);
1319
1320
          nblocks -= nblks;
1321
          outbuf += nblks * 16;
1322
          inbuf += nblks * 16;
1323
        }
1324
    }
1325
#endif
1326
1327
#ifdef USE_AARCH64_SIMD
1328
  if (ctx->use_aarch64_simd)
1329
    {
1330
      /* Process multiples of 8 blocks at a time. */
1331
      if (nblocks >= 8)
1332
        {
1333
          size_t nblks = nblocks & ~(8 - 1);
1334
1335
          _gcry_sm4_aarch64_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv, nblks);
1336
1337
          nblocks -= nblks;
1338
          outbuf += nblks * 16;
1339
          inbuf += nblks * 16;
1340
        }
1341
    }
1342
#endif
1343
1344
  /* Process remaining blocks. */
1345
0
  if (nblocks)
1346
0
    {
1347
0
      crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16;
1348
0
      unsigned char tmpbuf[16 * 16];
1349
0
      unsigned int tmp_used = 16;
1350
0
      size_t nburn;
1351
1352
0
      if (crypt_blk1_16 == &sm4_crypt_blocks)
1353
0
  prefetch_sbox_table ();
1354
1355
0
      nburn = bulk_cbc_dec_128(ctx->rkey_dec, crypt_blk1_16, outbuf, inbuf,
1356
0
                               nblocks, iv, tmpbuf, sizeof(tmpbuf) / 16,
1357
0
                               &tmp_used);
1358
0
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
1359
1360
0
      wipememory(tmpbuf, tmp_used);
1361
0
    }
1362
1363
0
  if (burn_stack_depth)
1364
0
    _gcry_burn_stack(burn_stack_depth);
1365
0
}
1366
1367
/* Bulk decryption of complete blocks in CFB mode.  This function is only
1368
   intended for the bulk encryption feature of cipher.c. */
1369
static void
1370
_gcry_sm4_cfb_dec(void *context, unsigned char *iv,
1371
                  void *outbuf_arg, const void *inbuf_arg,
1372
                  size_t nblocks)
1373
0
{
1374
0
  SM4_context *ctx = context;
1375
0
  unsigned char *outbuf = outbuf_arg;
1376
0
  const unsigned char *inbuf = inbuf_arg;
1377
0
  int burn_stack_depth = 0;
1378
1379
0
#ifdef USE_GFNI_AVX512
1380
0
  if (ctx->use_gfni_avx512)
1381
0
    {
1382
      /* Process data in 32 block chunks. */
1383
0
      while (nblocks >= 32)
1384
0
        {
1385
0
          _gcry_sm4_gfni_avx512_cfb_dec_blk32(ctx->rkey_enc, outbuf, inbuf, iv);
1386
1387
0
          nblocks -= 32;
1388
0
          outbuf += 32 * 16;
1389
0
          inbuf += 32 * 16;
1390
0
        }
1391
1392
      /* Process data in 16 block chunks. */
1393
0
      if (nblocks >= 16)
1394
0
        {
1395
0
          _gcry_sm4_gfni_avx512_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv);
1396
1397
0
          nblocks -= 16;
1398
0
          outbuf += 16 * 16;
1399
0
          inbuf += 16 * 16;
1400
0
        }
1401
0
    }
1402
0
#endif
1403
1404
0
#ifdef USE_GFNI_AVX2
1405
0
  if (ctx->use_gfni_avx2)
1406
0
    {
1407
      /* Process data in 16 block chunks. */
1408
0
      while (nblocks >= 16)
1409
0
        {
1410
0
          _gcry_sm4_gfni_avx2_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv);
1411
1412
0
          nblocks -= 16;
1413
0
          outbuf += 16 * 16;
1414
0
          inbuf += 16 * 16;
1415
0
        }
1416
0
    }
1417
0
#endif
1418
1419
0
#ifdef USE_AESNI_AVX2
1420
0
  if (ctx->use_aesni_avx2)
1421
0
    {
1422
      /* Process data in 16 block chunks. */
1423
0
      while (nblocks >= 16)
1424
0
        {
1425
0
          _gcry_sm4_aesni_avx2_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv);
1426
1427
0
          nblocks -= 16;
1428
0
          outbuf += 16 * 16;
1429
0
          inbuf += 16 * 16;
1430
0
        }
1431
0
    }
1432
0
#endif
1433
1434
0
#ifdef USE_AESNI_AVX
1435
0
  if (ctx->use_aesni_avx)
1436
0
    {
1437
      /* Process data in 8 block chunks. */
1438
0
      while (nblocks >= 8)
1439
0
        {
1440
0
          _gcry_sm4_aesni_avx_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv);
1441
1442
0
          nblocks -= 8;
1443
0
          outbuf += 8 * 16;
1444
0
          inbuf += 8 * 16;
1445
0
        }
1446
0
    }
1447
0
#endif
1448
1449
#ifdef USE_ARM_SVE_CE
1450
  if (ctx->use_arm_sve_ce)
1451
    {
1452
      /* Process all blocks in one call. */
1453
      _gcry_sm4_armv9_sve_ce_cfb_dec(ctx->rkey_enc, outbuf, inbuf,
1454
             iv, nblocks);
1455
      nblocks = 0;
1456
    }
1457
#endif
1458
1459
#ifdef USE_ARM_CE
1460
  if (ctx->use_arm_ce)
1461
    {
1462
      /* Process multiples of 8 blocks at a time. */
1463
      if (nblocks >= 8)
1464
        {
1465
          size_t nblks = nblocks & ~(8 - 1);
1466
1467
          _gcry_sm4_armv8_ce_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv, nblks);
1468
1469
          nblocks -= nblks;
1470
          outbuf += nblks * 16;
1471
          inbuf += nblks * 16;
1472
        }
1473
    }
1474
#endif
1475
1476
#ifdef USE_AARCH64_SIMD
1477
  if (ctx->use_aarch64_simd)
1478
    {
1479
      /* Process multiples of 8 blocks at a time. */
1480
      if (nblocks >= 8)
1481
        {
1482
          size_t nblks = nblocks & ~(8 - 1);
1483
1484
          _gcry_sm4_aarch64_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv, nblks);
1485
1486
          nblocks -= nblks;
1487
          outbuf += nblks * 16;
1488
          inbuf += nblks * 16;
1489
        }
1490
    }
1491
#endif
1492
1493
  /* Process remaining blocks. */
1494
0
  if (nblocks)
1495
0
    {
1496
0
      crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16;
1497
0
      unsigned char tmpbuf[16 * 16];
1498
0
      unsigned int tmp_used = 16;
1499
0
      size_t nburn;
1500
1501
0
      if (crypt_blk1_16 == &sm4_crypt_blocks)
1502
0
  prefetch_sbox_table ();
1503
1504
0
      nburn = bulk_cfb_dec_128(ctx->rkey_enc, crypt_blk1_16, outbuf, inbuf,
1505
0
                               nblocks, iv, tmpbuf, sizeof(tmpbuf) / 16,
1506
0
                               &tmp_used);
1507
0
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
1508
1509
0
      wipememory(tmpbuf, tmp_used);
1510
0
    }
1511
1512
0
  if (burn_stack_depth)
1513
0
    _gcry_burn_stack(burn_stack_depth);
1514
0
}
1515
1516
static unsigned int
1517
sm4_crypt_blk1_32 (SM4_context *ctx, byte *outbuf, const byte *inbuf,
1518
       size_t num_blks, u32 *rk)
1519
0
{
1520
0
  crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16;
1521
0
  unsigned int stack_burn_size = 0;
1522
0
  unsigned int nburn;
1523
1524
0
  gcry_assert (num_blks <= 32);
1525
1526
0
#ifdef USE_GFNI_AVX512
1527
0
  if (num_blks == 32 && ctx->use_gfni_avx512)
1528
0
    {
1529
0
      return _gcry_sm4_gfni_avx512_crypt_blk32 (rk, outbuf, inbuf);
1530
0
    }
1531
0
#endif
1532
#ifdef USE_ARM_SVE_CE
1533
  if (ctx->use_arm_sve_ce)
1534
    {
1535
      _gcry_sm4_armv9_sve_ce_crypt (rk, outbuf, inbuf, num_blks);
1536
      return 0;
1537
    }
1538
#endif
1539
1540
0
  do
1541
0
    {
1542
0
      unsigned int curr_blks = num_blks > 16 ? 16 : num_blks;
1543
0
      nburn = crypt_blk1_16 (rk, outbuf, inbuf, curr_blks);
1544
0
      stack_burn_size = nburn > stack_burn_size ? nburn : stack_burn_size;
1545
0
      outbuf += curr_blks * 16;
1546
0
      inbuf += curr_blks * 16;
1547
0
      num_blks -= curr_blks;
1548
0
    }
1549
0
  while (num_blks > 0);
1550
1551
0
  return stack_burn_size;
1552
0
}
1553
1554
static unsigned int
1555
sm4_encrypt_blk1_32 (void *context, byte *out, const byte *in,
1556
         size_t num_blks)
1557
0
{
1558
0
  SM4_context *ctx = context;
1559
0
  return sm4_crypt_blk1_32 (ctx, out, in, num_blks, ctx->rkey_enc);
1560
0
}
1561
1562
static unsigned int
1563
sm4_decrypt_blk1_32 (void *context, byte *out, const byte *in,
1564
         size_t num_blks)
1565
0
{
1566
0
  SM4_context *ctx = context;
1567
0
  return sm4_crypt_blk1_32 (ctx, out, in, num_blks, ctx->rkey_dec);
1568
0
}
1569
1570
/* Bulk encryption/decryption in ECB mode. */
1571
static void
1572
_gcry_sm4_ecb_crypt (void *context, void *outbuf_arg,
1573
         const void *inbuf_arg, size_t nblocks, int encrypt)
1574
0
{
1575
0
  SM4_context *ctx = context;
1576
0
  unsigned char *outbuf = outbuf_arg;
1577
0
  const unsigned char *inbuf = inbuf_arg;
1578
0
  int burn_stack_depth = 0;
1579
1580
  /* Process remaining blocks. */
1581
0
  if (nblocks)
1582
0
    {
1583
0
      size_t nburn;
1584
1585
0
      if (ctx->crypt_blk1_16 == &sm4_crypt_blocks)
1586
0
  prefetch_sbox_table ();
1587
1588
0
      nburn = bulk_ecb_crypt_128(ctx, encrypt ? sm4_encrypt_blk1_32
1589
0
                                              : sm4_decrypt_blk1_32,
1590
0
                                 outbuf, inbuf, nblocks, 32);
1591
0
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
1592
0
    }
1593
1594
0
  if (burn_stack_depth)
1595
0
    _gcry_burn_stack(burn_stack_depth);
1596
0
}
1597
1598
/* Bulk encryption/decryption of complete blocks in XTS mode. */
1599
static void
1600
_gcry_sm4_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg,
1601
                     const void *inbuf_arg, size_t nblocks, int encrypt)
1602
0
{
1603
0
  SM4_context *ctx = context;
1604
0
  unsigned char *outbuf = outbuf_arg;
1605
0
  const unsigned char *inbuf = inbuf_arg;
1606
0
  int burn_stack_depth = 0;
1607
1608
#ifdef USE_ARM_CE
1609
  if (ctx->use_arm_ce)
1610
    {
1611
      /* Process all blocks in one call. */
1612
      _gcry_sm4_armv8_ce_xts_crypt(encrypt ? ctx->rkey_enc : ctx->rkey_dec,
1613
                                   outbuf, inbuf, tweak, nblocks);
1614
1615
      nblocks = 0;
1616
    }
1617
#endif
1618
1619
  /* Process remaining blocks. */
1620
0
  if (nblocks)
1621
0
    {
1622
0
      unsigned char tmpbuf[32 * 16];
1623
0
      unsigned int tmp_used = 16;
1624
0
      size_t nburn;
1625
1626
0
      if (ctx->crypt_blk1_16 == &sm4_crypt_blocks)
1627
0
  prefetch_sbox_table ();
1628
1629
0
      nburn = bulk_xts_crypt_128(ctx, encrypt ? sm4_encrypt_blk1_32
1630
0
                                              : sm4_decrypt_blk1_32,
1631
0
                                 outbuf, inbuf, nblocks,
1632
0
                                 tweak, tmpbuf, sizeof(tmpbuf) / 16,
1633
0
                                 &tmp_used);
1634
0
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
1635
1636
0
      wipememory(tmpbuf, tmp_used);
1637
0
    }
1638
1639
0
  if (burn_stack_depth)
1640
0
    _gcry_burn_stack(burn_stack_depth);
1641
0
}
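For reference, both the ARMv8-CE fast path and the generic bulk_xts_crypt_128
fallback realize the usual XTS construction on these complete blocks: every
16-byte block is whitened with a per-block tweak before and after the core
cipher, and the tweak advances by a multiplication by \alpha (i.e. by x) in
GF(2^{128}):

  C_j = E_K(P_j \oplus T_j) \oplus T_j, \qquad T_{j+1} = \alpha \cdot T_j

with the starting tweak supplied through the tweak argument.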
1642
1643
/* Bulk encryption of complete blocks in CTR32LE mode (for GCM-SIV). */
1644
static void
1645
_gcry_sm4_ctr32le_enc(void *context, unsigned char *ctr,
1646
                      void *outbuf_arg, const void *inbuf_arg,
1647
                      size_t nblocks)
1648
0
{
1649
0
  SM4_context *ctx = context;
1650
0
  byte *outbuf = outbuf_arg;
1651
0
  const byte *inbuf = inbuf_arg;
1652
0
  int burn_stack_depth = 0;
1653
1654
  /* Process remaining blocks. */
1655
0
  if (nblocks)
1656
0
    {
1657
0
      byte tmpbuf[32 * 16];
1658
0
      unsigned int tmp_used = 16;
1659
0
      size_t nburn;
1660
1661
0
      nburn = bulk_ctr32le_enc_128 (ctx, sm4_encrypt_blk1_32, outbuf, inbuf,
1662
0
                                    nblocks, ctr, tmpbuf, sizeof(tmpbuf) / 16,
1663
0
                                    &tmp_used);
1664
0
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
1665
1666
0
      wipememory (tmpbuf, tmp_used);
1667
0
    }
1668
1669
0
  if (burn_stack_depth)
1670
0
    _gcry_burn_stack (burn_stack_depth);
1671
0
}
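CTR32LE is the GCM-SIV flavour of counter mode: only the low 32 bits of the
16-byte counter block are incremented, and they are kept little-endian.  A
sketch of the per-block counter update, assuming (as in RFC 8452) that the
32-bit counter occupies the first four bytes of the block, using the
bufhelp.h accessors already included above:

/* GCM-SIV style counter update: bump the low 32 bits (little-endian) and
 * leave the remaining 12 bytes of the counter block untouched. */
static inline void
ctr32le_next_sketch (unsigned char *ctr)
{
  buf_put_le32 (ctr, buf_get_le32 (ctr) + 1);
}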
1672
1673
/* Bulk encryption/decryption of complete blocks in OCB mode. */
1674
static size_t
1675
_gcry_sm4_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
1676
         const void *inbuf_arg, size_t nblocks, int encrypt)
1677
0
{
1678
0
  SM4_context *ctx = (void *)&c->context.c;
1679
0
  unsigned char *outbuf = outbuf_arg;
1680
0
  const unsigned char *inbuf = inbuf_arg;
1681
0
  u64 blkn = c->u_mode.ocb.data_nblocks;
1682
0
  int burn_stack_depth = 0;
1683
1684
0
#ifdef USE_GFNI_AVX512
1685
0
  if (ctx->use_gfni_avx512)
1686
0
    {
1687
0
      u64 Ls[32];
1688
0
      u64 *l;
1689
1690
0
      if (nblocks >= 32)
1691
0
  {
1692
0
          l = bulk_ocb_prepare_L_pointers_array_blk32 (c, Ls, blkn);
1693
1694
    /* Process data in 32 block chunks. */
1695
0
    while (nblocks >= 32)
1696
0
      {
1697
0
        blkn += 32;
1698
0
        *l = (uintptr_t)(void *)ocb_get_l (c, blkn - blkn % 32);
1699
1700
0
        if (encrypt)
1701
0
    _gcry_sm4_gfni_avx512_ocb_enc_blk32 (ctx->rkey_enc, outbuf,
1702
0
                                                     inbuf, c->u_iv.iv,
1703
0
                                                     c->u_ctr.ctr, Ls);
1704
0
        else
1705
0
    _gcry_sm4_gfni_avx512_ocb_dec_blk32 (ctx->rkey_dec, outbuf,
1706
0
                                                     inbuf, c->u_iv.iv,
1707
0
                                                     c->u_ctr.ctr, Ls);
1708
1709
0
        nblocks -= 32;
1710
0
        outbuf += 32 * 16;
1711
0
        inbuf += 32 * 16;
1712
0
      }
1713
0
  }
1714
1715
0
      if (nblocks >= 16)
1716
0
  {
1717
0
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
1718
1719
    /* Process data in 16 block chunks. */
1720
0
    blkn += 16;
1721
0
    *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
1722
1723
0
    if (encrypt)
1724
0
      _gcry_sm4_gfni_avx512_ocb_enc(ctx->rkey_enc, outbuf, inbuf,
1725
0
            c->u_iv.iv, c->u_ctr.ctr, Ls);
1726
0
    else
1727
0
      _gcry_sm4_gfni_avx512_ocb_dec(ctx->rkey_dec, outbuf, inbuf,
1728
0
            c->u_iv.iv, c->u_ctr.ctr, Ls);
1729
1730
0
    nblocks -= 16;
1731
0
    outbuf += 16 * 16;
1732
0
    inbuf += 16 * 16;
1733
0
  }
1734
0
    }
1735
0
#endif
1736
1737
0
#ifdef USE_GFNI_AVX2
1738
0
  if (ctx->use_gfni_avx2)
1739
0
    {
1740
0
      u64 Ls[16];
1741
0
      u64 *l;
1742
1743
0
      if (nblocks >= 16)
1744
0
  {
1745
0
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
1746
1747
    /* Process data in 16 block chunks. */
1748
0
    while (nblocks >= 16)
1749
0
      {
1750
0
        blkn += 16;
1751
0
        *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
1752
1753
0
        if (encrypt)
1754
0
    _gcry_sm4_gfni_avx2_ocb_enc(ctx->rkey_enc, outbuf, inbuf,
1755
0
              c->u_iv.iv, c->u_ctr.ctr, Ls);
1756
0
        else
1757
0
    _gcry_sm4_gfni_avx2_ocb_dec(ctx->rkey_dec, outbuf, inbuf,
1758
0
              c->u_iv.iv, c->u_ctr.ctr, Ls);
1759
1760
0
        nblocks -= 16;
1761
0
        outbuf += 16 * 16;
1762
0
        inbuf += 16 * 16;
1763
0
      }
1764
0
  }
1765
0
    }
1766
0
#endif
1767
1768
0
#ifdef USE_AESNI_AVX2
1769
0
  if (ctx->use_aesni_avx2)
1770
0
    {
1771
0
      u64 Ls[16];
1772
0
      u64 *l;
1773
1774
0
      if (nblocks >= 16)
1775
0
  {
1776
0
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
1777
1778
    /* Process data in 16 block chunks. */
1779
0
    while (nblocks >= 16)
1780
0
      {
1781
0
        blkn += 16;
1782
0
        *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
1783
1784
0
        if (encrypt)
1785
0
    _gcry_sm4_aesni_avx2_ocb_enc(ctx->rkey_enc, outbuf, inbuf,
1786
0
               c->u_iv.iv, c->u_ctr.ctr, Ls);
1787
0
        else
1788
0
    _gcry_sm4_aesni_avx2_ocb_dec(ctx->rkey_dec, outbuf, inbuf,
1789
0
               c->u_iv.iv, c->u_ctr.ctr, Ls);
1790
1791
0
        nblocks -= 16;
1792
0
        outbuf += 16 * 16;
1793
0
        inbuf += 16 * 16;
1794
0
      }
1795
0
  }
1796
0
    }
1797
0
#endif
1798
1799
0
#ifdef USE_AESNI_AVX
1800
0
  if (ctx->use_aesni_avx)
1801
0
    {
1802
0
      u64 Ls[8];
1803
0
      u64 *l;
1804
1805
0
      if (nblocks >= 8)
1806
0
  {
1807
0
          l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);
1808
1809
    /* Process data in 8 block chunks. */
1810
0
    while (nblocks >= 8)
1811
0
      {
1812
0
        blkn += 8;
1813
0
        *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
1814
1815
0
        if (encrypt)
1816
0
    _gcry_sm4_aesni_avx_ocb_enc(ctx->rkey_enc, outbuf, inbuf,
1817
0
              c->u_iv.iv, c->u_ctr.ctr, Ls);
1818
0
        else
1819
0
    _gcry_sm4_aesni_avx_ocb_dec(ctx->rkey_dec, outbuf, inbuf,
1820
0
              c->u_iv.iv, c->u_ctr.ctr, Ls);
1821
1822
0
        nblocks -= 8;
1823
0
        outbuf += 8 * 16;
1824
0
        inbuf += 8 * 16;
1825
0
      }
1826
0
  }
1827
0
    }
1828
0
#endif
1829
1830
  /* Process remaining blocks. */
1831
0
  if (nblocks)
1832
0
    {
1833
0
      crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16;
1834
0
      u32 *rk = encrypt ? ctx->rkey_enc : ctx->rkey_dec;
1835
0
      unsigned char tmpbuf[16 * 16];
1836
0
      unsigned int tmp_used = 16;
1837
0
      size_t nburn;
1838
1839
0
      nburn = bulk_ocb_crypt_128 (c, rk, crypt_blk1_16, outbuf, inbuf, nblocks,
1840
0
                                  &blkn, encrypt, tmpbuf, sizeof(tmpbuf) / 16,
1841
0
                                  &tmp_used);
1842
0
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
1843
1844
0
      wipememory(tmpbuf, tmp_used);
1845
0
    }
1846
1847
0
  c->u_mode.ocb.data_nblocks = blkn;
1848
1849
0
  if (burn_stack_depth)
1850
0
    _gcry_burn_stack(burn_stack_depth);
1851
1852
0
  return 0;
1853
0
}
1854
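
The OCB data path above either dispatches 8-, 16-, or 32-block chunks to the SIMD implementations or falls back to bulk_ocb_crypt_128. In RFC 7253 terms, each full block folds an L_{ntz(i)} value into the running offset (c->u_iv.iv), whitens the plaintext with that offset before and after the block cipher, and accumulates the plaintext into the checksum (c->u_ctr.ctr). A simplified single-block sketch (illustration only, not the bulkhelp code):

/* Illustrative sketch only, not part of sm4.c or bulkhelp: one full-block
 * OCB encryption step per RFC 7253.  'offset' plays the role of c->u_iv.iv,
 * 'checksum' that of c->u_ctr.ctr, and 'l_ntz' is the ocb_get_l() value
 * selected for this block number. */
static void
ocb_enc_one_block (SM4_context *ctx, unsigned char offset[16],
                   unsigned char checksum[16], const unsigned char *l_ntz,
                   unsigned char *out, const unsigned char *in)
{
  int i;

  for (i = 0; i < 16; i++)
    {
      offset[i] ^= l_ntz[i];    /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
      checksum[i] ^= in[i];     /* Checksum_i = Checksum_{i-1} xor P_i */
      out[i] = in[i] ^ offset[i];
    }

  sm4_encrypt (ctx, out, out);  /* single-block SM4 encryption from this file */

  for (i = 0; i < 16; i++)
    out[i] ^= offset[i];        /* C_i = Offset_i xor E_K(P_i xor Offset_i) */
}
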
1855
/* Bulk authentication of complete blocks in OCB mode. */
1856
static size_t
1857
_gcry_sm4_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
1858
0
{
1859
0
  SM4_context *ctx = (void *)&c->context.c;
1860
0
  const unsigned char *abuf = abuf_arg;
1861
0
  u64 blkn = c->u_mode.ocb.aad_nblocks;
1862
0
  int burn_stack_depth = 0;
1863
1864
0
#ifdef USE_GFNI_AVX512
1865
0
  if (ctx->use_gfni_avx512)
1866
0
    {
1867
0
      u64 Ls[16];
1868
0
      u64 *l;
1869
1870
0
      if (nblocks >= 16)
1871
0
        {
1872
0
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
1873
1874
          /* Process data in 16 block chunks. */
1875
0
          while (nblocks >= 16)
1876
0
            {
1877
0
              blkn += 16;
1878
0
              *l = (uintptr_t)(void *)ocb_get_l (c, blkn - blkn % 16);
1879
1880
0
              _gcry_sm4_gfni_avx512_ocb_auth (ctx->rkey_enc, abuf,
1881
0
                                              c->u_mode.ocb.aad_offset,
1882
0
                                              c->u_mode.ocb.aad_sum, Ls);
1883
1884
0
              nblocks -= 16;
1885
0
              abuf += 16 * 16;
1886
0
            }
1887
0
        }
1888
0
    }
1889
0
#endif
1890
1891
0
#ifdef USE_GFNI_AVX2
1892
0
  if (ctx->use_gfni_avx2)
1893
0
    {
1894
0
      u64 Ls[16];
1895
0
      u64 *l;
1896
1897
0
      if (nblocks >= 16)
1898
0
  {
1899
0
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
1900
1901
    /* Process data in 16 block chunks. */
1902
0
    while (nblocks >= 16)
1903
0
      {
1904
0
        blkn += 16;
1905
0
        *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
1906
1907
0
        _gcry_sm4_gfni_avx2_ocb_auth(ctx->rkey_enc, abuf,
1908
0
             c->u_mode.ocb.aad_offset,
1909
0
             c->u_mode.ocb.aad_sum, Ls);
1910
1911
0
        nblocks -= 16;
1912
0
        abuf += 16 * 16;
1913
0
      }
1914
0
  }
1915
0
    }
1916
0
#endif
1917
1918
0
#ifdef USE_AESNI_AVX2
1919
0
  if (ctx->use_aesni_avx2)
1920
0
    {
1921
0
      u64 Ls[16];
1922
0
      u64 *l;
1923
1924
0
      if (nblocks >= 16)
1925
0
  {
1926
0
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
1927
1928
    /* Process data in 16 block chunks. */
1929
0
    while (nblocks >= 16)
1930
0
      {
1931
0
        blkn += 16;
1932
0
        *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
1933
1934
0
        _gcry_sm4_aesni_avx2_ocb_auth(ctx->rkey_enc, abuf,
1935
0
              c->u_mode.ocb.aad_offset,
1936
0
              c->u_mode.ocb.aad_sum, Ls);
1937
1938
0
        nblocks -= 16;
1939
0
        abuf += 16 * 16;
1940
0
      }
1941
0
  }
1942
0
    }
1943
0
#endif
1944
1945
0
#ifdef USE_AESNI_AVX
1946
0
  if (ctx->use_aesni_avx)
1947
0
    {
1948
0
      u64 Ls[8];
1949
0
      u64 *l;
1950
1951
0
      if (nblocks >= 8)
1952
0
  {
1953
0
          l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);
1954
1955
    /* Process data in 8 block chunks. */
1956
0
    while (nblocks >= 8)
1957
0
      {
1958
0
        blkn += 8;
1959
0
        *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
1960
1961
0
        _gcry_sm4_aesni_avx_ocb_auth(ctx->rkey_enc, abuf,
1962
0
             c->u_mode.ocb.aad_offset,
1963
0
             c->u_mode.ocb.aad_sum, Ls);
1964
1965
0
        nblocks -= 8;
1966
0
        abuf += 8 * 16;
1967
0
      }
1968
0
  }
1969
0
    }
1970
0
#endif
1971
1972
  /* Process remaining blocks. */
1973
0
  if (nblocks)
1974
0
    {
1975
0
      crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16;
1976
0
      unsigned char tmpbuf[16 * 16];
1977
0
      unsigned int tmp_used = 16;
1978
0
      size_t nburn;
1979
1980
0
      nburn = bulk_ocb_auth_128 (c, ctx->rkey_enc, crypt_blk1_16, abuf, nblocks,
1981
0
                                 &blkn, tmpbuf, sizeof(tmpbuf) / 16, &tmp_used);
1982
0
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
1983
1984
0
      wipememory(tmpbuf, tmp_used);
1985
0
    }
1986
1987
0
  c->u_mode.ocb.aad_nblocks = blkn;
1988
1989
0
  if (burn_stack_depth)
1990
0
    _gcry_burn_stack(burn_stack_depth);
1991
1992
0
  return 0;
1993
0
}
1994
1995
static const char *
1996
sm4_selftest (void)
1997
4
{
1998
4
  SM4_context ctx;
1999
4
  byte scratch[16];
2000
2001
4
  static const byte plaintext[16] = {
2002
4
    0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
2003
4
    0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10,
2004
4
  };
2005
4
  static const byte key[16] = {
2006
4
    0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
2007
4
    0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10,
2008
4
  };
2009
4
  static const byte ciphertext[16] = {
2010
4
    0x68, 0x1E, 0xDF, 0x34, 0xD2, 0x06, 0x96, 0x5E,
2011
4
    0x86, 0xB3, 0xE9, 0x4F, 0x53, 0x6E, 0x42, 0x46
2012
4
  };
2013
2014
4
  memset (&ctx, 0, sizeof(ctx));
2015
2016
4
  sm4_expand_key (&ctx, key);
2017
4
  sm4_encrypt (&ctx, scratch, plaintext);
2018
4
  if (memcmp (scratch, ciphertext, sizeof (ciphertext)))
2019
0
    return "SM4 test encryption failed.";
2020
4
  sm4_decrypt (&ctx, scratch, scratch);
2021
4
  if (memcmp (scratch, plaintext, sizeof (plaintext)))
2022
0
    return "SM4 test decryption failed.";
2023
2024
4
  return NULL;
2025
4
}
2026
2027
static gpg_err_code_t
2028
run_selftests (int algo, int extended, selftest_report_func_t report)
2029
0
{
2030
0
  const char *what;
2031
0
  const char *errtxt;
2032
2033
0
  (void)extended;
2034
2035
0
  if (algo != GCRY_CIPHER_SM4)
2036
0
    return GPG_ERR_CIPHER_ALGO;
2037
2038
0
  what = "selftest";
2039
0
  errtxt = sm4_selftest ();
2040
0
  if (errtxt)
2041
0
    goto failed;
2042
2043
0
  return 0;
2044
2045
0
 failed:
2046
0
  if (report)
2047
0
    report ("cipher", GCRY_CIPHER_SM4, what, errtxt);
2048
0
  return GPG_ERR_SELFTEST_FAILED;
2049
0
}
2050
2051
2052
static const gcry_cipher_oid_spec_t sm4_oids[] =
2053
  {
2054
    { "1.2.156.10197.1.104.1", GCRY_CIPHER_MODE_ECB },
2055
    { "1.2.156.10197.1.104.2", GCRY_CIPHER_MODE_CBC },
2056
    { "1.2.156.10197.1.104.3", GCRY_CIPHER_MODE_OFB },
2057
    { "1.2.156.10197.1.104.4", GCRY_CIPHER_MODE_CFB },
2058
    { "1.2.156.10197.1.104.7", GCRY_CIPHER_MODE_CTR },
2059
    { NULL }
2060
  };
2061
2062
gcry_cipher_spec_t _gcry_cipher_spec_sm4 =
2063
  {
2064
    GCRY_CIPHER_SM4, {0, 0},
2065
    "SM4", NULL, sm4_oids, 16, 128,
2066
    sizeof (SM4_context),
2067
    sm4_setkey, sm4_encrypt, sm4_decrypt,
2068
    NULL, NULL,
2069
    run_selftests
2070
  };
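
_gcry_cipher_spec_sm4 above is what the cipher framework registers; applications reach this code only through libgcrypt's public API. A hedged usage sketch, reusing the known-answer vector that sm4_selftest() checks (error handling abbreviated):

/* Illustrative sketch only, not part of sm4.c: exercising SM4 through the
 * public libgcrypt cipher API with the same known-answer vector as
 * sm4_selftest().  Returns 0 on success. */
#include <string.h>
#include <gcrypt.h>

int
sm4_kat_via_public_api (void)
{
  static const unsigned char key[16] = {
    0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
    0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10,
  };
  static const unsigned char expected[16] = {
    0x68, 0x1E, 0xDF, 0x34, 0xD2, 0x06, 0x96, 0x5E,
    0x86, 0xB3, 0xE9, 0x4F, 0x53, 0x6E, 0x42, 0x46,
  };
  unsigned char block[16];
  gcry_cipher_hd_t hd;
  gcry_error_t err;

  memcpy (block, key, sizeof block);  /* the KAT plaintext equals the key */

  err = gcry_cipher_open (&hd, GCRY_CIPHER_SM4, GCRY_CIPHER_MODE_ECB, 0);
  if (err)
    return -1;

  err = gcry_cipher_setkey (hd, key, sizeof key);
  if (!err)
    err = gcry_cipher_encrypt (hd, block, sizeof block, NULL, 0);  /* in place */

  gcry_cipher_close (hd);

  return (!err && !memcmp (block, expected, sizeof expected)) ? 0 : -1;
}
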