Coverage Report

Created: 2022-12-08 06:10

/src/libgcrypt/cipher/sm4.c
Line
Count
Source
1
/* sm4.c  -  SM4 Cipher Algorithm
2
 * Copyright (C) 2020 Alibaba Group.
3
 * Copyright (C) 2020-2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
4
 * Copyright (C) 2020-2022 Jussi Kivilinna <jussi.kivilinna@iki.fi>
5
 *
6
 * This file is part of Libgcrypt.
7
 *
8
 * Libgcrypt is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU Lesser General Public License as
10
 * published by the Free Software Foundation; either version 2.1 of
11
 * the License, or (at your option) any later version.
12
 *
13
 * Libgcrypt is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
20
 */
21
22
#include <config.h>
23
#include <stdio.h>
24
#include <stdlib.h>
25
26
#include "types.h"  /* for byte and u32 typedefs */
27
#include "bithelp.h"
28
#include "g10lib.h"
29
#include "cipher.h"
30
#include "bufhelp.h"
31
#include "cipher-internal.h"
32
#include "bulkhelp.h"
33
34
/* Helper macro to force alignment to 64 bytes.  */
35
#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
36
# define ATTR_ALIGNED_64  __attribute__ ((aligned (64)))
37
#else
38
# define ATTR_ALIGNED_64
39
#endif
40
41
/* USE_AESNI_AVX indicates whether to compile with Intel AES-NI/AVX code. */
42
#undef USE_AESNI_AVX
43
#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
44
# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
45
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
46
#  define USE_AESNI_AVX 1
47
# endif
48
#endif
49
50
/* USE_AESNI_AVX2 indicates whether to compile with Intel AES-NI/AVX2 code. */
51
#undef USE_AESNI_AVX2
52
#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
53
# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
54
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
55
#  define USE_AESNI_AVX2 1
56
# endif
57
#endif
58
59
/* USE_GFNI_AVX2 indicates whether to compile with Intel GFNI/AVX2 code. */
60
#undef USE_GFNI_AVX2
61
#if defined(ENABLE_GFNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
62
# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
63
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
64
#  define USE_GFNI_AVX2 1
65
# endif
66
#endif
67
68
/* USE_GFNI_AVX512 indicates whether to compile with Intel GFNI/AVX512 code. */
69
#undef USE_GFNI_AVX512
70
#if defined(ENABLE_GFNI_SUPPORT) && defined(ENABLE_AVX512_SUPPORT)
71
# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
72
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
73
#  define USE_GFNI_AVX512 1
74
# endif
75
#endif
76
77
/* Assembly implementations use the SystemV ABI; on Win64, ABI conversion and
78
 * additional stack space to store XMM6-XMM15 are needed. */
79
#undef ASM_FUNC_ABI
80
#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) || \
81
    defined(USE_GFNI_AVX2) || defined(USE_GFNI_AVX512)
82
# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
83
#  define ASM_FUNC_ABI __attribute__((sysv_abi))
84
# else
85
#  define ASM_FUNC_ABI
86
# endif
87
#endif
88
89
#undef USE_AARCH64_SIMD
90
#ifdef ENABLE_NEON_SUPPORT
91
# if defined(__AARCH64EL__) && \
92
     defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
93
     defined(HAVE_GCC_INLINE_ASM_AARCH64_NEON)
94
#   define USE_AARCH64_SIMD 1
95
# endif
96
#endif
97
98
#undef USE_ARM_CE
99
#ifdef ENABLE_ARM_CRYPTO_SUPPORT
100
# if defined(__AARCH64EL__) && \
101
     defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
102
     defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
103
#   define USE_ARM_CE 1
104
# endif
105
#endif
106
107
#undef USE_ARM_SVE_CE
108
#ifdef ENABLE_SVE_SUPPORT
109
# if defined(__AARCH64EL__) && \
110
     defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
111
     defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) && \
112
     defined(HAVE_GCC_INLINE_ASM_AARCH64_SVE) && \
113
     defined(HAVE_GCC_INLINE_ASM_AARCH64_SVE2)
114
#   define USE_ARM_SVE_CE 1
115
# endif
116
#endif
117
118
static const char *sm4_selftest (void);
119
120
static void _gcry_sm4_ctr_enc (void *context, unsigned char *ctr,
121
             void *outbuf_arg, const void *inbuf_arg,
122
             size_t nblocks);
123
static void _gcry_sm4_cbc_dec (void *context, unsigned char *iv,
124
             void *outbuf_arg, const void *inbuf_arg,
125
             size_t nblocks);
126
static void _gcry_sm4_cfb_dec (void *context, unsigned char *iv,
127
             void *outbuf_arg, const void *inbuf_arg,
128
             size_t nblocks);
129
static void _gcry_sm4_xts_crypt (void *context, unsigned char *tweak,
130
                                 void *outbuf_arg, const void *inbuf_arg,
131
                                 size_t nblocks, int encrypt);
132
static void _gcry_sm4_ecb_crypt (void *context, void *outbuf_arg,
133
         const void *inbuf_arg, size_t nblocks,
134
         int encrypt);
135
static void _gcry_sm4_ctr32le_enc(void *context, unsigned char *ctr,
136
                                  void *outbuf_arg, const void *inbuf_arg,
137
                                  size_t nblocks);
138
static size_t _gcry_sm4_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
139
           const void *inbuf_arg, size_t nblocks,
140
           int encrypt);
141
static size_t _gcry_sm4_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
142
          size_t nblocks);
143
144
typedef unsigned int (*crypt_blk1_16_fn_t) (const void *ctx, byte *out,
145
                                            const byte *in,
146
                                            unsigned int num_blks);
147
148
typedef struct
149
{
150
  u32 rkey_enc[32];
151
  u32 rkey_dec[32];
152
  crypt_blk1_16_fn_t crypt_blk1_16;
153
#ifdef USE_AESNI_AVX
154
  unsigned int use_aesni_avx:1;
155
#endif
156
#ifdef USE_AESNI_AVX2
157
  unsigned int use_aesni_avx2:1;
158
#endif
159
#ifdef USE_GFNI_AVX2
160
  unsigned int use_gfni_avx2:1;
161
#endif
162
#ifdef USE_GFNI_AVX512
163
  unsigned int use_gfni_avx512:1;
164
#endif
165
#ifdef USE_AARCH64_SIMD
166
  unsigned int use_aarch64_simd:1;
167
#endif
168
#ifdef USE_ARM_CE
169
  unsigned int use_arm_ce:1;
170
#endif
171
#ifdef USE_ARM_SVE_CE
172
  unsigned int use_arm_sve_ce:1;
173
#endif
174
} SM4_context;
175
176
static const u32 fk[4] =
177
{
178
  0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
179
};
180
181
static struct
182
{
183
  volatile u32 counter_head;
184
  u32 cacheline_align[64 / 4 - 1];
185
  byte S[256];
186
  volatile u32 counter_tail;
187
} sbox_table ATTR_ALIGNED_64 =
188
  {
189
    0,
190
    { 0, },
191
    {
192
      0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
193
      0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
194
      0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
195
      0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
196
      0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
197
      0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
198
      0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
199
      0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
200
      0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
201
      0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
202
      0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
203
      0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
204
      0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
205
      0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
206
      0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
207
      0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
208
      0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
209
      0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
210
      0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
211
      0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
212
      0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
213
      0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
214
      0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
215
      0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
216
      0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
217
      0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
218
      0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
219
      0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
220
      0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
221
      0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
222
      0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
223
      0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
224
    },
225
    0
226
  };
227
228
static const u32 ck[] =
229
{
230
  0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
231
  0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
232
  0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
233
  0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
234
  0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
235
  0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
236
  0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
237
  0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
238
};
239
240
static inline crypt_blk1_16_fn_t sm4_get_crypt_blk1_16_fn(SM4_context *ctx);
241
242
#ifdef USE_AESNI_AVX
243
extern void _gcry_sm4_aesni_avx_expand_key(const byte *key, u32 *rk_enc,
244
             u32 *rk_dec, const u32 *fk,
245
             const u32 *ck) ASM_FUNC_ABI;
246
247
extern void _gcry_sm4_aesni_avx_ctr_enc(const u32 *rk_enc, byte *out,
248
          const byte *in, byte *ctr) ASM_FUNC_ABI;
249
250
extern void _gcry_sm4_aesni_avx_cbc_dec(const u32 *rk_dec, byte *out,
251
          const byte *in, byte *iv) ASM_FUNC_ABI;
252
253
extern void _gcry_sm4_aesni_avx_cfb_dec(const u32 *rk_enc, byte *out,
254
          const byte *in, byte *iv) ASM_FUNC_ABI;
255
256
extern void _gcry_sm4_aesni_avx_ocb_enc(const u32 *rk_enc,
257
          unsigned char *out,
258
          const unsigned char *in,
259
          unsigned char *offset,
260
          unsigned char *checksum,
261
          const u64 Ls[8]) ASM_FUNC_ABI;
262
263
extern void _gcry_sm4_aesni_avx_ocb_dec(const u32 *rk_dec,
264
          unsigned char *out,
265
          const unsigned char *in,
266
          unsigned char *offset,
267
          unsigned char *checksum,
268
          const u64 Ls[8]) ASM_FUNC_ABI;
269
270
extern void _gcry_sm4_aesni_avx_ocb_auth(const u32 *rk_enc,
271
           const unsigned char *abuf,
272
           unsigned char *offset,
273
           unsigned char *checksum,
274
           const u64 Ls[8]) ASM_FUNC_ABI;
275
276
extern unsigned int
277
_gcry_sm4_aesni_avx_crypt_blk1_8(const u32 *rk, byte *out, const byte *in,
278
         unsigned int num_blks) ASM_FUNC_ABI;
279
280
static inline unsigned int
281
sm4_aesni_avx_crypt_blk1_16(const void *rk, byte *out, const byte *in,
282
                            unsigned int num_blks)
283
0
{
284
0
  if (num_blks > 8)
285
0
    {
286
0
      _gcry_sm4_aesni_avx_crypt_blk1_8(rk, out, in, 8);
287
0
      in += 8 * 16;
288
0
      out += 8 * 16;
289
0
      num_blks -= 8;
290
0
    }
291
292
0
  return _gcry_sm4_aesni_avx_crypt_blk1_8(rk, out, in, num_blks);
293
0
}
294
295
#endif /* USE_AESNI_AVX */
296
297
#ifdef USE_AESNI_AVX2
298
extern void _gcry_sm4_aesni_avx2_ctr_enc(const u32 *rk_enc, byte *out,
299
           const byte *in,
300
           byte *ctr) ASM_FUNC_ABI;
301
302
extern void _gcry_sm4_aesni_avx2_cbc_dec(const u32 *rk_dec, byte *out,
303
           const byte *in,
304
           byte *iv) ASM_FUNC_ABI;
305
306
extern void _gcry_sm4_aesni_avx2_cfb_dec(const u32 *rk_enc, byte *out,
307
           const byte *in,
308
           byte *iv) ASM_FUNC_ABI;
309
310
extern void _gcry_sm4_aesni_avx2_ocb_enc(const u32 *rk_enc,
311
           unsigned char *out,
312
           const unsigned char *in,
313
           unsigned char *offset,
314
           unsigned char *checksum,
315
           const u64 Ls[16]) ASM_FUNC_ABI;
316
317
extern void _gcry_sm4_aesni_avx2_ocb_dec(const u32 *rk_dec,
318
           unsigned char *out,
319
           const unsigned char *in,
320
           unsigned char *offset,
321
           unsigned char *checksum,
322
           const u64 Ls[16]) ASM_FUNC_ABI;
323
324
extern void _gcry_sm4_aesni_avx2_ocb_auth(const u32 *rk_enc,
325
            const unsigned char *abuf,
326
            unsigned char *offset,
327
            unsigned char *checksum,
328
            const u64 Ls[16]) ASM_FUNC_ABI;
329
330
extern unsigned int
331
_gcry_sm4_aesni_avx2_crypt_blk1_16(const u32 *rk, byte *out, const byte *in,
332
           unsigned int num_blks) ASM_FUNC_ABI;
333
334
static inline unsigned int
335
sm4_aesni_avx2_crypt_blk1_16(const void *rk, byte *out, const byte *in,
336
                             unsigned int num_blks)
337
0
{
338
0
#ifdef USE_AESNI_AVX
339
  /* Use 128-bit register implementation for short input. */
340
0
  if (num_blks <= 8)
341
0
    return _gcry_sm4_aesni_avx_crypt_blk1_8(rk, out, in, num_blks);
342
0
#endif
343
344
0
  return _gcry_sm4_aesni_avx2_crypt_blk1_16(rk, out, in, num_blks);
345
0
}
346
347
#endif /* USE_AESNI_AVX2 */
348
349
#ifdef USE_GFNI_AVX2
350
extern void _gcry_sm4_gfni_avx2_expand_key(const byte *key, u32 *rk_enc,
351
                                           u32 *rk_dec, const u32 *fk,
352
                                           const u32 *ck) ASM_FUNC_ABI;
353
354
extern void _gcry_sm4_gfni_avx2_ctr_enc(const u32 *rk_enc, byte *out,
355
          const byte *in,
356
          byte *ctr) ASM_FUNC_ABI;
357
358
extern void _gcry_sm4_gfni_avx2_cbc_dec(const u32 *rk_dec, byte *out,
359
          const byte *in,
360
          byte *iv) ASM_FUNC_ABI;
361
362
extern void _gcry_sm4_gfni_avx2_cfb_dec(const u32 *rk_enc, byte *out,
363
          const byte *in,
364
          byte *iv) ASM_FUNC_ABI;
365
366
extern void _gcry_sm4_gfni_avx2_ocb_enc(const u32 *rk_enc,
367
          unsigned char *out,
368
          const unsigned char *in,
369
          unsigned char *offset,
370
          unsigned char *checksum,
371
          const u64 Ls[16]) ASM_FUNC_ABI;
372
373
extern void _gcry_sm4_gfni_avx2_ocb_dec(const u32 *rk_dec,
374
          unsigned char *out,
375
          const unsigned char *in,
376
          unsigned char *offset,
377
          unsigned char *checksum,
378
          const u64 Ls[16]) ASM_FUNC_ABI;
379
380
extern void _gcry_sm4_gfni_avx2_ocb_auth(const u32 *rk_enc,
381
           const unsigned char *abuf,
382
           unsigned char *offset,
383
           unsigned char *checksum,
384
           const u64 Ls[16]) ASM_FUNC_ABI;
385
386
extern unsigned int
387
_gcry_sm4_gfni_avx2_crypt_blk1_16(const u32 *rk, byte *out, const byte *in,
388
          unsigned int num_blks) ASM_FUNC_ABI;
389
390
static inline unsigned int
391
sm4_gfni_avx2_crypt_blk1_16(const void *rk, byte *out, const byte *in,
392
         unsigned int num_blks)
393
0
{
394
0
  return _gcry_sm4_gfni_avx2_crypt_blk1_16(rk, out, in, num_blks);
395
0
}
396
397
#endif /* USE_GFNI_AVX2 */
398
399
#ifdef USE_GFNI_AVX512
400
extern void _gcry_sm4_gfni_avx512_expand_key(const byte *key, u32 *rk_enc,
401
                                             u32 *rk_dec, const u32 *fk,
402
                                             const u32 *ck) ASM_FUNC_ABI;
403
404
extern void _gcry_sm4_gfni_avx512_ctr_enc(const u32 *rk_enc, byte *out,
405
                                          const byte *in,
406
                                          byte *ctr) ASM_FUNC_ABI;
407
408
extern void _gcry_sm4_gfni_avx512_cbc_dec(const u32 *rk_dec, byte *out,
409
                                          const byte *in,
410
                                          byte *iv) ASM_FUNC_ABI;
411
412
extern void _gcry_sm4_gfni_avx512_cfb_dec(const u32 *rk_enc, byte *out,
413
                                          const byte *in,
414
                                          byte *iv) ASM_FUNC_ABI;
415
416
extern void _gcry_sm4_gfni_avx512_ocb_enc(const u32 *rk_enc,
417
                                          unsigned char *out,
418
                                          const unsigned char *in,
419
                                          unsigned char *offset,
420
                                          unsigned char *checksum,
421
                                          const u64 Ls[16]) ASM_FUNC_ABI;
422
423
extern void _gcry_sm4_gfni_avx512_ocb_dec(const u32 *rk_dec,
424
                                          unsigned char *out,
425
                                          const unsigned char *in,
426
                                          unsigned char *offset,
427
                                          unsigned char *checksum,
428
                                          const u64 Ls[16]) ASM_FUNC_ABI;
429
430
extern void _gcry_sm4_gfni_avx512_ocb_auth(const u32 *rk_enc,
431
                                           const unsigned char *abuf,
432
                                           unsigned char *offset,
433
                                           unsigned char *checksum,
434
                                           const u64 Ls[16]) ASM_FUNC_ABI;
435
436
extern void _gcry_sm4_gfni_avx512_ctr_enc_blk32(const u32 *rk_enc, byte *out,
437
                                                const byte *in,
438
                                                byte *ctr) ASM_FUNC_ABI;
439
440
extern void _gcry_sm4_gfni_avx512_cbc_dec_blk32(const u32 *rk_enc, byte *out,
441
                                                const byte *in,
442
                                                byte *iv) ASM_FUNC_ABI;
443
444
extern void _gcry_sm4_gfni_avx512_cfb_dec_blk32(const u32 *rk_enc, byte *out,
445
                                                const byte *in,
446
                                                byte *iv) ASM_FUNC_ABI;
447
448
extern void _gcry_sm4_gfni_avx512_ocb_enc_blk32(const u32 *rk_enc,
449
                                                unsigned char *out,
450
                                                const unsigned char *in,
451
                                                unsigned char *offset,
452
                                                unsigned char *checksum,
453
                                                const u64 Ls[32]) ASM_FUNC_ABI;
454
455
extern void _gcry_sm4_gfni_avx512_ocb_dec_blk32(const u32 *rk_dec,
456
                                                unsigned char *out,
457
                                                const unsigned char *in,
458
                                                unsigned char *offset,
459
                                                unsigned char *checksum,
460
                                                const u64 Ls[32]) ASM_FUNC_ABI;
461
462
extern unsigned int
463
_gcry_sm4_gfni_avx512_crypt_blk1_16(const u32 *rk, byte *out, const byte *in,
464
                                    unsigned int num_blks) ASM_FUNC_ABI;
465
466
extern unsigned int
467
_gcry_sm4_gfni_avx512_crypt_blk32(const u32 *rk, byte *out,
468
                                  const byte *in) ASM_FUNC_ABI;
469
470
static inline unsigned int
471
sm4_gfni_avx512_crypt_blk1_16(const void *rk, byte *out, const byte *in,
472
                              unsigned int num_blks)
473
0
{
474
0
  return _gcry_sm4_gfni_avx512_crypt_blk1_16(rk, out, in, num_blks);
475
0
}
476
477
#endif /* USE_GFNI_AVX512 */
478
479
#ifdef USE_AARCH64_SIMD
480
extern void _gcry_sm4_aarch64_crypt(const u32 *rk, byte *out,
481
            const byte *in,
482
            size_t num_blocks);
483
484
extern void _gcry_sm4_aarch64_ctr_enc(const u32 *rk_enc, byte *out,
485
              const byte *in,
486
              byte *ctr,
487
              size_t nblocks);
488
489
extern void _gcry_sm4_aarch64_cbc_dec(const u32 *rk_dec, byte *out,
490
              const byte *in,
491
              byte *iv,
492
              size_t nblocks);
493
494
extern void _gcry_sm4_aarch64_cfb_dec(const u32 *rk_enc, byte *out,
495
              const byte *in,
496
              byte *iv,
497
              size_t nblocks);
498
499
extern void _gcry_sm4_aarch64_crypt_blk1_8(const u32 *rk, byte *out,
500
             const byte *in,
501
             size_t num_blocks);
502
503
static inline unsigned int
504
sm4_aarch64_crypt_blk1_16(const void *rk, byte *out, const byte *in,
505
                          unsigned int num_blks)
506
{
507
  if (num_blks > 8)
508
    {
509
      _gcry_sm4_aarch64_crypt_blk1_8(rk, out, in, 8);
510
      in += 8 * 16;
511
      out += 8 * 16;
512
      num_blks -= 8;
513
    }
514
515
  _gcry_sm4_aarch64_crypt_blk1_8(rk, out, in, num_blks);
516
  return 0;
517
}
518
519
#endif /* USE_AARCH64_SIMD */
520
521
#ifdef USE_ARM_CE
522
extern void _gcry_sm4_armv8_ce_expand_key(const byte *key,
523
            u32 *rkey_enc, u32 *rkey_dec,
524
            const u32 *fk, const u32 *ck);
525
526
extern void _gcry_sm4_armv8_ce_crypt(const u32 *rk, byte *out,
527
             const byte *in,
528
             size_t num_blocks);
529
530
extern void _gcry_sm4_armv8_ce_ctr_enc(const u32 *rk_enc, byte *out,
531
               const byte *in,
532
               byte *ctr,
533
               size_t nblocks);
534
535
extern void _gcry_sm4_armv8_ce_cbc_dec(const u32 *rk_dec, byte *out,
536
               const byte *in,
537
               byte *iv,
538
               size_t nblocks);
539
540
extern void _gcry_sm4_armv8_ce_cfb_dec(const u32 *rk_enc, byte *out,
541
               const byte *in,
542
               byte *iv,
543
               size_t nblocks);
544
545
extern void _gcry_sm4_armv8_ce_xts_crypt(const u32 *rk, byte *out,
546
           const byte *in,
547
           byte *tweak,
548
           size_t nblocks);
549
550
extern void _gcry_sm4_armv8_ce_crypt_blk1_8(const u32 *rk, byte *out,
551
              const byte *in,
552
              size_t num_blocks);
553
554
static inline unsigned int
555
sm4_armv8_ce_crypt_blk1_16(const void *rk, byte *out, const byte *in,
556
                           unsigned int num_blks)
557
{
558
  if (num_blks > 8)
559
    {
560
      _gcry_sm4_armv8_ce_crypt_blk1_8(rk, out, in, 8);
561
      in += 8 * 16;
562
      out += 8 * 16;
563
      num_blks -= 8;
564
    }
565
566
  _gcry_sm4_armv8_ce_crypt_blk1_8(rk, out, in, num_blks);
567
  return 0;
568
}
569
570
#endif /* USE_ARM_CE */
571
572
#ifdef USE_ARM_SVE_CE
573
extern void _gcry_sm4_armv9_sve_ce_crypt(const u32 *rk, byte *out,
574
           const byte *in,
575
           size_t nblocks);
576
577
extern void _gcry_sm4_armv9_sve_ce_ctr_enc(const u32 *rk_enc, byte *out,
578
             const byte *in,
579
             byte *ctr,
580
             size_t nblocks);
581
582
extern void _gcry_sm4_armv9_sve_ce_cbc_dec(const u32 *rk_dec, byte *out,
583
             const byte *in,
584
             byte *iv,
585
             size_t nblocks);
586
587
extern void _gcry_sm4_armv9_sve_ce_cfb_dec(const u32 *rk_enc, byte *out,
588
             const byte *in,
589
             byte *iv,
590
             size_t nblocks);
591
592
static inline unsigned int
593
sm4_armv9_sve_ce_crypt_blk1_16(const void *rk, byte *out, const byte *in,
594
             unsigned int num_blks)
595
{
596
  _gcry_sm4_armv9_sve_ce_crypt(rk, out, in, num_blks);
597
  return 0;
598
}
599
600
extern unsigned int _gcry_sm4_armv9_sve_get_vl(void);
601
#endif /* USE_ARM_SVE_CE */
602
603
static inline void prefetch_sbox_table(void)
604
0
{
605
0
  const volatile byte *vtab = (void *)&sbox_table;
606
607
  /* Modify counters to trigger copy-on-write and unsharing if physical pages
608
 * of the look-up table are shared between processes.  Modifying counters also
609
 * causes page checksums to change, hinting to the same-page merging algorithm
610
   * that these pages are frequently changing.  */
611
0
  sbox_table.counter_head++;
612
0
  sbox_table.counter_tail++;
613
614
  /* Prefetch look-up table to cache.  */
615
0
  (void)vtab[0 * 32];
616
0
  (void)vtab[1 * 32];
617
0
  (void)vtab[2 * 32];
618
0
  (void)vtab[3 * 32];
619
0
  (void)vtab[4 * 32];
620
0
  (void)vtab[5 * 32];
621
0
  (void)vtab[6 * 32];
622
0
  (void)vtab[7 * 32];
623
0
  (void)vtab[8 * 32 - 1];
624
0
}
625
626
static inline u32 sm4_t_non_lin_sub(u32 x)
627
0
{
628
0
  u32 out;
629
630
0
  out  = (u32)sbox_table.S[(x >> 0) & 0xff] << 0;
631
0
  out |= (u32)sbox_table.S[(x >> 8) & 0xff] << 8;
632
0
  out |= (u32)sbox_table.S[(x >> 16) & 0xff] << 16;
633
0
  out |= (u32)sbox_table.S[(x >> 24) & 0xff] << 24;
634
635
0
  return out;
636
0
}
637
638
static inline u32 sm4_key_lin_sub(u32 x)
639
0
{
640
0
  return x ^ rol(x, 13) ^ rol(x, 23);
641
0
}
642
643
static inline u32 sm4_enc_lin_sub(u32 x)
644
0
{
645
0
  u32 xrol2 = rol(x, 2);
646
0
  return x ^ xrol2 ^ rol(xrol2, 8) ^ rol(xrol2, 16) ^ rol(x, 24);
647
0
}
648
649
static inline u32 sm4_key_sub(u32 x)
650
0
{
651
0
  return sm4_key_lin_sub(sm4_t_non_lin_sub(x));
652
0
}
653
654
static inline u32 sm4_enc_sub(u32 x)
655
0
{
656
0
  return sm4_enc_lin_sub(sm4_t_non_lin_sub(x));
657
0
}
658
659
static inline u32
660
sm4_round(const u32 x0, const u32 x1, const u32 x2, const u32 x3, const u32 rk)
661
0
{
662
0
  return x0 ^ sm4_enc_sub(x1 ^ x2 ^ x3 ^ rk);
663
0
}
664
665
static void
666
sm4_expand_key (SM4_context *ctx, const byte *key)
667
0
{
668
0
  u32 rk[4];
669
0
  int i;
670
671
0
#ifdef USE_GFNI_AVX512
672
0
  if (ctx->use_gfni_avx512)
673
0
    {
674
0
      _gcry_sm4_gfni_avx512_expand_key (key, ctx->rkey_enc, ctx->rkey_dec,
675
0
                                        fk, ck);
676
0
      return;
677
0
    }
678
0
#endif
679
680
0
#ifdef USE_GFNI_AVX2
681
0
  if (ctx->use_gfni_avx2)
682
0
    {
683
0
      _gcry_sm4_gfni_avx2_expand_key (key, ctx->rkey_enc, ctx->rkey_dec,
684
0
                                      fk, ck);
685
0
      return;
686
0
    }
687
0
#endif
688
689
0
#ifdef USE_AESNI_AVX
690
0
  if (ctx->use_aesni_avx)
691
0
    {
692
0
      _gcry_sm4_aesni_avx_expand_key (key, ctx->rkey_enc, ctx->rkey_dec,
693
0
              fk, ck);
694
0
      return;
695
0
    }
696
0
#endif
697
698
#ifdef USE_ARM_CE
699
  if (ctx->use_arm_ce)
700
    {
701
      _gcry_sm4_armv8_ce_expand_key (key, ctx->rkey_enc, ctx->rkey_dec,
702
             fk, ck);
703
      return;
704
    }
705
#endif
706
707
0
  prefetch_sbox_table ();
708
709
0
  rk[0] = buf_get_be32(key + 4 * 0) ^ fk[0];
710
0
  rk[1] = buf_get_be32(key + 4 * 1) ^ fk[1];
711
0
  rk[2] = buf_get_be32(key + 4 * 2) ^ fk[2];
712
0
  rk[3] = buf_get_be32(key + 4 * 3) ^ fk[3];
713
714
0
  for (i = 0; i < 32; i += 4)
715
0
    {
716
0
      rk[0] = rk[0] ^ sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i + 0]);
717
0
      rk[1] = rk[1] ^ sm4_key_sub(rk[2] ^ rk[3] ^ rk[0] ^ ck[i + 1]);
718
0
      rk[2] = rk[2] ^ sm4_key_sub(rk[3] ^ rk[0] ^ rk[1] ^ ck[i + 2]);
719
0
      rk[3] = rk[3] ^ sm4_key_sub(rk[0] ^ rk[1] ^ rk[2] ^ ck[i + 3]);
720
0
      ctx->rkey_enc[i + 0] = rk[0];
721
0
      ctx->rkey_enc[i + 1] = rk[1];
722
0
      ctx->rkey_enc[i + 2] = rk[2];
723
0
      ctx->rkey_enc[i + 3] = rk[3];
724
0
      ctx->rkey_dec[31 - i - 0] = rk[0];
725
0
      ctx->rkey_dec[31 - i - 1] = rk[1];
726
0
      ctx->rkey_dec[31 - i - 2] = rk[2];
727
0
      ctx->rkey_dec[31 - i - 3] = rk[3];
728
0
    }
729
730
0
  wipememory (rk, sizeof(rk));
731
0
}
732
733
static gcry_err_code_t
734
sm4_setkey (void *context, const byte *key, const unsigned keylen,
735
            cipher_bulk_ops_t *bulk_ops)
736
0
{
737
0
  SM4_context *ctx = context;
738
0
  static int init = 0;
739
0
  static const char *selftest_failed = NULL;
740
0
  unsigned int hwf = _gcry_get_hw_features ();
741
742
0
  (void)hwf;
743
744
0
  if (!init)
745
0
    {
746
0
      init = 1;
747
0
      selftest_failed = sm4_selftest();
748
0
      if (selftest_failed)
749
0
  log_error("%s\n", selftest_failed);
750
0
    }
751
0
  if (selftest_failed)
752
0
    return GPG_ERR_SELFTEST_FAILED;
753
754
0
  if (keylen != 16)
755
0
    return GPG_ERR_INV_KEYLEN;
756
757
0
#ifdef USE_AESNI_AVX
758
0
  ctx->use_aesni_avx = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX);
759
0
#endif
760
0
#ifdef USE_AESNI_AVX2
761
0
  ctx->use_aesni_avx2 = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX2);
762
0
#endif
763
0
#ifdef USE_GFNI_AVX2
764
0
  ctx->use_gfni_avx2 = (hwf & HWF_INTEL_GFNI) && (hwf & HWF_INTEL_AVX2);
765
0
#endif
766
0
#ifdef USE_GFNI_AVX512
767
0
  ctx->use_gfni_avx512 = (hwf & HWF_INTEL_GFNI) && (hwf & HWF_INTEL_AVX512);
768
0
#endif
769
#ifdef USE_AARCH64_SIMD
770
  ctx->use_aarch64_simd = !!(hwf & HWF_ARM_NEON);
771
#endif
772
#ifdef USE_ARM_CE
773
  ctx->use_arm_ce = !!(hwf & HWF_ARM_SM4);
774
#endif
775
#ifdef USE_ARM_SVE_CE
776
  /* Only enabled when the SVE vector length is greater than 128 bits */
777
  ctx->use_arm_sve_ce = (hwf & HWF_ARM_SVE2) && (hwf & HWF_ARM_SVESM4)
778
    && _gcry_sm4_armv9_sve_get_vl() > 16;
779
#endif
780
781
0
#ifdef USE_GFNI_AVX2
782
0
  if (ctx->use_gfni_avx2)
783
0
    {
784
      /* Disable AESNI implementations when GFNI implementation is enabled. */
785
0
#ifdef USE_AESNI_AVX
786
0
      ctx->use_aesni_avx = 0;
787
0
#endif
788
0
#ifdef USE_AESNI_AVX2
789
0
      ctx->use_aesni_avx2 = 0;
790
0
#endif
791
0
    }
792
0
#endif
793
794
0
  ctx->crypt_blk1_16 = sm4_get_crypt_blk1_16_fn(ctx);
795
796
  /* Set up bulk encryption routines.  */
797
0
  memset (bulk_ops, 0, sizeof(*bulk_ops));
798
0
  bulk_ops->cbc_dec = _gcry_sm4_cbc_dec;
799
0
  bulk_ops->cfb_dec = _gcry_sm4_cfb_dec;
800
0
  bulk_ops->ctr_enc = _gcry_sm4_ctr_enc;
801
0
  bulk_ops->xts_crypt = _gcry_sm4_xts_crypt;
802
0
  bulk_ops->ecb_crypt = _gcry_sm4_ecb_crypt;
803
0
  bulk_ops->ctr32le_enc = _gcry_sm4_ctr32le_enc;
804
0
  bulk_ops->ocb_crypt = _gcry_sm4_ocb_crypt;
805
0
  bulk_ops->ocb_auth  = _gcry_sm4_ocb_auth;
806
807
0
  sm4_expand_key (ctx, key);
808
0
  return 0;
809
0
}
810
811
static unsigned int
812
sm4_do_crypt (const u32 *rk, byte *out, const byte *in)
813
0
{
814
0
  u32 x[4];
815
0
  int i;
816
817
0
  x[0] = buf_get_be32(in + 0 * 4);
818
0
  x[1] = buf_get_be32(in + 1 * 4);
819
0
  x[2] = buf_get_be32(in + 2 * 4);
820
0
  x[3] = buf_get_be32(in + 3 * 4);
821
822
0
  for (i = 0; i < 32; i += 4)
823
0
    {
824
0
      x[0] = sm4_round(x[0], x[1], x[2], x[3], rk[i + 0]);
825
0
      x[1] = sm4_round(x[1], x[2], x[3], x[0], rk[i + 1]);
826
0
      x[2] = sm4_round(x[2], x[3], x[0], x[1], rk[i + 2]);
827
0
      x[3] = sm4_round(x[3], x[0], x[1], x[2], rk[i + 3]);
828
0
    }
829
830
0
  buf_put_be32(out + 0 * 4, x[3 - 0]);
831
0
  buf_put_be32(out + 1 * 4, x[3 - 1]);
832
0
  buf_put_be32(out + 2 * 4, x[3 - 2]);
833
0
  buf_put_be32(out + 3 * 4, x[3 - 3]);
834
835
0
  return /*burn_stack*/ 4*6+sizeof(void*)*4;
836
0
}
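
The generic implementation above can be sanity-checked against the published SM4 test vector (the key bytes encrypted with themselves as key), the same vector the module's self-test relies on. A minimal sketch, assuming it were added to this file below sm4_do_crypt; the helper name check_sm4_vector and the use of memset from <string.h> are illustrative, not part of the source:

static const char *
check_sm4_vector (void)
{
  /* Standard SM4 test vector: key and plaintext are identical. */
  static const byte key[16] = {
    0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
    0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10
  };
  static const byte expect[16] = {
    0x68, 0x1e, 0xdf, 0x34, 0xd2, 0x06, 0x96, 0x5e,
    0x86, 0xb3, 0xe9, 0x4f, 0x53, 0x6e, 0x42, 0x46
  };
  SM4_context ctx;
  byte out[16];
  int i;

  memset (&ctx, 0, sizeof(ctx));   /* all use_* flags off: generic C path */
  sm4_expand_key (&ctx, key);
  sm4_do_crypt (ctx.rkey_enc, out, key);

  for (i = 0; i < 16; i++)
    if (out[i] != expect[i])
      return "SM4 test vector mismatch";

  return NULL;
}
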
837
838
static unsigned int
839
sm4_encrypt (void *context, byte *outbuf, const byte *inbuf)
840
0
{
841
0
  SM4_context *ctx = context;
842
843
0
#ifdef USE_GFNI_AVX512
844
0
  if (ctx->use_gfni_avx512)
845
0
    return sm4_gfni_avx512_crypt_blk1_16(ctx->rkey_enc, outbuf, inbuf, 1);
846
0
#endif
847
848
0
#ifdef USE_GFNI_AVX2
849
0
  if (ctx->use_gfni_avx2)
850
0
    return sm4_gfni_avx2_crypt_blk1_16(ctx->rkey_enc, outbuf, inbuf, 1);
851
0
#endif
852
853
#ifdef USE_ARM_CE
854
  if (ctx->use_arm_ce)
855
    return sm4_armv8_ce_crypt_blk1_16(ctx->rkey_enc, outbuf, inbuf, 1);
856
#endif
857
858
0
  prefetch_sbox_table ();
859
860
0
  return sm4_do_crypt (ctx->rkey_enc, outbuf, inbuf);
861
0
}
862
863
static unsigned int
864
sm4_decrypt (void *context, byte *outbuf, const byte *inbuf)
865
0
{
866
0
  SM4_context *ctx = context;
867
868
0
#ifdef USE_GFNI_AVX512
869
0
  if (ctx->use_gfni_avx512)
870
0
    return sm4_gfni_avx512_crypt_blk1_16(ctx->rkey_dec, outbuf, inbuf, 1);
871
0
#endif
872
873
0
#ifdef USE_GFNI_AVX2
874
0
  if (ctx->use_gfni_avx2)
875
0
    return sm4_gfni_avx2_crypt_blk1_16(ctx->rkey_dec, outbuf, inbuf, 1);
876
0
#endif
877
878
#ifdef USE_ARM_CE
879
  if (ctx->use_arm_ce)
880
    return sm4_armv8_ce_crypt_blk1_16(ctx->rkey_dec, outbuf, inbuf, 1);
881
#endif
882
883
0
  prefetch_sbox_table ();
884
885
0
  return sm4_do_crypt (ctx->rkey_dec, outbuf, inbuf);
886
0
}
887
888
static unsigned int
889
sm4_do_crypt_blks2 (const u32 *rk, byte *out, const byte *in)
890
0
{
891
0
  u32 x[4];
892
0
  u32 y[4];
893
0
  u32 k;
894
0
  int i;
895
896
  /* Encrypts/decrypts two blocks at a time for higher instruction-level
897
   * parallelism. */
898
899
0
  x[0] = buf_get_be32(in + 0 * 4);
900
0
  x[1] = buf_get_be32(in + 1 * 4);
901
0
  x[2] = buf_get_be32(in + 2 * 4);
902
0
  x[3] = buf_get_be32(in + 3 * 4);
903
0
  y[0] = buf_get_be32(in + 4 * 4);
904
0
  y[1] = buf_get_be32(in + 5 * 4);
905
0
  y[2] = buf_get_be32(in + 6 * 4);
906
0
  y[3] = buf_get_be32(in + 7 * 4);
907
908
0
  for (i = 0; i < 32; i += 4)
909
0
    {
910
0
      k = rk[i + 0];
911
0
      x[0] = sm4_round(x[0], x[1], x[2], x[3], k);
912
0
      y[0] = sm4_round(y[0], y[1], y[2], y[3], k);
913
0
      k = rk[i + 1];
914
0
      x[1] = sm4_round(x[1], x[2], x[3], x[0], k);
915
0
      y[1] = sm4_round(y[1], y[2], y[3], y[0], k);
916
0
      k = rk[i + 2];
917
0
      x[2] = sm4_round(x[2], x[3], x[0], x[1], k);
918
0
      y[2] = sm4_round(y[2], y[3], y[0], y[1], k);
919
0
      k = rk[i + 3];
920
0
      x[3] = sm4_round(x[3], x[0], x[1], x[2], k);
921
0
      y[3] = sm4_round(y[3], y[0], y[1], y[2], k);
922
0
    }
923
924
0
  buf_put_be32(out + 0 * 4, x[3 - 0]);
925
0
  buf_put_be32(out + 1 * 4, x[3 - 1]);
926
0
  buf_put_be32(out + 2 * 4, x[3 - 2]);
927
0
  buf_put_be32(out + 3 * 4, x[3 - 3]);
928
0
  buf_put_be32(out + 4 * 4, y[3 - 0]);
929
0
  buf_put_be32(out + 5 * 4, y[3 - 1]);
930
0
  buf_put_be32(out + 6 * 4, y[3 - 2]);
931
0
  buf_put_be32(out + 7 * 4, y[3 - 3]);
932
933
0
  return /*burn_stack*/ 4*10+sizeof(void*)*4;
934
0
}
935
936
static unsigned int
937
sm4_crypt_blocks (const void *ctx, byte *out, const byte *in,
938
      unsigned int num_blks)
939
0
{
940
0
  const u32 *rk = ctx;
941
0
  unsigned int burn_depth = 0;
942
0
  unsigned int nburn;
943
944
0
  while (num_blks >= 2)
945
0
    {
946
0
      nburn = sm4_do_crypt_blks2 (rk, out, in);
947
0
      burn_depth = nburn > burn_depth ? nburn : burn_depth;
948
0
      out += 2 * 16;
949
0
      in += 2 * 16;
950
0
      num_blks -= 2;
951
0
    }
952
953
0
  while (num_blks)
954
0
    {
955
0
      nburn = sm4_do_crypt (rk, out, in);
956
0
      burn_depth = nburn > burn_depth ? nburn : burn_depth;
957
0
      out += 16;
958
0
      in += 16;
959
0
      num_blks--;
960
0
    }
961
962
0
  if (burn_depth)
963
0
    burn_depth += sizeof(void *) * 5;
964
0
  return burn_depth;
965
0
}
966
967
static inline crypt_blk1_16_fn_t
968
sm4_get_crypt_blk1_16_fn(SM4_context *ctx)
969
0
{
970
0
  if (0)
971
0
    ;
972
0
#ifdef USE_GFNI_AVX512
973
0
  else if (ctx->use_gfni_avx512)
974
0
    {
975
0
      return &sm4_gfni_avx512_crypt_blk1_16;
976
0
    }
977
0
#endif
978
0
#ifdef USE_GFNI_AVX2
979
0
  else if (ctx->use_gfni_avx2)
980
0
    {
981
0
      return &sm4_gfni_avx2_crypt_blk1_16;
982
0
    }
983
0
#endif
984
0
#ifdef USE_AESNI_AVX2
985
0
  else if (ctx->use_aesni_avx2)
986
0
    {
987
0
      return &sm4_aesni_avx2_crypt_blk1_16;
988
0
    }
989
0
#endif
990
0
#ifdef USE_AESNI_AVX
991
0
  else if (ctx->use_aesni_avx)
992
0
    {
993
0
      return &sm4_aesni_avx_crypt_blk1_16;
994
0
    }
995
0
#endif
996
#ifdef USE_ARM_SVE_CE
997
  else if (ctx->use_arm_sve_ce)
998
    {
999
      return &sm4_armv9_sve_ce_crypt_blk1_16;
1000
    }
1001
#endif
1002
#ifdef USE_ARM_CE
1003
  else if (ctx->use_arm_ce)
1004
    {
1005
      return &sm4_armv8_ce_crypt_blk1_16;
1006
    }
1007
#endif
1008
#ifdef USE_AARCH64_SIMD
1009
  else if (ctx->use_aarch64_simd)
1010
    {
1011
      return &sm4_aarch64_crypt_blk1_16;
1012
    }
1013
#endif
1014
0
  else
1015
0
    {
1016
0
      (void)ctx;
1017
0
      return &sm4_crypt_blocks;
1018
0
    }
1019
0
}
1020
1021
/* Bulk encryption of complete blocks in CTR mode.  This function is only
1022
   intended for the bulk encryption feature of cipher.c.  CTR is expected to be
1023
   of size 16. */
1024
static void
1025
_gcry_sm4_ctr_enc(void *context, unsigned char *ctr,
1026
                  void *outbuf_arg, const void *inbuf_arg,
1027
                  size_t nblocks)
1028
0
{
1029
0
  SM4_context *ctx = context;
1030
0
  byte *outbuf = outbuf_arg;
1031
0
  const byte *inbuf = inbuf_arg;
1032
0
  int burn_stack_depth = 0;
1033
1034
0
#ifdef USE_GFNI_AVX512
1035
0
  if (ctx->use_gfni_avx512)
1036
0
    {
1037
      /* Process data in 32 block chunks. */
1038
0
      while (nblocks >= 32)
1039
0
        {
1040
0
          _gcry_sm4_gfni_avx512_ctr_enc_blk32(ctx->rkey_enc,
1041
0
                                              outbuf, inbuf, ctr);
1042
1043
0
          nblocks -= 32;
1044
0
          outbuf += 32 * 16;
1045
0
          inbuf += 32 * 16;
1046
0
        }
1047
1048
      /* Process data in 16 block chunks. */
1049
0
      if (nblocks >= 16)
1050
0
        {
1051
0
          _gcry_sm4_gfni_avx512_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr);
1052
1053
0
          nblocks -= 16;
1054
0
          outbuf += 16 * 16;
1055
0
          inbuf += 16 * 16;
1056
0
        }
1057
0
    }
1058
0
#endif
1059
1060
0
#ifdef USE_GFNI_AVX2
1061
0
  if (ctx->use_gfni_avx2)
1062
0
    {
1063
      /* Process data in 16 block chunks. */
1064
0
      while (nblocks >= 16)
1065
0
        {
1066
0
          _gcry_sm4_gfni_avx2_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr);
1067
1068
0
          nblocks -= 16;
1069
0
          outbuf += 16 * 16;
1070
0
          inbuf += 16 * 16;
1071
0
        }
1072
0
    }
1073
0
#endif
1074
1075
0
#ifdef USE_AESNI_AVX2
1076
0
  if (ctx->use_aesni_avx2)
1077
0
    {
1078
      /* Process data in 16 block chunks. */
1079
0
      while (nblocks >= 16)
1080
0
        {
1081
0
          _gcry_sm4_aesni_avx2_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr);
1082
1083
0
          nblocks -= 16;
1084
0
          outbuf += 16 * 16;
1085
0
          inbuf += 16 * 16;
1086
0
        }
1087
0
    }
1088
0
#endif
1089
1090
0
#ifdef USE_AESNI_AVX
1091
0
  if (ctx->use_aesni_avx)
1092
0
    {
1093
      /* Process data in 8 block chunks. */
1094
0
      while (nblocks >= 8)
1095
0
        {
1096
0
          _gcry_sm4_aesni_avx_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr);
1097
1098
0
          nblocks -= 8;
1099
0
          outbuf += 8 * 16;
1100
0
          inbuf += 8 * 16;
1101
0
        }
1102
0
    }
1103
0
#endif
1104
1105
#ifdef USE_ARM_SVE_CE
1106
  if (ctx->use_arm_sve_ce)
1107
    {
1108
      /* Process all blocks at once. */
1109
      _gcry_sm4_armv9_sve_ce_ctr_enc(ctx->rkey_enc, outbuf, inbuf,
1110
             ctr, nblocks);
1111
      nblocks = 0;
1112
    }
1113
#endif
1114
1115
#ifdef USE_ARM_CE
1116
  if (ctx->use_arm_ce)
1117
    {
1118
      /* Process multiples of 8 blocks at a time. */
1119
      if (nblocks >= 8)
1120
        {
1121
          size_t nblks = nblocks & ~(8 - 1);
1122
1123
          _gcry_sm4_armv8_ce_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr, nblks);
1124
1125
          nblocks -= nblks;
1126
          outbuf += nblks * 16;
1127
          inbuf += nblks * 16;
1128
        }
1129
    }
1130
#endif
1131
1132
#ifdef USE_AARCH64_SIMD
1133
  if (ctx->use_aarch64_simd)
1134
    {
1135
      /* Process multiples of 8 blocks at a time. */
1136
      if (nblocks >= 8)
1137
        {
1138
          size_t nblks = nblocks & ~(8 - 1);
1139
1140
          _gcry_sm4_aarch64_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr, nblks);
1141
1142
          nblocks -= nblks;
1143
          outbuf += nblks * 16;
1144
          inbuf += nblks * 16;
1145
        }
1146
    }
1147
#endif
1148
1149
  /* Process remaining blocks. */
1150
0
  if (nblocks)
1151
0
    {
1152
0
      crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16;
1153
0
      byte tmpbuf[16 * 16];
1154
0
      unsigned int tmp_used = 16;
1155
0
      size_t nburn;
1156
1157
0
      if (crypt_blk1_16 == &sm4_crypt_blocks)
1158
0
  prefetch_sbox_table ();
1159
1160
0
      nburn = bulk_ctr_enc_128(ctx->rkey_enc, crypt_blk1_16, outbuf, inbuf,
1161
0
                               nblocks, ctr, tmpbuf, sizeof(tmpbuf) / 16,
1162
0
                               &tmp_used);
1163
0
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
1164
1165
0
      wipememory(tmpbuf, tmp_used);
1166
0
    }
1167
1168
0
  if (burn_stack_depth)
1169
0
    _gcry_burn_stack(burn_stack_depth);
1170
0
}
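
For orientation, this bulk CTR routine is never called by applications directly; cipher.c dispatches to it through the bulk_ops table filled in by sm4_setkey above. A minimal sketch of how a caller reaches it through libgcrypt's public API (error handling trimmed, buffer sizes illustrative):

#include <gcrypt.h>

/* Encrypt LEN bytes with SM4-CTR; with a 16-byte counter, complete blocks
 * are handled by the bulk _gcry_sm4_ctr_enc path. */
static gcry_error_t
sm4_ctr_example (const unsigned char key[16], const unsigned char ctr[16],
                 const unsigned char *in, unsigned char *out, size_t len)
{
  gcry_cipher_hd_t hd;
  gcry_error_t err;

  err = gcry_cipher_open (&hd, GCRY_CIPHER_SM4, GCRY_CIPHER_MODE_CTR, 0);
  if (err)
    return err;

  err = gcry_cipher_setkey (hd, key, 16);
  if (!err)
    err = gcry_cipher_setctr (hd, ctr, 16);
  if (!err)
    err = gcry_cipher_encrypt (hd, out, len, in, len);

  gcry_cipher_close (hd);
  return err;
}
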
1171
1172
/* Bulk decryption of complete blocks in CBC mode.  This function is only
1173
   intended for the bulk encryption feature of cipher.c. */
1174
static void
1175
_gcry_sm4_cbc_dec(void *context, unsigned char *iv,
1176
                  void *outbuf_arg, const void *inbuf_arg,
1177
                  size_t nblocks)
1178
0
{
1179
0
  SM4_context *ctx = context;
1180
0
  unsigned char *outbuf = outbuf_arg;
1181
0
  const unsigned char *inbuf = inbuf_arg;
1182
0
  int burn_stack_depth = 0;
1183
1184
0
#ifdef USE_GFNI_AVX512
1185
0
  if (ctx->use_gfni_avx512)
1186
0
    {
1187
      /* Process data in 32 block chunks. */
1188
0
      while (nblocks >= 32)
1189
0
        {
1190
0
          _gcry_sm4_gfni_avx512_cbc_dec_blk32(ctx->rkey_dec, outbuf, inbuf, iv);
1191
1192
0
          nblocks -= 32;
1193
0
          outbuf += 32 * 16;
1194
0
          inbuf += 32 * 16;
1195
0
        }
1196
1197
      /* Process data in 16 block chunks. */
1198
0
      if (nblocks >= 16)
1199
0
        {
1200
0
          _gcry_sm4_gfni_avx512_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv);
1201
1202
0
          nblocks -= 16;
1203
0
          outbuf += 16 * 16;
1204
0
          inbuf += 16 * 16;
1205
0
        }
1206
0
    }
1207
0
#endif
1208
1209
0
#ifdef USE_GFNI_AVX2
1210
0
  if (ctx->use_gfni_avx2)
1211
0
    {
1212
      /* Process data in 16 block chunks. */
1213
0
      while (nblocks >= 16)
1214
0
        {
1215
0
          _gcry_sm4_gfni_avx2_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv);
1216
1217
0
          nblocks -= 16;
1218
0
          outbuf += 16 * 16;
1219
0
          inbuf += 16 * 16;
1220
0
        }
1221
0
    }
1222
0
#endif
1223
1224
0
#ifdef USE_AESNI_AVX2
1225
0
  if (ctx->use_aesni_avx2)
1226
0
    {
1227
      /* Process data in 16 block chunks. */
1228
0
      while (nblocks >= 16)
1229
0
        {
1230
0
          _gcry_sm4_aesni_avx2_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv);
1231
1232
0
          nblocks -= 16;
1233
0
          outbuf += 16 * 16;
1234
0
          inbuf += 16 * 16;
1235
0
        }
1236
0
    }
1237
0
#endif
1238
1239
0
#ifdef USE_AESNI_AVX
1240
0
  if (ctx->use_aesni_avx)
1241
0
    {
1242
      /* Process data in 8 block chunks. */
1243
0
      while (nblocks >= 8)
1244
0
        {
1245
0
          _gcry_sm4_aesni_avx_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv);
1246
1247
0
          nblocks -= 8;
1248
0
          outbuf += 8 * 16;
1249
0
          inbuf += 8 * 16;
1250
0
        }
1251
0
    }
1252
0
#endif
1253
1254
#ifdef USE_ARM_SVE_CE
1255
  if (ctx->use_arm_sve_ce)
1256
    {
1257
      /* Process all blocks at once. */
1258
      _gcry_sm4_armv9_sve_ce_cbc_dec(ctx->rkey_dec, outbuf, inbuf,
1259
             iv, nblocks);
1260
      nblocks = 0;
1261
    }
1262
#endif
1263
1264
#ifdef USE_ARM_CE
1265
  if (ctx->use_arm_ce)
1266
    {
1267
      /* Process multiples of 8 blocks at a time. */
1268
      if (nblocks >= 8)
1269
        {
1270
          size_t nblks = nblocks & ~(8 - 1);
1271
1272
          _gcry_sm4_armv8_ce_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv, nblks);
1273
1274
          nblocks -= nblks;
1275
          outbuf += nblks * 16;
1276
          inbuf += nblks * 16;
1277
        }
1278
    }
1279
#endif
1280
1281
#ifdef USE_AARCH64_SIMD
1282
  if (ctx->use_aarch64_simd)
1283
    {
1284
      /* Process multiples of 8 blocks at a time. */
1285
      if (nblocks >= 8)
1286
        {
1287
          size_t nblks = nblocks & ~(8 - 1);
1288
1289
          _gcry_sm4_aarch64_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv, nblks);
1290
1291
          nblocks -= nblks;
1292
          outbuf += nblks * 16;
1293
          inbuf += nblks * 16;
1294
        }
1295
    }
1296
#endif
1297
1298
  /* Process remaining blocks. */
1299
0
  if (nblocks)
1300
0
    {
1301
0
      crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16;
1302
0
      unsigned char tmpbuf[16 * 16];
1303
0
      unsigned int tmp_used = 16;
1304
0
      size_t nburn;
1305
1306
0
      if (crypt_blk1_16 == &sm4_crypt_blocks)
1307
0
  prefetch_sbox_table ();
1308
1309
0
      nburn = bulk_cbc_dec_128(ctx->rkey_dec, crypt_blk1_16, outbuf, inbuf,
1310
0
                               nblocks, iv, tmpbuf, sizeof(tmpbuf) / 16,
1311
0
                               &tmp_used);
1312
0
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
1313
1314
0
      wipememory(tmpbuf, tmp_used);
1315
0
    }
1316
1317
0
  if (burn_stack_depth)
1318
0
    _gcry_burn_stack(burn_stack_depth);
1319
0
}
1320
1321
/* Bulk decryption of complete blocks in CFB mode.  This function is only
1322
   intended for the bulk encryption feature of cipher.c. */
1323
static void
1324
_gcry_sm4_cfb_dec(void *context, unsigned char *iv,
1325
                  void *outbuf_arg, const void *inbuf_arg,
1326
                  size_t nblocks)
1327
0
{
1328
0
  SM4_context *ctx = context;
1329
0
  unsigned char *outbuf = outbuf_arg;
1330
0
  const unsigned char *inbuf = inbuf_arg;
1331
0
  int burn_stack_depth = 0;
1332
1333
0
#ifdef USE_GFNI_AVX512
1334
0
  if (ctx->use_gfni_avx512)
1335
0
    {
1336
      /* Process data in 32 block chunks. */
1337
0
      while (nblocks >= 32)
1338
0
        {
1339
0
          _gcry_sm4_gfni_avx512_cfb_dec_blk32(ctx->rkey_enc, outbuf, inbuf, iv);
1340
1341
0
          nblocks -= 32;
1342
0
          outbuf += 32 * 16;
1343
0
          inbuf += 32 * 16;
1344
0
        }
1345
1346
      /* Process data in 16 block chunks. */
1347
0
      if (nblocks >= 16)
1348
0
        {
1349
0
          _gcry_sm4_gfni_avx512_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv);
1350
1351
0
          nblocks -= 16;
1352
0
          outbuf += 16 * 16;
1353
0
          inbuf += 16 * 16;
1354
0
        }
1355
0
    }
1356
0
#endif
1357
1358
0
#ifdef USE_GFNI_AVX2
1359
0
  if (ctx->use_gfni_avx2)
1360
0
    {
1361
      /* Process data in 16 block chunks. */
1362
0
      while (nblocks >= 16)
1363
0
        {
1364
0
          _gcry_sm4_gfni_avx2_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv);
1365
1366
0
          nblocks -= 16;
1367
0
          outbuf += 16 * 16;
1368
0
          inbuf += 16 * 16;
1369
0
        }
1370
0
    }
1371
0
#endif
1372
1373
0
#ifdef USE_AESNI_AVX2
1374
0
  if (ctx->use_aesni_avx2)
1375
0
    {
1376
      /* Process data in 16 block chunks. */
1377
0
      while (nblocks >= 16)
1378
0
        {
1379
0
          _gcry_sm4_aesni_avx2_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv);
1380
1381
0
          nblocks -= 16;
1382
0
          outbuf += 16 * 16;
1383
0
          inbuf += 16 * 16;
1384
0
        }
1385
0
    }
1386
0
#endif
1387
1388
0
#ifdef USE_AESNI_AVX
1389
0
  if (ctx->use_aesni_avx)
1390
0
    {
1391
      /* Process data in 8 block chunks. */
1392
0
      while (nblocks >= 8)
1393
0
        {
1394
0
          _gcry_sm4_aesni_avx_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv);
1395
1396
0
          nblocks -= 8;
1397
0
          outbuf += 8 * 16;
1398
0
          inbuf += 8 * 16;
1399
0
        }
1400
0
    }
1401
0
#endif
1402
1403
#ifdef USE_ARM_SVE_CE
1404
  if (ctx->use_arm_sve_ce)
1405
    {
1406
      /* Process all blocks at once. */
1407
      _gcry_sm4_armv9_sve_ce_cfb_dec(ctx->rkey_enc, outbuf, inbuf,
1408
             iv, nblocks);
1409
      nblocks = 0;
1410
    }
1411
#endif
1412
1413
#ifdef USE_ARM_CE
1414
  if (ctx->use_arm_ce)
1415
    {
1416
      /* Process multiples of 8 blocks at a time. */
1417
      if (nblocks >= 8)
1418
        {
1419
          size_t nblks = nblocks & ~(8 - 1);
1420
1421
          _gcry_sm4_armv8_ce_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv, nblks);
1422
1423
          nblocks -= nblks;
1424
          outbuf += nblks * 16;
1425
          inbuf += nblks * 16;
1426
        }
1427
    }
1428
#endif
1429
1430
#ifdef USE_AARCH64_SIMD
1431
  if (ctx->use_aarch64_simd)
1432
    {
1433
      /* Process multiples of 8 blocks at a time. */
1434
      if (nblocks >= 8)
1435
        {
1436
          size_t nblks = nblocks & ~(8 - 1);
1437
1438
          _gcry_sm4_aarch64_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv, nblks);
1439
1440
          nblocks -= nblks;
1441
          outbuf += nblks * 16;
1442
          inbuf += nblks * 16;
1443
        }
1444
    }
1445
#endif
1446
1447
  /* Process remaining blocks. */
1448
0
  if (nblocks)
1449
0
    {
1450
0
      crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16;
1451
0
      unsigned char tmpbuf[16 * 16];
1452
0
      unsigned int tmp_used = 16;
1453
0
      size_t nburn;
1454
1455
0
      if (crypt_blk1_16 == &sm4_crypt_blocks)
1456
0
  prefetch_sbox_table ();
1457
1458
0
      nburn = bulk_cfb_dec_128(ctx->rkey_enc, crypt_blk1_16, outbuf, inbuf,
1459
0
                               nblocks, iv, tmpbuf, sizeof(tmpbuf) / 16,
1460
0
                               &tmp_used);
1461
0
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
1462
1463
0
      wipememory(tmpbuf, tmp_used);
1464
0
    }
1465
1466
0
  if (burn_stack_depth)
1467
0
    _gcry_burn_stack(burn_stack_depth);
1468
0
}
1469
1470
static unsigned int
1471
sm4_crypt_blk1_32 (const SM4_context *ctx, byte *outbuf, const byte *inbuf,
1472
                   unsigned int num_blks, const u32 *rk)
1473
0
{
1474
0
  crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16;
1475
0
  unsigned int stack_burn_size = 0;
1476
0
  unsigned int nburn;
1477
1478
0
  gcry_assert (num_blks <= 32);
1479
1480
0
#ifdef USE_GFNI_AVX512
1481
0
  if (num_blks == 32 && ctx->use_gfni_avx512)
1482
0
    {
1483
0
      return _gcry_sm4_gfni_avx512_crypt_blk32 (rk, outbuf, inbuf);
1484
0
    }
1485
0
#endif
1486
#ifdef USE_ARM_SVE_CE
1487
  if (ctx->use_arm_sve_ce)
1488
    {
1489
      _gcry_sm4_armv9_sve_ce_crypt (rk, outbuf, inbuf, num_blks);
1490
      return 0;
1491
    }
1492
#endif
1493
1494
0
  do
1495
0
    {
1496
0
      unsigned int curr_blks = num_blks > 16 ? 16 : num_blks;
1497
0
      nburn = crypt_blk1_16 (rk, outbuf, inbuf, curr_blks);
1498
0
      stack_burn_size = nburn > stack_burn_size ? nburn : stack_burn_size;
1499
0
      outbuf += curr_blks * 16;
1500
0
      inbuf += curr_blks * 16;
1501
0
      num_blks -= curr_blks;
1502
0
    }
1503
0
  while (num_blks > 0);
1504
1505
0
  return stack_burn_size;
1506
0
}
1507
1508
static unsigned int
1509
sm4_encrypt_blk1_32 (const void *context, byte *out, const byte *in,
1510
                     unsigned int num_blks)
1511
0
{
1512
0
  const SM4_context *ctx = context;
1513
0
  return sm4_crypt_blk1_32 (ctx, out, in, num_blks, ctx->rkey_enc);
1514
0
}
1515
1516
static unsigned int
1517
sm4_decrypt_blk1_32 (const void *context, byte *out, const byte *in,
1518
                     unsigned int num_blks)
1519
0
{
1520
0
  const SM4_context *ctx = context;
1521
0
  return sm4_crypt_blk1_32 (ctx, out, in, num_blks, ctx->rkey_dec);
1522
0
}
1523
1524
/* Bulk encryption/decryption in ECB mode. */
1525
static void
1526
_gcry_sm4_ecb_crypt (void *context, void *outbuf_arg,
1527
         const void *inbuf_arg, size_t nblocks, int encrypt)
1528
0
{
1529
0
  SM4_context *ctx = context;
1530
0
  unsigned char *outbuf = outbuf_arg;
1531
0
  const unsigned char *inbuf = inbuf_arg;
1532
0
  int burn_stack_depth = 0;
1533
1534
  /* Process remaining blocks. */
1535
0
  if (nblocks)
1536
0
    {
1537
0
      size_t nburn;
1538
1539
0
      if (ctx->crypt_blk1_16 == &sm4_crypt_blocks)
1540
0
  prefetch_sbox_table ();
1541
1542
0
      nburn = bulk_ecb_crypt_128(ctx, encrypt ? sm4_encrypt_blk1_32
1543
0
                                              : sm4_decrypt_blk1_32,
1544
0
                                 outbuf, inbuf, nblocks, 32);
1545
0
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
1546
0
    }
1547
1548
0
  if (burn_stack_depth)
1549
0
    _gcry_burn_stack(burn_stack_depth);
1550
0
}
1551
1552
/* Bulk encryption/decryption of complete blocks in XTS mode. */
1553
static void
1554
_gcry_sm4_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg,
1555
                     const void *inbuf_arg, size_t nblocks, int encrypt)
1556
0
{
1557
0
  SM4_context *ctx = context;
1558
0
  unsigned char *outbuf = outbuf_arg;
1559
0
  const unsigned char *inbuf = inbuf_arg;
1560
0
  int burn_stack_depth = 0;
1561
1562
#ifdef USE_ARM_CE
1563
  if (ctx->use_arm_ce)
1564
    {
1565
      /* Process all blocks at once. */
1566
      _gcry_sm4_armv8_ce_xts_crypt(encrypt ? ctx->rkey_enc : ctx->rkey_dec,
1567
                                   outbuf, inbuf, tweak, nblocks);
1568
1569
      nblocks = 0;
1570
    }
1571
#endif
1572
1573
  /* Process remaining blocks. */
1574
0
  if (nblocks)
1575
0
    {
1576
0
      unsigned char tmpbuf[32 * 16];
1577
0
      unsigned int tmp_used = 16;
1578
0
      size_t nburn;
1579
1580
0
      if (ctx->crypt_blk1_16 == &sm4_crypt_blocks)
1581
0
  prefetch_sbox_table ();
1582
1583
0
      nburn = bulk_xts_crypt_128(ctx, encrypt ? sm4_encrypt_blk1_32
1584
0
                                              : sm4_decrypt_blk1_32,
1585
0
                                 outbuf, inbuf, nblocks,
1586
0
                                 tweak, tmpbuf, sizeof(tmpbuf) / 16,
1587
0
                                 &tmp_used);
1588
0
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
1589
1590
0
      wipememory(tmpbuf, tmp_used);
1591
0
    }
1592
1593
0
  if (burn_stack_depth)
1594
0
    _gcry_burn_stack(burn_stack_depth);
1595
0
}
1596
1597
/* Bulk encryption of complete blocks in CTR32LE mode (for GCM-SIV). */
1598
static void
1599
_gcry_sm4_ctr32le_enc(void *context, unsigned char *ctr,
1600
                      void *outbuf_arg, const void *inbuf_arg,
1601
                      size_t nblocks)
1602
0
{
1603
0
  SM4_context *ctx = context;
1604
0
  byte *outbuf = outbuf_arg;
1605
0
  const byte *inbuf = inbuf_arg;
1606
0
  int burn_stack_depth = 0;
1607
1608
  /* Process remaining blocks. */
1609
0
  if (nblocks)
1610
0
    {
1611
0
      byte tmpbuf[32 * 16];
1612
0
      unsigned int tmp_used = 16;
1613
0
      size_t nburn;
1614
1615
0
      nburn = bulk_ctr32le_enc_128 (ctx, sm4_encrypt_blk1_32, outbuf, inbuf,
1616
0
                                    nblocks, ctr, tmpbuf, sizeof(tmpbuf) / 16,
1617
0
                                    &tmp_used);
1618
0
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
1619
1620
0
      wipememory (tmpbuf, tmp_used);
1621
0
    }
1622
1623
0
  if (burn_stack_depth)
1624
0
    _gcry_burn_stack (burn_stack_depth);
1625
0
}
1626
1627
/* Bulk encryption/decryption of complete blocks in OCB mode. */
1628
static size_t
1629
_gcry_sm4_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
1630
         const void *inbuf_arg, size_t nblocks, int encrypt)
1631
0
{
1632
0
  SM4_context *ctx = (void *)&c->context.c;
1633
0
  unsigned char *outbuf = outbuf_arg;
1634
0
  const unsigned char *inbuf = inbuf_arg;
1635
0
  u64 blkn = c->u_mode.ocb.data_nblocks;
1636
0
  int burn_stack_depth = 0;
1637
1638
0
#ifdef USE_GFNI_AVX512
1639
0
  if (ctx->use_gfni_avx512)
1640
0
    {
1641
0
      u64 Ls[32];
1642
0
      u64 *l;
1643
1644
0
      if (nblocks >= 32)
1645
0
  {
1646
0
          l = bulk_ocb_prepare_L_pointers_array_blk32 (c, Ls, blkn);
1647
1648
    /* Process data in 32 block chunks. */
1649
0
    while (nblocks >= 32)
1650
0
      {
1651
0
        blkn += 32;
1652
0
        *l = (uintptr_t)(void *)ocb_get_l (c, blkn - blkn % 32);
1653
1654
0
        if (encrypt)
1655
0
    _gcry_sm4_gfni_avx512_ocb_enc_blk32 (ctx->rkey_enc, outbuf,
1656
0
                                                     inbuf, c->u_iv.iv,
1657
0
                                                     c->u_ctr.ctr, Ls);
1658
0
        else
1659
0
    _gcry_sm4_gfni_avx512_ocb_dec_blk32 (ctx->rkey_dec, outbuf,
1660
0
                                                     inbuf, c->u_iv.iv,
1661
0
                                                     c->u_ctr.ctr, Ls);
1662
1663
0
        nblocks -= 32;
1664
0
        outbuf += 32 * 16;
1665
0
        inbuf += 32 * 16;
1666
0
      }
1667
0
  }
1668
0
    }
1669
0
#endif
1670
1671
0
#ifdef USE_GFNI_AVX2
1672
0
  if (ctx->use_gfni_avx2)
1673
0
    {
1674
0
      u64 Ls[16];
1675
0
      u64 *l;
1676
1677
0
      if (nblocks >= 16)
1678
0
  {
1679
0
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
1680
1681
    /* Process data in 16 block chunks. */
1682
0
    while (nblocks >= 16)
1683
0
      {
1684
0
        blkn += 16;
1685
0
        *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
1686
1687
0
        if (encrypt)
1688
0
    _gcry_sm4_gfni_avx2_ocb_enc(ctx->rkey_enc, outbuf, inbuf,
1689
0
              c->u_iv.iv, c->u_ctr.ctr, Ls);
1690
0
        else
1691
0
    _gcry_sm4_gfni_avx2_ocb_dec(ctx->rkey_dec, outbuf, inbuf,
1692
0
              c->u_iv.iv, c->u_ctr.ctr, Ls);
1693
1694
0
        nblocks -= 16;
1695
0
        outbuf += 16 * 16;
1696
0
        inbuf += 16 * 16;
1697
0
      }
1698
0
  }
1699
0
    }
1700
0
#endif
1701
1702
0
#ifdef USE_AESNI_AVX2
1703
0
  if (ctx->use_aesni_avx2)
1704
0
    {
1705
0
      u64 Ls[16];
1706
0
      u64 *l;
1707
1708
0
      if (nblocks >= 16)
1709
0
  {
1710
0
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
1711
1712
    /* Process data in 16 block chunks. */
1713
0
    while (nblocks >= 16)
1714
0
      {
1715
0
        blkn += 16;
1716
0
        *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
1717
1718
0
        if (encrypt)
1719
0
    _gcry_sm4_aesni_avx2_ocb_enc(ctx->rkey_enc, outbuf, inbuf,
1720
0
               c->u_iv.iv, c->u_ctr.ctr, Ls);
1721
0
        else
1722
0
    _gcry_sm4_aesni_avx2_ocb_dec(ctx->rkey_dec, outbuf, inbuf,
1723
0
               c->u_iv.iv, c->u_ctr.ctr, Ls);
1724
1725
0
        nblocks -= 16;
1726
0
        outbuf += 16 * 16;
1727
0
        inbuf += 16 * 16;
1728
0
      }
1729
0
  }
1730
0
    }
1731
0
#endif
1732
1733
0
#ifdef USE_AESNI_AVX
1734
0
  if (ctx->use_aesni_avx)
1735
0
    {
1736
0
      u64 Ls[8];
1737
0
      u64 *l;
1738
1739
0
      if (nblocks >= 8)
1740
0
  {
1741
0
          l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);
1742
1743
    /* Process data in 8 block chunks. */
1744
0
    while (nblocks >= 8)
1745
0
      {
1746
0
        blkn += 8;
1747
0
        *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
1748
1749
0
        if (encrypt)
1750
0
    _gcry_sm4_aesni_avx_ocb_enc(ctx->rkey_enc, outbuf, inbuf,
1751
0
              c->u_iv.iv, c->u_ctr.ctr, Ls);
1752
0
        else
1753
0
    _gcry_sm4_aesni_avx_ocb_dec(ctx->rkey_dec, outbuf, inbuf,
1754
0
              c->u_iv.iv, c->u_ctr.ctr, Ls);
1755
1756
0
        nblocks -= 8;
1757
0
        outbuf += 8 * 16;
1758
0
        inbuf += 8 * 16;
1759
0
      }
1760
0
  }
1761
0
    }
1762
0
#endif
1763
1764
  /* Process remaining blocks. */
1765
0
  if (nblocks)
1766
0
    {
1767
0
      crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16;
1768
0
      u32 *rk = encrypt ? ctx->rkey_enc : ctx->rkey_dec;
1769
0
      unsigned char tmpbuf[16 * 16];
1770
0
      unsigned int tmp_used = 16;
1771
0
      size_t nburn;
1772
1773
0
      nburn = bulk_ocb_crypt_128 (c, rk, crypt_blk1_16, outbuf, inbuf, nblocks,
1774
0
                                  &blkn, encrypt, tmpbuf, sizeof(tmpbuf) / 16,
1775
0
                                  &tmp_used);
1776
0
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
1777
1778
0
      wipememory(tmpbuf, tmp_used);
1779
0
    }
1780
1781
0
  c->u_mode.ocb.data_nblocks = blkn;
1782
1783
0
  if (burn_stack_depth)
1784
0
    _gcry_burn_stack(burn_stack_depth);
1785
1786
0
  return 0;
1787
0
}
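
A hedged sketch of the encrypt-and-tag flow that ends up in _gcry_sm4_ocb_crypt above, using the public AEAD API. The 12-byte nonce (OCB accepts up to 15 bytes) and 16-byte tag are common choices, not values taken from this file.

/* Sketch: SM4-OCB encryption with associated data via the public API. */
#include <gcrypt.h>
#include <stdio.h>

int main (void)
{
  gcry_cipher_hd_t hd;
  unsigned char key[16] = { 0 };
  unsigned char nonce[12] = { 0 };
  unsigned char aad[] = "header";
  unsigned char msg[32] = "hello, SM4-OCB";   /* encrypted in place */
  unsigned char tag[16];
  gcry_error_t err;

  if (!gcry_check_version (GCRYPT_VERSION))
    return 1;
  gcry_control (GCRYCTL_INITIALIZATION_FINISHED, 0);

  if (gcry_cipher_open (&hd, GCRY_CIPHER_SM4, GCRY_CIPHER_MODE_OCB, 0))
    return 1;
  err = gcry_cipher_setkey (hd, key, sizeof key);
  if (!err) err = gcry_cipher_setiv (hd, nonce, sizeof nonce);
  if (!err) err = gcry_cipher_authenticate (hd, aad, sizeof aad - 1);
  /* Mark the last data chunk; optional for whole-block messages. */
  if (!err) err = gcry_cipher_final (hd);
  if (!err) err = gcry_cipher_encrypt (hd, msg, sizeof msg, NULL, 0);
  if (!err) err = gcry_cipher_gettag (hd, tag, sizeof tag);
  gcry_cipher_close (hd);

  if (err)
    fprintf (stderr, "SM4-OCB failed: %s\n", gcry_strerror (err));
  return err ? 1 : 0;
}
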
1788
1789
/* Bulk authentication of complete blocks in OCB mode. */
1790
static size_t
1791
_gcry_sm4_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
1792
0
{
1793
0
  SM4_context *ctx = (void *)&c->context.c;
1794
0
  const unsigned char *abuf = abuf_arg;
1795
0
  u64 blkn = c->u_mode.ocb.aad_nblocks;
1796
0
  int burn_stack_depth = 0;
1797
1798
0
#ifdef USE_GFNI_AVX512
1799
0
  if (ctx->use_gfni_avx512)
1800
0
    {
1801
0
      u64 Ls[16];
1802
0
      u64 *l;
1803
1804
0
      if (nblocks >= 16)
1805
0
        {
1806
0
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
1807
1808
          /* Process data in 16 block chunks. */
1809
0
          while (nblocks >= 16)
1810
0
            {
1811
0
              blkn += 16;
1812
0
              *l = (uintptr_t)(void *)ocb_get_l (c, blkn - blkn % 16);
1813
1814
0
              _gcry_sm4_gfni_avx512_ocb_auth (ctx->rkey_enc, abuf,
1815
0
                                              c->u_mode.ocb.aad_offset,
1816
0
                                              c->u_mode.ocb.aad_sum, Ls);
1817
1818
0
              nblocks -= 16;
1819
0
              abuf += 16 * 16;
1820
0
            }
1821
0
        }
1822
0
    }
1823
0
#endif
1824
1825
0
#ifdef USE_GFNI_AVX2
1826
0
  if (ctx->use_gfni_avx2)
1827
0
    {
1828
0
      u64 Ls[16];
1829
0
      u64 *l;
1830
1831
0
      if (nblocks >= 16)
1832
0
  {
1833
0
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
1834
1835
    /* Process data in 16 block chunks. */
1836
0
    while (nblocks >= 16)
1837
0
      {
1838
0
        blkn += 16;
1839
0
        *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
1840
1841
0
        _gcry_sm4_gfni_avx2_ocb_auth(ctx->rkey_enc, abuf,
1842
0
             c->u_mode.ocb.aad_offset,
1843
0
             c->u_mode.ocb.aad_sum, Ls);
1844
1845
0
        nblocks -= 16;
1846
0
        abuf += 16 * 16;
1847
0
      }
1848
0
  }
1849
0
    }
1850
0
#endif
1851
1852
0
#ifdef USE_AESNI_AVX2
1853
0
  if (ctx->use_aesni_avx2)
1854
0
    {
1855
0
      u64 Ls[16];
1856
0
      u64 *l;
1857
1858
0
      if (nblocks >= 16)
1859
0
  {
1860
0
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
1861
1862
    /* Process data in 16 block chunks. */
1863
0
    while (nblocks >= 16)
1864
0
      {
1865
0
        blkn += 16;
1866
0
        *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
1867
1868
0
        _gcry_sm4_aesni_avx2_ocb_auth(ctx->rkey_enc, abuf,
1869
0
              c->u_mode.ocb.aad_offset,
1870
0
              c->u_mode.ocb.aad_sum, Ls);
1871
1872
0
        nblocks -= 16;
1873
0
        abuf += 16 * 16;
1874
0
      }
1875
0
  }
1876
0
    }
1877
0
#endif
1878
1879
0
#ifdef USE_AESNI_AVX
1880
0
  if (ctx->use_aesni_avx)
1881
0
    {
1882
0
      u64 Ls[8];
1883
0
      u64 *l;
1884
1885
0
      if (nblocks >= 8)
1886
0
  {
1887
0
          l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);
1888
1889
    /* Process data in 8 block chunks. */
1890
0
    while (nblocks >= 8)
1891
0
      {
1892
0
        blkn += 8;
1893
0
        *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
1894
1895
0
        _gcry_sm4_aesni_avx_ocb_auth(ctx->rkey_enc, abuf,
1896
0
             c->u_mode.ocb.aad_offset,
1897
0
             c->u_mode.ocb.aad_sum, Ls);
1898
1899
0
        nblocks -= 8;
1900
0
        abuf += 8 * 16;
1901
0
      }
1902
0
  }
1903
0
    }
1904
0
#endif
1905
1906
  /* Process remaining blocks. */
1907
0
  if (nblocks)
1908
0
    {
1909
0
      crypt_blk1_16_fn_t crypt_blk1_16 = ctx->crypt_blk1_16;
1910
0
      unsigned char tmpbuf[16 * 16];
1911
0
      unsigned int tmp_used = 16;
1912
0
      size_t nburn;
1913
1914
0
      nburn = bulk_ocb_auth_128 (c, ctx->rkey_enc, crypt_blk1_16, abuf, nblocks,
1915
0
                                 &blkn, tmpbuf, sizeof(tmpbuf) / 16, &tmp_used);
1916
0
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
1917
1918
0
      wipememory(tmpbuf, tmp_used);
1919
0
    }
1920
1921
0
  c->u_mode.ocb.aad_nblocks = blkn;
1922
1923
0
  if (burn_stack_depth)
1924
0
    _gcry_burn_stack(burn_stack_depth);
1925
1926
0
  return 0;
1927
0
}
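
The matching verify side: AAD passed to gcry_cipher_authenticate is what reaches the bulk path _gcry_sm4_ocb_auth above, and gcry_cipher_checktag compares the recomputed tag. sm4_ocb_open below is a hypothetical helper name used only for illustration; it assumes libgcrypt has already been initialised with gcry_check_version.

/* Sketch: decrypt-and-verify for SM4-OCB.  Returns 0 only if the tag
 * verifies; on any error the (partially) decrypted buffer should be
 * discarded by the caller. */
#include <gcrypt.h>
#include <stddef.h>

static int
sm4_ocb_open (unsigned char *buf, size_t buflen,        /* in: ct, out: pt */
              const unsigned char *key, size_t keylen,  /* 16 bytes */
              const unsigned char *nonce, size_t noncelen,
              const unsigned char *aad, size_t aadlen,
              const unsigned char *tag, size_t taglen)
{
  gcry_cipher_hd_t hd;
  gcry_error_t err;

  if (gcry_cipher_open (&hd, GCRY_CIPHER_SM4, GCRY_CIPHER_MODE_OCB, 0))
    return -1;
  err = gcry_cipher_setkey (hd, key, keylen);
  if (!err) err = gcry_cipher_setiv (hd, nonce, noncelen);
  if (!err) err = gcry_cipher_authenticate (hd, aad, aadlen);
  if (!err) err = gcry_cipher_final (hd);
  if (!err) err = gcry_cipher_decrypt (hd, buf, buflen, NULL, 0);
  if (!err) err = gcry_cipher_checktag (hd, tag, taglen);
  gcry_cipher_close (hd);
  return err ? -1 : 0;
}
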
1928
1929
static const char *
1930
sm4_selftest (void)
1931
0
{
1932
0
  SM4_context ctx;
1933
0
  byte scratch[16];
1934
1935
0
  static const byte plaintext[16] = {
1936
0
    0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
1937
0
    0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10,
1938
0
  };
1939
0
  static const byte key[16] = {
1940
0
    0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
1941
0
    0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10,
1942
0
  };
1943
0
  static const byte ciphertext[16] = {
1944
0
    0x68, 0x1E, 0xDF, 0x34, 0xD2, 0x06, 0x96, 0x5E,
1945
0
    0x86, 0xB3, 0xE9, 0x4F, 0x53, 0x6E, 0x42, 0x46
1946
0
  };
1947
1948
0
  memset (&ctx, 0, sizeof(ctx));
1949
1950
0
  sm4_expand_key (&ctx, key);
1951
0
  sm4_encrypt (&ctx, scratch, plaintext);
1952
0
  if (memcmp (scratch, ciphertext, sizeof (ciphertext)))
1953
0
    return "SM4 test encryption failed.";
1954
0
  sm4_decrypt (&ctx, scratch, scratch);
1955
0
  if (memcmp (scratch, plaintext, sizeof (plaintext)))
1956
0
    return "SM4 test decryption failed.";
1957
1958
0
  return NULL;
1959
0
}
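
sm4_selftest checks the single-block reference vector of the SM4 standard. The same check can be reproduced from application code through the public API in ECB mode; the key, plaintext and expected ciphertext below are copied from the arrays above.

/* Sketch: reproducing the selftest vector with the public API. */
#include <gcrypt.h>
#include <stdio.h>
#include <string.h>

int main (void)
{
  static const unsigned char key[16] = {
    0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
    0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10
  };
  static const unsigned char plaintext[16] = {
    0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
    0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10
  };
  static const unsigned char expected[16] = {
    0x68, 0x1E, 0xDF, 0x34, 0xD2, 0x06, 0x96, 0x5E,
    0x86, 0xB3, 0xE9, 0x4F, 0x53, 0x6E, 0x42, 0x46
  };
  unsigned char out[16];
  gcry_cipher_hd_t hd;
  gcry_error_t err;

  if (!gcry_check_version (GCRYPT_VERSION))
    return 1;
  gcry_control (GCRYCTL_INITIALIZATION_FINISHED, 0);

  if (gcry_cipher_open (&hd, GCRY_CIPHER_SM4, GCRY_CIPHER_MODE_ECB, 0))
    return 1;
  err = gcry_cipher_setkey (hd, key, sizeof key);
  if (!err)
    err = gcry_cipher_encrypt (hd, out, sizeof out, plaintext, sizeof plaintext);
  gcry_cipher_close (hd);

  if (err || memcmp (out, expected, sizeof expected))
    {
      fprintf (stderr, "SM4 test vector mismatch\n");
      return 1;
    }
  return 0;
}
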
1960
1961
static gpg_err_code_t
1962
run_selftests (int algo, int extended, selftest_report_func_t report)
1963
0
{
1964
0
  const char *what;
1965
0
  const char *errtxt;
1966
1967
0
  (void)extended;
1968
1969
0
  if (algo != GCRY_CIPHER_SM4)
1970
0
    return GPG_ERR_CIPHER_ALGO;
1971
1972
0
  what = "selftest";
1973
0
  errtxt = sm4_selftest ();
1974
0
  if (errtxt)
1975
0
    goto failed;
1976
1977
0
  return 0;
1978
1979
0
 failed:
1980
0
  if (report)
1981
0
    report ("cipher", GCRY_CIPHER_SM4, what, errtxt);
1982
0
  return GPG_ERR_SELFTEST_FAILED;
1983
0
}
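
run_selftests is normally invoked by the library itself; from application code the built-in self-tests can be requested with gcry_control (GCRYCTL_SELFTEST, 0), as sketched below. Whether the SM4 test is part of the default self-test set depends on the build and FIPS configuration, so treat this as an illustration rather than a guaranteed way to reach the function above.

/* Sketch: asking libgcrypt to run its implemented self-tests. */
#include <gcrypt.h>
#include <stdio.h>

int main (void)
{
  gcry_error_t err;

  if (!gcry_check_version (GCRYPT_VERSION))
    return 1;
  gcry_control (GCRYCTL_INITIALIZATION_FINISHED, 0);

  err = gcry_control (GCRYCTL_SELFTEST, 0);
  if (err)
    fprintf (stderr, "self-tests failed: %s\n", gcry_strerror (err));
  return err ? 1 : 0;
}
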
1984
1985
1986
static const gcry_cipher_oid_spec_t sm4_oids[] =
1987
  {
1988
    { "1.2.156.10197.1.104.1", GCRY_CIPHER_MODE_ECB },
1989
    { "1.2.156.10197.1.104.2", GCRY_CIPHER_MODE_CBC },
1990
    { "1.2.156.10197.1.104.3", GCRY_CIPHER_MODE_OFB },
1991
    { "1.2.156.10197.1.104.4", GCRY_CIPHER_MODE_CFB },
1992
    { "1.2.156.10197.1.104.7", GCRY_CIPHER_MODE_CTR },
1993
    { NULL }
1994
  };
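
The table above registers the SM4 OIDs (1.2.156.10197.1.104.x) together with their cipher modes. They should be resolvable at run time with gcry_cipher_map_name, which also accepts dotted OID strings, and gcry_cipher_mode_from_oid; the sketch below assumes that behaviour.

/* Sketch: resolving one of the SM4 OIDs registered above. */
#include <gcrypt.h>
#include <stdio.h>

int main (void)
{
  const char *oid = "1.2.156.10197.1.104.2";   /* SM4-CBC, from the table */
  int algo, mode;

  if (!gcry_check_version (GCRYPT_VERSION))
    return 1;
  gcry_control (GCRYCTL_INITIALIZATION_FINISHED, 0);

  algo = gcry_cipher_map_name (oid);
  mode = gcry_cipher_mode_from_oid (oid);

  printf ("algo=%d (%s) mode=%d\n", algo,
          algo ? gcry_cipher_algo_name (algo) : "unknown", mode);
  /* Expected: algo == GCRY_CIPHER_SM4 and mode == GCRY_CIPHER_MODE_CBC. */
  return 0;
}
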
1995
1996
gcry_cipher_spec_t _gcry_cipher_spec_sm4 =
1997
  {
1998
    GCRY_CIPHER_SM4, {0, 0},
1999
    "SM4", NULL, sm4_oids, 16, 128,
2000
    sizeof (SM4_context),
2001
    sm4_setkey, sm4_encrypt, sm4_decrypt,
2002
    NULL, NULL,
2003
    run_selftests
2004
  };
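
The algorithm spec above registers SM4 with a 16-byte block and a 128-bit key. A small sketch of querying those parameters through the public API:

/* Sketch: querying the SM4 parameters registered by the spec above. */
#include <gcrypt.h>
#include <stdio.h>

int main (void)
{
  if (!gcry_check_version (GCRYPT_VERSION))
    return 1;
  gcry_control (GCRYCTL_INITIALIZATION_FINISHED, 0);

  printf ("name:   %s\n", gcry_cipher_algo_name (GCRY_CIPHER_SM4));
  printf ("keylen: %u bytes\n",
          (unsigned) gcry_cipher_get_algo_keylen (GCRY_CIPHER_SM4));
  printf ("blklen: %u bytes\n",
          (unsigned) gcry_cipher_get_algo_blklen (GCRY_CIPHER_SM4));
  return 0;
}
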