Coverage Report

Created: 2024-11-21 07:03

/src/SymCrypt/lib/aes-key.c
Line
Count
Source (jump to first uncovered line)
1
//
2
// aes.c   code for AES implementation
3
//
4
// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
5
//
6
// The actual encryption and decryption routines here are not nearly as fast as the
7
// assembler ones. They are used on platforms that don't have assembler implementations
8
// and for various testing purposes.
9
//
10
// This code derives from the original fast AES code that Niels Ferguson wrote
11
// for BitLocker in Windows Vista.
12
// The C code is derived from the AES that was already in the RSA32 library,
13
// the assembler code was created new at that time.
14
//
15
16
17
#include "precomp.h"
18
19
20
///////////////////////////////////////////////////////////////////////////////
21
// Key expansion uses two functions, a 4-byte S-box lookup and one
22
// to create a decryption round key from an encryption round key.
23
// These are the C implementations of these functions
24
//
25
26
27
//
// Round constants XORed into the first word of each key-schedule iteration.
// The key-expansion loops in this file index the table starting at 1, so
// entry 0 is a placeholder. The table is only ever read, hence const.
//
static const BYTE g_SymCryptAesRoundConstant[11] =
28
{
29
    0, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
30
};
31
32
//
// SymCryptAesExpandKeyInternal
//
// Expands a raw AES key into the round keys stored in pExpandedKey->RoundKey.
//
//  pExpandedKey            Receives the magic value, the round keys, and the
//                          lastEncRoundKey / lastDecRoundKey pointers.
//  pbKey, cbKey            Raw key bytes; cbKey must be 16, 24 or 32.
//  fCreateDecryptionKeys   When nonzero, the decryption round keys are also
//                          derived from the encryption round keys.
//
// Returns SYMCRYPT_NO_ERROR on success, or SYMCRYPT_WRONG_KEY_SIZE for any
// other cbKey. Note that the magic value is set before cbKey is validated,
// so on the error path the magic is set but no round keys have been written.
//
SYMCRYPT_NOINLINE
33
SYMCRYPT_ERROR
34
SYMCRYPT_CALL
35
SymCryptAesExpandKeyInternal(
36
    _Out_               PSYMCRYPT_AES_EXPANDED_KEY  pExpandedKey,
37
    _In_reads_(cbKey)   PCBYTE                      pbKey,
38
                        SIZE_T                      cbKey,
39
                        BOOLEAN                     fCreateDecryptionKeys )
40
0
{
41
0
    UINT32  nRounds;
42
0
    BYTE *  p;
43
0
    BYTE *  q;
44
0
    UINT32  i;
45
0
    UINT32  t;
46
47
0
    BOOL            UseSimd = FALSE;
48
0
    SYMCRYPT_ERROR  status = SYMCRYPT_NO_ERROR;
49
50
    // Decide per CPU architecture whether the helper functions may use SIMD.
    // On x86 the SIMD path is only taken when SymCryptSaveXmm succeeds; the
    // matching SymCryptRestoreXmm call is at the cleanup label.
#if SYMCRYPT_CPU_X86
51
    SYMCRYPT_EXTENDED_SAVE_DATA  SaveData;
52
53
    if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) )
54
    {
55
        if( SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR )
56
        {
57
            UseSimd = TRUE;
58
        }
59
    }
60
#elif SYMCRYPT_CPU_AMD64
61
0
    if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) )
62
0
    {
63
0
        UseSimd = TRUE;
64
0
    }
65
#elif SYMCRYPT_CPU_ARM64
66
    if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) )
67
    {
68
        UseSimd = TRUE;
69
    }
70
#endif
71
72
0
    SYMCRYPT_SET_MAGIC( pExpandedKey );
73
74
    //
75
    // Separate code for each key size, this is significantly faster.
76
    // We have a number of applications that do frequent key expansions.
77
    //
78
0
    switch( cbKey )
79
0
    {
80
0
    case 16:
81
0
        nRounds = 10;
82
0
        pExpandedKey->lastEncRoundKey = &pExpandedKey->RoundKey[nRounds];
83
0
        pExpandedKey->lastDecRoundKey = &pExpandedKey->RoundKey[2*nRounds];
84
85
0
        // The raw key is round key 0; p points at round key 1, the first one to generate.
        memcpy( &pExpandedKey->RoundKey[0], pbKey, 16 );
86
87
0
        p = (BYTE *)&pExpandedKey->RoundKey[1];
88
89
0
        // Each iteration produces the 16 bytes of one round key.
        for( i=1; i<=nRounds; i++ )
90
0
        {
91
0
            // 4-byte S-box lookup; presumably reads the previous word at p[-4] and writes the result to p.
            SymCryptAes4Sbox( &p[-4], p, UseSimd );
92
0
            // First word: rotated S-box output XOR the word 16 bytes back XOR the round constant.
            t = ROR32(SYMCRYPT_LOAD_LSBFIRST32(p), 8) ^ SYMCRYPT_LOAD_LSBFIRST32(p - 16) ^ g_SymCryptAesRoundConstant[i];
93
0
            SYMCRYPT_STORE_LSBFIRST32( p, t );     // this is a macro that re-evaluates its arguments
94
95
0
            // Remaining words: each is the word just written XOR the word 16 bytes before the target.
            *(UINT32 *)(p+4)  = *(UINT32 *) p    ^ *(UINT32 *)(p - 12);
96
0
            *(UINT32 *)(p+8)  = *(UINT32 *)(p+4) ^ *(UINT32 *)(p -  8);
97
0
            *(UINT32 *)(p+12) = *(UINT32 *)(p+8) ^ *(UINT32 *)(p -  4);
98
99
0
            p += 16;
100
0
        }
101
102
0
        break;
103
104
0
    case 24:
105
0
        nRounds = 12;
106
0
        pExpandedKey->lastEncRoundKey = &pExpandedKey->RoundKey[nRounds];
107
0
        pExpandedKey->lastDecRoundKey = &pExpandedKey->RoundKey[2*nRounds];
108
109
0
        memcpy( &pExpandedKey->RoundKey[0], pbKey, 24 );
110
111
0
        p = (BYTE *)&pExpandedKey->RoundKey[0] + 24;
112
113
        //
114
        // We have 12 rounds, 13 round keys, and 13*16 = 208 bytes of encryption key to generate.
115
        // We have 24 already, so we need 184 more.
116
        // Each iteration produces 24 bytes, so we need to loop 8 times.
117
        //
        // NOTE: 8 iterations write 192 bytes, i.e. 8 bytes more than the 184 needed;
        // the excess lands immediately after the last encryption round key.
        //
118
0
        for( i=1; i<=8; i++ )
119
0
        {
120
0
            SymCryptAes4Sbox( &p[-4], p, UseSimd );
121
0
            t = ROR32(SYMCRYPT_LOAD_LSBFIRST32(p), 8) ^ SYMCRYPT_LOAD_LSBFIRST32(p - 24) ^ g_SymCryptAesRoundConstant[i];
122
0
            SYMCRYPT_STORE_LSBFIRST32( p, t );
123
124
0
            // Remaining five words: word just written XOR the word 24 bytes before the target.
            *(UINT32 *)(p+4)  = *(UINT32 *) p     ^ *(UINT32 *)(p - 20);
125
0
            *(UINT32 *)(p+8)  = *(UINT32 *)(p+ 4) ^ *(UINT32 *)(p - 16);
126
0
            *(UINT32 *)(p+12) = *(UINT32 *)(p+ 8) ^ *(UINT32 *)(p - 12);
127
0
            *(UINT32 *)(p+16) = *(UINT32 *)(p+12) ^ *(UINT32 *)(p -  8);
128
0
            *(UINT32 *)(p+20) = *(UINT32 *)(p+16) ^ *(UINT32 *)(p -  4);
129
130
0
            p += 24;
131
0
        }
132
133
0
        break;
134
135
0
    case 32:
136
0
        nRounds = 14;
137
0
        pExpandedKey->lastEncRoundKey = &pExpandedKey->RoundKey[nRounds];
138
0
        pExpandedKey->lastDecRoundKey = &pExpandedKey->RoundKey[2*nRounds];
139
140
0
        memcpy( &pExpandedKey->RoundKey[0], pbKey, 32 );
141
142
0
        p = (BYTE *)&pExpandedKey->RoundKey[0] + 32;
143
144
        //
145
        // We have 14 rounds, 15 round keys, and 15*16 = 240 bytes of encryption key to generate.
146
        // We have 32 already, so we need 208 more.
147
        // Each iteration produces 32 bytes, so we need to loop 6.5 times.
148
        //
149
0
        for( i=1; i<=6; i++ )
150
0
        {
151
0
            SymCryptAes4Sbox( &p[-4], p, UseSimd );
152
0
            t = ROR32(SYMCRYPT_LOAD_LSBFIRST32(p), 8) ^ SYMCRYPT_LOAD_LSBFIRST32(p - 32) ^ g_SymCryptAesRoundConstant[i];
153
0
            SYMCRYPT_STORE_LSBFIRST32( p, t );
154
155
0
            *(UINT32 *)(p+4)  = *(UINT32 *) p       ^ *(UINT32 *)(p - 28);
156
0
            *(UINT32 *)(p+8)  = *(UINT32 *)(p +  4) ^ *(UINT32 *)(p - 24);
157
0
            *(UINT32 *)(p+12) = *(UINT32 *)(p +  8) ^ *(UINT32 *)(p - 20);
158
159
0
            // Second half of the iteration: another S-box lookup on the word at p[12],
            // this time with no rotation and no round constant.
            SymCryptAes4Sbox( &p[12], &p[16], UseSimd );
160
0
            *(UINT32 *)(p+16) = *(UINT32 *)(p + 16) ^ *(UINT32 *)(p - 16);
161
162
0
            *(UINT32 *)(p+20) = *(UINT32 *)(p + 16) ^ *(UINT32 *)(p - 12);
163
0
            *(UINT32 *)(p+24) = *(UINT32 *)(p + 20) ^ *(UINT32 *)(p -  8);
164
0
            *(UINT32 *)(p+28) = *(UINT32 *)(p + 24) ^ *(UINT32 *)(p -  4);
165
166
0
            p += 32;
167
0
        }
168
169
        // We looped 6 times, so here is the half-loop
170
171
0
        // i is 7 here (the loop exit value), so the next round constant is used.
        SymCryptAes4Sbox( &p[-4], p, UseSimd );
172
0
        t = ROR32(SYMCRYPT_LOAD_LSBFIRST32(p), 8) ^ SYMCRYPT_LOAD_LSBFIRST32(p - 32) ^ g_SymCryptAesRoundConstant[i];
173
0
        SYMCRYPT_STORE_LSBFIRST32( p, t );
174
175
0
        *(UINT32 *)(p+4)  = *(UINT32 *) p       ^ *(UINT32 *)(p - 28);
176
0
        *(UINT32 *)(p+8)  = *(UINT32 *)(p +  4) ^ *(UINT32 *)(p - 24);
177
0
        *(UINT32 *)(p+12) = *(UINT32 *)(p +  8) ^ *(UINT32 *)(p - 20);
178
179
0
        break;
180
181
0
    default:
182
0
        status = SYMCRYPT_WRONG_KEY_SIZE;
183
0
        goto cleanup;
184
0
    }
185
186
187
0
    if( fCreateDecryptionKeys )
188
0
    {
189
0
        // p walks forward through the encryption round keys,
        // q walks backward from the last decryption round key slot.
        p = &pExpandedKey->RoundKey[0][0][0];
190
0
        q = (PBYTE)(pExpandedKey->lastDecRoundKey);
191
192
        // The first encryption round key is the last decryption round key
193
0
        memcpy( q, p, SYMCRYPT_AES_BLOCK_SIZE );
194
0
        p += 16;
195
0
        q -= 16;
196
197
0
        // Convert every intermediate encryption round key. The loop stops when p
        // reaches lastEncRoundKey, so that round key is neither converted nor copied.
        while( p < (PBYTE) pExpandedKey->lastEncRoundKey )
198
0
        {
199
0
            SymCryptAesCreateDecryptionRoundKey( p, q, UseSimd );
200
0
            q -= 16;
201
0
            p += 16;
202
0
        }
203
0
    }
204
205
0
cleanup:
206
207
    // UseSimd is only TRUE on x86 when SymCryptSaveXmm succeeded, so the restore is paired with it.
#if SYMCRYPT_CPU_X86
208
    if( UseSimd )
209
    {
210
        SymCryptRestoreXmm( &SaveData );
211
    }
212
#endif
213
214
0
    return status;
215
0
}
216
217
//
// SymCryptAesExpandKey
//
// Expands the cbKey-byte key at pbKey into pExpandedKey, requesting both the
// encryption and the decryption round keys (fCreateDecryptionKeys = TRUE).
// Returns whatever SymCryptAesExpandKeyInternal returns.
//
SYMCRYPT_ERROR
218
SYMCRYPT_CALL
219
SymCryptAesExpandKey(
220
    _Out_               PSYMCRYPT_AES_EXPANDED_KEY  pExpandedKey,
221
    _In_reads_(cbKey)   PCBYTE                      pbKey,
222
                        SIZE_T                      cbKey )
223
224
0
{
225
0
    return SymCryptAesExpandKeyInternal( pExpandedKey, pbKey, cbKey, TRUE );
226
0
}
227
228
//
// SymCryptAesExpandKeyEncryptOnly
//
// Expands the cbKey-byte key at pbKey into pExpandedKey, skipping generation
// of the decryption round keys (fCreateDecryptionKeys = FALSE).
// Returns whatever SymCryptAesExpandKeyInternal returns.
//
SYMCRYPT_ERROR
229
SYMCRYPT_CALL
230
SymCryptAesExpandKeyEncryptOnly(
231
    _Out_               PSYMCRYPT_AES_EXPANDED_KEY  pExpandedKey,
232
    _In_reads_(cbKey)   PCBYTE                      pbKey,
233
                        SIZE_T                      cbKey )
234
0
{
235
0
    return SymCryptAesExpandKeyInternal( pExpandedKey, pbKey, cbKey, FALSE );
236
0
}
237
238
//
// SymCryptAesKeyCopy
//
// Copies the expanded key *pSrc into *pDst. The structure contains pointers
// into its own RoundKey array, so after the plain structure copy those two
// pointers are recomputed to refer to pDst->RoundKey at the same element
// offsets they had in pSrc.
//
VOID
239
SYMCRYPT_CALL
240
SymCryptAesKeyCopy( _In_    PCSYMCRYPT_AES_EXPANDED_KEY pSrc,
241
                    _Out_   PSYMCRYPT_AES_EXPANDED_KEY  pDst )
242
0
{
243
0
    SYMCRYPT_CHECK_MAGIC( pSrc );
244
245
0
    // Structure copy; the copied pointers still point into pSrc at this stage.
    *pDst = *pSrc;
246
0
    // Rebase both internal pointers onto pDst's own RoundKey array.
    pDst->lastEncRoundKey = &pDst->RoundKey[0] + (pSrc->lastEncRoundKey - &pSrc->RoundKey[0]);
247
0
    pDst->lastDecRoundKey = &pDst->RoundKey[0] + (pSrc->lastDecRoundKey - &pSrc->RoundKey[0]);
248
249
0
    // Set the magic value on the destination structure.
    SYMCRYPT_SET_MAGIC( pDst );
250
0
}
251
252
//
253
// Self test code
254
//
255
256
257
// Expected 16-byte ciphertext for the AES-128 self test.
// NOTE(review): the matching key and plaintext are not defined in this file;
// the value appears to be the FIPS-197 Appendix C.1 example ciphertext - confirm.
const BYTE SymCryptAesNistTestVector128Ciphertext[16] = {
258
    0x69, 0xc4, 0xe0, 0xd8, 0x6a, 0x7b, 0x04, 0x30,
259
    0xd8, 0xcd, 0xb7, 0x80, 0x70, 0xb4, 0xc5, 0x5a,
260
};
261
262
263
264
/****************************************************************
265
 * OLD CODE
266
 *
267
 * Old code to generate the AES tables dynamically.
268
 * Kept for future reference.
269
 *
270
271
272
//
273
// Prototype; on some platforms this function is in assembler.
274
//
275
VOID
276
SYMCRYPT_CALL
277
SymCryptAesCreateRotatedTables( BYTE MatrixMult[4][256][4] );
278
279
VOID
280
SYMCRYPT_CALL
281
SymCryptAesCreateRotatedTables( _Inout_ BYTE MatrixMult[4][256][4] )
282
{
283
    int i,j,k;
284
285
    //
286
    // We do this byte-by-byte, which is easiest.
287
    // It would be faster to use UINT32 operations,
288
    // but that is endian-specific, and therefore platform-specific.
289
    // Endian-agnostic UINT32-based code would be a lot more complicated.
290
    // All this is extremely easy to do in assembler, which we do on those
291
    // platforms that have assembler implementations.
292
    //
293
    for( j=1; j<4; j++ ) {
294
        for( i=0; i<256; i++ ) {
295
            for( k=0; k<4; k++ ) {
296
                MatrixMult[j][i][k] = MatrixMult[0][i][(k-j)&3];
297
            }
298
        }
299
    }
300
}
301
302
303
304
//
305
// SymCryptAesInitMatrixMultiplyTable
306
//
307
// Initialize a matrix multiplication table.
308
// Each matrix multiplication table consists of 4 tables of 256 entries of 4 bytes each.
309
// The four tables are rotated copies of each other.
310
// This function generates the first of those four tables from the init
311
// value.
312
//
313
// After this call:
314
//    At index i the table contains the four bytes
315
//        i * init[0], i * init[1], i * init[2], i * init[3]
316
//    where multiplication is in GF(2^8).
317
//
318
// We do not do a GF(2^8) multiplication for each entry, but rather use the
319
// relationship (a xor b) * init[.] = a * init[.] xor b * init[.]
320
// And only compute i*init[.] for i = 1,2,4,8,...,128. This can be done
321
// using repeated multiplication by x in the finite field.
322
//
323
// It is safe to call this function on two separate threads for the same table.
324
// All invocations will write the same data to the table, and within a thread each entry is written
325
// before it is read. Doing parallel initializations of the same table can be very inefficient
326
// as multiple cores will be fighting over the cache lines, but the result will be correct.
327
// We use this property to initialize the tables lazily.
328
//
329
static
330
VOID
331
SYMCRYPT_CALL
332
SymCryptAesInitMatrixMultiplyTable( _Out_   SYMCRYPT_ALIGN BYTE MatrixMult[256][4],
333
                                    _In_    SYMCRYPT_ALIGN BYTE init[4]
334
                                    )
335
{
336
    int i,j;
337
    SYMCRYPT_ALIGN BYTE initCopy[4];
338
    UINT32 initCopyAsUint32;
339
340
    //
341
    // We copy the init value so that we can modify it without worrying about multi-threading
342
    // issues.
343
    //
344
    *(UINT32 *)initCopy = *(UINT32 *)init;
345
346
    *(UINT32 *)MatrixMult[0] = 0;
347
    for( i=1; i<256; i<<=1 )
348
    {
349
        initCopyAsUint32 = *(UINT32 *)initCopy;
350
        for( j=0; j<i; j++ )
351
        {
352
            *(UINT32 *)MatrixMult[i+j] = *(UINT32 *)MatrixMult[j] ^ initCopyAsUint32;
353
        }
354
        for( j=0; j<4; j++ )
355
        {
356
            initCopy[j] = MULT_BY_X( initCopy[j] );
357
        }
358
    }
359
}
360
361
362
//
363
// SymCryptAesInitialize
364
//
365
// Initialize the static tables for the AES implementation.
366
// This function is called by the key expansion function if it finds the
367
// tables not initialized.
368
//
369
// This leads to an interesting case where multiple threads running on multiple
370
// CPUs run this initialization code at the same time.
371
// This code is carefully structured to allow that. When global data is written it is
372
// always with the final value, and we never read uninitialized global data.
373
// Thus, even if two CPUs run this code at the same time, they will both initialize each
374
// memory location to the same correct value and the end result will be correct.
375
// (Performance will suffer due to the fact that cache lines will be bounced back and forth
376
// between the two CPUs, but that is not a significant concern as this code is used only once.)
377
//
378
// At the end of the initialization the flag is set to indicate that further
379
// key expansion invocations do not need to re-run the initialization.
380
// We use memory barriers to keep this multi-thread safe.
381
//
382
static
383
VOID
384
SYMCRYPT_CALL
385
SymCryptAesInitialize(void)
386
{
387
    int i,j;
388
    BYTE S;
389
    BYTE Stimes2;
390
391
    //
392
    // We force alignment of these arrays as we sometimes treat them as a UINT32
393
    //
394
    SYMCRYPT_ALIGN BYTE InvMatrixEntry[4] = {0xe, 0x9, 0xd, 0xb};
395
    SYMCRYPT_ALIGN BYTE MatrixEntry[4] = {2, 1, 1, 3};
396
    SYMCRYPT_ALIGN BYTE MatrixScratch[256][4];
397
398
    // Generate the forward MDS multiplication table in the scratch space
399
    SymCryptAesInitMatrixMultiplyTable( MatrixScratch, MatrixEntry );
400
401
    // Initialize first table of SymCryptAesInvMatrixMult
402
    SymCryptAesInitMatrixMultiplyTable( SymCryptAesInvMatrixMult[0], InvMatrixEntry );
403
404
    //
405
    // Build the InvSbox table and the first table of SymCryptAesSboxMatrixMult and
406
    // SymCryptAesInvSboxMatrixMult
407
    //
408
    for( i=0; i<256; i++ ) {
409
        S = SymCryptAesSbox[i];
410
        SymCryptAesInvSbox[S] = (BYTE) i;
411
        *(UINT32 *)SymCryptAesSboxMatrixMult[0][i] = *(UINT32 *)MatrixScratch[S];
412
        *(UINT32 *)SymCryptAesInvSboxMatrixMult[0][S] = *(UINT32 *)SymCryptAesInvMatrixMult[0][i];
413
    }
414
415
    //
416
    // Now we generate the byte rotations of the tables
417
    //
418
    SymCryptAesCreateRotatedTables( SymCryptAesSboxMatrixMult );
419
    SymCryptAesCreateRotatedTables( SymCryptAesInvSboxMatrixMult );
420
    SymCryptAesCreateRotatedTables( SymCryptAesInvMatrixMult );
421
422
    //
423
    // This is a memory barrier. It ensures that all the memory writes we do before the barrier
424
    // are globally visible to other CPUs before the memory writes we do after the fence.
425
    // In this particular case, it ensures that every CPU sees the completed tables before
426
    // it sees the flag as set.
427
    //
428
    MemoryBarrier();
429
430
    //
431
    // Set the flag to signal that the tables are initialized.
432
    //
433
    SymCryptAesTablesInitialized = TRUE;
434
}
435
436
437
*/