Coverage Report

Created: 2024-11-21 07:03

/src/SymCrypt/lib/aes-c.c
Line
Count
Source (jump to first uncovered line)
1
//
2
// aes-c.c   code for AES implementation
3
//
4
// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
5
//
6
// The fast-ish C implementation of the core AES functions
7
//
8
// Separate C file because at some point we want to be able to switch this out with a compact-C implementation
9
// that is smaller.
10
//
11
12
#include "precomp.h"
13
14
//
15
// Static vs. dynamically generated tables.
16
//
17
// AES uses about 13 kB of tables; it turns out that most of these tables can be generated
18
// algorithmically much faster than they can be read off the disk.
19
// This implementation does not do so.
20
// The reason is that generated tables live in the modifiable data segment, which means
21
// that they are not shared between different instances of a DLL.
22
// Static tables are shared. Especially for applications that have a very large number
23
// of processes (e.g. Terminal Servers) the extra cost of generating and storing a
24
// per-process copy of these tables is higher than the cost of loading it a few times
25
// from disk.
26
// Earlier versions of this implementation did generate the tables dynamically and ran into
27
// this very problem.
28
//
29
// Our tables are aligned to eliminate side-channels from TLB lookups if the TLB page size
30
// is big enough. For example, the SboxMatrixMult table is 1024-aligned. Each use of that
31
// table consists of 4 lookups, and each lookup is within its own 1kB aligned subtable.
32
// The side-channels from cache lines still remain, of course.
33
//
34
35
//extern BYTE SymCryptAesSbox[256];                   // Basic S-box, not used
36
extern SYMCRYPT_ALIGN_AT( 256) BYTE SymCryptAesInvSbox[256];                // For final round in decryption
37
extern SYMCRYPT_ALIGN_AT(1024) BYTE SymCryptAesSboxMatrixMult[4][256][4];   // Main encryption tables
38
extern SYMCRYPT_ALIGN_AT(1024) BYTE SymCryptAesInvSboxMatrixMult[4][256][4];// Main decryption tables
39
extern SYMCRYPT_ALIGN_AT(1024) BYTE SymCryptAesInvMatrixMult[4][256][4];    // For computing decryption round keys
40
41
//
42
// Throughout this implementation we use UINT32s to access byte arrays. The AES
43
// algorithm almost requires this; without it the performance would be abysmal.
44
// All data elements are SYMCRYPT_ALIGNed, which must be at least 4.
45
//
46
47
//
48
// Macro to check for alignment to support platforms that need alignment fix-ups.
49
//
50
// Evaluates to 1 when the low two bits of the pointer value are clear, 0 otherwise.
// The argument is parenthesized so that any pointer-valued expression can be passed.
#define IS_UINT32_ALIGNED( p )   ((((intptr_t)(p)) & 3) == 0)
51
52
//
53
// Only need to enforce alignment on platforms that are not x86 or x64
54
// Future improvement: should switch to using unaligned pointer accesses
55
// on some platforms.
56
//
57
#define NEED_ALIGN (!(SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64))
58
59
60
VOID
61
SYMCRYPT_CALL
62
SymCryptAes4SboxC(
63
    _In_reads_(4)   PCBYTE  pIn,
64
    _Out_writes_(4) PBYTE   pOut )
65
//
66
// Perform 4 S-box lookups.
67
// This is a separate function as it can be done side-channel safe using
68
// AES-NI.
69
// Key expansion can actually be improved a lot more with AES-NI, but that
70
// requires major code changes for which we don't have time right now.
71
//
72
0
{
73
0
    pOut[0] = SymCryptAesSboxMatrixMult[0][pIn[0]][1];
74
0
    pOut[1] = SymCryptAesSboxMatrixMult[0][pIn[1]][1];
75
0
    pOut[2] = SymCryptAesSboxMatrixMult[0][pIn[2]][1];
76
0
    pOut[3] = SymCryptAesSboxMatrixMult[0][pIn[3]][1];
77
0
}
78
79
VOID
80
SYMCRYPT_CALL
81
SymCryptAesCreateDecryptionRoundKeyC(
82
    _In_reads_(16)     PCBYTE  pEncryptionRoundKey,
83
    _Out_writes_(16)    PBYTE   pDecryptionRoundKey )
84
//
85
// Convert an encryption round key to a decryption round key by applying the inverse
86
// mixcolumn function to each 4-byte subword.
87
// This is a separate function as with AES-NI there is an assembler version of this
88
// function that is side-channel safe.
89
//
90
0
{
91
0
    int i;
92
0
    PBYTE p = pDecryptionRoundKey;
93
0
    PCBYTE q = pEncryptionRoundKey;
94
95
0
    for( i=0; i<4; i++ ) {
96
0
        *(UINT32 *)p =
97
0
            *(UINT32 *)SymCryptAesInvMatrixMult[0][q[0]] ^
98
0
            *(UINT32 *)SymCryptAesInvMatrixMult[1][q[1]] ^
99
0
            *(UINT32 *)SymCryptAesInvMatrixMult[2][q[2]] ^
100
0
            *(UINT32 *)SymCryptAesInvMatrixMult[3][q[3]];
101
0
        p += 4;
102
0
        q += 4;
103
0
    }
104
105
0
}
106
107
//
108
// SymCryptAesEncrypt
109
// NOINLINE prevents the compiler from creating additional implementations
110
// that have to be FIPS selftested.
111
//
112
SYMCRYPT_NOINLINE
113
VOID
114
SYMCRYPT_CALL
115
SymCryptAesEncryptC(
116
    _In_                                    PCSYMCRYPT_AES_EXPANDED_KEY  pExpandedKey,
117
    _In_reads_(SYMCRYPT_AES_BLOCK_SIZE)     PCBYTE      pbPlaintext,
118
    _Out_writes_(SYMCRYPT_AES_BLOCK_SIZE)   PBYTE       pbCiphertext )
119
0
{
120
0
    SYMCRYPT_ALIGN BYTE state[4][4] = { 0 };
121
0
    SYMCRYPT_ALIGN UINT32 state2[4] = { 0 };
122
123
0
    const BYTE (*keyPtr)[4][4];
124
0
    const BYTE (*keyLimit)[4][4];
125
126
#if NEED_ALIGN
127
    SYMCRYPT_ALIGN BYTE   alignBuffer[SYMCRYPT_AES_BLOCK_SIZE];
128
#endif
129
130
#if NEED_ALIGN
131
132
    //
133
    // Callers who don't have their buffers aligned don't care about speed,
134
    // so we do this in the simplest way.
135
    //
136
    if( !(IS_UINT32_ALIGNED( pbPlaintext ) & IS_UINT32_ALIGNED( pbCiphertext )) ) {
137
        memcpy( alignBuffer, pbPlaintext, SYMCRYPT_AES_BLOCK_SIZE );
138
        SymCryptAesEncrypt( pExpandedKey, alignBuffer, alignBuffer );
139
        memcpy( pbCiphertext, alignBuffer, SYMCRYPT_AES_BLOCK_SIZE );
140
        SymCryptWipeKnownSize( alignBuffer, sizeof( alignBuffer ) );
141
        return;
142
    }
143
#endif
144
145
0
    SYMCRYPT_CHECK_MAGIC( pExpandedKey );
146
147
    //
148
    // From this point on all our data is UINT32 aligned or better on those
149
    // platforms that have alignement restrictions.
150
    //
151
152
0
    keyPtr = &pExpandedKey->RoundKey[0];            // First round key
153
0
    keyLimit = &pExpandedKey->lastEncRoundKey[0];   // Last round key
154
155
    // Initial round (AddRoundKey)
156
0
    *((UINT32 *) &state[0][0]) = *(UINT32 *) (*keyPtr)[0] ^ *(UINT32 *) &pbPlaintext[0];
157
0
    *((UINT32 *) &state[1][0]) = *(UINT32 *) (*keyPtr)[1] ^ *(UINT32 *) &pbPlaintext[4];
158
0
    *((UINT32 *) &state[2][0]) = *(UINT32 *) (*keyPtr)[2] ^ *(UINT32 *) &pbPlaintext[8];
159
0
    *((UINT32 *) &state[3][0]) = *(UINT32 *) (*keyPtr)[3] ^ *(UINT32 *) &pbPlaintext[12];
160
161
0
    keyPtr += 1;
162
163
    // Main rounds
164
0
    while (keyPtr < keyLimit)
165
0
    {
166
167
        // SubBytes/ShiftRows/MixColumns for col. 0
168
0
        state2[0] = *((UINT32 *) &SymCryptAesSboxMatrixMult[0][ state[0][0] ]);
169
0
        state2[3] = *((UINT32 *) &SymCryptAesSboxMatrixMult[1][ state[0][1] ]);
170
0
        state2[2] = *((UINT32 *) &SymCryptAesSboxMatrixMult[2][ state[0][2] ]);
171
0
        state2[1] = *((UINT32 *) &SymCryptAesSboxMatrixMult[3][ state[0][3] ]);
172
173
        // SubBytes/ShiftRows/MixColumns for col. 1
174
0
        state2[1] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[0][ state[1][0] ]);
175
0
        state2[0] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[1][ state[1][1] ]);
176
0
        state2[3] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[2][ state[1][2] ]);
177
0
        state2[2] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[3][ state[1][3] ]);
178
179
        // SubBytes/ShiftRows/MixColumns for col. 2
180
0
        state2[2] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[0][ state[2][0] ]);
181
0
        state2[1] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[1][ state[2][1] ]);
182
0
        state2[0] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[2][ state[2][2] ]);
183
0
        state2[3] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[3][ state[2][3] ]);
184
185
        // SubBytes/ShiftRows/MixColumns for col. 3
186
0
        state2[3] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[0][ state[3][0] ]);
187
0
        state2[2] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[1][ state[3][1] ]);
188
0
        state2[1] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[2][ state[3][2] ]);
189
0
        state2[0] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[3][ state[3][3] ]);
190
191
        // AddRoundKey
192
0
        *((UINT32 *) &state[0][0]) = *(UINT32 *) (*keyPtr)[0] ^ state2[0];
193
0
        *((UINT32 *) &state[1][0]) = *(UINT32 *) (*keyPtr)[1] ^ state2[1];
194
0
        *((UINT32 *) &state[2][0]) = *(UINT32 *) (*keyPtr)[2] ^ state2[2];
195
0
        *((UINT32 *) &state[3][0]) = *(UINT32 *) (*keyPtr)[3] ^ state2[3];
196
197
0
        keyPtr += 1;
198
0
    }
199
200
    // Final round
201
202
    // SubBytes/ShiftRows for col. 0
203
0
    state2[0] = (UINT32) SymCryptAesSboxMatrixMult[0][ state[0][0] ][1];
204
0
    state2[3] = (UINT32) SymCryptAesSboxMatrixMult[0][ state[0][1] ][1] << 8;
205
0
    state2[2] = (UINT32) SymCryptAesSboxMatrixMult[0][ state[0][2] ][1] << 16;
206
0
    state2[1] = (UINT32) SymCryptAesSboxMatrixMult[0][ state[0][3] ][1] << 24;
207
208
    // SubBytes/ShiftRows for col. 1
209
0
    state2[1] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[1][0] ][1];
210
0
    state2[0] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[1][1] ][1] << 8;
211
0
    state2[3] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[1][2] ][1] << 16;
212
0
    state2[2] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[1][3] ][1] << 24;
213
214
    // SubBytes/ShiftRows for col. 2
215
0
    state2[2] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[2][0] ][1];
216
0
    state2[1] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[2][1] ][1] << 8;
217
0
    state2[0] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[2][2] ][1] << 16;
218
0
    state2[3] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[2][3] ][1] << 24;
219
220
    // SubBytes/ShiftRows for col. 3
221
0
    state2[3] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[3][0] ][1];
222
0
    state2[2] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[3][1] ][1] << 8;
223
0
    state2[1] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[3][2] ][1] << 16;
224
0
    state2[0] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[3][3] ][1] << 24;
225
226
    // AddRoundKey
227
0
    *((UINT32 *) &pbCiphertext[0 ]) = *(UINT32 *) (*keyPtr)[0] ^ state2[0];
228
0
    *((UINT32 *) &pbCiphertext[4 ]) = *(UINT32 *) (*keyPtr)[1] ^ state2[1];
229
0
    *((UINT32 *) &pbCiphertext[8 ]) = *(UINT32 *) (*keyPtr)[2] ^ state2[2];
230
0
    *((UINT32 *) &pbCiphertext[12]) = *(UINT32 *) (*keyPtr)[3] ^ state2[3];
231
232
0
    SymCryptWipeKnownSize( state, sizeof( state ) );
233
0
    SymCryptWipeKnownSize( state2, sizeof( state2 ) );
234
235
0
    return;
236
0
}
237
238
239
SYMCRYPT_NOINLINE
240
VOID
241
SYMCRYPT_CALL
242
SymCryptAesDecryptC(
243
    _In_                                    PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
244
    _In_reads_(SYMCRYPT_AES_BLOCK_SIZE)     PCBYTE                      pbCiphertext,
245
    _Out_writes_(SYMCRYPT_AES_BLOCK_SIZE)   PBYTE                       pbPlaintext )
246
0
{
247
0
    SYMCRYPT_ALIGN BYTE state[4][4] = { 0 };
248
0
    SYMCRYPT_ALIGN UINT32 state2[4] = { 0 };
249
250
0
    const BYTE (*keyPtr)[4][4];
251
0
    const BYTE (*keyLimit)[4][4];
252
253
#if NEED_ALIGN
254
    SYMCRYPT_ALIGN BYTE   alignBuffer[SYMCRYPT_AES_BLOCK_SIZE];
255
#endif
256
257
#if NEED_ALIGN
258
    //
259
    // Callers who don't have their buffers aligned don't care about speed,
260
    // so we do this in the simplest way.
261
    //
262
    if( !(IS_UINT32_ALIGNED( pbPlaintext ) & IS_UINT32_ALIGNED( pbCiphertext )) ) {
263
        memcpy( alignBuffer, pbCiphertext, SYMCRYPT_AES_BLOCK_SIZE );
264
        SymCryptAesDecrypt( pExpandedKey, alignBuffer, alignBuffer );
265
        memcpy( pbPlaintext, alignBuffer, SYMCRYPT_AES_BLOCK_SIZE );
266
        SymCryptWipeKnownSize( alignBuffer, sizeof( alignBuffer ) );
267
        return;
268
    }
269
#endif
270
271
0
    SYMCRYPT_CHECK_MAGIC( pExpandedKey );
272
273
0
    keyPtr = &pExpandedKey->lastEncRoundKey[0];     // First round key
274
0
    keyLimit = &pExpandedKey->lastDecRoundKey[0];   // Last round key
275
276
    // Initial round (AddRoundKey)
277
0
    *((UINT32 *) &state[0][0]) = *(UINT32 *) (*keyPtr)[0] ^ *(UINT32 *) &pbCiphertext[0];
278
0
    *((UINT32 *) &state[1][0]) = *(UINT32 *) (*keyPtr)[1] ^ *(UINT32 *) &pbCiphertext[4];
279
0
    *((UINT32 *) &state[2][0]) = *(UINT32 *) (*keyPtr)[2] ^ *(UINT32 *) &pbCiphertext[8];
280
0
    *((UINT32 *) &state[3][0]) = *(UINT32 *) (*keyPtr)[3] ^ *(UINT32 *) &pbCiphertext[12];
281
282
0
    keyPtr += 1;
283
284
    // Main rounds
285
0
    while (keyPtr < keyLimit)
286
0
    {
287
288
        // SubBytes/ShiftRows/MixColumns for col. 0
289
0
        state2[0] = *((UINT32 *) &SymCryptAesInvSboxMatrixMult[0][ state[0][0] ]);
290
0
        state2[1] = *((UINT32 *) &SymCryptAesInvSboxMatrixMult[1][ state[0][1] ]);
291
0
        state2[2] = *((UINT32 *) &SymCryptAesInvSboxMatrixMult[2][ state[0][2] ]);
292
0
        state2[3] = *((UINT32 *) &SymCryptAesInvSboxMatrixMult[3][ state[0][3] ]);
293
294
        // SubBytes/ShiftRows/MixColumns for col. 1
295
0
        state2[1] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[0][ state[1][0] ]);
296
0
        state2[2] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[1][ state[1][1] ]);
297
0
        state2[3] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[2][ state[1][2] ]);
298
0
        state2[0] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[3][ state[1][3] ]);
299
300
        // SubBytes/ShiftRows/MixColumns for col. 2
301
0
        state2[2] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[0][ state[2][0] ]);
302
0
        state2[3] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[1][ state[2][1] ]);
303
0
        state2[0] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[2][ state[2][2] ]);
304
0
        state2[1] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[3][ state[2][3] ]);
305
306
        // SubBytes/ShiftRows/MixColumns for col. 3
307
0
        state2[3] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[0][ state[3][0] ]);
308
0
        state2[0] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[1][ state[3][1] ]);
309
0
        state2[1] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[2][ state[3][2] ]);
310
0
        state2[2] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[3][ state[3][3] ]);
311
312
        // AddRoundKey
313
0
        *((UINT32 *) &state[0][0]) = *(UINT32 *) (*keyPtr)[0] ^ state2[0];
314
0
        *((UINT32 *) &state[1][0]) = *(UINT32 *) (*keyPtr)[1] ^ state2[1];
315
0
        *((UINT32 *) &state[2][0]) = *(UINT32 *) (*keyPtr)[2] ^ state2[2];
316
0
        *((UINT32 *) &state[3][0]) = *(UINT32 *) (*keyPtr)[3] ^ state2[3];
317
318
0
        keyPtr += 1;
319
0
    }
320
321
    // Final round
322
323
    // SubBytes/ShiftRows for col. 0
324
0
    state2[0] = (UINT32) SymCryptAesInvSbox[ state[0][0] ];
325
0
    state2[1] = (UINT32) SymCryptAesInvSbox[ state[0][1] ] << 8;
326
0
    state2[2] = (UINT32) SymCryptAesInvSbox[ state[0][2] ] << 16;
327
0
    state2[3] = (UINT32) SymCryptAesInvSbox[ state[0][3] ] << 24;
328
329
    // SubBytes/ShiftRows for col. 1
330
0
    state2[1] |= (UINT32) SymCryptAesInvSbox[ state[1][0] ];
331
0
    state2[2] |= (UINT32) SymCryptAesInvSbox[ state[1][1] ] << 8;
332
0
    state2[3] |= (UINT32) SymCryptAesInvSbox[ state[1][2] ] << 16;
333
0
    state2[0] |= (UINT32) SymCryptAesInvSbox[ state[1][3] ] << 24;
334
335
    // SubBytes/ShiftRows for col. 2
336
0
    state2[2] |= (UINT32) SymCryptAesInvSbox[ state[2][0] ];
337
0
    state2[3] |= (UINT32) SymCryptAesInvSbox[ state[2][1] ] << 8;
338
0
    state2[0] |= (UINT32) SymCryptAesInvSbox[ state[2][2] ] << 16;
339
0
    state2[1] |= (UINT32) SymCryptAesInvSbox[ state[2][3] ] << 24;
340
341
    // SubBytes/ShiftRows for col. 3
342
0
    state2[3] |= (UINT32) SymCryptAesInvSbox[ state[3][0] ];
343
0
    state2[0] |= (UINT32) SymCryptAesInvSbox[ state[3][1] ] << 8;
344
0
    state2[1] |= (UINT32) SymCryptAesInvSbox[ state[3][2] ] << 16;
345
0
    state2[2] |= (UINT32) SymCryptAesInvSbox[ state[3][3] ] << 24;
346
347
    // AddRoundKey
348
0
    *((UINT32 *) &pbPlaintext[0 ]) = *(UINT32 *) (*keyPtr)[0] ^ state2[0];
349
0
    *((UINT32 *) &pbPlaintext[4 ]) = *(UINT32 *) (*keyPtr)[1] ^ state2[1];
350
0
    *((UINT32 *) &pbPlaintext[8 ]) = *(UINT32 *) (*keyPtr)[2] ^ state2[2];
351
0
    *((UINT32 *) &pbPlaintext[12]) = *(UINT32 *) (*keyPtr)[3] ^ state2[3];
352
353
0
    SymCryptWipeKnownSize( state, sizeof( state ) );
354
0
    SymCryptWipeKnownSize( state2, sizeof( state2 ) );
355
356
0
    return;
357
0
}
358
359
VOID
360
SYMCRYPT_CALL
361
SymCryptAesEcbEncryptC(
362
    _In_                                        PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
363
    _In_reads_( cbData )                        PCBYTE                      pbSrc,
364
    _Out_writes_( cbData )                      PBYTE                       pbDst,
365
                                                SIZE_T                      cbData )
366
0
{
367
0
    while( cbData >= SYMCRYPT_AES_BLOCK_SIZE )
368
0
    {
369
0
        SymCryptAesEncryptC( pExpandedKey, pbSrc, pbDst );
370
0
        pbSrc += SYMCRYPT_AES_BLOCK_SIZE;
371
0
        pbDst += SYMCRYPT_AES_BLOCK_SIZE;
372
0
        cbData -= SYMCRYPT_AES_BLOCK_SIZE;
373
0
    }
374
0
}
375
376
VOID
377
SYMCRYPT_CALL
378
SymCryptAesEcbDecryptC(
379
    _In_                                        PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
380
    _In_reads_( cbData )                        PCBYTE                      pbSrc,
381
    _Out_writes_( cbData )                      PBYTE                       pbDst,
382
                                                SIZE_T                      cbData )
383
0
{
384
0
    while( cbData >= SYMCRYPT_AES_BLOCK_SIZE )
385
0
    {
386
0
        SymCryptAesDecryptC( pExpandedKey, pbSrc, pbDst );
387
0
        pbSrc += SYMCRYPT_AES_BLOCK_SIZE;
388
0
        pbDst += SYMCRYPT_AES_BLOCK_SIZE;
389
0
        cbData -= SYMCRYPT_AES_BLOCK_SIZE;
390
0
    }
391
0
}
392