Coverage Report

Created: 2024-11-21 07:03

/src/SymCrypt/lib/aes-pattern.c
//
// aes-pattern.c
//
// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
//
// This file contains "pattern" code for AES-related functions. It's not intended to be compiled
// directly; rather it is included by other aes-*.c files which define the macros used here.
//

#if SYMCRYPT_CPU_ARM64

VOID
SYMCRYPT_CALL
SYMCRYPT_AesCtrMsbXxNeon(
    _In_                                        PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
    _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE )  PBYTE                       pbChainingValue,
    _In_reads_( cbData )                        PCBYTE                      pbSrc,
    _Out_writes_( cbData )                      PBYTE                       pbDst,
                                                SIZE_T                      cbData )
{
    __n128          chain = *(__n128 *)pbChainingValue;
    const __n128 *  pSrc = (const __n128 *) pbSrc;
    __n128 *        pDst = (__n128 *) pbDst;

    const __n128 chainIncrement1 = SYMCRYPT_SET_N128_U64( 0, 1 );
    const __n128 chainIncrement2 = SYMCRYPT_SET_N128_U64( 0, 2 );
    const __n128 chainIncrement8 = SYMCRYPT_SET_N128_U64( 0, 8 );

    __n128 ctr0, ctr1, ctr2, ctr3, ctr4, ctr5, ctr6, ctr7;
    __n128 c0, c1, c2, c3, c4, c5, c6, c7;

    cbData &= ~(SYMCRYPT_AES_BLOCK_SIZE - 1);

    // Our chain variable is in integer format, not the MSB-first format loaded from memory.
    ctr0 = vrev64q_u8( chain );
    ctr1 = VADDQ_UXX( ctr0, chainIncrement1 );
    ctr2 = VADDQ_UXX( ctr0, chainIncrement2 );
    ctr3 = VADDQ_UXX( ctr1, chainIncrement2 );
    ctr4 = VADDQ_UXX( ctr2, chainIncrement2 );
    ctr5 = VADDQ_UXX( ctr3, chainIncrement2 );
    ctr6 = VADDQ_UXX( ctr4, chainIncrement2 );
    ctr7 = VADDQ_UXX( ctr5, chainIncrement2 );

/*
    while cbData >= 5 * block
        generate 8 blocks of key stream
        if cbData < 8 * block
            break;
        process 8 blocks
    if cbData >= 5 * block
        process 5-7 blocks
        done
    if cbData >= 2 * block
        generate 4 blocks of key stream
        process 2-4 blocks
        done
    if cbData == 1 block
        generate 1 block of key stream
        process block
*/
    while( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE )
    {
        c0 = vrev64q_u8( ctr0 );
        c1 = vrev64q_u8( ctr1 );
        c2 = vrev64q_u8( ctr2 );
        c3 = vrev64q_u8( ctr3 );
        c4 = vrev64q_u8( ctr4 );
        c5 = vrev64q_u8( ctr5 );
        c6 = vrev64q_u8( ctr6 );
        c7 = vrev64q_u8( ctr7 );

        ctr0 = VADDQ_UXX( ctr0, chainIncrement8 );
        ctr1 = VADDQ_UXX( ctr1, chainIncrement8 );
        ctr2 = VADDQ_UXX( ctr2, chainIncrement8 );
        ctr3 = VADDQ_UXX( ctr3, chainIncrement8 );
        ctr4 = VADDQ_UXX( ctr4, chainIncrement8 );
        ctr5 = VADDQ_UXX( ctr5, chainIncrement8 );
        ctr6 = VADDQ_UXX( ctr6, chainIncrement8 );
        ctr7 = VADDQ_UXX( ctr7, chainIncrement8 );

        AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 );

        if( cbData < 8 * SYMCRYPT_AES_BLOCK_SIZE )
        {
            break;
        }

        pDst[0] = veorq_u64( pSrc[0], c0 );
        pDst[1] = veorq_u64( pSrc[1], c1 );
        pDst[2] = veorq_u64( pSrc[2], c2 );
        pDst[3] = veorq_u64( pSrc[3], c3 );
        pDst[4] = veorq_u64( pSrc[4], c4 );
        pDst[5] = veorq_u64( pSrc[5], c5 );
        pDst[6] = veorq_u64( pSrc[6], c6 );
        pDst[7] = veorq_u64( pSrc[7], c7 );

        pDst  += 8;
        pSrc  += 8;
        cbData -= 8 * SYMCRYPT_AES_BLOCK_SIZE;
    }

    //
    // At this point we have one of the two following cases:
    // - cbData >= 5 * 16 and we have 8 blocks of key stream in c0-c7. ctr0-ctr7 is set to (c0+8)-(c7+8)
    // - cbData < 5 * 16 and we have no blocks of key stream, and ctr0-ctr7 set to the next 8 counters to use
    //

    if( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) // quick exit of function if the request was a multiple of 8 blocks
    {
        if( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE )
        {
            //
            // We already have the key stream
            //
            pDst[0] = veorq_u64( pSrc[0], c0 );
            pDst[1] = veorq_u64( pSrc[1], c1 );
            pDst[2] = veorq_u64( pSrc[2], c2 );
            pDst[3] = veorq_u64( pSrc[3], c3 );
            pDst[4] = veorq_u64( pSrc[4], c4 );
            chain = VSUBQ_UXX( ctr5, chainIncrement8 );

            if( cbData >= 96 )
            {
                chain = VSUBQ_UXX( ctr6, chainIncrement8 );
                pDst[5] = veorq_u64( pSrc[5], c5 );
                if( cbData >= 112 )
                {
                    chain = VSUBQ_UXX( ctr7, chainIncrement8 );
                    pDst[6] = veorq_u64( pSrc[6], c6 );
                }
            }
        }
        else if( cbData >= 2 * SYMCRYPT_AES_BLOCK_SIZE )
        {
            // Produce 4 blocks of key stream

            chain = ctr2;           // chain is only incremented by 2 for now

            c0 = vrev64q_u8( ctr0 );
            c1 = vrev64q_u8( ctr1 );
            c2 = vrev64q_u8( ctr2 );
            c3 = vrev64q_u8( ctr3 );

            AES_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3 );

            pDst[0] = veorq_u64( pSrc[0], c0 );
            pDst[1] = veorq_u64( pSrc[1], c1 );
            if( cbData >= 48 )
            {
                chain = ctr3;
                pDst[2] = veorq_u64( pSrc[2], c2 );
                if( cbData >= 64 )
                {
                    chain = ctr4;
                    pDst[3] = veorq_u64( pSrc[3], c3 );
                }
            }
        }
        else
        {
            // Exactly 1 block to process
            chain = ctr1;

            c0 = vrev64q_u8( ctr0 );

            AES_ENCRYPT_1( pExpandedKey, c0 );
            pDst[0] = veorq_u64( pSrc[0], c0 );
        }
    }
    else
    {
        chain = ctr0;
    }

    chain = vrev64q_u8( chain );
    *(__n128 *)pbChainingValue = chain;
}

#endif // SYMCRYPT_CPU_ARM64
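
The macros used above (SYMCRYPT_AesCtrMsbXxNeon, VADDQ_UXX, VSUBQ_UXX, SYMCRYPT_SET_N128_U64, AES_ENCRYPT_1/4/8) are supplied by the including aes-*.c files, which are not part of this report. The following is a minimal sketch of that include pattern under stated assumptions: the SymCryptAesCtrMsb64Neon name and the #define bodies are illustrative guesses, not taken from this report.

// Hypothetical wrapper sketch; not part of aes-pattern.c and not covered by this report.
// The real wrapper must also define SYMCRYPT_SET_N128_U64 and the AES_ENCRYPT_1/4/8 macros.
#define SYMCRYPT_AesCtrMsbXxNeon    SymCryptAesCtrMsb64Neon     // assumed function name
#define VADDQ_UXX                   vaddq_u64                   // counter add on 64-bit lanes
#define VSUBQ_UXX                   vsubq_u64                   // matching counter subtract
#include "aes-pattern.c"
#undef  SYMCRYPT_AesCtrMsbXxNeon
#undef  VADDQ_UXX
#undef  VSUBQ_UXX

A 32-bit-counter variant would repeat the include with vaddq_u32/vsubq_u32 and a different function name.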

#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64

VOID
SYMCRYPT_CALL
SYMCRYPT_AesCtrMsbXxXmm(
    _In_                                        PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
    _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE )  PBYTE                       pbChainingValue,
    _In_reads_( cbData )                        PCBYTE                      pbSrc,
    _Out_writes_( cbData )                      PBYTE                       pbDst,
                                                SIZE_T                      cbData )
{
    __m128i chain = _mm_loadu_si128( (__m128i *) pbChainingValue );

    __m128i BYTE_REVERSE_ORDER = _mm_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 );

    __m128i chainIncrement1 = _mm_set_epi32( 0, 0, 0, 1 );
    __m128i chainIncrement2 = _mm_set_epi32( 0, 0, 0, 2 );
    __m128i chainIncrement3 = _mm_set_epi32( 0, 0, 0, 3 );
    //__m128i chainIncrement8 = _mm_set_epi32( 0, 0, 0, 8 );

    __m128i c0, c1, c2, c3, c4, c5, c6, c7;

    cbData &= ~(SYMCRYPT_AES_BLOCK_SIZE - 1);

    chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER );

/*
    while cbData >= 5 * block
        generate 8 blocks of key stream
        if cbData < 8 * block
            break;
        process 8 blocks
    if cbData >= 5 * block
        process 5-7 blocks
        done
    if cbData > 1 block
        generate 4 blocks of key stream
        process 2-4 blocks
        done
    if cbData == 1 block
        generate 1 block of key stream
        process block
*/
    while( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE )
    {
        c0 = chain;
        c1 = MM_ADD_EPIXX( chain, chainIncrement1 );
        c2 = MM_ADD_EPIXX( chain, chainIncrement2 );
        c3 = MM_ADD_EPIXX( c1, chainIncrement2 );
        c4 = MM_ADD_EPIXX( c2, chainIncrement2 );
        c5 = MM_ADD_EPIXX( c3, chainIncrement2 );
        c6 = MM_ADD_EPIXX( c4, chainIncrement2 );
        c7 = MM_ADD_EPIXX( c5, chainIncrement2 );
        chain = MM_ADD_EPIXX( c6, chainIncrement2 );

        c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER );
        c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER );
        c2 = _mm_shuffle_epi8( c2, BYTE_REVERSE_ORDER );
        c3 = _mm_shuffle_epi8( c3, BYTE_REVERSE_ORDER );
        c4 = _mm_shuffle_epi8( c4, BYTE_REVERSE_ORDER );
        c5 = _mm_shuffle_epi8( c5, BYTE_REVERSE_ORDER );
        c6 = _mm_shuffle_epi8( c6, BYTE_REVERSE_ORDER );
        c7 = _mm_shuffle_epi8( c7, BYTE_REVERSE_ORDER );

        AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 );

        if( cbData < 8 * SYMCRYPT_AES_BLOCK_SIZE )
        {
            break;
        }

        _mm_storeu_si128( (__m128i *) (pbDst +  0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc +  0 ) ) ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16 ) ) ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32 ) ) ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48 ) ) ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 64), _mm_xor_si128( c4, _mm_loadu_si128( ( __m128i * ) (pbSrc + 64 ) ) ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 80), _mm_xor_si128( c5, _mm_loadu_si128( ( __m128i * ) (pbSrc + 80 ) ) ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 96), _mm_xor_si128( c6, _mm_loadu_si128( ( __m128i * ) (pbSrc + 96 ) ) ) );
        _mm_storeu_si128( (__m128i *) (pbDst +112), _mm_xor_si128( c7, _mm_loadu_si128( ( __m128i * ) (pbSrc +112 ) ) ) );
        pbDst  += 8 * SYMCRYPT_AES_BLOCK_SIZE;
        pbSrc  += 8 * SYMCRYPT_AES_BLOCK_SIZE;
        cbData -= 8 * SYMCRYPT_AES_BLOCK_SIZE;
    }

    //
    // At this point we have one of the two following cases:
    // - cbData >= 5 * 16 and we have 8 blocks of key stream in c0-c7. chain is set to c7 + 1
    // - cbData < 5 * 16 and we have no blocks of key stream, with chain the next value to use
    //

    if( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) // quick exit of function if the request was a multiple of 8 blocks
    {
        if( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE )
        {
            //
            // We already have the key stream
            //
            _mm_storeu_si128( (__m128i *) (pbDst +  0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc +  0 ) ) ) );
            _mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16 ) ) ) );
            _mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32 ) ) ) );
            _mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48 ) ) ) );
            _mm_storeu_si128( (__m128i *) (pbDst + 64), _mm_xor_si128( c4, _mm_loadu_si128( ( __m128i * ) (pbSrc + 64 ) ) ) );
            chain = MM_SUB_EPIXX( chain, chainIncrement3 );

            if( cbData >= 96 )
            {
                chain = MM_ADD_EPIXX( chain, chainIncrement1 );
                _mm_storeu_si128( (__m128i *) (pbDst + 80), _mm_xor_si128( c5, _mm_loadu_si128( ( __m128i * ) (pbSrc + 80 ) ) ) );
                if( cbData >= 112 )
                {
                    chain = MM_ADD_EPIXX( chain, chainIncrement1 );
                    _mm_storeu_si128( (__m128i *) (pbDst + 96), _mm_xor_si128( c6, _mm_loadu_si128( ( __m128i * ) (pbSrc + 96 ) ) ) );
                }
            }
        }
        else if( cbData >= 2 * SYMCRYPT_AES_BLOCK_SIZE )
        {
            // Produce 4 blocks of key stream

            c0 = chain;
            c1 = MM_ADD_EPIXX( chain, chainIncrement1 );
            c2 = MM_ADD_EPIXX( chain, chainIncrement2 );
            c3 = MM_ADD_EPIXX( c1, chainIncrement2 );
            chain = c2;             // chain is only incremented by 2 for now

            c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER );
            c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER );
            c2 = _mm_shuffle_epi8( c2, BYTE_REVERSE_ORDER );
            c3 = _mm_shuffle_epi8( c3, BYTE_REVERSE_ORDER );

            AES_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3 );

            _mm_storeu_si128( (__m128i *) (pbDst +  0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc +  0 ) ) ) );
            _mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16 ) ) ) );
            if( cbData >= 48 )
            {
                chain = MM_ADD_EPIXX( chain, chainIncrement1 );
                _mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32 ) ) ) );
                if( cbData >= 64 )
                {
                    chain = MM_ADD_EPIXX( chain, chainIncrement1 );
                    _mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48 ) ) ) );
                }
            }
        }
        else
        {
            // Exactly 1 block to process
            c0 = chain;
            chain = MM_ADD_EPIXX( chain, chainIncrement1 );

            c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER );

            AES_ENCRYPT_1( pExpandedKey, c0 );
            _mm_storeu_si128( (__m128i *) (pbDst +  0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc +  0 ) ) ) );
        }
    }

    chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER );
    _mm_storeu_si128( (__m128i *) pbChainingValue, chain );
}
Unexecuted instantiation: SymCryptAesCtrMsb64Xmm
Unexecuted instantiation: SymCryptAesCtrMsb32Xmm

#endif // SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64
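
Every line of SYMCRYPT_AesCtrMsbXxXmm above has an execution count of 0 in this report, and both of its instantiations, SymCryptAesCtrMsb64Xmm and SymCryptAesCtrMsb32Xmm, are reported as unexecuted. The two names suggest the pattern file is included twice for x86/AMD64, once per counter width. Below is a minimal sketch of how those instantiations might be produced; only the two function names come from the report, while the #define bodies are assumptions (_mm_add_epi64, _mm_sub_epi64, _mm_add_epi32, and _mm_sub_epi32 are standard SSE2 intrinsics).

// Hypothetical wrapper sketch; not part of aes-pattern.c and not covered by this report.
// The real wrapper must also define the AES_ENCRYPT_1/4/8 macros.
#define SYMCRYPT_AesCtrMsbXxXmm     SymCryptAesCtrMsb64Xmm
#define MM_ADD_EPIXX                _mm_add_epi64               // counter arithmetic on 64-bit lanes
#define MM_SUB_EPIXX                _mm_sub_epi64
#include "aes-pattern.c"
#undef  SYMCRYPT_AesCtrMsbXxXmm
#undef  MM_ADD_EPIXX
#undef  MM_SUB_EPIXX

#define SYMCRYPT_AesCtrMsbXxXmm     SymCryptAesCtrMsb32Xmm
#define MM_ADD_EPIXX                _mm_add_epi32               // counter arithmetic on 32-bit lanes
#define MM_SUB_EPIXX                _mm_sub_epi32
#include "aes-pattern.c"
#undef  SYMCRYPT_AesCtrMsbXxXmm
#undef  MM_ADD_EPIXX
#undef  MM_SUB_EPIXX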