/src/nss/lib/freebl/aes-x86.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /* This Source Code Form is subject to the terms of the Mozilla Public  | 
2  |  |  * License, v. 2.0. If a copy of the MPL was not distributed with this  | 
3  |  |  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */  | 
4  |  |  | 
5  |  | #ifdef FREEBL_NO_DEPEND  | 
6  |  | #include "stubs.h"  | 
7  |  | #endif  | 
8  |  | #include "rijndael.h"  | 
9  |  | #include "secerr.h"  | 
10  |  |  | 
11  |  | #include <wmmintrin.h> /* aes-ni */  | 
12  |  |  | 
/*
 * EXPAND_KEY128(k, rcon, res): derive one AES-128 round key from the
 * previous one.
 *
 *   k    - previous round key (__m128i expression; evaluated more than once,
 *          so it must be free of side effects)
 *   rcon - round constant; it is handed to _mm_aeskeygenassist_si128 as an
 *          immediate, so it must be a compile-time constant
 *   res  - __m128i lvalue that receives the new round key
 *
 * The expansion writes to two __m128i scratch variables named tmp_key and
 * tmp, which the caller must declare.
 *
 * Shuffle selector 0xFF copies dword 3 of the assist result into all four
 * lanes. The three shift-by-4-bytes/XOR steps turn the four 32-bit words of
 * k into their running XOR (w0, w0^w1, w0^w1^w2, w0^w1^w2^w3).
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement, e.g. under an unbraced if/else.
 */
#define EXPAND_KEY128(k, rcon, res)                           \
    do {                                                      \
        tmp_key = _mm_aeskeygenassist_si128((k), (rcon));     \
        tmp_key = _mm_shuffle_epi32(tmp_key, 0xFF);           \
        tmp = _mm_xor_si128((k), _mm_slli_si128((k), 4));     \
        tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4));     \
        tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4));     \
        (res) = _mm_xor_si128(tmp, tmp_key);                  \
    } while (0)
20  |  |  | 
21  |  | static void  | 
22  |  | native_key_expansion128(AESContext *cx, const unsigned char *key)  | 
23  | 192  | { | 
24  | 192  |     __m128i *keySchedule = cx->k.keySchedule;  | 
25  | 192  |     pre_align __m128i tmp_key post_align;  | 
26  | 192  |     pre_align __m128i tmp post_align;  | 
27  | 192  |     keySchedule[0] = _mm_loadu_si128((__m128i *)key);  | 
28  | 192  |     EXPAND_KEY128(keySchedule[0], 0x01, keySchedule[1]);  | 
29  | 192  |     EXPAND_KEY128(keySchedule[1], 0x02, keySchedule[2]);  | 
30  | 192  |     EXPAND_KEY128(keySchedule[2], 0x04, keySchedule[3]);  | 
31  | 192  |     EXPAND_KEY128(keySchedule[3], 0x08, keySchedule[4]);  | 
32  | 192  |     EXPAND_KEY128(keySchedule[4], 0x10, keySchedule[5]);  | 
33  | 192  |     EXPAND_KEY128(keySchedule[5], 0x20, keySchedule[6]);  | 
34  | 192  |     EXPAND_KEY128(keySchedule[6], 0x40, keySchedule[7]);  | 
35  | 192  |     EXPAND_KEY128(keySchedule[7], 0x80, keySchedule[8]);  | 
36  | 192  |     EXPAND_KEY128(keySchedule[8], 0x1B, keySchedule[9]);  | 
37  | 192  |     EXPAND_KEY128(keySchedule[9], 0x36, keySchedule[10]);  | 
38  | 192  | }  | 
39  |  |  | 
/*
 * Helper macros for the AES-192 key schedule.
 *
 * All three write to __m128i scratch variables that the caller must declare:
 * tmp1 and tmp2 (PART1), tmp2 (PART2), and tmp1, tmp2, tmp3 (EXPAND_KEY192).
 * Arguments may be evaluated more than once and must be free of side
 * effects; every rcon argument must be a compile-time constant because it is
 * passed to _mm_aeskeygenassist_si128 as an immediate.
 *
 * Each body is wrapped in do { } while (0) so the macro behaves as a single
 * statement.
 */

/*
 * res = (running XOR of the four 32-bit words of k0)
 *       ^ (dword 1 of aeskeygenassist(kt, rcon), copied to all four lanes).
 */
#define EXPAND_KEY192_PART1(res, k0, kt, rcon)                         \
    do {                                                               \
        tmp2 = _mm_slli_si128((k0), 4);                                \
        tmp1 = _mm_xor_si128((k0), tmp2);                              \
        tmp2 = _mm_slli_si128(tmp2, 4);                                \
        tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2),                \
                             _mm_slli_si128(tmp2, 4));                 \
        tmp2 = _mm_aeskeygenassist_si128((kt), (rcon));                \
        (res) = _mm_xor_si128(tmp1, _mm_shuffle_epi32(tmp2, 0x55));    \
    } while (0)

/*
 * res = k1 ^ (k1 shifted left by 4 bytes)
 *       ^ (dword 3 of k2, copied to all four lanes).
 */
#define EXPAND_KEY192_PART2(res, k1, k2)                               \
    do {                                                               \
        tmp2 = _mm_xor_si128((k1), _mm_slli_si128((k1), 4));           \
        (res) = _mm_xor_si128(tmp2, _mm_shuffle_epi32((k2), 0xFF));    \
    } while (0)

/*
 * One three-slot step of the 192-bit schedule.
 *
 * On entry only the low 64 bits of res1 are meaningful. On exit:
 *   res1  = { low half of res1, low half of tmp3 }
 *   res2  = { high half of tmp3, low half of carry }
 *   res3  = result of EXPAND_KEY192_PART1 on tmp3 and carry with rcon2
 *   carry = result of EXPAND_KEY192_PART2 on the incoming res1 and tmp3
 * where tmp3 is the PART1 result for k0 and the incoming res1 with rcon1.
 */
#define EXPAND_KEY192(k0, res1, res2, res3, carry, rcon1, rcon2)               \
    do {                                                                       \
        EXPAND_KEY192_PART1(tmp3, k0, res1, rcon1);                            \
        EXPAND_KEY192_PART2(carry, res1, tmp3);                                \
        (res1) = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(res1),       \
                                                 _mm_castsi128_pd(tmp3), 0));  \
        (res2) = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(tmp3),       \
                                                 _mm_castsi128_pd(carry), 1)); \
        EXPAND_KEY192_PART1(res3, tmp3, carry, rcon2);                         \
    } while (0)
60  |  |  | 
61  |  | static void  | 
62  |  | native_key_expansion192(AESContext *cx, const unsigned char *key)  | 
63  | 0  | { | 
64  | 0  |     __m128i *keySchedule = cx->k.keySchedule;  | 
65  | 0  |     pre_align __m128i tmp1 post_align;  | 
66  | 0  |     pre_align __m128i tmp2 post_align;  | 
67  | 0  |     pre_align __m128i tmp3 post_align;  | 
68  | 0  |     pre_align __m128i carry post_align;  | 
69  | 0  |     keySchedule[0] = _mm_loadu_si128((__m128i *)key);  | 
70  | 0  |     keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16));  | 
71  | 0  |     EXPAND_KEY192(keySchedule[0], keySchedule[1], keySchedule[2],  | 
72  | 0  |                   keySchedule[3], carry, 0x1, 0x2);  | 
73  | 0  |     EXPAND_KEY192_PART2(keySchedule[4], carry, keySchedule[3]);  | 
74  | 0  |     EXPAND_KEY192(keySchedule[3], keySchedule[4], keySchedule[5],  | 
75  | 0  |                   keySchedule[6], carry, 0x4, 0x8);  | 
76  | 0  |     EXPAND_KEY192_PART2(keySchedule[7], carry, keySchedule[6]);  | 
77  | 0  |     EXPAND_KEY192(keySchedule[6], keySchedule[7], keySchedule[8],  | 
78  | 0  |                   keySchedule[9], carry, 0x10, 0x20);  | 
79  | 0  |     EXPAND_KEY192_PART2(keySchedule[10], carry, keySchedule[9]);  | 
80  | 0  |     EXPAND_KEY192(keySchedule[9], keySchedule[10], keySchedule[11],  | 
81  | 0  |                   keySchedule[12], carry, 0x40, 0x80);  | 
82  | 0  | }  | 
83  |  |  | 
/*
 * Helper macros for the AES-256 key schedule.
 *
 * Both write to __m128i scratch variables named tmp_key, tmp1 and tmp2,
 * which the caller must declare. Arguments may be evaluated more than once
 * and must be free of side effects. rconx/rcon and X are passed to
 * intrinsics as immediates and must be compile-time constants.
 *
 * Each body is wrapped in do { } while (0) and carries no trailing
 * semicolon, so an invocation followed by ';' is exactly one statement.
 */

/*
 * res = (running XOR of the four 32-bit words of k1x)
 *       ^ (aeskeygenassist(k2x, rconx) shuffled with selector X).
 */
#define EXPAND_KEY256_PART(res, rconx, k1x, k2x, X)                    \
    do {                                                               \
        tmp_key = _mm_shuffle_epi32(                                   \
            _mm_aeskeygenassist_si128((k2x), (rconx)), (X));           \
        tmp2 = _mm_slli_si128((k1x), 4);                               \
        tmp1 = _mm_xor_si128((k1x), tmp2);                             \
        tmp2 = _mm_slli_si128(tmp2, 4);                                \
        tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2),                \
                             _mm_slli_si128(tmp2, 4));                 \
        (res) = _mm_xor_si128(tmp1, tmp_key);                          \
    } while (0)

/*
 * Produce the next two round keys from the previous two (k1, k2).
 * res1 uses selector 0xFF (dword 3 of the assist result) with rcon.
 * res2 uses selector 0xAA (dword 2) with a zero round constant and is
 * computed from k2 and the freshly written res1.
 */
#define EXPAND_KEY256(res1, res2, k1, k2, rcon)            \
    do {                                                   \
        EXPAND_KEY256_PART(res1, rcon, k1, k2, 0xFF);      \
        EXPAND_KEY256_PART(res2, 0x00, k2, res1, 0xAA);    \
    } while (0)
95  |  |  | 
96  |  | static void  | 
97  |  | native_key_expansion256(AESContext *cx, const unsigned char *key)  | 
98  | 454  | { | 
99  | 454  |     __m128i *keySchedule = cx->k.keySchedule;  | 
100  | 454  |     pre_align __m128i tmp_key post_align;  | 
101  | 454  |     pre_align __m128i tmp1 post_align;  | 
102  | 454  |     pre_align __m128i tmp2 post_align;  | 
103  | 454  |     keySchedule[0] = _mm_loadu_si128((__m128i *)key);  | 
104  | 454  |     keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16));  | 
105  | 454  |     EXPAND_KEY256(keySchedule[2], keySchedule[3], keySchedule[0],  | 
106  | 454  |                   keySchedule[1], 0x01);  | 
107  | 454  |     EXPAND_KEY256(keySchedule[4], keySchedule[5], keySchedule[2],  | 
108  | 454  |                   keySchedule[3], 0x02);  | 
109  | 454  |     EXPAND_KEY256(keySchedule[6], keySchedule[7], keySchedule[4],  | 
110  | 454  |                   keySchedule[5], 0x04);  | 
111  | 454  |     EXPAND_KEY256(keySchedule[8], keySchedule[9], keySchedule[6],  | 
112  | 454  |                   keySchedule[7], 0x08);  | 
113  | 454  |     EXPAND_KEY256(keySchedule[10], keySchedule[11], keySchedule[8],  | 
114  | 454  |                   keySchedule[9], 0x10);  | 
115  | 454  |     EXPAND_KEY256(keySchedule[12], keySchedule[13], keySchedule[10],  | 
116  | 454  |                   keySchedule[11], 0x20);  | 
117  | 454  |     EXPAND_KEY256_PART(keySchedule[14], 0x40, keySchedule[12],  | 
118  | 454  |                        keySchedule[13], 0xFF);  | 
119  | 454  | }  | 
120  |  |  | 
121  |  | /*  | 
122  |  |  * AES key expansion using aes-ni instructions.  | 
123  |  |  */  | 
124  |  | void  | 
125  |  | rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,  | 
126  |  |                               unsigned int Nk)  | 
127  | 646  | { | 
128  | 646  |     switch (Nk) { | 
129  | 192  |         case 4:  | 
130  | 192  |             native_key_expansion128(cx, key);  | 
131  | 192  |             return;  | 
132  | 0  |         case 6:  | 
133  | 0  |             native_key_expansion192(cx, key);  | 
134  | 0  |             return;  | 
135  | 454  |         case 8:  | 
136  | 454  |             native_key_expansion256(cx, key);  | 
137  | 454  |             return;  | 
138  | 0  |         default:  | 
139  |  |             /* This shouldn't happen (checked by the caller). */  | 
140  | 0  |             return;  | 
141  | 646  |     }  | 
142  | 646  | }  | 
143  |  |  | 
144  |  | void  | 
145  |  | rijndael_native_encryptBlock(AESContext *cx,  | 
146  |  |                              unsigned char *output,  | 
147  |  |                              const unsigned char *input)  | 
148  | 28.0k  | { | 
149  | 28.0k  |     unsigned int i;  | 
150  | 28.0k  |     pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input);  | 
151  | 28.0k  |     m = _mm_xor_si128(m, cx->k.keySchedule[0]);  | 
152  | 366k  |     for (i = 1; i < cx->Nr; ++i) { | 
153  | 338k  |         m = _mm_aesenc_si128(m, cx->k.keySchedule[i]);  | 
154  | 338k  |     }  | 
155  | 28.0k  |     m = _mm_aesenclast_si128(m, cx->k.keySchedule[cx->Nr]);  | 
156  | 28.0k  |     _mm_storeu_si128((__m128i *)output, m);  | 
157  | 28.0k  | }  | 
158  |  |  | 
159  |  | void  | 
160  |  | rijndael_native_decryptBlock(AESContext *cx,  | 
161  |  |                              unsigned char *output,  | 
162  |  |                              const unsigned char *input)  | 
163  | 4  | { | 
164  | 4  |     int i;  | 
165  | 4  |     pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input);  | 
166  | 4  |     m = _mm_xor_si128(m, cx->k.keySchedule[cx->Nr]);  | 
167  | 56  |     for (i = cx->Nr - 1; i > 0; --i) { | 
168  | 52  |         m = _mm_aesdec_si128(m, cx->k.keySchedule[i]);  | 
169  | 52  |     }  | 
170  | 4  |     m = _mm_aesdeclast_si128(m, cx->k.keySchedule[0]);  | 
171  | 4  |     _mm_storeu_si128((__m128i *)output, m);  | 
172  | 4  | }  | 
173  |  |  | 
174  |  | // out = a ^ b  | 
175  |  | void  | 
176  |  | native_xorBlock(unsigned char *out,  | 
177  |  |                 const unsigned char *a,  | 
178  |  |                 const unsigned char *b)  | 
179  | 529  | { | 
180  | 529  |     pre_align __m128i post_align in1 = _mm_loadu_si128((__m128i *)(a));  | 
181  | 529  |     pre_align __m128i post_align in2 = _mm_loadu_si128((__m128i *)(b));  | 
182  | 529  |     in1 = _mm_xor_si128(in1, in2);  | 
183  | 529  |     _mm_storeu_si128((__m128i *)(out), in1);  | 
184  | 529  | }  |