/src/nss/lib/freebl/aes-x86.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
2 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
3 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
4 | | |
5 | | #ifdef FREEBL_NO_DEPEND |
6 | | #include "stubs.h" |
7 | | #endif |
8 | | #include "rijndael.h" |
9 | | #include "secerr.h" |
10 | | |
11 | | #include <wmmintrin.h> /* aes-ni */ |
12 | | |
/*
 * Derive one AES-128 round key from the previous one.
 *
 *   k    - previous round key (__m128i expression, evaluated several times)
 *   rcon - round constant; it is forwarded to _mm_aeskeygenassist_si128 and
 *          therefore has to be a compile-time constant
 *   res  - lvalue that receives the new round key
 *
 * The macro writes to two __m128i scratch variables named `tmp_key` and
 * `tmp`, which must be declared in the scope where it is expanded.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a single
 * statement and stays correct under an unbraced if/else.
 */
#define EXPAND_KEY128(k, rcon, res)                            \
    do {                                                       \
        /* Broadcast the highest 32-bit lane of the assist. */ \
        tmp_key = _mm_aeskeygenassist_si128(k, rcon);          \
        tmp_key = _mm_shuffle_epi32(tmp_key, 0xFF);            \
        /* Three shift-by-4-bytes/xor steps: running xor of */ \
        /* the four 32-bit words of k.                      */ \
        tmp = _mm_xor_si128(k, _mm_slli_si128(k, 4));          \
        tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4));      \
        tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4));      \
        res = _mm_xor_si128(tmp, tmp_key);                     \
    } while (0)
20 | | |
21 | | static void |
22 | | native_key_expansion128(AESContext *cx, const unsigned char *key) |
23 | 0 | { |
24 | 0 | __m128i *keySchedule = cx->k.keySchedule; |
25 | 0 | pre_align __m128i tmp_key post_align; |
26 | 0 | pre_align __m128i tmp post_align; |
27 | 0 | keySchedule[0] = _mm_loadu_si128((__m128i *)key); |
28 | 0 | EXPAND_KEY128(keySchedule[0], 0x01, keySchedule[1]); |
29 | 0 | EXPAND_KEY128(keySchedule[1], 0x02, keySchedule[2]); |
30 | 0 | EXPAND_KEY128(keySchedule[2], 0x04, keySchedule[3]); |
31 | 0 | EXPAND_KEY128(keySchedule[3], 0x08, keySchedule[4]); |
32 | 0 | EXPAND_KEY128(keySchedule[4], 0x10, keySchedule[5]); |
33 | 0 | EXPAND_KEY128(keySchedule[5], 0x20, keySchedule[6]); |
34 | 0 | EXPAND_KEY128(keySchedule[6], 0x40, keySchedule[7]); |
35 | 0 | EXPAND_KEY128(keySchedule[7], 0x80, keySchedule[8]); |
36 | 0 | EXPAND_KEY128(keySchedule[8], 0x1B, keySchedule[9]); |
37 | 0 | EXPAND_KEY128(keySchedule[9], 0x36, keySchedule[10]); |
38 | 0 | } |
39 | | |
/*
 * AES-192 key expansion helper, first half.
 *
 *   res  - lvalue that receives the result
 *   k0   - __m128i whose four 32-bit words are folded into a running xor
 *   kt   - __m128i handed to _mm_aeskeygenassist_si128; only 32-bit lane 1
 *          of the assist result is used (shuffle 0x55 broadcasts it)
 *   rcon - round constant, must be a compile-time constant
 *
 * Writes to the __m128i scratch variables `tmp1` and `tmp2`, which must be
 * declared where the macro is expanded.
 *
 * Wrapped in do { } while (0) so that the expansion is a single statement.
 */
#define EXPAND_KEY192_PART1(res, k0, kt, rcon)                       \
    do {                                                             \
        /* tmp1 = k0 ^ (k0 << 32) ^ (k0 << 64) ^ (k0 << 96) */       \
        tmp2 = _mm_slli_si128(k0, 4);                                \
        tmp1 = _mm_xor_si128(k0, tmp2);                              \
        tmp2 = _mm_slli_si128(tmp2, 4);                              \
        tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2),              \
                             _mm_slli_si128(tmp2, 4));               \
        tmp2 = _mm_aeskeygenassist_si128(kt, rcon);                  \
        res = _mm_xor_si128(tmp1, _mm_shuffle_epi32(tmp2, 0x55));    \
    } while (0)
47 | | |
/*
 * AES-192 key expansion helper, second half.
 *
 *   res - lvalue that receives k1 ^ (k1 << 32 bits) ^ broadcast(lane 3 of k2)
 *   k1  - __m128i input (evaluated twice)
 *   k2  - __m128i whose highest 32-bit lane is broadcast into every lane
 *
 * Writes to the __m128i scratch variable `tmp2`, which must be declared
 * where the macro is expanded.
 *
 * Wrapped in do { } while (0) so that the expansion is a single statement.
 */
#define EXPAND_KEY192_PART2(res, k1, k2)                         \
    do {                                                         \
        tmp2 = _mm_xor_si128(k1, _mm_slli_si128(k1, 4));         \
        res = _mm_xor_si128(tmp2, _mm_shuffle_epi32(k2, 0xFF));  \
    } while (0)
51 | | |
/*
 * One AES-192 expansion step built from EXPAND_KEY192_PART1/PART2.
 *
 *   k0           - __m128i input to the first PART1
 *   res1         - in/out: its low 64 bits are kept, its high 64 bits are
 *                  replaced by the low 64 bits of the first PART1 result
 *   res2         - out: high 64 bits of the first PART1 result followed by
 *                  the low 64 bits of `carry`
 *   res3         - out: result of the second PART1
 *   carry        - out: PART2 result, also consumed by the second PART1
 *   rcon1, rcon2 - round constants for the two PART1 invocations; both must
 *                  be compile-time constants
 *
 * Needs the __m128i scratch variables `tmp1`, `tmp2` and `tmp3` in the
 * expanding scope.
 *
 * Wrapped in do { } while (0) so that the expansion is a single statement.
 */
#define EXPAND_KEY192(k0, res1, res2, res3, carry, rcon1, rcon2)           \
    do {                                                                   \
        EXPAND_KEY192_PART1(tmp3, k0, res1, rcon1);                        \
        EXPAND_KEY192_PART2(carry, res1, tmp3);                            \
        /* res1 = { low64(res1), low64(tmp3) } */                          \
        res1 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(res1),     \
                                               _mm_castsi128_pd(tmp3),     \
                                               0));                        \
        /* res2 = { high64(tmp3), low64(carry) } */                        \
        res2 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(tmp3),     \
                                               _mm_castsi128_pd(carry),    \
                                               1));                        \
        EXPAND_KEY192_PART1(res3, tmp3, carry, rcon2);                     \
    } while (0)
60 | | |
61 | | static void |
62 | | native_key_expansion192(AESContext *cx, const unsigned char *key) |
63 | 0 | { |
64 | 0 | __m128i *keySchedule = cx->k.keySchedule; |
65 | 0 | pre_align __m128i tmp1 post_align; |
66 | 0 | pre_align __m128i tmp2 post_align; |
67 | 0 | pre_align __m128i tmp3 post_align; |
68 | 0 | pre_align __m128i carry post_align; |
69 | 0 | keySchedule[0] = _mm_loadu_si128((__m128i *)key); |
70 | 0 | keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16)); |
71 | 0 | EXPAND_KEY192(keySchedule[0], keySchedule[1], keySchedule[2], |
72 | 0 | keySchedule[3], carry, 0x1, 0x2); |
73 | 0 | EXPAND_KEY192_PART2(keySchedule[4], carry, keySchedule[3]); |
74 | 0 | EXPAND_KEY192(keySchedule[3], keySchedule[4], keySchedule[5], |
75 | 0 | keySchedule[6], carry, 0x4, 0x8); |
76 | 0 | EXPAND_KEY192_PART2(keySchedule[7], carry, keySchedule[6]); |
77 | 0 | EXPAND_KEY192(keySchedule[6], keySchedule[7], keySchedule[8], |
78 | 0 | keySchedule[9], carry, 0x10, 0x20); |
79 | 0 | EXPAND_KEY192_PART2(keySchedule[10], carry, keySchedule[9]); |
80 | 0 | EXPAND_KEY192(keySchedule[9], keySchedule[10], keySchedule[11], |
81 | 0 | keySchedule[12], carry, 0x40, 0x80); |
82 | 0 | } |
83 | | |
/*
 * Derive one AES-256 round key.
 *
 *   res   - lvalue that receives the new round key
 *   rconx - round constant for _mm_aeskeygenassist_si128; must be a
 *           compile-time constant
 *   k1x   - __m128i whose four 32-bit words are folded into a running xor
 *   k2x   - __m128i handed to _mm_aeskeygenassist_si128
 *   X     - _mm_shuffle_epi32 selector choosing which 32-bit lane of the
 *           assist result is broadcast (the file uses 0xFF and 0xAA)
 *
 * Writes to the __m128i scratch variables `tmp_key`, `tmp1` and `tmp2`,
 * which must be declared where the macro is expanded.
 *
 * The expansion is a single statement without a trailing semicolon; the
 * caller supplies the `;`.
 */
#define EXPAND_KEY256_PART(res, rconx, k1x, k2x, X)                        \
    do {                                                                   \
        tmp_key = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(k2x, rconx), \
                                    X);                                    \
        /* tmp1 = k1x ^ (k1x << 32) ^ (k1x << 64) ^ (k1x << 96) */         \
        tmp2 = _mm_slli_si128(k1x, 4);                                     \
        tmp1 = _mm_xor_si128(k1x, tmp2);                                   \
        tmp2 = _mm_slli_si128(tmp2, 4);                                    \
        tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2),                    \
                             _mm_slli_si128(tmp2, 4));                     \
        res = _mm_xor_si128(tmp1, tmp_key);                                \
    } while (0)
91 | | |
/*
 * Derive a pair of AES-256 round keys.
 *
 *   res1 - out: EXPAND_KEY256_PART(rcon, k1, k2) using selector 0xFF
 *   res2 - out: EXPAND_KEY256_PART(0x00, k2, res1) using selector 0xAA
 *   k1   - round key two positions before res1
 *   k2   - round key immediately before res1
 *   rcon - round constant for res1; must be a compile-time constant
 *
 * Relies on the scratch variables required by EXPAND_KEY256_PART.
 *
 * Wrapped in do { } while (0) so that the expansion is a single statement.
 */
#define EXPAND_KEY256(res1, res2, k1, k2, rcon)            \
    do {                                                   \
        EXPAND_KEY256_PART(res1, rcon, k1, k2, 0xFF);      \
        EXPAND_KEY256_PART(res2, 0x00, k2, res1, 0xAA);    \
    } while (0)
95 | | |
96 | | static void |
97 | | native_key_expansion256(AESContext *cx, const unsigned char *key) |
98 | 0 | { |
99 | 0 | __m128i *keySchedule = cx->k.keySchedule; |
100 | 0 | pre_align __m128i tmp_key post_align; |
101 | 0 | pre_align __m128i tmp1 post_align; |
102 | 0 | pre_align __m128i tmp2 post_align; |
103 | 0 | keySchedule[0] = _mm_loadu_si128((__m128i *)key); |
104 | 0 | keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16)); |
105 | 0 | EXPAND_KEY256(keySchedule[2], keySchedule[3], keySchedule[0], |
106 | 0 | keySchedule[1], 0x01); |
107 | 0 | EXPAND_KEY256(keySchedule[4], keySchedule[5], keySchedule[2], |
108 | 0 | keySchedule[3], 0x02); |
109 | 0 | EXPAND_KEY256(keySchedule[6], keySchedule[7], keySchedule[4], |
110 | 0 | keySchedule[5], 0x04); |
111 | 0 | EXPAND_KEY256(keySchedule[8], keySchedule[9], keySchedule[6], |
112 | 0 | keySchedule[7], 0x08); |
113 | 0 | EXPAND_KEY256(keySchedule[10], keySchedule[11], keySchedule[8], |
114 | 0 | keySchedule[9], 0x10); |
115 | 0 | EXPAND_KEY256(keySchedule[12], keySchedule[13], keySchedule[10], |
116 | 0 | keySchedule[11], 0x20); |
117 | 0 | EXPAND_KEY256_PART(keySchedule[14], 0x40, keySchedule[12], |
118 | 0 | keySchedule[13], 0xFF); |
119 | 0 | } |
120 | | |
121 | | /* |
122 | | * AES key expansion using aes-ni instructions. |
123 | | */ |
124 | | void |
125 | | rijndael_native_key_expansion(AESContext *cx, const unsigned char *key, |
126 | | unsigned int Nk) |
127 | 0 | { |
128 | 0 | switch (Nk) { |
129 | 0 | case 4: |
130 | 0 | native_key_expansion128(cx, key); |
131 | 0 | return; |
132 | 0 | case 6: |
133 | 0 | native_key_expansion192(cx, key); |
134 | 0 | return; |
135 | 0 | case 8: |
136 | 0 | native_key_expansion256(cx, key); |
137 | 0 | return; |
138 | 0 | default: |
139 | | /* This shouldn't happen (checked by the caller). */ |
140 | 0 | return; |
141 | 0 | } |
142 | 0 | } |
143 | | |
144 | | void |
145 | | rijndael_native_encryptBlock(AESContext *cx, |
146 | | unsigned char *output, |
147 | | const unsigned char *input) |
148 | 0 | { |
149 | 0 | unsigned int i; |
150 | 0 | pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input); |
151 | 0 | m = _mm_xor_si128(m, cx->k.keySchedule[0]); |
152 | 0 | for (i = 1; i < cx->Nr; ++i) { |
153 | 0 | m = _mm_aesenc_si128(m, cx->k.keySchedule[i]); |
154 | 0 | } |
155 | 0 | m = _mm_aesenclast_si128(m, cx->k.keySchedule[cx->Nr]); |
156 | 0 | _mm_storeu_si128((__m128i *)output, m); |
157 | 0 | } |
158 | | |
159 | | void |
160 | | rijndael_native_decryptBlock(AESContext *cx, |
161 | | unsigned char *output, |
162 | | const unsigned char *input) |
163 | 0 | { |
164 | 0 | int i; |
165 | 0 | pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input); |
166 | 0 | m = _mm_xor_si128(m, cx->k.keySchedule[cx->Nr]); |
167 | 0 | for (i = cx->Nr - 1; i > 0; --i) { |
168 | 0 | m = _mm_aesdec_si128(m, cx->k.keySchedule[i]); |
169 | 0 | } |
170 | 0 | m = _mm_aesdeclast_si128(m, cx->k.keySchedule[0]); |
171 | 0 | _mm_storeu_si128((__m128i *)output, m); |
172 | 0 | } |
173 | | |
174 | | // out = a ^ b |
175 | | void |
176 | | native_xorBlock(unsigned char *out, |
177 | | const unsigned char *a, |
178 | | const unsigned char *b) |
179 | 0 | { |
180 | 0 | pre_align __m128i post_align in1 = _mm_loadu_si128((__m128i *)(a)); |
181 | 0 | pre_align __m128i post_align in2 = _mm_loadu_si128((__m128i *)(b)); |
182 | 0 | in1 = _mm_xor_si128(in1, in2); |
183 | 0 | _mm_storeu_si128((__m128i *)(out), in1); |
184 | 0 | } |