/src/lzma-fuzz/sdk/C/AesOpt.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* AesOpt.c -- Intel's AES |
2 | | 2017-06-08 : Igor Pavlov : Public domain */ |
3 | | |
4 | | #include "Precomp.h" |
5 | | |
6 | | #include "CpuArch.h" |
7 | | |
/* Select the AES-NI implementation only when the target is x86 / x64
   (MY_CPU_X86_OR_AMD64) and the compiler reports _MSC_VER above 1500 or
   _MSC_FULL_VER of at least 150030729.  Otherwise USE_INTEL_AES stays
   undefined and the #else branch of this file is compiled instead. */
#if defined(MY_CPU_X86_OR_AMD64) && \
    ((_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729))
#define USE_INTEL_AES
#endif
13 | | |
14 | | #ifdef USE_INTEL_AES |
15 | | |
16 | | #include <wmmintrin.h> |
17 | | |
18 | | void MY_FAST_CALL AesCbc_Encode_Intel(__m128i *p, __m128i *data, size_t numBlocks) |
19 | | { |
20 | | __m128i m = *p; |
21 | | for (; numBlocks != 0; numBlocks--, data++) |
22 | | { |
23 | | UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; |
24 | | const __m128i *w = p + 3; |
25 | | m = _mm_xor_si128(m, *data); |
26 | | m = _mm_xor_si128(m, p[2]); |
27 | | do |
28 | | { |
29 | | m = _mm_aesenc_si128(m, w[0]); |
30 | | m = _mm_aesenc_si128(m, w[1]); |
31 | | w += 2; |
32 | | } |
33 | | while (--numRounds2 != 0); |
34 | | m = _mm_aesenc_si128(m, w[0]); |
35 | | m = _mm_aesenclast_si128(m, w[1]); |
36 | | *data = m; |
37 | | } |
38 | | *p = m; |
39 | | } |
40 | | |
/* Number of blocks the interleaved loops handle per iteration. */
#define NUM_WAYS 3

/* Applies the round intrinsic `op` with round key w[n] to the three states
   m0, m1 and m2.  The names w, m0, m1 and m2 must be in scope at the point
   of use.  The expansion is a braced block, so call sites are written
   without a trailing semicolon. */
#define AES_OP_W(op, n) { \
    const __m128i roundKey = w[n]; \
    m0 = op(m0, roundKey); \
    m1 = op(m1, roundKey); \
    m2 = op(m2, roundKey); \
  }

/* Three-way wrappers for the individual AES round instructions. */
#define AES_ENC(n)      AES_OP_W(_mm_aesenc_si128, n)
#define AES_ENC_LAST(n) AES_OP_W(_mm_aesenclast_si128, n)
#define AES_DEC(n)      AES_OP_W(_mm_aesdec_si128, n)
#define AES_DEC_LAST(n) AES_OP_W(_mm_aesdeclast_si128, n)
54 | | |
55 | | void MY_FAST_CALL AesCbc_Decode_Intel(__m128i *p, __m128i *data, size_t numBlocks) |
56 | | { |
57 | | __m128i iv = *p; |
58 | | for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS) |
59 | | { |
60 | | UInt32 numRounds2 = *(const UInt32 *)(p + 1); |
61 | | const __m128i *w = p + numRounds2 * 2; |
62 | | __m128i m0, m1, m2; |
63 | | { |
64 | | const __m128i t = w[2]; |
65 | | m0 = _mm_xor_si128(t, data[0]); |
66 | | m1 = _mm_xor_si128(t, data[1]); |
67 | | m2 = _mm_xor_si128(t, data[2]); |
68 | | } |
69 | | numRounds2--; |
70 | | do |
71 | | { |
72 | | AES_DEC(1) |
73 | | AES_DEC(0) |
74 | | w -= 2; |
75 | | } |
76 | | while (--numRounds2 != 0); |
77 | | AES_DEC(1) |
78 | | AES_DEC_LAST(0) |
79 | | |
80 | | { |
81 | | __m128i t; |
82 | | t = _mm_xor_si128(m0, iv); iv = data[0]; data[0] = t; |
83 | | t = _mm_xor_si128(m1, iv); iv = data[1]; data[1] = t; |
84 | | t = _mm_xor_si128(m2, iv); iv = data[2]; data[2] = t; |
85 | | } |
86 | | } |
87 | | for (; numBlocks != 0; numBlocks--, data++) |
88 | | { |
89 | | UInt32 numRounds2 = *(const UInt32 *)(p + 1); |
90 | | const __m128i *w = p + numRounds2 * 2; |
91 | | __m128i m = _mm_xor_si128(w[2], *data); |
92 | | numRounds2--; |
93 | | do |
94 | | { |
95 | | m = _mm_aesdec_si128(m, w[1]); |
96 | | m = _mm_aesdec_si128(m, w[0]); |
97 | | w -= 2; |
98 | | } |
99 | | while (--numRounds2 != 0); |
100 | | m = _mm_aesdec_si128(m, w[1]); |
101 | | m = _mm_aesdeclast_si128(m, w[0]); |
102 | | |
103 | | m = _mm_xor_si128(m, iv); |
104 | | iv = *data; |
105 | | *data = m; |
106 | | } |
107 | | *p = iv; |
108 | | } |
109 | | |
110 | | void MY_FAST_CALL AesCtr_Code_Intel(__m128i *p, __m128i *data, size_t numBlocks) |
111 | | { |
112 | | __m128i ctr = *p; |
113 | | __m128i one; |
114 | | one.m128i_u64[0] = 1; |
115 | | one.m128i_u64[1] = 0; |
116 | | for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS) |
117 | | { |
118 | | UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; |
119 | | const __m128i *w = p; |
120 | | __m128i m0, m1, m2; |
121 | | { |
122 | | const __m128i t = w[2]; |
123 | | ctr = _mm_add_epi64(ctr, one); m0 = _mm_xor_si128(ctr, t); |
124 | | ctr = _mm_add_epi64(ctr, one); m1 = _mm_xor_si128(ctr, t); |
125 | | ctr = _mm_add_epi64(ctr, one); m2 = _mm_xor_si128(ctr, t); |
126 | | } |
127 | | w += 3; |
128 | | do |
129 | | { |
130 | | AES_ENC(0) |
131 | | AES_ENC(1) |
132 | | w += 2; |
133 | | } |
134 | | while (--numRounds2 != 0); |
135 | | AES_ENC(0) |
136 | | AES_ENC_LAST(1) |
137 | | data[0] = _mm_xor_si128(data[0], m0); |
138 | | data[1] = _mm_xor_si128(data[1], m1); |
139 | | data[2] = _mm_xor_si128(data[2], m2); |
140 | | } |
141 | | for (; numBlocks != 0; numBlocks--, data++) |
142 | | { |
143 | | UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; |
144 | | const __m128i *w = p; |
145 | | __m128i m; |
146 | | ctr = _mm_add_epi64(ctr, one); |
147 | | m = _mm_xor_si128(ctr, p[2]); |
148 | | w += 3; |
149 | | do |
150 | | { |
151 | | m = _mm_aesenc_si128(m, w[0]); |
152 | | m = _mm_aesenc_si128(m, w[1]); |
153 | | w += 2; |
154 | | } |
155 | | while (--numRounds2 != 0); |
156 | | m = _mm_aesenc_si128(m, w[0]); |
157 | | m = _mm_aesenclast_si128(m, w[1]); |
158 | | *data = _mm_xor_si128(*data, m); |
159 | | } |
160 | | *p = ctr; |
161 | | } |
162 | | |
163 | | #else |
164 | | |
165 | | void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks); |
166 | | void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks); |
167 | | void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks); |
168 | | |
169 | | void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *p, Byte *data, size_t numBlocks) |
170 | 362 | { |
171 | 362 | AesCbc_Encode(p, data, numBlocks); |
172 | 362 | } |
173 | | |
174 | | void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *p, Byte *data, size_t numBlocks) |
175 | 362 | { |
176 | 362 | AesCbc_Decode(p, data, numBlocks); |
177 | 362 | } |
178 | | |
179 | | void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *p, Byte *data, size_t numBlocks) |
180 | 0 | { |
181 | 0 | AesCtr_Code(p, data, numBlocks); |
182 | 0 | } |
183 | | |
184 | | #endif |