Line | Count | Source (jump to first uncovered line) |
1 | | // tiger.cpp - originally written and placed in the public domain by Wei Dai |
2 | | |
3 | | #include "pch.h" |
4 | | #include "config.h" |
5 | | |
6 | | #include "tiger.h" |
7 | | #include "misc.h" |
8 | | #include "cpu.h" |
9 | | |
// Build-time switch: when CRYPTOPP_DISABLE_TIGER_ASM is defined, the four
// *_ASM_AVAILABLE macros are undefined for the rest of this file, so the code
// guarded by CRYPTOPP_SSE2_ASM_AVAILABLE further down is compiled out.
10 | | #if defined(CRYPTOPP_DISABLE_TIGER_ASM) |
11 | | # undef CRYPTOPP_X86_ASM_AVAILABLE |
12 | | # undef CRYPTOPP_X32_ASM_AVAILABLE |
13 | | # undef CRYPTOPP_X64_ASM_AVAILABLE |
14 | | # undef CRYPTOPP_SSE2_ASM_AVAILABLE |
15 | | #endif |
16 | | |
16 | | |
17 | | NAMESPACE_BEGIN(CryptoPP) |
18 | | |
19 | | std::string Tiger::AlgorithmProvider() const |
20 | 0 | { |
21 | | #ifndef CRYPTOPP_DISABLE_TIGER_ASM |
22 | | # if CRYPTOPP_SSE2_ASM_AVAILABLE |
23 | | if (HasSSE2()) |
24 | | return "SSE2"; |
25 | | # endif |
26 | | #endif |
27 | 0 | return "C++"; |
28 | 0 | } |
29 | | |
30 | | void Tiger::InitState(HashWordType *state) |
31 | 10.7k | { |
32 | 10.7k | state[0] = W64LIT(0x0123456789ABCDEF); |
33 | 10.7k | state[1] = W64LIT(0xFEDCBA9876543210); |
34 | 10.7k | state[2] = W64LIT(0xF096A5B4C3B2E187); |
35 | 10.7k | } |
36 | | |
37 | | void Tiger::TruncatedFinal(byte *digest, size_t digestSize) |
38 | 10.5k | { |
39 | 10.5k | CRYPTOPP_ASSERT(digest != NULLPTR); |
40 | 10.5k | ThrowIfInvalidTruncatedSize(digestSize); |
41 | | |
42 | 10.5k | PadLastBlock(56, 0x01); |
43 | 10.5k | CorrectEndianess(m_data, m_data, 56); |
44 | | |
45 | 10.5k | m_data[7] = GetBitCountLo(); |
46 | | |
47 | 10.5k | Transform(m_state, m_data); |
48 | 10.5k | CorrectEndianess(m_state, m_state, DigestSize()); |
49 | 10.5k | std::memcpy(digest, m_state, digestSize); |
50 | | |
51 | 10.5k | Restart(); // reinit for next use |
52 | 10.5k | } |
53 | | |
// Tiger::Transform - mixes one block of message words into the hash state.
//
// state  three 64-bit words, read at entry and overwritten with the result
// data   eight 64-bit message words (indices 0..7 are read)
//
// Two implementations are present. When CRYPTOPP_SSE2_ASM_AVAILABLE and
// CRYPTOPP_BOOL_X86 are both true and HasSSE2() reports support at runtime,
// the inline-assembly version using the MMX registers runs. Otherwise the
// portable C++ version in the trailing block runs. Both read their lookup
// values from `table`, which is defined outside this function.
54 | | void Tiger::Transform (word64 *state, const word64 *data) |
55 | 573k | { |
56 | | #if CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86 |
57 | | if (HasSSE2()) |
58 | | { |
// Register setup: eax = state, esi = data, edx = table. With GCC-style
// inline asm these come from the input constraints at the end of the asm
// statement; otherwise they are loaded explicitly below.
59 | | #ifdef __GNUC__ |
60 | | __asm__ __volatile__ |
61 | | ( |
62 | | INTEL_NOPREFIX |
63 | | AS_PUSH_IF86(bx) |
64 | | #else |
65 | | AS2( lea edx, [table]) |
66 | | AS2( mov eax, state) |
67 | | AS2( mov esi, data) |
68 | | #endif |
// mm0/mm1/mm2 hold the three working state words. mm5 keeps a copy of
// state[1] for the final subtraction. mm6 and mm7 are loaded from the two
// 64-bit values stored right after the four 2048-byte sub-tables.
// NOTE(review): presumably mm6 is 0xA5A5A5A5A5A5A5A5 and mm7 is all ones
// (used as a bitwise NOT), matching the C++ key_schedule - confirm against
// the definition of `table`.
69 | | AS2( movq mm0, [eax]) |
70 | | AS2( movq mm1, [eax+1*8]) |
71 | | AS2( movq mm5, mm1) |
72 | | AS2( movq mm2, [eax+2*8]) |
73 | | AS2( movq mm7, [edx+4*2048+0*8]) |
74 | | AS2( movq mm6, [edx+4*2048+1*8]) |
// Save the original esp in ecx, align esp down to 16 bytes and reserve
// 8*8 bytes of scratch space for the expanded message words.
// NOTE(review): AS_PUSH_IF86(cx) presumably pushes the saved esp so that
// AS_POP_IF86(sp) can restore it at the end; the scratch area is then
// addressed as esp+4 - confirm against the macro definitions.
75 | | AS2( mov ecx, esp) |
76 | | AS2( and esp, 0xfffffff0) |
77 | | AS2( sub esp, 8*8) |
78 | | AS_PUSH_IF86(cx) |
79 | | |
// SSE2_round(a,b,c,x,mul): one round in assembly.
//   c ^= [x]
//   a -= T0[byte0(c)] ^ T1[byte2(c)] ^ T2[byte4(c)] ^ T3[byte6(c)]
//   b += T3[byte1(c)] ^ T2[byte3(c)] ^ T1[byte5(c)] ^ T0[byte7(c)]
//   b *= mul   (via SSE2_mul_<mul>)
// Tn is the sub-table at edx+n*2048. The bytes of c are extracted through
// ecx (movd for bytes 0-3, pextrw for words 2 and 3) and cl/ch.
// mm3 and mm4 are scratch accumulators; ecx and edi are clobbered.
80 | | #define SSE2_round(a,b,c,x,mul) \ |
81 | | AS2( pxor c, [x])\ |
82 | | AS2( movd ecx, c)\ |
83 | | AS2( movzx edi, cl)\ |
84 | | AS2( movq mm3, [edx+0*2048+edi*8])\ |
85 | | AS2( movzx edi, ch)\ |
86 | | AS2( movq mm4, [edx+3*2048+edi*8])\ |
87 | | AS2( shr ecx, 16)\ |
88 | | AS2( movzx edi, cl)\ |
89 | | AS2( pxor mm3, [edx+1*2048+edi*8])\ |
90 | | AS2( movzx edi, ch)\ |
91 | | AS2( pxor mm4, [edx+2*2048+edi*8])\ |
92 | | AS3( pextrw ecx, c, 2)\ |
93 | | AS2( movzx edi, cl)\ |
94 | | AS2( pxor mm3, [edx+2*2048+edi*8])\ |
95 | | AS2( movzx edi, ch)\ |
96 | | AS2( pxor mm4, [edx+1*2048+edi*8])\ |
97 | | AS3( pextrw ecx, c, 3)\ |
98 | | AS2( movzx edi, cl)\ |
99 | | AS2( pxor mm3, [edx+3*2048+edi*8])\ |
100 | | AS2( psubq a, mm3)\ |
101 | | AS2( movzx edi, ch)\ |
102 | | AS2( pxor mm4, [edx+0*2048+edi*8])\ |
103 | | AS2( paddq b, mm4)\ |
104 | | SSE2_mul_##mul(b) |
105 | | |
// Multiplication by the per-pass constant using shift and add/subtract:
//   5*b = (b<<2)+b,  7*b = (b<<3)-b,  9*b = (b<<3)+b.  mm3 is scratch.
106 | | #define SSE2_mul_5(b) \ |
107 | | AS2( movq mm3, b)\ |
108 | | AS2( psllq b, 2)\ |
109 | | AS2( paddq b, mm3) |
110 | | |
111 | | #define SSE2_mul_7(b) \ |
112 | | AS2( movq mm3, b)\ |
113 | | AS2( psllq b, 3)\ |
114 | | AS2( psubq b, mm3) |
115 | | |
116 | | #define SSE2_mul_9(b) \ |
117 | | AS2( movq mm3, b)\ |
118 | | AS2( psllq b, 3)\ |
119 | | AS2( paddq b, mm3) |
120 | | |
// Numeric labels used as the loop-exit target of each pass. The loop-start
// label of a pass is the multiplier itself (5, 7 or 9).
121 | | #define label2_5 1 |
122 | | #define label2_7 2 |
123 | | #define label2_9 3 |
124 | | |
// SSE2_pass(A,B,C,mul,X): eight rounds over the eight words at X.
// ebx is the byte offset into X and advances by 3*8 per iteration
// (0, 24, 48). Each iteration performs three rounds with the register
// roles rotated, except the last (ebx == 6*8), which leaves after two.
125 | | #define SSE2_pass(A,B,C,mul,X) \ |
126 | | AS2( xor ebx, ebx)\ |
127 | | ASL(mul)\ |
128 | | SSE2_round(A,B,C,X+0*8+ebx,mul)\ |
129 | | SSE2_round(B,C,A,X+1*8+ebx,mul)\ |
130 | | AS2( cmp ebx, 6*8)\ |
131 | | ASJ( je, label2_##mul, f)\ |
132 | | SSE2_round(C,A,B,X+2*8+ebx,mul)\ |
133 | | AS2( add ebx, 3*8)\ |
134 | | ASJ( jmp, mul, b)\ |
135 | | ASL(label2_##mul) |
136 | | |
// SSE2_key_schedule(Y,X): derives the next eight message words at Y from
// the eight words at X, following the same sequence of steps as the C++
// key_schedule macro further down. Y and X may be the same address.
// mm3/mm4 are scratch; mm6/mm7 and [edx+4*2048+2*8] supply the constants.
// NOTE(review): [edx+4*2048+2*8] presumably holds 0x0123456789ABCDEF, the
// constant used in the last step of the C++ version - confirm.
137 | | #define SSE2_key_schedule(Y,X) \ |
138 | | AS2( movq mm3, [X+7*8])\ |
139 | | AS2( pxor mm3, mm6)\ |
140 | | AS2( movq mm4, [X+0*8])\ |
141 | | AS2( psubq mm4, mm3)\ |
142 | | AS2( movq [Y+0*8], mm4)\ |
143 | | AS2( pxor mm4, [X+1*8])\ |
144 | | AS2( movq mm3, mm4)\ |
145 | | AS2( movq [Y+1*8], mm4)\ |
146 | | AS2( paddq mm4, [X+2*8])\ |
147 | | AS2( pxor mm3, mm7)\ |
148 | | AS2( psllq mm3, 19)\ |
149 | | AS2( movq [Y+2*8], mm4)\ |
150 | | AS2( pxor mm3, mm4)\ |
151 | | AS2( movq mm4, [X+3*8])\ |
152 | | AS2( psubq mm4, mm3)\ |
153 | | AS2( movq [Y+3*8], mm4)\ |
154 | | AS2( pxor mm4, [X+4*8])\ |
155 | | AS2( movq mm3, mm4)\ |
156 | | AS2( movq [Y+4*8], mm4)\ |
157 | | AS2( paddq mm4, [X+5*8])\ |
158 | | AS2( pxor mm3, mm7)\ |
159 | | AS2( psrlq mm3, 23)\ |
160 | | AS2( movq [Y+5*8], mm4)\ |
161 | | AS2( pxor mm3, mm4)\ |
162 | | AS2( movq mm4, [X+6*8])\ |
163 | | AS2( psubq mm4, mm3)\ |
164 | | AS2( movq [Y+6*8], mm4)\ |
165 | | AS2( pxor mm4, [X+7*8])\ |
166 | | AS2( movq mm3, mm4)\ |
167 | | AS2( movq [Y+7*8], mm4)\ |
168 | | AS2( paddq mm4, [Y+0*8])\ |
169 | | AS2( pxor mm3, mm7)\ |
170 | | AS2( psllq mm3, 19)\ |
171 | | AS2( movq [Y+0*8], mm4)\ |
172 | | AS2( pxor mm3, mm4)\ |
173 | | AS2( movq mm4, [Y+1*8])\ |
174 | | AS2( psubq mm4, mm3)\ |
175 | | AS2( movq [Y+1*8], mm4)\ |
176 | | AS2( pxor mm4, [Y+2*8])\ |
177 | | AS2( movq mm3, mm4)\ |
178 | | AS2( movq [Y+2*8], mm4)\ |
179 | | AS2( paddq mm4, [Y+3*8])\ |
180 | | AS2( pxor mm3, mm7)\ |
181 | | AS2( psrlq mm3, 23)\ |
182 | | AS2( movq [Y+3*8], mm4)\ |
183 | | AS2( pxor mm3, mm4)\ |
184 | | AS2( movq mm4, [Y+4*8])\ |
185 | | AS2( psubq mm4, mm3)\ |
186 | | AS2( movq [Y+4*8], mm4)\ |
187 | | AS2( pxor mm4, [Y+5*8])\ |
188 | | AS2( movq [Y+5*8], mm4)\ |
189 | | AS2( paddq mm4, [Y+6*8])\ |
190 | | AS2( movq [Y+6*8], mm4)\ |
191 | | AS2( pxor mm4, [edx+4*2048+2*8])\ |
192 | | AS2( movq mm3, [Y+7*8])\ |
193 | | AS2( psubq mm3, mm4)\ |
194 | | AS2( movq [Y+7*8], mm3) |
195 | | |
// Three passes with multipliers 5, 7 and 9. The first reads the caller's
// data (esi); the later two read the expanded words in the stack scratch
// area (esp+4). The register roles rotate between passes.
196 | | SSE2_pass(mm0, mm1, mm2, 5, esi) |
197 | | SSE2_key_schedule(esp+4, esi) |
198 | | SSE2_pass(mm2, mm0, mm1, 7, esp+4) |
199 | | SSE2_key_schedule(esp+4, esp+4) |
200 | | SSE2_pass(mm1, mm2, mm0, 9, esp+4) |
201 | | |
// Combine with the state values from entry and store the result:
//   state[0] = mm0 ^ state[0];  state[1] = mm1 - old state[1] (in mm5);
//   state[2] = mm2 + state[2].
202 | | AS2( pxor mm0, [eax+0*8]) |
203 | | AS2( movq [eax+0*8], mm0) |
204 | | AS2( psubq mm1, mm5) |
205 | | AS2( movq [eax+1*8], mm1) |
206 | | AS2( paddq mm2, [eax+2*8]) |
207 | | AS2( movq [eax+2*8], mm2) |
208 | | |
// Restore the stack pointer saved earlier and leave MMX mode (emms).
209 | | AS_POP_IF86(sp) |
210 | | AS1( emms) |
211 | | |
212 | | #ifdef __GNUC__ |
213 | | AS_POP_IF86(bx) |
214 | | ATT_PREFIX |
215 | | : |
216 | | : "a" (state), "S" (data), "d" (table) |
217 | | : "%ecx", "%edi", "memory", "cc" |
218 | | ); |
219 | | #endif |
220 | | } |
221 | | else |
222 | | #endif |
// Portable C++ implementation.
223 | 573k | { |
224 | 573k | word64 a = state[0]; |
225 | 573k | word64 b = state[1]; |
226 | 573k | word64 c = state[2]; |
227 | 573k | word64 Y[8]; |
228 | | |
// t1..t4 address four consecutive 256-entry sub-tables inside `table`.
229 | 27.5M | #define t1 (table) |
230 | 27.5M | #define t2 (table+256) |
231 | 27.5M | #define t3 (table+256*2) |
232 | 27.5M | #define t4 (table+256*3) |
233 | | |
// round: XOR message word x into c, subtract a lookup over the even bytes
// of c from a, add a lookup over the odd bytes of c to b, then multiply b
// by the pass constant. The final statement has no trailing semicolon; the
// macro's user supplies it.
234 | 573k | #define round(a,b,c,x,mul) \ |
235 | 13.7M | c ^= x; \ |
236 | 13.7M | a -= t1[GETBYTE(c,0)] ^ t2[GETBYTE(c,2)] ^ t3[GETBYTE(c,4)] ^ t4[GETBYTE(c,6)]; \ |
237 | 13.7M | b += t4[GETBYTE(c,1)] ^ t3[GETBYTE(c,3)] ^ t2[GETBYTE(c,5)] ^ t1[GETBYTE(c,7)]; \ |
238 | 13.7M | b *= mul |
239 | | |
// pass: eight rounds over X[0..7]. i takes the values 0, 3, 6; each
// iteration runs three rounds with rotated variable roles, except the last
// (i == 6), which breaks out after two.
240 | 1.71M | #define pass(a,b,c,mul,X) {\ |
241 | 1.71M | int i=0;\ |
242 | 5.15M | while (true)\ |
243 | 5.15M | {\ |
244 | 5.15M | round(a,b,c,X[i+0],mul); \ |
245 | 5.15M | round(b,c,a,X[i+1],mul); \ |
246 | 5.15M | if (i==6)\ |
247 | 5.15M | break;\ |
248 | 5.15M | round(c,a,b,X[i+2],mul); \ |
249 | 3.43M | i+=3;\ |
250 | 3.43M | }} |
251 | | |
// key_schedule: computes the next eight message words Y from X. The first
// eight statements read X and write Y; the second eight update Y in place.
// Every X[k] is read before Y[k] is written, which is why the
// key_schedule(Y,Y) call below works in place.
252 | 573k | #define key_schedule(Y,X) \ |
253 | 1.14M | Y[0] = X[0] - (X[7]^W64LIT(0xA5A5A5A5A5A5A5A5)); \ |
254 | 1.14M | Y[1] = X[1] ^ Y[0]; \ |
255 | 1.14M | Y[2] = X[2] + Y[1]; \ |
256 | 1.14M | Y[3] = X[3] - (Y[2] ^ ((~Y[1])<<19)); \ |
257 | 1.14M | Y[4] = X[4] ^ Y[3]; \ |
258 | 1.14M | Y[5] = X[5] + Y[4]; \ |
259 | 1.14M | Y[6] = X[6] - (Y[5] ^ ((~Y[4])>>23)); \ |
260 | 1.14M | Y[7] = X[7] ^ Y[6]; \ |
261 | 1.14M | Y[0] += Y[7]; \ |
262 | 1.14M | Y[1] -= Y[0] ^ ((~Y[7])<<19); \ |
263 | 1.14M | Y[2] ^= Y[1]; \ |
264 | 1.14M | Y[3] += Y[2]; \ |
265 | 1.14M | Y[4] -= Y[3] ^ ((~Y[2])>>23); \ |
266 | 1.14M | Y[5] ^= Y[4]; \ |
267 | 1.14M | Y[6] += Y[5]; \ |
268 | 1.14M | Y[7] -= Y[6] ^ W64LIT(0x0123456789ABCDEF) |
269 | | |
// Three passes with multipliers 5, 7, 9; the roles of a, b, c rotate
// between passes and the message words are re-derived before passes 2 and 3.
270 | 573k | pass(a,b,c,5,data); |
271 | 573k | key_schedule(Y,data); |
272 | 573k | pass(c,a,b,7,Y); |
273 | 573k | key_schedule(Y,Y); |
274 | 573k | pass(b,c,a,9,Y); |
275 | | |
// Combine the working variables with the state values from entry.
276 | 573k | state[0] = a ^ state[0]; |
277 | 573k | state[1] = b - state[1]; |
278 | 573k | state[2] = c + state[2]; |
279 | 573k | } |
280 | 573k | } |
281 | | |
282 | | NAMESPACE_END |