/src/openssl/engines/e_padlock.c
Line | Count | Source |
1 | | /*- |
2 | | * Support for VIA PadLock Advanced Cryptography Engine (ACE) |
3 | | * Written by Michal Ludvig <michal@logix.cz> |
4 | | * http://www.logix.cz/michal |
5 | | * |
6 | | * Big thanks to Andy Polyakov for a help with optimization, |
7 | | * assembler fixes, port to MS Windows and a lot of other |
8 | | * valuable work on this engine! |
9 | | */ |
10 | | |
11 | | /* ==================================================================== |
12 | | * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved. |
13 | | * |
14 | | * Redistribution and use in source and binary forms, with or without |
15 | | * modification, are permitted provided that the following conditions |
16 | | * are met: |
17 | | * |
18 | | * 1. Redistributions of source code must retain the above copyright |
19 | | * notice, this list of conditions and the following disclaimer. |
20 | | * |
21 | | * 2. Redistributions in binary form must reproduce the above copyright |
22 | | * notice, this list of conditions and the following disclaimer in |
23 | | * the documentation and/or other materials provided with the |
24 | | * distribution. |
25 | | * |
26 | | * 3. All advertising materials mentioning features or use of this |
27 | | * software must display the following acknowledgment: |
28 | | * "This product includes software developed by the OpenSSL Project |
29 | | * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" |
30 | | * |
31 | | * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
32 | | * endorse or promote products derived from this software without |
33 | | * prior written permission. For written permission, please contact |
34 | | * licensing@OpenSSL.org. |
35 | | * |
36 | | * 5. Products derived from this software may not be called "OpenSSL" |
37 | | * nor may "OpenSSL" appear in their names without prior written |
38 | | * permission of the OpenSSL Project. |
39 | | * |
40 | | * 6. Redistributions of any form whatsoever must retain the following |
41 | | * acknowledgment: |
42 | | * "This product includes software developed by the OpenSSL Project |
43 | | * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" |
44 | | * |
45 | | * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
46 | | * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
47 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
48 | | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
49 | | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
50 | | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
51 | | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
52 | | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
53 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
54 | | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
55 | | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
56 | | * OF THE POSSIBILITY OF SUCH DAMAGE. |
57 | | * ==================================================================== |
58 | | * |
59 | | * This product includes cryptographic software written by Eric Young |
60 | | * (eay@cryptsoft.com). This product includes software written by Tim |
61 | | * Hudson (tjh@cryptsoft.com). |
62 | | * |
63 | | */ |
64 | | |
65 | | #include <stdio.h> |
66 | | #include <string.h> |
67 | | |
68 | | #include <openssl/opensslconf.h> |
69 | | #include <openssl/crypto.h> |
70 | | #include <openssl/dso.h> |
71 | | #include <openssl/engine.h> |
72 | | #include <openssl/evp.h> |
73 | | #ifndef OPENSSL_NO_AES |
74 | | # include <openssl/aes.h> |
75 | | #endif |
76 | | #include <openssl/rand.h> |
77 | | #include <openssl/err.h> |
78 | | |
79 | | #ifndef OPENSSL_NO_HW |
80 | | # ifndef OPENSSL_NO_HW_PADLOCK |
81 | | |
82 | | /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */ |
83 | | # if (OPENSSL_VERSION_NUMBER >= 0x00908000L) |
84 | | # ifndef OPENSSL_NO_DYNAMIC_ENGINE |
85 | | # define DYNAMIC_ENGINE |
86 | | # endif |
87 | | # elif (OPENSSL_VERSION_NUMBER >= 0x00907000L) |
88 | | # ifdef ENGINE_DYNAMIC_SUPPORT |
89 | | # define DYNAMIC_ENGINE |
90 | | # endif |
91 | | # else |
92 | | # error "Only OpenSSL >= 0.9.7 is supported" |
93 | | # endif |
94 | | |
95 | | /* |
96 | | * VIA PadLock AES is available *ONLY* on some x86 CPUs. Not only that it |
97 | | * doesn't exist elsewhere, but it even can't be compiled on other platforms! |
98 | | * |
99 | | * In addition, because of the heavy use of inline assembler, compiler choice |
100 | | * is limited to GCC and Microsoft C. |
101 | | */ |
102 | | # undef COMPILE_HW_PADLOCK |
103 | | # if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM) |
104 | | # if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \ |
105 | | (defined(_MSC_VER) && defined(_M_IX86)) |
106 | | # define COMPILE_HW_PADLOCK |
107 | | # endif |
108 | | # endif |
109 | | |
110 | | # ifdef OPENSSL_NO_DYNAMIC_ENGINE |
111 | | # ifdef COMPILE_HW_PADLOCK |
112 | | static ENGINE *ENGINE_padlock(void); |
113 | | # endif |
114 | | |
/*
 * Public entry point for builds without dynamic engine support.
 *
 * When the PadLock code is compiled in, build the engine with
 * ENGINE_padlock(), hand it to ENGINE_add(), release the local handle with
 * ENGINE_free() and clear the error queue.  When it is not compiled in
 * (e.g. on non-x86 CPUs) the function body is empty and it just returns.
 */
void ENGINE_load_padlock(void)
{
# ifdef COMPILE_HW_PADLOCK
    ENGINE *engine = ENGINE_padlock();

    if (engine != NULL) {
        ENGINE_add(engine);
        ENGINE_free(engine);
        ERR_clear_error();
    }
# endif
}
127 | | |
128 | | # endif |
129 | | |
130 | | # ifdef COMPILE_HW_PADLOCK |
131 | | /* |
132 | | * We do these includes here to avoid header problems on platforms that do |
133 | | * not have the VIA padlock anyway... |
134 | | */ |
135 | | # include <stdlib.h> |
136 | | # ifdef _WIN32 |
137 | | # include <malloc.h> |
138 | | # ifndef alloca |
139 | | # define alloca _alloca |
140 | | # endif |
141 | | # elif defined(__GNUC__) |
142 | | # ifndef alloca |
143 | | # define alloca(s) __builtin_alloca(s) |
144 | | # endif |
145 | | # endif |
146 | | |
147 | | /* Function for ENGINE detection and control */ |
148 | | static int padlock_available(void); |
149 | | static int padlock_init(ENGINE *e); |
150 | | |
151 | | /* RNG Stuff */ |
152 | | static RAND_METHOD padlock_rand; |
153 | | |
154 | | /* Cipher Stuff */ |
155 | | # ifndef OPENSSL_NO_AES |
156 | | static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, |
157 | | const int **nids, int nid); |
158 | | # endif |
159 | | |
160 | | /* Engine names */ |
161 | | static const char *padlock_id = "padlock"; |
162 | | static char padlock_name[100]; |
163 | | |
164 | | /* Available features */ |
165 | | static int padlock_use_ace = 0; /* Advanced Cryptography Engine */ |
166 | | static int padlock_use_rng = 0; /* Random Number Generator */ |
167 | | # ifndef OPENSSL_NO_AES |
168 | | static int padlock_aes_align_required = 1; |
169 | | # endif |
170 | | |
171 | | /* ===== Engine "management" functions ===== */ |
172 | | |
173 | | /* Prepare the ENGINE structure for registration */ |
174 | | static int padlock_bind_helper(ENGINE *e) |
175 | | { |
176 | | /* Check available features */ |
177 | | padlock_available(); |
178 | | |
179 | | # if 1 /* disable RNG for now, see commentary in |
180 | | * vicinity of RNG code */ |
181 | | padlock_use_rng = 0; |
182 | | # endif |
183 | | |
184 | | /* Generate a nice engine name with available features */ |
185 | | BIO_snprintf(padlock_name, sizeof(padlock_name), |
186 | | "VIA PadLock (%s, %s)", |
187 | | padlock_use_rng ? "RNG" : "no-RNG", |
188 | | padlock_use_ace ? "ACE" : "no-ACE"); |
189 | | |
190 | | /* Register everything or return with an error */ |
191 | | if (!ENGINE_set_id(e, padlock_id) || |
192 | | !ENGINE_set_name(e, padlock_name) || |
193 | | !ENGINE_set_init_function(e, padlock_init) || |
194 | | # ifndef OPENSSL_NO_AES |
195 | | (padlock_use_ace && !ENGINE_set_ciphers(e, padlock_ciphers)) || |
196 | | # endif |
197 | | (padlock_use_rng && !ENGINE_set_RAND(e, &padlock_rand))) { |
198 | | return 0; |
199 | | } |
200 | | |
201 | | /* Everything looks good */ |
202 | | return 1; |
203 | | } |
204 | | |
205 | | # ifdef OPENSSL_NO_DYNAMIC_ENGINE |
206 | | |
207 | | /* Constructor */ |
208 | | static ENGINE *ENGINE_padlock(void) |
209 | | { |
210 | | ENGINE *eng = ENGINE_new(); |
211 | | |
212 | | if (!eng) { |
213 | | return NULL; |
214 | | } |
215 | | |
216 | | if (!padlock_bind_helper(eng)) { |
217 | | ENGINE_free(eng); |
218 | | return NULL; |
219 | | } |
220 | | |
221 | | return eng; |
222 | | } |
223 | | |
224 | | # endif |
225 | | |
226 | | /* Check availability of the engine */ |
227 | | static int padlock_init(ENGINE *e) |
228 | | { |
229 | | return (padlock_use_rng || padlock_use_ace); |
230 | | } |
231 | | |
232 | | /* |
233 | | * This stuff is needed if this ENGINE is being compiled into a |
234 | | * self-contained shared-library. |
235 | | */ |
236 | | # ifdef DYNAMIC_ENGINE |
237 | | static int padlock_bind_fn(ENGINE *e, const char *id) |
238 | | { |
239 | | if (id && (strcmp(id, padlock_id) != 0)) { |
240 | | return 0; |
241 | | } |
242 | | |
243 | | if (!padlock_bind_helper(e)) { |
244 | | return 0; |
245 | | } |
246 | | |
247 | | return 1; |
248 | | } |
249 | | |
250 | | IMPLEMENT_DYNAMIC_CHECK_FN() |
251 | | IMPLEMENT_DYNAMIC_BIND_FN(padlock_bind_fn) |
252 | | # endif /* DYNAMIC_ENGINE */ |
253 | | /* ===== Here comes the "real" engine ===== */ |
254 | | # ifndef OPENSSL_NO_AES |
255 | | /* Some AES-related constants */ |
256 | | # define AES_BLOCK_SIZE 16 |
257 | | # define AES_KEY_SIZE_128 16 |
258 | | # define AES_KEY_SIZE_192 24 |
259 | | # define AES_KEY_SIZE_256 32 |
260 | | /* |
261 | | * Here we store the status information relevant to the current context. |
262 | | */ |
263 | | /* |
264 | | * BIG FAT WARNING: Inline assembler in PADLOCK_XCRYPT_ASM() depends on |
265 | | * the order of items in this structure. Don't blindly modify, reorder, |
266 | | * etc! |
267 | | */ |
268 | | struct padlock_cipher_data { |
269 | | unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */ |
270 | | union { |
271 | | unsigned int pad[4]; |
272 | | struct { |
273 | | int rounds:4; |
274 | | int dgst:1; /* n/a in C3 */ |
275 | | int align:1; /* n/a in C3 */ |
276 | | int ciphr:1; /* n/a in C3 */ |
277 | | unsigned int keygen:1; |
278 | | int interm:1; |
279 | | unsigned int encdec:1; |
280 | | int ksize:2; |
281 | | } b; |
282 | | } cword; /* Control word */ |
283 | | AES_KEY ks; /* Encryption key */ |
284 | | }; |
285 | | |
286 | | /* |
287 | | * Essentially this variable belongs in thread local storage. |
288 | | * Having this variable global on the other hand can only cause |
289 | | * few bogus key reloads [if any at all on single-CPU system], |
290 | | * so we accept the penalty... |
291 | | */ |
292 | | static volatile struct padlock_cipher_data *padlock_saved_context; |
293 | | # endif |
294 | | |
295 | | /*- |
296 | | * ======================================================= |
297 | | * Inline assembler section(s). |
298 | | * ======================================================= |
299 | | * Order of arguments is chosen to facilitate Windows port |
300 | | * using __fastcall calling convention. If you wish to add |
301 | | * more routines, keep in mind that first __fastcall |
302 | | * argument is passed in %ecx and second - in %edx. |
303 | | * ======================================================= |
304 | | */ |
305 | | # if defined(__GNUC__) && __GNUC__>=2 |
306 | | /* |
307 | | * As for excessive "push %ebx"/"pop %ebx" found all over. |
308 | | * When generating position-independent code GCC won't let |
309 | | * us use "b" in assembler templates nor even respect "ebx" |
310 | | * in "clobber description." Therefore the trouble... |
311 | | */ |
312 | | |
313 | | /* |
314 | | * Helper function - check if a CPUID instruction is available on this CPU |
315 | | */ |
/*
 * Returns 1 when the write to EFLAGS bit 21 (mask 0x200000) took effect,
 * 0 otherwise.  The modified EFLAGS value is left in place; nothing in this
 * function restores the original flags.
 */
316 | | static int padlock_insn_cpuid_available(void) |
317 | | { |
318 | | int result = -1; |
319 | | |
320 | | /* |
321 | | * We're checking if the bit #21 of EFLAGS can be toggled. If yes = |
322 | | * CPUID is available. |
323 | | */ |
/*
 * %ecx keeps the bit-21 value that was written, %eax ends up with the
 * bit-21 value that was read back; their XOR is stored in result and is 0
 * exactly when the written value stuck.
 */
324 | | asm volatile ("pushf\n" |
325 | | "popl %%eax\n" |
326 | | "xorl $0x200000, %%eax\n" |
327 | | "movl %%eax, %%ecx\n" |
328 | | "andl $0x200000, %%ecx\n" |
329 | | "pushl %%eax\n" |
330 | | "popf\n" |
331 | | "pushf\n" |
332 | | "popl %%eax\n" |
333 | | "andl $0x200000, %%eax\n" |
334 | | "xorl %%eax, %%ecx\n" |
335 | | "movl %%ecx, %0\n":"=r" (result)::"eax", "ecx"); |
336 | | |
337 | | return (result == 0); |
338 | | } |
339 | | |
340 | | /* |
341 | | * Load supported features of the CPU to see if the PadLock is available. |
342 | | */ |
343 | | static int padlock_available(void) |
344 | | { |
345 | | char vendor_string[16]; |
346 | | unsigned int eax, edx; |
347 | | |
348 | | /* First check if the CPUID instruction is available at all... */ |
349 | | if (!padlock_insn_cpuid_available()) |
350 | | return 0; |
351 | | |
352 | | /* Are we running on the Centaur (VIA) CPU? */ |
353 | | eax = 0x00000000; |
354 | | vendor_string[12] = 0; |
355 | | asm volatile ("pushl %%ebx\n" |
356 | | "cpuid\n" |
357 | | "movl %%ebx,(%%edi)\n" |
358 | | "movl %%edx,4(%%edi)\n" |
359 | | "movl %%ecx,8(%%edi)\n" |
360 | | "popl %%ebx":"+a" (eax):"D"(vendor_string):"ecx", "edx"); |
361 | | if (strcmp(vendor_string, "CentaurHauls") != 0) |
362 | | return 0; |
363 | | |
364 | | /* Check for Centaur Extended Feature Flags presence */ |
365 | | eax = 0xC0000000; |
366 | | asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax)::"ecx", "edx"); |
367 | | if (eax < 0xC0000001) |
368 | | return 0; |
369 | | |
370 | | /* Read the Centaur Extended Feature Flags */ |
371 | | eax = 0xC0000001; |
372 | | asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax), |
373 | | "=d"(edx)::"ecx"); |
374 | | |
375 | | /* Fill up some flags */ |
376 | | padlock_use_ace = ((edx & (0x3 << 6)) == (0x3 << 6)); |
377 | | padlock_use_rng = ((edx & (0x3 << 2)) == (0x3 << 2)); |
378 | | |
379 | | return padlock_use_ace + padlock_use_rng; |
380 | | } |
381 | | |
382 | | # ifndef OPENSSL_NO_AES |
383 | | # ifndef AES_ASM |
384 | | /* Our own htonl()/ntohl() */ |
385 | | static inline void padlock_bswapl(AES_KEY *ks) |
386 | | { |
387 | | size_t i = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]); |
388 | | unsigned int *key = ks->rd_key; |
389 | | |
390 | | while (i--) { |
391 | | asm volatile ("bswapl %0":"+r" (*key)); |
392 | | key++; |
393 | | } |
394 | | } |
395 | | # endif |
396 | | # endif |
397 | | |
398 | | /* |
399 | | * Force key reload from memory to the CPU microcode. Loading EFLAGS from the |
400 | | * stack clears EFLAGS[30] which does the trick. |
401 | | */ |
402 | | static inline void padlock_reload_key(void) |
403 | | { |
/*
 * Push EFLAGS and pop it straight back.  padlock_aes_init_key() calls this
 * after installing a key so that a reused context does not keep running
 * with the previous one.
 */
404 | | asm volatile ("pushfl; popfl"); |
405 | | } |
406 | | |
407 | | # ifndef OPENSSL_NO_AES |
408 | | /* |
409 | | * This is heuristic key context tracing. At first one |
410 | | * believes that one should use atomic swap instructions, |
411 | | * but it's not actually necessary. Point is that if |
412 | | * padlock_saved_context was changed by another thread |
413 | | * after we've read it and before we compare it with cdata, |
414 | | * our key *shall* be reloaded upon thread context switch |
415 | | * and we are therefore set in either case... |
416 | | */ |
417 | | static inline void padlock_verify_context(struct padlock_cipher_data *cdata) |
418 | | { |
/*
 * Step by step: push EFLAGS and test bit 30 of the pushed copy.  If the bit
 * is set and cdata differs from padlock_saved_context, "popfl" reloads
 * EFLAGS (the same key-reload trick padlock_reload_key() uses).  The
 * subl/addl pair leaves %esp balanced on both paths.  Finally cdata is
 * stored as the new padlock_saved_context.
 */
419 | | asm volatile ("pushfl\n" |
420 | | " btl $30,(%%esp)\n" |
421 | | " jnc 1f\n" |
422 | | " cmpl %2,%1\n" |
423 | | " je 1f\n" |
424 | | " popfl\n" |
425 | | " subl $4,%%esp\n" |
426 | | "1: addl $4,%%esp\n" |
427 | | " movl %2,%0":"+m" (padlock_saved_context) |
428 | | :"r"(padlock_saved_context), "r"(cdata):"cc"); |
429 | | } |
430 | | |
431 | | /* Template for padlock_xcrypt_* modes */ |
432 | | /* |
433 | | * BIG FAT WARNING: The offsets used with 'leal' instructions describe items |
434 | | * of the 'padlock_cipher_data' structure. |
435 | | */ |
/*
 * Each expansion defines
 *     static inline void *name(size_t cnt, struct padlock_cipher_data *cdata,
 *                              void *out, const void *inp);
 * Register set-up around the opcode given in rep_xcrypt:
 *     %eax = cdata on entry; its value after the instruction is returned
 *     %ecx = cnt  (callers in this file pass nbytes / AES_BLOCK_SIZE)
 *     %edi = out, %esi = inp
 *     %edx = cdata + 16, %ebx = cdata + 32 (%ebx is saved and restored)
 * Callers in CBC and CFB mode copy AES_BLOCK_SIZE bytes from the returned
 * pointer into ctx->iv.
 */
436 | | # define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \ |
437 | | static inline void *name(size_t cnt, \ |
438 | | struct padlock_cipher_data *cdata, \ |
439 | | void *out, const void *inp) \ |
440 | | { void *iv; \ |
441 | | asm volatile ( "pushl %%ebx\n" \ |
442 | | " leal 16(%0),%%edx\n" \ |
443 | | " leal 32(%0),%%ebx\n" \ |
444 | | rep_xcrypt "\n" \ |
445 | | " popl %%ebx" \ |
446 | | : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \ |
447 | | : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \ |
448 | | : "edx", "cc", "memory"); \ |
449 | | return iv; \ |
450 | | } |
451 | | |
452 | | /* Generate all functions with appropriate opcodes */ |
453 | | /* rep xcryptecb */ |
454 | | PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") |
455 | | /* rep xcryptcbc */ |
456 | | PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") |
457 | | /* rep xcryptcfb */ |
458 | | PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") |
459 | | /* rep xcryptofb */ |
460 | | PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") |
461 | | # endif |
462 | | /* The RNG call itself */ |
/*
 * Executes the raw xstore opcode with %edi = addr and %edx = edx_in and
 * returns whatever the instruction leaves in %eax.  Only the first unsigned
 * int at addr is declared as written ("=m").
 * NOTE(review): %edi is passed as an input-only operand; confirm against
 * the PadLock documentation whether xstore modifies %edi, in which case the
 * constraint would need to be "+D".
 */
463 | | static inline unsigned int padlock_xstore(void *addr, unsigned int edx_in) |
464 | | { |
465 | | unsigned int eax_out; |
466 | | |
467 | | asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */ |
468 | | :"=a" (eax_out), "=m"(*(unsigned *)addr) |
469 | | :"D"(addr), "d"(edx_in) |
470 | | ); |
471 | | |
472 | | return eax_out; |
473 | | } |
474 | | |
/*
 * Why not inline 'rep movsd'? I failed to find information on what value in
 * Direction Flag one can expect and consequently have to apply
 * "better-safe-than-sorry" approach and assume "undefined." I could
 * explicitly clear it and restore the original value upon return from
 * padlock_aes_cipher, but it's presumably too much trouble for too little
 * gain... In case you wonder 'rep xcrypt*' instructions above are *not*
 * affected by the Direction Flag and pointers advance toward larger
 * addresses unconditionally.
 */
/*
 * Copy n bytes from src to dst one long at a time and return dst.
 *
 * n is rounded down to a multiple of sizeof(long); any remaining tail bytes
 * are not copied.  When n is smaller than sizeof(long) (including 0) nothing
 * is copied, which matches the memcpy()-based variant used for MSVC.  Both
 * pointers are accessed as long, so callers must supply suitably aligned,
 * non-overlapping buffers.
 */
static inline unsigned char *padlock_memcpy(void *dst, const void *src,
                                            size_t n)
{
    long *d = dst;
    const long *s = src;

    /*
     * Test the word count before the first copy: a do/while here would
     * copy one word and then wrap the counter when n < sizeof(long).
     */
    for (n /= sizeof(*d); n != 0; n--)
        *d++ = *s++;

    return dst;
}
498 | | |
499 | | # elif defined(_MSC_VER) |
500 | | /* |
501 | | * Unlike GCC these are real functions. In order to minimize impact |
502 | | * on performance we adhere to __fastcall calling convention in |
503 | | * order to get two first arguments passed through %ecx and %edx. |
504 | | * Which kind of suits very well, as instructions in question use |
505 | | * both %ecx and %edx as input:-) |
506 | | */ |
/*
 * REP_XCRYPT(code) emits the four bytes 0xf3 0x0f 0xa7 <code>, i.e. the same
 * byte sequences the GCC variant spells out with ".byte".
 */
507 | | # define REP_XCRYPT(code) \ |
508 | | _asm _emit 0xf3 \ |
509 | | _asm _emit 0x0f _asm _emit 0xa7 \ |
510 | | _asm _emit code |
511 | | |
512 | | /* |
513 | | * BIG FAT WARNING: The offsets used with 'lea' instructions describe items |
514 | | * of the 'padlock_cipher_data' structure. |
515 | | */ |
/*
 * Each expansion defines a __fastcall function name(cnt, cdata, outp, inp).
 * Per the calling-convention note above, cnt arrives in ecx and cdata in
 * edx.  The body moves cdata to eax, points edx at cdata + 16 and ebx at
 * cdata + 32, loads edi/esi from outp/inp and emits the instruction.
 * There is no C return statement; the function's result is whatever the
 * instruction leaves in eax.
 */
516 | | # define PADLOCK_XCRYPT_ASM(name,code) \ |
517 | | static void * __fastcall \ |
518 | | name (size_t cnt, void *cdata, \ |
519 | | void *outp, const void *inp) \ |
520 | | { _asm mov eax,edx \ |
521 | | _asm lea edx,[eax+16] \ |
522 | | _asm lea ebx,[eax+32] \ |
523 | | _asm mov edi,outp \ |
524 | | _asm mov esi,inp \ |
525 | | REP_XCRYPT(code) \ |
526 | | } |
527 | | |
528 | | PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8) |
529 | | PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0) |
530 | | PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0) |
531 | | PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8) |
532 | | |
/*
 * MSVC variant of the RNG call.  outp arrives in ecx and is moved to edi;
 * code is already in edx.  The emitted bytes 0x0f 0xa7 0xc0 are the same
 * xstore opcode the GCC variant uses.  No C return statement: the result is
 * whatever the instruction leaves in eax.
 */
533 | | static int __fastcall padlock_xstore(void *outp, unsigned int code) |
534 | | { |
535 | | _asm mov edi,ecx |
536 | | _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0 |
537 | | } |
538 | | |
/*
 * MSVC variant of the key-reload trick: push EFLAGS and pop it straight
 * back, mirroring the "pushfl; popfl" of the GCC variant.
 */
539 | | static void __fastcall padlock_reload_key(void) |
540 | | { |
541 | | _asm pushfd |
542 | | _asm popfd |
543 | | } |
544 | | |
/*
 * MSVC variant of the context check; cdata arrives in ecx.  If bit 30 of
 * the pushed EFLAGS is set and ecx differs from padlock_saved_context,
 * popfd reloads EFLAGS; the sub/add pair keeps esp balanced on both paths.
 * ecx is then stored as the new padlock_saved_context.
 */
545 | | static void __fastcall padlock_verify_context(void *cdata) |
546 | | { |
547 | | _asm { |
548 | | pushfd |
549 | | bt DWORD PTR[esp],30 |
550 | | jnc skip |
551 | | cmp ecx,padlock_saved_context |
552 | | je skip |
553 | | popfd |
554 | | sub esp,4 |
555 | | skip: add esp,4 |
556 | | mov padlock_saved_context,ecx |
557 | | } |
558 | | } |
559 | | |
/*
 * MSVC variant of the feature probe.  The asm block:
 *  - toggles EFLAGS bit 21 and jumps to noluck if the bit did not change;
 *  - runs CPUID leaf 0 and compares ebx/edx/ecx with 'tneC', 'Hrua',
 *    'slua';
 *  - requires CPUID leaf 0xC0000000 to report at least 0xC0000001;
 *  - reads leaf 0xC0000001 and sets padlock_use_ace when edx bits 6 and 7
 *    are both set, padlock_use_rng when bits 2 and 3 are both set.
 * There is no C return statement: the result is whatever the block leaves
 * in eax, which is zeroed after every CPUID and incremented once per
 * feature that gets enabled.
 */
560 | | static int |
561 | | padlock_available(void) |
562 | | { |
563 | | _asm { |
564 | | pushfd |
565 | | pop eax |
566 | | mov ecx,eax |
567 | | xor eax,1<<21 |
568 | | push eax |
569 | | popfd |
570 | | pushfd |
571 | | pop eax |
572 | | xor eax,ecx |
573 | | bt eax,21 |
574 | | jnc noluck |
575 | | mov eax,0 |
576 | | cpuid |
577 | | xor eax,eax |
578 | | cmp ebx,'tneC' |
579 | | jne noluck |
580 | | cmp edx,'Hrua' |
581 | | jne noluck |
582 | | cmp ecx,'slua' |
583 | | jne noluck |
584 | | mov eax,0xC0000000 |
585 | | cpuid |
586 | | mov edx,eax |
587 | | xor eax,eax |
588 | | cmp edx,0xC0000001 |
589 | | jb noluck |
590 | | mov eax,0xC0000001 |
591 | | cpuid |
592 | | xor eax,eax |
593 | | bt edx,6 |
594 | | jnc skip_a |
595 | | bt edx,7 |
596 | | jnc skip_a |
597 | | mov padlock_use_ace,1 |
598 | | inc eax |
599 | | skip_a: bt edx,2 |
600 | | jnc skip_r |
601 | | bt edx,3 |
602 | | jnc skip_r |
603 | | mov padlock_use_rng,1 |
604 | | inc eax |
605 | | skip_r: |
606 | | noluck: |
607 | | } |
608 | | } |
609 | | |
/*
 * MSVC variant of the key byte-swap.  key arrives in ecx; the loop loads,
 * byte-swaps and stores 60 consecutive 32-bit words in place.  EFLAGS is
 * saved and restored around the "cld".
 * NOTE(review): the count 60 is hard-coded, whereas the GCC variant derives
 * it from sizeof(ks->rd_key) -- confirm the two agree for this AES_KEY.
 */
610 | | static void __fastcall padlock_bswapl(void *key) |
611 | | { |
612 | | _asm { |
613 | | pushfd |
614 | | cld |
615 | | mov esi,ecx |
616 | | mov edi,ecx |
617 | | mov ecx,60 |
618 | | up: lodsd |
619 | | bswap eax |
620 | | stosd |
621 | | loop up |
622 | | popfd |
623 | | } |
624 | | } |
625 | | |
626 | | /* |
627 | | * MS actually specifies status of Direction Flag and compiler even manages |
628 | | * to compile following as 'rep movsd' all by itself... |
629 | | */ |
/*
 * (n)&~3U rounds the byte count down to a multiple of 4 before calling
 * memcpy(), so up to three tail bytes are not copied; the result is the
 * destination pointer cast to unsigned char *.
 */
630 | | # define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U)) |
631 | | # endif |
632 | | /* ===== AES encryption/decryption ===== */ |
633 | | # ifndef OPENSSL_NO_AES |
/*
 * Compatibility aliases: when only the NID_aes_*_cfb128 / NID_aes_*_ofb128
 * names are defined, map the shorter NID_aes_*_cfb / NID_aes_*_ofb names
 * used throughout this file onto them.
 */
634 | | # if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb) |
635 | | # define NID_aes_128_cfb NID_aes_128_cfb128 |
636 | | # endif |
637 | | # if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb) |
638 | | # define NID_aes_128_ofb NID_aes_128_ofb128 |
639 | | # endif |
640 | | # if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb) |
641 | | # define NID_aes_192_cfb NID_aes_192_cfb128 |
642 | | # endif |
643 | | # if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb) |
644 | | # define NID_aes_192_ofb NID_aes_192_ofb128 |
645 | | # endif |
646 | | # if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb) |
647 | | # define NID_aes_256_cfb NID_aes_256_cfb128 |
648 | | # endif |
649 | | # if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb) |
650 | | # define NID_aes_256_ofb NID_aes_256_ofb128 |
651 | | # endif |
652 | | /* |
653 | | * List of supported ciphers. |
654 | | */ static int padlock_cipher_nids[] = { |
655 | | NID_aes_128_ecb, |
656 | | NID_aes_128_cbc, |
657 | | NID_aes_128_cfb, |
658 | | NID_aes_128_ofb, |
659 | | |
660 | | NID_aes_192_ecb, |
661 | | NID_aes_192_cbc, |
662 | | NID_aes_192_cfb, |
663 | | NID_aes_192_ofb, |
664 | | |
665 | | NID_aes_256_ecb, |
666 | | NID_aes_256_cbc, |
667 | | NID_aes_256_cfb, |
668 | | NID_aes_256_ofb, |
669 | | }; |
670 | | |
671 | | static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids) / |
672 | | sizeof(padlock_cipher_nids[0])); |
673 | | |
674 | | /* Function prototypes ... */ |
675 | | static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
676 | | const unsigned char *iv, int enc); |
677 | | static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
678 | | const unsigned char *in, size_t nbytes); |
679 | | |
/*
 * NEAREST_ALIGNED(ptr) rounds ptr up to the next 16-byte boundary (adding
 * 0..15 bytes; an already aligned pointer is returned unchanged).
 * ALIGNED_CIPHER_DATA(ctx) applies that to ctx->cipher_data and yields the
 * struct padlock_cipher_data that lives inside that buffer.
 */
680 | | # define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) + \ |
681 | | ( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F ) ) |
682 | | # define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\ |
683 | | NEAREST_ALIGNED(ctx->cipher_data)) |
684 | | |
/*
 * Block sizes selected by DECLARE_AES_EVP via token pasting on the mode
 * name: a full AES block for ECB/CBC, 1 for the byte-oriented OFB/CFB.
 */
685 | | # define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE |
686 | | # define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE |
687 | | # define EVP_CIPHER_block_size_OFB 1 |
688 | | # define EVP_CIPHER_block_size_CFB 1 |
689 | | |
690 | | /* |
691 | | * Declaring so many ciphers by hand would be a pain. Instead introduce a bit |
692 | | * of preprocessor magic :-) |
693 | | */ |
/*
 * DECLARE_AES_EVP(ksize, lmode, umode) defines the static EVP_CIPHER object
 * padlock_aes_<ksize>_<lmode>.  lmode is the lower-case mode used in the NID
 * and object names, umode the upper-case mode used for the block-size and
 * EVP_CIPH_*_MODE macros.  All variants share padlock_aes_init_key and
 * padlock_aes_cipher.
 * The context size is sizeof(struct padlock_cipher_data) + 16 so that
 * ALIGNED_CIPHER_DATA() can round the buffer up to a 16-byte boundary and
 * still have room for the whole structure.
 * NOTE(review): the initializer is positional and presumably follows the
 * member order of EVP_CIPHER in <openssl/evp.h> -- confirm when changing
 * OpenSSL versions.
 */
694 | | # define DECLARE_AES_EVP(ksize,lmode,umode) \ |
695 | | static const EVP_CIPHER padlock_aes_##ksize##_##lmode = { \ |
696 | | NID_aes_##ksize##_##lmode, \ |
697 | | EVP_CIPHER_block_size_##umode, \ |
698 | | AES_KEY_SIZE_##ksize, \ |
699 | | AES_BLOCK_SIZE, \ |
700 | | 0 | EVP_CIPH_##umode##_MODE, \ |
701 | | padlock_aes_init_key, \ |
702 | | padlock_aes_cipher, \ |
703 | | NULL, \ |
704 | | sizeof(struct padlock_cipher_data) + 16, \ |
705 | | EVP_CIPHER_set_asn1_iv, \ |
706 | | EVP_CIPHER_get_asn1_iv, \ |
707 | | NULL, \ |
708 | | NULL \ |
709 | | } |
710 | | |
711 | | DECLARE_AES_EVP(128, ecb, ECB); |
712 | | DECLARE_AES_EVP(128, cbc, CBC); |
713 | | DECLARE_AES_EVP(128, cfb, CFB); |
714 | | DECLARE_AES_EVP(128, ofb, OFB); |
715 | | |
716 | | DECLARE_AES_EVP(192, ecb, ECB); |
717 | | DECLARE_AES_EVP(192, cbc, CBC); |
718 | | DECLARE_AES_EVP(192, cfb, CFB); |
719 | | DECLARE_AES_EVP(192, ofb, OFB); |
720 | | |
721 | | DECLARE_AES_EVP(256, ecb, ECB); |
722 | | DECLARE_AES_EVP(256, cbc, CBC); |
723 | | DECLARE_AES_EVP(256, cfb, CFB); |
724 | | DECLARE_AES_EVP(256, ofb, OFB); |
725 | | |
726 | | static int |
727 | | padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids, |
728 | | int nid) |
729 | | { |
730 | | /* No specific cipher => return a list of supported nids ... */ |
731 | | if (!cipher) { |
732 | | *nids = padlock_cipher_nids; |
733 | | return padlock_cipher_nids_num; |
734 | | } |
735 | | |
736 | | /* ... or the requested "cipher" otherwise */ |
737 | | switch (nid) { |
738 | | case NID_aes_128_ecb: |
739 | | *cipher = &padlock_aes_128_ecb; |
740 | | break; |
741 | | case NID_aes_128_cbc: |
742 | | *cipher = &padlock_aes_128_cbc; |
743 | | break; |
744 | | case NID_aes_128_cfb: |
745 | | *cipher = &padlock_aes_128_cfb; |
746 | | break; |
747 | | case NID_aes_128_ofb: |
748 | | *cipher = &padlock_aes_128_ofb; |
749 | | break; |
750 | | |
751 | | case NID_aes_192_ecb: |
752 | | *cipher = &padlock_aes_192_ecb; |
753 | | break; |
754 | | case NID_aes_192_cbc: |
755 | | *cipher = &padlock_aes_192_cbc; |
756 | | break; |
757 | | case NID_aes_192_cfb: |
758 | | *cipher = &padlock_aes_192_cfb; |
759 | | break; |
760 | | case NID_aes_192_ofb: |
761 | | *cipher = &padlock_aes_192_ofb; |
762 | | break; |
763 | | |
764 | | case NID_aes_256_ecb: |
765 | | *cipher = &padlock_aes_256_ecb; |
766 | | break; |
767 | | case NID_aes_256_cbc: |
768 | | *cipher = &padlock_aes_256_cbc; |
769 | | break; |
770 | | case NID_aes_256_cfb: |
771 | | *cipher = &padlock_aes_256_cfb; |
772 | | break; |
773 | | case NID_aes_256_ofb: |
774 | | *cipher = &padlock_aes_256_ofb; |
775 | | break; |
776 | | |
777 | | default: |
778 | | /* Sorry, we don't support this NID */ |
779 | | *cipher = NULL; |
780 | | return 0; |
781 | | } |
782 | | |
783 | | return 1; |
784 | | } |
785 | | |
786 | | /* Prepare the encryption key for PadLock usage */ |
787 | | static int |
788 | | padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
789 | | const unsigned char *iv, int enc) |
790 | | { |
791 | | struct padlock_cipher_data *cdata; |
792 | | int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8; |
793 | | |
794 | | if (key == NULL) |
795 | | return 0; /* ERROR */ |
796 | | |
797 | | cdata = ALIGNED_CIPHER_DATA(ctx); |
798 | | memset(cdata, 0, sizeof(struct padlock_cipher_data)); |
799 | | |
800 | | /* Prepare Control word. */ |
801 | | if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE) |
802 | | cdata->cword.b.encdec = 0; |
803 | | else |
804 | | cdata->cword.b.encdec = (ctx->encrypt == 0); |
805 | | cdata->cword.b.rounds = 10 + (key_len - 128) / 32; |
806 | | cdata->cword.b.ksize = (key_len - 128) / 64; |
807 | | |
808 | | switch (key_len) { |
809 | | case 128: |
810 | | /* |
811 | | * PadLock can generate an extended key for AES128 in hardware |
812 | | */ |
813 | | memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128); |
814 | | cdata->cword.b.keygen = 0; |
815 | | break; |
816 | | |
817 | | case 192: |
818 | | case 256: |
819 | | /* |
820 | | * Generate an extended AES key in software. Needed for AES192/AES256 |
821 | | */ |
822 | | /* |
823 | | * Well, the above applies to Stepping 8 CPUs and is listed as |
824 | | * hardware errata. They most likely will fix it at some point and |
825 | | * then a check for stepping would be due here. |
826 | | */ |
827 | | if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE || |
828 | | EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE || enc) |
829 | | AES_set_encrypt_key(key, key_len, &cdata->ks); |
830 | | else |
831 | | AES_set_decrypt_key(key, key_len, &cdata->ks); |
832 | | # ifndef AES_ASM |
833 | | /* |
834 | | * OpenSSL C functions use byte-swapped extended key. |
835 | | */ |
836 | | padlock_bswapl(&cdata->ks); |
837 | | # endif |
838 | | cdata->cword.b.keygen = 1; |
839 | | break; |
840 | | |
841 | | default: |
842 | | /* ERROR */ |
843 | | return 0; |
844 | | } |
845 | | |
846 | | /* |
847 | | * This is done to cover for cases when user reuses the |
848 | | * context for new key. The catch is that if we don't do |
849 | | * this, padlock_eas_cipher might proceed with old key... |
850 | | */ |
851 | | padlock_reload_key(); |
852 | | |
853 | | return 1; |
854 | | } |
855 | | |
856 | | /*- |
857 | | * Simplified version of padlock_aes_cipher() used when |
858 | | * 1) both input and output buffers are at aligned addresses. |
859 | | * or when |
860 | | * 2) running on a newer CPU that doesn't require aligned buffers. |
861 | | */ |
862 | | static int |
863 | | padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg, |
864 | | const unsigned char *in_arg, size_t nbytes) |
865 | | { |
866 | | struct padlock_cipher_data *cdata; |
867 | | void *iv; |
868 | | |
869 | | cdata = ALIGNED_CIPHER_DATA(ctx); |
870 | | padlock_verify_context(cdata); |
871 | | |
872 | | switch (EVP_CIPHER_CTX_mode(ctx)) { |
873 | | case EVP_CIPH_ECB_MODE: |
874 | | padlock_xcrypt_ecb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg); |
875 | | break; |
876 | | |
877 | | case EVP_CIPH_CBC_MODE: |
878 | | memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); |
879 | | iv = padlock_xcrypt_cbc(nbytes / AES_BLOCK_SIZE, cdata, out_arg, |
880 | | in_arg); |
881 | | memcpy(ctx->iv, iv, AES_BLOCK_SIZE); |
882 | | break; |
883 | | |
884 | | case EVP_CIPH_CFB_MODE: |
885 | | memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); |
886 | | iv = padlock_xcrypt_cfb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, |
887 | | in_arg); |
888 | | memcpy(ctx->iv, iv, AES_BLOCK_SIZE); |
889 | | break; |
890 | | |
891 | | case EVP_CIPH_OFB_MODE: |
892 | | memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); |
893 | | padlock_xcrypt_ofb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg); |
894 | | memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE); |
895 | | break; |
896 | | |
897 | | default: |
898 | | return 0; |
899 | | } |
900 | | |
901 | | memset(cdata->iv, 0, AES_BLOCK_SIZE); |
902 | | |
903 | | return 1; |
904 | | } |
905 | | |
906 | | # ifndef PADLOCK_CHUNK |
907 | | # define PADLOCK_CHUNK 512 /* Must be a power of 2, at least 16 */ |
908 | | # endif |
909 | | # if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1) |
910 | | # error "insane PADLOCK_CHUNK..." |
911 | | # endif |
912 | | |
913 | | /* |
914 | |  * Re-align the arguments to 16-byte boundaries and run the encryption |
915 | |  * function itself. This function is not AES-specific. A partial CFB/OFB |
916 | |  * block pending from the previous call (ctx->num) is completed first; a |
917 | |  * trailing partial block is handled bytewise and recorded in ctx->num. |
918 | |  * |
919 | |  * Returns 1 on success, 0 for a bogus ctx->num or an unsupported mode. |
920 | |  */ |
921 | | static int |
922 | | padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg, |
923 | |                    const unsigned char *in_arg, size_t nbytes) |
924 | | { |
925 | |     struct padlock_cipher_data *cdata; |
926 | |     const void *inp; |
927 | |     unsigned char *out; |
928 | |     void *iv; |
929 | |     int inp_misaligned, out_misaligned, realign_in_loop; |
930 | |     size_t chunk, allocated = 0; |
931 | | |
932 | |     /* |
933 | |      * ctx->num is maintained in byte-oriented modes, such as CFB and OFB... |
934 | |      */ |
935 | |     if ((chunk = ctx->num)) {   /* borrow chunk variable */ |
936 | |         unsigned char *ivp = ctx->iv; |
937 | | |
938 | |         switch (EVP_CIPHER_CTX_mode(ctx)) { |
939 | |         case EVP_CIPH_CFB_MODE: |
940 | |             if (chunk >= AES_BLOCK_SIZE) |
941 | |                 return 0;       /* bogus value */ |
942 | | |
943 | |             if (ctx->encrypt) |
944 | |                 while (chunk < AES_BLOCK_SIZE && nbytes != 0) { |
945 | |                     ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk]; |
946 | |                     chunk++, nbytes--; |
947 | |             } else |
948 | |                 while (chunk < AES_BLOCK_SIZE && nbytes != 0) { |
949 | |                     unsigned char c = *(in_arg++); |
950 | |                     *(out_arg++) = c ^ ivp[chunk]; |
951 | |                     ivp[chunk++] = c, nbytes--; |
952 | |                 } |
953 | | |
954 | |             ctx->num = chunk % AES_BLOCK_SIZE; /* wraps to 0 once the block is complete */ |
955 | |             break; |
956 | |         case EVP_CIPH_OFB_MODE: |
957 | |             if (chunk >= AES_BLOCK_SIZE) |
958 | |                 return 0;       /* bogus value */ |
959 | | |
960 | |             while (chunk < AES_BLOCK_SIZE && nbytes != 0) { |
961 | |                 *(out_arg++) = *(in_arg++) ^ ivp[chunk]; |
962 | |                 chunk++, nbytes--; |
963 | |             } |
964 | | |
965 | |             ctx->num = chunk % AES_BLOCK_SIZE; /* wraps to 0 once the block is complete */ |
966 | |             break; |
967 | |         } |
968 | |     } |
969 | | |
970 | |     if (nbytes == 0) |
971 | |         return 1; |
972 | | |
973 | |     /* nbytes is a multiple of AES_BLOCK_SIZE in ECB/CBC, arbitrary in CFB/OFB */ |
974 | | |
975 | |     /* |
976 | |      * VIA promises CPUs that won't require alignment in the future. For now |
977 | |      * padlock_aes_align_required is initialized to 1 and the condition is |
978 | |      * never met... |
979 | |      * |
980 | |      * C7 core is capable to manage unaligned input in non-ECB[!] mode, but |
981 | |      * performance penalties appear to be approximately same as for software |
982 | |      * alignment below or ~3x. They promise to improve it in the future, but |
983 | |      * for now we can just as well pretend that it can only handle aligned |
984 | |      * input... |
985 | |      */ |
986 | |     if (!padlock_aes_align_required && (nbytes % AES_BLOCK_SIZE) == 0) |
987 | |         return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes); |
988 | | |
989 | |     inp_misaligned = (((size_t)in_arg) & 0x0F); |
990 | |     out_misaligned = (((size_t)out_arg) & 0x0F); |
991 | | |
992 | |     /* |
993 | |      * Note that even if output is aligned and input not, I still prefer to |
994 | |      * loop instead of copy the whole input and then encrypt in one stroke. |
995 | |      * This is done in order to improve L1 cache utilization... |
996 | |      */ |
997 | |     realign_in_loop = out_misaligned | inp_misaligned; |
998 | | |
999 | |     if (!realign_in_loop && (nbytes % AES_BLOCK_SIZE) == 0) |
1000 | |         return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes); |
1001 | | |
1002 | |     /* |
1003 | |      * The first piece is the odd-sized one so that every later piece is a |
1004 | |      * full PADLOCK_CHUNK. A remainder shorter than one block (CFB/OFB only) |
1005 | |      * must not be picked as the first piece: masked down to zero below, it |
1006 | |      * would send the whole input through the bytewise tail and past the IV. |
1007 | |      */ |
1008 | |     chunk = nbytes % PADLOCK_CHUNK; |
1009 | |     if (chunk < AES_BLOCK_SIZE && nbytes >= PADLOCK_CHUNK) |
1010 | |         chunk = PADLOCK_CHUNK; |
1011 | | |
1012 | |     if (out_misaligned) { |
1013 | |         /* optimize for small input */ |
1014 | |         allocated = (chunk < nbytes ? PADLOCK_CHUNK : nbytes); |
1015 | |         out = alloca(0x10 + allocated); /* 16 spare bytes leave room for rounding up */ |
1016 | |         out = NEAREST_ALIGNED(out); |
1017 | |     } else |
1018 | |         out = out_arg; |
1019 | | |
1020 | |     cdata = ALIGNED_CIPHER_DATA(ctx); |
1021 | |     padlock_verify_context(cdata); |
1022 | | |
1023 | |     switch (EVP_CIPHER_CTX_mode(ctx)) { |
1024 | |     case EVP_CIPH_ECB_MODE: |
1025 | |         do { |
1026 | |             if (inp_misaligned) |
1027 | |                 inp = padlock_memcpy(out, in_arg, chunk); /* stage input in the output buffer */ |
1028 | |             else |
1029 | |                 inp = in_arg; |
1030 | |             in_arg += chunk; |
1031 | | |
1032 | |             padlock_xcrypt_ecb(chunk / AES_BLOCK_SIZE, cdata, out, inp); |
1033 | | |
1034 | |             if (out_misaligned) |
1035 | |                 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; |
1036 | |             else |
1037 | |                 out = out_arg += chunk; /* aligned output: keep writing in place */ |
1038 | | |
1039 | |             nbytes -= chunk; |
1040 | |             chunk = PADLOCK_CHUNK; |
1041 | |         } while (nbytes); |
1042 | |         break; |
1043 | | |
1044 | |     case EVP_CIPH_CBC_MODE: |
1045 | |         memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); |
1046 | |         goto cbc_shortcut;      /* first pass skips the IV reload and chunk reset */ |
1047 | |         do { |
1048 | |             if (iv != cdata->iv) |
1049 | |                 memcpy(cdata->iv, iv, AES_BLOCK_SIZE); |
1050 | |             chunk = PADLOCK_CHUNK; |
1051 | |  cbc_shortcut:                 /* optimize for small input */ |
1052 | |             if (inp_misaligned) |
1053 | |                 inp = padlock_memcpy(out, in_arg, chunk); |
1054 | |             else |
1055 | |                 inp = in_arg; |
1056 | |             in_arg += chunk; |
1057 | | |
1058 | |             iv = padlock_xcrypt_cbc(chunk / AES_BLOCK_SIZE, cdata, out, inp); |
1059 | | |
1060 | |             if (out_misaligned) |
1061 | |                 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; |
1062 | |             else |
1063 | |                 out = out_arg += chunk; |
1064 | | |
1065 | |         } while (nbytes -= chunk); |
1066 | |         memcpy(ctx->iv, iv, AES_BLOCK_SIZE); |
1067 | |         break; |
1068 | | |
1069 | |     case EVP_CIPH_CFB_MODE: |
1070 | |         memcpy(iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE); |
1071 | |         chunk &= ~(AES_BLOCK_SIZE - 1); /* whole blocks only; zero means nbytes < 16 */ |
1072 | |         if (chunk) |
1073 | |             goto cfb_shortcut; |
1074 | |         else |
1075 | |             goto cfb_skiploop; |
1076 | |         do { |
1077 | |             if (iv != cdata->iv) |
1078 | |                 memcpy(cdata->iv, iv, AES_BLOCK_SIZE); |
1079 | |             chunk = PADLOCK_CHUNK; |
1080 | |  cfb_shortcut:                 /* optimize for small input */ |
1081 | |             if (inp_misaligned) |
1082 | |                 inp = padlock_memcpy(out, in_arg, chunk); |
1083 | |             else |
1084 | |                 inp = in_arg; |
1085 | |             in_arg += chunk; |
1086 | | |
1087 | |             iv = padlock_xcrypt_cfb(chunk / AES_BLOCK_SIZE, cdata, out, inp); |
1088 | | |
1089 | |             if (out_misaligned) |
1090 | |                 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; |
1091 | |             else |
1092 | |                 out = out_arg += chunk; |
1093 | | |
1094 | |             nbytes -= chunk; |
1095 | |         } while (nbytes >= AES_BLOCK_SIZE); |
1096 | | |
1097 | |  cfb_skiploop: |
1098 | |         if (nbytes) {           /* fewer than AES_BLOCK_SIZE bytes remain */ |
1099 | |             unsigned char *ivp = cdata->iv; |
1100 | | |
1101 | |             if (iv != ivp) { |
1102 | |                 memcpy(ivp, iv, AES_BLOCK_SIZE); |
1103 | |                 iv = ivp; |
1104 | |             } |
1105 | |             ctx->num = nbytes; |
1106 | |             if (cdata->cword.b.encdec) { |
1107 | |                 cdata->cword.b.encdec = 0; /* cleared only around this one-block ECB call */ |
1108 | |                 padlock_reload_key(); |
1109 | |                 padlock_xcrypt_ecb(1, cdata, ivp, ivp); |
1110 | |                 cdata->cword.b.encdec = 1; |
1111 | |                 padlock_reload_key(); |
1112 | |                 while (nbytes) { |
1113 | |                     unsigned char c = *(in_arg++); |
1114 | |                     *(out_arg++) = c ^ *ivp; |
1115 | |                     *(ivp++) = c, nbytes--; |
1116 | |                 } |
1117 | |             } else { |
1118 | |                 padlock_reload_key(); |
1119 | |                 padlock_xcrypt_ecb(1, cdata, ivp, ivp); |
1120 | |                 padlock_reload_key(); |
1121 | |                 while (nbytes) { |
1122 | |                     *ivp = *(out_arg++) = *(in_arg++) ^ *ivp; |
1123 | |                     ivp++, nbytes--; |
1124 | |                 } |
1125 | |             } |
1126 | |         } |
1127 | | |
1128 | |         memcpy(ctx->iv, iv, AES_BLOCK_SIZE); |
1129 | |         break; |
1130 | | |
1131 | |     case EVP_CIPH_OFB_MODE: |
1132 | |         memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); |
1133 | |         chunk &= ~(AES_BLOCK_SIZE - 1); /* whole blocks only; zero means nbytes < 16 */ |
1134 | |         if (chunk) |
1135 | |             do { |
1136 | |                 if (inp_misaligned) |
1137 | |                     inp = padlock_memcpy(out, in_arg, chunk); |
1138 | |                 else |
1139 | |                     inp = in_arg; |
1140 | |                 in_arg += chunk; |
1141 | | |
1142 | |                 padlock_xcrypt_ofb(chunk / AES_BLOCK_SIZE, cdata, out, inp); |
1143 | | |
1144 | |                 if (out_misaligned) |
1145 | |                     out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; |
1146 | |                 else |
1147 | |                     out = out_arg += chunk; |
1148 | | |
1149 | |                 nbytes -= chunk; |
1150 | |                 chunk = PADLOCK_CHUNK; |
1151 | |             } while (nbytes >= AES_BLOCK_SIZE); |
1152 | | |
1153 | |         if (nbytes) {           /* fewer than AES_BLOCK_SIZE bytes remain */ |
1154 | |             unsigned char *ivp = cdata->iv; |
1155 | | |
1156 | |             ctx->num = nbytes; |
1157 | |             padlock_reload_key(); /* empirically found */ |
1158 | |             padlock_xcrypt_ecb(1, cdata, ivp, ivp); |
1159 | |             padlock_reload_key(); /* empirically found */ |
1160 | |             while (nbytes) { |
1161 | |                 *(out_arg++) = *(in_arg++) ^ *ivp; |
1162 | |                 ivp++, nbytes--; |
1163 | |             } |
1164 | |         } |
1165 | | |
1166 | |         memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE); |
1167 | |         break; |
1168 | | |
1169 | |     default: |
1170 | |         return 0; |
1171 | |     } |
1172 | | |
1173 | |     /* Clean the realign buffer if it was used */ |
1174 | |     if (out_misaligned) { |
1175 | |         volatile unsigned long *p = (void *)out; /* volatile stores, one word at a time */ |
1176 | |         size_t n = allocated / sizeof(*p); |
1177 | |         while (n--) |
1178 | |             *p++ = 0; |
1179 | |     } |
1180 | | |
1181 | |     memset(cdata->iv, 0, AES_BLOCK_SIZE); |
1182 | | |
1183 | |     return 1; |
1184 | | } |
1185 | | |
1186 | | # endif /* OPENSSL_NO_AES */ |
1187 | | |
1188 | | /* ===== Random Number Generator ===== */ |
1189 | | /* |
1190 | | * This code is not engaged. The reason is that it does not comply |
1191 | | * with recommendations for VIA RNG usage for secure applications |
1192 | | * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it |
1193 | | * provide meaningful error control... |
1194 | | */ |
1195 | | /* |
1196 | |  * Fills output[0..count) with bytes obtained through padlock_xstore(). |
1197 | |  * Returns 1 once count bytes are stored, 0 as soon as a status word is bad. |
1198 | |  */ |
1199 | | static int padlock_rand_bytes(unsigned char *output, int count) |
1200 | | { |
1201 | |     unsigned int status, stored, scratch; |
1202 | | |
1203 | |     /* Bulk phase: each accepted store puts 8 bytes straight into the caller's buffer */ |
1204 | |     while (count >= 8) { |
1205 | |         status = padlock_xstore(output, 0); |
1206 | |         /* bit 6 clear: RNG disabled; bits 10..14 cover DC bias, Raw Bits and String Filter */ |
1207 | |         if ((status & (1 << 6)) == 0 || (status & (0x1F << 10)) != 0) |
1208 | |             return 0; |
1209 | |         stored = status & 0x1F; |
1210 | |         if (stored == 0) |
1211 | |             continue;           /* no data, retry... */ |
1212 | |         if (stored != 8) |
1213 | |             return 0;           /* fatal failure... */ |
1214 | |         output += 8; |
1215 | |         count -= 8; |
1216 | |     } |
1217 | |     /* Tail phase: one byte per store, bounced through a local word */ |
1218 | |     while (count > 0) { |
1219 | |         status = padlock_xstore(&scratch, 3); |
1220 | |         if ((status & (1 << 6)) == 0 || (status & (0x1F << 10)) != 0) |
1221 | |             return 0;           /* same RNG-disabled / filter checks as above */ |
1222 | |         stored = status & 0x1F; |
1223 | |         if (stored == 0) |
1224 | |             continue;           /* no data, retry... */ |
1225 | |         if (stored != 1) |
1226 | |             return 0;           /* fatal failure... */ |
1227 | |         *output++ = (unsigned char)scratch; |
1228 | |         count--; |
1229 | |     } |
1230 | | |
1231 | |     *(volatile unsigned int *)&scratch = 0; /* scrub the bounce word on the success path */ |
1232 | | |
1233 | |     return 1; |
1234 | | } |
1235 | | |
1236 | | /* Dummy but necessary function: status hook for the RAND_METHOD table, unconditionally reports 1 */ |
1237 | | static int padlock_rand_status(void) |
1238 | | { |
1239 | | return 1; /* no hardware state is consulted */ |
1240 | | } |
1241 | | |
1242 | | /* Prepare structure for registration: only bytes, pseudorand and status are supplied */ |
1243 | | static RAND_METHOD padlock_rand = { |
1244 | | NULL, /* seed: not provided */ |
1245 | | padlock_rand_bytes, /* bytes */ |
1246 | | NULL, /* cleanup: not provided */ |
1247 | | NULL, /* add: not provided */ |
1248 | | padlock_rand_bytes, /* pseudorand: same routine as bytes */ |
1249 | | padlock_rand_status, /* rand status */ |
1250 | | }; |
1251 | | |
1252 | | # else /* !COMPILE_HW_PADLOCK */ |
1253 | | # ifndef OPENSSL_NO_DYNAMIC_ENGINE |
1254 | | OPENSSL_EXPORT |
1255 | | int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns); /* prototype ahead of the definition */ |
1256 | | OPENSSL_EXPORT |
1257 | | int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns) |
1258 | | { |
1259 | | return 0; /* stub for builds without COMPILE_HW_PADLOCK: arguments are ignored; 0 presumably tells the loader the bind failed - confirm */ |
1260 | | } |
1261 | | |
1262 | | IMPLEMENT_DYNAMIC_CHECK_FN() |
1263 | | # endif |
1264 | | # endif /* COMPILE_HW_PADLOCK */ |
1265 | | # endif /* !OPENSSL_NO_HW_PADLOCK */ |
1266 | | #endif /* !OPENSSL_NO_HW */ |