1 /* 2 * Support for VIA PadLock Advanced Cryptography Engine (ACE) 3 * Written by Michal Ludvig <michal@logix.cz> 4 * http://www.logix.cz/michal 5 * 6 * Big thanks to Andy Polyakov for a help with optimization, 7 * assembler fixes, port to MS Windows and a lot of other 8 * valuable work on this engine! 9 */ 10 11 /* ==================================================================== 12 * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 18 * 1. Redistributions of source code must retain the above copyright 19 * notice, this list of conditions and the following disclaimer. 20 * 21 * 2. Redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in 23 * the documentation and/or other materials provided with the 24 * distribution. 25 * 26 * 3. All advertising materials mentioning features or use of this 27 * software must display the following acknowledgment: 28 * "This product includes software developed by the OpenSSL Project 29 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" 30 * 31 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 32 * endorse or promote products derived from this software without 33 * prior written permission. For written permission, please contact 34 * licensing@OpenSSL.org. 35 * 36 * 5. Products derived from this software may not be called "OpenSSL" 37 * nor may "OpenSSL" appear in their names without prior written 38 * permission of the OpenSSL Project. 39 * 40 * 6. Redistributions of any form whatsoever must retain the following 41 * acknowledgment: 42 * "This product includes software developed by the OpenSSL Project 43 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" 44 * 45 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 46 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 48 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 49 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 50 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 51 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 52 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 54 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 55 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 56 * OF THE POSSIBILITY OF SUCH DAMAGE. 57 * ==================================================================== 58 * 59 * This product includes cryptographic software written by Eric Young 60 * (eay@cryptsoft.com). This product includes software written by Tim 61 * Hudson (tjh@cryptsoft.com). 62 * 63 */ 64 65 66 #include <stdio.h> 67 #include <string.h> 68 69 #include <openssl/opensslconf.h> 70 #include <openssl/crypto.h> 71 #include <openssl/dso.h> 72 #include <openssl/engine.h> 73 #include <openssl/evp.h> 74 #ifndef OPENSSL_NO_AES 75 #include <openssl/aes.h> 76 #endif 77 #include <openssl/rand.h> 78 #include <openssl/err.h> 79 80 #ifndef OPENSSL_NO_HW 81 #ifndef OPENSSL_NO_HW_PADLOCK 82 83 /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */ 84 #if (OPENSSL_VERSION_NUMBER >= 0x00908000L) 85 # ifndef OPENSSL_NO_DYNAMIC_ENGINE 86 # define DYNAMIC_ENGINE 87 # endif 88 #elif (OPENSSL_VERSION_NUMBER >= 0x00907000L) 89 # ifdef ENGINE_DYNAMIC_SUPPORT 90 # define DYNAMIC_ENGINE 91 # endif 92 #else 93 # error "Only OpenSSL >= 0.9.7 is supported" 94 #endif 95 96 /* VIA PadLock AES is available *ONLY* on some x86 CPUs. 97 Not only that it doesn't exist elsewhere, but it 98 even can't be compiled on other platforms! 99 100 In addition, because of the heavy use of inline assembler, 101 compiler choice is limited to GCC and Microsoft C. */ 102 #undef COMPILE_HW_PADLOCK 103 #if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM) 104 # if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \ 105 (defined(_MSC_VER) && defined(_M_IX86)) 106 # define COMPILE_HW_PADLOCK 107 # endif 108 #endif 109 110 #ifdef OPENSSL_NO_DYNAMIC_ENGINE 111 #ifdef COMPILE_HW_PADLOCK 112 static ENGINE *ENGINE_padlock (void); 113 #endif 114 115 void ENGINE_load_padlock (void) 116 { 117 /* On non-x86 CPUs it just returns. */ 118 #ifdef COMPILE_HW_PADLOCK 119 ENGINE *toadd = ENGINE_padlock (); 120 if (!toadd) return; 121 ENGINE_add (toadd); 122 ENGINE_free (toadd); 123 ERR_clear_error (); 124 #endif 125 } 126 127 #endif 128 129 #ifdef COMPILE_HW_PADLOCK 130 /* We do these includes here to avoid header problems on platforms that 131 do not have the VIA padlock anyway... */ 132 #include <stdlib.h> 133 #ifdef _WIN32 134 # include <malloc.h> 135 # ifndef alloca 136 # define alloca _alloca 137 # endif 138 #elif defined(__GNUC__) 139 # ifndef alloca 140 # define alloca(s) __builtin_alloca(s) 141 # endif 142 #endif 143 144 /* Function for ENGINE detection and control */ 145 static int padlock_available(void); 146 static int padlock_init(ENGINE *e); 147 148 /* RNG Stuff */ 149 static RAND_METHOD padlock_rand; 150 151 /* Cipher Stuff */ 152 #ifndef OPENSSL_NO_AES 153 static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid); 154 #endif 155 156 /* Engine names */ 157 static const char *padlock_id = "padlock"; 158 static char padlock_name[100]; 159 160 /* Available features */ 161 static int padlock_use_ace = 0; /* Advanced Cryptography Engine */ 162 static int padlock_use_rng = 0; /* Random Number Generator */ 163 #ifndef OPENSSL_NO_AES 164 static int padlock_aes_align_required = 1; 165 #endif 166 167 /* ===== Engine "management" functions ===== */ 168 169 /* Prepare the ENGINE structure for registration */ 170 static int 171 padlock_bind_helper(ENGINE *e) 172 { 173 /* Check available features */ 174 padlock_available(); 175 176 #if 1 /* disable RNG for now, see commentary in vicinity of RNG code */ 177 padlock_use_rng=0; 178 #endif 179 180 /* Generate a nice engine name with available features */ 181 BIO_snprintf(padlock_name, sizeof(padlock_name), 182 "VIA PadLock (%s, %s)", 183 padlock_use_rng ? "RNG" : "no-RNG", 184 padlock_use_ace ? "ACE" : "no-ACE"); 185 186 /* Register everything or return with an error */ 187 if (!ENGINE_set_id(e, padlock_id) || 188 !ENGINE_set_name(e, padlock_name) || 189 190 !ENGINE_set_init_function(e, padlock_init) || 191 #ifndef OPENSSL_NO_AES 192 (padlock_use_ace && !ENGINE_set_ciphers (e, padlock_ciphers)) || 193 #endif 194 (padlock_use_rng && !ENGINE_set_RAND (e, &padlock_rand))) { 195 return 0; 196 } 197 198 /* Everything looks good */ 199 return 1; 200 } 201 202 #ifdef OPENSSL_NO_DYNAMIC_ENGINE 203 204 /* Constructor */ 205 static ENGINE * 206 ENGINE_padlock(void) 207 { 208 ENGINE *eng = ENGINE_new(); 209 210 if (!eng) { 211 return NULL; 212 } 213 214 if (!padlock_bind_helper(eng)) { 215 ENGINE_free(eng); 216 return NULL; 217 } 218 219 return eng; 220 } 221 222 #endif 223 224 /* Check availability of the engine */ 225 static int 226 padlock_init(ENGINE *e) 227 { 228 return (padlock_use_rng || padlock_use_ace); 229 } 230 231 /* This stuff is needed if this ENGINE is being compiled into a self-contained 232 * shared-library. 233 */ 234 #ifdef DYNAMIC_ENGINE 235 static int 236 padlock_bind_fn(ENGINE *e, const char *id) 237 { 238 if (id && (strcmp(id, padlock_id) != 0)) { 239 return 0; 240 } 241 242 if (!padlock_bind_helper(e)) { 243 return 0; 244 } 245 246 return 1; 247 } 248 249 IMPLEMENT_DYNAMIC_CHECK_FN() 250 IMPLEMENT_DYNAMIC_BIND_FN (padlock_bind_fn) 251 #endif /* DYNAMIC_ENGINE */ 252 253 /* ===== Here comes the "real" engine ===== */ 254 255 #ifndef OPENSSL_NO_AES 256 /* Some AES-related constants */ 257 #define AES_BLOCK_SIZE 16 258 #define AES_KEY_SIZE_128 16 259 #define AES_KEY_SIZE_192 24 260 #define AES_KEY_SIZE_256 32 261 262 /* Here we store the status information relevant to the 263 current context. */ 264 /* BIG FAT WARNING: 265 * Inline assembler in PADLOCK_XCRYPT_ASM() 266 * depends on the order of items in this structure. 267 * Don't blindly modify, reorder, etc! 268 */ 269 struct padlock_cipher_data 270 { 271 unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */ 272 union { unsigned int pad[4]; 273 struct { 274 int rounds:4; 275 int dgst:1; /* n/a in C3 */ 276 int align:1; /* n/a in C3 */ 277 int ciphr:1; /* n/a in C3 */ 278 unsigned int keygen:1; 279 int interm:1; 280 unsigned int encdec:1; 281 int ksize:2; 282 } b; 283 } cword; /* Control word */ 284 AES_KEY ks; /* Encryption key */ 285 }; 286 287 /* 288 * Essentially this variable belongs in thread local storage. 289 * Having this variable global on the other hand can only cause 290 * few bogus key reloads [if any at all on single-CPU system], 291 * so we accept the penatly... 292 */ 293 static volatile struct padlock_cipher_data *padlock_saved_context; 294 #endif 295 296 /* 297 * ======================================================= 298 * Inline assembler section(s). 299 * ======================================================= 300 * Order of arguments is chosen to facilitate Windows port 301 * using __fastcall calling convention. If you wish to add 302 * more routines, keep in mind that first __fastcall 303 * argument is passed in %ecx and second - in %edx. 304 * ======================================================= 305 */ 306 #if defined(__GNUC__) && __GNUC__>=2 307 /* 308 * As for excessive "push %ebx"/"pop %ebx" found all over. 309 * When generating position-independent code GCC won't let 310 * us use "b" in assembler templates nor even respect "ebx" 311 * in "clobber description." Therefore the trouble... 312 */ 313 314 /* Helper function - check if a CPUID instruction 315 is available on this CPU */ 316 static int 317 padlock_insn_cpuid_available(void) 318 { 319 int result = -1; 320 321 /* We're checking if the bit #21 of EFLAGS 322 can be toggled. If yes = CPUID is available. */ 323 asm volatile ( 324 "pushf\n" 325 "popl %%eax\n" 326 "xorl $0x200000, %%eax\n" 327 "movl %%eax, %%ecx\n" 328 "andl $0x200000, %%ecx\n" 329 "pushl %%eax\n" 330 "popf\n" 331 "pushf\n" 332 "popl %%eax\n" 333 "andl $0x200000, %%eax\n" 334 "xorl %%eax, %%ecx\n" 335 "movl %%ecx, %0\n" 336 : "=r" (result) : : "eax", "ecx"); 337 338 return (result == 0); 339 } 340 341 /* Load supported features of the CPU to see if 342 the PadLock is available. */ 343 static int 344 padlock_available(void) 345 { 346 char vendor_string[16]; 347 unsigned int eax, edx; 348 349 /* First check if the CPUID instruction is available at all... */ 350 if (! padlock_insn_cpuid_available()) 351 return 0; 352 353 /* Are we running on the Centaur (VIA) CPU? */ 354 eax = 0x00000000; 355 vendor_string[12] = 0; 356 asm volatile ( 357 "pushl %%ebx\n" 358 "cpuid\n" 359 "movl %%ebx,(%%edi)\n" 360 "movl %%edx,4(%%edi)\n" 361 "movl %%ecx,8(%%edi)\n" 362 "popl %%ebx" 363 : "+a"(eax) : "D"(vendor_string) : "ecx", "edx"); 364 if (strcmp(vendor_string, "CentaurHauls") != 0) 365 return 0; 366 367 /* Check for Centaur Extended Feature Flags presence */ 368 eax = 0xC0000000; 369 asm volatile ("pushl %%ebx; cpuid; popl %%ebx" 370 : "+a"(eax) : : "ecx", "edx"); 371 if (eax < 0xC0000001) 372 return 0; 373 374 /* Read the Centaur Extended Feature Flags */ 375 eax = 0xC0000001; 376 asm volatile ("pushl %%ebx; cpuid; popl %%ebx" 377 : "+a"(eax), "=d"(edx) : : "ecx"); 378 379 /* Fill up some flags */ 380 padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6)); 381 padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2)); 382 383 return padlock_use_ace + padlock_use_rng; 384 } 385 386 #ifndef OPENSSL_NO_AES 387 #ifndef AES_ASM 388 /* Our own htonl()/ntohl() */ 389 static inline void 390 padlock_bswapl(AES_KEY *ks) 391 { 392 size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]); 393 unsigned int *key = ks->rd_key; 394 395 while (i--) { 396 asm volatile ("bswapl %0" : "+r"(*key)); 397 key++; 398 } 399 } 400 #endif 401 #endif 402 403 /* Force key reload from memory to the CPU microcode. 404 Loading EFLAGS from the stack clears EFLAGS[30] 405 which does the trick. */ 406 static inline void 407 padlock_reload_key(void) 408 { 409 asm volatile ("pushfl; popfl"); 410 } 411 412 #ifndef OPENSSL_NO_AES 413 /* 414 * This is heuristic key context tracing. At first one 415 * believes that one should use atomic swap instructions, 416 * but it's not actually necessary. Point is that if 417 * padlock_saved_context was changed by another thread 418 * after we've read it and before we compare it with cdata, 419 * our key *shall* be reloaded upon thread context switch 420 * and we are therefore set in either case... 421 */ 422 static inline void 423 padlock_verify_context(struct padlock_cipher_data *cdata) 424 { 425 asm volatile ( 426 "pushfl\n" 427 " btl $30,(%%esp)\n" 428 " jnc 1f\n" 429 " cmpl %2,%1\n" 430 " je 1f\n" 431 " popfl\n" 432 " subl $4,%%esp\n" 433 "1: addl $4,%%esp\n" 434 " movl %2,%0" 435 :"+m"(padlock_saved_context) 436 : "r"(padlock_saved_context), "r"(cdata) : "cc"); 437 } 438 439 /* Template for padlock_xcrypt_* modes */ 440 /* BIG FAT WARNING: 441 * The offsets used with 'leal' instructions 442 * describe items of the 'padlock_cipher_data' 443 * structure. 444 */ 445 #define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \ 446 static inline void *name(size_t cnt, \ 447 struct padlock_cipher_data *cdata, \ 448 void *out, const void *inp) \ 449 { void *iv; \ 450 asm volatile ( "pushl %%ebx\n" \ 451 " leal 16(%0),%%edx\n" \ 452 " leal 32(%0),%%ebx\n" \ 453 rep_xcrypt "\n" \ 454 " popl %%ebx" \ 455 : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \ 456 : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \ 457 : "edx", "cc", "memory"); \ 458 return iv; \ 459 } 460 461 /* Generate all functions with appropriate opcodes */ 462 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") /* rep xcryptecb */ 463 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") /* rep xcryptcbc */ 464 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") /* rep xcryptcfb */ 465 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") /* rep xcryptofb */ 466 #endif 467 468 /* The RNG call itself */ 469 static inline unsigned int 470 padlock_xstore(void *addr, unsigned int edx_in) 471 { 472 unsigned int eax_out; 473 474 asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */ 475 : "=a"(eax_out),"=m"(*(unsigned *)addr) 476 : "D"(addr), "d" (edx_in) 477 ); 478 479 return eax_out; 480 } 481 482 /* Why not inline 'rep movsd'? I failed to find information on what 483 * value in Direction Flag one can expect and consequently have to 484 * apply "better-safe-than-sorry" approach and assume "undefined." 485 * I could explicitly clear it and restore the original value upon 486 * return from padlock_aes_cipher, but it's presumably too much 487 * trouble for too little gain... 488 * 489 * In case you wonder 'rep xcrypt*' instructions above are *not* 490 * affected by the Direction Flag and pointers advance toward 491 * larger addresses unconditionally. 492 */ 493 static inline unsigned char * 494 padlock_memcpy(void *dst,const void *src,size_t n) 495 { 496 long *d=dst; 497 const long *s=src; 498 499 n /= sizeof(*d); 500 do { *d++ = *s++; } while (--n); 501 502 return dst; 503 } 504 505 #elif defined(_MSC_VER) 506 /* 507 * Unlike GCC these are real functions. In order to minimize impact 508 * on performance we adhere to __fastcall calling convention in 509 * order to get two first arguments passed through %ecx and %edx. 510 * Which kind of suits very well, as instructions in question use 511 * both %ecx and %edx as input:-) 512 */ 513 #define REP_XCRYPT(code) \ 514 _asm _emit 0xf3 \ 515 _asm _emit 0x0f _asm _emit 0xa7 \ 516 _asm _emit code 517 518 /* BIG FAT WARNING: 519 * The offsets used with 'lea' instructions 520 * describe items of the 'padlock_cipher_data' 521 * structure. 522 */ 523 #define PADLOCK_XCRYPT_ASM(name,code) \ 524 static void * __fastcall \ 525 name (size_t cnt, void *cdata, \ 526 void *outp, const void *inp) \ 527 { _asm mov eax,edx \ 528 _asm lea edx,[eax+16] \ 529 _asm lea ebx,[eax+32] \ 530 _asm mov edi,outp \ 531 _asm mov esi,inp \ 532 REP_XCRYPT(code) \ 533 } 534 535 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8) 536 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0) 537 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0) 538 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8) 539 540 static int __fastcall 541 padlock_xstore(void *outp,unsigned int code) 542 { _asm mov edi,ecx 543 _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0 544 } 545 546 static void __fastcall 547 padlock_reload_key(void) 548 { _asm pushfd _asm popfd } 549 550 static void __fastcall 551 padlock_verify_context(void *cdata) 552 { _asm { 553 pushfd 554 bt DWORD PTR[esp],30 555 jnc skip 556 cmp ecx,padlock_saved_context 557 je skip 558 popfd 559 sub esp,4 560 skip: add esp,4 561 mov padlock_saved_context,ecx 562 } 563 } 564 565 static int 566 padlock_available(void) 567 { _asm { 568 pushfd 569 pop eax 570 mov ecx,eax 571 xor eax,1<<21 572 push eax 573 popfd 574 pushfd 575 pop eax 576 xor eax,ecx 577 bt eax,21 578 jnc noluck 579 mov eax,0 580 cpuid 581 xor eax,eax 582 cmp ebx,'tneC' 583 jne noluck 584 cmp edx,'Hrua' 585 jne noluck 586 cmp ecx,'slua' 587 jne noluck 588 mov eax,0xC0000000 589 cpuid 590 mov edx,eax 591 xor eax,eax 592 cmp edx,0xC0000001 593 jb noluck 594 mov eax,0xC0000001 595 cpuid 596 xor eax,eax 597 bt edx,6 598 jnc skip_a 599 bt edx,7 600 jnc skip_a 601 mov padlock_use_ace,1 602 inc eax 603 skip_a: bt edx,2 604 jnc skip_r 605 bt edx,3 606 jnc skip_r 607 mov padlock_use_rng,1 608 inc eax 609 skip_r: 610 noluck: 611 } 612 } 613 614 static void __fastcall 615 padlock_bswapl(void *key) 616 { _asm { 617 pushfd 618 cld 619 mov esi,ecx 620 mov edi,ecx 621 mov ecx,60 622 up: lodsd 623 bswap eax 624 stosd 625 loop up 626 popfd 627 } 628 } 629 630 /* MS actually specifies status of Direction Flag and compiler even 631 * manages to compile following as 'rep movsd' all by itself... 632 */ 633 #define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U)) 634 #endif 635 636 /* ===== AES encryption/decryption ===== */ 637 #ifndef OPENSSL_NO_AES 638 639 #if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb) 640 #define NID_aes_128_cfb NID_aes_128_cfb128 641 #endif 642 643 #if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb) 644 #define NID_aes_128_ofb NID_aes_128_ofb128 645 #endif 646 647 #if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb) 648 #define NID_aes_192_cfb NID_aes_192_cfb128 649 #endif 650 651 #if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb) 652 #define NID_aes_192_ofb NID_aes_192_ofb128 653 #endif 654 655 #if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb) 656 #define NID_aes_256_cfb NID_aes_256_cfb128 657 #endif 658 659 #if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb) 660 #define NID_aes_256_ofb NID_aes_256_ofb128 661 #endif 662 663 /* List of supported ciphers. */ 664 static int padlock_cipher_nids[] = { 665 NID_aes_128_ecb, 666 NID_aes_128_cbc, 667 NID_aes_128_cfb, 668 NID_aes_128_ofb, 669 670 NID_aes_192_ecb, 671 NID_aes_192_cbc, 672 NID_aes_192_cfb, 673 NID_aes_192_ofb, 674 675 NID_aes_256_ecb, 676 NID_aes_256_cbc, 677 NID_aes_256_cfb, 678 NID_aes_256_ofb, 679 }; 680 static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/ 681 sizeof(padlock_cipher_nids[0])); 682 683 /* Function prototypes ... */ 684 static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, 685 const unsigned char *iv, int enc); 686 static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, 687 const unsigned char *in, size_t nbytes); 688 689 #define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) + \ 690 ( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F ) ) 691 #define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\ 692 NEAREST_ALIGNED(ctx->cipher_data)) 693 694 #define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE 695 #define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE 696 #define EVP_CIPHER_block_size_OFB 1 697 #define EVP_CIPHER_block_size_CFB 1 698 699 /* Declaring so many ciphers by hand would be a pain. 700 Instead introduce a bit of preprocessor magic :-) */ 701 #define DECLARE_AES_EVP(ksize,lmode,umode) \ 702 static const EVP_CIPHER padlock_aes_##ksize##_##lmode = { \ 703 NID_aes_##ksize##_##lmode, \ 704 EVP_CIPHER_block_size_##umode, \ 705 AES_KEY_SIZE_##ksize, \ 706 AES_BLOCK_SIZE, \ 707 0 | EVP_CIPH_##umode##_MODE, \ 708 padlock_aes_init_key, \ 709 padlock_aes_cipher, \ 710 NULL, \ 711 sizeof(struct padlock_cipher_data) + 16, \ 712 EVP_CIPHER_set_asn1_iv, \ 713 EVP_CIPHER_get_asn1_iv, \ 714 NULL, \ 715 NULL \ 716 } 717 718 DECLARE_AES_EVP(128,ecb,ECB); 719 DECLARE_AES_EVP(128,cbc,CBC); 720 DECLARE_AES_EVP(128,cfb,CFB); 721 DECLARE_AES_EVP(128,ofb,OFB); 722 723 DECLARE_AES_EVP(192,ecb,ECB); 724 DECLARE_AES_EVP(192,cbc,CBC); 725 DECLARE_AES_EVP(192,cfb,CFB); 726 DECLARE_AES_EVP(192,ofb,OFB); 727 728 DECLARE_AES_EVP(256,ecb,ECB); 729 DECLARE_AES_EVP(256,cbc,CBC); 730 DECLARE_AES_EVP(256,cfb,CFB); 731 DECLARE_AES_EVP(256,ofb,OFB); 732 733 static int 734 padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid) 735 { 736 /* No specific cipher => return a list of supported nids ... */ 737 if (!cipher) { 738 *nids = padlock_cipher_nids; 739 return padlock_cipher_nids_num; 740 } 741 742 /* ... or the requested "cipher" otherwise */ 743 switch (nid) { 744 case NID_aes_128_ecb: 745 *cipher = &padlock_aes_128_ecb; 746 break; 747 case NID_aes_128_cbc: 748 *cipher = &padlock_aes_128_cbc; 749 break; 750 case NID_aes_128_cfb: 751 *cipher = &padlock_aes_128_cfb; 752 break; 753 case NID_aes_128_ofb: 754 *cipher = &padlock_aes_128_ofb; 755 break; 756 757 case NID_aes_192_ecb: 758 *cipher = &padlock_aes_192_ecb; 759 break; 760 case NID_aes_192_cbc: 761 *cipher = &padlock_aes_192_cbc; 762 break; 763 case NID_aes_192_cfb: 764 *cipher = &padlock_aes_192_cfb; 765 break; 766 case NID_aes_192_ofb: 767 *cipher = &padlock_aes_192_ofb; 768 break; 769 770 case NID_aes_256_ecb: 771 *cipher = &padlock_aes_256_ecb; 772 break; 773 case NID_aes_256_cbc: 774 *cipher = &padlock_aes_256_cbc; 775 break; 776 case NID_aes_256_cfb: 777 *cipher = &padlock_aes_256_cfb; 778 break; 779 case NID_aes_256_ofb: 780 *cipher = &padlock_aes_256_ofb; 781 break; 782 783 default: 784 /* Sorry, we don't support this NID */ 785 *cipher = NULL; 786 return 0; 787 } 788 789 return 1; 790 } 791 792 /* Prepare the encryption key for PadLock usage */ 793 static int 794 padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key, 795 const unsigned char *iv, int enc) 796 { 797 struct padlock_cipher_data *cdata; 798 int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8; 799 800 if (key==NULL) return 0; /* ERROR */ 801 802 cdata = ALIGNED_CIPHER_DATA(ctx); 803 memset(cdata, 0, sizeof(struct padlock_cipher_data)); 804 805 /* Prepare Control word. */ 806 if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE) 807 cdata->cword.b.encdec = 0; 808 else 809 cdata->cword.b.encdec = (ctx->encrypt == 0); 810 cdata->cword.b.rounds = 10 + (key_len - 128) / 32; 811 cdata->cword.b.ksize = (key_len - 128) / 64; 812 813 switch(key_len) { 814 case 128: 815 /* PadLock can generate an extended key for 816 AES128 in hardware */ 817 memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128); 818 cdata->cword.b.keygen = 0; 819 break; 820 821 case 192: 822 case 256: 823 /* Generate an extended AES key in software. 824 Needed for AES192/AES256 */ 825 /* Well, the above applies to Stepping 8 CPUs 826 and is listed as hardware errata. They most 827 likely will fix it at some point and then 828 a check for stepping would be due here. */ 829 if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE || 830 EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE || 831 enc) 832 AES_set_encrypt_key(key, key_len, &cdata->ks); 833 else 834 AES_set_decrypt_key(key, key_len, &cdata->ks); 835 #ifndef AES_ASM 836 /* OpenSSL C functions use byte-swapped extended key. */ 837 padlock_bswapl(&cdata->ks); 838 #endif 839 cdata->cword.b.keygen = 1; 840 break; 841 842 default: 843 /* ERROR */ 844 return 0; 845 } 846 847 /* 848 * This is done to cover for cases when user reuses the 849 * context for new key. The catch is that if we don't do 850 * this, padlock_eas_cipher might proceed with old key... 851 */ 852 padlock_reload_key (); 853 854 return 1; 855 } 856 857 /* 858 * Simplified version of padlock_aes_cipher() used when 859 * 1) both input and output buffers are at aligned addresses. 860 * or when 861 * 2) running on a newer CPU that doesn't require aligned buffers. 862 */ 863 static int 864 padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg, 865 const unsigned char *in_arg, size_t nbytes) 866 { 867 struct padlock_cipher_data *cdata; 868 void *iv; 869 870 cdata = ALIGNED_CIPHER_DATA(ctx); 871 padlock_verify_context(cdata); 872 873 switch (EVP_CIPHER_CTX_mode(ctx)) { 874 case EVP_CIPH_ECB_MODE: 875 padlock_xcrypt_ecb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg); 876 break; 877 878 case EVP_CIPH_CBC_MODE: 879 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); 880 iv = padlock_xcrypt_cbc(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg); 881 memcpy(ctx->iv, iv, AES_BLOCK_SIZE); 882 break; 883 884 case EVP_CIPH_CFB_MODE: 885 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); 886 iv = padlock_xcrypt_cfb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg); 887 memcpy(ctx->iv, iv, AES_BLOCK_SIZE); 888 break; 889 890 case EVP_CIPH_OFB_MODE: 891 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); 892 padlock_xcrypt_ofb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg); 893 memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE); 894 break; 895 896 default: 897 return 0; 898 } 899 900 memset(cdata->iv, 0, AES_BLOCK_SIZE); 901 902 return 1; 903 } 904 905 #ifndef PADLOCK_CHUNK 906 # define PADLOCK_CHUNK 512 /* Must be a power of 2 larger than 16 */ 907 #endif 908 #if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1) 909 # error "insane PADLOCK_CHUNK..." 910 #endif 911 912 /* Re-align the arguments to 16-Bytes boundaries and run the 913 encryption function itself. This function is not AES-specific. */ 914 static int 915 padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg, 916 const unsigned char *in_arg, size_t nbytes) 917 { 918 struct padlock_cipher_data *cdata; 919 const void *inp; 920 unsigned char *out; 921 void *iv; 922 int inp_misaligned, out_misaligned, realign_in_loop; 923 size_t chunk, allocated=0; 924 925 /* ctx->num is maintained in byte-oriented modes, 926 such as CFB and OFB... */ 927 if ((chunk = ctx->num)) { /* borrow chunk variable */ 928 unsigned char *ivp=ctx->iv; 929 930 switch (EVP_CIPHER_CTX_mode(ctx)) { 931 case EVP_CIPH_CFB_MODE: 932 if (chunk >= AES_BLOCK_SIZE) 933 return 0; /* bogus value */ 934 935 if (ctx->encrypt) 936 while (chunk<AES_BLOCK_SIZE && nbytes!=0) { 937 ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk]; 938 chunk++, nbytes--; 939 } 940 else while (chunk<AES_BLOCK_SIZE && nbytes!=0) { 941 unsigned char c = *(in_arg++); 942 *(out_arg++) = c ^ ivp[chunk]; 943 ivp[chunk++] = c, nbytes--; 944 } 945 946 ctx->num = chunk%AES_BLOCK_SIZE; 947 break; 948 case EVP_CIPH_OFB_MODE: 949 if (chunk >= AES_BLOCK_SIZE) 950 return 0; /* bogus value */ 951 952 while (chunk<AES_BLOCK_SIZE && nbytes!=0) { 953 *(out_arg++) = *(in_arg++) ^ ivp[chunk]; 954 chunk++, nbytes--; 955 } 956 957 ctx->num = chunk%AES_BLOCK_SIZE; 958 break; 959 } 960 } 961 962 if (nbytes == 0) 963 return 1; 964 #if 0 965 if (nbytes % AES_BLOCK_SIZE) 966 return 0; /* are we expected to do tail processing? */ 967 #else 968 /* nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC 969 modes and arbitrary value in byte-oriented modes, such as 970 CFB and OFB... */ 971 #endif 972 973 /* VIA promises CPUs that won't require alignment in the future. 974 For now padlock_aes_align_required is initialized to 1 and 975 the condition is never met... */ 976 /* C7 core is capable to manage unaligned input in non-ECB[!] 977 mode, but performance penalties appear to be approximately 978 same as for software alignment below or ~3x. They promise to 979 improve it in the future, but for now we can just as well 980 pretend that it can only handle aligned input... */ 981 if (!padlock_aes_align_required && (nbytes%AES_BLOCK_SIZE)==0) 982 return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes); 983 984 inp_misaligned = (((size_t)in_arg) & 0x0F); 985 out_misaligned = (((size_t)out_arg) & 0x0F); 986 987 /* Note that even if output is aligned and input not, 988 * I still prefer to loop instead of copy the whole 989 * input and then encrypt in one stroke. This is done 990 * in order to improve L1 cache utilization... */ 991 realign_in_loop = out_misaligned|inp_misaligned; 992 993 if (!realign_in_loop && (nbytes%AES_BLOCK_SIZE)==0) 994 return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes); 995 996 /* this takes one "if" out of the loops */ 997 chunk = nbytes; 998 chunk %= PADLOCK_CHUNK; 999 if (chunk==0) chunk = PADLOCK_CHUNK; 1000 1001 if (out_misaligned) { 1002 /* optmize for small input */ 1003 allocated = (chunk<nbytes?PADLOCK_CHUNK:nbytes); 1004 out = alloca(0x10 + allocated); 1005 out = NEAREST_ALIGNED(out); 1006 } 1007 else 1008 out = out_arg; 1009 1010 cdata = ALIGNED_CIPHER_DATA(ctx); 1011 padlock_verify_context(cdata); 1012 1013 switch (EVP_CIPHER_CTX_mode(ctx)) { 1014 case EVP_CIPH_ECB_MODE: 1015 do { 1016 if (inp_misaligned) 1017 inp = padlock_memcpy(out, in_arg, chunk); 1018 else 1019 inp = in_arg; 1020 in_arg += chunk; 1021 1022 padlock_xcrypt_ecb(chunk/AES_BLOCK_SIZE, cdata, out, inp); 1023 1024 if (out_misaligned) 1025 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; 1026 else 1027 out = out_arg+=chunk; 1028 1029 nbytes -= chunk; 1030 chunk = PADLOCK_CHUNK; 1031 } while (nbytes); 1032 break; 1033 1034 case EVP_CIPH_CBC_MODE: 1035 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); 1036 goto cbc_shortcut; 1037 do { 1038 if (iv != cdata->iv) 1039 memcpy(cdata->iv, iv, AES_BLOCK_SIZE); 1040 chunk = PADLOCK_CHUNK; 1041 cbc_shortcut: /* optimize for small input */ 1042 if (inp_misaligned) 1043 inp = padlock_memcpy(out, in_arg, chunk); 1044 else 1045 inp = in_arg; 1046 in_arg += chunk; 1047 1048 iv = padlock_xcrypt_cbc(chunk/AES_BLOCK_SIZE, cdata, out, inp); 1049 1050 if (out_misaligned) 1051 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; 1052 else 1053 out = out_arg+=chunk; 1054 1055 } while (nbytes -= chunk); 1056 memcpy(ctx->iv, iv, AES_BLOCK_SIZE); 1057 break; 1058 1059 case EVP_CIPH_CFB_MODE: 1060 memcpy (iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE); 1061 chunk &= ~(AES_BLOCK_SIZE-1); 1062 if (chunk) goto cfb_shortcut; 1063 else goto cfb_skiploop; 1064 do { 1065 if (iv != cdata->iv) 1066 memcpy(cdata->iv, iv, AES_BLOCK_SIZE); 1067 chunk = PADLOCK_CHUNK; 1068 cfb_shortcut: /* optimize for small input */ 1069 if (inp_misaligned) 1070 inp = padlock_memcpy(out, in_arg, chunk); 1071 else 1072 inp = in_arg; 1073 in_arg += chunk; 1074 1075 iv = padlock_xcrypt_cfb(chunk/AES_BLOCK_SIZE, cdata, out, inp); 1076 1077 if (out_misaligned) 1078 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; 1079 else 1080 out = out_arg+=chunk; 1081 1082 nbytes -= chunk; 1083 } while (nbytes >= AES_BLOCK_SIZE); 1084 1085 cfb_skiploop: 1086 if (nbytes) { 1087 unsigned char *ivp = cdata->iv; 1088 1089 if (iv != ivp) { 1090 memcpy(ivp, iv, AES_BLOCK_SIZE); 1091 iv = ivp; 1092 } 1093 ctx->num = nbytes; 1094 if (cdata->cword.b.encdec) { 1095 cdata->cword.b.encdec=0; 1096 padlock_reload_key(); 1097 padlock_xcrypt_ecb(1,cdata,ivp,ivp); 1098 cdata->cword.b.encdec=1; 1099 padlock_reload_key(); 1100 while(nbytes) { 1101 unsigned char c = *(in_arg++); 1102 *(out_arg++) = c ^ *ivp; 1103 *(ivp++) = c, nbytes--; 1104 } 1105 } 1106 else { padlock_reload_key(); 1107 padlock_xcrypt_ecb(1,cdata,ivp,ivp); 1108 padlock_reload_key(); 1109 while (nbytes) { 1110 *ivp = *(out_arg++) = *(in_arg++) ^ *ivp; 1111 ivp++, nbytes--; 1112 } 1113 } 1114 } 1115 1116 memcpy(ctx->iv, iv, AES_BLOCK_SIZE); 1117 break; 1118 1119 case EVP_CIPH_OFB_MODE: 1120 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); 1121 chunk &= ~(AES_BLOCK_SIZE-1); 1122 if (chunk) do { 1123 if (inp_misaligned) 1124 inp = padlock_memcpy(out, in_arg, chunk); 1125 else 1126 inp = in_arg; 1127 in_arg += chunk; 1128 1129 padlock_xcrypt_ofb(chunk/AES_BLOCK_SIZE, cdata, out, inp); 1130 1131 if (out_misaligned) 1132 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; 1133 else 1134 out = out_arg+=chunk; 1135 1136 nbytes -= chunk; 1137 chunk = PADLOCK_CHUNK; 1138 } while (nbytes >= AES_BLOCK_SIZE); 1139 1140 if (nbytes) { 1141 unsigned char *ivp = cdata->iv; 1142 1143 ctx->num = nbytes; 1144 padlock_reload_key(); /* empirically found */ 1145 padlock_xcrypt_ecb(1,cdata,ivp,ivp); 1146 padlock_reload_key(); /* empirically found */ 1147 while (nbytes) { 1148 *(out_arg++) = *(in_arg++) ^ *ivp; 1149 ivp++, nbytes--; 1150 } 1151 } 1152 1153 memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE); 1154 break; 1155 1156 default: 1157 return 0; 1158 } 1159 1160 /* Clean the realign buffer if it was used */ 1161 if (out_misaligned) { 1162 volatile unsigned long *p=(void *)out; 1163 size_t n = allocated/sizeof(*p); 1164 while (n--) *p++=0; 1165 } 1166 1167 memset(cdata->iv, 0, AES_BLOCK_SIZE); 1168 1169 return 1; 1170 } 1171 1172 #endif /* OPENSSL_NO_AES */ 1173 1174 /* ===== Random Number Generator ===== */ 1175 /* 1176 * This code is not engaged. The reason is that it does not comply 1177 * with recommendations for VIA RNG usage for secure applications 1178 * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it 1179 * provide meaningful error control... 1180 */ 1181 /* Wrapper that provides an interface between the API and 1182 the raw PadLock RNG */ 1183 static int 1184 padlock_rand_bytes(unsigned char *output, int count) 1185 { 1186 unsigned int eax, buf; 1187 1188 while (count >= 8) { 1189 eax = padlock_xstore(output, 0); 1190 if (!(eax&(1<<6))) return 0; /* RNG disabled */ 1191 /* this ---vv--- covers DC bias, Raw Bits and String Filter */ 1192 if (eax&(0x1F<<10)) return 0; 1193 if ((eax&0x1F)==0) continue; /* no data, retry... */ 1194 if ((eax&0x1F)!=8) return 0; /* fatal failure... */ 1195 output += 8; 1196 count -= 8; 1197 } 1198 while (count > 0) { 1199 eax = padlock_xstore(&buf, 3); 1200 if (!(eax&(1<<6))) return 0; /* RNG disabled */ 1201 /* this ---vv--- covers DC bias, Raw Bits and String Filter */ 1202 if (eax&(0x1F<<10)) return 0; 1203 if ((eax&0x1F)==0) continue; /* no data, retry... */ 1204 if ((eax&0x1F)!=1) return 0; /* fatal failure... */ 1205 *output++ = (unsigned char)buf; 1206 count--; 1207 } 1208 *(volatile unsigned int *)&buf=0; 1209 1210 return 1; 1211 } 1212 1213 /* Dummy but necessary function */ 1214 static int 1215 padlock_rand_status(void) 1216 { 1217 return 1; 1218 } 1219 1220 /* Prepare structure for registration */ 1221 static RAND_METHOD padlock_rand = { 1222 NULL, /* seed */ 1223 padlock_rand_bytes, /* bytes */ 1224 NULL, /* cleanup */ 1225 NULL, /* add */ 1226 padlock_rand_bytes, /* pseudorand */ 1227 padlock_rand_status, /* rand status */ 1228 }; 1229 1230 #else /* !COMPILE_HW_PADLOCK */ 1231 #ifndef OPENSSL_NO_DYNAMIC_ENGINE 1232 OPENSSL_EXPORT 1233 int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns); 1234 OPENSSL_EXPORT 1235 int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns) { return 0; } 1236 IMPLEMENT_DYNAMIC_CHECK_FN() 1237 #endif 1238 #endif /* COMPILE_HW_PADLOCK */ 1239 1240 #endif /* !OPENSSL_NO_HW_PADLOCK */ 1241 #endif /* !OPENSSL_NO_HW */ 1242