/*
 * ====================================================================
 * Written by Intel Corporation for the OpenSSL project to add support
 * for Intel AES-NI instructions. Rights for redistribution and usage
 * in source and binary forms are granted according to the OpenSSL
 * license.
 *
 *   Author: Huang Ying <ying.huang at intel dot com>
 *           Vinodh Gopal <vinodh.gopal at intel dot com>
 *           Kahraman Akdemir
 *
 * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
 * instructions that are going to be introduced in the next generation
 * of Intel processor, as of 2009. These instructions enable fast and
 * secure data encryption and decryption, using the Advanced Encryption
 * Standard (AES), defined by FIPS Publication number 197. The
 * architecture introduces six instructions that offer full hardware
 * support for AES. Four of them support high performance data
 * encryption and decryption, and the other two instructions support
 * the AES key expansion procedure.
 * ====================================================================
 */

/*
 * ====================================================================
 * Copyright (c) 1998-2008 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 */

/*
 * ====================================================================
 * OpenSolaris OS modifications
 *
 * This source originates as files aes-intel.S and eng_aesni_asm.pl, in
 * patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
 * Huang Ying of Intel to the openssl-dev mailing list under the subject
 * of "Add support to Intel AES-NI instruction set for x86_64 platform".
 *
 * This OpenSolaris version has these major changes from the original source:
 *
 * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
 * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
 * definitions for lint.
 *
 * 2. Formatted code, added comments, and added #includes and #defines.
 *
 * 3. If bit CR0.TS is set, clear and set the TS bit, after and before
 * calling kpreempt_disable() and kpreempt_enable().
 * If the TS bit is not set, save and restore %xmm registers at the beginning
 * and end of function calls (%xmm* registers are not saved and restored
 * during kernel thread preemption).
 *
 * 4. Renamed functions, reordered parameters, and changed return value
 * to match OpenSolaris:
 *
 * OpenSSL interface:
 *	int intel_AES_set_encrypt_key(const unsigned char *userKey,
 *		const int bits, AES_KEY *key);
 *	int intel_AES_set_decrypt_key(const unsigned char *userKey,
 *		const int bits, AES_KEY *key);
 *	Return values for above are non-zero on error, 0 on success.
 *
 *	void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
 *		const AES_KEY *key);
 *	void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
 *		const AES_KEY *key);
 *	typedef struct aes_key_st {
 *		unsigned int	rd_key[4 *(AES_MAXNR + 1)];
 *		int		rounds;
 *		unsigned int	pad[3];
 *	} AES_KEY;
 * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
 * (ks32) instead of 64-bit (ks64)).
 * Number of rounds (aka round count) is at offset 240 of AES_KEY.
 *
 * OpenSolaris OS interface (#ifdefs removed for readability):
 *	int rijndael_key_setup_dec_intel(uint32_t rk[],
 *		const uint32_t cipherKey[], uint64_t keyBits);
 *	int rijndael_key_setup_enc_intel(uint32_t rk[],
 *		const uint32_t cipherKey[], uint64_t keyBits);
 *	Return values for above are 0 on error, number of rounds on success.
 *
 *	void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
 *		const uint32_t pt[4], uint32_t ct[4]);
 *	void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
 *		const uint32_t ct[4], uint32_t pt[4]);
 *	typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
 *		 uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
 *
 *	typedef union {
 *		uint32_t	ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
 *	} aes_ks_t;
 *	typedef struct aes_key {
 *		aes_ks_t	encr_ks, decr_ks;
 *		long double	align128;
 *		int		flags, nr, type;
 *	} aes_key_t;
 *
 * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
 * ct is crypto text, and MAX_AES_NR is 14.
 * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
 *
 * Note2: aes_ks_t must be aligned on a 0 mod 128 byte boundary.
148 * 149 * ==================================================================== 150 */ 151 152 153#if defined(lint) || defined(__lint) 154 155#include <sys/types.h> 156 157void 158aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4], 159 uint32_t ct[4]) { 160 (void) rk, (void) Nr, (void) pt, (void) ct; 161} 162void 163aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4], 164 uint32_t pt[4]) { 165 (void) rk, (void) Nr, (void) ct, (void) pt; 166} 167int 168rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[], 169 uint64_t keyBits) { 170 (void) rk, (void) cipherKey, (void) keyBits; 171 return (0); 172} 173int 174rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[], 175 uint64_t keyBits) { 176 (void) rk, (void) cipherKey, (void) keyBits; 177 return (0); 178} 179 180 181#elif defined(HAVE_AES) /* guard by instruction set */ 182 183#define _ASM 184#include <sys/asm_linkage.h> 185 186/* 187 * _key_expansion_128(), * _key_expansion_192a(), _key_expansion_192b(), 188 * _key_expansion_256a(), _key_expansion_256b() 189 * 190 * Helper functions called by rijndael_key_setup_inc_intel(). 191 * Also used indirectly by rijndael_key_setup_dec_intel(). 
192 * 193 * Input: 194 * %xmm0 User-provided cipher key 195 * %xmm1 Round constant 196 * Output: 197 * (%rcx) AES key 198 */ 199 200ENTRY_NP2(_key_expansion_128, _key_expansion_256a) 201_key_expansion_128_local: 202_key_expansion_256a_local: 203 pshufd $0b11111111, %xmm1, %xmm1 204 shufps $0b00010000, %xmm0, %xmm4 205 pxor %xmm4, %xmm0 206 shufps $0b10001100, %xmm0, %xmm4 207 pxor %xmm4, %xmm0 208 pxor %xmm1, %xmm0 209 movups %xmm0, (%rcx) 210 add $0x10, %rcx 211 RET 212 nop 213SET_SIZE(_key_expansion_128) 214SET_SIZE(_key_expansion_256a) 215 216 217ENTRY_NP(_key_expansion_192a) 218_key_expansion_192a_local: 219 pshufd $0b01010101, %xmm1, %xmm1 220 shufps $0b00010000, %xmm0, %xmm4 221 pxor %xmm4, %xmm0 222 shufps $0b10001100, %xmm0, %xmm4 223 pxor %xmm4, %xmm0 224 pxor %xmm1, %xmm0 225 226 movups %xmm2, %xmm5 227 movups %xmm2, %xmm6 228 pslldq $4, %xmm5 229 pshufd $0b11111111, %xmm0, %xmm3 230 pxor %xmm3, %xmm2 231 pxor %xmm5, %xmm2 232 233 movups %xmm0, %xmm1 234 shufps $0b01000100, %xmm0, %xmm6 235 movups %xmm6, (%rcx) 236 shufps $0b01001110, %xmm2, %xmm1 237 movups %xmm1, 0x10(%rcx) 238 add $0x20, %rcx 239 RET 240SET_SIZE(_key_expansion_192a) 241 242 243ENTRY_NP(_key_expansion_192b) 244_key_expansion_192b_local: 245 pshufd $0b01010101, %xmm1, %xmm1 246 shufps $0b00010000, %xmm0, %xmm4 247 pxor %xmm4, %xmm0 248 shufps $0b10001100, %xmm0, %xmm4 249 pxor %xmm4, %xmm0 250 pxor %xmm1, %xmm0 251 252 movups %xmm2, %xmm5 253 pslldq $4, %xmm5 254 pshufd $0b11111111, %xmm0, %xmm3 255 pxor %xmm3, %xmm2 256 pxor %xmm5, %xmm2 257 258 movups %xmm0, (%rcx) 259 add $0x10, %rcx 260 RET 261SET_SIZE(_key_expansion_192b) 262 263 264ENTRY_NP(_key_expansion_256b) 265_key_expansion_256b_local: 266 pshufd $0b10101010, %xmm1, %xmm1 267 shufps $0b00010000, %xmm2, %xmm4 268 pxor %xmm4, %xmm2 269 shufps $0b10001100, %xmm2, %xmm4 270 pxor %xmm4, %xmm2 271 pxor %xmm1, %xmm2 272 movups %xmm2, (%rcx) 273 add $0x10, %rcx 274 RET 275SET_SIZE(_key_expansion_256b) 276 277 278/* 279 * 
rijndael_key_setup_enc_intel() 280 * Expand the cipher key into the encryption key schedule. 281 * 282 * For kernel code, caller is responsible for ensuring kpreempt_disable() 283 * has been called. This is because %xmm registers are not saved/restored. 284 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set 285 * on entry. Otherwise, if TS is not set, save and restore %xmm registers 286 * on the stack. 287 * 288 * OpenSolaris interface: 289 * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[], 290 * uint64_t keyBits); 291 * Return value is 0 on error, number of rounds on success. 292 * 293 * Original Intel OpenSSL interface: 294 * int intel_AES_set_encrypt_key(const unsigned char *userKey, 295 * const int bits, AES_KEY *key); 296 * Return value is non-zero on error, 0 on success. 297 */ 298 299#ifdef OPENSSL_INTERFACE 300#define rijndael_key_setup_enc_intel intel_AES_set_encrypt_key 301#define rijndael_key_setup_dec_intel intel_AES_set_decrypt_key 302 303#define USERCIPHERKEY rdi /* P1, 64 bits */ 304#define KEYSIZE32 esi /* P2, 32 bits */ 305#define KEYSIZE64 rsi /* P2, 64 bits */ 306#define AESKEY rdx /* P3, 64 bits */ 307 308#else /* OpenSolaris Interface */ 309#define AESKEY rdi /* P1, 64 bits */ 310#define USERCIPHERKEY rsi /* P2, 64 bits */ 311#define KEYSIZE32 edx /* P3, 32 bits */ 312#define KEYSIZE64 rdx /* P3, 64 bits */ 313#endif /* OPENSSL_INTERFACE */ 314 315#define ROUNDS32 KEYSIZE32 /* temp */ 316#define ROUNDS64 KEYSIZE64 /* temp */ 317#define ENDAESKEY USERCIPHERKEY /* temp */ 318 319ENTRY_NP(rijndael_key_setup_enc_intel) 320rijndael_key_setup_enc_intel_local: 321 FRAME_BEGIN 322 // NULL pointer sanity check 323 test %USERCIPHERKEY, %USERCIPHERKEY 324 jz .Lenc_key_invalid_param 325 test %AESKEY, %AESKEY 326 jz .Lenc_key_invalid_param 327 328 movups (%USERCIPHERKEY), %xmm0 // user key (first 16 bytes) 329 movups %xmm0, (%AESKEY) 330 lea 0x10(%AESKEY), %rcx // key addr 331 pxor %xmm4, %xmm4 // xmm4 is 
assumed 0 in _key_expansion_x 332 333 cmp $256, %KEYSIZE32 334 jnz .Lenc_key192 335 336 // AES 256: 14 rounds in encryption key schedule 337#ifdef OPENSSL_INTERFACE 338 mov $14, %ROUNDS32 339 movl %ROUNDS32, 240(%AESKEY) // key.rounds = 14 340#endif /* OPENSSL_INTERFACE */ 341 342 movups 0x10(%USERCIPHERKEY), %xmm2 // other user key (2nd 16 bytes) 343 movups %xmm2, (%rcx) 344 add $0x10, %rcx 345 346 aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key 347 call _key_expansion_256a_local 348 aeskeygenassist $0x1, %xmm0, %xmm1 349 call _key_expansion_256b_local 350 aeskeygenassist $0x2, %xmm2, %xmm1 // expand the key 351 call _key_expansion_256a_local 352 aeskeygenassist $0x2, %xmm0, %xmm1 353 call _key_expansion_256b_local 354 aeskeygenassist $0x4, %xmm2, %xmm1 // expand the key 355 call _key_expansion_256a_local 356 aeskeygenassist $0x4, %xmm0, %xmm1 357 call _key_expansion_256b_local 358 aeskeygenassist $0x8, %xmm2, %xmm1 // expand the key 359 call _key_expansion_256a_local 360 aeskeygenassist $0x8, %xmm0, %xmm1 361 call _key_expansion_256b_local 362 aeskeygenassist $0x10, %xmm2, %xmm1 // expand the key 363 call _key_expansion_256a_local 364 aeskeygenassist $0x10, %xmm0, %xmm1 365 call _key_expansion_256b_local 366 aeskeygenassist $0x20, %xmm2, %xmm1 // expand the key 367 call _key_expansion_256a_local 368 aeskeygenassist $0x20, %xmm0, %xmm1 369 call _key_expansion_256b_local 370 aeskeygenassist $0x40, %xmm2, %xmm1 // expand the key 371 call _key_expansion_256a_local 372 373#ifdef OPENSSL_INTERFACE 374 xor %rax, %rax // return 0 (OK) 375#else /* Open Solaris Interface */ 376 mov $14, %rax // return # rounds = 14 377#endif 378 FRAME_END 379 RET 380 381.balign 4 382.Lenc_key192: 383 cmp $192, %KEYSIZE32 384 jnz .Lenc_key128 385 386 // AES 192: 12 rounds in encryption key schedule 387#ifdef OPENSSL_INTERFACE 388 mov $12, %ROUNDS32 389 movl %ROUNDS32, 240(%AESKEY) // key.rounds = 12 390#endif /* OPENSSL_INTERFACE */ 391 392 movq 0x10(%USERCIPHERKEY), %xmm2 // other 
user key 393 aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key 394 call _key_expansion_192a_local 395 aeskeygenassist $0x2, %xmm2, %xmm1 // expand the key 396 call _key_expansion_192b_local 397 aeskeygenassist $0x4, %xmm2, %xmm1 // expand the key 398 call _key_expansion_192a_local 399 aeskeygenassist $0x8, %xmm2, %xmm1 // expand the key 400 call _key_expansion_192b_local 401 aeskeygenassist $0x10, %xmm2, %xmm1 // expand the key 402 call _key_expansion_192a_local 403 aeskeygenassist $0x20, %xmm2, %xmm1 // expand the key 404 call _key_expansion_192b_local 405 aeskeygenassist $0x40, %xmm2, %xmm1 // expand the key 406 call _key_expansion_192a_local 407 aeskeygenassist $0x80, %xmm2, %xmm1 // expand the key 408 call _key_expansion_192b_local 409 410#ifdef OPENSSL_INTERFACE 411 xor %rax, %rax // return 0 (OK) 412#else /* OpenSolaris Interface */ 413 mov $12, %rax // return # rounds = 12 414#endif 415 FRAME_END 416 RET 417 418.balign 4 419.Lenc_key128: 420 cmp $128, %KEYSIZE32 421 jnz .Lenc_key_invalid_key_bits 422 423 // AES 128: 10 rounds in encryption key schedule 424#ifdef OPENSSL_INTERFACE 425 mov $10, %ROUNDS32 426 movl %ROUNDS32, 240(%AESKEY) // key.rounds = 10 427#endif /* OPENSSL_INTERFACE */ 428 429 aeskeygenassist $0x1, %xmm0, %xmm1 // expand the key 430 call _key_expansion_128_local 431 aeskeygenassist $0x2, %xmm0, %xmm1 // expand the key 432 call _key_expansion_128_local 433 aeskeygenassist $0x4, %xmm0, %xmm1 // expand the key 434 call _key_expansion_128_local 435 aeskeygenassist $0x8, %xmm0, %xmm1 // expand the key 436 call _key_expansion_128_local 437 aeskeygenassist $0x10, %xmm0, %xmm1 // expand the key 438 call _key_expansion_128_local 439 aeskeygenassist $0x20, %xmm0, %xmm1 // expand the key 440 call _key_expansion_128_local 441 aeskeygenassist $0x40, %xmm0, %xmm1 // expand the key 442 call _key_expansion_128_local 443 aeskeygenassist $0x80, %xmm0, %xmm1 // expand the key 444 call _key_expansion_128_local 445 aeskeygenassist $0x1b, %xmm0, %xmm1 // 
expand the key 446 call _key_expansion_128_local 447 aeskeygenassist $0x36, %xmm0, %xmm1 // expand the key 448 call _key_expansion_128_local 449 450#ifdef OPENSSL_INTERFACE 451 xor %rax, %rax // return 0 (OK) 452#else /* OpenSolaris Interface */ 453 mov $10, %rax // return # rounds = 10 454#endif 455 FRAME_END 456 RET 457 458.Lenc_key_invalid_param: 459#ifdef OPENSSL_INTERFACE 460 mov $-1, %rax // user key or AES key pointer is NULL 461 FRAME_END 462 RET 463#else 464 /* FALLTHROUGH */ 465#endif /* OPENSSL_INTERFACE */ 466 467.Lenc_key_invalid_key_bits: 468#ifdef OPENSSL_INTERFACE 469 mov $-2, %rax // keysize is invalid 470#else /* Open Solaris Interface */ 471 xor %rax, %rax // a key pointer is NULL or invalid keysize 472#endif /* OPENSSL_INTERFACE */ 473 FRAME_END 474 RET 475 SET_SIZE(rijndael_key_setup_enc_intel) 476 477 478/* 479 * rijndael_key_setup_dec_intel() 480 * Expand the cipher key into the decryption key schedule. 481 * 482 * For kernel code, caller is responsible for ensuring kpreempt_disable() 483 * has been called. This is because %xmm registers are not saved/restored. 484 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set 485 * on entry. Otherwise, if TS is not set, save and restore %xmm registers 486 * on the stack. 487 * 488 * OpenSolaris interface: 489 * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[], 490 * uint64_t keyBits); 491 * Return value is 0 on error, number of rounds on success. 492 * P1->P2, P2->P3, P3->P1 493 * 494 * Original Intel OpenSSL interface: 495 * int intel_AES_set_decrypt_key(const unsigned char *userKey, 496 * const int bits, AES_KEY *key); 497 * Return value is non-zero on error, 0 on success. 
498 */ 499 500ENTRY_NP(rijndael_key_setup_dec_intel) 501FRAME_BEGIN 502 // Generate round keys used for encryption 503 call rijndael_key_setup_enc_intel_local 504 test %rax, %rax 505#ifdef OPENSSL_INTERFACE 506 jnz .Ldec_key_exit // Failed if returned non-0 507#else /* OpenSolaris Interface */ 508 jz .Ldec_key_exit // Failed if returned 0 509#endif /* OPENSSL_INTERFACE */ 510 511 /* 512 * Convert round keys used for encryption 513 * to a form usable for decryption 514 */ 515#ifndef OPENSSL_INTERFACE /* OpenSolaris Interface */ 516 mov %rax, %ROUNDS64 // set # rounds (10, 12, or 14) 517 // (already set for OpenSSL) 518#endif 519 520 lea 0x10(%AESKEY), %rcx // key addr 521 shl $4, %ROUNDS32 522 add %AESKEY, %ROUNDS64 523 mov %ROUNDS64, %ENDAESKEY 524 525.balign 4 526.Ldec_key_reorder_loop: 527 movups (%AESKEY), %xmm0 528 movups (%ROUNDS64), %xmm1 529 movups %xmm0, (%ROUNDS64) 530 movups %xmm1, (%AESKEY) 531 lea 0x10(%AESKEY), %AESKEY 532 lea -0x10(%ROUNDS64), %ROUNDS64 533 cmp %AESKEY, %ROUNDS64 534 ja .Ldec_key_reorder_loop 535 536.balign 4 537.Ldec_key_inv_loop: 538 movups (%rcx), %xmm0 539 // Convert an encryption round key to a form usable for decryption 540 // with the "AES Inverse Mix Columns" instruction 541 aesimc %xmm0, %xmm1 542 movups %xmm1, (%rcx) 543 lea 0x10(%rcx), %rcx 544 cmp %ENDAESKEY, %rcx 545 jnz .Ldec_key_inv_loop 546 547.Ldec_key_exit: 548 // OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error 549 // OpenSSL: rax = 0 for OK, or non-zero for error 550 FRAME_END 551 RET 552 SET_SIZE(rijndael_key_setup_dec_intel) 553 554 555/* 556 * aes_encrypt_intel() 557 * Encrypt a single block (in and out can overlap). 558 * 559 * For kernel code, caller is responsible for ensuring kpreempt_disable() 560 * has been called. This is because %xmm registers are not saved/restored. 561 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set 562 * on entry. Otherwise, if TS is not set, save and restore %xmm registers 563 * on the stack. 
564 * 565 * Temporary register usage: 566 * %xmm0 State 567 * %xmm1 Key 568 * 569 * Original OpenSolaris Interface: 570 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr, 571 * const uint32_t pt[4], uint32_t ct[4]) 572 * 573 * Original Intel OpenSSL Interface: 574 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out, 575 * const AES_KEY *key) 576 */ 577 578#ifdef OPENSSL_INTERFACE 579#define aes_encrypt_intel intel_AES_encrypt 580#define aes_decrypt_intel intel_AES_decrypt 581 582#define INP rdi /* P1, 64 bits */ 583#define OUTP rsi /* P2, 64 bits */ 584#define KEYP rdx /* P3, 64 bits */ 585 586/* No NROUNDS parameter--offset 240 from KEYP saved in %ecx: */ 587#define NROUNDS32 ecx /* temporary, 32 bits */ 588#define NROUNDS cl /* temporary, 8 bits */ 589 590#else /* OpenSolaris Interface */ 591#define KEYP rdi /* P1, 64 bits */ 592#define NROUNDS esi /* P2, 32 bits */ 593#define INP rdx /* P3, 64 bits */ 594#define OUTP rcx /* P4, 64 bits */ 595#endif /* OPENSSL_INTERFACE */ 596 597#define STATE xmm0 /* temporary, 128 bits */ 598#define KEY xmm1 /* temporary, 128 bits */ 599 600 601ENTRY_NP(aes_encrypt_intel) 602 603 movups (%INP), %STATE // input 604 movups (%KEYP), %KEY // key 605#ifdef OPENSSL_INTERFACE 606 mov 240(%KEYP), %NROUNDS32 // round count 607#else /* OpenSolaris Interface */ 608 /* Round count is already present as P2 in %rsi/%esi */ 609#endif /* OPENSSL_INTERFACE */ 610 611 pxor %KEY, %STATE // round 0 612 lea 0x30(%KEYP), %KEYP 613 cmp $12, %NROUNDS 614 jb .Lenc128 615 lea 0x20(%KEYP), %KEYP 616 je .Lenc192 617 618 // AES 256 619 lea 0x20(%KEYP), %KEYP 620 movups -0x60(%KEYP), %KEY 621 aesenc %KEY, %STATE 622 movups -0x50(%KEYP), %KEY 623 aesenc %KEY, %STATE 624 625.balign 4 626.Lenc192: 627 // AES 192 and 256 628 movups -0x40(%KEYP), %KEY 629 aesenc %KEY, %STATE 630 movups -0x30(%KEYP), %KEY 631 aesenc %KEY, %STATE 632 633.balign 4 634.Lenc128: 635 // AES 128, 192, and 256 636 movups -0x20(%KEYP), %KEY 637 aesenc %KEY, %STATE 638 
movups -0x10(%KEYP), %KEY 639 aesenc %KEY, %STATE 640 movups (%KEYP), %KEY 641 aesenc %KEY, %STATE 642 movups 0x10(%KEYP), %KEY 643 aesenc %KEY, %STATE 644 movups 0x20(%KEYP), %KEY 645 aesenc %KEY, %STATE 646 movups 0x30(%KEYP), %KEY 647 aesenc %KEY, %STATE 648 movups 0x40(%KEYP), %KEY 649 aesenc %KEY, %STATE 650 movups 0x50(%KEYP), %KEY 651 aesenc %KEY, %STATE 652 movups 0x60(%KEYP), %KEY 653 aesenc %KEY, %STATE 654 movups 0x70(%KEYP), %KEY 655 aesenclast %KEY, %STATE // last round 656 movups %STATE, (%OUTP) // output 657 658 RET 659 SET_SIZE(aes_encrypt_intel) 660 661 662/* 663 * aes_decrypt_intel() 664 * Decrypt a single block (in and out can overlap). 665 * 666 * For kernel code, caller is responsible for ensuring kpreempt_disable() 667 * has been called. This is because %xmm registers are not saved/restored. 668 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set 669 * on entry. Otherwise, if TS is not set, save and restore %xmm registers 670 * on the stack. 
671 * 672 * Temporary register usage: 673 * %xmm0 State 674 * %xmm1 Key 675 * 676 * Original OpenSolaris Interface: 677 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr, 678 * const uint32_t pt[4], uint32_t ct[4])/ 679 * 680 * Original Intel OpenSSL Interface: 681 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out, 682 * const AES_KEY *key); 683 */ 684ENTRY_NP(aes_decrypt_intel) 685 686 movups (%INP), %STATE // input 687 movups (%KEYP), %KEY // key 688#ifdef OPENSSL_INTERFACE 689 mov 240(%KEYP), %NROUNDS32 // round count 690#else /* OpenSolaris Interface */ 691 /* Round count is already present as P2 in %rsi/%esi */ 692#endif /* OPENSSL_INTERFACE */ 693 694 pxor %KEY, %STATE // round 0 695 lea 0x30(%KEYP), %KEYP 696 cmp $12, %NROUNDS 697 jb .Ldec128 698 lea 0x20(%KEYP), %KEYP 699 je .Ldec192 700 701 // AES 256 702 lea 0x20(%KEYP), %KEYP 703 movups -0x60(%KEYP), %KEY 704 aesdec %KEY, %STATE 705 movups -0x50(%KEYP), %KEY 706 aesdec %KEY, %STATE 707 708.balign 4 709.Ldec192: 710 // AES 192 and 256 711 movups -0x40(%KEYP), %KEY 712 aesdec %KEY, %STATE 713 movups -0x30(%KEYP), %KEY 714 aesdec %KEY, %STATE 715 716.balign 4 717.Ldec128: 718 // AES 128, 192, and 256 719 movups -0x20(%KEYP), %KEY 720 aesdec %KEY, %STATE 721 movups -0x10(%KEYP), %KEY 722 aesdec %KEY, %STATE 723 movups (%KEYP), %KEY 724 aesdec %KEY, %STATE 725 movups 0x10(%KEYP), %KEY 726 aesdec %KEY, %STATE 727 movups 0x20(%KEYP), %KEY 728 aesdec %KEY, %STATE 729 movups 0x30(%KEYP), %KEY 730 aesdec %KEY, %STATE 731 movups 0x40(%KEYP), %KEY 732 aesdec %KEY, %STATE 733 movups 0x50(%KEYP), %KEY 734 aesdec %KEY, %STATE 735 movups 0x60(%KEYP), %KEY 736 aesdec %KEY, %STATE 737 movups 0x70(%KEYP), %KEY 738 aesdeclast %KEY, %STATE // last round 739 movups %STATE, (%OUTP) // output 740 741 RET 742 SET_SIZE(aes_decrypt_intel) 743 744#endif /* lint || __lint */ 745 746#ifdef __ELF__ 747.section .note.GNU-stack,"",%progbits 748#endif 749