1// SPDX-License-Identifier: OpenSSL-standalone 2/* 3 * ==================================================================== 4 * Written by Intel Corporation for the OpenSSL project to add support 5 * for Intel AES-NI instructions. Rights for redistribution and usage 6 * in source and binary forms are granted according to the OpenSSL 7 * license. 8 * 9 * Author: Huang Ying <ying.huang at intel dot com> 10 * Vinodh Gopal <vinodh.gopal at intel dot com> 11 * Kahraman Akdemir 12 * 13 * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD) 14 * instructions that are going to be introduced in the next generation 15 * of Intel processor, as of 2009. These instructions enable fast and 16 * secure data encryption and decryption, using the Advanced Encryption 17 * Standard (AES), defined by FIPS Publication number 197. The 18 * architecture introduces six instructions that offer full hardware 19 * support for AES. Four of them support high performance data 20 * encryption and decryption, and the other two instructions support 21 * the AES key expansion procedure. 22 * ==================================================================== 23 */ 24 25/* 26 * ==================================================================== 27 * Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved. 28 * 29 * Redistribution and use in source and binary forms, with or without 30 * modification, are permitted provided that the following conditions 31 * are met: 32 * 33 * 1. Redistributions of source code must retain the above copyright 34 * notice, this list of conditions and the following disclaimer. 35 * 36 * 2. Redistributions in binary form must reproduce the above copyright 37 * notice, this list of conditions and the following disclaimer in 38 * the documentation and/or other materials provided with the 39 * distribution. 40 * 41 * 3. 
All advertising materials mentioning features or use of this 42 * software must display the following acknowledgment: 43 * "This product includes software developed by the OpenSSL Project 44 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 45 * 46 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 47 * endorse or promote products derived from this software without 48 * prior written permission. For written permission, please contact 49 * openssl-core@openssl.org. 50 * 51 * 5. Products derived from this software may not be called "OpenSSL" 52 * nor may "OpenSSL" appear in their names without prior written 53 * permission of the OpenSSL Project. 54 * 55 * 6. Redistributions of any form whatsoever must retain the following 56 * acknowledgment: 57 * "This product includes software developed by the OpenSSL Project 58 * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 59 * 60 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 61 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 62 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 63 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 64 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 65 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 66 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 67 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 68 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 69 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 70 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 71 * OF THE POSSIBILITY OF SUCH DAMAGE. 
* ====================================================================
 */

/*
 * ====================================================================
 * OpenSolaris OS modifications
 *
 * This source originates as files aes-intel.S and eng_aesni_asm.pl, in
 * patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
 * Huang Ying of Intel to the openssl-dev mailing list under the subject
 * of "Add support to Intel AES-NI instruction set for x86_64 platform".
 *
 * This OpenSolaris version has these major changes from the original source:
 *
 * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
 * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
 * definitions for lint.
 *
 * 2. Formatted code, added comments, and added #includes and #defines.
 *
 * 3. If bit CR0.TS is set, clear and set the TS bit, after and before
 * calling kpreempt_disable() and kpreempt_enable().
 * If the TS bit is not set, save and restore %xmm registers at the beginning
 * and end of function calls (%xmm* registers are not saved and restored
 * during kernel thread preemption).
 *
 * 4. Renamed functions, reordered parameters, and changed return value
 * to match OpenSolaris:
 *
 * OpenSSL interface:
 *	int intel_AES_set_encrypt_key(const unsigned char *userKey,
 *		const int bits, AES_KEY *key);
 *	int intel_AES_set_decrypt_key(const unsigned char *userKey,
 *		const int bits, AES_KEY *key);
 *	Return values for above are non-zero on error, 0 on success.
*
 *	void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
 *		const AES_KEY *key);
 *	void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
 *		const AES_KEY *key);
 *	typedef struct aes_key_st {
 *		unsigned int	rd_key[4 *(AES_MAXNR + 1)];
 *		int		rounds;
 *		unsigned int	pad[3];
 *	} AES_KEY;
 *	Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
 *	(ks32) instead of 64-bit (ks64)).
 *	Number of rounds (aka round count) is at offset 240 of AES_KEY.
 *
 * OpenSolaris OS interface (#ifdefs removed for readability):
 *	int rijndael_key_setup_dec_intel(uint32_t rk[],
 *		const uint32_t cipherKey[], uint64_t keyBits);
 *	int rijndael_key_setup_enc_intel(uint32_t rk[],
 *		const uint32_t cipherKey[], uint64_t keyBits);
 *	Return values for above are 0 on error, number of rounds on success.
 *
 *	void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
 *		const uint32_t pt[4], uint32_t ct[4]);
 *	void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
 *		const uint32_t ct[4], uint32_t pt[4]);
 *	typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
 *		 uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
 *
 *	typedef union {
 *		uint32_t	ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
 *	} aes_ks_t;
 *	typedef struct aes_key {
 *		aes_ks_t	encr_ks, decr_ks;
 *		long double	align128;
 *		int		flags, nr, type;
 *	} aes_key_t;
 *
 * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
 * ct is cipher text, and MAX_AES_NR is 14.
 * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
 *
 * Note2: aes_ks_t must be aligned on a 0 mod 128 byte boundary.
*
 * ====================================================================
 */


#if defined(lint) || defined(__lint)

#include <sys/types.h>

/*
 * lint-only C stand-ins for the assembly entry points that are defined in
 * the HAVE_AES branch of this conditional.  Each stub only marks its
 * arguments as used; the two key setup stubs additionally return 0.
 */

void
aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
    uint32_t ct[4])
{
	(void) rk;
	(void) Nr;
	(void) pt;
	(void) ct;
}

void
aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
    uint32_t pt[4])
{
	(void) rk;
	(void) Nr;
	(void) ct;
	(void) pt;
}

int
rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits)
{
	(void) rk;
	(void) cipherKey;
	(void) keyBits;
	return (0);
}

int
rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits)
{
	(void) rk;
	(void) cipherKey;
	(void) keyBits;
	return (0);
}


#elif defined(HAVE_AES)	/* guard by instruction set */

#define	_ASM
#include <sys/asm_linkage.h>

/*
 * _key_expansion_128(), _key_expansion_192a(), _key_expansion_192b(),
 * _key_expansion_256a(), _key_expansion_256b()
 *
 * Helper functions called by rijndael_key_setup_enc_intel().
 * Also used indirectly by rijndael_key_setup_dec_intel().
*
 * Input:
 * %xmm0	Key material being expanded; the caller loads the first 16
 *		bytes of the user key here before the first call
 * %xmm1	Result of aeskeygenassist, computed by the caller
 * %xmm2	Remaining user key bytes (192- and 256-bit keys only)
 * %xmm4	Scratch; the caller zeroes it once before the first call
 * %rcx		Address at which the next round key is stored
 * Output:
 * (%rcx)	Newly generated round key bytes; %rcx is advanced past them
 */

/*
 * _key_expansion_128 / _key_expansion_256a
 * Update %xmm0, store it as one 16-byte round key, advance %rcx by 16.
 * Writes %xmm0, %xmm1, %xmm4, %rcx and flags.
 */
ENTRY_NP2(_key_expansion_128, _key_expansion_256a)
_key_expansion_128_local:
_key_expansion_256a_local:
	pshufd	$0xff, %xmm1, %xmm1	// broadcast dword 3 of %xmm1
	shufps	$0x10, %xmm0, %xmm4	// low half from %xmm4, high from %xmm0
	pxor	%xmm4, %xmm0
	shufps	$0x8c, %xmm0, %xmm4	// second shuffle of %xmm4 with %xmm0
	pxor	%xmm4, %xmm0
	pxor	%xmm1, %xmm0		// mix in the aeskeygenassist word
	movups	%xmm0, (%rcx)		// emit the round key
	add	$16, %rcx
	RET
	nop
SET_SIZE(_key_expansion_128)
SET_SIZE(_key_expansion_256a)


/*
 * _key_expansion_192a
 * Update %xmm0 and %xmm2, store 32 bytes of key schedule, advance %rcx
 * by 32.  Writes %xmm0-%xmm6, %rcx and flags.
 */
ENTRY_NP(_key_expansion_192a)
_key_expansion_192a_local:
	pshufd	$0x55, %xmm1, %xmm1	// broadcast dword 1 of %xmm1
	shufps	$0x10, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0x8c, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pxor	%xmm1, %xmm0

	movups	%xmm2, %xmm5		// %xmm5 = old %xmm2 (shifted below)
	movups	%xmm2, %xmm6		// %xmm6 = old %xmm2 (stored below)
	pslldq	$4, %xmm5		// shift left by 4 bytes
	pshufd	$0xff, %xmm0, %xmm3	// broadcast dword 3 of new %xmm0
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	movups	%xmm0, %xmm1
	shufps	$0x44, %xmm0, %xmm6	// old %xmm2 low qword : %xmm0 low qword
	movups	%xmm6, (%rcx)
	shufps	$0x4e, %xmm2, %xmm1	// %xmm0 high qword : new %xmm2 low qword
	movups	%xmm1, 16(%rcx)
	add	$32, %rcx
	RET
SET_SIZE(_key_expansion_192a)


/*
 * _key_expansion_192b
 * Update %xmm0 and %xmm2, store %xmm0 as 16 bytes of key schedule,
 * advance %rcx by 16.  Writes %xmm0-%xmm5, %rcx and flags.
 */
ENTRY_NP(_key_expansion_192b)
_key_expansion_192b_local:
	pshufd	$0x55, %xmm1, %xmm1	// broadcast dword 1 of %xmm1
	shufps	$0x10, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0x8c, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pxor	%xmm1, %xmm0

	movups	%xmm2, %xmm5		// %xmm5 = old %xmm2
	pslldq	$4, %xmm5		// shift left by 4 bytes
	pshufd	$0xff, %xmm0, %xmm3	// broadcast dword 3 of new %xmm0
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	movups	%xmm0, (%rcx)
	add	$16, %rcx
	RET
SET_SIZE(_key_expansion_192b)


/*
 * _key_expansion_256b
 * Same shape as _key_expansion_256a, but selects dword 2 of %xmm1 and
 * updates and stores %xmm2 instead of %xmm0.
 * Writes %xmm1, %xmm2, %xmm4, %rcx and flags.
 */
ENTRY_NP(_key_expansion_256b)
_key_expansion_256b_local:
	pshufd	$0xaa, %xmm1, %xmm1	// broadcast dword 2 of %xmm1
	shufps	$0x10, %xmm2, %xmm4
	pxor	%xmm4, %xmm2
	shufps	$0x8c, %xmm2, %xmm4
	pxor	%xmm4, %xmm2
	pxor	%xmm1, %xmm2
	movups	%xmm2, (%rcx)
	add	$16, %rcx
	RET
SET_SIZE(_key_expansion_256b)


/*
 * 
rijndael_key_setup_enc_intel()
 * Expand the cipher key into the encryption key schedule.
 *
 * The instructions below use %xmm registers without saving or restoring
 * them, and nothing here reads or writes CR0.  For kernel code the caller
 * is therefore responsible for making SIMD state usable and for ensuring
 * kpreempt_disable() has been called.
 *
 * Accepted key sizes are 128, 192 and 256 bits.  The first 16 bytes of the
 * user key are copied to the start of the schedule before the key size is
 * checked, so they are written even when the size is then rejected.
 *
 * Registers written here or in the helpers: %rax, %rcx, %xmm0-%xmm2 and
 * %xmm4 (also %xmm3, %xmm5 and %xmm6 for 192-bit keys).  With
 * OPENSSL_INTERFACE the key size register is overwritten with the round
 * count, which is also stored at offset 240 of the key structure.
 *
 * OpenSolaris interface:
 * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
 *	uint64_t keyBits);
 * Return value is 0 on error, number of rounds on success.
 *
 * Original Intel OpenSSL interface:
 * int intel_AES_set_encrypt_key(const unsigned char *userKey,
 *	const int bits, AES_KEY *key);
 * Return value is non-zero on error (-1 for a NULL pointer, -2 for an
 * unsupported key size), 0 on success.
 */

#ifdef	OPENSSL_INTERFACE
/* Emit the entry points under the names used by the Intel OpenSSL patch. */
#define	rijndael_key_setup_enc_intel	intel_AES_set_encrypt_key
#define	rijndael_key_setup_dec_intel	intel_AES_set_decrypt_key

#define	USERCIPHERKEY		rdi	/* P1: user key pointer */
#define	KEYSIZE32		esi	/* P2: key size in bits, 32-bit view */
#define	KEYSIZE64		rsi	/* P2: key size in bits, 64-bit view */
#define	AESKEY			rdx	/* P3: key schedule pointer */

#else	/* OpenSolaris Interface */
#define	AESKEY			rdi	/* P1: key schedule pointer */
#define	USERCIPHERKEY		rsi	/* P2: user key pointer */
#define	KEYSIZE32		edx	/* P3: key size in bits, 32-bit view */
#define	KEYSIZE64		rdx	/* P3: key size in bits, 64-bit view */
#endif	/* OPENSSL_INTERFACE */

/* Scratch aliases: argument registers reused as temporaries. */
#define	ROUNDS32		KEYSIZE32	/* temp: round count */
#define	ROUNDS64		KEYSIZE64	/* temp: round count / pointer */
#define	ENDAESKEY		USERCIPHERKEY	/* temp: end-of-schedule pointer */

ENTRY_NP(rijndael_key_setup_enc_intel)
rijndael_key_setup_enc_intel_local:
	FRAME_BEGIN

	// Reject NULL for either pointer argument.
	test	%USERCIPHERKEY, %USERCIPHERKEY
	je	.Lenc_bad_ptr
	test	%AESKEY, %AESKEY
	je	.Lenc_bad_ptr

	// Round key 0 is the first 16 bytes of the user key for every size.
	movups	(%USERCIPHERKEY), %xmm0
	movups	%xmm0, (%AESKEY)
	lea	16(%AESKEY), %rcx	// %rcx = where the next round key goes
	pxor	%xmm4, %xmm4		// helpers expect %xmm4 to start as 0

	cmp	$256, %KEYSIZE32
	jne	.Lenc_try_192

	/*
	 * 256-bit key: 14 rounds.
	 */
#ifdef	OPENSSL_INTERFACE
	mov	$14, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		// key.rounds = 14
#endif	/* OPENSSL_INTERFACE */

	// Round key 1 is the second 16 bytes of the user key.
	movups	16(%USERCIPHERKEY), %xmm2
	movups	%xmm2, (%rcx)
	add	$16, %rcx

	// The 256a helper emits from %xmm0, the 256b helper from %xmm2.
	aeskeygenassist $0x01, %xmm2, %xmm1	// round key 2
	call	_key_expansion_256a_local
	aeskeygenassist $0x01, %xmm0, %xmm1	// round key 3
	call	_key_expansion_256b_local
	aeskeygenassist $0x02, %xmm2, %xmm1	// round key 4
	call	_key_expansion_256a_local
	aeskeygenassist $0x02, %xmm0, %xmm1	// round key 5
	call	_key_expansion_256b_local
	aeskeygenassist $0x04, %xmm2, %xmm1	// round key 6
	call	_key_expansion_256a_local
	aeskeygenassist $0x04, %xmm0, %xmm1	// round key 7
	call	_key_expansion_256b_local
	aeskeygenassist $0x08, %xmm2, %xmm1	// round key 8
	call	_key_expansion_256a_local
	aeskeygenassist $0x08, %xmm0, %xmm1	// round key 9
	call	_key_expansion_256b_local
	aeskeygenassist $0x10, %xmm2, %xmm1	// round key 10
	call	_key_expansion_256a_local
	aeskeygenassist $0x10, %xmm0, %xmm1	// round key 11
	call	_key_expansion_256b_local
	aeskeygenassist $0x20, %xmm2, %xmm1	// round key 12
	call	_key_expansion_256a_local
	aeskeygenassist $0x20, %xmm0, %xmm1	// round key 13
	call	_key_expansion_256b_local
	aeskeygenassist $0x40, %xmm2, %xmm1	// round key 14
	call	_key_expansion_256a_local

#ifdef	OPENSSL_INTERFACE
	xor	%rax, %rax			// status 0: success
#else	/* Open Solaris Interface */
	mov	$14, %rax			// number of rounds
#endif
	FRAME_END
	RET

.balign 4
.Lenc_try_192:
	cmp	$192, %KEYSIZE32
	jne	.Lenc_try_128

	/*
	 * 192-bit key: 12 rounds.
	 */
#ifdef	OPENSSL_INTERFACE
	mov	$12, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		// key.rounds = 12
#endif	/* OPENSSL_INTERFACE */

	// Only 8 more key bytes exist; the 192a helper stores them.
	movq	16(%USERCIPHERKEY), %xmm2

	// The 192a helper emits 32 bytes per call, the 192b helper 16 bytes.
	aeskeygenassist $0x01, %xmm2, %xmm1
	call	_key_expansion_192a_local
	aeskeygenassist $0x02, %xmm2, %xmm1
	call	_key_expansion_192b_local
	aeskeygenassist $0x04, %xmm2, %xmm1
	call	_key_expansion_192a_local
	aeskeygenassist $0x08, %xmm2, %xmm1
	call	_key_expansion_192b_local
	aeskeygenassist $0x10, %xmm2, %xmm1
	call	_key_expansion_192a_local
	aeskeygenassist $0x20, %xmm2, %xmm1
	call	_key_expansion_192b_local
	aeskeygenassist $0x40, %xmm2, %xmm1
	call	_key_expansion_192a_local
	aeskeygenassist $0x80, %xmm2, %xmm1
	call	_key_expansion_192b_local

#ifdef	OPENSSL_INTERFACE
	xor	%rax, %rax			// status 0: success
#else	/* OpenSolaris Interface */
	mov	$12, %rax			// number of rounds
#endif
	FRAME_END
	RET

.balign 4
.Lenc_try_128:
	cmp	$128, %KEYSIZE32
	jne	.Lenc_bad_bits

	/*
	 * 128-bit key: 10 rounds.
	 */
#ifdef	OPENSSL_INTERFACE
	mov	$10, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		// key.rounds = 10
#endif	/* OPENSSL_INTERFACE */

	// One helper call per round key, each with its own round constant.
	aeskeygenassist $0x01, %xmm0, %xmm1	// round key 1
	call	_key_expansion_128_local
	aeskeygenassist $0x02, %xmm0, %xmm1	// round key 2
	call	_key_expansion_128_local
	aeskeygenassist $0x04, %xmm0, %xmm1	// round key 3
	call	_key_expansion_128_local
	aeskeygenassist $0x08, %xmm0, %xmm1	// round key 4
	call	_key_expansion_128_local
	aeskeygenassist $0x10, %xmm0, %xmm1	// round key 5
	call	_key_expansion_128_local
	aeskeygenassist $0x20, %xmm0, %xmm1	// round key 6
	call	_key_expansion_128_local
	aeskeygenassist $0x40, %xmm0, %xmm1	// round key 7
	call	_key_expansion_128_local
	aeskeygenassist $0x80, %xmm0, %xmm1	// round key 8
	call	_key_expansion_128_local
	aeskeygenassist $0x1b, %xmm0, %xmm1	// round key 9
	call	_key_expansion_128_local
	aeskeygenassist $0x36, %xmm0, %xmm1	// round key 10
	call	_key_expansion_128_local

#ifdef	OPENSSL_INTERFACE
	xor	%rax, %rax			// status 0: success
#else	/* OpenSolaris Interface */
	mov	$10, %rax			// number of rounds
#endif
	FRAME_END
	RET

.Lenc_bad_ptr:
#ifdef	OPENSSL_INTERFACE
	mov	$-1, %rax	// user key or AES key pointer is NULL
	FRAME_END
	RET
#else
	/* FALLTHROUGH: the OpenSolaris interface has a single error value */
#endif	/* OPENSSL_INTERFACE */

.Lenc_bad_bits:
#ifdef	OPENSSL_INTERFACE
	mov	$-2, %rax	// keysize is invalid
#else	/* Open Solaris Interface */
	xor	%rax, %rax	// a key pointer is NULL or invalid keysize
#endif	/* OPENSSL_INTERFACE */
	FRAME_END
	RET
	SET_SIZE(rijndael_key_setup_enc_intel)


/*
 * rijndael_key_setup_dec_intel()
 * Expand the cipher key into the decryption key schedule.
 *
 * Builds the encryption key schedule first, then reverses the order of
 * the round keys in place and applies aesimc to every round key except
 * the first and the last.
 *
 * The instructions below use %xmm registers without saving or restoring
 * them, and nothing here reads or writes CR0.  For kernel code the caller
 * is therefore responsible for making SIMD state usable and for ensuring
 * kpreempt_disable() has been called.
 *
 * OpenSolaris interface:
 * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
 *	uint64_t keyBits);
 * Return value is 0 on error, number of rounds on success.
 * P1->P2, P2->P3, P3->P1
 *
 * Original Intel OpenSSL interface:
 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
 *	const int bits, AES_KEY *key);
 * Return value is non-zero on error, 0 on success.
*/

ENTRY_NP(rijndael_key_setup_dec_intel)
	FRAME_BEGIN

	// Step 1: build the encryption schedule in place.  On failure the
	// status in %rax is passed straight back to our caller.
	call	rijndael_key_setup_enc_intel_local
	test	%rax, %rax
#ifdef	OPENSSL_INTERFACE
	jne	.Ldec_key_done		// non-zero status: key setup failed
#else	/* OpenSolaris Interface */
	je	.Ldec_key_done		// zero rounds: key setup failed
#endif	/* OPENSSL_INTERFACE */

	/*
	 * Step 2: turn the encryption schedule into a decryption schedule.
	 */
#ifndef	OPENSSL_INTERFACE		/* OpenSolaris Interface */
	// The round count (10, 12, or 14) came back in %rax.  With the
	// OpenSSL interface the key setup call already left it in ROUNDS32.
	mov	%rax, %ROUNDS64
#endif

	lea	16(%AESKEY), %rcx	// %rcx = address of round key 1
	shl	$4, %ROUNDS32		// rounds * 16 = offset of last key
	add	%AESKEY, %ROUNDS64	// ROUNDS64 = address of last key
	mov	%ROUNDS64, %ENDAESKEY	// remembered as the aesimc stop point

	// Swap round keys pairwise from both ends, walking inwards until
	// the two pointers meet.
.balign 4
.Ldec_key_swap:
	movups	(%AESKEY), %xmm0
	movups	(%ROUNDS64), %xmm1
	movups	%xmm0, (%ROUNDS64)
	movups	%xmm1, (%AESKEY)
	lea	16(%AESKEY), %AESKEY
	lea	-16(%ROUNDS64), %ROUNDS64
	cmp	%AESKEY, %ROUNDS64
	ja	.Ldec_key_swap		// unsigned: back pointer still ahead

	// Apply the "AES Inverse Mix Columns" instruction to each round key
	// from round key 1 up to, but not including, the last one.
.balign 4
.Ldec_key_imc:
	movups	(%rcx), %xmm0
	aesimc	%xmm0, %xmm1
	movups	%xmm1, (%rcx)
	lea	16(%rcx), %rcx
	cmp	%ENDAESKEY, %rcx
	jne	.Ldec_key_imc

.Ldec_key_done:
	// OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
	// OpenSSL: rax = 0 for OK, or non-zero for error
	FRAME_END
	RET
	SET_SIZE(rijndael_key_setup_dec_intel)


/*
 * aes_encrypt_intel()
 * Encrypt a single block (in and out can overlap).
 *
 * The whole input block is loaded into a register before the output is
 * stored, which is what makes overlapping buffers safe.
 *
 * The instructions below use %xmm0 and %xmm1 without saving or restoring
 * them, and nothing here reads or writes CR0.  For kernel code the caller
 * is therefore responsible for making SIMD state usable and for ensuring
 * kpreempt_disable() has been called.
*
 * Temporary register usage:
 * %xmm0	State
 * %xmm1	Key
 *
 * The key schedule pointer register is advanced in place and, with
 * OPENSSL_INTERFACE, %ecx is overwritten with the round count.
 *
 * A round count below 12 takes the 10-round path, exactly 12 the 12-round
 * path, and anything above 12 the 14-round path; the count is not
 * validated any further.
 *
 * Original OpenSolaris Interface:
 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
 *	const uint32_t pt[4], uint32_t ct[4])
 *
 * Original Intel OpenSSL Interface:
 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
 *	const AES_KEY *key)
 */

#ifdef	OPENSSL_INTERFACE
/* Emit the entry points under the names used by the Intel OpenSSL patch. */
#define	aes_encrypt_intel	intel_AES_encrypt
#define	aes_decrypt_intel	intel_AES_decrypt

#define	INP		rdi	/* P1: input block pointer */
#define	OUTP		rsi	/* P2: output block pointer */
#define	KEYP		rdx	/* P3: key structure pointer */

/* No round count argument: it is loaded from offset 240 of KEYP into %ecx. */
#define	NROUNDS32	ecx	/* temporary, 32 bits */
#define	NROUNDS		cl	/* temporary,  8 bits */

#else	/* OpenSolaris Interface */
#define	KEYP		rdi	/* P1: key schedule pointer */
#define	NROUNDS		esi	/* P2: round count, 32 bits */
#define	INP		rdx	/* P3: input block pointer */
#define	OUTP		rcx	/* P4: output block pointer */
#endif	/* OPENSSL_INTERFACE */

#define	STATE		xmm0	/* temporary, 128 bits: block being processed */
#define	KEY		xmm1	/* temporary, 128 bits: current round key */


ENTRY_NP(aes_encrypt_intel)

	movups	(%INP), %STATE			// load the input block
	movups	(%KEYP), %KEY			// load round key 0
#ifdef	OPENSSL_INTERFACE
	mov	240(%KEYP), %NROUNDS32		// round count
#else	/* OpenSolaris Interface */
	/* Round count is already present as P2 in %rsi/%esi */
#endif	/* OPENSSL_INTERFACE */

	pxor	%KEY, %STATE			// round 0: xor with round key 0

	// Bias KEYP by the round count so that the last round key always
	// sits at 0x70(KEYP) and all key sizes share the tail below.  lea
	// leaves the flags alone, so the je still sees the result of the cmp.
	lea	0x30(%KEYP), %KEYP
	cmp	$12, %NROUNDS
	jb	.Lenc_rounds_128
	lea	0x20(%KEYP), %KEYP
	je	.Lenc_rounds_192

	// 14 rounds only: two extra leading rounds
	lea	0x20(%KEYP), %KEYP
	movups	-0x60(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	-0x50(%KEYP), %KEY
	aesenc	%KEY, %STATE

.balign 4
.Lenc_rounds_192:
	// 12 and 14 rounds: two more rounds
	movups	-0x40(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	-0x30(%KEYP), %KEY
	aesenc	%KEY, %STATE

.balign 4
.Lenc_rounds_128:
	// every round count: nine middle rounds and the final round
	movups	-0x20(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	-0x10(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x10(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x20(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x30(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x40(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x50(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x60(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x70(%KEYP), %KEY
	aesenclast	%KEY, %STATE		// final round
	movups	%STATE, (%OUTP)			// store the output block

	RET
	SET_SIZE(aes_encrypt_intel)


/*
 * aes_decrypt_intel()
 * Decrypt a single block (in and out can overlap).
 *
 * The whole input block is loaded into a register before the output is
 * stored, which is what makes overlapping buffers safe.
 *
 * The instructions below use %xmm0 and %xmm1 without saving or restoring
 * them, and nothing here reads or writes CR0.  For kernel code the caller
 * is therefore responsible for making SIMD state usable and for ensuring
 * kpreempt_disable() has been called.
*
 * Temporary register usage:
 * %xmm0	State
 * %xmm1	Key
 *
 * The key schedule pointer register is advanced in place and, with
 * OPENSSL_INTERFACE, %ecx is overwritten with the round count.
 *
 * A round count below 12 takes the 10-round path, exactly 12 the 12-round
 * path, and anything above 12 the 14-round path; the count is not
 * validated any further.
 *
 * Original OpenSolaris Interface:
 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
 *	const uint32_t ct[4], uint32_t pt[4])
 *
 * Original Intel OpenSSL Interface:
 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
 *	const AES_KEY *key);
 */
ENTRY_NP(aes_decrypt_intel)

	movups	(%INP), %STATE			// load the input block
	movups	(%KEYP), %KEY			// load round key 0
#ifdef	OPENSSL_INTERFACE
	mov	240(%KEYP), %NROUNDS32		// round count
#else	/* OpenSolaris Interface */
	/* Round count is already present as P2 in %rsi/%esi */
#endif	/* OPENSSL_INTERFACE */

	pxor	%KEY, %STATE			// round 0: xor with round key 0

	// Bias KEYP by the round count so that the last round key always
	// sits at 0x70(KEYP) and all key sizes share the tail below.  lea
	// leaves the flags alone, so the je still sees the result of the cmp.
	lea	0x30(%KEYP), %KEYP
	cmp	$12, %NROUNDS
	jb	.Ldec_rounds_128
	lea	0x20(%KEYP), %KEYP
	je	.Ldec_rounds_192

	// 14 rounds only: two extra leading rounds
	lea	0x20(%KEYP), %KEYP
	movups	-0x60(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	-0x50(%KEYP), %KEY
	aesdec	%KEY, %STATE

.balign 4
.Ldec_rounds_192:
	// 12 and 14 rounds: two more rounds
	movups	-0x40(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	-0x30(%KEYP), %KEY
	aesdec	%KEY, %STATE

.balign 4
.Ldec_rounds_128:
	// every round count: nine middle rounds and the final round
	movups	-0x20(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	-0x10(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x10(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x20(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x30(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x40(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x50(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x60(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x70(%KEYP), %KEY
	aesdeclast	%KEY, %STATE		// final round
	movups	%STATE, (%OUTP)			// store the output block

	RET
	SET_SIZE(aes_decrypt_intel)

#endif	/* lint || __lint */

#ifdef __ELF__
.section .note.GNU-stack,"",%progbits
#endif