1/* 2 * ==================================================================== 3 * Written by Intel Corporation for the OpenSSL project to add support 4 * for Intel AES-NI instructions. Rights for redistribution and usage 5 * in source and binary forms are granted according to the OpenSSL 6 * license. 7 * 8 * Author: Huang Ying <ying.huang at intel dot com> 9 * Vinodh Gopal <vinodh.gopal at intel dot com> 10 * Kahraman Akdemir 11 * 12 * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD) 13 * instructions that are going to be introduced in the next generation 14 * of Intel processor, as of 2009. These instructions enable fast and 15 * secure data encryption and decryption, using the Advanced Encryption 16 * Standard (AES), defined by FIPS Publication number 197. The 17 * architecture introduces six instructions that offer full hardware 18 * support for AES. Four of them support high performance data 19 * encryption and decryption, and the other two instructions support 20 * the AES key expansion procedure. 21 * ==================================================================== 22 */ 23 24/* 25 * ==================================================================== 26 * Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved. 27 * 28 * Redistribution and use in source and binary forms, with or without 29 * modification, are permitted provided that the following conditions 30 * are met: 31 * 32 * 1. Redistributions of source code must retain the above copyright 33 * notice, this list of conditions and the following disclaimer. 34 * 35 * 2. Redistributions in binary form must reproduce the above copyright 36 * notice, this list of conditions and the following disclaimer in 37 * the documentation and/or other materials provided with the 38 * distribution. 39 * 40 * 3. 
All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 */

/*
 * ====================================================================
 * OpenSolaris OS modifications
 *
 * This source originates as files aes-intel.S and eng_aesni_asm.pl, in
 * patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
 * Huang Ying of Intel to the openssl-dev mailing list under the subject
 * of "Add support to Intel AES-NI instruction set for x86_64 platform".
 *
 * This OpenSolaris version has these major changes from the original source:
 *
 * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
 * /usr/include/sys/asm_linkage.h, lint(1B) guards, EXPORT DELETE START
 * and EXPORT DELETE END markers, and dummy C function definitions for lint.
 *
 * 2. Formatted code, added comments, and added #includes and #defines.
 *
 * 3. Replaced aes* and palignr instructions with .byte sequences
 * (as they are not supported yet by all of the gas, as, and aw assemblers).
 *
 * 4. If bit CR0.TS is set, clear and set the TS bit, after and before
 * calling kpreempt_disable() and kpreempt_enable().
 * If the TS bit is not set, save and restore %xmm registers at the beginning
 * and end of function calls (%xmm* registers are not saved and restored
 * during kernel thread preemption).
 *
 * 5. Renamed functions, reordered parameters, and changed return value
 * to match OpenSolaris:
 *
 * OpenSSL interface:
 *	int intel_AES_set_encrypt_key(const unsigned char *userKey,
 *		const int bits, AES_KEY *key);
 *	int intel_AES_set_decrypt_key(const unsigned char *userKey,
 *		const int bits, AES_KEY *key);
 *	Return values for above are non-zero on error, 0 on success.
 *
 *	void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
 *		const AES_KEY *key);
 *	void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
 *		const AES_KEY *key);
 *	typedef struct aes_key_st {
 *		unsigned int	rd_key[4 *(AES_MAXNR + 1)];
 *		int		rounds;
 *		unsigned int	pad[3];
 *	} AES_KEY;
 * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
 * (ks32) instead of 64-bit (ks64)).
 * Number of rounds (aka round count) is at offset 240 of AES_KEY.
 *
 * OpenSolaris OS interface (#ifdefs removed for readability):
 *	int rijndael_key_setup_dec_intel(uint32_t rk[],
 *		const uint32_t cipherKey[], uint64_t keyBits);
 *	int rijndael_key_setup_enc_intel(uint32_t rk[],
 *		const uint32_t cipherKey[], uint64_t keyBits);
 *	Return values for above are 0 on error, number of rounds on success.
 *
 *	void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
 *		const uint32_t pt[4], uint32_t ct[4]);
 *	void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
 *		const uint32_t pt[4], uint32_t ct[4]);
 *	typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
 *		 uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
 *
 *	typedef union {
 *		uint32_t	ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
 *	} aes_ks_t;
 *	typedef struct aes_key {
 *		aes_ks_t	encr_ks, decr_ks;
 *		long double	align128;
 *		int		flags, nr, type;
 *	} aes_key_t;
 *
 * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
 * ct is cipher text, and MAX_AES_NR is 14.
 * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
 * Note2: aes_ks_t must be aligned on a 0 mod 128-bit (16-byte) boundary.
 * ====================================================================
 */

#if defined(lint) || defined(__lint)

#include <sys/types.h>

/*
 * Dummy C definitions of the four assembly entry points.  They are only
 * compiled when lint is defined, so that lint sees a C prototype for each
 * routine; the bodies are intentionally empty.  The real implementations
 * are in the #else branch below.
 */

/* ARGSUSED */
void
aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
    uint32_t ct[4]) {
}
/* ARGSUSED */
void
aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
    uint32_t pt[4]) {
}
/* ARGSUSED */
int
rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits) {
	return (0);
}
/* ARGSUSED */
int
rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits) {
	return (0);
}


#else	/* lint */

#include <sys/asm_linkage.h>
#include <sys/controlregs.h>
#ifdef _KERNEL
#include <sys/machprivregs.h>
#endif

#ifdef _KERNEL
	/*
	 * Note: the CLTS macro clobbers P2 (%rsi) under i86xpv.  That is,
	 * it calls HYPERVISOR_fpu_taskswitch() which modifies %rsi when it
	 * uses it to pass P2 to syscall.
	 * This also occurs with the STTS macro, but we don't care if
	 * P2 (%rsi) is modified just before function exit.
	 * The CLTS and STTS macros push and pop P1 (%rdi) already.
	 */
#ifdef __xpv
#define	PROTECTED_CLTS \
	push	%rsi; \
	CLTS; \
	pop	%rsi
#else
#define	PROTECTED_CLTS \
	CLTS
#endif	/* __xpv */

	/*
	 * If CR0_TS is not set, align stack (with push %rbp) and push
	 * %xmm0 and %xmm1 on stack, otherwise clear CR0_TS.
	 *
	 * tmpreg receives the value read from %cr0.  %rbp is pushed and
	 * then loaded with %rsp before %rsp is aligned, so the epilogue
	 * macro can restore the stack pointer from %rbp.
	 * Uses the local numeric labels 1 and 2.
	 * NOTE(review): XMM_SIZE and XMM_ALIGN are not defined in this file;
	 * presumably they come from one of the included headers -- confirm.
	 */
#define	CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg) \
	push	%rbp; \
	mov	%rsp, %rbp; \
	movq	%cr0, tmpreg; \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	and	$-XMM_ALIGN, %rsp; \
	sub	$[XMM_SIZE * 2], %rsp; \
	movaps	%xmm0, 16(%rsp); \
	movaps	%xmm1, (%rsp); \
	jmp	2f; \
1: \
	PROTECTED_CLTS; \
2:

	/*
	 * If CR0_TS was not set above, pop %xmm0 and %xmm1 off stack,
	 * otherwise set CR0_TS.
	 * tmpreg must still hold the %cr0 value read by the matching
	 * CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg); it is tested here, not reloaded.
	 * %rsp is restored from %rbp and %rbp is popped on both paths.
	 */
#define	SET_TS_OR_POP_XMM0_XMM1(tmpreg) \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	movaps	(%rsp), %xmm1; \
	movaps	16(%rsp), %xmm0; \
	jmp	2f; \
1: \
	STTS(tmpreg); \
2: \
	mov	%rbp, %rsp; \
	pop	%rbp

	/*
	 * If CR0_TS is not set, align stack (with push %rbp) and push
	 * %xmm0 - %xmm6 on stack, otherwise clear CR0_TS.
	 *
	 * Same shape as CLEAR_TS_OR_PUSH_XMM0_XMM1, but reserves seven
	 * XMM_SIZE slots: %xmm0 is stored at 96(%rsp) down to %xmm6 at (%rsp).
	 * tmpreg receives the value read from %cr0.
	 */
#define	CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg) \
	push	%rbp; \
	mov	%rsp, %rbp; \
	movq	%cr0, tmpreg; \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	and	$-XMM_ALIGN, %rsp; \
	sub	$[XMM_SIZE * 7], %rsp; \
	movaps	%xmm0, 96(%rsp); \
	movaps	%xmm1, 80(%rsp); \
	movaps	%xmm2, 64(%rsp); \
	movaps	%xmm3, 48(%rsp); \
	movaps	%xmm4, 32(%rsp); \
	movaps	%xmm5, 16(%rsp); \
	movaps	%xmm6, (%rsp); \
	jmp	2f; \
1: \
	PROTECTED_CLTS; \
2:


	/*
	 * If CR0_TS was not set above, pop %xmm0 - %xmm6 off stack,
	 * otherwise set CR0_TS.
	 *
	 * tmpreg must still hold the %cr0 value read by the matching
	 * CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg).  Registers are reloaded
	 * from the same offsets they were stored at.
	 */
#define	SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg) \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	movaps	(%rsp), %xmm6; \
	movaps	16(%rsp), %xmm5; \
	movaps	32(%rsp), %xmm4; \
	movaps	48(%rsp), %xmm3; \
	movaps	64(%rsp), %xmm2; \
	movaps	80(%rsp), %xmm1; \
	movaps	96(%rsp), %xmm0; \
	jmp	2f; \
1: \
	STTS(tmpreg); \
2: \
	mov	%rbp, %rsp; \
	pop	%rbp


#else
	/*
	 * _KERNEL is not defined: all five macros expand to nothing, so no
	 * %cr0 access and no %xmm save/restore is emitted.
	 */
#define	PROTECTED_CLTS
#define	CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg)
#define	SET_TS_OR_POP_XMM0_XMM1(tmpreg)
#define	CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg)
#define	SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg)
#endif	/* _KERNEL */


/*
 * _key_expansion_128(), _key_expansion_192a(), _key_expansion_192b(),
 * _key_expansion_256a(), _key_expansion_256b()
 *
 * Helper functions called by rijndael_key_setup_enc_intel().
 * Also used indirectly by rijndael_key_setup_dec_intel().
301 * 302 * Input: 303 * %xmm0 User-provided cipher key 304 * %xmm1 Round constant 305 * Output: 306 * (%rcx) AES key 307 */ 308 309 /* EXPORT DELETE START */ 310.align 16 311_key_expansion_128: 312_key_expansion_256a: 313 pshufd $0b11111111, %xmm1, %xmm1 314 shufps $0b00010000, %xmm0, %xmm4 315 pxor %xmm4, %xmm0 316 shufps $0b10001100, %xmm0, %xmm4 317 pxor %xmm4, %xmm0 318 pxor %xmm1, %xmm0 319 movaps %xmm0, (%rcx) 320 add $0x10, %rcx 321 ret 322 SET_SIZE(_key_expansion_128) 323 SET_SIZE(_key_expansion_256a) 324 325.align 16 326_key_expansion_192a: 327 pshufd $0b01010101, %xmm1, %xmm1 328 shufps $0b00010000, %xmm0, %xmm4 329 pxor %xmm4, %xmm0 330 shufps $0b10001100, %xmm0, %xmm4 331 pxor %xmm4, %xmm0 332 pxor %xmm1, %xmm0 333 334 movaps %xmm2, %xmm5 335 movaps %xmm2, %xmm6 336 pslldq $4, %xmm5 337 pshufd $0b11111111, %xmm0, %xmm3 338 pxor %xmm3, %xmm2 339 pxor %xmm5, %xmm2 340 341 movaps %xmm0, %xmm1 342 shufps $0b01000100, %xmm0, %xmm6 343 movaps %xmm6, (%rcx) 344 shufps $0b01001110, %xmm2, %xmm1 345 movaps %xmm1, 0x10(%rcx) 346 add $0x20, %rcx 347 ret 348 SET_SIZE(_key_expansion_192a) 349 350.align 16 351_key_expansion_192b: 352 pshufd $0b01010101, %xmm1, %xmm1 353 shufps $0b00010000, %xmm0, %xmm4 354 pxor %xmm4, %xmm0 355 shufps $0b10001100, %xmm0, %xmm4 356 pxor %xmm4, %xmm0 357 pxor %xmm1, %xmm0 358 359 movaps %xmm2, %xmm5 360 pslldq $4, %xmm5 361 pshufd $0b11111111, %xmm0, %xmm3 362 pxor %xmm3, %xmm2 363 pxor %xmm5, %xmm2 364 365 movaps %xmm0, (%rcx) 366 add $0x10, %rcx 367 ret 368 SET_SIZE(_key_expansion_192b) 369 370.align 16 371_key_expansion_256b: 372 pshufd $0b10101010, %xmm1, %xmm1 373 shufps $0b00010000, %xmm2, %xmm4 374 pxor %xmm4, %xmm2 375 shufps $0b10001100, %xmm2, %xmm4 376 pxor %xmm4, %xmm2 377 pxor %xmm1, %xmm2 378 movaps %xmm2, (%rcx) 379 add $0x10, %rcx 380 ret 381 SET_SIZE(_key_expansion_256b) 382 /* EXPORT DELETE END */ 383 384 385/* 386 * rijndael_key_setup_enc_intel() 387 * Expand the cipher key into the encryption key schedule. 
388 * 389 * For kernel code, caller is responsible for ensuring kpreempt_disable() 390 * has been called. This is because %xmm registers are not saved/restored. 391 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set 392 * on entry. Otherwise, if TS is not set, save and restore %xmm registers 393 * on the stack. 394 * 395 * OpenSolaris interface: 396 * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[], 397 * uint64_t keyBits); 398 * Return value is 0 on error, number of rounds on success. 399 * 400 * Original Intel OpenSSL interface: 401 * int intel_AES_set_encrypt_key(const unsigned char *userKey, 402 * const int bits, AES_KEY *key); 403 * Return value is non-zero on error, 0 on success. 404 */ 405 406#ifdef OPENSSL_INTERFACE 407#define rijndael_key_setup_enc_intel intel_AES_set_encrypt_key 408#define rijndael_key_setup_dec_intel intel_AES_set_decrypt_key 409 410#define USERCIPHERKEY rdi /* P1, 64 bits */ 411#define KEYSIZE32 esi /* P2, 32 bits */ 412#define KEYSIZE64 rsi /* P2, 64 bits */ 413#define AESKEY rdx /* P3, 64 bits */ 414 415#else /* OpenSolaris Interface */ 416#define AESKEY rdi /* P1, 64 bits */ 417#define USERCIPHERKEY rsi /* P2, 64 bits */ 418#define KEYSIZE32 edx /* P3, 32 bits */ 419#define KEYSIZE64 rdx /* P3, 64 bits */ 420#endif /* OPENSSL_INTERFACE */ 421 422#define ROUNDS32 KEYSIZE32 /* temp */ 423#define ROUNDS64 KEYSIZE64 /* temp */ 424#define ENDAESKEY USERCIPHERKEY /* temp */ 425 426 427ENTRY_NP(rijndael_key_setup_enc_intel) 428 /* EXPORT DELETE START */ 429 CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(%r10) 430 431 / NULL pointer sanity check 432 test %USERCIPHERKEY, %USERCIPHERKEY 433 jz .Lenc_key_invalid_param 434 test %AESKEY, %AESKEY 435 jz .Lenc_key_invalid_param 436 437 movups (%USERCIPHERKEY), %xmm0 / user key (first 16 bytes) 438 movaps %xmm0, (%AESKEY) 439 lea 0x10(%AESKEY), %rcx / key addr 440 pxor %xmm4, %xmm4 / xmm4 is assumed 0 in _key_expansion_x 441 442 cmp $256, %KEYSIZE32 443 jnz 
.Lenc_key192 444 445 / AES 256: 14 rounds 446#ifdef OPENSSL_INTERFACE 447 mov $14, %ROUNDS32 448 movl %ROUNDS32, 240(%AESKEY) / key.rounds = 14 449#endif /* OPENSSL_INTERFACE */ 450 451 movups 0x10(%USERCIPHERKEY), %xmm2 / other user key (2nd 16 bytes) 452 movaps %xmm2, (%rcx) 453 add $0x10, %rcx 454 455 / aeskeygenassist $0x1, %xmm2, %xmm1 / round 1 456 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01 457 call _key_expansion_256a 458 / aeskeygenassist $0x1, %xmm0, %xmm1 459 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01 460 call _key_expansion_256b 461 / aeskeygenassist $0x2, %xmm2, %xmm1 / round 2 462 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02 463 call _key_expansion_256a 464 / aeskeygenassist $0x2, %xmm0, %xmm1 465 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02 466 call _key_expansion_256b 467 / aeskeygenassist $0x4, %xmm2, %xmm1 / round 3 468 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04 469 call _key_expansion_256a 470 / aeskeygenassist $0x4, %xmm0, %xmm1 471 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04 472 call _key_expansion_256b 473 / aeskeygenassist $0x8, %xmm2, %xmm1 / round 4 474 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08 475 call _key_expansion_256a 476 / aeskeygenassist $0x8, %xmm0, %xmm1 477 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08 478 call _key_expansion_256b 479 / aeskeygenassist $0x10, %xmm2, %xmm1 / round 5 480 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10 481 call _key_expansion_256a 482 / aeskeygenassist $0x10, %xmm0, %xmm1 483 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10 484 call _key_expansion_256b 485 / aeskeygenassist $0x20, %xmm2, %xmm1 / round 6 486 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20 487 call _key_expansion_256a 488 / aeskeygenassist $0x20, %xmm0, %xmm1 489 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20 490 call _key_expansion_256b 491 / aeskeygenassist $0x40, %xmm2, %xmm1 / round 7 492 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40 493 call _key_expansion_256a 494 495 SET_TS_OR_POP_XMM0_TO_XMM6(%r10) 496#ifdef OPENSSL_INTERFACE 497 xor %rax, %rax / return 0 (OK) 498#else /* Open 
Solaris Interface */ 499 mov $14, %rax / return # rounds = 14 500#endif 501 ret 502 503.align 4 504.Lenc_key192: 505 cmp $192, %KEYSIZE32 506 jnz .Lenc_key128 507 508 / AES 192: 12 rounds 509#ifdef OPENSSL_INTERFACE 510 mov $12, %ROUNDS32 511 movl %ROUNDS32, 240(%AESKEY) / key.rounds = 12 512#endif /* OPENSSL_INTERFACE */ 513 514 movq 0x10(%USERCIPHERKEY), %xmm2 / other user key 515 / aeskeygenassist $0x1, %xmm2, %xmm1 / round 1 516 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01 517 call _key_expansion_192a 518 / aeskeygenassist $0x2, %xmm2, %xmm1 / round 2 519 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02 520 call _key_expansion_192b 521 / aeskeygenassist $0x4, %xmm2, %xmm1 / round 3 522 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04 523 call _key_expansion_192a 524 / aeskeygenassist $0x8, %xmm2, %xmm1 / round 4 525 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08 526 call _key_expansion_192b 527 / aeskeygenassist $0x10, %xmm2, %xmm1 / round 5 528 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10 529 call _key_expansion_192a 530 / aeskeygenassist $0x20, %xmm2, %xmm1 / round 6 531 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20 532 call _key_expansion_192b 533 / aeskeygenassist $0x40, %xmm2, %xmm1 / round 7 534 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40 535 call _key_expansion_192a 536 / aeskeygenassist $0x80, %xmm2, %xmm1 / round 8 537 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80 538 call _key_expansion_192b 539 540 SET_TS_OR_POP_XMM0_TO_XMM6(%r10) 541#ifdef OPENSSL_INTERFACE 542 xor %rax, %rax / return 0 (OK) 543#else /* OpenSolaris Interface */ 544 mov $12, %rax / return # rounds = 12 545#endif 546 ret 547 548.align 4 549.Lenc_key128: 550 cmp $128, %KEYSIZE32 551 jnz .Lenc_key_invalid_key_bits 552#ifdef OPENSSL_INTERFACE 553 mov $10, %ROUNDS32 554 movl %ROUNDS32, 240(%AESKEY) / key.rounds = 10 555#endif /* OPENSSL_INTERFACE */ 556 557 / aeskeygenassist $0x1, %xmm0, %xmm1 / round 1 558 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01 559 call _key_expansion_128 560 / aeskeygenassist $0x2, %xmm0, %xmm1 / round 2 561 
.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02 562 call _key_expansion_128 563 / aeskeygenassist $0x4, %xmm0, %xmm1 / round 3 564 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04 565 call _key_expansion_128 566 / aeskeygenassist $0x8, %xmm0, %xmm1 / round 4 567 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08 568 call _key_expansion_128 569 / aeskeygenassist $0x10, %xmm0, %xmm1 / round 5 570 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10 571 call _key_expansion_128 572 / aeskeygenassist $0x20, %xmm0, %xmm1 / round 6 573 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20 574 call _key_expansion_128 575 / aeskeygenassist $0x40, %xmm0, %xmm1 / round 7 576 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40 577 call _key_expansion_128 578 / aeskeygenassist $0x80, %xmm0, %xmm1 / round 8 579 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80 580 call _key_expansion_128 581 / aeskeygenassist $0x1b, %xmm0, %xmm1 / round 9 582 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b 583 call _key_expansion_128 584 / aeskeygenassist $0x36, %xmm0, %xmm1 / round 10 585 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36 586 call _key_expansion_128 587 588 SET_TS_OR_POP_XMM0_TO_XMM6(%r10) 589#ifdef OPENSSL_INTERFACE 590 xor %rax, %rax / return 0 (OK) 591#else /* OpenSolaris Interface */ 592 mov $10, %rax / return # rounds = 10 593#endif 594 ret 595 596.Lenc_key_invalid_param: 597#ifdef OPENSSL_INTERFACE 598 SET_TS_OR_POP_XMM0_TO_XMM6(%r10) 599 mov $-1, %rax / user key or AES key pointer is NULL 600 ret 601#else 602 /* FALLTHROUGH */ 603#endif /* OPENSSL_INTERFACE */ 604 605.Lenc_key_invalid_key_bits: 606 SET_TS_OR_POP_XMM0_TO_XMM6(%r10) 607#ifdef OPENSSL_INTERFACE 608 mov $-2, %rax / keysize is invalid 609#else /* Open Solaris Interface */ 610 xor %rax, %rax / a key pointer is NULL or invalid keysize 611#endif /* OPENSSL_INTERFACE */ 612 613 /* EXPORT DELETE END */ 614 ret 615 SET_SIZE(rijndael_key_setup_enc_intel) 616 617 618/* 619 * rijndael_key_setup_dec_intel() 620 * Expand the cipher key into the decryption key schedule. 
 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
 * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 * on the stack.
 *
 * The encryption schedule is built first by calling
 * rijndael_key_setup_enc_intel(); it is then converted in place:
 * the round keys are reversed end-for-end, and aesimc is applied to
 * every round key except the first and the last.
 *
 * OpenSolaris interface:
 * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
 *	uint64_t keyBits);
 * Return value is 0 on error, number of rounds on success.
 * Parameter mapping from the OpenSSL interface: P1->P2, P2->P3, P3->P1
 *
 * Original Intel OpenSSL interface:
 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
 *	const int bits, AES_KEY *key);
 * Return value is non-zero on error, 0 on success.
 */
ENTRY_NP(rijndael_key_setup_dec_intel)
	/* EXPORT DELETE START */
	call	rijndael_key_setup_enc_intel
	test	%rax, %rax
#ifdef	OPENSSL_INTERFACE
	jnz	.Ldec_key_exit	/ Failed if returned non-0
#else	/* OpenSolaris Interface */
	jz	.Ldec_key_exit	/ Failed if returned 0
#endif	/* OPENSSL_INTERFACE */

	CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)

#ifndef	OPENSSL_INTERFACE		/* OpenSolaris Interface */
	mov	%rax, %ROUNDS64		/ set # rounds (10, 12, or 14)
					/ (already set for OpenSSL)
#endif

	/*
	 * %rcx = address of round key 1 (start of the aesimc pass).
	 * ROUNDS64 = AESKEY + 16 * rounds = address of the last round key;
	 * a copy is kept in ENDAESKEY as the end marker for the aesimc pass.
	 */
	lea	0x10(%AESKEY), %rcx	/ key addr
	shl	$4, %ROUNDS32
	add	%AESKEY, %ROUNDS64
	mov	%ROUNDS64, %ENDAESKEY

	/*
	 * Swap round keys pairwise from both ends, walking AESKEY up and
	 * ROUNDS64 down, until the two pointers meet (unsigned compare).
	 */
.align 4
.Ldec_key_reorder_loop:
	movaps	(%AESKEY), %xmm0
	movaps	(%ROUNDS64), %xmm1
	movaps	%xmm0, (%ROUNDS64)
	movaps	%xmm1, (%AESKEY)
	lea	0x10(%AESKEY), %AESKEY
	lea	-0x10(%ROUNDS64), %ROUNDS64
	cmp	%AESKEY, %ROUNDS64
	ja	.Ldec_key_reorder_loop

	/*
	 * Apply aesimc in place to each round key from %rcx up to, but not
	 * including, ENDAESKEY.
	 */
.align 4
.Ldec_key_inv_loop:
	movaps	(%rcx), %xmm0
	/aesimc	%xmm0, %xmm1
	.byte	0x66, 0x0f, 0x38, 0xdb, 0xc8
	movaps	%xmm1, (%rcx)
	lea	0x10(%rcx), %rcx
	cmp	%ENDAESKEY, %rcx
	jnz	.Ldec_key_inv_loop

	SET_TS_OR_POP_XMM0_XMM1(%r10)

.Ldec_key_exit:
	/ OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
	/ OpenSSL: rax = 0 for OK, or non-zero for error
	/* EXPORT DELETE END */
	ret
	SET_SIZE(rijndael_key_setup_dec_intel)


/*
 * aes_encrypt_intel()
 * Encrypt a single block (in and out can overlap).
 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
 * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 * on the stack.
 *
 * Temporary register usage:
 * %xmm0	State
 * %xmm1	Key
 *
 * The input is loaded and the output stored with movups; the key schedule
 * is read with movaps.
 *
 * Original OpenSolaris Interface:
 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
 *	const uint32_t pt[4], uint32_t ct[4])
 *
 * Original Intel OpenSSL Interface:
 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
 *	const AES_KEY *key)
 */

#ifdef	OPENSSL_INTERFACE
#define	aes_encrypt_intel	intel_AES_encrypt
#define	aes_decrypt_intel	intel_AES_decrypt

#define	INP		rdi	/* P1, 64 bits */
#define	OUTP		rsi	/* P2, 64 bits */
#define	KEYP		rdx	/* P3, 64 bits */

/* No NROUNDS parameter--offset 240 from KEYP saved in %ecx:  */
#define	NROUNDS32	ecx	/* temporary, 32 bits */
#define	NROUNDS		cl	/* temporary,  8 bits */

#else	/* OpenSolaris Interface */
#define	KEYP		rdi	/* P1, 64 bits */
#define	NROUNDS		esi	/* P2, 32 bits */
#define	INP		rdx	/* P3, 64 bits */
#define	OUTP		rcx	/* P4, 64 bits */
#endif	/* OPENSSL_INTERFACE */

#define	STATE		xmm0	/* temporary, 128 bits */
#define	KEY		xmm1	/* temporary, 128 bits */

ENTRY_NP(aes_encrypt_intel)
	/* EXPORT DELETE START */
	CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)

	movups	(%INP), %STATE			/ input
	movaps	(%KEYP), %KEY			/ key
#ifdef	OPENSSL_INTERFACE
	mov	240(%KEYP), %NROUNDS32		/ round count
#else	/* OpenSolaris Interface */
	/* Round count is already present as P2 in %rsi/%esi */
#endif	/* OPENSSL_INTERFACE */

	pxor	%KEY, %STATE			/ round 0
	/*
	 * KEYP is advanced so that the shared tail at .Lenc128 always uses
	 * the offsets -0x20 to 0x70.  lea leaves the flags from the cmp
	 * intact, so je below still tests NROUNDS against 12.
	 */
	lea	0x30(%KEYP), %KEYP
	cmp	$12, %NROUNDS
	jb	.Lenc128
	lea	0x20(%KEYP), %KEYP
	je	.Lenc192

	/ AES 256
	lea	0x20(%KEYP), %KEYP
	movaps	-0x60(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	-0x50(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1

.align 4
.Lenc192:
	/ AES 192 and 256
	movaps	-0x40(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	-0x30(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1

.align 4
.Lenc128:
	/ AES 128, 192, and 256
	movaps	-0x20(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	-0x10(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	0x10(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	0x20(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	0x30(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	0x40(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	0x50(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	0x60(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	0x70(%KEYP), %KEY
	/aesenclast	 %KEY, %STATE		/ last round
	.byte	0x66, 0x0f, 0x38, 0xdd, 0xc1
	movups	%STATE, (%OUTP)			/ output

	SET_TS_OR_POP_XMM0_XMM1(%r10)
	/* EXPORT DELETE END */
	ret
	SET_SIZE(aes_encrypt_intel)


/*
 * aes_decrypt_intel()
 * Decrypt a single block (in and out can overlap).
819 * 820 * For kernel code, caller is responsible for ensuring kpreempt_disable() 821 * has been called. This is because %xmm registers are not saved/restored. 822 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set 823 * on entry. Otherwise, if TS is not set, save and restore %xmm registers 824 * on the stack. 825 * 826 * Temporary register usage: 827 * %xmm0 State 828 * %xmm1 Key 829 * 830 * Original OpenSolaris Interface: 831 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr, 832 * const uint32_t pt[4], uint32_t ct[4])/ 833 * 834 * Original Intel OpenSSL Interface: 835 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out, 836 * const AES_KEY *key); 837 */ 838ENTRY_NP(aes_decrypt_intel) 839 /* EXPORT DELETE START */ 840 CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10) 841 842 movups (%INP), %STATE / input 843 movaps (%KEYP), %KEY / key 844#ifdef OPENSSL_INTERFACE 845 mov 240(%KEYP), %NROUNDS32 / round count 846#else /* OpenSolaris Interface */ 847 /* Round count is already present as P2 in %rsi/%esi */ 848#endif /* OPENSSL_INTERFACE */ 849 850 pxor %KEY, %STATE / round 0 851 lea 0x30(%KEYP), %KEYP 852 cmp $12, %NROUNDS 853 jb .Ldec128 854 lea 0x20(%KEYP), %KEYP 855 je .Ldec192 856 857 / AES 256 858 lea 0x20(%KEYP), %KEYP 859 movaps -0x60(%KEYP), %KEY 860 /aesdec %KEY, %STATE 861 .byte 0x66, 0x0f, 0x38, 0xde, 0xc1 862 movaps -0x50(%KEYP), %KEY 863 /aesdec %KEY, %STATE 864 .byte 0x66, 0x0f, 0x38, 0xde, 0xc1 865 866.align 4 867.Ldec192: 868 / AES 192 and 256 869 movaps -0x40(%KEYP), %KEY 870 /aesdec %KEY, %STATE 871 .byte 0x66, 0x0f, 0x38, 0xde, 0xc1 872 movaps -0x30(%KEYP), %KEY 873 /aesdec %KEY, %STATE 874 .byte 0x66, 0x0f, 0x38, 0xde, 0xc1 875 876.align 4 877.Ldec128: 878 / AES 128, 192, and 256 879 movaps -0x20(%KEYP), %KEY 880 /aesdec %KEY, %STATE 881 .byte 0x66, 0x0f, 0x38, 0xde, 0xc1 882 movaps -0x10(%KEYP), %KEY 883 /aesdec %KEY, %STATE 884 .byte 0x66, 0x0f, 0x38, 0xde, 0xc1 885 movaps (%KEYP), %KEY 886 /aesdec %KEY, %STATE 887 
.byte 0x66, 0x0f, 0x38, 0xde, 0xc1 888 movaps 0x10(%KEYP), %KEY 889 /aesdec %KEY, %STATE 890 .byte 0x66, 0x0f, 0x38, 0xde, 0xc1 891 movaps 0x20(%KEYP), %KEY 892 /aesdec %KEY, %STATE 893 .byte 0x66, 0x0f, 0x38, 0xde, 0xc1 894 movaps 0x30(%KEYP), %KEY 895 /aesdec %KEY, %STATE 896 .byte 0x66, 0x0f, 0x38, 0xde, 0xc1 897 movaps 0x40(%KEYP), %KEY 898 /aesdec %KEY, %STATE 899 .byte 0x66, 0x0f, 0x38, 0xde, 0xc1 900 movaps 0x50(%KEYP), %KEY 901 /aesdec %KEY, %STATE 902 .byte 0x66, 0x0f, 0x38, 0xde, 0xc1 903 movaps 0x60(%KEYP), %KEY 904 /aesdec %KEY, %STATE 905 .byte 0x66, 0x0f, 0x38, 0xde, 0xc1 906 movaps 0x70(%KEYP), %KEY 907 /aesdeclast %KEY, %STATE / last round 908 .byte 0x66, 0x0f, 0x38, 0xdf, 0xc1 909 movups %STATE, (%OUTP) / output 910 911 SET_TS_OR_POP_XMM0_XMM1(%r10) 912 ret 913 /* EXPORT DELETE END */ 914 SET_SIZE(aes_decrypt_intel) 915 916#endif /* lint || __lint */ 917