/* * ==================================================================== * Written by Intel Corporation for the OpenSSL project to add support * for Intel AES-NI instructions. Rights for redistribution and usage * in source and binary forms are granted according to the OpenSSL * license. * * Author: Huang Ying * Vinodh Gopal * Kahraman Akdemir * * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD) * instructions that are going to be introduced in the next generation * of Intel processor, as of 2009. These instructions enable fast and * secure data encryption and decryption, using the Advanced Encryption * Standard (AES), defined by FIPS Publication number 197. The * architecture introduces six instructions that offer full hardware * support for AES. Four of them support high performance data * encryption and decryption, and the other two instructions support * the AES key expansion procedure. * ==================================================================== */ /* * ==================================================================== * Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgment: * "This product includes software developed by the OpenSSL Project * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" * * 4. 
The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to * endorse or promote products derived from this software without * prior written permission. For written permission, please contact * openssl-core@openssl.org. * * 5. Products derived from this software may not be called "OpenSSL" * nor may "OpenSSL" appear in their names without prior written * permission of the OpenSSL Project. * * 6. Redistributions of any form whatsoever must retain the following * acknowledgment: * "This product includes software developed by the OpenSSL Project * for use in the OpenSSL Toolkit (http://www.openssl.org/)" * * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. * ==================================================================== */ /* * ==================================================================== * OpenSolaris OS modifications * * This source originates as files aes-intel.S and eng_aesni_asm.pl, in * patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by * Huang Ying of Intel to the openssl-dev mailing list under the subject * of "Add support to Intel AES-NI instruction set for x86_64 platform". * * This OpenSolaris version has these major changes from the original source: * * 1. 
Added OpenSolaris ENTRY_NP/SET_SIZE macros from * /usr/include/sys/asm_linkage.h, lint(1B) guards, EXPORT DELETE START * and EXPORT DELETE END markers, and dummy C function definitions for lint. * * 2. Formatted code, added comments, and added #includes and #defines. * * 3. Replaced aes* and palignr instructions with .byte sequences * (as they are not supported yet by all of the gas, as, and aw assemblers). * * 4. If bit CR0.TS is set, clear and set the TS bit, after and before * calling kpreempt_disable() and kpreempt_enable(). * If the TS bit is not set, save and restore %xmm registers at the beginning * and end of function calls (%xmm* registers are not saved and restored * during kernel thread preemption). * * 5. Renamed functions, reordered parameters, and changed return value * to match OpenSolaris: * * OpenSSL interface: * int intel_AES_set_encrypt_key(const unsigned char *userKey, * const int bits, AES_KEY *key); * int intel_AES_set_decrypt_key(const unsigned char *userKey, * const int bits, AES_KEY *key); * Return values for above are non-zero on error, 0 on success. * * void intel_AES_encrypt(const unsigned char *in, unsigned char *out, * const AES_KEY *key); * void intel_AES_decrypt(const unsigned char *in, unsigned char *out, * const AES_KEY *key); * typedef struct aes_key_st { * unsigned int rd_key[4 *(AES_MAXNR + 1)]; * int rounds; * unsigned int pad[3]; * } AES_KEY; * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules * (ks32) instead of 64-bit (ks64)). * Number of rounds (aka round count) is at offset 240 of AES_KEY. * * OpenSolaris OS interface (#ifdefs removed for readability): * int rijndael_key_setup_dec_intel(uint32_t rk[], * const uint32_t cipherKey[], uint64_t keyBits); * int rijndael_key_setup_enc_intel(uint32_t rk[], * const uint32_t cipherKey[], uint64_t keyBits); * Return values for above are 0 on error, number of rounds on success. 
*
 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
 *	const uint32_t pt[4], uint32_t ct[4]);
 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
 *	const uint32_t pt[4], uint32_t ct[4]);
 * typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
 *	 uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
 *
 * typedef union {
 *	uint32_t	ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
 * } aes_ks_t;
 * typedef struct aes_key {
 *	aes_ks_t	encr_ks, decr_ks;
 *	long double	align128;
 *	int		flags, nr, type;
 * } aes_key_t;
 *
 * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
 * ct is crypto text, and MAX_AES_NR is 14.
 * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
 * Note2: aes_ks_t must be aligned on a 0 mod 16 byte (128 bit) boundary,
 * because the key schedule is read and written with movaps.
 * ====================================================================
 */

#if defined(lint) || defined(__lint)

/*
 * Dummy C definitions so that lint(1B) sees prototypes for the four
 * entry points; they do nothing and are never part of a real build.
 * The working implementations are the assembly routines in the
 * non-lint half of this file.
 */

#include <sys/types.h>

/* ARGSUSED */
void
aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
    uint32_t ct[4])
{
}

/* ARGSUSED */
void
aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
    uint32_t pt[4])
{
}

/* ARGSUSED */
int
rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits)
{
	return (0);
}

/* ARGSUSED */
int
rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits)
{
	return (0);
}

#else	/* lint */

/*
 * NOTE(review): the header names had been lost from every #include in
 * this file (a bare "#include" does not preprocess).  They are restored
 * here to the headers expected to supply ENTRY_NP/SET_SIZE, CR0_TS and
 * the CLTS/STTS macros -- confirm against the build environment.
 */
#include <sys/asm_linkage.h>
#include <sys/controlregs.h>
#ifdef _KERNEL
#include <sys/machprivregs.h>
#endif

#ifdef _KERNEL
/*
 * Note: the CLTS macro clobbers P2 (%rsi) under i86xpv.  That is,
 * it calls HYPERVISOR_fpu_taskswitch() which modifies %rsi when it
 * uses it to pass P2 to syscall.
 * This also occurs with the STTS macro, but we don't care if
 * P2 (%rsi) is modified just before function exit.
 * The CLTS and STTS macros push and pop P1 (%rdi) already.
*/
#ifdef __xpv
#define	PROTECTED_CLTS \
	push	%rsi; \
	CLTS; \
	pop	%rsi
#else
#define	PROTECTED_CLTS \
	CLTS
#endif	/* __xpv */

/*
 * If CR0_TS is not set, align stack (with push %rbp) and push
 * %xmm0 - %xmm1 on stack, otherwise clear CR0_TS.
 *
 * tmpreg receives the value of %cr0 and is tested again by the matching
 * SET_TS_OR_POP_XMM0_XMM1(), so the caller must leave it untouched
 * between the two macros.  %rbp holds the caller's %rsp until then.
 */
#define	CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg) \
	push	%rbp; \
	mov	%rsp, %rbp; \
	movq	%cr0, tmpreg; \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	and	$-XMM_ALIGN, %rsp; \
	sub	$[XMM_SIZE * 2], %rsp; \
	movaps	%xmm0, 16(%rsp); \
	movaps	%xmm1, (%rsp); \
	jmp	2f; \
1: \
	PROTECTED_CLTS; \
2:

/*
 * If CR0_TS was not set above, pop %xmm0 and %xmm1 off stack,
 * otherwise set CR0_TS.
 */
#define	SET_TS_OR_POP_XMM0_XMM1(tmpreg) \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	movaps	(%rsp), %xmm1; \
	movaps	16(%rsp), %xmm0; \
	jmp	2f; \
1: \
	STTS(tmpreg); \
2: \
	mov	%rbp, %rsp; \
	pop	%rbp

/*
 * If CR0_TS is not set, align stack (with push %rbp) and push
 * %xmm0 - %xmm6 on stack, otherwise clear CR0_TS
 */
#define	CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg) \
	push	%rbp; \
	mov	%rsp, %rbp; \
	movq	%cr0, tmpreg; \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	and	$-XMM_ALIGN, %rsp; \
	sub	$[XMM_SIZE * 7], %rsp; \
	movaps	%xmm0, 96(%rsp); \
	movaps	%xmm1, 80(%rsp); \
	movaps	%xmm2, 64(%rsp); \
	movaps	%xmm3, 48(%rsp); \
	movaps	%xmm4, 32(%rsp); \
	movaps	%xmm5, 16(%rsp); \
	movaps	%xmm6, (%rsp); \
	jmp	2f; \
1: \
	PROTECTED_CLTS; \
2:


/*
 * If CR0_TS was not set above, pop %xmm0 - %xmm6 off stack,
 * otherwise set CR0_TS.
 */
#define	SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg) \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	movaps	(%rsp), %xmm6; \
	movaps	16(%rsp), %xmm5; \
	movaps	32(%rsp), %xmm4; \
	movaps	48(%rsp), %xmm3; \
	movaps	64(%rsp), %xmm2; \
	movaps	80(%rsp), %xmm1; \
	movaps	96(%rsp), %xmm0; \
	jmp	2f; \
1: \
	STTS(tmpreg); \
2: \
	mov	%rbp, %rsp; \
	pop	%rbp


#else
/* Outside the kernel the save/restore macros expand to nothing. */
#define	PROTECTED_CLTS
#define	CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg)
#define	SET_TS_OR_POP_XMM0_XMM1(tmpreg)
#define	CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg)
#define	SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg)
#endif	/* _KERNEL */


/*
 * _key_expansion_128(),
 * _key_expansion_192a(), _key_expansion_192b(),
 * _key_expansion_256a(), _key_expansion_256b()
 *
 * Helper functions called by rijndael_key_setup_enc_intel().
 * Also used indirectly by rijndael_key_setup_dec_intel().
 *
 * Input:
 * %xmm0	User-provided cipher key
 * %xmm1	Round constant
 * Output:
 * (%rcx)	AES key
 *
 * Every helper stores the round key(s) it produces at (%rcx) and then
 * advances %rcx past them (0x10 bytes, or 0x20 for _key_expansion_192a).
 * The shufps/pxor pairs use %xmm4 as scratch and rely on its low dword
 * being zero on entry; they leave it zero on exit.
 * The 192-bit and 256-bit helpers additionally read and update %xmm2, and
 * the 192-bit helpers use %xmm3, %xmm5 (and %xmm6 in 192a) as scratch.
 */
	/* EXPORT DELETE START */
.align	16
_key_expansion_128:
_key_expansion_256a:
	pshufd	$0b11111111, %xmm1, %xmm1	/* broadcast dword 3 of %xmm1 */
	/* the two shufps/pxor pairs turn %xmm0 into the running XOR of its dwords */
	shufps	$0b00010000, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0b10001100, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pxor	%xmm1, %xmm0
	movaps	%xmm0, (%rcx)
	add	$0x10, %rcx
	ret
	SET_SIZE(_key_expansion_128)
	SET_SIZE(_key_expansion_256a)

.align 16
_key_expansion_192a:
	pshufd	$0b01010101, %xmm1, %xmm1	/* broadcast dword 1 of %xmm1 */
	shufps	$0b00010000, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0b10001100, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pxor	%xmm1, %xmm0

	movaps	%xmm2, %xmm5
	movaps	%xmm2, %xmm6			/* keep the incoming %xmm2 for the first store */
	pslldq	$4, %xmm5
	pshufd	$0b11111111, %xmm0, %xmm3
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	/* this variant emits 0x20 bytes of schedule assembled from %xmm6, %xmm0 and %xmm2 */
	movaps	%xmm0, %xmm1
	shufps	$0b01000100, %xmm0, %xmm6
	movaps	%xmm6, (%rcx)
	shufps	$0b01001110, %xmm2, %xmm1
	movaps	%xmm1, 0x10(%rcx)
	add	$0x20, %rcx
	ret
	SET_SIZE(_key_expansion_192a)

.align 16
_key_expansion_192b:
	pshufd	$0b01010101, %xmm1, %xmm1	/* broadcast dword 1 of %xmm1 */
	shufps	$0b00010000, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0b10001100, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pxor	%xmm1, %xmm0

	movaps	%xmm2, %xmm5
	pslldq	$4, %xmm5
	pshufd	$0b11111111, %xmm0, %xmm3
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	/* this variant emits only %xmm0 (0x10 bytes); the updated %xmm2 is kept for the next call */
	movaps	%xmm0, (%rcx)
	add	$0x10, %rcx
	ret
	SET_SIZE(_key_expansion_192b)

.align 16
_key_expansion_256b:
	pshufd	$0b10101010, %xmm1, %xmm1	/* broadcast dword 2 of %xmm1 */
	/* same folding sequence as _key_expansion_256a, applied to %xmm2 */
	shufps	$0b00010000, %xmm2, %xmm4
	pxor	%xmm4, %xmm2
	shufps	$0b10001100, %xmm2, %xmm4
	pxor	%xmm4, %xmm2
	pxor	%xmm1, %xmm2
	movaps	%xmm2, (%rcx)
	add	$0x10, %rcx
	ret
	SET_SIZE(_key_expansion_256b)
	/* EXPORT DELETE END */


/*
 * rijndael_key_setup_enc_intel()
 * Expand the cipher key into the encryption key schedule.
 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 * on the stack.
 *
 * OpenSolaris interface:
 * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
 *	uint64_t keyBits);
 * Return value is 0 on error, number of rounds on success.
 *
 * Original Intel OpenSSL interface:
 * int intel_AES_set_encrypt_key(const unsigned char *userKey,
 *	const int bits, AES_KEY *key);
 * Return value is non-zero on error, 0 on success.
*/

#ifdef	OPENSSL_INTERFACE
#define	rijndael_key_setup_enc_intel	intel_AES_set_encrypt_key
#define	rijndael_key_setup_dec_intel	intel_AES_set_decrypt_key

#define	USERCIPHERKEY		rdi	/* P1, 64 bits */
#define	KEYSIZE32		esi	/* P2, 32 bits */
#define	KEYSIZE64		rsi	/* P2, 64 bits */
#define	AESKEY			rdx	/* P3, 64 bits */

#else	/* OpenSolaris Interface */
#define	AESKEY			rdi	/* P1, 64 bits */
#define	USERCIPHERKEY		rsi	/* P2, 64 bits */
#define	KEYSIZE32		edx	/* P3, 32 bits */
#define	KEYSIZE64		rdx	/* P3, 64 bits */
#endif	/* OPENSSL_INTERFACE */

/*
 * The three names below reuse parameter registers as scratch: ROUNDS32 and
 * ROUNDS64 alias the key-size register and ENDAESKEY aliases the user-key
 * register, so each is only usable once the aliased parameter is dead.
 */
#define	ROUNDS32		KEYSIZE32	/* temp */
#define	ROUNDS64		KEYSIZE64	/* temp */
#define	ENDAESKEY		USERCIPHERKEY	/* temp */

/*
 * Register use inside the routine:
 *	%r10		%cr0 value held between the CLEAR_TS/SET_TS macros
 *	%rcx		address where the next round key is stored
 *	%xmm0-%xmm6	key material and scratch (saved by the macro when
 *			CR0_TS was clear on entry)
 *
 * Each AES-NI aeskeygenassist is emitted as a .byte sequence; the
 * intended mnemonic is given in the comment on the preceding line.  In
 * those sequences the fifth byte selects the source register (0xca is
 * %xmm2, 0xc8 is %xmm0; the destination is always %xmm1) and the last
 * byte is the immediate round constant.
 *
 * Only the low 32 bits of the key size are compared against 256, 192
 * and 128.  Note that the first 16 bytes of the user key are stored to
 * the schedule before the key size is validated.
 */
ENTRY_NP(rijndael_key_setup_enc_intel)
	/* EXPORT DELETE START */
	CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(%r10)

	/ NULL pointer sanity check
	test	%USERCIPHERKEY, %USERCIPHERKEY
	jz	.Lenc_key_invalid_param
	test	%AESKEY, %AESKEY
	jz	.Lenc_key_invalid_param

	movups	(%USERCIPHERKEY), %xmm0	/ user key (first 16 bytes)
	movaps	%xmm0, (%AESKEY)
	lea	0x10(%AESKEY), %rcx	/ key addr
	pxor	%xmm4, %xmm4		/ xmm4 is assumed 0 in _key_expansion_x

	cmp	$256, %KEYSIZE32
	jnz	.Lenc_key192

	/ AES 256: 14 rounds
#ifdef OPENSSL_INTERFACE
	mov	$14, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		/ key.rounds = 14
#endif	/* OPENSSL_INTERFACE */

	movups	0x10(%USERCIPHERKEY), %xmm2	/ other user key (2nd 16 bytes)
	movaps	%xmm2, (%rcx)
	add	$0x10, %rcx

	/ aeskeygenassist $0x1, %xmm2, %xmm1	/ round 1
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
	call	_key_expansion_256a
	/ aeskeygenassist $0x1, %xmm0, %xmm1
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
	call	_key_expansion_256b
	/ aeskeygenassist $0x2, %xmm2, %xmm1	/ round 2
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
	call	_key_expansion_256a
	/ aeskeygenassist $0x2, %xmm0, %xmm1
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
	call	_key_expansion_256b
	/ aeskeygenassist $0x4, %xmm2, %xmm1	/ round 3
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
	call	_key_expansion_256a
	/ aeskeygenassist $0x4, %xmm0, %xmm1
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
	call	_key_expansion_256b
	/ aeskeygenassist $0x8, %xmm2, %xmm1	/ round 4
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
	call	_key_expansion_256a
	/ aeskeygenassist $0x8, %xmm0, %xmm1
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
	call	_key_expansion_256b
	/ aeskeygenassist $0x10, %xmm2, %xmm1	/ round 5
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
	call	_key_expansion_256a
	/ aeskeygenassist $0x10, %xmm0, %xmm1
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
	call	_key_expansion_256b
	/ aeskeygenassist $0x20, %xmm2, %xmm1	/ round 6
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
	call	_key_expansion_256a
	/ aeskeygenassist $0x20, %xmm0, %xmm1
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
	call	_key_expansion_256b
	/ aeskeygenassist $0x40, %xmm2, %xmm1	/ round 7
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
	call	_key_expansion_256a

	/* restore state first: %rax is loaded only after the macro has run */
	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef	OPENSSL_INTERFACE
	xor	%rax, %rax			/ return 0 (OK)
#else	/* Open Solaris Interface */
	mov	$14, %rax			/ return # rounds = 14
#endif
	ret

.align 4
.Lenc_key192:
	cmp	$192, %KEYSIZE32
	jnz	.Lenc_key128

	/ AES 192: 12 rounds
#ifdef OPENSSL_INTERFACE
	mov	$12, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)	/ key.rounds = 12
#endif	/* OPENSSL_INTERFACE */

	movq	0x10(%USERCIPHERKEY), %xmm2	/ other user key
	/* the helpers alternate: 192a emits 0x20 bytes of schedule, 192b emits 0x10 */
	/ aeskeygenassist $0x1, %xmm2, %xmm1	/ round 1
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
	call	_key_expansion_192a
	/ aeskeygenassist $0x2, %xmm2, %xmm1	/ round 2
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
	call	_key_expansion_192b
	/ aeskeygenassist $0x4, %xmm2, %xmm1	/ round 3
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
	call	_key_expansion_192a
	/ aeskeygenassist $0x8, %xmm2, %xmm1	/ round 4
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
	call	_key_expansion_192b
	/ aeskeygenassist $0x10, %xmm2, %xmm1	/ round 5
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
	call	_key_expansion_192a
	/ aeskeygenassist $0x20, %xmm2, %xmm1	/ round 6
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
	call	_key_expansion_192b
	/ aeskeygenassist $0x40, %xmm2, %xmm1	/ round 7
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
	call	_key_expansion_192a
	/ aeskeygenassist $0x80, %xmm2, %xmm1	/ round 8
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80
	call	_key_expansion_192b

	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef	OPENSSL_INTERFACE
	xor	%rax, %rax			/ return 0 (OK)
#else	/* OpenSolaris Interface */
	mov	$12, %rax			/ return # rounds = 12
#endif
	ret

.align 4
.Lenc_key128:
	cmp	$128, %KEYSIZE32
	jnz	.Lenc_key_invalid_key_bits
#ifdef OPENSSL_INTERFACE
	mov	$10, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		/ key.rounds = 10
#endif	/* OPENSSL_INTERFACE */

	/ aeskeygenassist $0x1, %xmm0, %xmm1	/ round 1
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
	call	_key_expansion_128
	/ aeskeygenassist $0x2, %xmm0, %xmm1	/ round 2
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
	call	_key_expansion_128
	/ aeskeygenassist $0x4, %xmm0, %xmm1	/ round 3
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
	call	_key_expansion_128
	/ aeskeygenassist $0x8, %xmm0, %xmm1	/ round 4
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
	call	_key_expansion_128
	/ aeskeygenassist $0x10, %xmm0, %xmm1	/ round 5
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
	call	_key_expansion_128
	/ aeskeygenassist $0x20, %xmm0, %xmm1	/ round 6
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
	call	_key_expansion_128
	/ aeskeygenassist $0x40, %xmm0, %xmm1	/ round 7
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40
	call	_key_expansion_128
	/ aeskeygenassist $0x80, %xmm0, %xmm1	/ round 8
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80
	call	_key_expansion_128
	/ aeskeygenassist $0x1b, %xmm0, %xmm1	/ round 9
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b
	call	_key_expansion_128
	/ aeskeygenassist $0x36, %xmm0, %xmm1	/ round 10
	.byte	0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36
	call	_key_expansion_128

	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef	OPENSSL_INTERFACE
	xor	%rax, %rax			/ return 0 (OK)
#else	/* OpenSolaris Interface */
	mov	$10, %rax			/ return # rounds = 10
#endif
	ret

	/* Error exits: both still undo CLEAR_TS_OR_PUSH_XMM0_TO_XMM6 before returning. */
.Lenc_key_invalid_param:
#ifdef	OPENSSL_INTERFACE
	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
	mov	$-1, %rax	/ user key or AES key pointer is NULL
	ret
#else
	/* FALLTHROUGH */
#endif	/* OPENSSL_INTERFACE */

.Lenc_key_invalid_key_bits:
	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef	OPENSSL_INTERFACE
	mov	$-2, %rax	/ keysize is invalid
#else	/* Open Solaris Interface */
	xor	%rax, %rax	/ a key pointer is NULL or invalid keysize
#endif	/* OPENSSL_INTERFACE */

	/* EXPORT DELETE END */
	ret
	SET_SIZE(rijndael_key_setup_enc_intel)


/*
 * rijndael_key_setup_dec_intel()
 * Expand the cipher key into the decryption key schedule.
 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 * on the stack.
 *
 * OpenSolaris interface:
 * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
 *	uint64_t keyBits);
 * Return value is 0 on error, number of rounds on success.
 * P1->P2, P2->P3, P3->P1
 *
 * Original Intel OpenSSL interface:
 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
 *	const int bits, AES_KEY *key);
 * Return value is non-zero on error, 0 on success.
*/
/*
 * Implementation outline:
 *   1. Call rijndael_key_setup_enc_intel() to build the encryption
 *	schedule; on failure return its result unchanged.
 *   2. Reverse the order of the 16-byte round keys in place.
 *   3. Apply aesimc to every round key except the first and the last.
 * %rcx walks the schedule in step 3 and ENDAESKEY marks where it stops.
 */
ENTRY_NP(rijndael_key_setup_dec_intel)
	/* EXPORT DELETE START */
	call	rijndael_key_setup_enc_intel
	test	%rax, %rax
#ifdef	OPENSSL_INTERFACE
	jnz	.Ldec_key_exit	/ Failed if returned non-0
#else	/* OpenSolaris Interface */
	jz	.Ldec_key_exit	/ Failed if returned 0
#endif	/* OPENSSL_INTERFACE */

	CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)

#ifndef	OPENSSL_INTERFACE		/* OpenSolaris Interface */
	mov	%rax, %ROUNDS64		/ set # rounds (10, 12, or 14)
					/ (already set for OpenSSL)
#endif

	lea	0x10(%AESKEY), %rcx	/ key addr
	shl	$4, %ROUNDS32		/* rounds * 16 = byte offset of the last round key */
	add	%AESKEY, %ROUNDS64	/* ROUNDS64 now points at the last round key */
	mov	%ROUNDS64, %ENDAESKEY	/* remember it as the end marker for the aesimc loop */

	/* Swap round keys pairwise from both ends until the two pointers meet. */
.align 4
.Ldec_key_reorder_loop:
	movaps	(%AESKEY), %xmm0
	movaps	(%ROUNDS64), %xmm1
	movaps	%xmm0, (%ROUNDS64)
	movaps	%xmm1, (%AESKEY)
	lea	0x10(%AESKEY), %AESKEY
	lea	-0x10(%ROUNDS64), %ROUNDS64
	cmp	%AESKEY, %ROUNDS64
	ja	.Ldec_key_reorder_loop

	/* Transform the keys from schedule+0x10 up to, but not including, ENDAESKEY. */
.align 4
.Ldec_key_inv_loop:
	movaps	(%rcx), %xmm0
	/aesimc	%xmm0, %xmm1
	.byte	0x66, 0x0f, 0x38, 0xdb, 0xc8
	movaps	%xmm1, (%rcx)
	lea	0x10(%rcx), %rcx
	cmp	%ENDAESKEY, %rcx
	jnz	.Ldec_key_inv_loop

	/*
	 * NOTE(review): %rax still carries the return value here and must
	 * survive the macro below; STTS is defined outside this file --
	 * confirm that it does not modify %rax on any platform.
	 */
	SET_TS_OR_POP_XMM0_XMM1(%r10)

.Ldec_key_exit:
	/ OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
	/ OpenSSL: rax = 0 for OK, or non-zero for error
	/* EXPORT DELETE END */
	ret
	SET_SIZE(rijndael_key_setup_dec_intel)


/*
 * aes_encrypt_intel()
 * Encrypt a single block (in and out can overlap).
 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 * on the stack.
*
 * Temporary register usage:
 * %xmm0	State
 * %xmm1	Key
 *
 * Original OpenSolaris Interface:
 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
 *	const uint32_t pt[4], uint32_t ct[4])
 *
 * Original Intel OpenSSL Interface:
 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
 *	const AES_KEY *key)
 */

#ifdef	OPENSSL_INTERFACE
#define	aes_encrypt_intel	intel_AES_encrypt
#define	aes_decrypt_intel	intel_AES_decrypt

#define	INP		rdi	/* P1, 64 bits */
#define	OUTP		rsi	/* P2, 64 bits */
#define	KEYP		rdx	/* P3, 64 bits */

/* No NROUNDS parameter--offset 240 from KEYP saved in %ecx:  */
#define	NROUNDS32	ecx	/* temporary, 32 bits */
#define	NROUNDS		cl	/* temporary,  8 bits */

#else	/* OpenSolaris Interface */
#define	KEYP		rdi	/* P1, 64 bits */
#define	NROUNDS		esi	/* P2, 32 bits */
#define	INP		rdx	/* P3, 64 bits */
#define	OUTP		rcx	/* P4, 64 bits */
#endif	/* OPENSSL_INTERFACE */

#define	STATE		xmm0	/* temporary, 128 bits */
#define	KEY		xmm1	/* temporary, 128 bits */

/*
 * The input and output blocks are accessed with movups and may be
 * unaligned; the key schedule is read with movaps and must therefore be
 * 16-byte aligned.
 *
 * KEYP is advanced so that the final ten round keys always sit at
 * offsets -0x20 .. 0x70 from it.  A round count above 12 enters at the
 * top (four extra rounds), exactly 12 enters at .Lenc192 (two extra
 * rounds) and anything below 12 enters at .Lenc128.  lea does not alter
 * the flags, so the single cmp serves both the jb and the je.
 */
ENTRY_NP(aes_encrypt_intel)
	/* EXPORT DELETE START */
	CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)

	movups	(%INP), %STATE			/ input
	movaps	(%KEYP), %KEY			/ key
#ifdef	OPENSSL_INTERFACE
	mov	240(%KEYP), %NROUNDS32		/ round count
#else	/* OpenSolaris Interface */
	/* Round count is already present as P2 in %rsi/%esi */
#endif	/* OPENSSL_INTERFACE */

	pxor	%KEY, %STATE			/ round 0
	lea	0x30(%KEYP), %KEYP
	cmp	$12, %NROUNDS
	jb	.Lenc128
	lea	0x20(%KEYP), %KEYP
	je	.Lenc192

	/ AES 256
	lea	0x20(%KEYP), %KEYP
	movaps	-0x60(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	-0x50(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1

.align 4
.Lenc192:
	/ AES 192 and 256
	movaps	-0x40(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	-0x30(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1

.align 4
.Lenc128:
	/ AES 128, 192, and 256
	movaps	-0x20(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	-0x10(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	0x10(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	0x20(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	0x30(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	0x40(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	0x50(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	0x60(%KEYP), %KEY
	/aesenc	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xdc, 0xc1
	movaps	0x70(%KEYP), %KEY
	/aesenclast	 %KEY, %STATE		/ last round
	.byte	0x66, 0x0f, 0x38, 0xdd, 0xc1
	movups	%STATE, (%OUTP)			/ output

	SET_TS_OR_POP_XMM0_XMM1(%r10)
	/* EXPORT DELETE END */
	ret
	SET_SIZE(aes_encrypt_intel)


/*
 * aes_decrypt_intel()
 * Decrypt a single block (in and out can overlap).
 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 * on the stack.
*
 * Temporary register usage:
 * %xmm0	State
 * %xmm1	Key
 *
 * Original OpenSolaris Interface:
 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
 *	const uint32_t ct[4], uint32_t pt[4])
 *
 * Original Intel OpenSSL Interface:
 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
 *	const AES_KEY *key);
 *
 * The key schedule passed in is used exactly as stored, with aesdec /
 * aesdeclast, and is read with movaps, so it must be 16-byte aligned.
 * The input and output blocks are accessed with movups and may be
 * unaligned.
 *
 * KEYP is advanced so that the final ten round keys always sit at
 * offsets -0x20 .. 0x70 from it.  A round count above 12 enters at the
 * top (four extra rounds), exactly 12 enters at .Ldec192 (two extra
 * rounds) and anything below 12 enters at .Ldec128.  lea does not alter
 * the flags, so the single cmp serves both the jb and the je.
 */
ENTRY_NP(aes_decrypt_intel)
	/* EXPORT DELETE START */
	CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)

	movups	(%INP), %STATE			/ input
	movaps	(%KEYP), %KEY			/ key
#ifdef	OPENSSL_INTERFACE
	mov	240(%KEYP), %NROUNDS32		/ round count
#else	/* OpenSolaris Interface */
	/* Round count is already present as P2 in %rsi/%esi */
#endif	/* OPENSSL_INTERFACE */

	pxor	%KEY, %STATE			/ round 0
	lea	0x30(%KEYP), %KEYP
	cmp	$12, %NROUNDS
	jb	.Ldec128
	lea	0x20(%KEYP), %KEYP
	je	.Ldec192

	/ AES 256
	lea	0x20(%KEYP), %KEYP
	movaps	-0x60(%KEYP), %KEY
	/aesdec	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xde, 0xc1
	movaps	-0x50(%KEYP), %KEY
	/aesdec	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xde, 0xc1

.align 4
.Ldec192:
	/ AES 192 and 256
	movaps	-0x40(%KEYP), %KEY
	/aesdec	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xde, 0xc1
	movaps	-0x30(%KEYP), %KEY
	/aesdec	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xde, 0xc1

.align 4
.Ldec128:
	/ AES 128, 192, and 256
	movaps	-0x20(%KEYP), %KEY
	/aesdec	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xde, 0xc1
	movaps	-0x10(%KEYP), %KEY
	/aesdec	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xde, 0xc1
	movaps	(%KEYP), %KEY
	/aesdec	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xde, 0xc1
	movaps	0x10(%KEYP), %KEY
	/aesdec	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xde, 0xc1
	movaps	0x20(%KEYP), %KEY
	/aesdec	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xde, 0xc1
	movaps	0x30(%KEYP), %KEY
	/aesdec	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xde, 0xc1
	movaps	0x40(%KEYP), %KEY
	/aesdec	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xde, 0xc1
	movaps	0x50(%KEYP), %KEY
	/aesdec	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xde, 0xc1
	movaps	0x60(%KEYP), %KEY
	/aesdec	%KEY, %STATE
	.byte	0x66, 0x0f, 0x38, 0xde, 0xc1
	movaps	0x70(%KEYP), %KEY
	/aesdeclast	%KEY, %STATE		/ last round
	.byte	0x66, 0x0f, 0x38, 0xdf, 0xc1
	movups	%STATE, (%OUTP)			/ output

	SET_TS_OR_POP_XMM0_XMM1(%r10)
	/*
	 * The ret is kept outside the EXPORT DELETE region, as in the other
	 * three entry points, so the function still returns when the
	 * region is stripped.
	 */
	/* EXPORT DELETE END */
	ret
	SET_SIZE(aes_decrypt_intel)

#endif	/* lint || __lint */