xref: /titanic_51/usr/src/common/crypto/aes/amd64/aes_intel.s (revision a29e56d91db891741f1af9f6bbd3e3c3cac5f19b)
1/*
2 * ====================================================================
3 * Written by Intel Corporation for the OpenSSL project to add support
4 * for Intel AES-NI instructions. Rights for redistribution and usage
5 * in source and binary forms are granted according to the OpenSSL
6 * license.
7 *
8 *   Author: Huang Ying <ying.huang at intel dot com>
9 *           Vinodh Gopal <vinodh.gopal at intel dot com>
10 *           Kahraman Akdemir
11 *
12 * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
13 * instructions that are going to be introduced in the next generation
14 * of Intel processor, as of 2009. These instructions enable fast and
15 * secure data encryption and decryption, using the Advanced Encryption
16 * Standard (AES), defined by FIPS Publication number 197. The
17 * architecture introduces six instructions that offer full hardware
18 * support for AES. Four of them support high performance data
19 * encryption and decryption, and the other two instructions support
20 * the AES key expansion procedure.
21 * ====================================================================
22 */
23
24/*
25 * ====================================================================
26 * Copyright (c) 1998-2008 The OpenSSL Project.  All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 *
32 * 1. Redistributions of source code must retain the above copyright
33 *    notice, this list of conditions and the following disclaimer.
34 *
35 * 2. Redistributions in binary form must reproduce the above copyright
36 *    notice, this list of conditions and the following disclaimer in
37 *    the documentation and/or other materials provided with the
38 *    distribution.
39 *
40 * 3. All advertising materials mentioning features or use of this
41 *    software must display the following acknowledgment:
42 *    "This product includes software developed by the OpenSSL Project
43 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
44 *
45 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
46 *    endorse or promote products derived from this software without
47 *    prior written permission. For written permission, please contact
48 *    openssl-core@openssl.org.
49 *
50 * 5. Products derived from this software may not be called "OpenSSL"
51 *    nor may "OpenSSL" appear in their names without prior written
52 *    permission of the OpenSSL Project.
53 *
54 * 6. Redistributions of any form whatsoever must retain the following
55 *    acknowledgment:
56 *    "This product includes software developed by the OpenSSL Project
57 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
60 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
63 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
64 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
65 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
66 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
68 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
69 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
70 * OF THE POSSIBILITY OF SUCH DAMAGE.
71 * ====================================================================
72 */
73
74/*
75 * ====================================================================
76 * OpenSolaris OS modifications
77 *
78 * This source originates as files aes-intel.S and eng_aesni_asm.pl, in
 * patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
80 * Huang Ying of Intel to the openssl-dev mailing list under the subject
81 * of "Add support to Intel AES-NI instruction set for x86_64 platform".
82 *
83 * This OpenSolaris version has these major changes from the original source:
84 *
85 * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
86 * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
87 * definitions for lint.
88 *
89 * 2. Formatted code, added comments, and added #includes and #defines.
90 *
91 * 3. If bit CR0.TS is set, clear and set the TS bit, after and before
92 * calling kpreempt_disable() and kpreempt_enable().
 * If the TS bit is not set, save and restore %xmm registers at the beginning
 * and end of function calls (%xmm* registers are not saved and restored
 * during kernel thread preemption).
96 *
97 * 4. Renamed functions, reordered parameters, and changed return value
98 * to match OpenSolaris:
99 *
100 * OpenSSL interface:
101 *	int intel_AES_set_encrypt_key(const unsigned char *userKey,
102 *		const int bits, AES_KEY *key);
103 *	int intel_AES_set_decrypt_key(const unsigned char *userKey,
104 *		const int bits, AES_KEY *key);
105 *	Return values for above are non-zero on error, 0 on success.
106 *
107 *	void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
108 *		const AES_KEY *key);
109 *	void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
110 *		const AES_KEY *key);
111 *	typedef struct aes_key_st {
112 *		unsigned int	rd_key[4 *(AES_MAXNR + 1)];
113 *		int		rounds;
114 *		unsigned int	pad[3];
115 *	} AES_KEY;
 * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
 * (ks32) instead of 64-bit (ks64)).
118 * Number of rounds (aka round count) is at offset 240 of AES_KEY.
119 *
120 * OpenSolaris OS interface (#ifdefs removed for readability):
121 *	int rijndael_key_setup_dec_intel(uint32_t rk[],
122 *		const uint32_t cipherKey[], uint64_t keyBits);
123 *	int rijndael_key_setup_enc_intel(uint32_t rk[],
124 *		const uint32_t cipherKey[], uint64_t keyBits);
125 *	Return values for above are 0 on error, number of rounds on success.
126 *
127 *	void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
128 *		const uint32_t pt[4], uint32_t ct[4]);
 *	void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
 *		const uint32_t ct[4], uint32_t pt[4]);
131 *	typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
132 *		 uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
133 *
134 *	typedef union {
135 *		uint32_t	ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
136 *	} aes_ks_t;
137 *	typedef struct aes_key {
138 *		aes_ks_t	encr_ks, decr_ks;
139 *		long double	align128;
140 *		int		flags, nr, type;
141 *	} aes_key_t;
142 *
143 * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
144 * ct is crypto text, and MAX_AES_NR is 14.
145 * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
146 *
 * Note2: aes_ks_t must be aligned on a 0 mod 128 bit (16-byte) boundary,
 * because the key schedule is accessed with movaps.
148 *
149 * ====================================================================
150 */
151
152#if defined(lint) || defined(__lint)
153
154#include <sys/types.h>
155
/* ARGSUSED */
void
aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
    uint32_t ct[4])
{
	/* Empty stub seen only by lint; it does not touch its arguments. */
}
/* ARGSUSED */
void
aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
    uint32_t pt[4])
{
	/* Empty stub seen only by lint; it does not touch its arguments. */
}
/* ARGSUSED */
int
rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits)
{
	/* Lint-only stub: always returns 0 and leaves rk[] untouched. */
	return (0);
}
/* ARGSUSED */
int
rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits)
{
	/* Lint-only stub: always returns 0 and leaves rk[] untouched. */
	return (0);
}
178
179
180#else	/* lint */
181
182#include <sys/asm_linkage.h>
183#include <sys/controlregs.h>
184#ifdef _KERNEL
185#include <sys/machprivregs.h>
186#endif
187
188#ifdef _KERNEL
189	/*
190	 * Note: the CLTS macro clobbers P2 (%rsi) under i86xpv.  That is,
191	 * it calls HYPERVISOR_fpu_taskswitch() which modifies %rsi when it
192	 * uses it to pass P2 to syscall.
193	 * This also occurs with the STTS macro, but we don't care if
194	 * P2 (%rsi) is modified just before function exit.
195	 * The CLTS and STTS macros push and pop P1 (%rdi) already.
196	 */
197#ifdef __xpv
198#define	PROTECTED_CLTS \
199	push	%rsi; \
200	CLTS; \
201	pop	%rsi
202#else
203#define	PROTECTED_CLTS \
204	CLTS
205#endif	/* __xpv */
206
/*
 * Set up a frame (push %rbp; %rbp = %rsp) and read %cr0 into tmpreg.
 * If CR0_TS is set, clear it with PROTECTED_CLTS.  Otherwise align %rsp
 * down to XMM_ALIGN and save %xmm0 and %xmm1 on the stack.
 *
 * tmpreg must still hold the %cr0 value, and %rsp the same value, when the
 * matching SET_TS_OR_POP_XMM0_XMM1() runs; that macro tests tmpreg and
 * reloads the registers relative to %rsp.
 */
#define	CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg) \
	push	%rbp; \
	mov	%rsp, %rbp; \
	movq	%cr0, tmpreg; \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	and	$-XMM_ALIGN, %rsp; \
	sub	$[XMM_SIZE * 2], %rsp; \
	movaps	%xmm1, (%rsp); \
	movaps	%xmm0, 16(%rsp); \
	jmp	2f; \
1: \
	PROTECTED_CLTS; \
2:
221
222	/*
223	 * If CR0_TS was not set above, pop %xmm0 and %xmm1 off stack,
224	 * otherwise set CR0_TS.
225	 */
226#define	SET_TS_OR_POP_XMM0_XMM1(tmpreg) \
227	testq	$CR0_TS, tmpreg; \
228	jnz	1f; \
229	movaps	(%rsp), %xmm1; \
230	movaps	16(%rsp), %xmm0; \
231	jmp	2f; \
2321: \
233	STTS(tmpreg); \
2342: \
235	mov	%rbp, %rsp; \
236	pop	%rbp
237
238	/*
239	 * If CR0_TS is not set, align stack (with push %rbp) and push
240	 * %xmm0 - %xmm6 on stack, otherwise clear CR0_TS
241	 */
242#define	CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg) \
243	push	%rbp; \
244	mov	%rsp, %rbp; \
245	movq	%cr0, tmpreg; \
246	testq	$CR0_TS, tmpreg; \
247	jnz	1f; \
248	and	$-XMM_ALIGN, %rsp; \
249	sub	$[XMM_SIZE * 7], %rsp; \
250	movaps	%xmm0, 96(%rsp); \
251	movaps	%xmm1, 80(%rsp); \
252	movaps	%xmm2, 64(%rsp); \
253	movaps	%xmm3, 48(%rsp); \
254	movaps	%xmm4, 32(%rsp); \
255	movaps	%xmm5, 16(%rsp); \
256	movaps	%xmm6, (%rsp); \
257	jmp	2f; \
2581: \
259	PROTECTED_CLTS; \
2602:
261
262
263	/*
264	 * If CR0_TS was not set above, pop %xmm0 - %xmm6 off stack,
265	 * otherwise set CR0_TS.
266	 */
267#define	SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg) \
268	testq	$CR0_TS, tmpreg; \
269	jnz	1f; \
270	movaps	(%rsp), %xmm6; \
271	movaps	16(%rsp), %xmm5; \
272	movaps	32(%rsp), %xmm4; \
273	movaps	48(%rsp), %xmm3; \
274	movaps	64(%rsp), %xmm2; \
275	movaps	80(%rsp), %xmm1; \
276	movaps	96(%rsp), %xmm0; \
277	jmp	2f; \
2781: \
279	STTS(tmpreg); \
2802: \
281	mov	%rbp, %rsp; \
282	pop	%rbp
283
284
285#else
/*
 * Non-kernel (!_KERNEL) build: the CR0.TS and %xmm save/restore macros
 * all expand to nothing.
 */
#define	PROTECTED_CLTS
#define	CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg)
#define	SET_TS_OR_POP_XMM0_XMM1(tmpreg)
#define	CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg)
#define	SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg)
291#endif	/* _KERNEL */
292
293
294/*
 * _key_expansion_128(), _key_expansion_192a(), _key_expansion_192b(),
 * _key_expansion_256a(), _key_expansion_256b()
 *
 * Helper functions called by rijndael_key_setup_enc_intel().
299 * Also used indirectly by rijndael_key_setup_dec_intel().
300 *
301 * Input:
302 * %xmm0	User-provided cipher key
303 * %xmm1	Round constant
304 * Output:
305 * (%rcx)	AES key
306 */
307
.align	16
_key_expansion_128:
_key_expansion_256a:
	/ In:  %xmm0 = previous round key, %xmm1 = value prepared by the
	/      caller (aeskeygenassist output at the call sites),
	/      %xmm4 = scratch, assumed zeroed by the caller,
	/      %rcx  = address at which to store the new round key.
	/ Out: %xmm0 = new round key (also stored), %rcx advanced by 16.

	/ Turn the four dwords of %xmm0 into their running XOR
	/ (w0, w0^w1, w0^w1^w2, w0^w1^w2^w3) using %xmm4 as scratch.
	shufps	$0b00010000, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0b10001100, %xmm0, %xmm4
	pxor	%xmm4, %xmm0

	/ Broadcast dword 3 of %xmm1 and fold it into every dword.
	pshufd	$0b11111111, %xmm1, %xmm1
	pxor	%xmm1, %xmm0

	movaps	%xmm0, (%rcx)		/ store the new round key
	add	$0x10, %rcx		/ step to the next round key slot
	ret
	SET_SIZE(_key_expansion_128)
	SET_SIZE(_key_expansion_256a)
322
.align 16
_key_expansion_192a:
	/ In:  %xmm0 and the low 64 bits of %xmm2 = previous key material,
	/      %xmm1 = value prepared by the caller (aeskeygenassist output
	/      at the call sites), %xmm4 = scratch, assumed zeroed by the
	/      caller, %rcx = store address.
	/ Out: two 16-byte round keys stored at (%rcx); %rcx advanced by 32.
	/ Clobbers %xmm1, %xmm3, %xmm4, %xmm5, %xmm6.

	/ Running XOR of the four dwords of %xmm0 (scratch: %xmm4),
	/ then fold in the broadcast dword 1 of %xmm1.
	shufps	$0b00010000, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0b10001100, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pshufd	$0b01010101, %xmm1, %xmm1
	pxor	%xmm1, %xmm0

	/ Update %xmm2: XOR in the broadcast dword 3 of the new %xmm0 and
	/ a copy of the old %xmm2 shifted left by 4 bytes.  %xmm6 keeps the
	/ old %xmm2 for the first store below.
	movaps	%xmm2, %xmm6
	movaps	%xmm2, %xmm5
	pslldq	$4, %xmm5
	pshufd	$0b11111111, %xmm0, %xmm3
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	/ First output: low qword of old %xmm2, then low qword of %xmm0.
	shufps	$0b01000100, %xmm0, %xmm6
	movaps	%xmm6, (%rcx)
	/ Second output: high qword of %xmm0, then low qword of new %xmm2.
	movaps	%xmm0, %xmm1
	shufps	$0b01001110, %xmm2, %xmm1
	movaps	%xmm1, 0x10(%rcx)
	add	$0x20, %rcx
	ret
	SET_SIZE(_key_expansion_192a)
347
.align 16
_key_expansion_192b:
	/ In:  %xmm0 and the low 64 bits of %xmm2 = previous key material,
	/      %xmm1 = value prepared by the caller (aeskeygenassist output
	/      at the call sites), %xmm4 = scratch, assumed zeroed by the
	/      caller, %rcx = store address.
	/ Out: one 16-byte round key (%xmm0) stored at (%rcx); %rcx advanced
	/      by 16; %xmm2 updated for the next call.
	/ Clobbers %xmm1, %xmm3, %xmm4, %xmm5.

	/ Running XOR of the four dwords of %xmm0 (scratch: %xmm4),
	/ then fold in the broadcast dword 1 of %xmm1.
	shufps	$0b00010000, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0b10001100, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pshufd	$0b01010101, %xmm1, %xmm1
	pxor	%xmm1, %xmm0

	/ Update %xmm2: XOR in the broadcast dword 3 of the new %xmm0 and
	/ a copy of the old %xmm2 shifted left by 4 bytes.
	movaps	%xmm2, %xmm5
	pslldq	$4, %xmm5
	pshufd	$0b11111111, %xmm0, %xmm3
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	movaps	%xmm0, (%rcx)		/ store the new round key
	add	$0x10, %rcx		/ step to the next round key slot
	ret
	SET_SIZE(_key_expansion_192b)
367
.align 16
_key_expansion_256b:
	/ Same shape as _key_expansion_256a, but it works on %xmm2 instead
	/ of %xmm0 and uses dword 2 (not dword 3) of %xmm1.
	/ In:  %xmm2 = previous round key of this half, %xmm1 = value
	/      prepared by the caller (aeskeygenassist output at the call
	/      sites), %xmm4 = scratch, assumed zeroed by the caller,
	/      %rcx = store address.
	/ Out: %xmm2 = new round key (also stored), %rcx advanced by 16.

	/ Running XOR of the four dwords of %xmm2 (scratch: %xmm4).
	shufps	$0b00010000, %xmm2, %xmm4
	pxor	%xmm4, %xmm2
	shufps	$0b10001100, %xmm2, %xmm4
	pxor	%xmm4, %xmm2

	/ Broadcast dword 2 of %xmm1 and fold it into every dword.
	pshufd	$0b10101010, %xmm1, %xmm1
	pxor	%xmm1, %xmm2

	movaps	%xmm2, (%rcx)		/ store the new round key
	add	$0x10, %rcx		/ step to the next round key slot
	ret
	SET_SIZE(_key_expansion_256b)
380
381
382/*
383 * rijndael_key_setup_enc_intel()
384 * Expand the cipher key into the encryption key schedule.
385 *
386 * For kernel code, caller is responsible for ensuring kpreempt_disable()
387 * has been called.  This is because %xmm registers are not saved/restored.
388 * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
389 * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
390 * on the stack.
391 *
392 * OpenSolaris interface:
393 * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
394 *	uint64_t keyBits);
395 * Return value is 0 on error, number of rounds on success.
396 *
397 * Original Intel OpenSSL interface:
398 * int intel_AES_set_encrypt_key(const unsigned char *userKey,
399 *	const int bits, AES_KEY *key);
400 * Return value is non-zero on error, 0 on success.
401 */
402
/*
 * Register aliases for the key setup functions.  The two interfaces take
 * the same three arguments in a different order, so each name maps to a
 * different parameter register depending on OPENSSL_INTERFACE.
 */
#ifndef	OPENSSL_INTERFACE	/* OpenSolaris Interface */
#define	AESKEY			rdi	/* P1, 64 bits */
#define	USERCIPHERKEY		rsi	/* P2, 64 bits */
#define	KEYSIZE32		edx	/* P3, 32 bits */
#define	KEYSIZE64		rdx	/* P3, 64 bits */

#else	/* OpenSSL Interface */
/* Build the functions under their OpenSSL names. */
#define	rijndael_key_setup_enc_intel	intel_AES_set_encrypt_key
#define	rijndael_key_setup_dec_intel	intel_AES_set_decrypt_key

#define	USERCIPHERKEY		rdi	/* P1, 64 bits */
#define	KEYSIZE32		esi	/* P2, 32 bits */
#define	KEYSIZE64		rsi	/* P2, 64 bits */
#define	AESKEY			rdx	/* P3, 64 bits */
#endif	/* !OPENSSL_INTERFACE */

/* Parameter registers reused as temporaries once the parameter is dead. */
#define	ROUNDS32		KEYSIZE32	/* temp */
#define	ROUNDS64		KEYSIZE64	/* temp */
#define	ENDAESKEY		USERCIPHERKEY	/* temp */
422
423
ENTRY_NP(rijndael_key_setup_enc_intel)
	/ %r10 carries the saved %cr0 value until the matching SET_TS macro
	CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(%r10)

	/ Reject a NULL key schedule or a NULL user key pointer
	test	%AESKEY, %AESKEY
	jz	.Lenc_key_invalid_param
	test	%USERCIPHERKEY, %USERCIPHERKEY
	jz	.Lenc_key_invalid_param

	/ Round key 0 is the first 16 bytes of the user key.  The user key
	/ may be unaligned (movups); the key schedule is stored with movaps.
	movups	(%USERCIPHERKEY), %xmm0
	movaps	%xmm0, (%AESKEY)
	pxor	%xmm4, %xmm4		/ xmm4 is assumed 0 in _key_expansion_x
	lea	0x10(%AESKEY), %rcx	/ rcx = address of the next round key

	cmp	$256, %KEYSIZE32
	jnz	.Lenc_key192

	/ AES 256: 14 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
	mov	$14, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		/ key.rounds = 14
#endif	/* OPENSSL_INTERFACE */

	/ Round key 1 is the second 16 bytes of the user key
	movups	0x10(%USERCIPHERKEY), %xmm2
	movaps	%xmm2, (%rcx)
	add	$0x10, %rcx

	/ Each 256a/256b pair produces two more round keys; the 256a call
	/ updates xmm0 and the 256b call updates xmm2.
	aeskeygenassist $0x1, %xmm2, %xmm1
	call	_key_expansion_256a
	aeskeygenassist $0x1, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist $0x2, %xmm2, %xmm1
	call	_key_expansion_256a
	aeskeygenassist $0x2, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist $0x4, %xmm2, %xmm1
	call	_key_expansion_256a
	aeskeygenassist $0x4, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist $0x8, %xmm2, %xmm1
	call	_key_expansion_256a
	aeskeygenassist $0x8, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist $0x10, %xmm2, %xmm1
	call	_key_expansion_256a
	aeskeygenassist $0x10, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist $0x20, %xmm2, %xmm1
	call	_key_expansion_256a
	aeskeygenassist $0x20, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist $0x40, %xmm2, %xmm1
	call	_key_expansion_256a

	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef	OPENSSL_INTERFACE
	xor	%rax, %rax			/ return 0 (OK)
#else	/* Open Solaris Interface */
	mov	$14, %rax			/ return # rounds = 14
#endif
	ret

.align 4
.Lenc_key192:
	cmp	$192, %KEYSIZE32
	jnz	.Lenc_key128

	/ AES 192: 12 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
	mov	$12, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)	/ key.rounds = 12
#endif	/* OPENSSL_INTERFACE */

	/ Remaining 8 bytes of the user key go into the low half of xmm2
	movq	0x10(%USERCIPHERKEY), %xmm2
	/ The 192a helper stores 32 bytes per call, the 192b helper 16 bytes
	aeskeygenassist $0x1, %xmm2, %xmm1
	call	_key_expansion_192a
	aeskeygenassist $0x2, %xmm2, %xmm1
	call	_key_expansion_192b
	aeskeygenassist $0x4, %xmm2, %xmm1
	call	_key_expansion_192a
	aeskeygenassist $0x8, %xmm2, %xmm1
	call	_key_expansion_192b
	aeskeygenassist $0x10, %xmm2, %xmm1
	call	_key_expansion_192a
	aeskeygenassist $0x20, %xmm2, %xmm1
	call	_key_expansion_192b
	aeskeygenassist $0x40, %xmm2, %xmm1
	call	_key_expansion_192a
	aeskeygenassist $0x80, %xmm2, %xmm1
	call	_key_expansion_192b

	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef	OPENSSL_INTERFACE
	xor	%rax, %rax			/ return 0 (OK)
#else	/* OpenSolaris Interface */
	mov	$12, %rax			/ return # rounds = 12
#endif
	ret

.align 4
.Lenc_key128:
	cmp	$128, %KEYSIZE32
	jnz	.Lenc_key_invalid_key_bits

	/ AES 128: 10 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
	mov	$10, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		/ key.rounds = 10
#endif	/* OPENSSL_INTERFACE */

	/ One helper call per round key, ten in total
	aeskeygenassist $0x1, %xmm0, %xmm1
	call	_key_expansion_128
	aeskeygenassist $0x2, %xmm0, %xmm1
	call	_key_expansion_128
	aeskeygenassist $0x4, %xmm0, %xmm1
	call	_key_expansion_128
	aeskeygenassist $0x8, %xmm0, %xmm1
	call	_key_expansion_128
	aeskeygenassist $0x10, %xmm0, %xmm1
	call	_key_expansion_128
	aeskeygenassist $0x20, %xmm0, %xmm1
	call	_key_expansion_128
	aeskeygenassist $0x40, %xmm0, %xmm1
	call	_key_expansion_128
	aeskeygenassist $0x80, %xmm0, %xmm1
	call	_key_expansion_128
	aeskeygenassist $0x1b, %xmm0, %xmm1
	call	_key_expansion_128
	aeskeygenassist $0x36, %xmm0, %xmm1
	call	_key_expansion_128

	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef	OPENSSL_INTERFACE
	xor	%rax, %rax			/ return 0 (OK)
#else	/* OpenSolaris Interface */
	mov	$10, %rax			/ return # rounds = 10
#endif
	ret

.Lenc_key_invalid_param:
#ifdef	OPENSSL_INTERFACE
	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
	mov	$-1, %rax	/ user key or AES key pointer is NULL
	ret
#else
	/* FALLTHROUGH: the OpenSolaris interface returns 0 for both errors */
#endif	/* OPENSSL_INTERFACE */

.Lenc_key_invalid_key_bits:
	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef	OPENSSL_INTERFACE
	mov	$-2, %rax	/ keysize is invalid
#else	/* Open Solaris Interface */
	xor	%rax, %rax	/ a key pointer is NULL or invalid keysize
#endif	/* OPENSSL_INTERFACE */

	ret
	SET_SIZE(rijndael_key_setup_enc_intel)
582
583
584/*
585 * rijndael_key_setup_dec_intel()
586 * Expand the cipher key into the decryption key schedule.
587 *
588 * For kernel code, caller is responsible for ensuring kpreempt_disable()
589 * has been called.  This is because %xmm registers are not saved/restored.
590 * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
591 * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
592 * on the stack.
593 *
594 * OpenSolaris interface:
595 * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
596 *	uint64_t keyBits);
597 * Return value is 0 on error, number of rounds on success.
598 * P1->P2, P2->P3, P3->P1
599 *
600 * Original Intel OpenSSL interface:
601 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
602 *	const int bits, AES_KEY *key);
603 * Return value is non-zero on error, 0 on success.
604 */
ENTRY_NP(rijndael_key_setup_dec_intel)
	/ Build the encryption key schedule first; the argument registers
	/ are passed through unchanged.
	call	rijndael_key_setup_enc_intel
	test	%rax, %rax
#ifdef	OPENSSL_INTERFACE
	jnz	.Ldec_key_exit	/ Failed if returned non-0
#else	/* OpenSolaris Interface */
	jz	.Ldec_key_exit	/ Failed if returned 0
#endif	/* OPENSSL_INTERFACE */

	CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)

	/*
	 * Convert round keys used for encryption
	 * to a form usable for decryption
	 */
#ifndef	OPENSSL_INTERFACE		/* OpenSolaris Interface */
	mov	%rax, %ROUNDS64		/ set # rounds (10, 12, or 14)
					/ (already set for OpenSSL)
#endif

	/ ROUNDS64 = address of the last round key (AESKEY + 16 * rounds),
	/ also remembered in ENDAESKEY; rcx = address of round key 1.
	shl	$4, %ROUNDS32
	add	%AESKEY, %ROUNDS64
	mov	%ROUNDS64, %ENDAESKEY
	lea	0x10(%AESKEY), %rcx

	/ Reverse the order of the round keys in place: swap the keys at
	/ AESKEY (moving up) and ROUNDS64 (moving down) until they meet.
.align 4
.Ldec_key_reorder_loop:
	movaps	(%AESKEY), %xmm0
	movaps	(%ROUNDS64), %xmm1
	movaps	%xmm0, (%ROUNDS64)
	movaps	%xmm1, (%AESKEY)
	lea	0x10(%AESKEY), %AESKEY
	lea	-0x10(%ROUNDS64), %ROUNDS64
	cmp	%AESKEY, %ROUNDS64
	ja	.Ldec_key_reorder_loop

	/ Apply the "AES Inverse Mix Columns" instruction to every round
	/ key from the second one up to, but not including, the last one.
.align 4
.Ldec_key_inv_loop:
	movaps	(%rcx), %xmm0
	aesimc	%xmm0, %xmm1
	movaps	%xmm1, (%rcx)
	lea	0x10(%rcx), %rcx
	cmp	%ENDAESKEY, %rcx
	jnz	.Ldec_key_inv_loop

	SET_TS_OR_POP_XMM0_XMM1(%r10)

.Ldec_key_exit:
	/ OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
	/ OpenSSL: rax = 0 for OK, or non-zero for error
	ret
	SET_SIZE(rijndael_key_setup_dec_intel)
660
661
662/*
663 * aes_encrypt_intel()
664 * Encrypt a single block (in and out can overlap).
665 *
666 * For kernel code, caller is responsible for ensuring kpreempt_disable()
667 * has been called.  This is because %xmm registers are not saved/restored.
668 * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
669 * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
670 * on the stack.
671 *
672 * Temporary register usage:
673 * %xmm0	State
674 * %xmm1	Key
675 *
676 * Original OpenSolaris Interface:
677 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
678 *	const uint32_t pt[4], uint32_t ct[4])
679 *
680 * Original Intel OpenSSL Interface:
681 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
682 *	const AES_KEY *key)
683 */
684
/*
 * Register aliases for aes_encrypt_intel() and aes_decrypt_intel().
 * The OpenSolaris interface passes the round count as P2; the OpenSSL
 * interface has no such parameter and loads it from offset 240 of KEYP.
 */
#ifndef	OPENSSL_INTERFACE	/* OpenSolaris Interface */
#define	KEYP		rdi	/* P1, 64 bits */
#define	NROUNDS		esi	/* P2, 32 bits */
#define	INP		rdx	/* P3, 64 bits */
#define	OUTP		rcx	/* P4, 64 bits */

#else	/* OpenSSL Interface */
/* Build the functions under their OpenSSL names. */
#define	aes_encrypt_intel	intel_AES_encrypt
#define	aes_decrypt_intel	intel_AES_decrypt

#define	INP		rdi	/* P1, 64 bits */
#define	OUTP		rsi	/* P2, 64 bits */
#define	KEYP		rdx	/* P3, 64 bits */

/* No NROUNDS parameter--offset 240 from KEYP saved in %ecx:  */
#define	NROUNDS32	ecx	/* temporary, 32 bits */
#define	NROUNDS		cl	/* temporary,  8 bits */
#endif	/* !OPENSSL_INTERFACE */

#define	STATE		xmm0	/* temporary, 128 bits */
#define	KEY		xmm1	/* temporary, 128 bits */
706
ENTRY_NP(aes_encrypt_intel)
	CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)

	movaps	(%KEYP), %KEY			/ round key 0 (aligned load)
	movups	(%INP), %STATE			/ input block (unaligned load)
#ifdef	OPENSSL_INTERFACE
	mov	240(%KEYP), %NROUNDS32		/ round count
#else	/* OpenSolaris Interface */
	/* Round count is already present as P2 in %rsi/%esi */
#endif	/* OPENSSL_INTERFACE */

	pxor	%KEY, %STATE			/ round 0

	/ Bias KEYP by key size so that the shared tail at .Lenc128 can use
	/ the same offsets for all three key sizes.  The lea instructions
	/ leave the flags alone, so the je below still tests the cmp result.
	lea	0x30(%KEYP), %KEYP
	cmp	$12, %NROUNDS
	jb	.Lenc128			/ fewer than 12 rounds
	lea	0x20(%KEYP), %KEYP
	je	.Lenc192			/ exactly 12 rounds

	/ AES 256: two extra rounds, then fall into the 192 and 128 rounds
	lea	0x20(%KEYP), %KEYP
	movaps	-0x60(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	-0x50(%KEYP), %KEY
	aesenc	%KEY, %STATE

.align 4
.Lenc192:
	/ AES 192 and 256: two more rounds
	movaps	-0x40(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	-0x30(%KEYP), %KEY
	aesenc	%KEY, %STATE

.align 4
.Lenc128:
	/ AES 128, 192, and 256: nine rounds plus the last round
	movaps	-0x20(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	-0x10(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x10(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x20(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x30(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x40(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x50(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x60(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x70(%KEYP), %KEY
	aesenclast	%KEY, %STATE		/ last round
	movups	%STATE, (%OUTP)			/ output block (unaligned store)

	SET_TS_OR_POP_XMM0_XMM1(%r10)
	ret
	SET_SIZE(aes_encrypt_intel)
768
769
770/*
771 * aes_decrypt_intel()
772 * Decrypt a single block (in and out can overlap).
773 *
774 * For kernel code, caller is responsible for ensuring kpreempt_disable()
775 * has been called.  This is because %xmm registers are not saved/restored.
776 * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
777 * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
778 * on the stack.
779 *
780 * Temporary register usage:
781 * %xmm0	State
782 * %xmm1	Key
783 *
784 * Original OpenSolaris Interface:
 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
 *	const uint32_t ct[4], uint32_t pt[4])
787 *
788 * Original Intel OpenSSL Interface:
789 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
790 *	const AES_KEY *key);
791 */
ENTRY_NP(aes_decrypt_intel)
	CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)

	movaps	(%KEYP), %KEY			/ round key 0 (aligned load)
	movups	(%INP), %STATE			/ input block (unaligned load)
#ifdef	OPENSSL_INTERFACE
	mov	240(%KEYP), %NROUNDS32		/ round count
#else	/* OpenSolaris Interface */
	/* Round count is already present as P2 in %rsi/%esi */
#endif	/* OPENSSL_INTERFACE */

	pxor	%KEY, %STATE			/ round 0

	/ Bias KEYP by key size so that the shared tail at .Ldec128 can use
	/ the same offsets for all three key sizes.  The lea instructions
	/ leave the flags alone, so the je below still tests the cmp result.
	lea	0x30(%KEYP), %KEYP
	cmp	$12, %NROUNDS
	jb	.Ldec128			/ fewer than 12 rounds
	lea	0x20(%KEYP), %KEYP
	je	.Ldec192			/ exactly 12 rounds

	/ AES 256: two extra rounds, then fall into the 192 and 128 rounds
	lea	0x20(%KEYP), %KEYP
	movaps	-0x60(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	-0x50(%KEYP), %KEY
	aesdec	%KEY, %STATE

.align 4
.Ldec192:
	/ AES 192 and 256: two more rounds
	movaps	-0x40(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	-0x30(%KEYP), %KEY
	aesdec	%KEY, %STATE

.align 4
.Ldec128:
	/ AES 128, 192, and 256: nine rounds plus the last round
	movaps	-0x20(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	-0x10(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x10(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x20(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x30(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x40(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x50(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x60(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x70(%KEYP), %KEY
	aesdeclast	%KEY, %STATE		/ last round
	movups	%STATE, (%OUTP)			/ output block (unaligned store)

	SET_TS_OR_POP_XMM0_XMM1(%r10)
	ret
	SET_SIZE(aes_decrypt_intel)
853
854#endif	/* lint || __lint */
855