xref: /freebsd/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S (revision 15f0b8c309dea1dcb14d3e374686576ff68ac43f)
1/*
2 * ====================================================================
3 * Written by Intel Corporation for the OpenSSL project to add support
4 * for Intel AES-NI instructions. Rights for redistribution and usage
5 * in source and binary forms are granted according to the OpenSSL
6 * license.
7 *
8 *   Author: Huang Ying <ying.huang at intel dot com>
9 *           Vinodh Gopal <vinodh.gopal at intel dot com>
10 *           Kahraman Akdemir
11 *
12 * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
13 * instructions that are going to be introduced in the next generation
14 * of Intel processor, as of 2009. These instructions enable fast and
15 * secure data encryption and decryption, using the Advanced Encryption
16 * Standard (AES), defined by FIPS Publication number 197. The
17 * architecture introduces six instructions that offer full hardware
18 * support for AES. Four of them support high performance data
19 * encryption and decryption, and the other two instructions support
20 * the AES key expansion procedure.
21 * ====================================================================
22 */
23
24/*
25 * ====================================================================
26 * Copyright (c) 1998-2008 The OpenSSL Project.  All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 *
32 * 1. Redistributions of source code must retain the above copyright
33 *    notice, this list of conditions and the following disclaimer.
34 *
35 * 2. Redistributions in binary form must reproduce the above copyright
36 *    notice, this list of conditions and the following disclaimer in
37 *    the documentation and/or other materials provided with the
38 *    distribution.
39 *
40 * 3. All advertising materials mentioning features or use of this
41 *    software must display the following acknowledgment:
42 *    "This product includes software developed by the OpenSSL Project
43 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
44 *
45 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
46 *    endorse or promote products derived from this software without
47 *    prior written permission. For written permission, please contact
48 *    openssl-core@openssl.org.
49 *
50 * 5. Products derived from this software may not be called "OpenSSL"
51 *    nor may "OpenSSL" appear in their names without prior written
52 *    permission of the OpenSSL Project.
53 *
54 * 6. Redistributions of any form whatsoever must retain the following
55 *    acknowledgment:
56 *    "This product includes software developed by the OpenSSL Project
57 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
60 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
63 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
64 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
65 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
66 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
68 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
69 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
70 * OF THE POSSIBILITY OF SUCH DAMAGE.
71 * ====================================================================
72 */
73
74/*
75 * ====================================================================
76 * OpenSolaris OS modifications
77 *
78 * This source originates as files aes-intel.S and eng_aesni_asm.pl, in
 * patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
80 * Huang Ying of Intel to the openssl-dev mailing list under the subject
81 * of "Add support to Intel AES-NI instruction set for x86_64 platform".
82 *
83 * This OpenSolaris version has these major changes from the original source:
84 *
85 * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
86 * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
87 * definitions for lint.
88 *
89 * 2. Formatted code, added comments, and added #includes and #defines.
90 *
91 * 3. If bit CR0.TS is set, clear and set the TS bit, after and before
92 * calling kpreempt_disable() and kpreempt_enable().
 * If the TS bit is not set, save and restore %xmm registers at the beginning
 * and end of function calls (%xmm* registers are not saved and restored
 * during kernel thread preemption).
96 *
97 * 4. Renamed functions, reordered parameters, and changed return value
98 * to match OpenSolaris:
99 *
100 * OpenSSL interface:
101 *	int intel_AES_set_encrypt_key(const unsigned char *userKey,
102 *		const int bits, AES_KEY *key);
103 *	int intel_AES_set_decrypt_key(const unsigned char *userKey,
104 *		const int bits, AES_KEY *key);
105 *	Return values for above are non-zero on error, 0 on success.
106 *
107 *	void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
108 *		const AES_KEY *key);
109 *	void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
110 *		const AES_KEY *key);
111 *	typedef struct aes_key_st {
112 *		unsigned int	rd_key[4 *(AES_MAXNR + 1)];
113 *		int		rounds;
114 *		unsigned int	pad[3];
115 *	} AES_KEY;
 * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
 * (ks32) instead of 64-bit (ks64)).
118 * Number of rounds (aka round count) is at offset 240 of AES_KEY.
119 *
120 * OpenSolaris OS interface (#ifdefs removed for readability):
121 *	int rijndael_key_setup_dec_intel(uint32_t rk[],
122 *		const uint32_t cipherKey[], uint64_t keyBits);
123 *	int rijndael_key_setup_enc_intel(uint32_t rk[],
124 *		const uint32_t cipherKey[], uint64_t keyBits);
125 *	Return values for above are 0 on error, number of rounds on success.
126 *
127 *	void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
128 *		const uint32_t pt[4], uint32_t ct[4]);
 *	void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
 *		const uint32_t ct[4], uint32_t pt[4]);
131 *	typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
132 *		 uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
133 *
134 *	typedef union {
135 *		uint32_t	ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
136 *	} aes_ks_t;
137 *	typedef struct aes_key {
138 *		aes_ks_t	encr_ks, decr_ks;
139 *		long double	align128;
140 *		int		flags, nr, type;
141 *	} aes_key_t;
142 *
143 * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
144 * ct is crypto text, and MAX_AES_NR is 14.
145 * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
146 *
147 * Note2: aes_ks_t must be aligned on a 0 mod 128 byte boundary.
148 *
149 * ====================================================================
150 */
151
152
153#if defined(lint) || defined(__lint)
154
155#include <sys/types.h>
156
/*
 * Lint-only stand-in for the assembly routine of the same name.  It gives
 * lint a C definition to check callers against and intentionally does
 * nothing with its arguments.
 */
void
aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
    uint32_t ct[4])
{
	/* Reference each parameter so lint does not flag it as unused. */
	(void) rk;
	(void) Nr;
	(void) pt;
	(void) ct;
}
/*
 * Lint-only stand-in for the assembly routine of the same name.  It gives
 * lint a C definition to check callers against and intentionally does
 * nothing with its arguments.
 */
void
aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
    uint32_t pt[4])
{
	/* Reference each parameter so lint does not flag it as unused. */
	(void) rk;
	(void) Nr;
	(void) ct;
	(void) pt;
}
/*
 * Lint-only stand-in for the assembly key-setup routine of the same name.
 * Ignores its arguments and always returns 0.
 */
int
rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits)
{
	/* Reference each parameter so lint does not flag it as unused. */
	(void) rk;
	(void) cipherKey;
	(void) keyBits;

	return (0);
}
/*
 * Lint-only stand-in for the assembly key-setup routine of the same name.
 * Ignores its arguments and always returns 0.
 */
int
rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits)
{
	/* Reference each parameter so lint does not flag it as unused. */
	(void) rk;
	(void) cipherKey;
	(void) keyBits;

	return (0);
}
179
180
181#elif defined(HAVE_AES)	/* guard by instruction set */
182
183#define _ASM
184#include <sys/asm_linkage.h>
185
186/*
 * _key_expansion_128(), _key_expansion_192a(), _key_expansion_192b(),
 * _key_expansion_256a(), _key_expansion_256b()
 *
 * Helper functions called by rijndael_key_setup_enc_intel().
 * Also used indirectly by rijndael_key_setup_dec_intel().
192 *
193 * Input:
194 * %xmm0	User-provided cipher key
195 * %xmm1	Round constant
196 * Output:
197 * (%rcx)	AES key
198 */
199
/*
 * One body shared by the AES-128 step and the "a" half of the AES-256 step.
 *
 * In:	%xmm0	previous round key
 *	%xmm1	value produced by the caller (aeskeygenassist in this file)
 *	%xmm4	scratch; dword 0 must be zero on entry
 *	%rcx	where to store the new round key
 * Out:	%xmm0	new round key, also stored at (%rcx)
 *	%rcx	advanced by 16 bytes
 */
ENTRY_NP2(_key_expansion_128, _key_expansion_256a)
_key_expansion_128_local:
_key_expansion_256a_local:
	/*
	 * Turn %xmm0 = [w0, w1, w2, w3] into the running XOR
	 * [w0, w0^w1, w0^w1^w2, w0^w1^w2^w3].  Both shuffles copy dword 0 of
	 * %xmm4 onto itself, so it stays zero for the next invocation.
	 */
	shufps	$0x10, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0x8c, %xmm0, %xmm4
	pxor	%xmm4, %xmm0

	pshufd	$0xff, %xmm1, %xmm1	// replicate dword 3 of %xmm1
	pxor	%xmm1, %xmm0		// fold it into every dword

	movups	%xmm0, (%rcx)		// emit the round key
	add	$0x10, %rcx
	RET
	nop
SET_SIZE(_key_expansion_128)
SET_SIZE(_key_expansion_256a)
215
216
/*
 * AES-192 expansion step that emits 32 bytes of key schedule.
 *
 * In:	%xmm0	first 128 bits of the previous key material
 *	%xmm1	value produced by the caller (aeskeygenassist in this file)
 *	%xmm2	remaining key material (low two dwords are used)
 *	%xmm4	scratch; dword 0 must be zero on entry
 *	%rcx	where to store the output
 * Out:	%xmm0, %xmm2 updated; %rcx advanced by 32 bytes
 * Scratch: %xmm1, %xmm3, %xmm4, %xmm5, %xmm6
 */
ENTRY_NP(_key_expansion_192a)
_key_expansion_192a_local:
	// Keep two copies of the incoming %xmm2 before it is updated.
	movups	%xmm2, %xmm6		// unshifted copy, stored below
	movups	%xmm2, %xmm5
	pslldq	$4, %xmm5		// copy shifted up by one dword

	// Running XOR across the dwords of %xmm0 (see _key_expansion_128).
	shufps	$0x10, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0x8c, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pshufd	$0x55, %xmm1, %xmm1	// replicate dword 1 of %xmm1
	pxor	%xmm1, %xmm0

	// %xmm2 ^= (old %xmm2 << 32) ^ broadcast(dword 3 of new %xmm0)
	pshufd	$0xff, %xmm0, %xmm3
	pxor	%xmm5, %xmm2
	pxor	%xmm3, %xmm2

	// (%rcx)     = [old %xmm2 dwords 0-1, new %xmm0 dwords 0-1]
	shufps	$0x44, %xmm0, %xmm6
	movups	%xmm6, (%rcx)

	// 16(%rcx)   = [new %xmm0 dwords 2-3, new %xmm2 dwords 0-1]
	movups	%xmm0, %xmm1
	shufps	$0x4e, %xmm2, %xmm1
	movups	%xmm1, 0x10(%rcx)

	add	$0x20, %rcx
	RET
SET_SIZE(_key_expansion_192a)
241
242
/*
 * AES-192 expansion step that emits 16 bytes of key schedule.
 *
 * In:	%xmm0	first 128 bits of the previous key material
 *	%xmm1	value produced by the caller (aeskeygenassist in this file)
 *	%xmm2	remaining key material
 *	%xmm4	scratch; dword 0 must be zero on entry
 *	%rcx	where to store the output
 * Out:	%xmm0 updated and stored at (%rcx); %xmm2 updated;
 *	%rcx advanced by 16 bytes
 * Scratch: %xmm1, %xmm3, %xmm4, %xmm5
 */
ENTRY_NP(_key_expansion_192b)
_key_expansion_192b_local:
	// Shifted copy of the incoming %xmm2, taken before it is updated.
	movups	%xmm2, %xmm5
	pslldq	$4, %xmm5

	// Running XOR across the dwords of %xmm0 (see _key_expansion_128).
	shufps	$0x10, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0x8c, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pshufd	$0x55, %xmm1, %xmm1	// replicate dword 1 of %xmm1
	pxor	%xmm1, %xmm0
	movups	%xmm0, (%rcx)		// emit the round key

	// %xmm2 ^= (old %xmm2 << 32) ^ broadcast(dword 3 of new %xmm0)
	pshufd	$0xff, %xmm0, %xmm3
	pxor	%xmm5, %xmm2
	pxor	%xmm3, %xmm2

	add	$0x10, %rcx
	RET
SET_SIZE(_key_expansion_192b)
262
263
/*
 * "b" half of the AES-256 step: same shape as _key_expansion_256a, but it
 * updates %xmm2 instead of %xmm0 and uses dword 2 of %xmm1.
 *
 * In:	%xmm2	round key to be updated
 *	%xmm1	value produced by the caller (aeskeygenassist in this file)
 *	%xmm4	scratch; dword 0 must be zero on entry
 *	%rcx	where to store the new round key
 * Out:	%xmm2	new round key, also stored at (%rcx)
 *	%rcx	advanced by 16 bytes
 */
ENTRY_NP(_key_expansion_256b)
_key_expansion_256b_local:
	// Running XOR across the four dwords of %xmm2.
	shufps	$0x10, %xmm2, %xmm4
	pxor	%xmm4, %xmm2
	shufps	$0x8c, %xmm2, %xmm4
	pxor	%xmm4, %xmm2

	pshufd	$0xaa, %xmm1, %xmm1	// replicate dword 2 of %xmm1
	pxor	%xmm1, %xmm2

	movups	%xmm2, (%rcx)		// emit the round key
	add	$0x10, %rcx
	RET
SET_SIZE(_key_expansion_256b)
276
277
278/*
279 * rijndael_key_setup_enc_intel()
280 * Expand the cipher key into the encryption key schedule.
281 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * The OpenSolaris version cleared and set the CR0.TS bit on entry and exit,
 * respectively, if TS was set on entry, and otherwise saved and restored the
 * %xmm registers on the stack.  NOTE(review): the code below does neither,
 * so preserving the %xmm state is presumably left to the caller as well.
287 *
288 * OpenSolaris interface:
289 * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
290 *	uint64_t keyBits);
291 * Return value is 0 on error, number of rounds on success.
292 *
293 * Original Intel OpenSSL interface:
294 * int intel_AES_set_encrypt_key(const unsigned char *userKey,
295 *	const int bits, AES_KEY *key);
296 * Return value is non-zero on error, 0 on success.
297 */
298
/*
 * Register aliases for the key-setup routines.  The two interfaces take the
 * same three arguments in a different order, so the argument registers are
 * named by role here and the code below is written once.
 */
#if defined(OPENSSL_INTERFACE)
/* OpenSSL names and argument order: (userKey, bits, key) */
#define	rijndael_key_setup_enc_intel	intel_AES_set_encrypt_key
#define	rijndael_key_setup_dec_intel	intel_AES_set_decrypt_key

#define	USERCIPHERKEY		rdi	/* arg 1, 64 bits */
#define	KEYSIZE64		rsi	/* arg 2, 64 bits */
#define	KEYSIZE32		esi	/* arg 2, low 32 bits */
#define	AESKEY			rdx	/* arg 3, 64 bits */

#else
/* OpenSolaris argument order: (rk, cipherKey, keyBits) */
#define	AESKEY			rdi	/* arg 1, 64 bits */
#define	USERCIPHERKEY		rsi	/* arg 2, 64 bits */
#define	KEYSIZE64		rdx	/* arg 3, 64 bits */
#define	KEYSIZE32		edx	/* arg 3, low 32 bits */
#endif	/* OPENSSL_INTERFACE */

/* Argument registers reused as temporaries once their argument is consumed */
#define	ROUNDS64		KEYSIZE64	/* round count / schedule cursor */
#define	ROUNDS32		KEYSIZE32	/* same register, 32-bit view */
#define	ENDAESKEY		USERCIPHERKEY	/* end-of-schedule pointer */
318
/*
 * Expand the user key at (%USERCIPHERKEY) into the encryption key schedule
 * at (%AESKEY).  %KEYSIZE32 must be 128, 192 or 256.
 *
 * Both pointers and the key size are validated before any memory is read
 * or written, so a rejected call leaves the key schedule untouched.
 *
 * Returns in %rax:
 *	OpenSolaris interface: 10, 12 or 14 (rounds) on success, 0 on error.
 *	OpenSSL interface: 0 on success, -1 for a NULL pointer, -2 for an
 *	unsupported key size; on success the round count is also stored at
 *	240(%AESKEY).
 *
 * Clobbers %rcx and %xmm0-%xmm4 (plus %xmm5-%xmm6 on the 192-bit path), and
 * %ROUNDS32 under the OpenSSL interface.
 */
ENTRY_NP(rijndael_key_setup_enc_intel)
rijndael_key_setup_enc_intel_local:
	FRAME_BEGIN
	// NULL pointer sanity check
	test	%USERCIPHERKEY, %USERCIPHERKEY
	jz	.Lenc_key_invalid_param
	test	%AESKEY, %AESKEY
	jz	.Lenc_key_invalid_param

	// Reject unsupported key sizes before touching either buffer
	cmp	$128, %KEYSIZE32
	je	.Lenc_key_bits_ok
	cmp	$192, %KEYSIZE32
	je	.Lenc_key_bits_ok
	cmp	$256, %KEYSIZE32
	jne	.Lenc_key_invalid_key_bits

.Lenc_key_bits_ok:
	movups	(%USERCIPHERKEY), %xmm0	// user key (first 16 bytes)
	movups	%xmm0, (%AESKEY)	// round key 0 is the user key itself
	lea	0x10(%AESKEY), %rcx	// key addr
	pxor	%xmm4, %xmm4		// xmm4 is assumed 0 in _key_expansion_x

	cmp	$256, %KEYSIZE32
	jnz	.Lenc_key192

	// AES 256: 14 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
	mov	$14, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		// key.rounds = 14
#endif	/* OPENSSL_INTERFACE */

	movups	0x10(%USERCIPHERKEY), %xmm2	// other user key (2nd 16 bytes)
	movups	%xmm2, (%rcx)			// round key 1
	add	$0x10, %rcx

	// Each a/b pair below appends two round keys
	aeskeygenassist $0x1, %xmm2, %xmm1	// expand the key
	call	_key_expansion_256a_local
	aeskeygenassist $0x1, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x2, %xmm2, %xmm1	// expand the key
	call	_key_expansion_256a_local
	aeskeygenassist $0x2, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x4, %xmm2, %xmm1	// expand the key
	call	_key_expansion_256a_local
	aeskeygenassist $0x4, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x8, %xmm2, %xmm1	// expand the key
	call	_key_expansion_256a_local
	aeskeygenassist $0x8, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x10, %xmm2, %xmm1	// expand the key
	call	_key_expansion_256a_local
	aeskeygenassist $0x10, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x20, %xmm2, %xmm1	// expand the key
	call	_key_expansion_256a_local
	aeskeygenassist $0x20, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x40, %xmm2, %xmm1	// expand the key
	call	_key_expansion_256a_local

#ifdef	OPENSSL_INTERFACE
	xor	%rax, %rax			// return 0 (OK)
#else	/* Open Solaris Interface */
	mov	$14, %rax			// return # rounds = 14
#endif
	FRAME_END
	RET

.balign 4
.Lenc_key192:
	cmp	$192, %KEYSIZE32
	jnz	.Lenc_key128

	// AES 192: 12 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
	mov	$12, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)	// key.rounds = 12
#endif	/* OPENSSL_INTERFACE */

	movq	0x10(%USERCIPHERKEY), %xmm2	// other user key (last 8 bytes)
	// 192a appends 32 bytes of schedule, 192b appends 16 bytes
	aeskeygenassist $0x1, %xmm2, %xmm1	// expand the key
	call	_key_expansion_192a_local
	aeskeygenassist $0x2, %xmm2, %xmm1	// expand the key
	call	_key_expansion_192b_local
	aeskeygenassist $0x4, %xmm2, %xmm1	// expand the key
	call	_key_expansion_192a_local
	aeskeygenassist $0x8, %xmm2, %xmm1	// expand the key
	call	_key_expansion_192b_local
	aeskeygenassist $0x10, %xmm2, %xmm1	// expand the key
	call	_key_expansion_192a_local
	aeskeygenassist $0x20, %xmm2, %xmm1	// expand the key
	call	_key_expansion_192b_local
	aeskeygenassist $0x40, %xmm2, %xmm1	// expand the key
	call	_key_expansion_192a_local
	aeskeygenassist $0x80, %xmm2, %xmm1	// expand the key
	call	_key_expansion_192b_local

#ifdef	OPENSSL_INTERFACE
	xor	%rax, %rax			// return 0 (OK)
#else	/* OpenSolaris Interface */
	mov	$12, %rax			// return # rounds = 12
#endif
	FRAME_END
	RET

.balign 4
.Lenc_key128:
	// The key size was validated on entry, so only 128 can reach here.

	// AES 128: 10 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
	mov	$10, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		// key.rounds = 10
#endif	/* OPENSSL_INTERFACE */

	// Each call below appends one round key
	aeskeygenassist $0x1, %xmm0, %xmm1	// expand the key
	call	_key_expansion_128_local
	aeskeygenassist $0x2, %xmm0, %xmm1	// expand the key
	call	_key_expansion_128_local
	aeskeygenassist $0x4, %xmm0, %xmm1	// expand the key
	call	_key_expansion_128_local
	aeskeygenassist $0x8, %xmm0, %xmm1	// expand the key
	call	_key_expansion_128_local
	aeskeygenassist $0x10, %xmm0, %xmm1	// expand the key
	call	_key_expansion_128_local
	aeskeygenassist $0x20, %xmm0, %xmm1	// expand the key
	call	_key_expansion_128_local
	aeskeygenassist $0x40, %xmm0, %xmm1	// expand the key
	call	_key_expansion_128_local
	aeskeygenassist $0x80, %xmm0, %xmm1	// expand the key
	call	_key_expansion_128_local
	aeskeygenassist $0x1b, %xmm0, %xmm1	// expand the key
	call	_key_expansion_128_local
	aeskeygenassist $0x36, %xmm0, %xmm1	// expand the key
	call	_key_expansion_128_local

#ifdef	OPENSSL_INTERFACE
	xor	%rax, %rax			// return 0 (OK)
#else	/* OpenSolaris Interface */
	mov	$10, %rax			// return # rounds = 10
#endif
	FRAME_END
	RET

.Lenc_key_invalid_param:
#ifdef	OPENSSL_INTERFACE
	mov	$-1, %rax	// user key or AES key pointer is NULL
	FRAME_END
	RET
#else
	/* FALLTHROUGH */
#endif	/* OPENSSL_INTERFACE */

.Lenc_key_invalid_key_bits:
#ifdef	OPENSSL_INTERFACE
	mov	$-2, %rax	// keysize is invalid
#else	/* Open Solaris Interface */
	xor	%rax, %rax	// a key pointer is NULL or invalid keysize
#endif	/* OPENSSL_INTERFACE */
	FRAME_END
	RET
	SET_SIZE(rijndael_key_setup_enc_intel)
476
477
478/*
479 * rijndael_key_setup_dec_intel()
480 * Expand the cipher key into the decryption key schedule.
481 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * The OpenSolaris version cleared and set the CR0.TS bit on entry and exit,
 * respectively, if TS was set on entry, and otherwise saved and restored the
 * %xmm registers on the stack.  NOTE(review): the code below does neither,
 * so preserving the %xmm state is presumably left to the caller as well.
487 *
488 * OpenSolaris interface:
489 * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
490 *	uint64_t keyBits);
491 * Return value is 0 on error, number of rounds on success.
492 * P1->P2, P2->P3, P3->P1
493 *
494 * Original Intel OpenSSL interface:
495 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
496 *	const int bits, AES_KEY *key);
497 * Return value is non-zero on error, 0 on success.
498 */
499
/*
 * Build the decryption key schedule in three phases:
 *   1. generate the encryption schedule in place,
 *   2. reverse the order of the round keys,
 *   3. run aesimc over every round key except the first and the last.
 * %rax is passed through unchanged from the encryption key setup.
 */
ENTRY_NP(rijndael_key_setup_dec_intel)
	FRAME_BEGIN
	// Phase 1: encryption round keys; give up if that reported an error
	call	rijndael_key_setup_enc_intel_local
	test	%rax, %rax
#ifdef	OPENSSL_INTERFACE
	jnz	.Ldec_key_done	// non-zero means failure
#else	/* OpenSolaris Interface */
	jz	.Ldec_key_done	// zero means failure
#endif	/* OPENSSL_INTERFACE */

#ifndef	OPENSSL_INTERFACE		/* OpenSolaris Interface */
	// The round count came back in %rax; under the OpenSSL interface
	// the key setup above already left it in %ROUNDS32.
	mov	%rax, %ROUNDS64		// # rounds (10, 12, or 14)
#endif

	shl	$4, %ROUNDS32		// rounds * 16 = offset of last key
	add	%AESKEY, %ROUNDS64	// -> last round key
	mov	%ROUNDS64, %ENDAESKEY	// remember it for phase 3
	lea	0x10(%AESKEY), %rcx	// -> second round key, for phase 3

	// Phase 2: swap round keys from both ends, moving toward the middle
.balign 4
.Ldec_key_swap:
	movups	(%ROUNDS64), %xmm1
	movups	(%AESKEY), %xmm0
	movups	%xmm1, (%AESKEY)
	movups	%xmm0, (%ROUNDS64)
	lea	-0x10(%ROUNDS64), %ROUNDS64
	lea	0x10(%AESKEY), %AESKEY
	cmp	%AESKEY, %ROUNDS64
	ja	.Ldec_key_swap		// until the two cursors meet or cross

	// Phase 3: "AES Inverse Mix Columns" on the keys in [%rcx, %ENDAESKEY)
.balign 4
.Ldec_key_imc:
	movups	(%rcx), %xmm0
	aesimc	%xmm0, %xmm1
	movups	%xmm1, (%rcx)
	lea	0x10(%rcx), %rcx
	cmp	%ENDAESKEY, %rcx
	jnz	.Ldec_key_imc

.Ldec_key_done:
	// OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
	// OpenSSL: rax = 0 for OK, or non-zero for error
	FRAME_END
	RET
	SET_SIZE(rijndael_key_setup_dec_intel)
553
554
555/*
556 * aes_encrypt_intel()
557 * Encrypt a single block (in and out can overlap).
558 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * The OpenSolaris version cleared and set the CR0.TS bit on entry and exit,
 * respectively, if TS was set on entry, and otherwise saved and restored the
 * %xmm registers on the stack.  NOTE(review): the code below does neither,
 * so preserving the %xmm state is presumably left to the caller as well.
564 *
565 * Temporary register usage:
566 * %xmm0	State
567 * %xmm1	Key
568 *
569 * Original OpenSolaris Interface:
570 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
571 *	const uint32_t pt[4], uint32_t ct[4])
572 *
573 * Original Intel OpenSSL Interface:
574 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
575 *	const AES_KEY *key)
576 */
577
/*
 * Register aliases for the single-block encrypt/decrypt routines, named by
 * role so one body serves both calling interfaces.
 */
#if defined(OPENSSL_INTERFACE)
/* OpenSSL names and argument order: (in, out, key) */
#define	aes_encrypt_intel	intel_AES_encrypt
#define	aes_decrypt_intel	intel_AES_decrypt

#define	INP		rdi	/* arg 1, 64 bits */
#define	OUTP		rsi	/* arg 2, 64 bits */
#define	KEYP		rdx	/* arg 3, 64 bits */

/*
 * There is no round-count argument in this interface; the count is read
 * from offset 240 of the key structure into %ecx.
 */
#define	NROUNDS32	ecx	/* temporary, 32 bits */
#define	NROUNDS		cl	/* temporary, low 8 bits of the same register */

#else
/* OpenSolaris argument order: (ks, Nr, in, out) */
#define	KEYP		rdi	/* arg 1, 64 bits */
#define	NROUNDS		esi	/* arg 2, 32 bits */
#define	INP		rdx	/* arg 3, 64 bits */
#define	OUTP		rcx	/* arg 4, 64 bits */
#endif	/* OPENSSL_INTERFACE */

/* Working registers shared by both interfaces */
#define	STATE		xmm0	/* block being transformed, 128 bits */
#define	KEY		xmm1	/* current round key, 128 bits */
599
600
/*
 * Encrypt the 16-byte block at (%INP) with the key schedule at (%KEYP) and
 * store the result at (%OUTP).
 *
 * %KEYP is advanced so that the last ten round keys always sit at offsets
 * -0x20 .. 0x70 from it; the longer key sizes run their extra rounds first
 * and then fall through into the shared tail.  A round count below 12 takes
 * the 10-round path, exactly 12 the 12-round path, anything else 14 rounds.
 *
 * Clobbers %KEYP, %STATE, %KEY (and %NROUNDS32 under the OpenSSL interface).
 */
ENTRY_NP(aes_encrypt_intel)

	movups	(%KEYP), %KEY			// round key 0
	movups	(%INP), %STATE			// input block
#ifdef	OPENSSL_INTERFACE
	mov	240(%KEYP), %NROUNDS32		// round count
#endif	/* otherwise the round count arrived as an argument in %NROUNDS */

	pxor	%KEY, %STATE			// round 0: add round key
	lea	0x30(%KEYP), %KEYP
	cmp	$12, %NROUNDS
	jb	.Lenc_tail10
	lea	0x20(%KEYP), %KEYP		// lea keeps the flags from cmp
	je	.Lenc_tail12

	// 14 rounds only: two extra rounds
	lea	0x20(%KEYP), %KEYP
	movups	-0x60(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	-0x50(%KEYP), %KEY
	aesenc	%KEY, %STATE

.balign 4
.Lenc_tail12:
	// 12 and 14 rounds: two more rounds
	movups	-0x40(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	-0x30(%KEYP), %KEY
	aesenc	%KEY, %STATE

.balign 4
.Lenc_tail10:
	// All key sizes: nine full rounds and then the final round
	movups	-0x20(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	-0x10(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x10(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x20(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x30(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x40(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x50(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x60(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x70(%KEYP), %KEY
	aesenclast	%KEY, %STATE		// final round
	movups	%STATE, (%OUTP)			// output block

	RET
	SET_SIZE(aes_encrypt_intel)
660
661
662/*
663 * aes_decrypt_intel()
664 * Decrypt a single block (in and out can overlap).
665 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * The OpenSolaris version cleared and set the CR0.TS bit on entry and exit,
 * respectively, if TS was set on entry, and otherwise saved and restored the
 * %xmm registers on the stack.  NOTE(review): the code below does neither,
 * so preserving the %xmm state is presumably left to the caller as well.
671 *
672 * Temporary register usage:
673 * %xmm0	State
674 * %xmm1	Key
675 *
676 * Original OpenSolaris Interface:
 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
 *	const uint32_t ct[4], uint32_t pt[4])
679 *
680 * Original Intel OpenSSL Interface:
681 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
682 *	const AES_KEY *key);
683 */
/*
 * Decrypt the 16-byte block at (%INP) with the key schedule at (%KEYP) and
 * store the result at (%OUTP).
 *
 * %KEYP is advanced so that the last ten round keys always sit at offsets
 * -0x20 .. 0x70 from it; the longer key sizes run their extra rounds first
 * and then fall through into the shared tail.  A round count below 12 takes
 * the 10-round path, exactly 12 the 12-round path, anything else 14 rounds.
 *
 * Clobbers %KEYP, %STATE, %KEY (and %NROUNDS32 under the OpenSSL interface).
 */
ENTRY_NP(aes_decrypt_intel)

	movups	(%KEYP), %KEY			// round key 0
	movups	(%INP), %STATE			// input block
#ifdef	OPENSSL_INTERFACE
	mov	240(%KEYP), %NROUNDS32		// round count
#endif	/* otherwise the round count arrived as an argument in %NROUNDS */

	pxor	%KEY, %STATE			// round 0: add round key
	lea	0x30(%KEYP), %KEYP
	cmp	$12, %NROUNDS
	jb	.Ldec_tail10
	lea	0x20(%KEYP), %KEYP		// lea keeps the flags from cmp
	je	.Ldec_tail12

	// 14 rounds only: two extra rounds
	lea	0x20(%KEYP), %KEYP
	movups	-0x60(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	-0x50(%KEYP), %KEY
	aesdec	%KEY, %STATE

.balign 4
.Ldec_tail12:
	// 12 and 14 rounds: two more rounds
	movups	-0x40(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	-0x30(%KEYP), %KEY
	aesdec	%KEY, %STATE

.balign 4
.Ldec_tail10:
	// All key sizes: nine full rounds and then the final round
	movups	-0x20(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	-0x10(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x10(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x20(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x30(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x40(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x50(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x60(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x70(%KEYP), %KEY
	aesdeclast	%KEY, %STATE		// final round
	movups	%STATE, (%OUTP)			// output block

	RET
	SET_SIZE(aes_decrypt_intel)
743
744#endif	/* lint || __lint */
745
/*
 * On ELF targets, emit an empty .note.GNU-stack section (no "x" flag) so
 * that GNU-compatible linkers do not mark the stack executable on account
 * of this object.
 */
#if defined(__ELF__)
	.section .note.GNU-stack,"",%progbits
#endif
749