xref: /freebsd/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1// SPDX-License-Identifier: OpenSSL-standalone
2/*
3 * ====================================================================
4 * Written by Intel Corporation for the OpenSSL project to add support
5 * for Intel AES-NI instructions. Rights for redistribution and usage
6 * in source and binary forms are granted according to the OpenSSL
7 * license.
8 *
9 *   Author: Huang Ying <ying.huang at intel dot com>
10 *           Vinodh Gopal <vinodh.gopal at intel dot com>
11 *           Kahraman Akdemir
12 *
13 * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
14 * instructions that are going to be introduced in the next generation
15 * of Intel processor, as of 2009. These instructions enable fast and
16 * secure data encryption and decryption, using the Advanced Encryption
17 * Standard (AES), defined by FIPS Publication number 197. The
18 * architecture introduces six instructions that offer full hardware
19 * support for AES. Four of them support high performance data
20 * encryption and decryption, and the other two instructions support
21 * the AES key expansion procedure.
22 * ====================================================================
23 */
24
25/*
26 * ====================================================================
27 * Copyright (c) 1998-2008 The OpenSSL Project.  All rights reserved.
28 *
29 * Redistribution and use in source and binary forms, with or without
30 * modification, are permitted provided that the following conditions
31 * are met:
32 *
33 * 1. Redistributions of source code must retain the above copyright
34 *    notice, this list of conditions and the following disclaimer.
35 *
36 * 2. Redistributions in binary form must reproduce the above copyright
37 *    notice, this list of conditions and the following disclaimer in
38 *    the documentation and/or other materials provided with the
39 *    distribution.
40 *
41 * 3. All advertising materials mentioning features or use of this
42 *    software must display the following acknowledgment:
43 *    "This product includes software developed by the OpenSSL Project
44 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
45 *
46 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
47 *    endorse or promote products derived from this software without
48 *    prior written permission. For written permission, please contact
49 *    openssl-core@openssl.org.
50 *
51 * 5. Products derived from this software may not be called "OpenSSL"
52 *    nor may "OpenSSL" appear in their names without prior written
53 *    permission of the OpenSSL Project.
54 *
55 * 6. Redistributions of any form whatsoever must retain the following
56 *    acknowledgment:
57 *    "This product includes software developed by the OpenSSL Project
58 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
59 *
60 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
61 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
62 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
63 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
64 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
65 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
66 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
67 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
68 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
69 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
70 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
71 * OF THE POSSIBILITY OF SUCH DAMAGE.
72 * ====================================================================
73 */
74
75/*
76 * ====================================================================
77 * OpenSolaris OS modifications
78 *
 * This source originates as files aes-intel.S and eng_aesni_asm.pl, in
 * patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
 * Huang Ying of Intel to the openssl-dev mailing list under the subject
 * of "Add support to Intel AES-NI instruction set for x86_64 platform".
83 *
84 * This OpenSolaris version has these major changes from the original source:
85 *
86 * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
87 * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
88 * definitions for lint.
89 *
90 * 2. Formatted code, added comments, and added #includes and #defines.
91 *
92 * 3. If bit CR0.TS is set, clear and set the TS bit, after and before
93 * calling kpreempt_disable() and kpreempt_enable().
 * If the TS bit is not set, save and restore %xmm registers at the beginning
 * and end of function calls (%xmm* registers are not saved and restored
 * during kernel thread preemption).
97 *
98 * 4. Renamed functions, reordered parameters, and changed return value
99 * to match OpenSolaris:
100 *
101 * OpenSSL interface:
102 *	int intel_AES_set_encrypt_key(const unsigned char *userKey,
103 *		const int bits, AES_KEY *key);
104 *	int intel_AES_set_decrypt_key(const unsigned char *userKey,
105 *		const int bits, AES_KEY *key);
106 *	Return values for above are non-zero on error, 0 on success.
107 *
108 *	void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
109 *		const AES_KEY *key);
110 *	void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
111 *		const AES_KEY *key);
112 *	typedef struct aes_key_st {
113 *		unsigned int	rd_key[4 *(AES_MAXNR + 1)];
114 *		int		rounds;
115 *		unsigned int	pad[3];
116 *	} AES_KEY;
 * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
 * (ks32) instead of 64-bit (ks64)).
119 * Number of rounds (aka round count) is at offset 240 of AES_KEY.
120 *
121 * OpenSolaris OS interface (#ifdefs removed for readability):
122 *	int rijndael_key_setup_dec_intel(uint32_t rk[],
123 *		const uint32_t cipherKey[], uint64_t keyBits);
124 *	int rijndael_key_setup_enc_intel(uint32_t rk[],
125 *		const uint32_t cipherKey[], uint64_t keyBits);
126 *	Return values for above are 0 on error, number of rounds on success.
127 *
128 *	void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
129 *		const uint32_t pt[4], uint32_t ct[4]);
130 *	void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
131 *		const uint32_t pt[4], uint32_t ct[4]);
132 *	typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
133 *		 uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
134 *
135 *	typedef union {
136 *		uint32_t	ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
137 *	} aes_ks_t;
138 *	typedef struct aes_key {
139 *		aes_ks_t	encr_ks, decr_ks;
140 *		long double	align128;
141 *		int		flags, nr, type;
142 *	} aes_key_t;
143 *
144 * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
145 * ct is crypto text, and MAX_AES_NR is 14.
146 * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
147 *
148 * Note2: aes_ks_t must be aligned on a 0 mod 128 byte boundary.
149 *
150 * ====================================================================
151 */
152
153
154#if defined(lint) || defined(__lint)
155
156#include <sys/types.h>
157
/*
 * lint-only placeholder for the assembly routine of the same name.
 * It performs no work: every argument is explicitly discarded and
 * nothing is read from or written to the supplied buffers.
 */
void
aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
    uint32_t ct[4])
{
	(void) rk;
	(void) Nr;
	(void) pt;
	(void) ct;
}
/*
 * lint-only placeholder for the assembly routine of the same name.
 * It performs no work: every argument is explicitly discarded and
 * nothing is read from or written to the supplied buffers.
 */
void
aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
    uint32_t pt[4])
{
	(void) rk;
	(void) Nr;
	(void) ct;
	(void) pt;
}
/*
 * lint-only placeholder for the assembly routine of the same name.
 * Discards all arguments and always returns 0.
 */
int
rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits)
{
	(void) rk;
	(void) cipherKey;
	(void) keyBits;

	return (0);
}
/*
 * lint-only placeholder for the assembly routine of the same name.
 * Discards all arguments and always returns 0.
 */
int
rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits)
{
	(void) rk;
	(void) cipherKey;
	(void) keyBits;

	return (0);
}
180
181
182#elif defined(HAVE_AES)	/* guard by instruction set */
183
184#define _ASM
185#include <sys/asm_linkage.h>
186
187/*
 * _key_expansion_128(), _key_expansion_192a(), _key_expansion_192b(),
 * _key_expansion_256a(), _key_expansion_256b()
 *
 * Helper functions called by rijndael_key_setup_enc_intel().
 * Also used indirectly by rijndael_key_setup_dec_intel().
193 *
194 * Input:
195 * %xmm0	User-provided cipher key
196 * %xmm1	Round constant
197 * Output:
198 * (%rcx)	AES key
199 */
200
ENTRY_NP2(_key_expansion_128, _key_expansion_256a)
_key_expansion_128_local:
_key_expansion_256a_local:
	/*
	 * In:  %xmm0 = previous round key, %xmm1 = aeskeygenassist result,
	 *      %xmm4 with dword 0 equal to zero, %rcx = store cursor.
	 * Out: %xmm0 = next round key, also stored at (%rcx);
	 *      %rcx advanced by 16.  Dword 0 of %xmm4 is still zero on exit.
	 */

	// Running XOR across the four dwords of %xmm0:
	//   {a, b, c, d} -> {a, a^b, a^b^c, a^b^c^d}
	shufps	$0x10, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0x8c, %xmm0, %xmm4
	pxor	%xmm4, %xmm0

	// Fold in dword 3 of the keygen-assist value, replicated to all lanes.
	pshufd	$0xff, %xmm1, %xmm1
	pxor	%xmm1, %xmm0

	// Emit the round key and step the cursor to the next slot.
	movups	%xmm0, (%rcx)
	add	$0x10, %rcx
	RET
	nop
SET_SIZE(_key_expansion_128)
SET_SIZE(_key_expansion_256a)
216
217
ENTRY_NP(_key_expansion_192a)
_key_expansion_192a_local:
	/*
	 * In:  %xmm0 = low 128 bits of the running key, %xmm2 = remaining
	 *      64 bits (low qword), %xmm1 = aeskeygenassist result,
	 *      %xmm4 with dword 0 equal to zero, %rcx = store cursor.
	 * Out: 32 bytes written at (%rcx), %rcx advanced by 32,
	 *      %xmm0/%xmm2 updated.  Clobbers %xmm1, %xmm3-%xmm6.
	 */

	// Running XOR across the four dwords of %xmm0.
	shufps	$0x10, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0x8c, %xmm0, %xmm4
	pxor	%xmm4, %xmm0

	// Fold in dword 1 of the keygen-assist value, replicated to all lanes.
	pshufd	$0x55, %xmm1, %xmm1
	pxor	%xmm1, %xmm0

	// Keep the old %xmm2 in %xmm6, then extend the chain into %xmm2:
	// %xmm2 ^= (old %xmm2 shifted up one dword) ^ broadcast(%xmm0 dword 3).
	movups	%xmm2, %xmm6
	movups	%xmm2, %xmm5
	pslldq	$4, %xmm5
	pshufd	$0xff, %xmm0, %xmm3
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	// First 16 bytes out: {old %xmm2 low qword, new %xmm0 low qword}.
	movups	%xmm0, %xmm1
	shufps	$0x44, %xmm0, %xmm6
	movups	%xmm6, (%rcx)

	// Second 16 bytes out: {new %xmm0 high qword, new %xmm2 low qword}.
	shufps	$0x4e, %xmm2, %xmm1
	movups	%xmm1, 0x10(%rcx)

	add	$0x20, %rcx
	RET
SET_SIZE(_key_expansion_192a)
242
243
ENTRY_NP(_key_expansion_192b)
_key_expansion_192b_local:
	/*
	 * In:  %xmm0 = low 128 bits of the running key, %xmm2 = remaining
	 *      64 bits (low qword), %xmm1 = aeskeygenassist result,
	 *      %xmm4 with dword 0 equal to zero, %rcx = store cursor.
	 * Out: 16 bytes (%xmm0) written at (%rcx), %rcx advanced by 16,
	 *      %xmm0/%xmm2 updated.  Clobbers %xmm1, %xmm3-%xmm5.
	 */

	// Running XOR across the four dwords of %xmm0.
	shufps	$0x10, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0x8c, %xmm0, %xmm4
	pxor	%xmm4, %xmm0

	// Fold in dword 1 of the keygen-assist value, replicated to all lanes.
	pshufd	$0x55, %xmm1, %xmm1
	pxor	%xmm1, %xmm0

	// Extend the chain into %xmm2:
	// %xmm2 ^= (old %xmm2 shifted up one dword) ^ broadcast(%xmm0 dword 3).
	movups	%xmm2, %xmm5
	pslldq	$4, %xmm5
	pshufd	$0xff, %xmm0, %xmm3
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	// Only %xmm0 is stored here; %xmm2 is kept in the register.
	movups	%xmm0, (%rcx)
	add	$0x10, %rcx
	RET
SET_SIZE(_key_expansion_192b)
263
264
ENTRY_NP(_key_expansion_256b)
_key_expansion_256b_local:
	/*
	 * Same shape as _key_expansion_256a, but operating on %xmm2.
	 * In:  %xmm2 = round key to update, %xmm1 = aeskeygenassist result,
	 *      %xmm4 with dword 0 equal to zero, %rcx = store cursor.
	 * Out: %xmm2 = next round key, also stored at (%rcx);
	 *      %rcx advanced by 16.
	 */

	// Running XOR across the four dwords of %xmm2.
	shufps	$0x10, %xmm2, %xmm4
	pxor	%xmm4, %xmm2
	shufps	$0x8c, %xmm2, %xmm4
	pxor	%xmm4, %xmm2

	// Fold in dword 2 of the keygen-assist value, replicated to all lanes.
	pshufd	$0xaa, %xmm1, %xmm1
	pxor	%xmm1, %xmm2

	// Emit the round key and step the cursor to the next slot.
	movups	%xmm2, (%rcx)
	add	$0x10, %rcx
	RET
SET_SIZE(_key_expansion_256b)
277
278
279/*
280 * rijndael_key_setup_enc_intel()
281 * Expand the cipher key into the encryption key schedule.
282 *
283 * For kernel code, caller is responsible for ensuring kpreempt_disable()
284 * has been called.  This is because %xmm registers are not saved/restored.
 * NOTE: the instructions written in this routine do not themselves clear
 * or set CR0.TS, nor do they save or restore any %xmm register on the
 * stack; the caller must treat %xmm0-%xmm6 as clobbered.
288 *
289 * OpenSolaris interface:
290 * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
291 *	uint64_t keyBits);
292 * Return value is 0 on error, number of rounds on success.
293 *
294 * Original Intel OpenSSL interface:
295 * int intel_AES_set_encrypt_key(const unsigned char *userKey,
296 *	const int bits, AES_KEY *key);
297 * Return value is non-zero on error, 0 on success.
298 */
299
/*
 * Register assignments for the two key-setup entry points.  The names
 * expand to bare register names; the code prefixes them with '%'.
 */
#ifndef	OPENSSL_INTERFACE
/* OpenSolaris interface: (rk, cipherKey, keyBits) */
#define	AESKEY			rdi	/* P1, 64 bits */
#define	USERCIPHERKEY		rsi	/* P2, 64 bits */
#define	KEYSIZE64		rdx	/* P3, 64 bits */
#define	KEYSIZE32		edx	/* P3, 32 bits */

#else	/* OPENSSL_INTERFACE */
/* OpenSSL interface: (userKey, bits, key); entry points are renamed too */
#define	rijndael_key_setup_enc_intel	intel_AES_set_encrypt_key
#define	rijndael_key_setup_dec_intel	intel_AES_set_decrypt_key

#define	USERCIPHERKEY		rdi	/* P1, 64 bits */
#define	KEYSIZE64		rsi	/* P2, 64 bits */
#define	KEYSIZE32		esi	/* P2, 32 bits */
#define	AESKEY			rdx	/* P3, 64 bits */
#endif	/* !OPENSSL_INTERFACE */

/*
 * Temporaries that reuse argument registers: the key-size register is
 * reused for the round count, and the user-key register is reused as the
 * end-of-schedule pointer.
 */
#define	ROUNDS64		KEYSIZE64	/* temp */
#define	ROUNDS32		KEYSIZE32	/* temp */
#define	ENDAESKEY		USERCIPHERKEY	/* temp */
319
ENTRY_NP(rijndael_key_setup_enc_intel)
rijndael_key_setup_enc_intel_local:
	/*
	 * Register roles inside this routine:
	 *   %AESKEY         destination key schedule; read-only here
	 *   %USERCIPHERKEY  caller's raw key; read-only here
	 *   %KEYSIZE32      key length in bits (128, 192 or 256)
	 *   %rcx            store cursor, advanced by the _key_expansion_*
	 *                   helpers as each round key is written
	 *   %xmm0-%xmm6     scratch, depending on the key size
	 *
	 * NOTE(review): only the low 32 bits of the key-size argument are
	 * compared below, although the OpenSolaris prototype declares it as
	 * uint64_t -- confirm callers never pass a value above 32 bits.
	 */
	FRAME_BEGIN

	// Refuse to run with a NULL schedule or a NULL user key.
	test	%AESKEY, %AESKEY
	jz	.Lenc_ks_bad_ptr
	test	%USERCIPHERKEY, %USERCIPHERKEY
	jz	.Lenc_ks_bad_ptr

	// Round key 0 is the first 16 bytes of the user key, copied verbatim.
	movups	(%USERCIPHERKEY), %xmm0
	movups	%xmm0, (%AESKEY)
	lea	0x10(%AESKEY), %rcx	// cursor = slot for the next round key
	pxor	%xmm4, %xmm4		// the helpers expect %xmm4 to start as 0

	cmp	$256, %KEYSIZE32
	jne	.Lenc_ks192

	/*
	 * 256-bit key, 14 rounds.
	 */
#ifdef OPENSSL_INTERFACE
	mov	$14, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		// key.rounds = 14
#endif	/* OPENSSL_INTERFACE */

	// Round key 1 is the second 16 bytes of the user key.
	movups	0x10(%USERCIPHERKEY), %xmm2
	movups	%xmm2, (%rcx)
	add	$0x10, %rcx

	// Alternate: 256a derives the next %xmm0 from %xmm2, then
	// 256b derives the next %xmm2 from %xmm0.
	aeskeygenassist $0x1, %xmm2, %xmm1
	call	_key_expansion_256a_local
	aeskeygenassist $0x1, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x2, %xmm2, %xmm1
	call	_key_expansion_256a_local
	aeskeygenassist $0x2, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x4, %xmm2, %xmm1
	call	_key_expansion_256a_local
	aeskeygenassist $0x4, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x8, %xmm2, %xmm1
	call	_key_expansion_256a_local
	aeskeygenassist $0x8, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x10, %xmm2, %xmm1
	call	_key_expansion_256a_local
	aeskeygenassist $0x10, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x20, %xmm2, %xmm1
	call	_key_expansion_256a_local
	aeskeygenassist $0x20, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x40, %xmm2, %xmm1
	call	_key_expansion_256a_local

#ifdef	OPENSSL_INTERFACE
	xor	%eax, %eax			// return 0 (OK)
#else	/* OpenSolaris Interface */
	mov	$14, %eax			// return # rounds = 14
#endif
	FRAME_END
	RET

.balign 4
.Lenc_ks192:
	cmp	$192, %KEYSIZE32
	jne	.Lenc_ks128

	/*
	 * 192-bit key, 12 rounds.
	 */
#ifdef OPENSSL_INTERFACE
	mov	$12, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		// key.rounds = 12
#endif	/* OPENSSL_INTERFACE */

	// Remaining 8 bytes of the user key go in the low qword of %xmm2.
	movq	0x10(%USERCIPHERKEY), %xmm2

	// 192a stores 32 bytes per call, 192b stores 16 bytes per call.
	aeskeygenassist $0x1, %xmm2, %xmm1
	call	_key_expansion_192a_local
	aeskeygenassist $0x2, %xmm2, %xmm1
	call	_key_expansion_192b_local
	aeskeygenassist $0x4, %xmm2, %xmm1
	call	_key_expansion_192a_local
	aeskeygenassist $0x8, %xmm2, %xmm1
	call	_key_expansion_192b_local
	aeskeygenassist $0x10, %xmm2, %xmm1
	call	_key_expansion_192a_local
	aeskeygenassist $0x20, %xmm2, %xmm1
	call	_key_expansion_192b_local
	aeskeygenassist $0x40, %xmm2, %xmm1
	call	_key_expansion_192a_local
	aeskeygenassist $0x80, %xmm2, %xmm1
	call	_key_expansion_192b_local

#ifdef	OPENSSL_INTERFACE
	xor	%eax, %eax			// return 0 (OK)
#else	/* OpenSolaris Interface */
	mov	$12, %eax			// return # rounds = 12
#endif
	FRAME_END
	RET

.balign 4
.Lenc_ks128:
	cmp	$128, %KEYSIZE32
	jne	.Lenc_ks_bad_bits

	/*
	 * 128-bit key, 10 rounds.
	 */
#ifdef OPENSSL_INTERFACE
	mov	$10, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		// key.rounds = 10
#endif	/* OPENSSL_INTERFACE */

	// One helper call per round key, each with its own round constant.
	aeskeygenassist $0x1, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x2, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x4, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x8, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x10, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x20, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x40, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x80, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x1b, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x36, %xmm0, %xmm1
	call	_key_expansion_128_local

#ifdef	OPENSSL_INTERFACE
	xor	%eax, %eax			// return 0 (OK)
#else	/* OpenSolaris Interface */
	mov	$10, %eax			// return # rounds = 10
#endif
	FRAME_END
	RET

	/*
	 * Error exits.  The OpenSSL build distinguishes a NULL pointer (-1)
	 * from an unsupported key size (-2); the OpenSolaris build returns 0
	 * for both, so the NULL-pointer label simply falls through.
	 */
.Lenc_ks_bad_ptr:
#ifdef	OPENSSL_INTERFACE
	mov	$-1, %rax	// user key or AES key pointer is NULL
	FRAME_END
	RET
#else
	/* FALLTHROUGH */
#endif	/* OPENSSL_INTERFACE */

.Lenc_ks_bad_bits:
#ifdef	OPENSSL_INTERFACE
	mov	$-2, %rax	// keysize is invalid
#else	/* OpenSolaris Interface */
	xor	%eax, %eax	// a key pointer is NULL or invalid keysize
#endif	/* OPENSSL_INTERFACE */
	FRAME_END
	RET
	SET_SIZE(rijndael_key_setup_enc_intel)
477
478
479/*
480 * rijndael_key_setup_dec_intel()
481 * Expand the cipher key into the decryption key schedule.
482 *
483 * For kernel code, caller is responsible for ensuring kpreempt_disable()
484 * has been called.  This is because %xmm registers are not saved/restored.
 * NOTE: the instructions written in this routine do not themselves clear
 * or set CR0.TS, nor do they save or restore any %xmm register on the
 * stack; the caller must treat the %xmm registers used here as clobbered.
488 *
489 * OpenSolaris interface:
490 * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
491 *	uint64_t keyBits);
492 * Return value is 0 on error, number of rounds on success.
493 * P1->P2, P2->P3, P3->P1
494 *
495 * Original Intel OpenSSL interface:
496 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
497 *	const int bits, AES_KEY *key);
498 * Return value is non-zero on error, 0 on success.
499 */
500
ENTRY_NP(rijndael_key_setup_dec_intel)
	FRAME_BEGIN

	/*
	 * Step 1: build the encryption schedule.  The encryption routine
	 * leaves %AESKEY untouched, so it can be reused below.
	 */
	call	rijndael_key_setup_enc_intel_local
	test	%rax, %rax
#ifdef	OPENSSL_INTERFACE
	jnz	.Ldec_ks_done		// non-zero means failure
#else	/* OpenSolaris Interface */
	jz	.Ldec_ks_done		// zero means failure
#endif	/* OPENSSL_INTERFACE */

	/*
	 * Step 2: turn the encryption schedule into a decryption schedule.
	 * %rax is not written again, so the value returned by the
	 * encryption setup is also this routine's return value.
	 */
#ifndef	OPENSSL_INTERFACE		/* OpenSolaris Interface */
	mov	%rax, %ROUNDS64		// round count (10, 12, or 14);
					// the OpenSSL build already has it
#endif

	lea	0x10(%AESKEY), %rcx	// %rcx = address of round key 1
	shl	$4, %ROUNDS32		// rounds * 16 = offset of last key
	add	%AESKEY, %ROUNDS64	// %ROUNDS64 = address of last key
	mov	%ROUNDS64, %ENDAESKEY	// remember it as the loop-2 sentinel

	// Reverse the order of the round keys in place: swap the outermost
	// pair, then walk both pointers inward until they meet.
.balign 4
.Ldec_ks_swap:
	movups	(%ROUNDS64), %xmm1
	movups	(%AESKEY), %xmm0
	movups	%xmm1, (%AESKEY)
	movups	%xmm0, (%ROUNDS64)
	lea	0x10(%AESKEY), %AESKEY
	lea	-0x10(%ROUNDS64), %ROUNDS64
	cmp	%AESKEY, %ROUNDS64
	ja	.Ldec_ks_swap

	// Apply AES Inverse Mix Columns to every key from round key 1 up to,
	// but not including, the last one; the first and last stay as they are.
.balign 4
.Ldec_ks_imc:
	movups	(%rcx), %xmm0
	aesimc	%xmm0, %xmm1
	movups	%xmm1, (%rcx)
	lea	0x10(%rcx), %rcx
	cmp	%ENDAESKEY, %rcx
	jne	.Ldec_ks_imc

.Ldec_ks_done:
	// OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
	// OpenSSL: rax = 0 for OK, or non-zero for error
	FRAME_END
	RET
	SET_SIZE(rijndael_key_setup_dec_intel)
554
555
556/*
557 * aes_encrypt_intel()
558 * Encrypt a single block (in and out can overlap).
559 *
560 * For kernel code, caller is responsible for ensuring kpreempt_disable()
561 * has been called.  This is because %xmm registers are not saved/restored.
 * NOTE: this routine does not itself clear or set CR0.TS, nor does it
 * save or restore any %xmm register on the stack; %xmm0 and %xmm1 are
 * clobbered.
565 *
566 * Temporary register usage:
567 * %xmm0	State
568 * %xmm1	Key
569 *
570 * Original OpenSolaris Interface:
571 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
572 *	const uint32_t pt[4], uint32_t ct[4])
573 *
574 * Original Intel OpenSSL Interface:
575 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
576 *	const AES_KEY *key)
577 */
578
/*
 * Register assignments shared by aes_encrypt_intel() and
 * aes_decrypt_intel().  The names expand to bare register names;
 * the code prefixes them with '%'.
 */
#ifndef	OPENSSL_INTERFACE
/* OpenSolaris interface: (ks, Nr, in, out) */
#define	KEYP		rdi	/* P1, 64 bits */
#define	NROUNDS		esi	/* P2, 32 bits */
#define	INP		rdx	/* P3, 64 bits */
#define	OUTP		rcx	/* P4, 64 bits */

#else	/* OPENSSL_INTERFACE */
/* OpenSSL interface: (in, out, key); entry points are renamed too */
#define	aes_encrypt_intel	intel_AES_encrypt
#define	aes_decrypt_intel	intel_AES_decrypt

#define	INP		rdi	/* P1, 64 bits */
#define	OUTP		rsi	/* P2, 64 bits */
#define	KEYP		rdx	/* P3, 64 bits */

/* No NROUNDS parameter--offset 240 from KEYP saved in %ecx:  */
#define	NROUNDS32	ecx	/* temporary, 32 bits */
#define	NROUNDS		cl	/* temporary,  8 bits */
#endif	/* !OPENSSL_INTERFACE */

/* SSE scratch registers used by both routines */
#define	STATE		xmm0	/* temporary, 128 bits */
#define	KEY		xmm1	/* temporary, 128 bits */
600
601
ENTRY_NP(aes_encrypt_intel)
	/*
	 * Encrypts the 16 bytes at (%INP) with the schedule at (%KEYP) and
	 * stores the result at (%OUTP).  The input is fully loaded before the
	 * output is stored.  %KEYP is modified; %STATE and %KEY are clobbered.
	 */

	movups	(%KEYP), %KEY			// round key 0
	movups	(%INP), %STATE			// input block
#ifdef	OPENSSL_INTERFACE
	mov	240(%KEYP), %NROUNDS32		// round count
#else	/* OpenSolaris Interface */
	/* Round count is already present as P2 in %rsi/%esi */
#endif	/* OPENSSL_INTERFACE */

	pxor	%KEY, %STATE			// round 0: initial whitening

	/*
	 * Bias %KEYP so that the shared tail below can use the same
	 * displacements for every key size: +0x30 for fewer than 12 rounds,
	 * +0x50 for exactly 12, +0x70 for more than 12.  lea is used for the
	 * adjustments because it leaves the flags from the cmp intact.
	 */
	lea	0x30(%KEYP), %KEYP
	cmp	$12, %NROUNDS
	jb	.Lenc_r10
	lea	0x20(%KEYP), %KEYP
	je	.Lenc_r12

	// More than 12 rounds (AES 256): two extra leading rounds.
	lea	0x20(%KEYP), %KEYP
	movups	-0x60(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	-0x50(%KEYP), %KEY
	aesenc	%KEY, %STATE

.balign 4
.Lenc_r12:
	// 12 rounds or more (AES 192 and 256): two more rounds.
	movups	-0x40(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	-0x30(%KEYP), %KEY
	aesenc	%KEY, %STATE

.balign 4
.Lenc_r10:
	// Common tail (AES 128, 192, and 256): nine rounds plus the last one.
	movups	-0x20(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	-0x10(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x10(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x20(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x30(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x40(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x50(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x60(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x70(%KEYP), %KEY
	aesenclast	%KEY, %STATE		// final round
	movups	%STATE, (%OUTP)			// store the result

	RET
	SET_SIZE(aes_encrypt_intel)
661
662
663/*
664 * aes_decrypt_intel()
665 * Decrypt a single block (in and out can overlap).
666 *
667 * For kernel code, caller is responsible for ensuring kpreempt_disable()
668 * has been called.  This is because %xmm registers are not saved/restored.
 * NOTE: this routine does not itself clear or set CR0.TS, nor does it
 * save or restore any %xmm register on the stack; %xmm0 and %xmm1 are
 * clobbered.
672 *
673 * Temporary register usage:
674 * %xmm0	State
675 * %xmm1	Key
676 *
677 * Original OpenSolaris Interface:
678 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
 *	const uint32_t pt[4], uint32_t ct[4])
680 *
681 * Original Intel OpenSSL Interface:
682 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
683 *	const AES_KEY *key);
684 */
ENTRY_NP(aes_decrypt_intel)
	/*
	 * Decrypts the 16 bytes at (%INP) with the schedule at (%KEYP) and
	 * stores the result at (%OUTP).  The input is fully loaded before the
	 * output is stored.  %KEYP is modified; %STATE and %KEY are clobbered.
	 */

	movups	(%KEYP), %KEY			// round key 0
	movups	(%INP), %STATE			// input block
#ifdef	OPENSSL_INTERFACE
	mov	240(%KEYP), %NROUNDS32		// round count
#else	/* OpenSolaris Interface */
	/* Round count is already present as P2 in %rsi/%esi */
#endif	/* OPENSSL_INTERFACE */

	pxor	%KEY, %STATE			// round 0: initial whitening

	/*
	 * Bias %KEYP so that the shared tail below can use the same
	 * displacements for every key size: +0x30 for fewer than 12 rounds,
	 * +0x50 for exactly 12, +0x70 for more than 12.  lea is used for the
	 * adjustments because it leaves the flags from the cmp intact.
	 */
	lea	0x30(%KEYP), %KEYP
	cmp	$12, %NROUNDS
	jb	.Ldec_r10
	lea	0x20(%KEYP), %KEYP
	je	.Ldec_r12

	// More than 12 rounds (AES 256): two extra leading rounds.
	lea	0x20(%KEYP), %KEYP
	movups	-0x60(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	-0x50(%KEYP), %KEY
	aesdec	%KEY, %STATE

.balign 4
.Ldec_r12:
	// 12 rounds or more (AES 192 and 256): two more rounds.
	movups	-0x40(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	-0x30(%KEYP), %KEY
	aesdec	%KEY, %STATE

.balign 4
.Ldec_r10:
	// Common tail (AES 128, 192, and 256): nine rounds plus the last one.
	movups	-0x20(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	-0x10(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x10(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x20(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x30(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x40(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x50(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x60(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x70(%KEYP), %KEY
	aesdeclast	%KEY, %STATE		// final round
	movups	%STATE, (%OUTP)			// store the result

	RET
	SET_SIZE(aes_decrypt_intel)
744
745#endif	/* lint || __lint */
746
/*
 * On ELF targets, emit an empty .note.GNU-stack section with no flags,
 * which GNU toolchains read as "this object does not need an
 * executable stack".
 */
#if defined(__ELF__)
	.section	.note.GNU-stack, "", %progbits
#endif	/* __ELF__ */
750