/* xref: /freebsd/sys/crypto/aesni/aesencdec.h (revision 95ee2897e98f5d444f26ed2334cc7c439f9c16c6) */
/*-
 * Copyright 2013 John-Mark Gurney <jmg@FreeBSD.org>
 * All rights reserved.
 *
 * Copyright 2015 Netflix, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#ifndef _AESENCDEC_H_
#define _AESENCDEC_H_

#include <crypto/aesni/aesni_os.h>

#include <emmintrin.h>
#include <wmmintrin.h>

/*
 * Encrypt eight independent 128-bit blocks with the same key schedule.
 *
 * The eight lanes are advanced one round at a time so that the AESENC
 * instructions of different blocks are adjacent and do not depend on each
 * other.
 *
 * rounds   - number of _mm_aesenc_si128() rounds applied before the final
 *            _mm_aesenclast_si128() round, i.e. the AES round count minus
 *            one (9 for a 10-round AES-128 schedule).
 * keysched - round keys; entries 0 through rounds + 1 are read.
 * a .. h   - the eight input blocks.
 * out      - receives the results; out[0] belongs to a, ..., out[7] to h.
 */
static inline void
aesni_enc8(int rounds, const __m128i *keysched, __m128i a,
    __m128i b, __m128i c, __m128i d, __m128i e, __m128i f, __m128i g,
    __m128i h, __m128i out[8])
{
	__m128i rk;
	int i;

	/* Initial whitening: XOR every block with round key 0. */
	rk = keysched[0];
	a = _mm_xor_si128(a, rk);
	b = _mm_xor_si128(b, rk);
	c = _mm_xor_si128(c, rk);
	d = _mm_xor_si128(d, rk);
	e = _mm_xor_si128(e, rk);
	f = _mm_xor_si128(f, rk);
	g = _mm_xor_si128(g, rk);
	h = _mm_xor_si128(h, rk);

	/* Full rounds with keys 1 .. rounds; the key is fetched once per round. */
	for (i = 1; i <= rounds; i++) {
		rk = keysched[i];
		a = _mm_aesenc_si128(a, rk);
		b = _mm_aesenc_si128(b, rk);
		c = _mm_aesenc_si128(c, rk);
		d = _mm_aesenc_si128(d, rk);
		e = _mm_aesenc_si128(e, rk);
		f = _mm_aesenc_si128(f, rk);
		g = _mm_aesenc_si128(g, rk);
		h = _mm_aesenc_si128(h, rk);
	}

	/* Final round uses the key that follows the last full-round key. */
	rk = keysched[i];
	out[0] = _mm_aesenclast_si128(a, rk);
	out[1] = _mm_aesenclast_si128(b, rk);
	out[2] = _mm_aesenclast_si128(c, rk);
	out[3] = _mm_aesenclast_si128(d, rk);
	out[4] = _mm_aesenclast_si128(e, rk);
	out[5] = _mm_aesenclast_si128(f, rk);
	out[6] = _mm_aesenclast_si128(g, rk);
	out[7] = _mm_aesenclast_si128(h, rk);
}

/*
 * Decrypt eight independent 128-bit blocks with the same key schedule.
 *
 * Mirror image of the eight-block encrypt helper: the lanes are advanced
 * one round at a time using _mm_aesdec_si128() and finished with
 * _mm_aesdeclast_si128().
 *
 * rounds   - number of _mm_aesdec_si128() rounds applied before the final
 *            _mm_aesdeclast_si128() round, i.e. the AES round count minus
 *            one (9 for a 10-round AES-128 schedule).
 * keysched - round keys in the order they are applied; entries 0 through
 *            rounds + 1 are read.  NOTE(review): the AESDEC instruction
 *            implements the Equivalent Inverse Cipher, so this is presumably
 *            the inverse schedule prepared by the caller -- confirm there.
 * a .. h   - the eight input blocks.
 * out      - receives the results; out[0] belongs to a, ..., out[7] to h.
 */
static inline void
aesni_dec8(int rounds, const __m128i *keysched, __m128i a,
    __m128i b, __m128i c, __m128i d, __m128i e, __m128i f, __m128i g,
    __m128i h, __m128i out[8])
{
	__m128i rk;
	int i;

	/* Initial whitening: XOR every block with round key 0. */
	rk = keysched[0];
	a = _mm_xor_si128(a, rk);
	b = _mm_xor_si128(b, rk);
	c = _mm_xor_si128(c, rk);
	d = _mm_xor_si128(d, rk);
	e = _mm_xor_si128(e, rk);
	f = _mm_xor_si128(f, rk);
	g = _mm_xor_si128(g, rk);
	h = _mm_xor_si128(h, rk);

	/* Full rounds with keys 1 .. rounds; the key is fetched once per round. */
	for (i = 1; i <= rounds; i++) {
		rk = keysched[i];
		a = _mm_aesdec_si128(a, rk);
		b = _mm_aesdec_si128(b, rk);
		c = _mm_aesdec_si128(c, rk);
		d = _mm_aesdec_si128(d, rk);
		e = _mm_aesdec_si128(e, rk);
		f = _mm_aesdec_si128(f, rk);
		g = _mm_aesdec_si128(g, rk);
		h = _mm_aesdec_si128(h, rk);
	}

	/* Final round uses the key that follows the last full-round key. */
	rk = keysched[i];
	out[0] = _mm_aesdeclast_si128(a, rk);
	out[1] = _mm_aesdeclast_si128(b, rk);
	out[2] = _mm_aesdeclast_si128(c, rk);
	out[3] = _mm_aesdeclast_si128(d, rk);
	out[4] = _mm_aesdeclast_si128(e, rk);
	out[5] = _mm_aesdeclast_si128(f, rk);
	out[6] = _mm_aesdeclast_si128(g, rk);
	out[7] = _mm_aesdeclast_si128(h, rk);
}

/*
 * Encrypt a single 128-bit block.
 *
 * rounds   - the AES round count minus one (rounds is passed in as
 *            rounds - 1): 9 for a 10-round AES-128 schedule.  The middle
 *            rounds are consumed two per loop iteration, so rounds must be
 *            odd; with an even value keysched[rounds] would be applied twice.
 * keysched - round keys; entries 0 through rounds + 1 are read.
 * from     - the input block.
 *
 * Returns the encrypted block.
 */
static inline __m128i
aesni_enc(int rounds, const __m128i *keysched, const __m128i from)
{
	__m128i state;
	int i;

	/* Initial whitening with round key 0. */
	state = _mm_xor_si128(from, keysched[0]);

	/* Keys 1 .. rounds - 1, two rounds per iteration. */
	for (i = 1; i < rounds; i += 2) {
		state = _mm_aesenc_si128(state, keysched[i]);
		state = _mm_aesenc_si128(state, keysched[i + 1]);
	}

	/* Last full round, then the final (MixColumns-free) round. */
	state = _mm_aesenc_si128(state, keysched[rounds]);
	return (_mm_aesenclast_si128(state, keysched[rounds + 1]));
}

/*
 * Decrypt a single 128-bit block.
 *
 * rounds   - the AES round count minus one: 9 for a 10-round AES-128
 *            schedule.  The middle rounds are consumed two per loop
 *            iteration, so rounds must be odd; with an even value
 *            keysched[rounds] would be applied twice.
 * keysched - round keys in the order they are applied; entries 0 through
 *            rounds + 1 are read.  NOTE(review): the AESDEC instruction
 *            implements the Equivalent Inverse Cipher, so this is presumably
 *            the inverse schedule prepared by the caller -- confirm there.
 * from     - the input block.
 *
 * Returns the decrypted block.
 */
static inline __m128i
aesni_dec(int rounds, const __m128i *keysched, const __m128i from)
{
	__m128i state;
	int i;

	/* Initial whitening with round key 0. */
	state = _mm_xor_si128(from, keysched[0]);

	/* Keys 1 .. rounds - 1, two rounds per iteration. */
	for (i = 1; i < rounds; i += 2) {
		state = _mm_aesdec_si128(state, keysched[i]);
		state = _mm_aesdec_si128(state, keysched[i + 1]);
	}

	/* Last full round, then the final (InvMixColumns-free) round. */
	state = _mm_aesdec_si128(state, keysched[rounds]);
	return (_mm_aesdeclast_si128(state, keysched[rounds + 1]));
}

#endif /* _AESENCDEC_H_ */