xref: /illumos-gate/usr/src/common/crypto/aes/amd64/aeskey.c (revision 9b9d39d2a32ff806d2431dbcc50968ef1e6d46b2)
1 /*
2  * ---------------------------------------------------------------------------
3  * Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
4  *
5  * LICENSE TERMS
6  *
7  * The free distribution and use of this software is allowed (with or without
8  * changes) provided that:
9  *
10  *  1. source code distributions include the above copyright notice, this
11  *	 list of conditions and the following disclaimer;
12  *
13  *  2. binary distributions include the above copyright notice, this list
14  *	 of conditions and the following disclaimer in their documentation;
15  *
16  *  3. the name of the copyright holder is not used to endorse products
17  *	 built using this software without specific written permission.
18  *
19  * DISCLAIMER
20  *
21  * This software is provided 'as is' with no explicit or implied warranties
22  * in respect of its properties, including, but not limited to, correctness
23  * and/or fitness for purpose.
24  * ---------------------------------------------------------------------------
25  * Issue Date: 20/12/2007
26  */
27 
28 #include "aes_impl.h"
29 #include "aesopt.h"
30 #include "aestab.h"
31 #include "aestab2.h"
32 
33 /*
34  *	Initialise the key schedule from the user supplied key. The original
35  *	interface took the key length in bytes (16, 24 or 32) or in bits
36  *	(128, 192 or 256); the entry points in this file take bits only.
37  *	These values correspond to Nk values of 4, 6 and 8 respectively.
38  *
39  *	The following macros implement a single cycle in the key
40  *	schedule generation process. The number of cycles needed
41  *	for each cx->n_col and nk value is:
42  *
43  *	nk =		4  5  6  7  8
44  *	------------------------------
45  *	cx->n_col = 4	10  9  8  7  7
46  *	cx->n_col = 5	14 11 10  9  9
47  *	cx->n_col = 6	19 15 12 11 11
48  *	cx->n_col = 7	21 19 16 13 14
49  *	cx->n_col = 8	29 23 19 17 14
50  */
51 
52 /*
53  * OpenSolaris changes
54  * 1. Added header files aes_impl.h and aestab2.h
55  * 2. Changed uint_8t and uint_32t to uint8_t and uint32_t
56  * 3. Removed code under ifdef USE_VIA_ACE_IF_PRESENT (always undefined)
57  * 4. Removed always-defined ifdefs FUNCS_IN_C, ENC_KEYING_IN_C,
58  *	AES_128, AES_192, AES_256, AES_VAR defines
59  * 5. Changed aes_encrypt_key* aes_decrypt_key* functions to "static void"
60  * 6. Changed N_COLS to MAX_AES_NB
61  * 7. Replaced functions aes_encrypt_key and aes_decrypt_key with
62  *	OpenSolaris-compatible functions rijndael_key_setup_enc_amd64 and
63  *	rijndael_key_setup_dec_amd64
64  * 8. cstyled code and removed lint warnings
65  */
66 
/*
 * Compact build: when REDUCE_CODE_SIZE is defined, ls_box and inv_mcol are
 * redirected to the functions ls_sub() and im_sub() (only declared here; not
 * defined in this file), and ENC_KS_UNROLL / DEC_KS_UNROLL are undefined so
 * that the loop forms of the key schedule below are compiled instead of the
 * unrolled ones.
 */
67 #if defined(REDUCE_CODE_SIZE)
68 #define	ls_box ls_sub
69 	uint32_t	ls_sub(const uint32_t t, const uint32_t n);
70 #define	inv_mcol im_sub
71 	uint32_t	im_sub(const uint32_t x);
72 #ifdef ENC_KS_UNROLL
73 #undef ENC_KS_UNROLL
74 #endif
75 #ifdef DEC_KS_UNROLL
76 #undef DEC_KS_UNROLL
77 #endif
78 #endif	/* REDUCE_CODE_SIZE */
79 
80 
/*
 * ke4(k, i): one step of the 128-bit encryption key expansion.
 *
 * Updates the four running words in the caller's local array ss[] and
 * stores them to k[4 * i + 4] .. k[4 * i + 7].  ss[0] is first XORed with
 * ls_box(ss[3], 3) and the table entry t_use(r, c)[i]; each following word
 * is then XORed with the word that was just updated.
 *
 * The macro expands to a braced block and refers to ss by name, so it can
 * only be used where a suitable uint32_t ss[] is in scope.  ls_box() and
 * t_use() are not defined in this file (presumably the S-box lookup and the
 * round-constant table from the included headers -- confirm there).
 */
81 #define	ke4(k, i) \
82 {	k[4 * (i) + 4] = ss[0] ^= ls_box(ss[3], 3) ^ t_use(r, c)[i]; \
83 	k[4 * (i) + 5] = ss[1] ^= ss[0]; \
84 	k[4 * (i) + 6] = ss[2] ^= ss[1]; \
85 	k[4 * (i) + 7] = ss[3] ^= ss[2]; \
86 }
87 
/*
 * Build the encryption key schedule for a 128-bit key.
 *
 * key	user key; four 32-bit words are read from it with word_in()
 * rk	output schedule; rk[0] .. rk[43] are written
 *
 * The first four schedule words are the key words themselves.  Ten ke4()
 * steps (i = 0 .. 9) then append four words each.  When ENC_KS_UNROLL is
 * defined the first nine steps are written out explicitly, otherwise they
 * run in a loop; step 9 is always issued separately after the #if/#else.
 */
88 static void
89 aes_encrypt_key128(const unsigned char *key, uint32_t rk[])
90 {
	/* Running key words; named ss because ke4() refers to it by name. */
91 	uint32_t	ss[4];
92 
93 	rk[0] = ss[0] = word_in(key, 0);
94 	rk[1] = ss[1] = word_in(key, 1);
95 	rk[2] = ss[2] = word_in(key, 2);
96 	rk[3] = ss[3] = word_in(key, 3);
97 
98 #ifdef ENC_KS_UNROLL
99 	ke4(rk, 0);  ke4(rk, 1);
100 	ke4(rk, 2);  ke4(rk, 3);
101 	ke4(rk, 4);  ke4(rk, 5);
102 	ke4(rk, 6);  ke4(rk, 7);
103 	ke4(rk, 8);
104 #else
105 	{
106 		uint32_t	i;
107 		for (i = 0; i < 9; ++i)
108 			ke4(rk, i);
109 	}
110 #endif	/* ENC_KS_UNROLL */
	/* Final step, common to both variants: writes rk[40] .. rk[43]. */
111 	ke4(rk, 9);
112 }
113 
114 
/*
 * kef6(k, i): first four words of a 192-bit key expansion step.
 *
 * XORs ls_box(ss[5], 3) and t_use(r, c)[i] into ss[0], chains ss[1..3]
 * (each XORed with the word just updated) and stores the results to
 * k[6 * i + 6] .. k[6 * i + 9].  Used on its own for the final, partial
 * step.  Requires a uint32_t ss[] with at least six elements in scope.
 */
115 #define	kef6(k, i) \
116 {	k[6 * (i) + 6] = ss[0] ^= ls_box(ss[5], 3) ^ t_use(r, c)[i]; \
117 	k[6 * (i) + 7] = ss[1] ^= ss[0]; \
118 	k[6 * (i) + 8] = ss[2] ^= ss[1]; \
119 	k[6 * (i) + 9] = ss[3] ^= ss[2]; \
120 }
121 
/*
 * ke6(k, i): a full 192-bit key expansion step.  Performs kef6(k, i) and
 * then continues the chain through ss[4] and ss[5], storing them to
 * k[6 * i + 10] and k[6 * i + 11] (six words per step in total).
 */
122 #define	ke6(k, i) \
123 {	kef6(k, i); \
124 	k[6 * (i) + 10] = ss[4] ^= ss[3]; \
125 	k[6 * (i) + 11] = ss[5] ^= ss[4]; \
126 }
127 
/*
 * Build the encryption key schedule for a 192-bit key.
 *
 * key	user key; six 32-bit words are read from it with word_in()
 * rk	output schedule; rk[0] .. rk[51] are written
 *
 * The first six schedule words are the key words themselves.  Seven full
 * ke6() steps (i = 0 .. 6) append six words each (up to rk[47]); they are
 * either unrolled (ENC_KS_UNROLL) or run in a loop.  A final kef6() step
 * with i = 7 appends the last four words, rk[48] .. rk[51].
 */
128 static void
129 aes_encrypt_key192(const unsigned char *key, uint32_t rk[])
130 {
	/* Running key words; named ss because ke6()/kef6() refer to it. */
131 	uint32_t	ss[6];
132 
133 	rk[0] = ss[0] = word_in(key, 0);
134 	rk[1] = ss[1] = word_in(key, 1);
135 	rk[2] = ss[2] = word_in(key, 2);
136 	rk[3] = ss[3] = word_in(key, 3);
137 	rk[4] = ss[4] = word_in(key, 4);
138 	rk[5] = ss[5] = word_in(key, 5);
139 
140 #ifdef ENC_KS_UNROLL
141 	ke6(rk, 0);  ke6(rk, 1);
142 	ke6(rk, 2);  ke6(rk, 3);
143 	ke6(rk, 4);  ke6(rk, 5);
144 	ke6(rk, 6);
145 #else
146 	{
147 		uint32_t	i;
148 		for (i = 0; i < 7; ++i)
149 			ke6(rk, i);
150 	}
151 #endif	/* ENC_KS_UNROLL */
	/* Partial last step: only four more words are needed. */
152 	kef6(rk, 7);
153 }
154 
155 
156 
/*
 * kef8(k, i): first four words of a 256-bit key expansion step.
 *
 * XORs ls_box(ss[7], 3) and t_use(r, c)[i] into ss[0], chains ss[1..3]
 * (each XORed with the word just updated) and stores the results to
 * k[8 * i + 8] .. k[8 * i + 11].  Used on its own for the final, partial
 * step.  Requires a uint32_t ss[] with at least eight elements in scope.
 */
157 #define	kef8(k, i) \
158 {	k[8 * (i) + 8] = ss[0] ^= ls_box(ss[7], 3) ^ t_use(r, c)[i]; \
159 	k[8 * (i) + 9] = ss[1] ^= ss[0]; \
160 	k[8 * (i) + 10] = ss[2] ^= ss[1]; \
161 	k[8 * (i) + 11] = ss[3] ^= ss[2]; \
162 }
163 
/*
 * ke8(k, i): a full 256-bit key expansion step.  Performs kef8(k, i), then
 * XORs ls_box(ss[3], 0) into ss[4] (note: second argument 0 and no table
 * entry for this half), chains ss[5..7] and stores the four words to
 * k[8 * i + 12] .. k[8 * i + 15].
 */
164 #define	ke8(k, i) \
165 {   kef8(k, i); \
166 	k[8 * (i) + 12] = ss[4] ^= ls_box(ss[3], 0); \
167 	k[8 * (i) + 13] = ss[5] ^= ss[4]; \
168 	k[8 * (i) + 14] = ss[6] ^= ss[5]; \
169 	k[8 * (i) + 15] = ss[7] ^= ss[6]; \
170 }
171 
/*
 * Build the encryption key schedule for a 256-bit key.
 *
 * key	user key; eight 32-bit words are read from it with word_in()
 * rk	output schedule; rk[0] .. rk[59] are written
 *
 * The first eight schedule words are the key words themselves.  Six full
 * ke8() steps (i = 0 .. 5) append eight words each (up to rk[55]); they
 * are either unrolled (ENC_KS_UNROLL) or run in a loop.  A final kef8()
 * step with i = 6 appends the last four words, rk[56] .. rk[59].
 */
172 static void
173 aes_encrypt_key256(const unsigned char *key, uint32_t rk[])
174 {
	/* Running key words; named ss because ke8()/kef8() refer to it. */
175 	uint32_t	ss[8];
176 
177 	rk[0] = ss[0] = word_in(key, 0);
178 	rk[1] = ss[1] = word_in(key, 1);
179 	rk[2] = ss[2] = word_in(key, 2);
180 	rk[3] = ss[3] = word_in(key, 3);
181 	rk[4] = ss[4] = word_in(key, 4);
182 	rk[5] = ss[5] = word_in(key, 5);
183 	rk[6] = ss[6] = word_in(key, 6);
184 	rk[7] = ss[7] = word_in(key, 7);
185 
186 #ifdef ENC_KS_UNROLL
187 	ke8(rk, 0); ke8(rk, 1);
188 	ke8(rk, 2); ke8(rk, 3);
189 	ke8(rk, 4); ke8(rk, 5);
190 #else
191 	{
192 		uint32_t	i;
193 		for (i = 0; i < 6; ++i)
194 			ke8(rk,  i);
195 	}
196 #endif	/* ENC_KS_UNROLL */
	/* Partial last step: only four more words are needed. */
197 	kef8(rk, 6);
198 }
199 
200 
201 /*
202  * Expand the cipher key into the encryption key schedule.
203  *
204  * Return the number of rounds for the given cipher key size.
205  * The size of the key schedule depends on the number of rounds
206  * (which can be computed from the size of the key), i.e. 4 * (Nr + 1).
207  *
208  * Parameters:
209  * rk		AES key schedule 32-bit array to be initialized
210  * cipherKey	User key
211  * keyBits	AES key size (128, 192, or 256 bits)
 *
 * Returns 10, 12 or 14 for a 128-, 192- or 256-bit key respectively.
 * For any other keyBits value nothing is written to rk and 0 is returned.
 *
 * The user key is only read, so it is viewed through a pointer to const
 * bytes; the cast must not drop the const qualifier of cipherKey.
212  */
213 int
214 rijndael_key_setup_enc_amd64(uint32_t rk[], const uint32_t cipherKey[],
215 	int keyBits)
216 {
217 	switch (keyBits) {
218 	case 128:
219 		aes_encrypt_key128((const unsigned char *)&cipherKey[0], rk);
220 		return (10);
221 	case 192:
222 		aes_encrypt_key192((const unsigned char *)&cipherKey[0], rk);
223 		return (12);
224 	case 256:
225 		aes_encrypt_key256((const unsigned char *)&cipherKey[0], rk);
226 		return (14);
227 	default: /* should never get here */
228 		break;
229 	}
230 
	/* Unsupported key size: no schedule was generated. */
231 	return (0);
232 }
233 
234 
235 /* this is used to store the decryption round keys  */
236 /* in forward or reverse order */
237 
/*
 * v(n, i) maps a logical schedule index i to the array index actually
 * used.  Without AES_REV_DKS it is the identity.  With AES_REV_DKS, writing
 * i = 4 * q + r with r = i & 3, the result is n - 4 * q + r: four-word
 * group q is placed at base n - 4 * q and the word order inside each group
 * is unchanged.  n is passed as 40, 48 or 56 by the macros in this file.
 */
238 #ifdef AES_REV_DKS
239 #define	v(n, i)  ((n) - (i) + 2 * ((i) & 3))
240 #else
241 #define	v(n, i)  (i)
242 #endif
243 
/*
 * ff(x) is the transform the unrolled decryption key macros apply to a
 * schedule word before storing it: the identity when DEC_ROUND is
 * NO_TABLES, inv_mcol(x) otherwise.  In the latter case d_vars is defined
 * as dec_imvars when that macro exists; the aes_decrypt_key*() functions
 * then emit "d_vars;" among their local declarations.
 */
244 #if DEC_ROUND == NO_TABLES
245 #define	ff(x)   (x)
246 #else
247 #define	ff(x)   inv_mcol(x)
248 #if defined(dec_imvars)
249 #define	d_vars  dec_imvars
250 #endif
251 #endif	/* DEC_ROUND == NO_TABLES */
252 
253 
/*
 * k4e(k, i): one 128-bit key expansion step, stored through v(40, ...).
 *
 * Performs the same updates of ss[0..3] as ke4() but writes the four words
 * to k[v(40, 4 * i + 4)] .. k[v(40, 4 * i + 7)], so the layout follows
 * AES_REV_DKS.  In this file it is used by the non-unrolled path of
 * aes_decrypt_key128().
 */
254 #define	k4e(k, i) \
255 {	k[v(40, (4 * (i)) + 4)] = ss[0] ^= ls_box(ss[3], 3) ^ t_use(r, c)[i]; \
256 	k[v(40, (4 * (i)) + 5)] = ss[1] ^= ss[0]; \
257 	k[v(40, (4 * (i)) + 6)] = ss[2] ^= ss[1]; \
258 	k[v(40, (4 * (i)) + 7)] = ss[3] ^= ss[2]; \
259 }
260 
/*
 * Decryption key schedule steps for 128-bit keys.  In this file they are
 * used only by the DEC_KS_UNROLL path of aes_decrypt_key128(): kdf4 for
 * the first step, kd4 for the middle steps and kdl4 for the last one.
 * All stores go through v(40, ...).  kdf4/kd4 apply ff() to the words they
 * store; kdl4 does not.  Both kd4 variants and the "#if 1" kdf4/kdl4 use
 * ss[4] as scratch, so the caller's ss[] needs at least five elements.
 *
 * The "#if 1" variants update only ss[i % 4] per step (after kdf4 has
 * folded ss[0..2] together); the "#else" variants, currently compiled out,
 * update all four running words per step.
 *
 * In the "#if 1" variants the step index i is parenthesized wherever it
 * takes part in arithmetic, so that an expression argument (for example
 * n + 1) expands with the intended precedence.
 */
261 #if 1
262 
/*
 * First step: fold ss[0..2], form the scratch word, XOR it into ss[i % 4],
 * then build each output word as a running XOR of the scratch word with
 * the four preceding schedule words, storing ff() of each result.
 */
263 #define	kdf4(k, i) \
264 {	ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \
265 	ss[1] = ss[1] ^ ss[3]; \
266 	ss[2] = ss[2] ^ ss[3]; \
267 	ss[4] = ls_box(ss[((i) + 3) % 4], 3) ^ t_use(r, c)[i]; \
268 	ss[(i) % 4] ^= ss[4]; \
269 	ss[4] ^= k[v(40, (4 * (i)))];   k[v(40, (4 * (i)) + 4)] = ff(ss[4]); \
270 	ss[4] ^= k[v(40, (4 * (i)) + 1)]; k[v(40, (4 * (i)) + 5)] = ff(ss[4]); \
271 	ss[4] ^= k[v(40, (4 * (i)) + 2)]; k[v(40, (4 * (i)) + 6)] = ff(ss[4]); \
272 	ss[4] ^= k[v(40, (4 * (i)) + 3)]; k[v(40, (4 * (i)) + 7)] = ff(ss[4]); \
273 }
274 
/*
 * Middle steps: ff() is applied once to the scratch word; each output word
 * is the running XOR of that value with the schedule word four positions
 * earlier (which was already stored by the previous step).
 */
275 #define	kd4(k, i) \
276 {	ss[4] = ls_box(ss[((i) + 3) % 4], 3) ^ t_use(r, c)[i]; \
277 	ss[(i) % 4] ^= ss[4]; ss[4] = ff(ss[4]); \
278 	k[v(40, (4 * (i)) + 4)] = ss[4] ^= k[v(40, (4 * (i)))]; \
279 	k[v(40, (4 * (i)) + 5)] = ss[4] ^= k[v(40, (4 * (i)) + 1)]; \
280 	k[v(40, (4 * (i)) + 6)] = ss[4] ^= k[v(40, (4 * (i)) + 2)]; \
281 	k[v(40, (4 * (i)) + 7)] = ss[4] ^= k[v(40, (4 * (i)) + 3)]; \
282 }
283 
/*
 * Last step: no ff(); the four output words are computed directly from
 * ss[0..3] after ss[i % 4] has been updated.
 */
284 #define	kdl4(k, i) \
285 {	ss[4] = ls_box(ss[((i) + 3) % 4], 3) ^ t_use(r, c)[i]; \
286 	ss[(i) % 4] ^= ss[4]; \
287 	k[v(40, (4 * (i)) + 4)] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \
288 	k[v(40, (4 * (i)) + 5)] = ss[1] ^ ss[3]; \
289 	k[v(40, (4 * (i)) + 6)] = ss[0]; \
290 	k[v(40, (4 * (i)) + 7)] = ss[1]; \
291 }
292 
293 #else
294 
295 #define	kdf4(k, i) \
296 {	ss[0] ^= ls_box(ss[3], 3) ^ t_use(r, c)[i]; \
297 	k[v(40, (4 * (i)) + 4)] = ff(ss[0]); \
298 	ss[1] ^= ss[0]; k[v(40, (4 * (i)) + 5)] = ff(ss[1]); \
299 	ss[2] ^= ss[1]; k[v(40, (4 * (i)) + 6)] = ff(ss[2]); \
300 	ss[3] ^= ss[2]; k[v(40, (4 * (i)) + 7)] = ff(ss[3]); \
301 }
302 
303 #define	kd4(k, i) \
304 {	ss[4] = ls_box(ss[3], 3) ^ t_use(r, c)[i]; \
305 	ss[0] ^= ss[4]; \
306 	ss[4] = ff(ss[4]); \
307 	k[v(40, (4 * (i)) + 4)] = ss[4] ^= k[v(40, (4 * (i)))]; \
308 	ss[1] ^= ss[0]; \
309 	k[v(40, (4 * (i)) + 5)] = ss[4] ^= k[v(40, (4 * (i)) + 1)]; \
310 	ss[2] ^= ss[1]; \
311 	k[v(40, (4 * (i)) + 6)] = ss[4] ^= k[v(40, (4 * (i)) + 2)]; \
312 	ss[3] ^= ss[2]; \
313 	k[v(40, (4 * (i)) + 7)] = ss[4] ^= k[v(40, (4 * (i)) + 3)]; \
314 }
315 
316 #define	kdl4(k, i) \
317 {	ss[0] ^= ls_box(ss[3], 3) ^ t_use(r, c)[i]; \
318 	k[v(40, (4 * (i)) + 4)] = ss[0]; \
319 	ss[1] ^= ss[0]; k[v(40, (4 * (i)) + 5)] = ss[1]; \
320 	ss[2] ^= ss[1]; k[v(40, (4 * (i)) + 6)] = ss[2]; \
321 	ss[3] ^= ss[2]; k[v(40, (4 * (i)) + 7)] = ss[3]; \
322 }
323 
324 #endif
325 
/*
 * Build the decryption key schedule for a 128-bit key.
 *
 * key	user key; four 32-bit words are read from it with word_in()
 * rk	output schedule; 44 words (indices 0 .. 43) are written, placed
 *	according to v(40, ...)
 *
 * With DEC_KS_UNROLL the schedule is produced directly in its final form by
 * kdf4() (step 0), kd4() (steps 1 .. 8) and kdl4() (step 9).  Otherwise the
 * words are generated with k4e() for steps 0 .. 9 and, unless DEC_ROUND is
 * NO_TABLES, inv_mcol() is then applied in place to rk[MAX_AES_NB] ..
 * rk[10 * MAX_AES_NB - 1] (MAX_AES_NB is defined elsewhere, presumably 4).
 */
326 static void
327 aes_decrypt_key128(const unsigned char *key, uint32_t rk[])
328 {
	/* ss[0..3]: running key words; ss[4]: scratch for kdf4/kd4/kdl4. */
329 	uint32_t	ss[5];
330 #if defined(d_vars)
331 	d_vars;
332 #endif
333 	rk[v(40, (0))] = ss[0] = word_in(key, 0);
334 	rk[v(40, (1))] = ss[1] = word_in(key, 1);
335 	rk[v(40, (2))] = ss[2] = word_in(key, 2);
336 	rk[v(40, (3))] = ss[3] = word_in(key, 3);
337 
338 #ifdef DEC_KS_UNROLL
339 	kdf4(rk, 0); kd4(rk, 1);
340 	kd4(rk, 2);  kd4(rk, 3);
341 	kd4(rk, 4);  kd4(rk, 5);
342 	kd4(rk, 6);  kd4(rk, 7);
343 	kd4(rk, 8);  kdl4(rk, 9);
344 #else
345 	{
346 		uint32_t	i;
347 		for (i = 0; i < 10; ++i)
348 			k4e(rk, i);
349 #if !(DEC_ROUND == NO_TABLES)
		/* Transform every word except the first and last groups. */
350 		for (i = MAX_AES_NB; i < 10 * MAX_AES_NB; ++i)
351 			rk[i] = inv_mcol(rk[i]);
352 #endif
353 	}
354 #endif	/* DEC_KS_UNROLL */
355 }
356 
357 
358 
/*
 * k6ef(k, i): first four words of a 192-bit key expansion step, stored
 * through v(48, ...).  Performs the same updates of ss[0..3] as kef6() and
 * writes them to k[v(48, 6 * i + 6)] .. k[v(48, 6 * i + 9)].
 */
359 #define	k6ef(k, i) \
360 {	k[v(48, (6 * (i)) + 6)] = ss[0] ^= ls_box(ss[5], 3) ^ t_use(r, c)[i]; \
361 	k[v(48, (6 * (i)) + 7)] = ss[1] ^= ss[0]; \
362 	k[v(48, (6 * (i)) + 8)] = ss[2] ^= ss[1]; \
363 	k[v(48, (6 * (i)) + 9)] = ss[3] ^= ss[2]; \
364 }
365 
/*
 * k6e(k, i): a full 192-bit step; k6ef(k, i) followed by the updates of
 * ss[4] and ss[5], stored at logical indices 6 * i + 10 and 6 * i + 11.
 * In this file k6e/k6ef are used by the non-unrolled path of
 * aes_decrypt_key192().
 */
366 #define	k6e(k, i) \
367 {	k6ef(k, i); \
368 	k[v(48, (6 * (i)) + 10)] = ss[4] ^= ss[3]; \
369 	k[v(48, (6 * (i)) + 11)] = ss[5] ^= ss[4]; \
370 }
371 
/*
 * Decryption key schedule steps for 192-bit keys.  In this file they are
 * used only by the DEC_KS_UNROLL path of aes_decrypt_key192().  All stores
 * go through v(48, ...).
 *
 * kdf6(k, i): first step.  Advances all six running words (ss[0] with
 * ls_box(ss[5], 3) and t_use(r, c)[i], then each word XORed with the one
 * just updated) and stores ff() of each at logical indices
 * 6 * i + 6 .. 6 * i + 11.
 */
372 #define	kdf6(k, i) \
373 {	ss[0] ^= ls_box(ss[5], 3) ^ t_use(r, c)[i]; \
374 	k[v(48, (6 * (i)) + 6)] = ff(ss[0]); \
375 	ss[1] ^= ss[0]; k[v(48, (6 * (i)) + 7)] = ff(ss[1]); \
376 	ss[2] ^= ss[1]; k[v(48, (6 * (i)) + 8)] = ff(ss[2]); \
377 	ss[3] ^= ss[2]; k[v(48, (6 * (i)) + 9)] = ff(ss[3]); \
378 	ss[4] ^= ss[3]; k[v(48, (6 * (i)) + 10)] = ff(ss[4]); \
379 	ss[5] ^= ss[4]; k[v(48, (6 * (i)) + 11)] = ff(ss[5]); \
380 }
381 
/*
 * kd6(k, i): middle steps.  ss[6] is scratch: it starts as ff() of the
 * step's ls_box/table word and each output word is the running XOR of
 * ss[6] with the schedule word six positions earlier.  Interleaved with
 * that, ss[0..5] are advanced with the plain (untransformed) chain so the
 * next step can derive its scratch word from ss[5].
 */
382 #define	kd6(k, i) \
383 {	ss[6] = ls_box(ss[5], 3) ^ t_use(r, c)[i]; \
384 	ss[0] ^= ss[6]; ss[6] = ff(ss[6]); \
385 	k[v(48, (6 * (i)) + 6)] = ss[6] ^= k[v(48, (6 * (i)))]; \
386 	ss[1] ^= ss[0]; \
387 	k[v(48, (6 * (i)) + 7)] = ss[6] ^= k[v(48, (6 * (i)) + 1)]; \
388 	ss[2] ^= ss[1]; \
389 	k[v(48, (6 * (i)) + 8)] = ss[6] ^= k[v(48, (6 * (i)) + 2)]; \
390 	ss[3] ^= ss[2]; \
391 	k[v(48, (6 * (i)) + 9)] = ss[6] ^= k[v(48, (6 * (i)) + 3)]; \
392 	ss[4] ^= ss[3]; \
393 	k[v(48, (6 * (i)) + 10)] = ss[6] ^= k[v(48, (6 * (i)) + 4)]; \
394 	ss[5] ^= ss[4]; \
395 	k[v(48, (6 * (i)) + 11)] = ss[6] ^= k[v(48, (6 * (i)) + 5)]; \
396 }
397 
/*
 * kdl6(k, i): last step.  Advances ss[0..3] only and stores them without
 * ff() at logical indices 6 * i + 6 .. 6 * i + 9.
 */
398 #define	kdl6(k, i) \
399 {	ss[0] ^= ls_box(ss[5], 3) ^ t_use(r, c)[i]; \
400 	k[v(48, (6 * (i)) + 6)] = ss[0]; \
401 	ss[1] ^= ss[0]; k[v(48, (6 * (i)) + 7)] = ss[1]; \
402 	ss[2] ^= ss[1]; k[v(48, (6 * (i)) + 8)] = ss[2]; \
403 	ss[3] ^= ss[2]; k[v(48, (6 * (i)) + 9)] = ss[3]; \
404 }
405 
/*
 * Build the decryption key schedule for a 192-bit key.
 *
 * key	user key; six 32-bit words are read from it with word_in()
 * rk	output schedule; 52 words (indices 0 .. 51) are written, placed
 *	according to v(48, ...)
 *
 * Key words 0 .. 3 are always stored unchanged.  With DEC_KS_UNROLL, key
 * words 4 and 5 are stored through ff() and the rest of the schedule is
 * produced in final form by kdf6() (step 0), kd6() (steps 1 .. 6) and
 * kdl6() (step 7).  Otherwise words 4 and 5 are stored unchanged, the
 * schedule is generated with k6e() (steps 0 .. 6) and k6ef() (step 7), and,
 * unless DEC_ROUND is NO_TABLES, inv_mcol() is applied in place to
 * rk[MAX_AES_NB] .. rk[12 * MAX_AES_NB - 1].
 */
406 static void
407 aes_decrypt_key192(const unsigned char *key, uint32_t rk[])
408 {
	/* ss[0..5]: running key words; ss[6]: scratch for kd6(). */
409 	uint32_t	ss[7];
410 #if defined(d_vars)
411 	d_vars;
412 #endif
413 	rk[v(48, (0))] = ss[0] = word_in(key, 0);
414 	rk[v(48, (1))] = ss[1] = word_in(key, 1);
415 	rk[v(48, (2))] = ss[2] = word_in(key, 2);
416 	rk[v(48, (3))] = ss[3] = word_in(key, 3);
417 
418 #ifdef DEC_KS_UNROLL
419 	ss[4] = word_in(key, 4);
420 	rk[v(48, (4))] = ff(ss[4]);
421 	ss[5] = word_in(key, 5);
422 	rk[v(48, (5))] = ff(ss[5]);
423 	kdf6(rk, 0); kd6(rk, 1);
424 	kd6(rk, 2);  kd6(rk, 3);
425 	kd6(rk, 4);  kd6(rk, 5);
426 	kd6(rk, 6);  kdl6(rk, 7);
427 #else
428 	rk[v(48, (4))] = ss[4] = word_in(key, 4);
429 	rk[v(48, (5))] = ss[5] = word_in(key, 5);
430 	{
431 		uint32_t	i;
432 
433 		for (i = 0; i < 7; ++i)
434 			k6e(rk, i);
435 		k6ef(rk, 7);
436 #if !(DEC_ROUND == NO_TABLES)
		/* Transform every word except the first and last groups. */
437 		for (i = MAX_AES_NB; i < 12 * MAX_AES_NB; ++i)
438 			rk[i] = inv_mcol(rk[i]);
439 #endif
440 	}
441 #endif	/* DEC_KS_UNROLL */
442 }
443 
444 
445 
/*
 * k8ef(k, i): first four words of a 256-bit key expansion step, stored
 * through v(56, ...).  Performs the same updates of ss[0..3] as kef8() and
 * writes them to k[v(56, 8 * i + 8)] .. k[v(56, 8 * i + 11)].
 */
446 #define	k8ef(k, i) \
447 {	k[v(56, (8 * (i)) + 8)] = ss[0] ^= ls_box(ss[7], 3) ^ t_use(r, c)[i]; \
448 	k[v(56, (8 * (i)) + 9)] = ss[1] ^= ss[0]; \
449 	k[v(56, (8 * (i)) + 10)] = ss[2] ^= ss[1]; \
450 	k[v(56, (8 * (i)) + 11)] = ss[3] ^= ss[2]; \
451 }
452 
/*
 * k8e(k, i): a full 256-bit step; k8ef(k, i) followed by the second half,
 * which XORs ls_box(ss[3], 0) into ss[4], chains ss[5..7] and stores the
 * four words at logical indices 8 * i + 12 .. 8 * i + 15.  In this file
 * k8e/k8ef are used by the non-unrolled path of aes_decrypt_key256().
 */
453 #define	k8e(k, i) \
454 {	k8ef(k, i); \
455 	k[v(56, (8 * (i)) + 12)] = ss[4] ^= ls_box(ss[3], 0); \
456 	k[v(56, (8 * (i)) + 13)] = ss[5] ^= ss[4]; \
457 	k[v(56, (8 * (i)) + 14)] = ss[6] ^= ss[5]; \
458 	k[v(56, (8 * (i)) + 15)] = ss[7] ^= ss[6]; \
459 }
460 
/*
 * Decryption key schedule steps for 256-bit keys.  In this file they are
 * used only by the DEC_KS_UNROLL path of aes_decrypt_key256().  All stores
 * go through v(56, ...).
 *
 * kdf8(k, i): first step.  Advances all eight running words (ss[0] with
 * ls_box(ss[7], 3) and t_use(r, c)[i], ss[4] with ls_box(ss[3], 0), the
 * others XORed with the word just updated) and stores ff() of each at
 * logical indices 8 * i + 8 .. 8 * i + 15.
 */
461 #define	kdf8(k, i) \
462 {	ss[0] ^= ls_box(ss[7], 3) ^ t_use(r, c)[i]; \
463 	k[v(56, (8 * (i)) + 8)] = ff(ss[0]); \
464 	ss[1] ^= ss[0]; k[v(56, (8 * (i)) + 9)] = ff(ss[1]); \
465 	ss[2] ^= ss[1]; k[v(56, (8 * (i)) + 10)] = ff(ss[2]); \
466 	ss[3] ^= ss[2]; k[v(56, (8 * (i)) + 11)] = ff(ss[3]); \
467 	ss[4] ^= ls_box(ss[3], 0); k[v(56, (8 * (i)) + 12)] = ff(ss[4]); \
468 	ss[5] ^= ss[4]; k[v(56, (8 * (i)) + 13)] = ff(ss[5]); \
469 	ss[6] ^= ss[5]; k[v(56, (8 * (i)) + 14)] = ff(ss[6]); \
470 	ss[7] ^= ss[6]; k[v(56, (8 * (i)) + 15)] = ff(ss[7]); \
471 }
472 
/*
 * kd8(k, i): middle steps.  ss[8] is scratch and is seeded twice per step:
 * first from ls_box(ss[7], 3) ^ t_use(r, c)[i] for output words 8 .. 11,
 * then from ls_box(ss[3], 0) for output words 12 .. 15.  Each time ff() is
 * applied once and the output words are the running XOR of ss[8] with the
 * schedule word eight positions earlier.  ss[0..7] are advanced with the
 * plain chain in between.
 */
473 #define	kd8(k, i) \
474 {	ss[8] = ls_box(ss[7], 3) ^ t_use(r, c)[i]; \
475 	ss[0] ^= ss[8]; \
476 	ss[8] = ff(ss[8]); \
477 	k[v(56, (8 * (i)) + 8)] = ss[8] ^= k[v(56, (8 * (i)))]; \
478 	ss[1] ^= ss[0]; \
479 	k[v(56, (8 * (i)) + 9)] = ss[8] ^= k[v(56, (8 * (i)) + 1)]; \
480 	ss[2] ^= ss[1]; \
481 	k[v(56, (8 * (i)) + 10)] = ss[8] ^= k[v(56, (8 * (i)) + 2)]; \
482 	ss[3] ^= ss[2]; \
483 	k[v(56, (8 * (i)) + 11)] = ss[8] ^= k[v(56, (8 * (i)) + 3)]; \
484 	ss[8] = ls_box(ss[3], 0); \
485 	ss[4] ^= ss[8]; \
486 	ss[8] = ff(ss[8]); \
487 	k[v(56, (8 * (i)) + 12)] = ss[8] ^= k[v(56, (8 * (i)) + 4)]; \
488 	ss[5] ^= ss[4]; \
489 	k[v(56, (8 * (i)) + 13)] = ss[8] ^= k[v(56, (8 * (i)) + 5)]; \
490 	ss[6] ^= ss[5]; \
491 	k[v(56, (8 * (i)) + 14)] = ss[8] ^= k[v(56, (8 * (i)) + 6)]; \
492 	ss[7] ^= ss[6]; \
493 	k[v(56, (8 * (i)) + 15)] = ss[8] ^= k[v(56, (8 * (i)) + 7)]; \
494 }
495 
/*
 * kdl8(k, i): last step.  Advances ss[0..3] only and stores them without
 * ff() at logical indices 8 * i + 8 .. 8 * i + 11.
 */
496 #define	kdl8(k, i) \
497 {	ss[0] ^= ls_box(ss[7], 3) ^ t_use(r, c)[i]; \
498 	k[v(56, (8 * (i)) + 8)] = ss[0]; \
499 	ss[1] ^= ss[0]; k[v(56, (8 * (i)) + 9)] = ss[1]; \
500 	ss[2] ^= ss[1]; k[v(56, (8 * (i)) + 10)] = ss[2]; \
501 	ss[3] ^= ss[2]; k[v(56, (8 * (i)) + 11)] = ss[3]; \
502 }
503 
/*
 * Build the decryption key schedule for a 256-bit key.
 *
 * key	user key; eight 32-bit words are read from it with word_in()
 * rk	output schedule; 60 words (indices 0 .. 59) are written, placed
 *	according to v(56, ...)
 *
 * Key words 0 .. 3 are always stored unchanged.  With DEC_KS_UNROLL, key
 * words 4 .. 7 are stored through ff() and the rest of the schedule is
 * produced in final form by kdf8() (step 0), kd8() (steps 1 .. 5) and
 * kdl8() (step 6).  Otherwise words 4 .. 7 are stored unchanged, the
 * schedule is generated with k8e() (steps 0 .. 5) and k8ef() (step 6), and,
 * unless DEC_ROUND is NO_TABLES, inv_mcol() is applied in place to
 * rk[MAX_AES_NB] .. rk[14 * MAX_AES_NB - 1].
 */
504 static void
505 aes_decrypt_key256(const unsigned char *key, uint32_t rk[])
506 {
	/* ss[0..7]: running key words; ss[8]: scratch for kd8(). */
507 	uint32_t	ss[9];
508 #if defined(d_vars)
509 	d_vars;
510 #endif
511 	rk[v(56, (0))] = ss[0] = word_in(key, 0);
512 	rk[v(56, (1))] = ss[1] = word_in(key, 1);
513 	rk[v(56, (2))] = ss[2] = word_in(key, 2);
514 	rk[v(56, (3))] = ss[3] = word_in(key, 3);
515 
516 #ifdef DEC_KS_UNROLL
517 	ss[4] = word_in(key, 4);
518 	rk[v(56, (4))] = ff(ss[4]);
519 	ss[5] = word_in(key, 5);
520 	rk[v(56, (5))] = ff(ss[5]);
521 	ss[6] = word_in(key, 6);
522 	rk[v(56, (6))] = ff(ss[6]);
523 	ss[7] = word_in(key, 7);
524 	rk[v(56, (7))] = ff(ss[7]);
525 	kdf8(rk, 0); kd8(rk, 1);
526 	kd8(rk, 2);  kd8(rk, 3);
527 	kd8(rk, 4);  kd8(rk, 5);
528 	kdl8(rk, 6);
529 #else
530 	rk[v(56, (4))] = ss[4] = word_in(key, 4);
531 	rk[v(56, (5))] = ss[5] = word_in(key, 5);
532 	rk[v(56, (6))] = ss[6] = word_in(key, 6);
533 	rk[v(56, (7))] = ss[7] = word_in(key, 7);
534 	{
535 		uint32_t	i;
536 
537 		for (i = 0; i < 6; ++i)
538 			k8e(rk,  i);
539 		k8ef(rk,  6);
540 #if !(DEC_ROUND == NO_TABLES)
		/* Transform every word except the first and last groups. */
541 		for (i = MAX_AES_NB; i < 14 * MAX_AES_NB; ++i)
542 			rk[i] = inv_mcol(rk[i]);
543 #endif
544 	}
545 #endif	/* DEC_KS_UNROLL */
546 }
547 
548 
549 /*
550  * Expand the cipher key into the decryption key schedule.
551  *
552  * Return the number of rounds for the given cipher key size.
553  * The size of the key schedule depends on the number of rounds
554  * (which can be computed from the size of the key), i.e. 4 * (Nr + 1).
555  *
556  * Parameters:
557  * rk		AES key schedule 32-bit array to be initialized
558  * cipherKey	User key
559  * keyBits	AES key size (128, 192, or 256 bits)
 *
 * Returns 10, 12 or 14 for a 128-, 192- or 256-bit key respectively.
 * For any other keyBits value nothing is written to rk and 0 is returned.
 *
 * The user key is only read, so it is viewed through a pointer to const
 * bytes; the cast must not drop the const qualifier of cipherKey.
560  */
561 int
562 rijndael_key_setup_dec_amd64(uint32_t rk[], const uint32_t cipherKey[],
563 	int keyBits)
564 {
565 	switch (keyBits) {
566 	case 128:
567 		aes_decrypt_key128((const unsigned char *)&cipherKey[0], rk);
568 		return (10);
569 	case 192:
570 		aes_decrypt_key192((const unsigned char *)&cipherKey[0], rk);
571 		return (12);
572 	case 256:
573 		aes_decrypt_key256((const unsigned char *)&cipherKey[0], rk);
574 		return (14);
575 	default: /* should never get here */
576 		break;
577 	}
578 
	/* Unsupported key size: no schedule was generated. */
579 	return (0);
580 }
581