/* xref: /linux/arch/powerpc/crypto/aes-spe-modes.S (revision ca55b2fef3a9373fcfc30f82fd26bc7fccbda732) */
/*
 * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 */

13#include <asm/ppc_asm.h>
14#include "aes-spe-regs.h"
15
/*
 * Endian dependent access helpers for data words (loaded through rSP,
 * stored through rDP) and IV words (accessed through rIP).
 *
 * Big endian builds use plain lwz/stw with the given offset and leave the
 * pointers alone; NEXT_BLOCK then advances rSP and rDP by 16.
 *
 * Little endian builds use byte-reversed lwbrx/stwbrx, ignore the "off"
 * argument and bump the pointer by 4 after every access. Four accesses
 * therefore already move a pointer forward by one block, which is why
 * NEXT_BLOCK is empty, START_IV has to rewind rIP by 16 and CBC_DEC and
 * CTR_DEC are 16 larger than their big endian counterparts.
 */
#ifdef __BIG_ENDIAN__			/* Macros for big endian builds	*/

#define LOAD_DATA(reg, off) \
	lwz		reg,off(rSP);	/* load with offset		*/
#define SAVE_DATA(reg, off) \
	stw		reg,off(rDP);	/* save with offset		*/
#define NEXT_BLOCK \
	addi		rSP,rSP,16;	/* increment pointers per block	*/ \
	addi		rDP,rDP,16;
#define LOAD_IV(reg, off) \
	lwz		reg,off(rIP);	/* IV loading with offset	*/
#define SAVE_IV(reg, off) \
	stw		reg,off(rIP);	/* IV saving with offset	*/
#define START_IV			/* nothing to reset		*/
#define CBC_DEC 16			/* CBC decrement per block	*/
#define CTR_DEC 1			/* CTR decrement one byte	*/

#else					/* Macros for little endian	*/

#define LOAD_DATA(reg, off) \
	lwbrx		reg,0,rSP;	/* load reversed		*/ \
	addi		rSP,rSP,4;	/* and increment pointer	*/
#define SAVE_DATA(reg, off) \
	stwbrx		reg,0,rDP;	/* save reversed		*/ \
	addi		rDP,rDP,4;	/* and increment pointer	*/
#define NEXT_BLOCK			/* nothing to do		*/
#define LOAD_IV(reg, off) \
	lwbrx		reg,0,rIP;	/* load reversed		*/ \
	addi		rIP,rIP,4;	/* and increment pointer	*/
#define SAVE_IV(reg, off) \
	stwbrx		reg,0,rIP;	/* save reversed		*/ \
	addi		rIP,rIP,4;	/* and increment pointer	*/
#define START_IV \
	subi		rIP,rIP,16;	/* must reset pointer		*/
#define CBC_DEC 32			/* 2 blocks because of incs	*/
#define CTR_DEC 17			/* 1 block because of incs	*/

#endif

/*
 * Optional save/restore of additional registers, selected by the
 * nr32bitregs argument of INITIALIZE_CRYPT/FINALIZE_CRYPT (0, 4 or 8).
 * rI0-rI3 go to offsets 96-108 and rG0-rG3 to offsets 112-124 of the
 * stack frame. Only 32 bit stores/loads are used for these registers.
 */
#define SAVE_0_REGS
#define LOAD_0_REGS

#define SAVE_4_REGS \
	stw		rI0,96(r1);	/* save 32 bit registers	*/ \
	stw		rI1,100(r1);					   \
	stw		rI2,104(r1);					   \
	stw		rI3,108(r1);

#define LOAD_4_REGS \
	lwz		rI0,96(r1);	/* restore 32 bit registers	*/ \
	lwz		rI1,100(r1);					   \
	lwz		rI2,104(r1);					   \
	lwz		rI3,108(r1);

#define SAVE_8_REGS \
	SAVE_4_REGS							   \
	stw		rG0,112(r1);	/* save 32 bit registers	*/ \
	stw		rG1,116(r1);					   \
	stw		rG2,120(r1);					   \
	stw		rG3,124(r1);

#define LOAD_8_REGS \
	LOAD_4_REGS							   \
	lwz		rG0,112(r1);	/* restore 32 bit registers	*/ \
	lwz		rG1,116(r1);					   \
	lwz		rG2,120(r1);					   \
	lwz		rG3,124(r1);

/*
 * INITIALIZE_CRYPT(tab, nr32bitregs) - common function prologue
 *
 * Creates a 160 byte stack frame and stores the link register at 8(r1).
 * r14-r23 are saved as full 64 bit values (evstdw) at offsets 16-88,
 * followed by the 0, 4 or 8 registers selected by nr32bitregs at
 * offsets 96-124. On exit rT0 holds the address of the table "tab"
 * and rKS holds a copy of the key pointer rKP. Clobbers r0.
 */
#define INITIALIZE_CRYPT(tab,nr32bitregs) \
	mflr		r0;						   \
	stwu		r1,-160(r1);	/* create stack frame		*/ \
	lis		rT0,tab@h;	/* en-/decryption table pointer	*/ \
	stw		r0,8(r1);	/* save link register		*/ \
	ori		rT0,rT0,tab@l;					   \
	evstdw		r14,16(r1);					   \
	mr		rKS,rKP;					   \
	evstdw		r15,24(r1);	/* We must save non volatile	*/ \
	evstdw		r16,32(r1);	/* registers. Take the chance	*/ \
	evstdw		r17,40(r1);	/* and save the SPE part too	*/ \
	evstdw		r18,48(r1);					   \
	evstdw		r19,56(r1);					   \
	evstdw		r20,64(r1);					   \
	evstdw		r21,72(r1);					   \
	evstdw		r22,80(r1);					   \
	evstdw		r23,88(r1);					   \
	SAVE_##nr32bitregs##_REGS

/*
 * FINALIZE_CRYPT(nr32bitregs) - common function epilogue
 *
 * Counterpart of INITIALIZE_CRYPT and must be used with the same
 * nr32bitregs value. Restores r14-r23 (64 bit), the optional 32 bit
 * registers and the link register, then overwrites the first word of
 * each 8 byte r14-r23 save slot with zero before the 160 byte frame is
 * released. The second word of each slot and the slots at offsets
 * 96-124 are left as they are. Leaves r0 = 0; the caller still has to
 * issue the blr.
 */
#define FINALIZE_CRYPT(nr32bitregs) \
	lwz		r0,8(r1);					   \
	evldw		r14,16(r1);	/* restore SPE registers	*/ \
	evldw		r15,24(r1);					   \
	evldw		r16,32(r1);					   \
	evldw		r17,40(r1);					   \
	evldw		r18,48(r1);					   \
	evldw		r19,56(r1);					   \
	evldw		r20,64(r1);					   \
	evldw		r21,72(r1);					   \
	evldw		r22,80(r1);					   \
	evldw		r23,88(r1);					   \
	LOAD_##nr32bitregs##_REGS					   \
	mtlr		r0;		/* restore link register	*/ \
	xor		r0,r0,r0;					   \
	stw		r0,16(r1);	/* delete sensitive data	*/ \
	stw		r0,24(r1);	/* that we might have pushed	*/ \
	stw		r0,32(r1);	/* from other context that runs	*/ \
	stw		r0,40(r1);	/* the same code		*/ \
	stw		r0,48(r1);					   \
	stw		r0,56(r1);					   \
	stw		r0,64(r1);					   \
	stw		r0,72(r1);					   \
	stw		r0,80(r1);					   \
	stw		r0,88(r1);					   \
	addi		r1,r1,160;	/* cleanup stack frame		*/

/*
 * ENDIAN_SWAP(t0, t1, s0, s1) - byte reverse two 32 bit words
 *
 * t0 = byte reversed s0, t1 = byte reversed s1. The sources are not
 * modified, so targets and sources must be different registers.
 * For a source with bytes ABCD the rotate right by 8 yields DABC, the
 * two inserts from the source rotated left by 8 (BCDA) then replace
 * the second and the fourth byte, giving DCBA.
 */
#define ENDIAN_SWAP(t0, t1, s0, s1) \
	rotrwi		t0,s0,8;	/* swap endianness for 2 GPRs	*/ \
	rotrwi		t1,s1,8;					   \
	rlwimi		t0,s0,8,8,15;					   \
	rlwimi		t1,s1,8,8,15;					   \
	rlwimi		t0,s0,8,24,31;					   \
	rlwimi		t1,s1,8,24,31;

/*
 * GF128_MUL(d0, d1, d2, d3, t0) - multiply a 128 bit value by x
 *
 * d0 is the least and d3 the most significant 32 bit word. The value
 * is shifted left by one bit; the top bit of each word is carried into
 * the next higher word. If the top bit of d3 was set before the shift,
 * 0x87 is xored into d0 afterwards, otherwise 0.
 * Clobbers t0 and cr0 (cmpwi).
 */
#define GF128_MUL(d0, d1, d2, d3, t0) \
	li		t0,0x87;	/* multiplication in GF128	*/ \
	cmpwi		d3,-1;						   \
	iselgt		t0,0,t0;					   \
	rlwimi		d3,d2,0,0,0;	/* propagate "carry" bits	*/ \
	rotlwi		d3,d3,1;					   \
	rlwimi		d2,d1,0,0,0;					   \
	rotlwi		d2,d2,1;					   \
	rlwimi		d1,d0,0,0,0;					   \
	slwi		d0,d0,1;	/* shift left 128 bit		*/ \
	rotlwi		d1,d1,1;					   \
	xor		d0,d0,t0;

/*
 * START_KEY(d0, d1, d2, d3) - set up one block for en-/decryption
 *
 * Loads the first four key words from rKP into rW0-rW3, loads the
 * count register with rRR and writes d0-d3 xored with those key words
 * to rD0-rD3. rKP itself is not advanced here.
 */
#define START_KEY(d0, d1, d2, d3) \
	lwz		rW0,0(rKP);					   \
	mtctr		rRR;						   \
	lwz		rW1,4(rKP);					   \
	lwz		rW2,8(rKP);					   \
	lwz		rW3,12(rKP);					   \
	xor		rD0,d0,rW0;					   \
	xor		rD1,d1,rW1;					   \
	xor		rD2,d2,rW2;					   \
	xor		rD3,d3,rW3;

/*
 * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds)
 *
 * called from glue layer to encrypt a single 16 byte block
 * round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 * The block is read through rSP and written through rDP. START_KEY
 * xors the first four key words into the data before the call to
 * ppc_encrypt_block. Afterwards rD0-rD3 are xored with rW0-rW3
 * (presumably the last round key left there by ppc_encrypt_block,
 * which is not part of this file) to form the output.
 *
 */
_GLOBAL(ppc_encrypt_aes)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
	LOAD_DATA(rD0, 0)
	LOAD_DATA(rD1, 4)
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_encrypt_block
	xor		rD0,rD0,rW0
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	FINALIZE_CRYPT(0)
	blr

/*
 * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds)
 *
 * called from glue layer to decrypt a single 16 byte block
 * round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 * Same flow as ppc_encrypt_aes but with the decryption table in rT0
 * and ppc_decrypt_block. In addition rT1 is set to rT0 + 4096 before
 * the call (presumably a second table that ppc_decrypt_block needs;
 * that routine is not part of this file).
 *
 */
_GLOBAL(ppc_decrypt_aes)
	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
	LOAD_DATA(rD0, 0)
	addi		rT1,rT0,4096
	LOAD_DATA(rD1, 4)
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_decrypt_block
	xor		rD0,rD0,rW0
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	FINALIZE_CRYPT(0)
	blr

/*
 * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds, u32 bytes);
 *
 * called from glue layer to encrypt multiple blocks via ECB
 * Bytes must be larger or equal 16 and only whole blocks are
 * processed. round values are AES128 = 4, AES192 = 5 and
 * AES256 = 6
 *
 * The loop always handles one block before it tests the remaining
 * length in rLN. rKP is reloaded from rKS at the start of every
 * block. The length compare is issued before the call and evaluated
 * by the bt after it, so the loop relies on ppc_encrypt_block (not
 * part of this file) leaving cr0 untouched.
 *
 */
_GLOBAL(ppc_encrypt_ecb)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
ppc_encrypt_ecb_loop:
	LOAD_DATA(rD0, 0)
	mr		rKP,rKS
	LOAD_DATA(rD1, 4)
	subi		rLN,rLN,16
	LOAD_DATA(rD2, 8)
	cmpwi		rLN,15		/* another whole block left?	*/
	LOAD_DATA(rD3, 12)
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_encrypt_block
	xor		rD0,rD0,rW0
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	NEXT_BLOCK
	bt		gt,ppc_encrypt_ecb_loop
	FINALIZE_CRYPT(0)
	blr

/*
 * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds, u32 bytes);
 *
 * called from glue layer to decrypt multiple blocks via ECB
 * Bytes must be larger or equal 16 and only whole blocks are
 * processed. round values are AES128 = 4, AES192 = 5 and
 * AES256 = 6
 *
 * Mirror image of ppc_encrypt_ecb using the decryption table and
 * ppc_decrypt_block; rT1 is set once to rT0 + 4096 ahead of the loop.
 * The length compare is issued before the call and evaluated by the
 * bt after it, so the loop relies on ppc_decrypt_block (not part of
 * this file) leaving cr0 untouched.
 *
 */
_GLOBAL(ppc_decrypt_ecb)
	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
	addi		rT1,rT0,4096
ppc_decrypt_ecb_loop:
	LOAD_DATA(rD0, 0)
	mr		rKP,rKS
	LOAD_DATA(rD1, 4)
	subi		rLN,rLN,16
	LOAD_DATA(rD2, 8)
	cmpwi		rLN,15		/* another whole block left?	*/
	LOAD_DATA(rD3, 12)
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_decrypt_block
	xor		rD0,rD0,rW0
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	NEXT_BLOCK
	bt		gt,ppc_decrypt_ecb_loop
	FINALIZE_CRYPT(0)
	blr

/*
 * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds, u32 bytes, u8 *iv);
 *
 * called from glue layer to encrypt multiple blocks via CBC
 * Bytes must be larger or equal 16 and only whole blocks are
 * processed. round values are AES128 = 4, AES192 = 5 and
 * AES256 = 6
 *
 * rI0-rI3 hold the chaining value: the IV at entry, afterwards the
 * ciphertext block just produced. Each input block is xored with it
 * before encryption. The last ciphertext block is written back to
 * the IV buffer on exit. The length compare is evaluated by the bt
 * after the call, so the loop relies on ppc_encrypt_block (not part
 * of this file) leaving cr0 untouched.
 *
 */
_GLOBAL(ppc_encrypt_cbc)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
	LOAD_IV(rI0, 0)
	LOAD_IV(rI1, 4)
	LOAD_IV(rI2, 8)
	LOAD_IV(rI3, 12)
ppc_encrypt_cbc_loop:
	LOAD_DATA(rD0, 0)
	mr		rKP,rKS
	LOAD_DATA(rD1, 4)
	subi		rLN,rLN,16
	LOAD_DATA(rD2, 8)
	cmpwi		rLN,15		/* another whole block left?	*/
	LOAD_DATA(rD3, 12)
	xor		rD0,rD0,rI0	/* chain with previous block	*/
	xor		rD1,rD1,rI1
	xor		rD2,rD2,rI2
	xor		rD3,rD3,rI3
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_encrypt_block
	xor		rI0,rD0,rW0	/* ciphertext is next chain	*/
	SAVE_DATA(rI0, 0)
	xor		rI1,rD1,rW1
	SAVE_DATA(rI1, 4)
	xor		rI2,rD2,rW2
	SAVE_DATA(rI2, 8)
	xor		rI3,rD3,rW3
	SAVE_DATA(rI3, 12)
	NEXT_BLOCK
	bt		gt,ppc_encrypt_cbc_loop
	START_IV
	SAVE_IV(rI0, 0)
	SAVE_IV(rI1, 4)
	SAVE_IV(rI2, 8)
	SAVE_IV(rI3, 12)
	FINALIZE_CRYPT(4)
	blr

/*
 * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds, u32 bytes, u8 *iv);
 *
 * called from glue layer to decrypt multiple blocks via CBC
 * round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 * The data is processed back to front. The length is rounded down to
 * a multiple of 16 and reduced by 16, which gives the offset of the
 * last block; rSP and rDP are moved there. The original IV is kept in
 * rI0-rI3 and the last ciphertext block is stored into the IV buffer
 * right away as the IV for a following call. Inside the loop the
 * block held in rD0-rD3 is decrypted, the preceding ciphertext block
 * is loaded into rD0-rD3 and xored onto the result to give the
 * plaintext. CBC_DEC steps both pointers back to the previous block.
 * The first block of the buffer is handled after the loop and is
 * xored with the original IV.
 *
 */
_GLOBAL(ppc_decrypt_cbc)
	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
	li		rT1,15
	LOAD_IV(rI0, 0)
	andc		rLN,rLN,rT1	/* whole blocks only		*/
	LOAD_IV(rI1, 4)
	subi		rLN,rLN,16	/* offset of last block		*/
	LOAD_IV(rI2, 8)
	add		rSP,rSP,rLN	/* reverse processing		*/
	LOAD_IV(rI3, 12)
	add		rDP,rDP,rLN
	LOAD_DATA(rD0, 0)
	addi		rT1,rT0,4096
	LOAD_DATA(rD1, 4)
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	START_IV
	SAVE_IV(rD0, 0)			/* last block is the next IV	*/
	SAVE_IV(rD1, 4)
	SAVE_IV(rD2, 8)
	cmpwi		rLN,16
	SAVE_IV(rD3, 12)
	bt		lt,ppc_decrypt_cbc_end
ppc_decrypt_cbc_loop:
	mr		rKP,rKS
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_decrypt_block
	subi		rLN,rLN,16
	subi		rSP,rSP,CBC_DEC
	xor		rW0,rD0,rW0
	LOAD_DATA(rD0, 0)		/* preceding ciphertext block	*/
	xor		rW1,rD1,rW1
	LOAD_DATA(rD1, 4)
	xor		rW2,rD2,rW2
	LOAD_DATA(rD2, 8)
	xor		rW3,rD3,rW3
	LOAD_DATA(rD3, 12)
	xor		rW0,rW0,rD0
	SAVE_DATA(rW0, 0)
	xor		rW1,rW1,rD1
	SAVE_DATA(rW1, 4)
	xor		rW2,rW2,rD2
	SAVE_DATA(rW2, 8)
	xor		rW3,rW3,rD3
	SAVE_DATA(rW3, 12)
	cmpwi		rLN,15
	subi		rDP,rDP,CBC_DEC
	bt		gt,ppc_decrypt_cbc_loop
ppc_decrypt_cbc_end:
	mr		rKP,rKS
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_decrypt_block
	xor		rW0,rW0,rD0
	xor		rW1,rW1,rD1
	xor		rW2,rW2,rD2
	xor		rW3,rW3,rD3
	xor		rW0,rW0,rI0	/* decrypt with initial IV	*/
	SAVE_DATA(rW0, 0)
	xor		rW1,rW1,rI1
	SAVE_DATA(rW1, 4)
	xor		rW2,rW2,rI2
	SAVE_DATA(rW2, 8)
	xor		rW3,rW3,rI3
	SAVE_DATA(rW3, 12)
	FINALIZE_CRYPT(4)
	blr

/*
 * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
 *		 u32 rounds, u32 bytes, u8 *iv);
 *
 * called from glue layer to encrypt/decrypt multiple blocks
 * via CTR. Number of bytes does not need to be a multiple of
 * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 * rI0-rI3 hold the counter with rI3 as the least significant word;
 * it is incremented by one per block with the carry rippling up to
 * rI0 (addic/addze). Whole blocks are xored with the encrypted
 * counter in the main loop. A trailing partial block is handled by
 * storing the encrypted counter into the IV buffer and xoring it
 * byte by byte onto the remaining rLN input bytes. CTR_DEC places
 * rIP one byte in front of the IV buffer for the pre-incrementing
 * lbzu; rIP is moved back to the start of the buffer afterwards.
 * On exit the updated counter is written to the IV buffer.
 *
 */
_GLOBAL(ppc_crypt_ctr)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
	LOAD_IV(rI0, 0)
	LOAD_IV(rI1, 4)
	LOAD_IV(rI2, 8)
	cmpwi		rLN,16
	LOAD_IV(rI3, 12)
	START_IV
	bt		lt,ppc_crypt_ctr_partial
ppc_crypt_ctr_loop:
	mr		rKP,rKS
	START_KEY(rI0, rI1, rI2, rI3)
	bl		ppc_encrypt_block
	xor		rW0,rD0,rW0
	xor		rW1,rD1,rW1
	xor		rW2,rD2,rW2
	xor		rW3,rD3,rW3
	LOAD_DATA(rD0, 0)
	subi		rLN,rLN,16
	LOAD_DATA(rD1, 4)
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	xor		rD0,rD0,rW0
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	addic		rI3,rI3,1	/* increase counter			*/
	addze		rI2,rI2
	addze		rI1,rI1
	addze		rI0,rI0
	NEXT_BLOCK
	cmpwi		rLN,15
	bt		gt,ppc_crypt_ctr_loop
ppc_crypt_ctr_partial:
	cmpwi		rLN,0
	bt		eq,ppc_crypt_ctr_end
	mr		rKP,rKS
	START_KEY(rI0, rI1, rI2, rI3)
	bl		ppc_encrypt_block
	xor		rW0,rD0,rW0
	SAVE_IV(rW0, 0)			/* IV buffer is the scratch area	*/
	xor		rW1,rD1,rW1
	SAVE_IV(rW1, 4)
	xor		rW2,rD2,rW2
	SAVE_IV(rW2, 8)
	xor		rW3,rD3,rW3
	SAVE_IV(rW3, 12)
	mtctr		rLN
	subi		rIP,rIP,CTR_DEC
	subi		rSP,rSP,1
	subi		rDP,rDP,1
ppc_crypt_ctr_xorbyte:
	lbzu		rW4,1(rIP)	/* bytewise xor for partial block	*/
	lbzu		rW5,1(rSP)
	xor		rW4,rW4,rW5
	stbu		rW4,1(rDP)
	bdnz		ppc_crypt_ctr_xorbyte
	subf		rIP,rLN,rIP	/* rewind rIP to the IV start		*/
	addi		rIP,rIP,1
	addic		rI3,rI3,1
	addze		rI2,rI2
	addze		rI1,rI1
	addze		rI0,rI0
ppc_crypt_ctr_end:
	SAVE_IV(rI0, 0)
	SAVE_IV(rI1, 4)
	SAVE_IV(rI2, 8)
	SAVE_IV(rI3, 12)
	FINALIZE_CRYPT(4)
	blr

/*
 * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
 *
 * called from glue layer to encrypt multiple blocks via XTS
 * If key_twk is given, the initial IV encryption will be
 * processed too. Round values are AES128 = 4, AES192 = 5,
 * AES256 = 6
 *
 * rI0-rI3 hold the tweak in the layout used for the data xor,
 * rG0-rG3 hold a byte swapped copy that GF128_MUL advances after
 * every block. Each block is xored with the tweak before and after
 * the encryption. The loop processes one block before it tests the
 * remaining length. The compare is placed behind GF128_MUL because
 * that macro overwrites cr0. The tweak for the next block is written
 * back to the IV buffer on exit.
 *
 */
_GLOBAL(ppc_encrypt_xts)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
	LOAD_IV(rI0, 0)
	LOAD_IV(rI1, 4)
	LOAD_IV(rI2, 8)
	cmpwi		rKT,0
	LOAD_IV(rI3, 12)
	bt		eq,ppc_encrypt_xts_notweak
	mr		rKP,rKT		/* encrypt IV with tweak key	*/
	START_KEY(rI0, rI1, rI2, rI3)
	bl		ppc_encrypt_block
	xor		rI0,rD0,rW0
	xor		rI1,rD1,rW1
	xor		rI2,rD2,rW2
	xor		rI3,rD3,rW3
ppc_encrypt_xts_notweak:
	ENDIAN_SWAP(rG0, rG1, rI0, rI1)
	ENDIAN_SWAP(rG2, rG3, rI2, rI3)
ppc_encrypt_xts_loop:
	LOAD_DATA(rD0, 0)
	mr		rKP,rKS
	LOAD_DATA(rD1, 4)
	subi		rLN,rLN,16
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	xor		rD0,rD0,rI0
	xor		rD1,rD1,rI1
	xor		rD2,rD2,rI2
	xor		rD3,rD3,rI3
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_encrypt_block
	xor		rD0,rD0,rW0
	xor		rD1,rD1,rW1
	xor		rD2,rD2,rW2
	xor		rD3,rD3,rW3
	xor		rD0,rD0,rI0
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rI1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rI2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rI3
	SAVE_DATA(rD3, 12)
	GF128_MUL(rG0, rG1, rG2, rG3, rW0)
	ENDIAN_SWAP(rI0, rI1, rG0, rG1)
	ENDIAN_SWAP(rI2, rI3, rG2, rG3)
	cmpwi		rLN,0
	NEXT_BLOCK
	bt		gt,ppc_encrypt_xts_loop
	START_IV
	SAVE_IV(rI0, 0)
	SAVE_IV(rI1, 4)
	SAVE_IV(rI2, 8)
	SAVE_IV(rI3, 12)
	FINALIZE_CRYPT(8)
	blr

/*
 * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
 *
 * called from glue layer to decrypt multiple blocks via XTS
 * If key_twk is given, the initial IV encryption will be
 * processed too. Round values are AES128 = 4, AES192 = 5,
 * AES256 = 6
 *
 * Same structure as ppc_encrypt_xts with ppc_decrypt_block for the
 * data. The initial IV is still run through ppc_encrypt_block; for
 * that call rT0 is temporarily lowered by 4096 (presumably the
 * encryption table sits directly in front of the decryption table)
 * and restored afterwards. rT1 stays at decryption table + 4096.
 * The compare on the remaining length is placed behind GF128_MUL
 * because that macro overwrites cr0.
 *
 */
_GLOBAL(ppc_decrypt_xts)
	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
	LOAD_IV(rI0, 0)
	addi		rT1,rT0,4096
	LOAD_IV(rI1, 4)
	LOAD_IV(rI2, 8)
	cmpwi		rKT,0
	LOAD_IV(rI3, 12)
	bt		eq,ppc_decrypt_xts_notweak
	subi		rT0,rT0,4096	/* table for the encrypt call	*/
	mr		rKP,rKT
	START_KEY(rI0, rI1, rI2, rI3)
	bl		ppc_encrypt_block
	xor		rI0,rD0,rW0
	xor		rI1,rD1,rW1
	xor		rI2,rD2,rW2
	xor		rI3,rD3,rW3
	addi		rT0,rT0,4096	/* back to decryption table	*/
ppc_decrypt_xts_notweak:
	ENDIAN_SWAP(rG0, rG1, rI0, rI1)
	ENDIAN_SWAP(rG2, rG3, rI2, rI3)
ppc_decrypt_xts_loop:
	LOAD_DATA(rD0, 0)
	mr		rKP,rKS
	LOAD_DATA(rD1, 4)
	subi		rLN,rLN,16
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	xor		rD0,rD0,rI0
	xor		rD1,rD1,rI1
	xor		rD2,rD2,rI2
	xor		rD3,rD3,rI3
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_decrypt_block
	xor		rD0,rD0,rW0
	xor		rD1,rD1,rW1
	xor		rD2,rD2,rW2
	xor		rD3,rD3,rW3
	xor		rD0,rD0,rI0
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rI1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rI2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rI3
	SAVE_DATA(rD3, 12)
	GF128_MUL(rG0, rG1, rG2, rG3, rW0)
	ENDIAN_SWAP(rI0, rI1, rG0, rG1)
	ENDIAN_SWAP(rI2, rI3, rG2, rG3)
	cmpwi		rLN,0
	NEXT_BLOCK
	bt		gt,ppc_decrypt_xts_loop
	START_IV
	SAVE_IV(rI0, 0)
	SAVE_IV(rI1, 4)
	SAVE_IV(rI2, 8)
	SAVE_IV(rI3, 12)
	FINALIZE_CRYPT(8)
	blr

631