/* xref: /linux/arch/sparc/crypto/camellia_asm.S (revision 005438a8eef063495ac059d128eea71b58de50e5) */
#include <linux/linkage.h>
#include <asm/visasm.h>

#include "opcodes.h"

/*
 * CAMELLIA_6ROUNDS(KEY_BASE, I0, I1)
 *
 * Emits six consecutive CAMELLIA_F operations.  The first operand steps
 * through KEY_BASE, KEY_BASE + 2, ... KEY_BASE + 10, and the I0/I1 operands
 * swap roles on every step, so after six steps the two halves are back in
 * their original registers.
 *
 * Every user in this file passes floating-point register numbers: subkeys
 * sit in consecutive double registers starting at %f<KEY_BASE> and the two
 * 64-bit halves of the block are in %f0 and %f2.
 *
 * CAMELLIA_F comes from opcodes.h; its operand encoding is not visible in
 * this file.
 */
#define CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
	CAMELLIA_F(KEY_BASE +  0, I1, I0, I1) \
	CAMELLIA_F(KEY_BASE +  2, I0, I1, I0) \
	CAMELLIA_F(KEY_BASE +  4, I1, I0, I1) \
	CAMELLIA_F(KEY_BASE +  6, I0, I1, I0) \
	CAMELLIA_F(KEY_BASE +  8, I1, I0, I1) \
	CAMELLIA_F(KEY_BASE + 10, I0, I1, I0)

/*
 * CAMELLIA_6ROUNDS_FL_FLI(KEY_BASE, I0, I1)
 *
 * One "grand round": CAMELLIA_6ROUNDS followed by CAMELLIA_FL applied in
 * place to I0 with the subkey at KEY_BASE + 12, and CAMELLIA_FLI applied in
 * place to I1 with the subkey at KEY_BASE + 14.  A grand round therefore
 * consumes eight consecutive double registers of key material
 * (KEY_BASE .. KEY_BASE + 14).
 *
 * CAMELLIA_FL and CAMELLIA_FLI come from opcodes.h (not visible here).
 */
#define CAMELLIA_6ROUNDS_FL_FLI(KEY_BASE, I0, I1) \
	CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
	CAMELLIA_FL(KEY_BASE + 12, I0, I0) \
	CAMELLIA_FLI(KEY_BASE + 14, I1, I1)

19	.data
20
21	.align	8
22SIGMA:	.xword	0xA09E667F3BCC908B
23	.xword	0xB67AE8584CAA73B2
24	.xword	0xC6EF372FE94F82BE
25	.xword	0x54FF53A5F1D36F1C
26	.xword	0x10E527FADE682D1D
27	.xword	0xB05688C2B3E6C1FD
28
29	.text
30
31	.align	32
32ENTRY(camellia_sparc64_key_expand)
33	/* %o0=in_key, %o1=encrypt_key, %o2=key_len, %o3=decrypt_key */
34	VISEntry
35	ld	[%o0 + 0x00], %f0	! i0, k[0]
36	ld	[%o0 + 0x04], %f1	! i1, k[1]
37	ld	[%o0 + 0x08], %f2	! i2, k[2]
38	ld	[%o0 + 0x0c], %f3	! i3, k[3]
39	std	%f0, [%o1 + 0x00]	! k[0, 1]
40	fsrc2	%f0, %f28
41	std	%f2, [%o1 + 0x08]	! k[2, 3]
42	cmp	%o2, 16
43	be	10f
44	 fsrc2	%f2, %f30
45
46	ld	[%o0 + 0x10], %f0
47	ld	[%o0 + 0x14], %f1
48	std	%f0, [%o1 + 0x20]	! k[8, 9]
49	cmp	%o2, 24
50	fone	%f10
51	be,a	1f
52	 fxor	%f10, %f0, %f2
53	ld	[%o0 + 0x18], %f2
54	ld	[%o0 + 0x1c], %f3
551:
56	std	%f2, [%o1 + 0x28]	! k[10, 11]
57	fxor	%f28, %f0, %f0
58	fxor	%f30, %f2, %f2
59
6010:
61	sethi	%hi(SIGMA), %g3
62	or	%g3, %lo(SIGMA), %g3
63	ldd	[%g3 + 0x00], %f16
64	ldd	[%g3 + 0x08], %f18
65	ldd	[%g3 + 0x10], %f20
66	ldd	[%g3 + 0x18], %f22
67	ldd	[%g3 + 0x20], %f24
68	ldd	[%g3 + 0x28], %f26
69	CAMELLIA_F(16, 2, 0, 2)
70	CAMELLIA_F(18, 0, 2, 0)
71	fxor	%f28, %f0, %f0
72	fxor	%f30, %f2, %f2
73	CAMELLIA_F(20, 2, 0, 2)
74	CAMELLIA_F(22, 0, 2, 0)
75
76#define ROTL128(S01, S23, TMP1, TMP2, N)	\
77	srlx	S01, (64 - N), TMP1;		\
78	sllx	S01, N, S01;			\
79	srlx	S23, (64 - N), TMP2;		\
80	sllx	S23, N, S23;			\
81	or	S01, TMP2, S01;			\
82	or	S23, TMP1, S23
83
84	cmp	%o2, 16
85	bne	1f
86	 nop
87	/* 128-bit key */
88	std	%f0, [%o1 + 0x10]	! k[ 4,  5]
89	std	%f2, [%o1 + 0x18]	! k[ 6,  7]
90	MOVDTOX_F0_O4
91	MOVDTOX_F2_O5
92	ROTL128(%o4, %o5, %g2, %g3, 15)
93	stx	%o4, [%o1 + 0x30]	! k[12, 13]
94	stx	%o5, [%o1 + 0x38]	! k[14, 15]
95	ROTL128(%o4, %o5, %g2, %g3, 15)
96	stx	%o4, [%o1 + 0x40]	! k[16, 17]
97	stx	%o5, [%o1 + 0x48]	! k[18, 19]
98	ROTL128(%o4, %o5, %g2, %g3, 15)
99	stx	%o4, [%o1 + 0x60]	! k[24, 25]
100	ROTL128(%o4, %o5, %g2, %g3, 15)
101	stx	%o4, [%o1 + 0x70]	! k[28, 29]
102	stx	%o5, [%o1 + 0x78]	! k[30, 31]
103	ROTL128(%o4, %o5, %g2, %g3, 34)
104	stx	%o4, [%o1 + 0xa0]	! k[40, 41]
105	stx	%o5, [%o1 + 0xa8]	! k[42, 43]
106	ROTL128(%o4, %o5, %g2, %g3, 17)
107	stx	%o4, [%o1 + 0xc0]	! k[48, 49]
108	stx	%o5, [%o1 + 0xc8]	! k[50, 51]
109
110	ldx	[%o1 + 0x00], %o4	! k[ 0,  1]
111	ldx	[%o1 + 0x08], %o5	! k[ 2,  3]
112	ROTL128(%o4, %o5, %g2, %g3, 15)
113	stx	%o4, [%o1 + 0x20]	! k[ 8,  9]
114	stx	%o5, [%o1 + 0x28]	! k[10, 11]
115	ROTL128(%o4, %o5, %g2, %g3, 30)
116	stx	%o4, [%o1 + 0x50]	! k[20, 21]
117	stx	%o5, [%o1 + 0x58]	! k[22, 23]
118	ROTL128(%o4, %o5, %g2, %g3, 15)
119	stx	%o5, [%o1 + 0x68]	! k[26, 27]
120	ROTL128(%o4, %o5, %g2, %g3, 17)
121	stx	%o4, [%o1 + 0x80]	! k[32, 33]
122	stx	%o5, [%o1 + 0x88]	! k[34, 35]
123	ROTL128(%o4, %o5, %g2, %g3, 17)
124	stx	%o4, [%o1 + 0x90]	! k[36, 37]
125	stx	%o5, [%o1 + 0x98]	! k[38, 39]
126	ROTL128(%o4, %o5, %g2, %g3, 17)
127	stx	%o4, [%o1 + 0xb0]	! k[44, 45]
128	stx	%o5, [%o1 + 0xb8]	! k[46, 47]
129
130	ba,pt	%xcc, 2f
131	 mov	(3 * 16 * 4), %o0
132
1331:
134	/* 192-bit or 256-bit key */
135	std	%f0, [%o1 + 0x30]	! k[12, 13]
136	std	%f2, [%o1 + 0x38]	! k[14, 15]
137	ldd	[%o1 + 0x20], %f4	! k[ 8,  9]
138	ldd	[%o1 + 0x28], %f6	! k[10, 11]
139	fxor	%f0, %f4, %f0
140	fxor	%f2, %f6, %f2
141	CAMELLIA_F(24, 2, 0, 2)
142	CAMELLIA_F(26, 0, 2, 0)
143	std	%f0, [%o1 + 0x10]	! k[ 4,  5]
144	std	%f2, [%o1 + 0x18]	! k[ 6,  7]
145	MOVDTOX_F0_O4
146	MOVDTOX_F2_O5
147	ROTL128(%o4, %o5, %g2, %g3, 30)
148	stx	%o4, [%o1 + 0x50]	! k[20, 21]
149	stx	%o5, [%o1 + 0x58]	! k[22, 23]
150	ROTL128(%o4, %o5, %g2, %g3, 30)
151	stx	%o4, [%o1 + 0xa0]	! k[40, 41]
152	stx	%o5, [%o1 + 0xa8]	! k[42, 43]
153	ROTL128(%o4, %o5, %g2, %g3, 51)
154	stx	%o4, [%o1 + 0x100]	! k[64, 65]
155	stx	%o5, [%o1 + 0x108]	! k[66, 67]
156	ldx	[%o1 + 0x20], %o4	! k[ 8,  9]
157	ldx	[%o1 + 0x28], %o5	! k[10, 11]
158	ROTL128(%o4, %o5, %g2, %g3, 15)
159	stx	%o4, [%o1 + 0x20]	! k[ 8,  9]
160	stx	%o5, [%o1 + 0x28]	! k[10, 11]
161	ROTL128(%o4, %o5, %g2, %g3, 15)
162	stx	%o4, [%o1 + 0x40]	! k[16, 17]
163	stx	%o5, [%o1 + 0x48]	! k[18, 19]
164	ROTL128(%o4, %o5, %g2, %g3, 30)
165	stx	%o4, [%o1 + 0x90]	! k[36, 37]
166	stx	%o5, [%o1 + 0x98]	! k[38, 39]
167	ROTL128(%o4, %o5, %g2, %g3, 34)
168	stx	%o4, [%o1 + 0xd0]	! k[52, 53]
169	stx	%o5, [%o1 + 0xd8]	! k[54, 55]
170	ldx	[%o1 + 0x30], %o4	! k[12, 13]
171	ldx	[%o1 + 0x38], %o5	! k[14, 15]
172	ROTL128(%o4, %o5, %g2, %g3, 15)
173	stx	%o4, [%o1 + 0x30]	! k[12, 13]
174	stx	%o5, [%o1 + 0x38]	! k[14, 15]
175	ROTL128(%o4, %o5, %g2, %g3, 30)
176	stx	%o4, [%o1 + 0x70]	! k[28, 29]
177	stx	%o5, [%o1 + 0x78]	! k[30, 31]
178	srlx	%o4, 32, %g2
179	srlx	%o5, 32, %g3
180	stw	%o4, [%o1 + 0xc0]	! k[48]
181	stw	%g3, [%o1 + 0xc4]	! k[49]
182	stw	%o5, [%o1 + 0xc8]	! k[50]
183	stw	%g2, [%o1 + 0xcc]	! k[51]
184	ROTL128(%o4, %o5, %g2, %g3, 49)
185	stx	%o4, [%o1 + 0xe0]	! k[56, 57]
186	stx	%o5, [%o1 + 0xe8]	! k[58, 59]
187	ldx	[%o1 + 0x00], %o4	! k[ 0,  1]
188	ldx	[%o1 + 0x08], %o5	! k[ 2,  3]
189	ROTL128(%o4, %o5, %g2, %g3, 45)
190	stx	%o4, [%o1 + 0x60]	! k[24, 25]
191	stx	%o5, [%o1 + 0x68]	! k[26, 27]
192	ROTL128(%o4, %o5, %g2, %g3, 15)
193	stx	%o4, [%o1 + 0x80]	! k[32, 33]
194	stx	%o5, [%o1 + 0x88]	! k[34, 35]
195	ROTL128(%o4, %o5, %g2, %g3, 17)
196	stx	%o4, [%o1 + 0xb0]	! k[44, 45]
197	stx	%o5, [%o1 + 0xb8]	! k[46, 47]
198	ROTL128(%o4, %o5, %g2, %g3, 34)
199	stx	%o4, [%o1 + 0xf0]	! k[60, 61]
200	stx	%o5, [%o1 + 0xf8]	! k[62, 63]
201	mov	(4 * 16 * 4), %o0
2022:
203	add	%o1, %o0, %o1
204	ldd	[%o1 + 0x00], %f0
205	ldd	[%o1 + 0x08], %f2
206	std	%f0, [%o3 + 0x00]
207	std	%f2, [%o3 + 0x08]
208	add	%o3, 0x10, %o3
2091:
210	sub	%o1, (16 * 4), %o1
211	ldd	[%o1 + 0x38], %f0
212	ldd	[%o1 + 0x30], %f2
213	ldd	[%o1 + 0x28], %f4
214	ldd	[%o1 + 0x20], %f6
215	ldd	[%o1 + 0x18], %f8
216	ldd	[%o1 + 0x10], %f10
217	std	%f0, [%o3 + 0x00]
218	std	%f2, [%o3 + 0x08]
219	std	%f4, [%o3 + 0x10]
220	std	%f6, [%o3 + 0x18]
221	std	%f8, [%o3 + 0x20]
222	std	%f10, [%o3 + 0x28]
223
224	ldd	[%o1 + 0x08], %f0
225	ldd	[%o1 + 0x00], %f2
226	std	%f0, [%o3 + 0x30]
227	std	%f2, [%o3 + 0x38]
228	subcc	%o0, (16 * 4), %o0
229	bne,pt	%icc, 1b
230	 add	%o3, (16 * 4), %o3
231
232	std	%f2, [%o3 - 0x10]
233	std	%f0, [%o3 - 0x08]
234
235	retl
236	 VISExit
237ENDPROC(camellia_sparc64_key_expand)
238
239	.align	32
240ENTRY(camellia_sparc64_crypt)
241	/* %o0=key, %o1=input, %o2=output, %o3=key_len */
242	VISEntry
243
244	ld	[%o1 + 0x00], %f0
245	ld	[%o1 + 0x04], %f1
246	ld	[%o1 + 0x08], %f2
247	ld	[%o1 + 0x0c], %f3
248
249	ldd	[%o0 + 0x00], %f4
250	ldd	[%o0 + 0x08], %f6
251
252	cmp	%o3, 16
253	fxor	%f4, %f0, %f0
254	be	1f
255	 fxor	%f6, %f2, %f2
256
257	ldd	[%o0 + 0x10], %f8
258	ldd	[%o0 + 0x18], %f10
259	ldd	[%o0 + 0x20], %f12
260	ldd	[%o0 + 0x28], %f14
261	ldd	[%o0 + 0x30], %f16
262	ldd	[%o0 + 0x38], %f18
263	ldd	[%o0 + 0x40], %f20
264	ldd	[%o0 + 0x48], %f22
265	add	%o0, 0x40, %o0
266
267	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
268
2691:
270	ldd	[%o0 + 0x10], %f8
271	ldd	[%o0 + 0x18], %f10
272	ldd	[%o0 + 0x20], %f12
273	ldd	[%o0 + 0x28], %f14
274	ldd	[%o0 + 0x30], %f16
275	ldd	[%o0 + 0x38], %f18
276	ldd	[%o0 + 0x40], %f20
277	ldd	[%o0 + 0x48], %f22
278	ldd	[%o0 + 0x50], %f24
279	ldd	[%o0 + 0x58], %f26
280	ldd	[%o0 + 0x60], %f28
281	ldd	[%o0 + 0x68], %f30
282	ldd	[%o0 + 0x70], %f32
283	ldd	[%o0 + 0x78], %f34
284	ldd	[%o0 + 0x80], %f36
285	ldd	[%o0 + 0x88], %f38
286	ldd	[%o0 + 0x90], %f40
287	ldd	[%o0 + 0x98], %f42
288	ldd	[%o0 + 0xa0], %f44
289	ldd	[%o0 + 0xa8], %f46
290	ldd	[%o0 + 0xb0], %f48
291	ldd	[%o0 + 0xb8], %f50
292	ldd	[%o0 + 0xc0], %f52
293	ldd	[%o0 + 0xc8], %f54
294
295	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
296	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
297	CAMELLIA_6ROUNDS(40, 0, 2)
298	fxor	%f52, %f2, %f2
299	fxor	%f54, %f0, %f0
300
301	st	%f2, [%o2 + 0x00]
302	st	%f3, [%o2 + 0x04]
303	st	%f0, [%o2 + 0x08]
304	st	%f1, [%o2 + 0x0c]
305
306	retl
307	 VISExit
308ENDPROC(camellia_sparc64_crypt)
309
310	.align	32
311ENTRY(camellia_sparc64_load_keys)
312	/* %o0=key, %o1=key_len */
313	VISEntry
314	ldd	[%o0 + 0x00], %f4
315	ldd	[%o0 + 0x08], %f6
316	ldd	[%o0 + 0x10], %f8
317	ldd	[%o0 + 0x18], %f10
318	ldd	[%o0 + 0x20], %f12
319	ldd	[%o0 + 0x28], %f14
320	ldd	[%o0 + 0x30], %f16
321	ldd	[%o0 + 0x38], %f18
322	ldd	[%o0 + 0x40], %f20
323	ldd	[%o0 + 0x48], %f22
324	ldd	[%o0 + 0x50], %f24
325	ldd	[%o0 + 0x58], %f26
326	ldd	[%o0 + 0x60], %f28
327	ldd	[%o0 + 0x68], %f30
328	ldd	[%o0 + 0x70], %f32
329	ldd	[%o0 + 0x78], %f34
330	ldd	[%o0 + 0x80], %f36
331	ldd	[%o0 + 0x88], %f38
332	ldd	[%o0 + 0x90], %f40
333	ldd	[%o0 + 0x98], %f42
334	ldd	[%o0 + 0xa0], %f44
335	ldd	[%o0 + 0xa8], %f46
336	ldd	[%o0 + 0xb0], %f48
337	ldd	[%o0 + 0xb8], %f50
338	ldd	[%o0 + 0xc0], %f52
339	retl
340	 ldd	[%o0 + 0xc8], %f54
341ENDPROC(camellia_sparc64_load_keys)
342
343	.align	32
344ENTRY(camellia_sparc64_ecb_crypt_3_grand_rounds)
345	/* %o0=input, %o1=output, %o2=len, %o3=key */
3461:	ldd	[%o0 + 0x00], %f0
347	ldd	[%o0 + 0x08], %f2
348	add	%o0, 0x10, %o0
349	fxor	%f4, %f0, %f0
350	fxor	%f6, %f2, %f2
351	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
352	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
353	CAMELLIA_6ROUNDS(40, 0, 2)
354	fxor	%f52, %f2, %f2
355	fxor	%f54, %f0, %f0
356	std	%f2, [%o1 + 0x00]
357	std	%f0, [%o1 + 0x08]
358	subcc	%o2, 0x10, %o2
359	bne,pt	%icc, 1b
360	 add	%o1, 0x10, %o1
361	retl
362	 nop
363ENDPROC(camellia_sparc64_ecb_crypt_3_grand_rounds)
364
365	.align	32
366ENTRY(camellia_sparc64_ecb_crypt_4_grand_rounds)
367	/* %o0=input, %o1=output, %o2=len, %o3=key */
3681:	ldd	[%o0 + 0x00], %f0
369	ldd	[%o0 + 0x08], %f2
370	add	%o0, 0x10, %o0
371	fxor	%f4, %f0, %f0
372	fxor	%f6, %f2, %f2
373	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
374	ldd	[%o3 + 0xd0], %f8
375	ldd	[%o3 + 0xd8], %f10
376	ldd	[%o3 + 0xe0], %f12
377	ldd	[%o3 + 0xe8], %f14
378	ldd	[%o3 + 0xf0], %f16
379	ldd	[%o3 + 0xf8], %f18
380	ldd	[%o3 + 0x100], %f20
381	ldd	[%o3 + 0x108], %f22
382	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
383	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
384	CAMELLIA_F(8, 2, 0, 2)
385	CAMELLIA_F(10, 0, 2, 0)
386	ldd	[%o3 + 0x10], %f8
387	ldd	[%o3 + 0x18], %f10
388	CAMELLIA_F(12, 2, 0, 2)
389	CAMELLIA_F(14, 0, 2, 0)
390	ldd	[%o3 + 0x20], %f12
391	ldd	[%o3 + 0x28], %f14
392	CAMELLIA_F(16, 2, 0, 2)
393	CAMELLIA_F(18, 0, 2, 0)
394	ldd	[%o3 + 0x30], %f16
395	ldd	[%o3 + 0x38], %f18
396	fxor	%f20, %f2, %f2
397	fxor	%f22, %f0, %f0
398	ldd	[%o3 + 0x40], %f20
399	ldd	[%o3 + 0x48], %f22
400	std	%f2, [%o1 + 0x00]
401	std	%f0, [%o1 + 0x08]
402	subcc	%o2, 0x10, %o2
403	bne,pt	%icc, 1b
404	 add	%o1, 0x10, %o1
405	retl
406	 nop
407ENDPROC(camellia_sparc64_ecb_crypt_4_grand_rounds)
408
409	.align	32
410ENTRY(camellia_sparc64_cbc_encrypt_3_grand_rounds)
411	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
412	ldd	[%o4 + 0x00], %f60
413	ldd	[%o4 + 0x08], %f62
4141:	ldd	[%o0 + 0x00], %f0
415	ldd	[%o0 + 0x08], %f2
416	add	%o0, 0x10, %o0
417	fxor	%f60, %f0, %f0
418	fxor	%f62, %f2, %f2
419	fxor	%f4, %f0, %f0
420	fxor	%f6, %f2, %f2
421	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
422	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
423	CAMELLIA_6ROUNDS(40, 0, 2)
424	fxor	%f52, %f2, %f60
425	fxor	%f54, %f0, %f62
426	std	%f60, [%o1 + 0x00]
427	std	%f62, [%o1 + 0x08]
428	subcc	%o2, 0x10, %o2
429	bne,pt	%icc, 1b
430	 add	%o1, 0x10, %o1
431	std	%f60, [%o4 + 0x00]
432	retl
433	 std	%f62, [%o4 + 0x08]
434ENDPROC(camellia_sparc64_cbc_encrypt_3_grand_rounds)
435
436	.align	32
437ENTRY(camellia_sparc64_cbc_encrypt_4_grand_rounds)
438	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
439	ldd	[%o4 + 0x00], %f60
440	ldd	[%o4 + 0x08], %f62
4411:	ldd	[%o0 + 0x00], %f0
442	ldd	[%o0 + 0x08], %f2
443	add	%o0, 0x10, %o0
444	fxor	%f60, %f0, %f0
445	fxor	%f62, %f2, %f2
446	fxor	%f4, %f0, %f0
447	fxor	%f6, %f2, %f2
448	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
449	ldd	[%o3 + 0xd0], %f8
450	ldd	[%o3 + 0xd8], %f10
451	ldd	[%o3 + 0xe0], %f12
452	ldd	[%o3 + 0xe8], %f14
453	ldd	[%o3 + 0xf0], %f16
454	ldd	[%o3 + 0xf8], %f18
455	ldd	[%o3 + 0x100], %f20
456	ldd	[%o3 + 0x108], %f22
457	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
458	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
459	CAMELLIA_F(8, 2, 0, 2)
460	CAMELLIA_F(10, 0, 2, 0)
461	ldd	[%o3 + 0x10], %f8
462	ldd	[%o3 + 0x18], %f10
463	CAMELLIA_F(12, 2, 0, 2)
464	CAMELLIA_F(14, 0, 2, 0)
465	ldd	[%o3 + 0x20], %f12
466	ldd	[%o3 + 0x28], %f14
467	CAMELLIA_F(16, 2, 0, 2)
468	CAMELLIA_F(18, 0, 2, 0)
469	ldd	[%o3 + 0x30], %f16
470	ldd	[%o3 + 0x38], %f18
471	fxor	%f20, %f2, %f60
472	fxor	%f22, %f0, %f62
473	ldd	[%o3 + 0x40], %f20
474	ldd	[%o3 + 0x48], %f22
475	std	%f60, [%o1 + 0x00]
476	std	%f62, [%o1 + 0x08]
477	subcc	%o2, 0x10, %o2
478	bne,pt	%icc, 1b
479	 add	%o1, 0x10, %o1
480	std	%f60, [%o4 + 0x00]
481	retl
482	 std	%f62, [%o4 + 0x08]
483ENDPROC(camellia_sparc64_cbc_encrypt_4_grand_rounds)
484
485	.align	32
486ENTRY(camellia_sparc64_cbc_decrypt_3_grand_rounds)
487	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
488	ldd	[%o4 + 0x00], %f60
489	ldd	[%o4 + 0x08], %f62
4901:	ldd	[%o0 + 0x00], %f56
491	ldd	[%o0 + 0x08], %f58
492	add	%o0, 0x10, %o0
493	fxor	%f4, %f56, %f0
494	fxor	%f6, %f58, %f2
495	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
496	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
497	CAMELLIA_6ROUNDS(40, 0, 2)
498	fxor	%f52, %f2, %f2
499	fxor	%f54, %f0, %f0
500	fxor	%f60, %f2, %f2
501	fxor	%f62, %f0, %f0
502	fsrc2	%f56, %f60
503	fsrc2	%f58, %f62
504	std	%f2, [%o1 + 0x00]
505	std	%f0, [%o1 + 0x08]
506	subcc	%o2, 0x10, %o2
507	bne,pt	%icc, 1b
508	 add	%o1, 0x10, %o1
509	std	%f60, [%o4 + 0x00]
510	retl
511	 std	%f62, [%o4 + 0x08]
512ENDPROC(camellia_sparc64_cbc_decrypt_3_grand_rounds)
513
514	.align	32
515ENTRY(camellia_sparc64_cbc_decrypt_4_grand_rounds)
516	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
517	ldd	[%o4 + 0x00], %f60
518	ldd	[%o4 + 0x08], %f62
5191:	ldd	[%o0 + 0x00], %f56
520	ldd	[%o0 + 0x08], %f58
521	add	%o0, 0x10, %o0
522	fxor	%f4, %f56, %f0
523	fxor	%f6, %f58, %f2
524	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
525	ldd	[%o3 + 0xd0], %f8
526	ldd	[%o3 + 0xd8], %f10
527	ldd	[%o3 + 0xe0], %f12
528	ldd	[%o3 + 0xe8], %f14
529	ldd	[%o3 + 0xf0], %f16
530	ldd	[%o3 + 0xf8], %f18
531	ldd	[%o3 + 0x100], %f20
532	ldd	[%o3 + 0x108], %f22
533	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
534	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
535	CAMELLIA_F(8, 2, 0, 2)
536	CAMELLIA_F(10, 0, 2, 0)
537	ldd	[%o3 + 0x10], %f8
538	ldd	[%o3 + 0x18], %f10
539	CAMELLIA_F(12, 2, 0, 2)
540	CAMELLIA_F(14, 0, 2, 0)
541	ldd	[%o3 + 0x20], %f12
542	ldd	[%o3 + 0x28], %f14
543	CAMELLIA_F(16, 2, 0, 2)
544	CAMELLIA_F(18, 0, 2, 0)
545	ldd	[%o3 + 0x30], %f16
546	ldd	[%o3 + 0x38], %f18
547	fxor	%f20, %f2, %f2
548	fxor	%f22, %f0, %f0
549	ldd	[%o3 + 0x40], %f20
550	ldd	[%o3 + 0x48], %f22
551	fxor	%f60, %f2, %f2
552	fxor	%f62, %f0, %f0
553	fsrc2	%f56, %f60
554	fsrc2	%f58, %f62
555	std	%f2, [%o1 + 0x00]
556	std	%f0, [%o1 + 0x08]
557	subcc	%o2, 0x10, %o2
558	bne,pt	%icc, 1b
559	 add	%o1, 0x10, %o1
560	std	%f60, [%o4 + 0x00]
561	retl
562	 std	%f62, [%o4 + 0x08]
563ENDPROC(camellia_sparc64_cbc_decrypt_4_grand_rounds)
564