xref: /linux/arch/sparc/crypto/camellia_asm.S (revision c8bfe3fad4f86a029da7157bae9699c816f0c309)
1/* SPDX-License-Identifier: GPL-2.0 */
2#include <linux/linkage.h>
3#include <asm/visasm.h>
4
5#include "opcodes.h"
6
/*
 * CAMELLIA_6ROUNDS(KEY_BASE, I0, I1)
 *
 * Expands to six consecutive CAMELLIA_F invocations.  CAMELLIA_F itself is
 * not defined in this file (it comes from "opcodes.h").  The first argument
 * steps through KEY_BASE + 0, 2, ..., 10, and the remaining arguments
 * alternate between (I1, I0, I1) and (I0, I1, I0).
 *
 * NOTE(review): the numeric arguments look like even FP register numbers
 * (e.g. 8 -> %f8) with the last argument being the destination; confirm
 * against the CAMELLIA_F definition in opcodes.h.
 */
7#define CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
8	CAMELLIA_F(KEY_BASE +  0, I1, I0, I1) \
9	CAMELLIA_F(KEY_BASE +  2, I0, I1, I0) \
10	CAMELLIA_F(KEY_BASE +  4, I1, I0, I1) \
11	CAMELLIA_F(KEY_BASE +  6, I0, I1, I0) \
12	CAMELLIA_F(KEY_BASE +  8, I1, I0, I1) \
13	CAMELLIA_F(KEY_BASE + 10, I0, I1, I0)
14
/*
 * CAMELLIA_6ROUNDS_FL_FLI(KEY_BASE, I0, I1)
 *
 * CAMELLIA_6ROUNDS followed by CAMELLIA_FL applied to I0 with key operand
 * KEY_BASE + 12 and CAMELLIA_FLI applied to I1 with key operand
 * KEY_BASE + 14.  CAMELLIA_FL and CAMELLIA_FLI are not defined in this file
 * (they come from "opcodes.h").  One expansion therefore references eight
 * key operands, KEY_BASE, KEY_BASE + 2, ..., KEY_BASE + 14.
 */
15#define CAMELLIA_6ROUNDS_FL_FLI(KEY_BASE, I0, I1) \
16	CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
17	CAMELLIA_FL(KEY_BASE + 12, I0, I0) \
18	CAMELLIA_FLI(KEY_BASE + 14, I1, I1)
19
20	.data
21
22	.align	8
/*
 * Six 64-bit constants used only by camellia_sparc64_key_expand, which loads
 * them into %f16-%f26 with ldd and uses them in its CAMELLIA_F steps.
 * The values match the key-schedule constants Sigma1..Sigma6 listed in
 * RFC 3713 (Camellia).
 */
23SIGMA:	.xword	0xA09E667F3BCC908B
24	.xword	0xB67AE8584CAA73B2
25	.xword	0xC6EF372FE94F82BE
26	.xword	0x54FF53A5F1D36F1C
27	.xword	0x10E527FADE682D1D
28	.xword	0xB05688C2B3E6C1FD
29
30	.text
31
32	.align	32
/*
 * camellia_sparc64_key_expand - fill the encryption and decryption key tables
 *
 *   %o0 = in_key       raw key bytes, read with 32-bit loads
 *   %o1 = encrypt_key  table written here; the existing comments index it
 *                      as 32-bit words k[n] (byte offset = 4 * n)
 *   %o2 = key_len      16 or 24 select dedicated paths; any other value
 *                      takes the path that reads 32 key bytes
 *   %o3 = decrypt_key  table derived from encrypt_key at the end
 *
 * Table size written: 0xd0 bytes (k[0..51]) when key_len == 16, otherwise
 * 0x110 bytes (k[0..67]).  decrypt_key receives the same number of bytes.
 *
 * Registers written directly by the code below: %o0, %o1, %o3, %o4, %o5,
 * %g2, %g3, the integer condition codes, and FP registers in the
 * %f0-%f10 and %f16-%f30 ranges.  VISEntry/VISExit (<asm/visasm.h>) and the
 * CAMELLIA_F / MOVDTOX_* macros ("opcodes.h") are defined elsewhere;
 * presumably VISEntry/VISExit bracket the FPU use - confirm there.
 */
33ENTRY(camellia_sparc64_key_expand)
34	/* %o0=in_key, %o1=encrypt_key, %o2=key_len, %o3=decrypt_key */
35	VISEntry
	/*
	 * First 16 key bytes: stored unchanged as k[0..3] and copied to
	 * %f28/%f30 so they survive the F steps that overwrite %f0/%f2.
	 */
36	ld	[%o0 + 0x00], %f0	! i0, k[0]
37	ld	[%o0 + 0x04], %f1	! i1, k[1]
38	ld	[%o0 + 0x08], %f2	! i2, k[2]
39	ld	[%o0 + 0x0c], %f3	! i3, k[3]
40	std	%f0, [%o1 + 0x00]	! k[0, 1]
41	fsrc2	%f0, %f28
42	std	%f2, [%o1 + 0x08]	! k[2, 3]
	/*
	 * 16-byte key: skip the second-half handling.  The delay slot is
	 * not annulled, so the copy into %f30 happens on both paths.
	 */
43	cmp	%o2, 16
44	be	10f
45	 fsrc2	%f2, %f30
46
	/* Longer keys: key bytes 16..23 become k[8, 9]. */
47	ld	[%o0 + 0x10], %f0
48	ld	[%o0 + 0x14], %f1
49	std	%f0, [%o1 + 0x20]	! k[8, 9]
50	cmp	%o2, 24
	/* %f10 = all ones, so the fxor below yields the complement of %f0. */
51	fone	%f10
	/*
	 * key_len == 24: the branch is annulled (",a"), so its delay slot
	 * executes only when the branch is taken and sets %f2 = ~%f0.
	 * Otherwise fall through and load key bytes 24..31 into %f2/%f3.
	 */
52	be,a	1f
53	 fxor	%f10, %f0, %f2
54	ld	[%o0 + 0x18], %f2
55	ld	[%o0 + 0x1c], %f3
561:
57	std	%f2, [%o1 + 0x28]	! k[10, 11]
	/* XOR the second key half with the saved first half. */
58	fxor	%f28, %f0, %f0
59	fxor	%f30, %f2, %f2
60
	/*
	 * Common path.  For 16-byte keys %f0/%f2 still hold the first 16 key
	 * bytes here.  %g3 = address of SIGMA, built from its %hi/%lo parts;
	 * the six constants go to %f16-%f26.
	 */
6110:
62	sethi	%hi(SIGMA), %g3
63	or	%g3, %lo(SIGMA), %g3
64	ldd	[%g3 + 0x00], %f16
65	ldd	[%g3 + 0x08], %f18
66	ldd	[%g3 + 0x10], %f20
67	ldd	[%g3 + 0x18], %f22
68	ldd	[%g3 + 0x20], %f24
69	ldd	[%g3 + 0x28], %f26
	/*
	 * Two F steps with operands 16 and 18, XOR the saved first key half
	 * back in, then two more F steps with operands 20 and 22.
	 * NOTE(review): the operand numbers appear to name %f16..%f22, i.e.
	 * the first four SIGMA constants; confirm in opcodes.h.
	 */
70	CAMELLIA_F(16, 2, 0, 2)
71	CAMELLIA_F(18, 0, 2, 0)
72	fxor	%f28, %f0, %f0
73	fxor	%f30, %f2, %f2
74	CAMELLIA_F(20, 2, 0, 2)
75	CAMELLIA_F(22, 0, 2, 0)
76
	/*
	 * ROTL128(S01, S23, TMP1, TMP2, N): rotate the 128-bit value S01:S23
	 * (S01 holds the upper 64 bits) left by N bits, in place.  TMP1 and
	 * TMP2 are scratch registers.  The bits shifted out of the top of
	 * each half are OR-ed into the bottom of the other half; the shift
	 * counts (N and 64 - N) are only meaningful for 0 < N < 64.
	 */
77#define ROTL128(S01, S23, TMP1, TMP2, N)	\
78	srlx	S01, (64 - N), TMP1;		\
79	sllx	S01, N, S01;			\
80	srlx	S23, (64 - N), TMP2;		\
81	sllx	S23, N, S23;			\
82	or	S01, TMP2, S01;			\
83	or	S23, TMP1, S23
84
85	cmp	%o2, 16
86	bne	1f
87	 nop
88	/* 128-bit key */
	/* The result of the F steps above is stored as k[4..7]. */
89	std	%f0, [%o1 + 0x10]	! k[ 4,  5]
90	std	%f2, [%o1 + 0x18]	! k[ 6,  7]
	/*
	 * MOVDTOX_F0_O4 / MOVDTOX_F2_O5 come from opcodes.h; presumably they
	 * copy %f0 / %f2 into %o4 / %o5 - confirm there.  The rotations below
	 * are cumulative on %o4:%o5 (15, 30, 45, 60, 94, 111 bits in total).
	 */
91	MOVDTOX_F0_O4
92	MOVDTOX_F2_O5
93	ROTL128(%o4, %o5, %g2, %g3, 15)
94	stx	%o4, [%o1 + 0x30]	! k[12, 13]
95	stx	%o5, [%o1 + 0x38]	! k[14, 15]
96	ROTL128(%o4, %o5, %g2, %g3, 15)
97	stx	%o4, [%o1 + 0x40]	! k[16, 17]
98	stx	%o5, [%o1 + 0x48]	! k[18, 19]
99	ROTL128(%o4, %o5, %g2, %g3, 15)
	/* Only the upper 64 bits of this rotation are stored. */
100	stx	%o4, [%o1 + 0x60]	! k[24, 25]
101	ROTL128(%o4, %o5, %g2, %g3, 15)
102	stx	%o4, [%o1 + 0x70]	! k[28, 29]
103	stx	%o5, [%o1 + 0x78]	! k[30, 31]
104	ROTL128(%o4, %o5, %g2, %g3, 34)
105	stx	%o4, [%o1 + 0xa0]	! k[40, 41]
106	stx	%o5, [%o1 + 0xa8]	! k[42, 43]
107	ROTL128(%o4, %o5, %g2, %g3, 17)
108	stx	%o4, [%o1 + 0xc0]	! k[48, 49]
109	stx	%o5, [%o1 + 0xc8]	! k[50, 51]
110
	/*
	 * Same scheme starting from the raw key stored in k[0..3]
	 * (cumulative rotations 15, 45, 60, 77, 94, 111 bits).
	 */
111	ldx	[%o1 + 0x00], %o4	! k[ 0,  1]
112	ldx	[%o1 + 0x08], %o5	! k[ 2,  3]
113	ROTL128(%o4, %o5, %g2, %g3, 15)
114	stx	%o4, [%o1 + 0x20]	! k[ 8,  9]
115	stx	%o5, [%o1 + 0x28]	! k[10, 11]
116	ROTL128(%o4, %o5, %g2, %g3, 30)
117	stx	%o4, [%o1 + 0x50]	! k[20, 21]
118	stx	%o5, [%o1 + 0x58]	! k[22, 23]
119	ROTL128(%o4, %o5, %g2, %g3, 15)
	/* Only the lower 64 bits of this rotation are stored. */
120	stx	%o5, [%o1 + 0x68]	! k[26, 27]
121	ROTL128(%o4, %o5, %g2, %g3, 17)
122	stx	%o4, [%o1 + 0x80]	! k[32, 33]
123	stx	%o5, [%o1 + 0x88]	! k[34, 35]
124	ROTL128(%o4, %o5, %g2, %g3, 17)
125	stx	%o4, [%o1 + 0x90]	! k[36, 37]
126	stx	%o5, [%o1 + 0x98]	! k[38, 39]
127	ROTL128(%o4, %o5, %g2, %g3, 17)
128	stx	%o4, [%o1 + 0xb0]	! k[44, 45]
129	stx	%o5, [%o1 + 0xb8]	! k[46, 47]
130
	/*
	 * %o0 = 192 = byte offset of the last 16 bytes of this table
	 * (k[48..51]); set in the delay slot of the jump to the copy code.
	 */
131	ba,pt	%xcc, 2f
132	 mov	(3 * 16 * 4), %o0
133
1341:
135	/* 192-bit or 256-bit key */
	/*
	 * Park the F-step result in k[12..15] (it is reloaded, rotated and
	 * rewritten further down), then XOR it with the second key half
	 * stored earlier in k[8..11].
	 */
136	std	%f0, [%o1 + 0x30]	! k[12, 13]
137	std	%f2, [%o1 + 0x38]	! k[14, 15]
138	ldd	[%o1 + 0x20], %f4	! k[ 8,  9]
139	ldd	[%o1 + 0x28], %f6	! k[10, 11]
140	fxor	%f0, %f4, %f0
141	fxor	%f2, %f6, %f2
	/*
	 * Two more F steps with operands 24 and 26 (NOTE(review): apparently
	 * %f24/%f26, the last two SIGMA constants).  Result goes to k[4..7].
	 */
142	CAMELLIA_F(24, 2, 0, 2)
143	CAMELLIA_F(26, 0, 2, 0)
144	std	%f0, [%o1 + 0x10]	! k[ 4,  5]
145	std	%f2, [%o1 + 0x18]	! k[ 6,  7]
	/* Rotations of that value (cumulative 30, 60, 111 bits). */
146	MOVDTOX_F0_O4
147	MOVDTOX_F2_O5
148	ROTL128(%o4, %o5, %g2, %g3, 30)
149	stx	%o4, [%o1 + 0x50]	! k[20, 21]
150	stx	%o5, [%o1 + 0x58]	! k[22, 23]
151	ROTL128(%o4, %o5, %g2, %g3, 30)
152	stx	%o4, [%o1 + 0xa0]	! k[40, 41]
153	stx	%o5, [%o1 + 0xa8]	! k[42, 43]
154	ROTL128(%o4, %o5, %g2, %g3, 51)
155	stx	%o4, [%o1 + 0x100]	! k[64, 65]
156	stx	%o5, [%o1 + 0x108]	! k[66, 67]
	/*
	 * Rotations of the second key half from k[8..11]; the first result
	 * replaces k[8..11] in place (cumulative 15, 30, 60, 94 bits).
	 */
157	ldx	[%o1 + 0x20], %o4	! k[ 8,  9]
158	ldx	[%o1 + 0x28], %o5	! k[10, 11]
159	ROTL128(%o4, %o5, %g2, %g3, 15)
160	stx	%o4, [%o1 + 0x20]	! k[ 8,  9]
161	stx	%o5, [%o1 + 0x28]	! k[10, 11]
162	ROTL128(%o4, %o5, %g2, %g3, 15)
163	stx	%o4, [%o1 + 0x40]	! k[16, 17]
164	stx	%o5, [%o1 + 0x48]	! k[18, 19]
165	ROTL128(%o4, %o5, %g2, %g3, 30)
166	stx	%o4, [%o1 + 0x90]	! k[36, 37]
167	stx	%o5, [%o1 + 0x98]	! k[38, 39]
168	ROTL128(%o4, %o5, %g2, %g3, 34)
169	stx	%o4, [%o1 + 0xd0]	! k[52, 53]
170	stx	%o5, [%o1 + 0xd8]	! k[54, 55]
	/*
	 * Rotations of the value parked in k[12..15]; the first result
	 * replaces k[12..15] in place (cumulative 15, 45 bits).
	 */
171	ldx	[%o1 + 0x30], %o4	! k[12, 13]
172	ldx	[%o1 + 0x38], %o5	! k[14, 15]
173	ROTL128(%o4, %o5, %g2, %g3, 15)
174	stx	%o4, [%o1 + 0x30]	! k[12, 13]
175	stx	%o5, [%o1 + 0x38]	! k[14, 15]
176	ROTL128(%o4, %o5, %g2, %g3, 30)
177	stx	%o4, [%o1 + 0x70]	! k[28, 29]
178	stx	%o5, [%o1 + 0x78]	! k[30, 31]
	/*
	 * Store %o4:%o5 rotated left by a further 32 bits without modifying
	 * the registers: the four 32-bit words are written in the order
	 * low(%o4), high(%o5), low(%o5), high(%o4).
	 */
179	srlx	%o4, 32, %g2
180	srlx	%o5, 32, %g3
181	stw	%o4, [%o1 + 0xc0]	! k[48]
182	stw	%g3, [%o1 + 0xc4]	! k[49]
183	stw	%o5, [%o1 + 0xc8]	! k[50]
184	stw	%g2, [%o1 + 0xcc]	! k[51]
	/* Continues from the 45-bit rotation (cumulative 94 bits). */
185	ROTL128(%o4, %o5, %g2, %g3, 49)
186	stx	%o4, [%o1 + 0xe0]	! k[56, 57]
187	stx	%o5, [%o1 + 0xe8]	! k[58, 59]
	/* Rotations of the raw key in k[0..3] (cumulative 45, 60, 77, 111). */
188	ldx	[%o1 + 0x00], %o4	! k[ 0,  1]
189	ldx	[%o1 + 0x08], %o5	! k[ 2,  3]
190	ROTL128(%o4, %o5, %g2, %g3, 45)
191	stx	%o4, [%o1 + 0x60]	! k[24, 25]
192	stx	%o5, [%o1 + 0x68]	! k[26, 27]
193	ROTL128(%o4, %o5, %g2, %g3, 15)
194	stx	%o4, [%o1 + 0x80]	! k[32, 33]
195	stx	%o5, [%o1 + 0x88]	! k[34, 35]
196	ROTL128(%o4, %o5, %g2, %g3, 17)
197	stx	%o4, [%o1 + 0xb0]	! k[44, 45]
198	stx	%o5, [%o1 + 0xb8]	! k[46, 47]
199	ROTL128(%o4, %o5, %g2, %g3, 34)
200	stx	%o4, [%o1 + 0xf0]	! k[60, 61]
201	stx	%o5, [%o1 + 0xf8]	! k[62, 63]
	/* %o0 = 256 = byte offset of the last 16 bytes (k[64..67]). */
202	mov	(4 * 16 * 4), %o0
	/*
	 * Build decrypt_key.  Point %o1 at the last 16 bytes of encrypt_key
	 * and copy them, in the same order, to the start of decrypt_key.
	 */
2032:
204	add	%o1, %o0, %o1
205	ldd	[%o1 + 0x00], %f0
206	ldd	[%o1 + 0x08], %f2
207	std	%f0, [%o3 + 0x00]
208	std	%f2, [%o3 + 0x08]
209	add	%o3, 0x10, %o3
	/*
	 * Walk encrypt_key backwards 64 bytes per iteration and write the
	 * eight 64-bit words of each chunk to decrypt_key in reverse order.
	 * %o0 counts the bytes still to copy; the loop ends when it hits 0,
	 * i.e. after the chunk at the very start of encrypt_key.
	 */
2101:
211	sub	%o1, (16 * 4), %o1
212	ldd	[%o1 + 0x38], %f0
213	ldd	[%o1 + 0x30], %f2
214	ldd	[%o1 + 0x28], %f4
215	ldd	[%o1 + 0x20], %f6
216	ldd	[%o1 + 0x18], %f8
217	ldd	[%o1 + 0x10], %f10
218	std	%f0, [%o3 + 0x00]
219	std	%f2, [%o3 + 0x08]
220	std	%f4, [%o3 + 0x10]
221	std	%f6, [%o3 + 0x18]
222	std	%f8, [%o3 + 0x20]
223	std	%f10, [%o3 + 0x28]
224
225	ldd	[%o1 + 0x08], %f0
226	ldd	[%o1 + 0x00], %f2
227	std	%f0, [%o3 + 0x30]
228	std	%f2, [%o3 + 0x38]
229	subcc	%o0, (16 * 4), %o0
230	bne,pt	%icc, 1b
231	 add	%o3, (16 * 4), %o3
232
	/*
	 * %o3 has already been advanced past the last chunk.  Rewrite its
	 * final two words so that the first 16 bytes of encrypt_key
	 * (%f2 = bytes 0..7, %f0 = bytes 8..15) end decrypt_key in their
	 * original order instead of swapped.
	 */
233	std	%f2, [%o3 - 0x10]
234	std	%f0, [%o3 - 0x08]
235
236	retl
237	 VISExit
238ENDPROC(camellia_sparc64_key_expand)
239
240	.align	32
/*
 * camellia_sparc64_crypt - transform one 16-byte block
 *
 *   %o0 = key      expanded key table (presumably either table produced by
 *                  camellia_sparc64_key_expand - confirm against the caller)
 *   %o1 = input    16 bytes, read with four 32-bit loads
 *   %o2 = output   16 bytes, written with four 32-bit stores
 *   %o3 = key_len  16 takes the short path; any other value runs one extra
 *                  CAMELLIA_6ROUNDS_FL_FLI group first
 *
 * Writes %o0 (non-16 path only), the integer condition codes and FP
 * registers %f0-%f54, and brackets the work with VISEntry/VISExit from
 * <asm/visasm.h>.
 * NOTE(review): the numeric arguments of the round macros appear to name
 * the FP registers loaded just before them; confirm in opcodes.h.
 */
241ENTRY(camellia_sparc64_crypt)
242	/* %o0=key, %o1=input, %o2=output, %o3=key_len */
243	VISEntry
244
	/* Input block -> %f0:%f1 (bytes 0..7) and %f2:%f3 (bytes 8..15). */
245	ld	[%o1 + 0x00], %f0
246	ld	[%o1 + 0x04], %f1
247	ld	[%o1 + 0x08], %f2
248	ld	[%o1 + 0x0c], %f3
249
	/* First 16 key bytes are XORed into the block before any round. */
250	ldd	[%o0 + 0x00], %f4
251	ldd	[%o0 + 0x08], %f6
252
253	cmp	%o3, 16
254	fxor	%f4, %f0, %f0
	/* The second XOR sits in the delay slot and runs on both paths. */
255	be	1f
256	 fxor	%f6, %f2, %f2
257
	/*
	 * key_len != 16: run one round group with key bytes 0x10..0x4f, then
	 * advance the key pointer by 0x40 so the common code below picks up
	 * the key material that follows.
	 */
258	ldd	[%o0 + 0x10], %f8
259	ldd	[%o0 + 0x18], %f10
260	ldd	[%o0 + 0x20], %f12
261	ldd	[%o0 + 0x28], %f14
262	ldd	[%o0 + 0x30], %f16
263	ldd	[%o0 + 0x38], %f18
264	ldd	[%o0 + 0x40], %f20
265	ldd	[%o0 + 0x48], %f22
266	add	%o0, 0x40, %o0
267
268	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
269
	/*
	 * Common tail: load 0xc0 bytes of key material starting at
	 * %o0 + 0x10 into %f8-%f54 (the last 16 bytes, %f52/%f54, are used
	 * for the final XOR).
	 */
2701:
271	ldd	[%o0 + 0x10], %f8
272	ldd	[%o0 + 0x18], %f10
273	ldd	[%o0 + 0x20], %f12
274	ldd	[%o0 + 0x28], %f14
275	ldd	[%o0 + 0x30], %f16
276	ldd	[%o0 + 0x38], %f18
277	ldd	[%o0 + 0x40], %f20
278	ldd	[%o0 + 0x48], %f22
279	ldd	[%o0 + 0x50], %f24
280	ldd	[%o0 + 0x58], %f26
281	ldd	[%o0 + 0x60], %f28
282	ldd	[%o0 + 0x68], %f30
283	ldd	[%o0 + 0x70], %f32
284	ldd	[%o0 + 0x78], %f34
285	ldd	[%o0 + 0x80], %f36
286	ldd	[%o0 + 0x88], %f38
287	ldd	[%o0 + 0x90], %f40
288	ldd	[%o0 + 0x98], %f42
289	ldd	[%o0 + 0xa0], %f44
290	ldd	[%o0 + 0xa8], %f46
291	ldd	[%o0 + 0xb0], %f48
292	ldd	[%o0 + 0xb8], %f50
293	ldd	[%o0 + 0xc0], %f52
294	ldd	[%o0 + 0xc8], %f54
295
	/* Two round groups with FL/FLI, then a final group of six rounds. */
296	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
297	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
298	CAMELLIA_6ROUNDS(40, 0, 2)
	/* Final key XOR; note the halves are paired crosswise (%f52 with %f2). */
299	fxor	%f52, %f2, %f2
300	fxor	%f54, %f0, %f0
301
	/* Output is written with the halves swapped: %f2 first, then %f0. */
302	st	%f2, [%o2 + 0x00]
303	st	%f3, [%o2 + 0x04]
304	st	%f0, [%o2 + 0x08]
305	st	%f1, [%o2 + 0x0c]
306
307	retl
308	 VISExit
309ENDPROC(camellia_sparc64_crypt)
310
311	.align	32
/*
 * camellia_sparc64_load_keys - preload key material into FP registers
 *
 *   %o0 = key      expanded key table; bytes 0x00..0xcf are read
 *   %o1 = key_len  not read by this routine
 *
 * Loads 26 consecutive 64-bit words from the key table into %f4, %f6, ...,
 * %f54.  The routine issues VISEntry but no VISExit, and returns with those
 * registers still loaded; the *_grand_rounds routines in this file use
 * %f4-%f54 without loading them, so they presumably rely on this call having
 * been made first (and on the caller ending the VIS section) - confirm
 * against the C glue code.
 */
312ENTRY(camellia_sparc64_load_keys)
313	/* %o0=key, %o1=key_len */
314	VISEntry
315	ldd	[%o0 + 0x00], %f4
316	ldd	[%o0 + 0x08], %f6
317	ldd	[%o0 + 0x10], %f8
318	ldd	[%o0 + 0x18], %f10
319	ldd	[%o0 + 0x20], %f12
320	ldd	[%o0 + 0x28], %f14
321	ldd	[%o0 + 0x30], %f16
322	ldd	[%o0 + 0x38], %f18
323	ldd	[%o0 + 0x40], %f20
324	ldd	[%o0 + 0x48], %f22
325	ldd	[%o0 + 0x50], %f24
326	ldd	[%o0 + 0x58], %f26
327	ldd	[%o0 + 0x60], %f28
328	ldd	[%o0 + 0x68], %f30
329	ldd	[%o0 + 0x70], %f32
330	ldd	[%o0 + 0x78], %f34
331	ldd	[%o0 + 0x80], %f36
332	ldd	[%o0 + 0x88], %f38
333	ldd	[%o0 + 0x90], %f40
334	ldd	[%o0 + 0x98], %f42
335	ldd	[%o0 + 0xa0], %f44
336	ldd	[%o0 + 0xa8], %f46
337	ldd	[%o0 + 0xb0], %f48
338	ldd	[%o0 + 0xb8], %f50
339	ldd	[%o0 + 0xc0], %f52
	/* The last load is done in the delay slot of the return. */
340	retl
341	 ldd	[%o0 + 0xc8], %f54
342ENDPROC(camellia_sparc64_load_keys)
343
344	.align	32
/*
 * camellia_sparc64_ecb_crypt_3_grand_rounds - ECB loop, three round groups
 *
 *   %o0 = input   read 16 bytes per iteration with 64-bit loads
 *   %o1 = output  written 16 bytes per iteration with 64-bit stores
 *   %o2 = len     byte count
 *   %o3 = key     not read by this routine
 *
 * The key material is taken from %f4-%f54, which this routine never loads
 * (presumably set up by camellia_sparc64_load_keys - confirm against the
 * caller).  No VISEntry/VISExit is issued here.
 *
 * The loop is bottom-tested: one block is always processed, and the loop
 * exits only when subtracting 16 leaves a zero result in %icc.  len is
 * therefore expected to be a non-zero multiple of 16.
 */
345ENTRY(camellia_sparc64_ecb_crypt_3_grand_rounds)
346	/* %o0=input, %o1=output, %o2=len, %o3=key */
3471:	ldd	[%o0 + 0x00], %f0
348	ldd	[%o0 + 0x08], %f2
349	add	%o0, 0x10, %o0
	/* XOR the block with the key words held in %f4/%f6. */
350	fxor	%f4, %f0, %f0
351	fxor	%f6, %f2, %f2
352	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
353	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
354	CAMELLIA_6ROUNDS(40, 0, 2)
	/* Final key XOR; the halves are stored swapped (%f2 first). */
355	fxor	%f52, %f2, %f2
356	fxor	%f54, %f0, %f0
357	std	%f2, [%o1 + 0x00]
358	std	%f0, [%o1 + 0x08]
359	subcc	%o2, 0x10, %o2
	/* The output pointer is advanced in the branch delay slot. */
360	bne,pt	%icc, 1b
361	 add	%o1, 0x10, %o1
362	retl
363	 nop
364ENDPROC(camellia_sparc64_ecb_crypt_3_grand_rounds)
365
366	.align	32
/*
 * camellia_sparc64_ecb_crypt_4_grand_rounds - ECB loop, four round groups
 *
 *   %o0 = input   read 16 bytes per iteration with 64-bit loads
 *   %o1 = output  written 16 bytes per iteration with 64-bit stores
 *   %o2 = len     byte count
 *   %o3 = key     expanded key table; offsets 0x10..0x4f and 0xd0..0x10f
 *                 are re-read on every iteration
 *
 * On entry %f4-%f54 are expected to hold key bytes 0x00..0xcf already (this
 * routine does not load them initially; presumably
 * camellia_sparc64_load_keys did - confirm against the caller).  %f8-%f22
 * are reused within each iteration for the key bytes at 0xd0..0x10f
 * (presumably because the longer schedule does not fit in the registers at
 * once) and are restored from 0x10..0x4f before the next iteration.
 *
 * The loop is bottom-tested and exits only when subtracting 16 leaves a
 * zero result in %icc, so len is expected to be a non-zero multiple of 16.
 * No VISEntry/VISExit is issued here.
 */
367ENTRY(camellia_sparc64_ecb_crypt_4_grand_rounds)
368	/* %o0=input, %o1=output, %o2=len, %o3=key */
3691:	ldd	[%o0 + 0x00], %f0
370	ldd	[%o0 + 0x08], %f2
371	add	%o0, 0x10, %o0
	/* XOR the block with the key words held in %f4/%f6. */
372	fxor	%f4, %f0, %f0
373	fxor	%f6, %f2, %f2
374	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	/*
	 * The first group is done with %f8-%f22; refill them with the tail
	 * of the key table for use after the groups at 24 and 40.
	 */
375	ldd	[%o3 + 0xd0], %f8
376	ldd	[%o3 + 0xd8], %f10
377	ldd	[%o3 + 0xe0], %f12
378	ldd	[%o3 + 0xe8], %f14
379	ldd	[%o3 + 0xf0], %f16
380	ldd	[%o3 + 0xf8], %f18
381	ldd	[%o3 + 0x100], %f20
382	ldd	[%o3 + 0x108], %f22
383	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
384	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
	/*
	 * Last six rounds, written out so that each pair of key registers
	 * can be reloaded with its first-group contents as soon as the two
	 * rounds using it have been issued.
	 */
385	CAMELLIA_F(8, 2, 0, 2)
386	CAMELLIA_F(10, 0, 2, 0)
387	ldd	[%o3 + 0x10], %f8
388	ldd	[%o3 + 0x18], %f10
389	CAMELLIA_F(12, 2, 0, 2)
390	CAMELLIA_F(14, 0, 2, 0)
391	ldd	[%o3 + 0x20], %f12
392	ldd	[%o3 + 0x28], %f14
393	CAMELLIA_F(16, 2, 0, 2)
394	CAMELLIA_F(18, 0, 2, 0)
395	ldd	[%o3 + 0x30], %f16
396	ldd	[%o3 + 0x38], %f18
	/* Final key XOR with the words from 0x100/0x108, then restore them. */
397	fxor	%f20, %f2, %f2
398	fxor	%f22, %f0, %f0
399	ldd	[%o3 + 0x40], %f20
400	ldd	[%o3 + 0x48], %f22
	/* The halves are stored swapped (%f2 first). */
401	std	%f2, [%o1 + 0x00]
402	std	%f0, [%o1 + 0x08]
403	subcc	%o2, 0x10, %o2
	/* The output pointer is advanced in the branch delay slot. */
404	bne,pt	%icc, 1b
405	 add	%o1, 0x10, %o1
406	retl
407	 nop
408ENDPROC(camellia_sparc64_ecb_crypt_4_grand_rounds)
409
410	.align	32
/*
 * camellia_sparc64_cbc_encrypt_3_grand_rounds - CBC encrypt loop,
 * three round groups
 *
 *   %o0 = input   read 16 bytes per iteration with 64-bit loads
 *   %o1 = output  written 16 bytes per iteration with 64-bit stores
 *   %o2 = len     byte count
 *   %o3 = key     not read by this routine
 *   %o4 = IV      16 bytes, read on entry and overwritten on exit with the
 *                 last output block
 *
 * %f60/%f62 hold the chaining value.  Key material is taken from %f4-%f54,
 * which this routine never loads (presumably set up by
 * camellia_sparc64_load_keys - confirm against the caller).  No
 * VISEntry/VISExit is issued here.
 *
 * The loop is bottom-tested and exits only when subtracting 16 leaves a
 * zero result in %icc, so len is expected to be a non-zero multiple of 16.
 */
411ENTRY(camellia_sparc64_cbc_encrypt_3_grand_rounds)
412	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
413	ldd	[%o4 + 0x00], %f60
414	ldd	[%o4 + 0x08], %f62
4151:	ldd	[%o0 + 0x00], %f0
416	ldd	[%o0 + 0x08], %f2
417	add	%o0, 0x10, %o0
	/* XOR in the chaining value, then the key words in %f4/%f6. */
418	fxor	%f60, %f0, %f0
419	fxor	%f62, %f2, %f2
420	fxor	%f4, %f0, %f0
421	fxor	%f6, %f2, %f2
422	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
423	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
424	CAMELLIA_6ROUNDS(40, 0, 2)
	/*
	 * Final key XOR written straight into %f60/%f62 (halves swapped), so
	 * the output block is also the chaining value for the next block.
	 */
425	fxor	%f52, %f2, %f60
426	fxor	%f54, %f0, %f62
427	std	%f60, [%o1 + 0x00]
428	std	%f62, [%o1 + 0x08]
429	subcc	%o2, 0x10, %o2
	/* The output pointer is advanced in the branch delay slot. */
430	bne,pt	%icc, 1b
431	 add	%o1, 0x10, %o1
	/* Write the final chaining value back; second half in the delay slot. */
432	std	%f60, [%o4 + 0x00]
433	retl
434	 std	%f62, [%o4 + 0x08]
435ENDPROC(camellia_sparc64_cbc_encrypt_3_grand_rounds)
436
437	.align	32
/*
 * camellia_sparc64_cbc_encrypt_4_grand_rounds - CBC encrypt loop,
 * four round groups
 *
 *   %o0 = input   read 16 bytes per iteration with 64-bit loads
 *   %o1 = output  written 16 bytes per iteration with 64-bit stores
 *   %o2 = len     byte count
 *   %o3 = key     expanded key table; offsets 0x10..0x4f and 0xd0..0x10f
 *                 are re-read on every iteration
 *   %o4 = IV      16 bytes, read on entry and overwritten on exit with the
 *                 last output block
 *
 * %f60/%f62 hold the chaining value.  On entry %f4-%f54 are expected to
 * hold key bytes 0x00..0xcf already (presumably loaded by
 * camellia_sparc64_load_keys - confirm against the caller); %f8-%f22 are
 * reused for key bytes 0xd0..0x10f inside each iteration and restored from
 * 0x10..0x4f before the next one.  No VISEntry/VISExit is issued here.
 *
 * The loop is bottom-tested and exits only when subtracting 16 leaves a
 * zero result in %icc, so len is expected to be a non-zero multiple of 16.
 */
438ENTRY(camellia_sparc64_cbc_encrypt_4_grand_rounds)
439	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
440	ldd	[%o4 + 0x00], %f60
441	ldd	[%o4 + 0x08], %f62
4421:	ldd	[%o0 + 0x00], %f0
443	ldd	[%o0 + 0x08], %f2
444	add	%o0, 0x10, %o0
	/* XOR in the chaining value, then the key words in %f4/%f6. */
445	fxor	%f60, %f0, %f0
446	fxor	%f62, %f2, %f2
447	fxor	%f4, %f0, %f0
448	fxor	%f6, %f2, %f2
449	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	/* Refill %f8-%f22 with the tail of the key table. */
450	ldd	[%o3 + 0xd0], %f8
451	ldd	[%o3 + 0xd8], %f10
452	ldd	[%o3 + 0xe0], %f12
453	ldd	[%o3 + 0xe8], %f14
454	ldd	[%o3 + 0xf0], %f16
455	ldd	[%o3 + 0xf8], %f18
456	ldd	[%o3 + 0x100], %f20
457	ldd	[%o3 + 0x108], %f22
458	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
459	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
	/*
	 * Last six rounds, interleaved with reloading each key register pair
	 * with its first-group contents once its two rounds are issued.
	 */
460	CAMELLIA_F(8, 2, 0, 2)
461	CAMELLIA_F(10, 0, 2, 0)
462	ldd	[%o3 + 0x10], %f8
463	ldd	[%o3 + 0x18], %f10
464	CAMELLIA_F(12, 2, 0, 2)
465	CAMELLIA_F(14, 0, 2, 0)
466	ldd	[%o3 + 0x20], %f12
467	ldd	[%o3 + 0x28], %f14
468	CAMELLIA_F(16, 2, 0, 2)
469	CAMELLIA_F(18, 0, 2, 0)
470	ldd	[%o3 + 0x30], %f16
471	ldd	[%o3 + 0x38], %f18
	/*
	 * Final key XOR written straight into %f60/%f62 (halves swapped), so
	 * the output block is also the next chaining value; then restore
	 * %f20/%f22.
	 */
472	fxor	%f20, %f2, %f60
473	fxor	%f22, %f0, %f62
474	ldd	[%o3 + 0x40], %f20
475	ldd	[%o3 + 0x48], %f22
476	std	%f60, [%o1 + 0x00]
477	std	%f62, [%o1 + 0x08]
478	subcc	%o2, 0x10, %o2
	/* The output pointer is advanced in the branch delay slot. */
479	bne,pt	%icc, 1b
480	 add	%o1, 0x10, %o1
	/* Write the final chaining value back; second half in the delay slot. */
481	std	%f60, [%o4 + 0x00]
482	retl
483	 std	%f62, [%o4 + 0x08]
484ENDPROC(camellia_sparc64_cbc_encrypt_4_grand_rounds)
485
486	.align	32
/*
 * camellia_sparc64_cbc_decrypt_3_grand_rounds - CBC decrypt loop,
 * three round groups
 *
 *   %o0 = input   read 16 bytes per iteration with 64-bit loads
 *   %o1 = output  written 16 bytes per iteration with 64-bit stores
 *   %o2 = len     byte count
 *   %o3 = key     not read by this routine
 *   %o4 = IV      16 bytes, read on entry and overwritten on exit with the
 *                 last input block
 *
 * %f60/%f62 hold the chaining value and %f56/%f58 keep a copy of the
 * current input block.  Key material is taken from %f4-%f54, which this
 * routine never loads (presumably set up by camellia_sparc64_load_keys with
 * the decryption table - confirm against the caller).  No VISEntry/VISExit
 * is issued here.
 *
 * The loop is bottom-tested and exits only when subtracting 16 leaves a
 * zero result in %icc, so len is expected to be a non-zero multiple of 16.
 */
487ENTRY(camellia_sparc64_cbc_decrypt_3_grand_rounds)
488	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
489	ldd	[%o4 + 0x00], %f60
490	ldd	[%o4 + 0x08], %f62
4911:	ldd	[%o0 + 0x00], %f56
492	ldd	[%o0 + 0x08], %f58
493	add	%o0, 0x10, %o0
	/* Work on %f0/%f2 so the input block stays intact in %f56/%f58. */
494	fxor	%f4, %f56, %f0
495	fxor	%f6, %f58, %f2
496	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
497	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
498	CAMELLIA_6ROUNDS(40, 0, 2)
	/* Final key XOR (halves swapped: %f2 is now the first output half). */
499	fxor	%f52, %f2, %f2
500	fxor	%f54, %f0, %f0
	/* XOR with the chaining value to obtain the output block. */
501	fxor	%f60, %f2, %f2
502	fxor	%f62, %f0, %f0
	/* The input block just consumed becomes the next chaining value. */
503	fsrc2	%f56, %f60
504	fsrc2	%f58, %f62
505	std	%f2, [%o1 + 0x00]
506	std	%f0, [%o1 + 0x08]
507	subcc	%o2, 0x10, %o2
	/* The output pointer is advanced in the branch delay slot. */
508	bne,pt	%icc, 1b
509	 add	%o1, 0x10, %o1
	/* Write the final chaining value back; second half in the delay slot. */
510	std	%f60, [%o4 + 0x00]
511	retl
512	 std	%f62, [%o4 + 0x08]
513ENDPROC(camellia_sparc64_cbc_decrypt_3_grand_rounds)
514
515	.align	32
/*
 * camellia_sparc64_cbc_decrypt_4_grand_rounds - CBC decrypt loop,
 * four round groups
 *
 *   %o0 = input   read 16 bytes per iteration with 64-bit loads
 *   %o1 = output  written 16 bytes per iteration with 64-bit stores
 *   %o2 = len     byte count
 *   %o3 = key     expanded key table; offsets 0x10..0x4f and 0xd0..0x10f
 *                 are re-read on every iteration
 *   %o4 = IV      16 bytes, read on entry and overwritten on exit with the
 *                 last input block
 *
 * %f60/%f62 hold the chaining value and %f56/%f58 keep a copy of the
 * current input block.  On entry %f4-%f54 are expected to hold key bytes
 * 0x00..0xcf already (presumably loaded by camellia_sparc64_load_keys with
 * the decryption table - confirm against the caller); %f8-%f22 are reused
 * for key bytes 0xd0..0x10f inside each iteration and restored from
 * 0x10..0x4f before the next one.  No VISEntry/VISExit is issued here.
 *
 * The loop is bottom-tested and exits only when subtracting 16 leaves a
 * zero result in %icc, so len is expected to be a non-zero multiple of 16.
 */
516ENTRY(camellia_sparc64_cbc_decrypt_4_grand_rounds)
517	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
518	ldd	[%o4 + 0x00], %f60
519	ldd	[%o4 + 0x08], %f62
5201:	ldd	[%o0 + 0x00], %f56
521	ldd	[%o0 + 0x08], %f58
522	add	%o0, 0x10, %o0
	/* Work on %f0/%f2 so the input block stays intact in %f56/%f58. */
523	fxor	%f4, %f56, %f0
524	fxor	%f6, %f58, %f2
525	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	/* Refill %f8-%f22 with the tail of the key table. */
526	ldd	[%o3 + 0xd0], %f8
527	ldd	[%o3 + 0xd8], %f10
528	ldd	[%o3 + 0xe0], %f12
529	ldd	[%o3 + 0xe8], %f14
530	ldd	[%o3 + 0xf0], %f16
531	ldd	[%o3 + 0xf8], %f18
532	ldd	[%o3 + 0x100], %f20
533	ldd	[%o3 + 0x108], %f22
534	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
535	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
	/*
	 * Last six rounds, interleaved with reloading each key register pair
	 * with its first-group contents once its two rounds are issued.
	 */
536	CAMELLIA_F(8, 2, 0, 2)
537	CAMELLIA_F(10, 0, 2, 0)
538	ldd	[%o3 + 0x10], %f8
539	ldd	[%o3 + 0x18], %f10
540	CAMELLIA_F(12, 2, 0, 2)
541	CAMELLIA_F(14, 0, 2, 0)
542	ldd	[%o3 + 0x20], %f12
543	ldd	[%o3 + 0x28], %f14
544	CAMELLIA_F(16, 2, 0, 2)
545	CAMELLIA_F(18, 0, 2, 0)
546	ldd	[%o3 + 0x30], %f16
547	ldd	[%o3 + 0x38], %f18
	/* Final key XOR with the words from 0x100/0x108, then restore them. */
548	fxor	%f20, %f2, %f2
549	fxor	%f22, %f0, %f0
550	ldd	[%o3 + 0x40], %f20
551	ldd	[%o3 + 0x48], %f22
	/* XOR with the chaining value to obtain the output block. */
552	fxor	%f60, %f2, %f2
553	fxor	%f62, %f0, %f0
	/* The input block just consumed becomes the next chaining value. */
554	fsrc2	%f56, %f60
555	fsrc2	%f58, %f62
556	std	%f2, [%o1 + 0x00]
557	std	%f0, [%o1 + 0x08]
558	subcc	%o2, 0x10, %o2
	/* The output pointer is advanced in the branch delay slot. */
559	bne,pt	%icc, 1b
560	 add	%o1, 0x10, %o1
	/* Write the final chaining value back; second half in the delay slot. */
561	std	%f60, [%o4 + 0x00]
562	retl
563	 std	%f62, [%o4 + 0x08]
564ENDPROC(camellia_sparc64_cbc_decrypt_4_grand_rounds)
565