xref: /linux/arch/powerpc/crypto/md5-asm.S (revision 03ab8e6297acd1bc0eedaa050e2a1635c576fd11)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Fast MD5 implementation for PPC
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 */
7#include <asm/ppc_asm.h>
8#include <asm/asm-offsets.h>
9#include <asm/asm-compat.h>
10
/*
 * Register allocation.
 *
 * r3 and r4 are used as base addresses exactly as they arrive at
 * ppc_md5_transform.  r5 arrives holding the loop count; it is copied to
 * CTR first and then recycled as rH3.  r14..r26 are spilled by INITIALIZE
 * and reloaded by FINALIZE.
 */
#define rHP	r3	/* base address of the four hash state words	*/
#define rWP	r4	/* read pointer into the message data		*/

/*
 * Working copies of the hash state words found at 0/4/8/12(rHP).
 * rH0 lives in r0, which reads as zero when it is the base operand of
 * addi/addis or of a load/store address; it is therefore only ever used
 * with register-register instructions and as a load/store data register.
 */
#define rH0	r0
#define rH1	r6
#define rH2	r7
#define rH3	r5

/*
 * The sixteen message words of the current block.  The round macros add
 * their constants into these registers in place.
 */
#define rW00	r8
#define rW01	r9
#define rW02	r10
#define rW03	r11
#define rW04	r12
#define rW05	r14
#define rW06	r15
#define rW07	r16
#define rW08	r17
#define rW09	r18
#define rW10	r19
#define rW11	r20
#define rW12	r21
#define rW13	r22
#define rW14	r23
#define rW15	r24

/* Scratch registers used while evaluating the round functions */
#define rT0	r25
#define rT1	r26
38
/*
 * Prologue: open a stack frame of INT_FRAME_SIZE bytes and spill r14..r26
 * into it.  Those registers back rW05..rW15, rT0 and rT1.
 */
#define INITIALIZE \
	PPC_STLU r1,-INT_FRAME_SIZE(r1);	/* open the stack frame	*/ \
	SAVE_GPRS(14, 26, r1)			/* spill r14..r26	*/
42
/*
 * Epilogue: reload r14..r26 from the frame opened by INITIALIZE and then
 * release that frame again.
 */
#define FINALIZE \
	REST_GPRS(14, 26, r1);			/* reload r14..r26	*/ \
	addi	r1,r1,INT_FRAME_SIZE		/* drop the stack frame	*/
46
/*
 * Message word access, selected by the endianness of the build.
 *
 * Big endian:    each word is fetched with a byte-reversing load from the
 *                address in rWP (the displacement argument is not used) and
 *                INC_PTR then advances rWP by one word.  R_00_15 issues one
 *                INC_PTR per load, so after the 16 loads of a block rWP has
 *                moved by 64 bytes and NEXT_BLOCK is empty.
 * Little endian: each word is fetched with a plain load at a fixed
 *                displacement from rWP, INC_PTR is empty, and NEXT_BLOCK
 *                advances rWP by 64 bytes once per block.
 */
#ifdef __BIG_ENDIAN__
#define LOAD_DATA(dst, disp) \
	lwbrx		dst,0,rWP;	/* byte-reversed load at rWP	*/
#define INC_PTR \
	addi		rWP,rWP,4;	/* step to the next word	*/
#define NEXT_BLOCK			/* rWP was advanced per word	*/
#else
#define LOAD_DATA(dst, disp) \
	lwz		dst,disp(rWP);	/* plain load at rWP + disp	*/
#define INC_PTR				/* rWP is advanced per block	*/
#define NEXT_BLOCK \
	addi		rWP,rWP,64;	/* step to the next block	*/
#endif
60
/*
 * Two consecutive steps of round 1 (steps 0..15), interleaved.  The
 * instruction order is hand scheduled; do not reorder.
 *
 *   step 1:  a = b + rotr(a + F(b, c, d) + x0 + K0, s0)
 *   step 2:  d = a + rotr(d + F(a, b, c) + x1 + K1, s1)
 *   where    F(u, v, w) = (u and v) or (~u and w)
 *
 * x0/x1 are loaded from the message here (disp and disp + 4) and are left
 * holding word + constant afterwards.  Each constant is passed as a pair:
 * the "lo" half goes through addi, which sign-extends it, and the "hi" half
 * goes through addis, so hi is already adjusted for a negative lo.
 * s0/s1 are rotate-RIGHT counts, i.e. 32 minus the left-rotate amount.
 * Clobbers rT0 and rT1.
 */
#define R_00_15(a, b, c, d, x0, x1, s0, s1, disp, k0hi, k0lo, k1hi, k1lo) \
	LOAD_DATA(x0, disp)		/*    x0 = first message word	*/ \
	and		rT0,b,c;	/* 1: t0 = b and c		*/ \
	INC_PTR				/*    advance rWP (BE only)	*/ \
	andc		rT1,d,b;	/* 1: t1 = d and ~b		*/ \
	LOAD_DATA(x1, disp+4)		/*    x1 = second message word	*/ \
	or		rT0,rT0,rT1;	/* 1: t0 = F(b, c, d)		*/ \
	addi		x0,x0,k0lo;	/* 1: x0 += low half of K0	*/ \
	add		a,a,rT0;	/* 1: a += F(b, c, d)		*/ \
	addis		x0,x0,k0hi;	/* 1: x0 += high half of K0	*/ \
	addis		x1,x1,k1hi;	/* 2: x1 += high half of K1	*/ \
	add		a,a,x0;		/* 1: a += x0 + K0		*/ \
	addi		x1,x1,k1lo;	/* 2: x1 += low half of K1	*/ \
	rotrwi		a,a,s0;		/* 1: a = rotl(a, 32 - s0)	*/ \
	add		d,d,x1;		/* 2: d += x1 + K1		*/ \
	add		a,a,b;		/* 1: a += b   (step 1 done)	*/ \
	and		rT0,a,b;	/* 2: t0 = a and b		*/ \
	andc		rT1,c,a;	/* 2: t1 = c and ~a		*/ \
	or		rT0,rT0,rT1;	/* 2: t0 = F(a, b, c)		*/ \
	add		d,d,rT0;	/* 2: d += F(a, b, c)		*/ \
	INC_PTR				/*    advance rWP (BE only)	*/ \
	rotrwi		d,d,s1;		/* 2: d = rotl(d, 32 - s1)	*/ \
	add		d,d,a;		/* 2: d += a   (step 2 done)	*/
84
/*
 * Two consecutive steps of round 2 (steps 16..31), interleaved.  The
 * instruction order is hand scheduled; do not reorder.
 *
 *   step 1:  a = b + rotr(a + G(b, c, d) + x0 + K0, s0)
 *   step 2:  d = a + rotr(d + G(a, b, c) + x1 + K1, s1)
 *   where    G(u, v, w) = (u and w) or (v and ~w)
 *
 * x0/x1 are not reloaded: the constant halves are added on top of whatever
 * the registers already contain, and the sum is left in place.  The "lo"
 * half is sign-extended by addi; the "hi" half is added by addis.
 * s0/s1 are rotate-RIGHT counts, i.e. 32 minus the left-rotate amount.
 * Clobbers rT0 and rT1.
 */
#define R_16_31(a, b, c, d, x0, x1, s0, s1, k0hi, k0lo, k1hi, k1lo) \
	andc		rT0,c,d;	/* 1: t0 = c and ~d		*/ \
	and		rT1,b,d;	/* 1: t1 = b and d		*/ \
	addi		x0,x0,k0lo;	/* 1: x0 += low half of K0	*/ \
	or		rT0,rT0,rT1;	/* 1: t0 = G(b, c, d)		*/ \
	addis		x0,x0,k0hi;	/* 1: x0 += high half of K0	*/ \
	add		a,a,rT0;	/* 1: a += G(b, c, d)		*/ \
	addi		x1,x1,k1lo;	/* 2: x1 += low half of K1	*/ \
	add		a,a,x0;		/* 1: a += x0 + K0		*/ \
	addis		x1,x1,k1hi;	/* 2: x1 += high half of K1	*/ \
	andc		rT0,b,c;	/* 2: t0 = b and ~c		*/ \
	rotrwi		a,a,s0;		/* 1: a = rotl(a, 32 - s0)	*/ \
	add		a,a,b;		/* 1: a += b   (step 1 done)	*/ \
	add		d,d,x1;		/* 2: d += x1 + K1		*/ \
	and		rT1,a,c;	/* 2: t1 = a and c		*/ \
	or		rT0,rT0,rT1;	/* 2: t0 = G(a, b, c)		*/ \
	add		d,d,rT0;	/* 2: d += G(a, b, c)		*/ \
	rotrwi		d,d,s1;		/* 2: d = rotl(d, 32 - s1)	*/ \
	add		d,d,a;		/* 2: d += a   (step 2 done)	*/
104
/*
 * Two consecutive steps of round 3 (steps 32..47), interleaved.  The
 * instruction order is hand scheduled; do not reorder.
 *
 *   step 1:  a = b + rotr(a + H(b, c, d) + x0 + K0, s0)
 *   step 2:  d = a + rotr(d + H(a, b, c) + x1 + K1, s1)
 *   where    H(u, v, w) = u xor v xor w
 *
 * rT0 = b xor c is computed once and shared by both steps.
 * x0/x1 are not reloaded: the constant halves are added on top of whatever
 * the registers already contain, and the sum is left in place.  The "lo"
 * half is sign-extended by addi; the "hi" half is added by addis.
 * s0/s1 are rotate-RIGHT counts, i.e. 32 minus the left-rotate amount.
 * Clobbers rT0 and rT1.
 */
#define R_32_47(a, b, c, d, x0, x1, s0, s1, k0hi, k0lo, k1hi, k1lo) \
	xor		rT0,b,c;	/* 1: t0 = b xor c (kept)	*/ \
	addi		x0,x0,k0lo;	/* 1: x0 += low half of K0	*/ \
	xor		rT1,rT0,d;	/* 1: t1 = H(b, c, d)		*/ \
	addis		x0,x0,k0hi;	/* 1: x0 += high half of K0	*/ \
	add		a,a,rT1;	/* 1: a += H(b, c, d)		*/ \
	addi		x1,x1,k1lo;	/* 2: x1 += low half of K1	*/ \
	add		a,a,x0;		/* 1: a += x0 + K0		*/ \
	addis		x1,x1,k1hi;	/* 2: x1 += high half of K1	*/ \
	rotrwi		a,a,s0;		/* 1: a = rotl(a, 32 - s0)	*/ \
	add		d,d,x1;		/* 2: d += x1 + K1		*/ \
	add		a,a,b;		/* 1: a += b   (step 1 done)	*/ \
	xor		rT1,rT0,a;	/* 2: t1 = H(a, b, c)		*/ \
	add		d,d,rT1;	/* 2: d += H(a, b, c)		*/ \
	rotrwi		d,d,s1;		/* 2: d = rotl(d, 32 - s1)	*/ \
	add		d,d,a;		/* 2: d += a   (step 2 done)	*/
121
/*
 * Two consecutive steps of round 4 (steps 48..63), interleaved.  The
 * instruction order is hand scheduled; do not reorder.
 *
 *   step 1:  a = b + rotr(a + I(b, c, d) + x0 + K0, s0)
 *   step 2:  d = a + rotr(d + I(a, b, c) + x1 + K1, s1)
 *   where    I(u, v, w) = v xor (u or ~w)
 *
 * x0/x1 are not reloaded: the constant halves are added on top of whatever
 * the registers already contain, and the sum is left in place.  The "lo"
 * half is sign-extended by addi; the "hi" half is added by addis.
 * s0/s1 are rotate-RIGHT counts, i.e. 32 minus the left-rotate amount.
 * Clobbers rT0 only.
 */
#define R_48_63(a, b, c, d, x0, x1, s0, s1, k0hi, k0lo, k1hi, k1lo) \
	addi		x0,x0,k0lo;	/* 1: x0 += low half of K0	*/ \
	orc		rT0,b,d;	/* 1: t0 = b or ~d		*/ \
	addis		x0,x0,k0hi;	/* 1: x0 += high half of K0	*/ \
	xor		rT0,rT0,c;	/* 1: t0 = I(b, c, d)		*/ \
	add		a,a,x0;		/* 1: a += x0 + K0		*/ \
	addi		x1,x1,k1lo;	/* 2: x1 += low half of K1	*/ \
	add		a,a,rT0;	/* 1: a += I(b, c, d)		*/ \
	addis		x1,x1,k1hi;	/* 2: x1 += high half of K1	*/ \
	rotrwi		a,a,s0;		/* 1: a = rotl(a, 32 - s0)	*/ \
	add		a,a,b;		/* 1: a += b   (step 1 done)	*/ \
	orc		rT0,a,c;	/* 2: t0 = a or ~c		*/ \
	add		d,d,x1;		/* 2: d += x1 + K1		*/ \
	xor		rT0,rT0,b;	/* 2: t0 = I(a, b, c)		*/ \
	add		d,d,rT0;	/* 2: d += I(a, b, c)		*/ \
	rotrwi		d,d,s1;		/* 2: d = rotl(d, 32 - s1)	*/ \
	add		d,d,a;		/* 2: d += a   (step 2 done)	*/
139
/*
 * ppc_md5_transform - run the MD5 compression function over whole blocks
 *
 * In:	r3 (rHP)	address of the four 32-bit state words; they are read
 *			at the start of every block and written back at its end
 *	r4 (rWP)	address of the message data, 64 bytes consumed per block
 *	r5		number of 64-byte blocks to process
 *
 * Clobbers r0, r5-r12, cr0 and CTR.  r4 is advanced past the consumed
 * data.  r14-r26 are saved and restored through INITIALIZE/FINALIZE.
 *
 * A block count of zero returns at once without touching the state.  The
 * loop below is closed by bdnz, which decrements CTR before testing it, so
 * entering it with CTR == 0 would wrap the counter instead of skipping it.
 *
 * Round constants are passed to the R_* macros as (high, low) halves for
 * addis/addi.  addi sign-extends its immediate, so a negative low half is
 * paired with a high half that is one larger, e.g. (0xd76b, -23432) yields
 * 0xd76aa478.  The message registers keep "word + constant" after every
 * step, therefore from step 16 on the values passed are the difference
 * between the constant of that step and the one previously added to the
 * same register.
 *
 * The rotate arguments are rotate-right counts, i.e. 32 minus the MD5
 * left-rotate amount of the step.
 */
_GLOBAL(ppc_md5_transform)
	cmpwi		r5,0		/* no blocks requested?		*/
	beqlr				/* then there is nothing to do	*/

	INITIALIZE

	mtctr		r5		/* r5 is recycled as rH3 below	*/
	lwz		rH0,0(rHP)
	lwz		rH1,4(rHP)
	lwz		rH2,8(rHP)
	lwz		rH3,12(rHP)

ppc_md5_main:
	/* Round 1, steps 0..15: message words are loaded here */
	R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0,
		0xd76b, -23432, 0xe8c8, -18602)
	R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8,
		0x2420, 0x70db, 0xc1be, -12562)
	R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16,
		0xf57c, 0x0faf, 0x4788, -14806)
	R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24,
		0xa830, 0x4613, 0xfd47, -27391)
	R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32,
		0x6981, -26408, 0x8b45,  -2129)
	R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40,
		0xffff, 0x5bb1, 0x895d, -10306)
	R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48,
		0x6b90, 0x1122, 0xfd98, 0x7193)
	R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56,
		0xa679, 0x438e, 0x49b4, 0x0821)

	/* Round 2, steps 16..31: constants are deltas from here on */
	R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23,
		0x0d56, 0x6e0c, 0x1810, 0x6d2d)
	R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12,
		0x9d02, -32109, 0x124c, 0x2332)
	R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23,
		0x8ea7, 0x4a33, 0x0245, -18270)
	R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12,
		0x8eee,  -8608, 0xf258,  -5095)
	R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23,
		0x969d, -10697, 0x1cbe, -15288)
	R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12,
		0x3317, 0x3e99, 0xdbd9, 0x7c15)
	R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23,
		0xac4b, 0x7772, 0xd8cf, 0x331d)
	R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12,
		0x6a28, 0x6dd8, 0x219a, 0x3b68)

	/* Round 3, steps 32..47 */
	R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21,
		0x29cb, 0x28e5, 0x4218,  -7788)
	R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16,  9,
		0x473f, 0x06d1, 0x3aae, 0x3036)
	R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21,
		0xaea1, -15134, 0x640b, -11295)
	R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16,  9,
		0x8f4c, 0x4887, 0xbc7c, -22499)
	R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21,
		0x7eb8, -27199, 0x00ea, 0x6050)
	R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16,  9,
		0xe01a, 0x22fe, 0x4447, 0x69c5)
	R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21,
		0xb7f3, 0x0253, 0x59b1, 0x4d5b)
	R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16,  9,
		0x4701, -27017, 0xc7bd, -19859)

	/*
	 * Round 4, steps 48..63.  rW00, rW14, rW12 and rW10 have had their
	 * last use once the fourth macro below has run, so they are recycled
	 * to fetch the state words as they were at the start of this block.
	 */
	R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22,
		0x0988,  -1462, 0x4c70, -19401)
	R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11,
		0xadaf,  -5221, 0xfc99, 0x66f7)
	R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22,
		0x7e80, -16418, 0xba1e, -25587)
	R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11,
		0x4130, 0x380d, 0xe0c5, 0x738d)
	lwz		rW00,0(rHP)
	R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22,
		0xe837, -30770, 0xde8a, 0x69e8)
	lwz		rW14,4(rHP)
	R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11,
		0x9e79, 0x260f, 0x256d, -27941)
	lwz		rW12,8(rHP)
	R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22,
		0xab75, -20775, 0x4f9e, -28397)
	lwz		rW10,12(rHP)
	R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11,
		0x662b, 0x7c56, 0x11b2, 0x0358)

	/* Add the block's starting state to the result and store it back */
	add		rH0,rH0,rW00
	stw		rH0,0(rHP)
	add		rH1,rH1,rW14
	stw		rH1,4(rHP)
	add		rH2,rH2,rW12
	stw		rH2,8(rHP)
	add		rH3,rH3,rW10
	stw		rH3,12(rHP)
	NEXT_BLOCK

	bdnz		ppc_md5_main	/* loop while blocks remain in CTR */

	FINALIZE
	blr
236