xref: /linux/arch/powerpc/crypto/md5-asm.S (revision a44e4f3ab16bc808590763a543a93b6fbf3abcc4)
1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/*
3 * Fast MD5 implementation for PPC
4 *
5 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
6 */
7#include <asm/ppc_asm.h>
8#include <asm/asm-offsets.h>
9#include <asm/asm-compat.h>
10
/* Pointer registers: rHP addresses the four hash words, rWP the input data. */
11#define rHP	r3
12#define rWP	r4
13
/*
 * Working copies of the four hash words. rH3 shares r5 with the incoming
 * block count; ppc_md5_transform moves r5 into CTR before loading rH3.
 */
14#define rH0	r0
15#define rH1	r6
16#define rH2	r7
17#define rH3	r5
18
/*
 * Sixteen message-word registers, filled by LOAD_DATA in R_00_15 and then
 * modified in place by every round macro. r13 is skipped
 * (NOTE(review): presumably because it is reserved by the ABI/kernel - confirm).
 */
19#define rW00	r8
20#define rW01	r9
21#define rW02	r10
22#define rW03	r11
23#define rW04	r12
24#define rW05	r14
25#define rW06	r15
26#define rW07	r16
27#define rW08	r17
28#define rW09	r18
29#define rW10	r19
30#define rW11	r20
31#define rW12	r21
32#define rW13	r22
33#define rW14	r23
34#define rW15	r24
35
/* Scratch registers holding the partial results of each round's boolean function. */
36#define rT0	r25
37#define rT1	r26
38
/*
 * INITIALIZE - function prologue.
 *
 * Opens a stack frame of INT_FRAME_SIZE bytes (store-with-update of r1) and
 * saves GPRs starting at 14, 22 and 26 through the SAVE_*GPR helpers; going
 * by the helper names that covers r14-r26, the registers this file uses as
 * rW05-rW15, rT0 and rT1. Must be paired with FINALIZE.
 */
39#define INITIALIZE \
40	PPC_STLU r1,-INT_FRAME_SIZE(r1); \
41	SAVE_8GPRS(14, r1);		/* push registers onto stack	*/ \
42	SAVE_4GPRS(22, r1);						   \
43	SAVE_GPR(26, r1)
44
/*
 * FINALIZE - function epilogue, mirror image of INITIALIZE.
 *
 * Restores the same GPR groups (starting at 14, 22 and 26) and then releases
 * the INT_FRAME_SIZE-byte frame by adding it back to r1. It does not return;
 * the caller of the macro issues the blr.
 */
45#define FINALIZE \
46	REST_8GPRS(14, r1);		/* pop registers from stack	*/ \
47	REST_4GPRS(22, r1);						   \
48	REST_GPR(26, r1);						   \
49	addi	r1,r1,INT_FRAME_SIZE;
50
/*
 * Input access helpers, selected by endianness.
 *
 * Big endian:    LOAD_DATA does a byte-reversed load (lwbrx) from rWP and
 *                ignores off; INC_PTR advances rWP by 4 after each word;
 *                NEXT_BLOCK is empty because rWP has already moved on.
 * Little endian: LOAD_DATA is a plain lwz at off(rWP); INC_PTR is empty;
 *                NEXT_BLOCK advances rWP by 64 once per block.
 */
51#ifdef __BIG_ENDIAN__
52#define LOAD_DATA(reg, off) \
53	lwbrx		reg,0,rWP;	/* load data			*/
54#define INC_PTR \
55	addi		rWP,rWP,4;	/* increment per word		*/
56#define NEXT_BLOCK			/* nothing to do		*/
57#else
58#define LOAD_DATA(reg, off) \
59	lwz		reg,off(rWP);	/* load data			*/
60#define INC_PTR				/* nothing to do		*/
61#define NEXT_BLOCK \
62	addi		rWP,rWP,64;	/* increment per block		*/
63#endif
64
/*
 * R_00_15 - two interleaved steps using f = (b and c) or (~b and d).
 *
 * Step 1 updates a from (a, b, c, d); step 2 updates d with the roles
 * rotated to (d, a, b, c). The "1:" / "2:" tags in the line comments say
 * which step an instruction belongs to, and the step-2 comments name the
 * registers by their rotated roles.
 *
 * w0/w1 are loaded through LOAD_DATA (off and off+4 are only used by the
 * variant that addresses off(rWP)) and are then overwritten in place with
 * W + (k?h << 16) + k?l. addi sign-extends k?l, which is why low halves of
 * 0x8000 and above are passed as negative numbers with k?h raised by one.
 * rotrwi rotates right by p/q, i.e. left by 32-p / 32-q.
 * Clobbers rT0 and rT1.
 */
65#define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \
66	LOAD_DATA(w0, off)		/*    W				*/ \
67	and		rT0,b,c;	/* 1: f = b and c		*/ \
68	INC_PTR				/*    ptr++			*/ \
69	andc		rT1,d,b;	/* 1: f' = ~b and d		*/ \
70	LOAD_DATA(w1, off+4)		/*    W				*/ \
71	or		rT0,rT0,rT1;	/* 1: f = f or f'		*/ \
72	addi		w0,w0,k0l;	/* 1: wk = w + k		*/ \
73	add		a,a,rT0;	/* 1: a = a + f			*/ \
74	addis		w0,w0,k0h;	/* 1: wk = w + k'		*/ \
75	addis		w1,w1,k1h;	/* 2: wk = w + k		*/ \
76	add		a,a,w0;		/* 1: a = a + wk		*/ \
77	addi		w1,w1,k1l;	/* 2: wk = w + k'		*/ \
78	rotrwi		a,a,p;		/* 1: a = a rotl x		*/ \
79	add		d,d,w1;		/* 2: a = a + wk		*/ \
80	add		a,a,b;		/* 1: a = a + b			*/ \
81	and		rT0,a,b;	/* 2: f = b and c		*/ \
82	andc		rT1,c,a;	/* 2: f' = ~b and d		*/ \
83	or		rT0,rT0,rT1;	/* 2: f = f or f'		*/ \
84	add		d,d,rT0;	/* 2: a = a + f			*/ \
85	INC_PTR				/*    ptr++			*/ \
86	rotrwi		d,d,q;		/* 2: a = a rotl x		*/ \
87	add		d,d,a;		/* 2: a = a + b			*/
88
/*
 * R_16_31 - two interleaved steps using f = (c and ~d) or (b and d).
 *
 * Step 1 updates a from (a, b, c, d); step 2 updates d with the roles
 * rotated to (d, a, b, c). Nothing is loaded from memory here: w0/w1 must
 * already hold values left by an earlier macro, and (k?h << 16) + k?l is
 * added on top of whatever they contain (k?l is sign-extended by addi).
 * rotrwi rotates right by p/q, i.e. left by 32-p / 32-q.
 * Clobbers rT0 and rT1.
 */
89#define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
90	andc		rT0,c,d;	/* 1: f = c and ~d		*/ \
91	and		rT1,b,d;	/* 1: f' = b and d		*/ \
92	addi		w0,w0,k0l;	/* 1: wk = w + k		*/ \
93	or		rT0,rT0,rT1;	/* 1: f = f or f'		*/ \
94	addis		w0,w0,k0h;	/* 1: wk = w + k'		*/ \
95	add		a,a,rT0;	/* 1: a = a + f			*/ \
96	addi		w1,w1,k1l;	/* 2: wk = w + k		*/ \
97	add		a,a,w0;		/* 1: a = a + wk		*/ \
98	addis		w1,w1,k1h;	/* 2: wk = w + k'		*/ \
99	andc		rT0,b,c;	/* 2: f = c and ~d		*/ \
100	rotrwi		a,a,p;		/* 1: a = a rotl x		*/ \
101	add		a,a,b;		/* 1: a = a + b			*/ \
102	add		d,d,w1;		/* 2: a = a + wk		*/ \
103	and		rT1,a,c;	/* 2: f' = b and d		*/ \
104	or		rT0,rT0,rT1;	/* 2: f = f or f'		*/ \
105	add		d,d,rT0;	/* 2: a = a + f			*/ \
106	rotrwi		d,d,q;		/* 2: a = a rotl x		*/ \
107	add		d,d,a;		/* 2: a = a + b			*/
108
/*
 * R_32_47 - two interleaved steps using f = b xor c xor d.
 *
 * rT0 = b xor c is computed once and shared: step 1 xors it with d, and
 * step 2 (roles rotated to (d, a, b, c)) xors it with the freshly updated a.
 * w0/w1 are not loaded here; (k?h << 16) + k?l is added on top of whatever
 * they already contain (k?l is sign-extended by addi).
 * rotrwi rotates right by p/q, i.e. left by 32-p / 32-q.
 * Clobbers rT0 and rT1.
 */
109#define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
110	xor		rT0,b,c;	/* 1: f' = b xor c		*/ \
111	addi		w0,w0,k0l;	/* 1: wk = w + k		*/ \
112	xor		rT1,rT0,d;	/* 1: f = f xor f'		*/ \
113	addis		w0,w0,k0h;	/* 1: wk = w + k'		*/ \
114	add		a,a,rT1;	/* 1: a = a + f			*/ \
115	addi		w1,w1,k1l;	/* 2: wk = w + k		*/ \
116	add		a,a,w0;		/* 1: a = a + wk		*/ \
117	addis		w1,w1,k1h;	/* 2: wk = w + k'		*/ \
118	rotrwi		a,a,p;		/* 1: a = a rotl x		*/ \
119	add		d,d,w1;		/* 2: a = a + wk		*/ \
120	add		a,a,b;		/* 1: a = a + b			*/ \
121	xor		rT1,rT0,a;	/* 2: f = b xor f'		*/ \
122	add		d,d,rT1;	/* 2: a = a + f			*/ \
123	rotrwi		d,d,q;		/* 2: a = a rotl x		*/ \
124	add		d,d,a;		/* 2: a = a + b			*/
125
/*
 * R_48_63 - two interleaved steps using f = (b or ~d) xor c.
 *
 * Step 1 updates a from (a, b, c, d); step 2 updates d with the roles
 * rotated to (d, a, b, c). w0/w1 are not loaded here; (k?h << 16) + k?l is
 * added on top of whatever they already contain (k?l is sign-extended by
 * addi). rotrwi rotates right by p/q, i.e. left by 32-p / 32-q.
 * Clobbers rT0 only.
 */
126#define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
127	addi		w0,w0,k0l;	/* 1: w = w + k			*/ \
128	orc		rT0,b,d;	/* 1: f = b or ~d		*/ \
129	addis		w0,w0,k0h;	/* 1: w = w + k'		*/ \
130	xor		rT0,rT0,c;	/* 1: f = f xor c		*/ \
131	add		a,a,w0;		/* 1: a = a + wk		*/ \
132	addi		w1,w1,k1l;	/* 2: w = w + k			*/ \
133	add		a,a,rT0;	/* 1: a = a + f			*/ \
134	addis		w1,w1,k1h;	/* 2: w = w + k'		*/ \
135	rotrwi		a,a,p;		/* 1: a = a rotl x		*/ \
136	add		a,a,b;		/* 1: a = a + b			*/ \
137	orc		rT0,a,c;	/* 2: f = b or ~d		*/ \
138	add		d,d,w1;		/* 2: a = a + wk		*/ \
139	xor		rT0,rT0,b;	/* 2: f = f xor c		*/ \
140	add		d,d,rT0;	/* 2: a = a + f			*/ \
141	rotrwi		d,d,q;		/* 2: a = a rotl x		*/ \
142	add		d,d,a;		/* 2: a = a + b			*/
143
/*
 * ppc_md5_transform - run the MD5 block function over consecutive blocks.
 *
 * In:  r3 (rHP) = address of four 32-bit hash words; read once at entry and
 *                 written back after every block
 *      r4 (rWP) = address of the input data; 64 bytes are consumed per block
 *      r5       = number of blocks; moved to CTR, after which r5 is reused
 *                 as rH3
 *
 * The hash words stay in rH0-rH3 across loop iterations. Each block takes
 * 32 macro invocations of two steps each. The rW registers are only loaded
 * in R_00_15 and keep every constant added to them afterwards, so the k
 * arguments from R_16_31 onwards are offsets relative to what the register
 * already contains (e.g. for rW01: 0xf61e2562 - 0xe8c7b756 = 0x0d566e0c).
 *
 * NOTE(review): a block count of 0 is not guarded. The loop body runs once
 * before bdnz decrements and tests CTR, so callers presumably always pass
 * at least 1 - confirm against the glue code.
 */
144_GLOBAL(ppc_md5_transform)
145	INITIALIZE
146
147	mtctr		r5		/* CTR = block count; frees r5 for rH3 */
148	lwz		rH0,0(rHP)
149	lwz		rH1,4(rHP)
150	lwz		rH2,8(rHP)
151	lwz		rH3,12(rHP)
152
153ppc_md5_main:
	/* steps 0-15: load the 16 message words in order, rotations left 7/12/17/22 */
154	R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0,
155		0xd76b, -23432, 0xe8c8, -18602)
156	R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8,
157		0x2420, 0x70db, 0xc1be, -12562)
158	R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16,
159		0xf57c, 0x0faf, 0x4788, -14806)
160	R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24,
161		0xa830, 0x4613, 0xfd47, -27391)
162	R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32,
163		0x6981, -26408, 0x8b45,  -2129)
164	R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40,
165		0xffff, 0x5bb1, 0x895d, -10306)
166	R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48,
167		0x6b90, 0x1122, 0xfd98, 0x7193)
168	R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56,
169		0xa679, 0x438e, 0x49b4, 0x0821)
170
	/* steps 16-31: word order 1,6,11,0,5,10,15,4,..., rotations left 5/9/14/20 */
171	R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23,
172		0x0d56, 0x6e0c, 0x1810, 0x6d2d)
173	R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12,
174		0x9d02, -32109, 0x124c, 0x2332)
175	R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23,
176		0x8ea7, 0x4a33, 0x0245, -18270)
177	R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12,
178		0x8eee,  -8608, 0xf258,  -5095)
179	R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23,
180		0x969d, -10697, 0x1cbe, -15288)
181	R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12,
182		0x3317, 0x3e99, 0xdbd9, 0x7c15)
183	R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23,
184		0xac4b, 0x7772, 0xd8cf, 0x331d)
185	R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12,
186		0x6a28, 0x6dd8, 0x219a, 0x3b68)
187
	/* steps 32-47: word order 5,8,11,14,1,4,7,10,..., rotations left 4/11/16/23 */
188	R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21,
189		0x29cb, 0x28e5, 0x4218,  -7788)
190	R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16,  9,
191		0x473f, 0x06d1, 0x3aae, 0x3036)
192	R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21,
193		0xaea1, -15134, 0x640b, -11295)
194	R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16,  9,
195		0x8f4c, 0x4887, 0xbc7c, -22499)
196	R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21,
197		0x7eb8, -27199, 0x00ea, 0x6050)
198	R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16,  9,
199		0xe01a, 0x22fe, 0x4447, 0x69c5)
200	R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21,
201		0xb7f3, 0x0253, 0x59b1, 0x4d5b)
202	R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16,  9,
203		0x4701, -27017, 0xc7bd, -19859)
204
	/* steps 48-63: word order 0,7,14,5,12,3,10,1,..., rotations left 6/10/15/21 */
205	R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22,
206		0x0988,  -1462, 0x4c70, -19401)
207	R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11,
208		0xadaf,  -5221, 0xfc99, 0x66f7)
209	R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22,
210		0x7e80, -16418, 0xba1e, -25587)
211	R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11,
212		0x4130, 0x380d, 0xe0c5, 0x738d)
	/*
	 * rW00, rW14, rW12 and rW10 are not used as message words by the
	 * remaining steps, so they are reused to fetch the hash words as
	 * they were before this block, interleaved with the last steps.
	 */
213	lwz		rW00,0(rHP)
214	R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22,
215		0xe837, -30770, 0xde8a, 0x69e8)
216	lwz		rW14,4(rHP)
217	R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11,
218		0x9e79, 0x260f, 0x256d, -27941)
219	lwz		rW12,8(rHP)
220	R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22,
221		0xab75, -20775, 0x4f9e, -28397)
222	lwz		rW10,12(rHP)
223	R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11,
224		0x662b, 0x7c56, 0x11b2, 0x0358)
225
	/* new hash = previous hash + working values; store it and keep it in rH0-rH3 */
226	add		rH0,rH0,rW00
227	stw		rH0,0(rHP)
228	add		rH1,rH1,rW14
229	stw		rH1,4(rHP)
230	add		rH2,rH2,rW12
231	stw		rH2,8(rHP)
232	add		rH3,rH3,rW10
233	stw		rH3,12(rHP)
234	NEXT_BLOCK
235
236	bdnz		ppc_md5_main	/* decrement CTR, loop while it is non-zero */
237
238	FINALIZE
239	blr
240