xref: /linux/arch/powerpc/crypto/md5-asm.S (revision b85d45947951d23cb22d90caecf4c1eb81342c96)
/*
 * Fast MD5 implementation for PPC
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 */
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>

#define rHP	r3
#define rWP	r4

#define rH0	r0
#define rH1	r6
#define rH2	r7
#define rH3	r5

#define rW00	r8
#define rW01	r9
#define rW02	r10
#define rW03	r11
#define rW04	r12
#define rW05	r14
#define rW06	r15
#define rW07	r16
#define rW08	r17
#define rW09	r18
#define rW10	r19
#define rW11	r20
#define rW12	r21
#define rW13	r22
#define rW14	r23
#define rW15	r24

#define rT0	r25
#define rT1	r26

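/*
 * Register usage: rHP points to the hash state, rWP to the input data.
 * rH0-rH3 carry the state words A-D across the 64 steps, rW00-rW15 hold
 * the 16 message words of the current block and rT0/rT1 are scratch.
 * r14-r26 are non-volatile, so they are saved and restored by
 * INITIALIZE/FINALIZE below.
 */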
#define INITIALIZE \
	PPC_STLU r1,-INT_FRAME_SIZE(r1); \
	SAVE_8GPRS(14, r1);		/* push registers onto stack	*/ \
	SAVE_4GPRS(22, r1);						   \
	SAVE_GPR(26, r1)

#define FINALIZE \
	REST_8GPRS(14, r1);		/* pop registers from stack	*/ \
	REST_4GPRS(22, r1);						   \
	REST_GPR(26, r1);						   \
	addi	r1,r1,INT_FRAME_SIZE;

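/*
 * MD5 reads its input as little-endian 32-bit words.  On big-endian
 * kernels the words are loaded with the byte-reversing lwbrx, which only
 * exists in indexed form, so rWP is bumped after every word and the
 * per-block increment is a no-op.  On little-endian kernels a plain lwz
 * with an immediate offset is enough and rWP advances once per block.
 */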
#ifdef __BIG_ENDIAN__
#define LOAD_DATA(reg, off) \
	lwbrx		reg,0,rWP;	/* load data			*/
#define INC_PTR \
	addi		rWP,rWP,4;	/* increment per word		*/
#define NEXT_BLOCK			/* nothing to do		*/
#else
#define LOAD_DATA(reg, off) \
	lwz		reg,off(rWP);	/* load data			*/
#define INC_PTR				/* nothing to do		*/
#define NEXT_BLOCK \
	addi		rWP,rWP,64;	/* increment per block		*/
#endif

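/*
 * Each R_xx_yy macro performs two interleaved MD5 steps: the first
 * updates "a" from (b,c,d), the second updates "d" from the new (a,b,c).
 * The left rotate by s is written as rotrwi by 32-s, and the 32-bit step
 * constant K is added in two halves: addis adds the high 16 bits, addi
 * the sign-extended low 16 bits, with the high half pre-incremented by
 * one whenever the low half is negative.  For example K[0] = 0xd76aa478
 * becomes k0h = 0xd76b, k0l = -23432 (0xa478).
 *
 * R_00_15 implements F(b,c,d) = (b & c) | (~b & d) for steps 0-15 and
 * also loads the two message words it consumes.
 */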
#define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \
	LOAD_DATA(w0, off)		/*    W				*/ \
	and		rT0,b,c;	/* 1: f = b and c		*/ \
	INC_PTR				/*    ptr++			*/ \
	andc		rT1,d,b;	/* 1: f' = ~b and d		*/ \
	LOAD_DATA(w1, off+4)		/*    W				*/ \
	or		rT0,rT0,rT1;	/* 1: f = f or f'		*/ \
	addi		w0,w0,k0l;	/* 1: wk = w + k		*/ \
	add		a,a,rT0;	/* 1: a = a + f			*/ \
	addis		w0,w0,k0h;	/* 1: wk = w + k'		*/ \
	addis		w1,w1,k1h;	/* 2: wk = w + k		*/ \
	add		a,a,w0;		/* 1: a = a + wk		*/ \
	addi		w1,w1,k1l;	/* 2: wk = w + k'		*/ \
	rotrwi		a,a,p;		/* 1: a = a rotl x		*/ \
	add		d,d,w1;		/* 2: a = a + wk		*/ \
	add		a,a,b;		/* 1: a = a + b			*/ \
	and		rT0,a,b;	/* 2: f = b and c		*/ \
	andc		rT1,c,a;	/* 2: f' = ~b and d		*/ \
	or		rT0,rT0,rT1;	/* 2: f = f or f'		*/ \
	add		d,d,rT0;	/* 2: a = a + f			*/ \
	INC_PTR				/*    ptr++			*/ \
	rotrwi		d,d,q;		/* 2: a = a rotl x		*/ \
	add		d,d,a;		/* 2: a = a + b			*/

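/*
 * R_16_31 implements G(b,c,d) = (b & d) | (c & ~d) for steps 16-31; the
 * message words are already in registers, so only the constant add,
 * rotate and feedback remain.
 */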
#define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
	andc		rT0,c,d;	/* 1: f = c and ~d		*/ \
	and		rT1,b,d;	/* 1: f' = b and d		*/ \
	addi		w0,w0,k0l;	/* 1: wk = w + k		*/ \
	or		rT0,rT0,rT1;	/* 1: f = f or f'		*/ \
	addis		w0,w0,k0h;	/* 1: wk = w + k'		*/ \
	add		a,a,rT0;	/* 1: a = a + f			*/ \
	addi		w1,w1,k1l;	/* 2: wk = w + k		*/ \
	add		a,a,w0;		/* 1: a = a + wk		*/ \
	addis		w1,w1,k1h;	/* 2: wk = w + k'		*/ \
	andc		rT0,b,c;	/* 2: f = c and ~d		*/ \
	rotrwi		a,a,p;		/* 1: a = a rotl x		*/ \
	add		a,a,b;		/* 1: a = a + b			*/ \
	add		d,d,w1;		/* 2: a = a + wk		*/ \
	and		rT1,a,c;	/* 2: f' = b and d		*/ \
	or		rT0,rT0,rT1;	/* 2: f = f or f'		*/ \
	add		d,d,rT0;	/* 2: a = a + f			*/ \
	rotrwi		d,d,q;		/* 2: a = a rotl x		*/ \
	add		d,d,a;		/* 2: a = a + b			*/

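/*
 * R_32_47 implements H(b,c,d) = b ^ c ^ d for steps 32-47.  The partial
 * result b ^ c in rT0 is reused by the second step, which only xors in
 * the freshly computed "a".
 */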
#define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
	xor		rT0,b,c;	/* 1: f' = b xor c		*/ \
	addi		w0,w0,k0l;	/* 1: wk = w + k		*/ \
	xor		rT1,rT0,d;	/* 1: f = f xor f'		*/ \
	addis		w0,w0,k0h;	/* 1: wk = w + k'		*/ \
	add		a,a,rT1;	/* 1: a = a + f			*/ \
	addi		w1,w1,k1l;	/* 2: wk = w + k		*/ \
	add		a,a,w0;		/* 1: a = a + wk		*/ \
	addis		w1,w1,k1h;	/* 2: wk = w + k'		*/ \
	rotrwi		a,a,p;		/* 1: a = a rotl x		*/ \
	add		d,d,w1;		/* 2: a = a + wk		*/ \
	add		a,a,b;		/* 1: a = a + b			*/ \
	xor		rT1,rT0,a;	/* 2: f = b xor f'		*/ \
	add		d,d,rT1;	/* 2: a = a + f			*/ \
	rotrwi		d,d,q;		/* 2: a = a rotl x		*/ \
	add		d,d,a;		/* 2: a = a + b			*/

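/*
 * R_48_63 implements I(b,c,d) = c ^ (b | ~d) for steps 48-63.
 */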
#define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
	addi		w0,w0,k0l;	/* 1: w = w + k			*/ \
	orc		rT0,b,d;	/* 1: f = b or ~d		*/ \
	addis		w0,w0,k0h;	/* 1: w = w + k'		*/ \
	xor		rT0,rT0,c;	/* 1: f = f xor c		*/ \
	add		a,a,w0;		/* 1: a = a + wk		*/ \
	addi		w1,w1,k1l;	/* 2: w = w + k			*/ \
	add		a,a,rT0;	/* 1: a = a + f			*/ \
	addis		w1,w1,k1h;	/* 2: w = w + k'		*/ \
	rotrwi		a,a,p;		/* 1: a = a rotl x		*/ \
	add		a,a,b;		/* 1: a = a + b			*/ \
	orc		rT0,a,c;	/* 2: f = b or ~d		*/ \
	add		d,d,w1;		/* 2: a = a + wk		*/ \
	xor		rT0,rT0,b;	/* 2: f = f xor c		*/ \
	add		d,d,rT0;	/* 2: a = a + f			*/ \
	rotrwi		d,d,q;		/* 2: a = a rotl x		*/ \
	add		d,d,a;		/* 2: a = a + b			*/

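/*
 * ppc_md5_transform(hash, data, nblocks)
 *
 * r3 = pointer to the 4-word MD5 state, r4 = input data, r5 = number of
 * 64-byte blocks to process.  (The C prototype in the glue code is
 * presumably void ppc_md5_transform(u32 *state, const u8 *src, u32 blocks);
 * only the register usage is taken from the code below.)
 */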
_GLOBAL(ppc_md5_transform)
	INITIALIZE

	mtctr		r5
	lwz		rH0,0(rHP)
	lwz		rH1,4(rHP)
	lwz		rH2,8(rHP)
	lwz		rH3,12(rHP)

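/*
 * Main loop: CTR holds the number of 64-byte blocks (r5).  The first 16
 * steps use F and fetch the block's message words in order as they go;
 * the remaining rounds reuse them from rW00-rW15.
 */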
ppc_md5_main:
	R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0,
		0xd76b, -23432, 0xe8c8, -18602)
	R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8,
		0x2420, 0x70db, 0xc1be, -12562)
	R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16,
		0xf57c, 0x0faf, 0x4788, -14806)
	R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24,
		0xa830, 0x4613, 0xfd47, -27391)
	R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32,
		0x6981, -26408, 0x8b45,  -2129)
	R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40,
		0xffff, 0x5bb1, 0x895d, -10306)
	R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48,
		0x6b90, 0x1122, 0xfd98, 0x7193)
	R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56,
		0xa679, 0x438e, 0x49b4, 0x0821)

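	/* steps 16-31: G, message words used in the order (5*i + 1) mod 16 */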
	R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23,
		0x0d56, 0x6e0c, 0x1810, 0x6d2d)
	R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12,
		0x9d02, -32109, 0x124c, 0x2332)
	R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23,
		0x8ea7, 0x4a33, 0x0245, -18270)
	R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12,
		0x8eee,  -8608, 0xf258,  -5095)
	R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23,
		0x969d, -10697, 0x1cbe, -15288)
	R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12,
		0x3317, 0x3e99, 0xdbd9, 0x7c15)
	R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23,
		0xac4b, 0x7772, 0xd8cf, 0x331d)
	R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12,
		0x6a28, 0x6dd8, 0x219a, 0x3b68)

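	/* steps 32-47: H, message words used in the order (3*i + 5) mod 16 */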
	R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21,
		0x29cb, 0x28e5, 0x4218,  -7788)
	R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16,  9,
		0x473f, 0x06d1, 0x3aae, 0x3036)
	R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21,
		0xaea1, -15134, 0x640b, -11295)
	R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16,  9,
		0x8f4c, 0x4887, 0xbc7c, -22499)
	R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21,
		0x7eb8, -27199, 0x00ea, 0x6050)
	R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16,  9,
		0xe01a, 0x22fe, 0x4447, 0x69c5)
	R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21,
		0xb7f3, 0x0253, 0x59b1, 0x4d5b)
	R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16,  9,
		0x4701, -27017, 0xc7bd, -19859)

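	/*
	 * steps 48-63: I, message words used in the order (7*i) mod 16.
	 * The previous hash value is reloaded from rHP into rW00/rW14/
	 * rW12/rW10 in between, ready for the feed-forward addition below.
	 */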
	R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22,
		0x0988,  -1462, 0x4c70, -19401)
	R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11,
		0xadaf,  -5221, 0xfc99, 0x66f7)
	R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22,
		0x7e80, -16418, 0xba1e, -25587)
	R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11,
		0x4130, 0x380d, 0xe0c5, 0x738d)
	lwz		rW00,0(rHP)
	R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22,
		0xe837, -30770, 0xde8a, 0x69e8)
	lwz		rW14,4(rHP)
	R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11,
		0x9e79, 0x260f, 0x256d, -27941)
	lwz		rW12,8(rHP)
	R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22,
		0xab75, -20775, 0x4f9e, -28397)
	lwz		rW10,12(rHP)
	R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11,
		0x662b, 0x7c56, 0x11b2, 0x0358)

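	/* feed forward: add the previous hash value and store the new state */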
	add		rH0,rH0,rW00
	stw		rH0,0(rHP)
	add		rH1,rH1,rW14
	stw		rH1,4(rHP)
	add		rH2,rH2,rW12
	stw		rH2,8(rHP)
	add		rH3,rH3,rW10
	stw		rH3,12(rHP)
	NEXT_BLOCK

	bdnz		ppc_md5_main

	FINALIZE
	blr