xref: /linux/lib/crypto/powerpc/md5-asm.S (revision d8768fb12a14c30436bd0466b4fc28edeef45078)
1*09371e13SEric Biggers/* SPDX-License-Identifier: GPL-2.0-or-later */
2*09371e13SEric Biggers/*
3*09371e13SEric Biggers * Fast MD5 implementation for PPC
4*09371e13SEric Biggers *
5*09371e13SEric Biggers * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
6*09371e13SEric Biggers */
7*09371e13SEric Biggers#include <asm/ppc_asm.h>
8*09371e13SEric Biggers#include <asm/asm-offsets.h>
9*09371e13SEric Biggers#include <asm/asm-compat.h>
10*09371e13SEric Biggers
/*
 * Register aliases.
 *
 * rHP is the base register for the lwz/stw accesses to the four state words
 * in ppc_md5_transform; rWP is the base register used by LOAD_DATA.
 */
11*09371e13SEric Biggers#define rHP	r3
12*09371e13SEric Biggers#define rWP	r4
13*09371e13SEric Biggers
/*
 * Working copies of the four state words.
 *
 * rH3 is r5, the same register ppc_md5_transform moves into CTR on entry, so
 * rH3 may only be loaded after that mtctr.
 * rH0 is r0: this is safe because the round macros only use rH0..rH3 with
 * and/andc/or/orc/xor/add/rotrwi, never as the source of addi/addis (where
 * r0 would be read as the value 0).
 */
14*09371e13SEric Biggers#define rH0	r0
15*09371e13SEric Biggers#define rH1	r6
16*09371e13SEric Biggers#define rH2	r7
17*09371e13SEric Biggers#define rH3	r5
18*09371e13SEric Biggers
/*
 * Destinations of the sixteen word loads done by R_00_15 for one block. The
 * round macros add their constants into these registers in place.
 * rW05..rW15 (r14-r24) and the temporaries rT0/rT1 (r25, r26) are exactly the
 * range named by SAVE_GPRS(14, 26, r1) / REST_GPRS(14, 26, r1).
 */
19*09371e13SEric Biggers#define rW00	r8
20*09371e13SEric Biggers#define rW01	r9
21*09371e13SEric Biggers#define rW02	r10
22*09371e13SEric Biggers#define rW03	r11
23*09371e13SEric Biggers#define rW04	r12
24*09371e13SEric Biggers#define rW05	r14
25*09371e13SEric Biggers#define rW06	r15
26*09371e13SEric Biggers#define rW07	r16
27*09371e13SEric Biggers#define rW08	r17
28*09371e13SEric Biggers#define rW09	r18
29*09371e13SEric Biggers#define rW10	r19
30*09371e13SEric Biggers#define rW11	r20
31*09371e13SEric Biggers#define rW12	r21
32*09371e13SEric Biggers#define rW13	r22
33*09371e13SEric Biggers#define rW14	r23
34*09371e13SEric Biggers#define rW15	r24
35*09371e13SEric Biggers
/* Scratch registers clobbered by every round macro. */
36*09371e13SEric Biggers#define rT0	r25
37*09371e13SEric Biggers#define rT1	r26
/*
 * INITIALIZE - function prologue.
 *
 * Moves r1 down by INT_FRAME_SIZE with PPC_STLU (store-with-update macro from
 * asm-compat.h) and saves r14..r26 into the new frame. Those are the
 * registers behind rW05..rW15, rT0 and rT1. Must be paired with FINALIZE.
 */
39*09371e13SEric Biggers#define INITIALIZE \
40*09371e13SEric Biggers	PPC_STLU r1,-INT_FRAME_SIZE(r1); \
41*09371e13SEric Biggers	SAVE_GPRS(14, 26, r1)		/* push registers onto stack	*/
42*09371e13SEric Biggers
/*
 * FINALIZE - function epilogue, the inverse of INITIALIZE.
 *
 * Restores r14..r26 from the frame and then releases the frame by adding
 * INT_FRAME_SIZE back to r1. It does not return; the caller of the macro
 * issues the blr.
 */
43*09371e13SEric Biggers#define FINALIZE \
44*09371e13SEric Biggers	REST_GPRS(14, 26, r1);		/* pop registers from stack	*/ \
45*09371e13SEric Biggers	addi	r1,r1,INT_FRAME_SIZE
46*09371e13SEric Biggers
/*
 * Message word access, selected by CPU endianness.
 *
 * LOAD_DATA(reg, off)	load one 32-bit message word into reg
 * INC_PTR		emitted by R_00_15 after each LOAD_DATA
 * NEXT_BLOCK		emitted once per block after the state is stored
 *
 * Big endian: lwbrx loads the word byte-reversed from the address in rWP
 * (the RA operand is 0), so "off" is unused and rWP itself is advanced by 4
 * after every word. Sixteen loads per block move rWP by 64, which is why
 * NEXT_BLOCK is empty here.
 *
 * Little endian: lwz loads from off(rWP) without touching rWP, INC_PTR is
 * empty, and NEXT_BLOCK advances rWP by 64 in one go.
 */
47*09371e13SEric Biggers#ifdef __BIG_ENDIAN__
48*09371e13SEric Biggers#define LOAD_DATA(reg, off) \
49*09371e13SEric Biggers	lwbrx		reg,0,rWP;	/* load data			*/
50*09371e13SEric Biggers#define INC_PTR \
51*09371e13SEric Biggers	addi		rWP,rWP,4;	/* increment per word		*/
52*09371e13SEric Biggers#define NEXT_BLOCK			/* nothing to do		*/
53*09371e13SEric Biggers#else
54*09371e13SEric Biggers#define LOAD_DATA(reg, off) \
55*09371e13SEric Biggers	lwz		reg,off(rWP);	/* load data			*/
56*09371e13SEric Biggers#define INC_PTR				/* nothing to do		*/
57*09371e13SEric Biggers#define NEXT_BLOCK \
58*09371e13SEric Biggers	addi		rWP,rWP,64;	/* increment per block		*/
59*09371e13SEric Biggers#endif
60*09371e13SEric Biggers
/*
 * R_00_15 - two interleaved MD5 steps of the first round, including the
 * loads of the two message words they consume.
 *
 * a, b, c, d	state registers; step 1 rewrites a, step 2 rewrites d and
 *		already uses the new a
 * w0, w1	registers receiving the message words for step 1 / step 2
 * p, q		right-rotate counts for step 1 / step 2 (rotrwi by n is a
 *		left rotate by 32 - n)
 * off		byte offset of w0 within the block, w1 is taken from off+4;
 *		ignored by the big-endian LOAD_DATA
 * k0h, k0l	step 1 constant split into addis/addi immediates
 * k1h, k1l	step 2 constant split into addis/addi immediates
 *
 * Step 1: a = b + rotr(a + ((b and c) or (~b and d)) + w0 + k0, p)
 * Step 2: d = a + rotr(d + ((a and b) or (~a and c)) + w1 + k1, q)
 *
 * addi sign-extends its immediate, so a low half with bit 15 set has to be
 * passed as a negative number together with a high half that is one larger.
 * The constant is added into w0/w1 themselves: on exit they hold w + k, not
 * the plain message word. rT0 and rT1 are clobbered.
 */
61*09371e13SEric Biggers#define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \
62*09371e13SEric Biggers	LOAD_DATA(w0, off)		/*    W				*/ \
63*09371e13SEric Biggers	and		rT0,b,c;	/* 1: f = b and c		*/ \
64*09371e13SEric Biggers	INC_PTR				/*    ptr++			*/ \
65*09371e13SEric Biggers	andc		rT1,d,b;	/* 1: f' = ~b and d		*/ \
66*09371e13SEric Biggers	LOAD_DATA(w1, off+4)		/*    W				*/ \
67*09371e13SEric Biggers	or		rT0,rT0,rT1;	/* 1: f = f or f'		*/ \
68*09371e13SEric Biggers	addi		w0,w0,k0l;	/* 1: wk = w + k(lo)		*/ \
69*09371e13SEric Biggers	add		a,a,rT0;	/* 1: a = a + f			*/ \
70*09371e13SEric Biggers	addis		w0,w0,k0h;	/* 1: wk = wk + k(hi)		*/ \
71*09371e13SEric Biggers	addis		w1,w1,k1h;	/* 2: wk = w + k(hi)		*/ \
72*09371e13SEric Biggers	add		a,a,w0;		/* 1: a = a + wk		*/ \
73*09371e13SEric Biggers	addi		w1,w1,k1l;	/* 2: wk = wk + k(lo)		*/ \
74*09371e13SEric Biggers	rotrwi		a,a,p;		/* 1: a = a rotr p		*/ \
75*09371e13SEric Biggers	add		d,d,w1;		/* 2: d = d + wk		*/ \
76*09371e13SEric Biggers	add		a,a,b;		/* 1: a = a + b			*/ \
77*09371e13SEric Biggers	and		rT0,a,b;	/* 2: f = a and b		*/ \
78*09371e13SEric Biggers	andc		rT1,c,a;	/* 2: f' = ~a and c		*/ \
79*09371e13SEric Biggers	or		rT0,rT0,rT1;	/* 2: f = f or f'		*/ \
80*09371e13SEric Biggers	add		d,d,rT0;	/* 2: d = d + f			*/ \
81*09371e13SEric Biggers	INC_PTR				/*    ptr++			*/ \
82*09371e13SEric Biggers	rotrwi		d,d,q;		/* 2: d = d rotr q		*/ \
83*09371e13SEric Biggers	add		d,d,a;		/* 2: d = d + a			*/
84*09371e13SEric Biggers
/*
 * R_16_31 - two interleaved MD5 steps of the second round.
 *
 * a, b, c, d	state registers; step 1 rewrites a, step 2 rewrites d and
 *		already uses the new a
 * w0, w1	word registers for step 1 / step 2; nothing is loaded here,
 *		the registers are used with whatever they already contain
 * p, q		right-rotate counts for step 1 / step 2 (rotrwi by n is a
 *		left rotate by 32 - n)
 * k0h, k0l	value added to w0, split into addis/addi immediates
 * k1h, k1l	value added to w1, split into addis/addi immediates
 *
 * Step 1: a = b + rotr(a + ((c and ~d) or (b and d)) + w0 + k0, p)
 * Step 2: d = a + rotr(d + ((b and ~c) or (a and c)) + w1 + k1, q)
 *
 * w0/w1 are modified in place, so k0/k1 are added on top of anything an
 * earlier round macro already added to the same register. addi sign-extends
 * its immediate. rT0 and rT1 are clobbered.
 */
85*09371e13SEric Biggers#define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
86*09371e13SEric Biggers	andc		rT0,c,d;	/* 1: f = c and ~d		*/ \
87*09371e13SEric Biggers	and		rT1,b,d;	/* 1: f' = b and d		*/ \
88*09371e13SEric Biggers	addi		w0,w0,k0l;	/* 1: wk = w + k(lo)		*/ \
89*09371e13SEric Biggers	or		rT0,rT0,rT1;	/* 1: f = f or f'		*/ \
90*09371e13SEric Biggers	addis		w0,w0,k0h;	/* 1: wk = wk + k(hi)		*/ \
91*09371e13SEric Biggers	add		a,a,rT0;	/* 1: a = a + f			*/ \
92*09371e13SEric Biggers	addi		w1,w1,k1l;	/* 2: wk = w + k(lo)		*/ \
93*09371e13SEric Biggers	add		a,a,w0;		/* 1: a = a + wk		*/ \
94*09371e13SEric Biggers	addis		w1,w1,k1h;	/* 2: wk = wk + k(hi)		*/ \
95*09371e13SEric Biggers	andc		rT0,b,c;	/* 2: f = b and ~c		*/ \
96*09371e13SEric Biggers	rotrwi		a,a,p;		/* 1: a = a rotr p		*/ \
97*09371e13SEric Biggers	add		a,a,b;		/* 1: a = a + b			*/ \
98*09371e13SEric Biggers	add		d,d,w1;		/* 2: d = d + wk		*/ \
99*09371e13SEric Biggers	and		rT1,a,c;	/* 2: f' = a and c		*/ \
100*09371e13SEric Biggers	or		rT0,rT0,rT1;	/* 2: f = f or f'		*/ \
101*09371e13SEric Biggers	add		d,d,rT0;	/* 2: d = d + f			*/ \
102*09371e13SEric Biggers	rotrwi		d,d,q;		/* 2: d = d rotr q		*/ \
103*09371e13SEric Biggers	add		d,d,a;		/* 2: d = d + a			*/
104*09371e13SEric Biggers
/*
 * R_32_47 - two interleaved MD5 steps of the third round.
 *
 * a, b, c, d	state registers; step 1 rewrites a, step 2 rewrites d and
 *		already uses the new a
 * w0, w1	word registers for step 1 / step 2; nothing is loaded here,
 *		the registers are used with whatever they already contain
 * p, q		right-rotate counts for step 1 / step 2 (rotrwi by n is a
 *		left rotate by 32 - n)
 * k0h, k0l	value added to w0, split into addis/addi immediates
 * k1h, k1l	value added to w1, split into addis/addi immediates
 *
 * Step 1: a = b + rotr(a + (b xor c xor d) + w0 + k0, p)
 * Step 2: d = a + rotr(d + (a xor b xor c) + w1 + k1, q)
 *
 * rT0 keeps b xor c across both steps, so step 2 needs only one more xor.
 * w0/w1 are modified in place, so k0/k1 are added on top of anything an
 * earlier round macro already added to the same register. addi sign-extends
 * its immediate. rT0 and rT1 are clobbered.
 */
105*09371e13SEric Biggers#define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
106*09371e13SEric Biggers	xor		rT0,b,c;	/* 1: f' = b xor c		*/ \
107*09371e13SEric Biggers	addi		w0,w0,k0l;	/* 1: wk = w + k(lo)		*/ \
108*09371e13SEric Biggers	xor		rT1,rT0,d;	/* 1: f = f' xor d		*/ \
109*09371e13SEric Biggers	addis		w0,w0,k0h;	/* 1: wk = wk + k(hi)		*/ \
110*09371e13SEric Biggers	add		a,a,rT1;	/* 1: a = a + f			*/ \
111*09371e13SEric Biggers	addi		w1,w1,k1l;	/* 2: wk = w + k(lo)		*/ \
112*09371e13SEric Biggers	add		a,a,w0;		/* 1: a = a + wk		*/ \
113*09371e13SEric Biggers	addis		w1,w1,k1h;	/* 2: wk = wk + k(hi)		*/ \
114*09371e13SEric Biggers	rotrwi		a,a,p;		/* 1: a = a rotr p		*/ \
115*09371e13SEric Biggers	add		d,d,w1;		/* 2: d = d + wk		*/ \
116*09371e13SEric Biggers	add		a,a,b;		/* 1: a = a + b			*/ \
117*09371e13SEric Biggers	xor		rT1,rT0,a;	/* 2: f = f' xor a		*/ \
118*09371e13SEric Biggers	add		d,d,rT1;	/* 2: d = d + f			*/ \
119*09371e13SEric Biggers	rotrwi		d,d,q;		/* 2: d = d rotr q		*/ \
120*09371e13SEric Biggers	add		d,d,a;		/* 2: d = d + a			*/
121*09371e13SEric Biggers
/*
 * R_48_63 - two interleaved MD5 steps of the fourth round.
 *
 * a, b, c, d	state registers; step 1 rewrites a, step 2 rewrites d and
 *		already uses the new a
 * w0, w1	word registers for step 1 / step 2; nothing is loaded here,
 *		the registers are used with whatever they already contain
 * p, q		right-rotate counts for step 1 / step 2 (rotrwi by n is a
 *		left rotate by 32 - n)
 * k0h, k0l	value added to w0, split into addis/addi immediates
 * k1h, k1l	value added to w1, split into addis/addi immediates
 *
 * Step 1: a = b + rotr(a + ((b or ~d) xor c) + w0 + k0, p)
 * Step 2: d = a + rotr(d + ((a or ~c) xor b) + w1 + k1, q)
 *
 * w0/w1 are modified in place, so k0/k1 are added on top of anything an
 * earlier round macro already added to the same register. addi sign-extends
 * its immediate. Only rT0 is used as scratch.
 */
122*09371e13SEric Biggers#define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
123*09371e13SEric Biggers	addi		w0,w0,k0l;	/* 1: wk = w + k(lo)		*/ \
124*09371e13SEric Biggers	orc		rT0,b,d;	/* 1: f = b or ~d		*/ \
125*09371e13SEric Biggers	addis		w0,w0,k0h;	/* 1: wk = wk + k(hi)		*/ \
126*09371e13SEric Biggers	xor		rT0,rT0,c;	/* 1: f = f xor c		*/ \
127*09371e13SEric Biggers	add		a,a,w0;		/* 1: a = a + wk		*/ \
128*09371e13SEric Biggers	addi		w1,w1,k1l;	/* 2: wk = w + k(lo)		*/ \
129*09371e13SEric Biggers	add		a,a,rT0;	/* 1: a = a + f			*/ \
130*09371e13SEric Biggers	addis		w1,w1,k1h;	/* 2: wk = wk + k(hi)		*/ \
131*09371e13SEric Biggers	rotrwi		a,a,p;		/* 1: a = a rotr p		*/ \
132*09371e13SEric Biggers	add		a,a,b;		/* 1: a = a + b			*/ \
133*09371e13SEric Biggers	orc		rT0,a,c;	/* 2: f = a or ~c		*/ \
134*09371e13SEric Biggers	add		d,d,w1;		/* 2: d = d + wk		*/ \
135*09371e13SEric Biggers	xor		rT0,rT0,b;	/* 2: f = f xor b		*/ \
136*09371e13SEric Biggers	add		d,d,rT0;	/* 2: d = d + f			*/ \
137*09371e13SEric Biggers	rotrwi		d,d,q;		/* 2: d = d rotr q		*/ \
138*09371e13SEric Biggers	add		d,d,a;		/* 2: d = d + a			*/
139*09371e13SEric Biggers
/*
 * ppc_md5_transform - MD5 block function, processing CTR blocks of 64 bytes.
 *
 * Registers read on entry:
 *   r3 (rHP)	address of four consecutive 32-bit state words; they are
 *		loaded once before the loop and stored back after every block
 *   r4 (rWP)	address of the input data; advanced by 64 bytes per block
 *   r5		loop count, moved into CTR before r5 is reused as rH3
 *
 * NOTE(review): the count is not checked for zero and bdnz decrements CTR
 * before testing it, so a count of 0 would not stop after zero blocks.
 * Presumably every caller passes at least 1 - confirm.
 *
 * Each R_* invocation performs two steps. The argument order alternates
 * between (rH0, rH1, rH2, rH3) and (rH2, rH3, rH0, rH1), so four consecutive
 * steps rewrite rH0, rH3, rH2 and rH1 in turn. Rotate counts are right
 * rotates, e.g. 25 stands for a left rotate by 7.
 *
 * Only R_00_15 loads message words, and it adds the step constant into the
 * word register. The later rounds reuse those registers without reloading,
 * so the immediates passed to R_16_31, R_32_47 and R_48_63 are the
 * difference between the constant wanted for the step and the constant that
 * was last added to that particular word register.
 *
 * Clobbers r0 and r5..r12 plus CTR; r14..r26 are saved by INITIALIZE and
 * restored by FINALIZE.
 */
140*09371e13SEric Biggers_GLOBAL(ppc_md5_transform)
141*09371e13SEric Biggers	INITIALIZE
142*09371e13SEric Biggers
143*09371e13SEric Biggers	mtctr		r5		/* loop count; frees r5 for rH3	*/
144*09371e13SEric Biggers	lwz		rH0,0(rHP)
145*09371e13SEric Biggers	lwz		rH1,4(rHP)
146*09371e13SEric Biggers	lwz		rH2,8(rHP)
147*09371e13SEric Biggers	lwz		rH3,12(rHP)
148*09371e13SEric Biggers
/* One iteration per 64-byte block; rH0..rH3 stay live across iterations. */
149*09371e13SEric Biggersppc_md5_main:
	/* round 1: loads words 0..15 in order, two per invocation */
150*09371e13SEric Biggers	R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0,
151*09371e13SEric Biggers		0xd76b, -23432, 0xe8c8, -18602)
152*09371e13SEric Biggers	R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8,
153*09371e13SEric Biggers		0x2420, 0x70db, 0xc1be, -12562)
154*09371e13SEric Biggers	R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16,
155*09371e13SEric Biggers		0xf57c, 0x0faf, 0x4788, -14806)
156*09371e13SEric Biggers	R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24,
157*09371e13SEric Biggers		0xa830, 0x4613, 0xfd47, -27391)
158*09371e13SEric Biggers	R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32,
159*09371e13SEric Biggers		0x6981, -26408, 0x8b45,  -2129)
160*09371e13SEric Biggers	R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40,
161*09371e13SEric Biggers		0xffff, 0x5bb1, 0x895d, -10306)
162*09371e13SEric Biggers	R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48,
163*09371e13SEric Biggers		0x6b90, 0x1122, 0xfd98, 0x7193)
164*09371e13SEric Biggers	R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56,
165*09371e13SEric Biggers		0xa679, 0x438e, 0x49b4, 0x0821)
166*09371e13SEric Biggers
	/* round 2: word order 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12 */
167*09371e13SEric Biggers	R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23,
168*09371e13SEric Biggers		0x0d56, 0x6e0c, 0x1810, 0x6d2d)
169*09371e13SEric Biggers	R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12,
170*09371e13SEric Biggers		0x9d02, -32109, 0x124c, 0x2332)
171*09371e13SEric Biggers	R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23,
172*09371e13SEric Biggers		0x8ea7, 0x4a33, 0x0245, -18270)
173*09371e13SEric Biggers	R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12,
174*09371e13SEric Biggers		0x8eee,  -8608, 0xf258,  -5095)
175*09371e13SEric Biggers	R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23,
176*09371e13SEric Biggers		0x969d, -10697, 0x1cbe, -15288)
177*09371e13SEric Biggers	R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12,
178*09371e13SEric Biggers		0x3317, 0x3e99, 0xdbd9, 0x7c15)
179*09371e13SEric Biggers	R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23,
180*09371e13SEric Biggers		0xac4b, 0x7772, 0xd8cf, 0x331d)
181*09371e13SEric Biggers	R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12,
182*09371e13SEric Biggers		0x6a28, 0x6dd8, 0x219a, 0x3b68)
183*09371e13SEric Biggers
	/* round 3: word order 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2 */
184*09371e13SEric Biggers	R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21,
185*09371e13SEric Biggers		0x29cb, 0x28e5, 0x4218,  -7788)
186*09371e13SEric Biggers	R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16,  9,
187*09371e13SEric Biggers		0x473f, 0x06d1, 0x3aae, 0x3036)
188*09371e13SEric Biggers	R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21,
189*09371e13SEric Biggers		0xaea1, -15134, 0x640b, -11295)
190*09371e13SEric Biggers	R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16,  9,
191*09371e13SEric Biggers		0x8f4c, 0x4887, 0xbc7c, -22499)
192*09371e13SEric Biggers	R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21,
193*09371e13SEric Biggers		0x7eb8, -27199, 0x00ea, 0x6050)
194*09371e13SEric Biggers	R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16,  9,
195*09371e13SEric Biggers		0xe01a, 0x22fe, 0x4447, 0x69c5)
196*09371e13SEric Biggers	R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21,
197*09371e13SEric Biggers		0xb7f3, 0x0253, 0x59b1, 0x4d5b)
198*09371e13SEric Biggers	R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16,  9,
199*09371e13SEric Biggers		0x4701, -27017, 0xc7bd, -19859)
200*09371e13SEric Biggers
	/* round 4: word order 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 */
201*09371e13SEric Biggers	R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22,
202*09371e13SEric Biggers		0x0988,  -1462, 0x4c70, -19401)
203*09371e13SEric Biggers	R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11,
204*09371e13SEric Biggers		0xadaf,  -5221, 0xfc99, 0x66f7)
205*09371e13SEric Biggers	R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22,
206*09371e13SEric Biggers		0x7e80, -16418, 0xba1e, -25587)
207*09371e13SEric Biggers	R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11,
208*09371e13SEric Biggers		0x4130, 0x380d, 0xe0c5, 0x738d)
	/*
	 * rW00, rW14, rW12 and rW10 have had their last use as message words
	 * above. They are recycled to fetch the state as it was before this
	 * block, with the loads spread between the remaining steps.
	 */
209*09371e13SEric Biggers	lwz		rW00,0(rHP)
210*09371e13SEric Biggers	R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22,
211*09371e13SEric Biggers		0xe837, -30770, 0xde8a, 0x69e8)
212*09371e13SEric Biggers	lwz		rW14,4(rHP)
213*09371e13SEric Biggers	R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11,
214*09371e13SEric Biggers		0x9e79, 0x260f, 0x256d, -27941)
215*09371e13SEric Biggers	lwz		rW12,8(rHP)
216*09371e13SEric Biggers	R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22,
217*09371e13SEric Biggers		0xab75, -20775, 0x4f9e, -28397)
218*09371e13SEric Biggers	lwz		rW10,12(rHP)
219*09371e13SEric Biggers	R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11,
220*09371e13SEric Biggers		0x662b, 0x7c56, 0x11b2, 0x0358)
221*09371e13SEric Biggers
	/* add the previous state to the result and store the new state */
222*09371e13SEric Biggers	add		rH0,rH0,rW00
223*09371e13SEric Biggers	stw		rH0,0(rHP)
224*09371e13SEric Biggers	add		rH1,rH1,rW14
225*09371e13SEric Biggers	stw		rH1,4(rHP)
226*09371e13SEric Biggers	add		rH2,rH2,rW12
227*09371e13SEric Biggers	stw		rH2,8(rHP)
228*09371e13SEric Biggers	add		rH3,rH3,rW10
229*09371e13SEric Biggers	stw		rH3,12(rHP)
230*09371e13SEric Biggers	NEXT_BLOCK
231*09371e13SEric Biggers
232*09371e13SEric Biggers	bdnz		ppc_md5_main	/* --CTR, loop while non-zero	*/
233*09371e13SEric Biggers
234*09371e13SEric Biggers	FINALIZE
235*09371e13SEric Biggers	blr
236