/* xref: /linux/arch/powerpc/crypto/sha1-powerpc-asm.S (revision 2b64b2ed277ff23e785fbdb65098ee7e1252d64f) */
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * SHA-1 implementation for PowerPC.
 *
 * Copyright (C) 2005 Paul Mackerras <paulus@samba.org>
 */

#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/asm-compat.h>

/*
 * LWZ(rt, d, ra): load the 32-bit word at d(ra) into rt, interpreting the
 * four bytes in memory as a big-endian value.
 *
 * On a big-endian build this is a plain lwz.  Otherwise the displacement
 * is first placed in rt and a byte-reversed indexed load (lwbrx) is used,
 * so rt doubles as the index register.  rt must therefore not be r0: in
 * the RA slot of an indexed load, r0 means the constant 0 rather than the
 * register.  d must fit the signed 16-bit immediate of li.
 */
#ifdef __BIG_ENDIAN__
#define LWZ(rt, d, ra)	\
	lwz	rt,d(ra)
#else
#define LWZ(rt, d, ra)	\
	li	rt,d;	\
	lwbrx	rt,rt,ra
#endif

/*
 * Register assignment for the working variables T, A, B, C, D, E.
 *
 * The six values live in registers 7 - 12.  Instead of copying values
 * between registers after every round, the name-to-register mapping is
 * rotated by one: RT(t) == RA(t+1), RA(t) == RB(t+1), ... and
 * RD(t) == RE(t+1).  Each macro expands to the bare register number used
 * for that variable in round t.
 */
#define RT(t)	((((t)+5)%6)+7)
#define RA(t)	((((t)+4)%6)+7)
#define RB(t)	((((t)+3)%6)+7)
#define RC(t)	((((t)+2)%6)+7)
#define RD(t)	((((t)+1)%6)+7)
#define RE(t)	((((t)+0)%6)+7)

/*
 * W(t): register number holding message-schedule word t.
 *
 * Registers 16 - 31 are used as a 16-entry circular buffer, so W(t) and
 * W(t+16) name the same register.  Every use in this file passes t >= 0.
 */
#define W(t)	(((t)%16)+16)

/*
 * LOADW(t): load message word t (byte offset t*4 from r4) into W(t),
 * reading it as a big-endian value via LWZ.
 */
#define LOADW(t)				\
	LWZ(W(t),(t)*4,r4)

/*
 * STEPD0_LOAD(t): one round with f = (B & C) | (D & ~B), interleaved with
 * the load of message word t+4 from the block at r4 into W(t+4).
 *
 *	T = rotl(A, 5) + f + E + r15 + W(t)
 *	B = rotl(B, 30)
 *
 * r15 must hold the round constant.  Clobbers r0, r6 and r14.
 */
#define STEPD0_LOAD(t)				\
	andc	r0,RD(t),RB(t);		\
	and	r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;			\
	or	r6,r6,r0;			\
	add	r0,RE(t),r15;			\
	add	RT(t),RT(t),r6;		\
	add	r14,r0,W(t);			\
	LWZ(W((t)+4),((t)+4)*4,r4);	\
	rotlwi	RB(t),RB(t),30;			\
	add	RT(t),RT(t),r14

/*
 * STEPD0_UPDATE(t): one round with f = (B & C) | (D & ~B), interleaved
 * with the computation of schedule word t+4.
 *
 *	T = rotl(A, 5) + f + E + r15 + W(t)
 *	B = rotl(B, 30)
 *	W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 *
 * W(t+4) and W(t-12) are the same register, so the oldest word in the
 * circular buffer is replaced in place.  r15 must hold the round
 * constant.  Clobbers r0, r5 and r6.
 */
#define STEPD0_UPDATE(t)			\
	and	r6,RB(t),RC(t);		\
	andc	r0,RD(t),RB(t);		\
	rotlwi	RT(t),RA(t),5;			\
	rotlwi	RB(t),RB(t),30;			\
	or	r6,r6,r0;			\
	add	r0,RE(t),r15;			\
	xor	r5,W((t)+4-3),W((t)+4-8);		\
	add	RT(t),RT(t),r6;		\
	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
	add	r0,r0,W(t);			\
	xor	W((t)+4),W((t)+4),r5;			\
	add	RT(t),RT(t),r0;		\
	rotlwi	W((t)+4),W((t)+4),1

/*
 * STEPD1(t): one round with f = B ^ C ^ D and no schedule update.
 *
 *	T = rotl(A, 5) + f + E + r15 + W(t)
 *	B = rotl(B, 30)
 *
 * r15 must hold the round constant.  Clobbers r0 and r6.
 */
#define STEPD1(t)				\
	xor	r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;			\
	rotlwi	RB(t),RB(t),30;			\
	xor	r6,r6,RD(t);			\
	add	r0,RE(t),r15;			\
	add	RT(t),RT(t),r6;		\
	add	r0,r0,W(t);			\
	add	RT(t),RT(t),r0

/*
 * STEPD1_UPDATE(t): one round with f = B ^ C ^ D, interleaved with the
 * computation of schedule word t+4.
 *
 *	T = rotl(A, 5) + f + E + r15 + W(t)
 *	B = rotl(B, 30)
 *	W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 *
 * W(t+4) and W(t-12) are the same register, so the oldest word in the
 * circular buffer is replaced in place.  r15 must hold the round
 * constant.  Clobbers r0, r5 and r6.
 */
#define STEPD1_UPDATE(t)				\
	xor	r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;			\
	rotlwi	RB(t),RB(t),30;			\
	xor	r6,r6,RD(t);			\
	add	r0,RE(t),r15;			\
	xor	r5,W((t)+4-3),W((t)+4-8);		\
	add	RT(t),RT(t),r6;		\
	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
	add	r0,r0,W(t);			\
	xor	W((t)+4),W((t)+4),r5;			\
	add	RT(t),RT(t),r0;		\
	rotlwi	W((t)+4),W((t)+4),1

/*
 * STEPD2_UPDATE(t): one round with f = (B & C) | (B & D) | (C & D),
 * interleaved with the computation of schedule word t+4.
 *
 *	T = rotl(A, 5) + f + E + r15 + W(t)
 *	B = rotl(B, 30)
 *	W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 *
 * The third term of f is formed after B has been rotated, which is safe
 * because that term reads only C and D.  W(t+4) and W(t-12) are the same
 * register, so the oldest word in the circular buffer is replaced in
 * place.  r15 must hold the round constant.  Clobbers r0, r5 and r6.
 */
#define STEPD2_UPDATE(t)			\
	and	r6,RB(t),RC(t);		\
	and	r0,RB(t),RD(t);		\
	rotlwi	RT(t),RA(t),5;			\
	or	r6,r6,r0;			\
	rotlwi	RB(t),RB(t),30;			\
	and	r0,RC(t),RD(t);		\
	xor	r5,W((t)+4-3),W((t)+4-8);	\
	or	r6,r6,r0;			\
	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
	add	r0,RE(t),r15;			\
	add	RT(t),RT(t),r6;		\
	add	r0,r0,W(t);			\
	xor	W((t)+4),W((t)+4),r5;		\
	add	RT(t),RT(t),r0;		\
	rotlwi	W((t)+4),W((t)+4),1

/*
 * STEP0LD4(t): four consecutive STEPD0_LOAD rounds, t through t+3.
 * As a side effect message words t+4 through t+7 are loaded.
 */
#define STEP0LD4(t)				\
	STEPD0_LOAD(t);				\
	STEPD0_LOAD((t)+1);			\
	STEPD0_LOAD((t)+2);			\
	STEPD0_LOAD((t)+3)

/*
 * STEPUP4(t, fn): four consecutive STEP<fn>_UPDATE rounds, t through t+3.
 * fn is pasted into the macro name, so it must be one of D0, D1 or D2.
 */
#define STEPUP4(t, fn)				\
	STEP##fn##_UPDATE(t);			\
	STEP##fn##_UPDATE((t)+1);		\
	STEP##fn##_UPDATE((t)+2);		\
	STEP##fn##_UPDATE((t)+3)

/*
 * STEPUP20(t, fn): twenty consecutive STEP<fn>_UPDATE rounds, t through
 * t+19, built from five STEPUP4 groups.
 */
#define STEPUP20(t, fn)				\
	STEPUP4(t, fn);				\
	STEPUP4((t)+4, fn);			\
	STEPUP4((t)+8, fn);			\
	STEPUP4((t)+12, fn);			\
	STEPUP4((t)+16, fn)

/*
 * powerpc_sha_transform: run the 80 SHA-1 rounds over one 16-word
 * (64-byte) message block and add the result into the five-word state.
 *
 * In:   r3 = pointer to the five 32-bit state words A..E at byte offsets
 *            0, 4, 8, 12, 16; accessed with plain lwz/stw (native order)
 *       r4 = pointer to the message block; its words are read as
 *            big-endian values through LWZ
 * Out:  the five state words at r3 are updated in place
 *
 * Registers: r0, r5, r6 are scratch; r7 - r12 hold the rotating working
 * variables; r15 holds the current round constant; r16 - r31 hold the
 * message schedule.  An INT_FRAME_SIZE stack frame is created on entry,
 * the SAVE_8GPRS/SAVE_10GPRS macros from asm/ppc_asm.h store the
 * registers starting at r14 and r22 there, and the matching REST_ macros
 * reload them before returning.
 */
_GLOBAL(powerpc_sha_transform)
	PPC_STLU r1,-INT_FRAME_SIZE(r1)
	SAVE_8GPRS(14, r1)
	SAVE_10GPRS(22, r1)

	/* Load up A - E */
	lwz	RA(0),0(r3)	/* A */
	lwz	RB(0),4(r3)	/* B */
	lwz	RC(0),8(r3)	/* C */
	lwz	RD(0),12(r3)	/* D */
	lwz	RE(0),16(r3)	/* E */

	/* Words 0 - 3; the STEPD0_LOAD rounds below fetch words 4 - 15. */
	LOADW(0)
	LOADW(1)
	LOADW(2)
	LOADW(3)

	lis	r15,0x5a82	/* K0-19 */
	ori	r15,r15,0x7999
	STEP0LD4(0)
	STEP0LD4(4)
	STEP0LD4(8)
	/* From round 12 on, each round also produces schedule word t+4. */
	STEPUP4(12, D0)
	STEPUP4(16, D0)

	lis	r15,0x6ed9	/* K20-39 */
	ori	r15,r15,0xeba1
	STEPUP20(20, D1)

	lis	r15,0x8f1b	/* K40-59 */
	ori	r15,r15,0xbcdc
	STEPUP20(40, D2)

	lis	r15,0xca62	/* K60-79 */
	ori	r15,r15,0xc1d6
	STEPUP4(60, D1)
	STEPUP4(64, D1)
	STEPUP4(68, D1)
	STEPUP4(72, D1)
	/*
	 * The last four rounds need no further schedule words and read only
	 * W(76) - W(79), i.e. r28 - r31, so r16 - r20 are free to receive
	 * the previous state, reloaded in between the rounds.
	 */
	lwz	r20,16(r3)
	STEPD1(76)
	lwz	r19,12(r3)
	STEPD1(77)
	lwz	r18,8(r3)
	STEPD1(78)
	lwz	r17,4(r3)
	STEPD1(79)

	/*
	 * Add the previous state to the round-80 values, placing the sums
	 * in the round-0 register assignment.  Each destination below is a
	 * register whose round-80 value has already been consumed by an
	 * earlier add, so the order E, D, C, B, A matters.  E is summed into
	 * r20 and moved last because RE(0) is the register that still holds
	 * A (RA(80)) until the final add.
	 */
	lwz	r16,0(r3)
	add	r20,RE(80),r20
	add	RD(0),RD(80),r19
	add	RC(0),RC(80),r18
	add	RB(0),RB(80),r17
	add	RA(0),RA(80),r16
	mr	RE(0),r20
	stw	RA(0),0(r3)
	stw	RB(0),4(r3)
	stw	RC(0),8(r3)
	stw	RD(0),12(r3)
	stw	RE(0),16(r3)

	REST_8GPRS(14, r1)
	REST_10GPRS(22, r1)
	addi	r1,r1,INT_FRAME_SIZE
	blr