xref: /linux/arch/powerpc/crypto/sha1-powerpc-asm.S (revision 03ab8e6297acd1bc0eedaa050e2a1635c576fd11)
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * SHA-1 implementation for PowerPC.
4 *
5 * Copyright (C) 2005 Paul Mackerras <paulus@samba.org>
6 */
7
8#include <asm/ppc_asm.h>
9#include <asm/asm-offsets.h>
10#include <asm/asm-compat.h>
11
/*
 * LWZ(rt, d, ra): load the 32-bit word at offset d from ra into rt,
 * reading the bytes in memory as a big-endian value on either kind of
 * build.
 *
 * Big-endian builds use a plain lwz.  Otherwise the offset is first put
 * into rt and lwbrx (byte-reversed indexed load) reads from rt + ra.  In
 * that variant rt doubles as the index temporary, so rt must not be r0
 * (lwbrx treats r0 in the base slot as the value 0) and d has to fit the
 * signed 16-bit immediate of li.
 */
12#ifdef __BIG_ENDIAN__
13#define LWZ(rt, d, ra)	\
14	lwz	rt,d(ra)
15#else
16#define LWZ(rt, d, ra)	\
17	li	rt,d;	\
18	lwbrx	rt,rt,ra
19#endif
20
21/*
22 * We roll the registers for T, A, B, C, D, E around on each
23 * iteration; T on iteration t is A on iteration t+1, and so on.
24 * We use registers 7 - 12 for this.
25 */
/*
 * Each macro evaluates to a GPR number in the range 7..12 for round t.
 * The offsets differ by one, so
 *	RT(t) == RA(t+1), RA(t) == RB(t+1), RB(t) == RC(t+1),
 *	RC(t) == RD(t+1), RD(t) == RE(t+1), RE(t) == RT(t+1)
 * i.e. no register moves are needed between rounds, and the register
 * that held E in round t is recycled as T in round t+1.  The mapping
 * repeats every 6 rounds.
 */
26#define RT(t)	((((t)+5)%6)+7)
27#define RA(t)	((((t)+4)%6)+7)
28#define RB(t)	((((t)+3)%6)+7)
29#define RC(t)	((((t)+2)%6)+7)
30#define RD(t)	((((t)+1)%6)+7)
31#define RE(t)	((((t)+0)%6)+7)
32
33/* We use registers 16 - 31 for the W values */
/*
 * W(t) is the GPR number holding message-schedule word t.  Only t modulo
 * 16 is significant, so W(t) and W(t+16) name the same register: the
 * schedule is kept as a 16-entry circular window in r16..r31.
 */
34#define W(t)	(((t)%16)+16)
35
/*
 * LOADW(t): fetch input word t (byte offset 4*t from r4) into the
 * register W(t), using LWZ so the word is read as big-endian.
 * NOTE(review): r4 is taken to be the pointer to the input block; the C
 * prototype is not visible in this file - confirm against the caller.
 */
36#define LOADW(t)				\
37	LWZ(W(t),(t)*4,r4)
38
/*
 * STEPD0_LOAD(t): one round with the selection function
 *	f(B,C,D) = (B & C) | (~B & D)
 * combined with a load of input word t+4:
 *
 *	T      = rotl(A, 5) + f(B,C,D) + E + K + W(t)
 *	B      = rotl(B, 30)
 *	W(t+4) = word at byte offset 4*(t+4) from r4 (via LWZ)
 *
 * K is expected in r15.  r0, r6 and r14 are clobbered as temporaries.
 * f is computed from B before B is rotated.  The load sits in the middle
 * of the arithmetic (presumably so it overlaps with the ALU work).
 */
39#define STEPD0_LOAD(t)				\
40	andc	r0,RD(t),RB(t);		\
41	and	r6,RB(t),RC(t);		\
42	rotlwi	RT(t),RA(t),5;			\
43	or	r6,r6,r0;			\
44	add	r0,RE(t),r15;			\
45	add	RT(t),RT(t),r6;		\
46	add	r14,r0,W(t);			\
47	LWZ(W((t)+4),((t)+4)*4,r4);	\
48	rotlwi	RB(t),RB(t),30;			\
49	add	RT(t),RT(t),r14
50
/*
 * STEPD0_UPDATE(t): one round with the selection function
 *	T = rotl(A, 5) + ((B & C) | (~B & D)) + E + K + W(t)
 *	B = rotl(B, 30)
 * interleaved with the computation of the schedule word four rounds
 * ahead:
 *	W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 * W(t-12) shares its register with W(t+4), so the result replaces it.
 *
 * K is expected in r15.  r0, r5 and r6 are clobbered as temporaries.
 * Requires t >= 12 so that every W() index is non-negative.
 */
51#define STEPD0_UPDATE(t)			\
52	and	r6,RB(t),RC(t);		\
53	andc	r0,RD(t),RB(t);		\
54	rotlwi	RT(t),RA(t),5;			\
55	rotlwi	RB(t),RB(t),30;			\
56	or	r6,r6,r0;			\
57	add	r0,RE(t),r15;			\
58	xor	r5,W((t)+4-3),W((t)+4-8);		\
59	add	RT(t),RT(t),r6;		\
60	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
61	add	r0,r0,W(t);			\
62	xor	W((t)+4),W((t)+4),r5;			\
63	add	RT(t),RT(t),r0;		\
64	rotlwi	W((t)+4),W((t)+4),1
65
/*
 * STEPD1(t): one round with the parity function, without touching the
 * message schedule:
 *	T = rotl(A, 5) + (B ^ C ^ D) + E + K + W(t)
 *	B = rotl(B, 30)
 * The first xor reads B before it is rotated.
 *
 * K is expected in r15.  r0 and r6 are clobbered as temporaries.
 */
66#define STEPD1(t)				\
67	xor	r6,RB(t),RC(t);		\
68	rotlwi	RT(t),RA(t),5;			\
69	rotlwi	RB(t),RB(t),30;			\
70	xor	r6,r6,RD(t);			\
71	add	r0,RE(t),r15;			\
72	add	RT(t),RT(t),r6;		\
73	add	r0,r0,W(t);			\
74	add	RT(t),RT(t),r0
75
/*
 * STEPD1_UPDATE(t): one round with the parity function
 *	T = rotl(A, 5) + (B ^ C ^ D) + E + K + W(t)
 *	B = rotl(B, 30)
 * interleaved with the computation of the schedule word four rounds
 * ahead:
 *	W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 * W(t-12) shares its register with W(t+4), so the result replaces it.
 *
 * K is expected in r15.  r0, r5 and r6 are clobbered as temporaries.
 */
76#define STEPD1_UPDATE(t)				\
77	xor	r6,RB(t),RC(t);		\
78	rotlwi	RT(t),RA(t),5;			\
79	rotlwi	RB(t),RB(t),30;			\
80	xor	r6,r6,RD(t);			\
81	add	r0,RE(t),r15;			\
82	xor	r5,W((t)+4-3),W((t)+4-8);		\
83	add	RT(t),RT(t),r6;		\
84	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
85	add	r0,r0,W(t);			\
86	xor	W((t)+4),W((t)+4),r5;			\
87	add	RT(t),RT(t),r0;		\
88	rotlwi	W((t)+4),W((t)+4),1
89
/*
 * STEPD2_UPDATE(t): one round with the majority function
 *	T = rotl(A, 5) + ((B & C) | (B & D) | (C & D)) + E + K + W(t)
 *	B = rotl(B, 30)
 * interleaved with the computation of the schedule word four rounds
 * ahead:
 *	W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 * Both terms involving B are formed before B is rotated; the (C & D)
 * term is formed afterwards and does not depend on B.
 *
 * K is expected in r15.  r0, r5 and r6 are clobbered as temporaries.
 */
90#define STEPD2_UPDATE(t)			\
91	and	r6,RB(t),RC(t);		\
92	and	r0,RB(t),RD(t);		\
93	rotlwi	RT(t),RA(t),5;			\
94	or	r6,r6,r0;			\
95	rotlwi	RB(t),RB(t),30;			\
96	and	r0,RC(t),RD(t);		\
97	xor	r5,W((t)+4-3),W((t)+4-8);	\
98	or	r6,r6,r0;			\
99	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
100	add	r0,RE(t),r15;			\
101	add	RT(t),RT(t),r6;		\
102	add	r0,r0,W(t);			\
103	xor	W((t)+4),W((t)+4),r5;		\
104	add	RT(t),RT(t),r0;		\
105	rotlwi	W((t)+4),W((t)+4),1
106
/*
 * STEP0LD4(t): four consecutive STEPD0_LOAD rounds, t .. t+3.  As a side
 * effect input words t+4 .. t+7 are loaded into their W registers.
 */
107#define STEP0LD4(t)				\
108	STEPD0_LOAD(t);				\
109	STEPD0_LOAD((t)+1);			\
110	STEPD0_LOAD((t)+2);			\
111	STEPD0_LOAD((t)+3)
112
/*
 * STEPUP4(t, fn): four consecutive rounds t .. t+3 of the macro
 * STEP<fn>_UPDATE.  fn is token-pasted into the macro name, so it must be
 * one of the suffixes for which such a macro exists (D0, D1 or D2 here).
 */
113#define STEPUP4(t, fn)				\
114	STEP##fn##_UPDATE(t);			\
115	STEP##fn##_UPDATE((t)+1);		\
116	STEP##fn##_UPDATE((t)+2);		\
117	STEP##fn##_UPDATE((t)+3)
118
/*
 * STEPUP20(t, fn): twenty consecutive rounds t .. t+19 of
 * STEP<fn>_UPDATE, built from five STEPUP4 groups.
 */
119#define STEPUP20(t, fn)				\
120	STEPUP4(t, fn);				\
121	STEPUP4((t)+4, fn);			\
122	STEPUP4((t)+8, fn);			\
123	STEPUP4((t)+12, fn);			\
124	STEPUP4((t)+16, fn)
125
/*
 * powerpc_sha_transform: run the 80 SHA-1 rounds over one 16-word input
 * block and add the result into the five-word state.
 *
 * In:	r3 = pointer to five 32-bit state words (read at entry, reloaded
 *	     and written back at exit)
 *	r4 = pointer to the 16 input words, read through LWZ/LOADW
 * Regs:	r7-r12  rotate through the T/A/B/C/D/E roles
 *	r15     current round constant K
 *	r16-r31 16-entry window of schedule words
 *	r0, r5, r6, r14 round temporaries
 * r14-r31 are saved in a new INT_FRAME_SIZE stack frame on entry and
 * restored before returning.
 * NOTE(review): the C prototype is not visible in this file; the roles
 * of r3/r4 as first/second argument should be confirmed at the caller.
 */
126_GLOBAL(powerpc_sha_transform)
127	PPC_STLU r1,-INT_FRAME_SIZE(r1)
128	SAVE_GPRS(14, 31, r1)
129
130	/* Load up A - E */
131	lwz	RA(0),0(r3)	/* A */
132	lwz	RB(0),4(r3)	/* B */
133	lwz	RC(0),8(r3)	/* C */
134	lwz	RD(0),12(r3)	/* D */
135	lwz	RE(0),16(r3)	/* E */
136
	/* Preload W0-W3; rounds 0-11 fetch W4-W15 four words ahead */
137	LOADW(0)
138	LOADW(1)
139	LOADW(2)
140	LOADW(3)
141
	/*
	 * Rounds 0-19, f = (B & C) | (~B & D).  Rounds 0-11 load the rest of
	 * the input; rounds 12-19 start generating schedule words W16-W23.
	 */
142	lis	r15,0x5a82	/* K0-19 */
143	ori	r15,r15,0x7999
144	STEP0LD4(0)
145	STEP0LD4(4)
146	STEP0LD4(8)
147	STEPUP4(12, D0)
148	STEPUP4(16, D0)
149
	/* Rounds 20-39, f = B ^ C ^ D; generates W24-W43 */
150	lis	r15,0x6ed9	/* K20-39 */
151	ori	r15,r15,0xeba1
152	STEPUP20(20, D1)
153
	/* Rounds 40-59, f = majority(B, C, D); generates W44-W63 */
154	lis	r15,0x8f1b	/* K40-59 */
155	ori	r15,r15,0xbcdc
156	STEPUP20(40, D2)
157
	/* Rounds 60-75, f = B ^ C ^ D; generates the last words W64-W79 */
158	lis	r15,0xca62	/* K60-79 */
159	ori	r15,r15,0xc1d6
160	STEPUP4(60, D1)
161	STEPUP4(64, D1)
162	STEPUP4(68, D1)
163	STEPUP4(72, D1)
	/*
	 * Rounds 76-79 need no further schedule words and read only
	 * W(76)-W(79), i.e. r28-r31.  r16-r20 are therefore free, and the
	 * original state words are reloaded into them between the rounds.
	 */
164	lwz	r20,16(r3)
165	STEPD1(76)
166	lwz	r19,12(r3)
167	STEPD1(77)
168	lwz	r18,8(r3)
169	STEPD1(78)
170	lwz	r17,4(r3)
171	STEPD1(79)
172
	/*
	 * Add the original state to the round-80 values.  After 80 rounds the
	 * rotation leaves A-E in different physical registers than at round 0
	 * (for instance RE(80) and RC(0) are both r9), so the adds are ordered
	 * such that each round-80 register is read before it is overwritten.
	 * E is summed into r20 and moved last because RE(0) is the register
	 * that still holds A (RA(80)) until the final add.
	 */
173	lwz	r16,0(r3)
174	add	r20,RE(80),r20
175	add	RD(0),RD(80),r19
176	add	RC(0),RC(80),r18
177	add	RB(0),RB(80),r17
178	add	RA(0),RA(80),r16
179	mr	RE(0),r20
	/* Write the updated state back */
180	stw	RA(0),0(r3)
181	stw	RB(0),4(r3)
182	stw	RC(0),8(r3)
183	stw	RD(0),12(r3)
184	stw	RE(0),16(r3)
185
	/* Restore the saved registers and pop the frame */
186	REST_GPRS(14, 31, r1)
187	addi	r1,r1,INT_FRAME_SIZE
188	blr
189