/* xref: /linux/lib/crypto/x86/sha1-ni-asm.S (revision f3d6cb3dc0394b866bc0d1e15157ce45844cf3d3) */
/*
 * Intel SHA Extensions optimized implementation of a SHA-1 update function
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Contact Information:
 * 	Sean Gulley <sean.m.gulley@intel.com>
 * 	Tim Chen <tim.c.chen@linux.intel.com>
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 	* Redistributions of source code must retain the above copyright
 * 	  notice, this list of conditions and the following disclaimer.
 * 	* Redistributions in binary form must reproduce the above copyright
 * 	  notice, this list of conditions and the following disclaimer in
 * 	  the documentation and/or other materials provided with the
 * 	  distribution.
 * 	* Neither the name of Intel Corporation nor the names of its
 * 	  contributors may be used to endorse or promote products derived
 * 	  from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/linkage.h>

/* Argument registers (x86-64, AT&T syntax, System V argument order) */
#define DIGEST_PTR	%rdi	/* 1st arg: SHA-1 state, read on entry and rewritten on exit */
#define DATA_PTR	%rsi	/* 2nd arg: input data, advanced by 64 bytes per block */
#define NUM_BLKS	%rdx	/* 3rd arg: block count, converted to the end-of-data pointer */

#define ABCD		%xmm0	/* state words 0-3, dword order reversed (word 0 in the high dword) */
#define E0		%xmm1	/* Need two E's b/c they ping pong */
#define E1		%xmm2
#define MSG0		%xmm3	/* MSG0-MSG3: the four message-schedule vectors in flight */
#define MSG1		%xmm4
#define MSG2		%xmm5
#define MSG3		%xmm6
#define SHUF_MASK	%xmm7	/* PSHUFFLE_BYTE_FLIP_MASK, loaded once before the loop */
#define PREV_E		%xmm8	/* E0 as it was when the current block started */
#define PREV_ABCD	%xmm9	/* ABCD as it was when the current block started */

/*
 * do_4rounds - emit SHA-1 rounds \i through \i+3, together with the
 * message-schedule instructions that are interleaved with them.
 *
 *   \i		number of the first round of the group: 0, 4, 8, ..., 76
 *   \m0	message vector consumed by this group of rounds
 *   \m1	next message vector in rotation (finished here by sha1msg2)
 *   \m2	message vector two steps ahead (receives the pxor term)
 *   \m3	message vector three steps ahead (started here by sha1msg1)
 *   \e0	E register passed to sha1rnds4 for this group
 *   \e1	E register that receives a copy of ABCD for the following group
 *
 * The .if conditions trim the schedule at both ends: the first 16 rounds
 * load and byte-swap their message words from DATA_PTR, round 0 uses a plain
 * paddd instead of sha1nexte, and the last groups stop producing schedule
 * words that no later round would consume.  The sha1rnds4 immediate is
 * \i / 20, so it steps 0, 1, 2, 3 every 20 rounds.
 *
 * Instructions indented with two tabs belong to round processing; those
 * indented with one tab belong to the message schedule.
 */
.macro do_4rounds	i, m0, m1, m2, m3, e0, e1
.if \i < 16
	movdqu		\i*4(DATA_PTR), \m0
	pshufb		SHUF_MASK, \m0
.endif
.if \i == 0
		paddd		\m0, \e0
.else
		sha1nexte	\m0, \e0
.endif
		movdqa		ABCD, \e1
.if \i >= 12 && \i < 76
	sha1msg2	\m0, \m1
.endif
		sha1rnds4	$\i / 20, \e0, ABCD
.if \i >= 4 && \i < 68
	sha1msg1	\m0, \m3
.endif
.if \i >= 8 && \i < 72
	pxor		\m0, \m2
.endif
.endm

/*
 * Intel SHA Extensions optimized implementation of a SHA-1 block function
 *
 * This function takes a pointer to the current SHA-1 state, a pointer to the
 * input data, and the number of 64-byte blocks to process.  Once all blocks
 * have been processed, the state is updated with the new state.  This function
 * only processes complete blocks.  State initialization, buffering of partial
 * blocks, and digest finalization are expected to be handled elsewhere.
 *
 * void sha1_ni_transform(struct sha1_block_state *state,
 *			  const u8 *data, size_t nblocks)
 *
 * In:	 DIGEST_PTR = state; 20 bytes are read (16 at offset 0, 4 at offset 16)
 *	 and the same 20 bytes are written back.  Unaligned accesses are used.
 *	 DATA_PTR   = input, nblocks * 64 bytes, read with unaligned loads.
 *	 NUM_BLKS   = nblocks; if zero, the function returns without touching
 *	 the state.
 * Out:	 nothing in registers; the state at DIGEST_PTR is updated in place.
 * Clobbers: DATA_PTR, NUM_BLKS, %xmm0-%xmm9, flags.
 * Stack: none used; the per-block saved state lives in PREV_E / PREV_ABCD,
 *	 so no hash state is written to the stack.
 *
 * NOTE(review): the caller must already be allowed to use SIMD registers
 * (in kernel context presumably between kernel_fpu_begin() and
 * kernel_fpu_end()); that is not visible from this file - confirm at the
 * call site.
 */
.text
SYM_FUNC_START(sha1_ni_transform)
	shl		$6, NUM_BLKS		/* convert to bytes */
	jz		.Ldone_hash		/* no blocks: leave the state alone */
	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */

	/*
	 * E0 = { state word 4, 0, 0, 0 }, high dword first.  Clearing E0
	 * before the insert gives the same value as insert-then-mask, without
	 * depending on the previous contents of the register.
	 */
	pxor		E0, E0
	pinsrd		$3, 1*16(DIGEST_PTR), E0

	/* ABCD = state words 0-3, reversed so that word 0 is the high dword */
	movdqu		0*16(DIGEST_PTR), ABCD
	pshufd		$0x1B, ABCD, ABCD

	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK

.Lloop0:
	/* Save hash values for addition after rounds */
	movdqa		E0, PREV_E
	movdqa		ABCD, PREV_ABCD

	do_4rounds	 0, MSG0, MSG1, MSG2, MSG3, E0, E1
	do_4rounds	 4, MSG1, MSG2, MSG3, MSG0, E1, E0
	do_4rounds	 8, MSG2, MSG3, MSG0, MSG1, E0, E1
	do_4rounds	12, MSG3, MSG0, MSG1, MSG2, E1, E0

	do_4rounds	16, MSG0, MSG1, MSG2, MSG3, E0, E1
	do_4rounds	20, MSG1, MSG2, MSG3, MSG0, E1, E0
	do_4rounds	24, MSG2, MSG3, MSG0, MSG1, E0, E1
	do_4rounds	28, MSG3, MSG0, MSG1, MSG2, E1, E0

	do_4rounds	32, MSG0, MSG1, MSG2, MSG3, E0, E1
	do_4rounds	36, MSG1, MSG2, MSG3, MSG0, E1, E0
	do_4rounds	40, MSG2, MSG3, MSG0, MSG1, E0, E1
	do_4rounds	44, MSG3, MSG0, MSG1, MSG2, E1, E0

	do_4rounds	48, MSG0, MSG1, MSG2, MSG3, E0, E1
	do_4rounds	52, MSG1, MSG2, MSG3, MSG0, E1, E0
	do_4rounds	56, MSG2, MSG3, MSG0, MSG1, E0, E1
	do_4rounds	60, MSG3, MSG0, MSG1, MSG2, E1, E0

	do_4rounds	64, MSG0, MSG1, MSG2, MSG3, E0, E1
	do_4rounds	68, MSG1, MSG2, MSG3, MSG0, E1, E0
	do_4rounds	72, MSG2, MSG3, MSG0, MSG1, E0, E1
	do_4rounds	76, MSG3, MSG0, MSG1, MSG2, E1, E0

	/* Add current hash values with previously saved */
	sha1nexte	PREV_E, E0
	paddd		PREV_ABCD, ABCD

	/* Increment data pointer and loop if more to process */
	add		$64, DATA_PTR
	cmp		NUM_BLKS, DATA_PTR
	jne		.Lloop0

	/* Write hash values back in the correct order */
	pshufd		$0x1B, ABCD, ABCD
	movdqu		ABCD, 0*16(DIGEST_PTR)
	pextrd		$3, E0, 1*16(DIGEST_PTR)

.Ldone_hash:
	RET
SYM_FUNC_END(sha1_ni_transform)

.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
/* pshufb control vector that reverses all 16 bytes of the destination */
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x000102030405060708090a0b0c0d0e0f