xref: /linux/arch/x86/crypto/sha256_ni_asm.S (revision cfda8617e22a8bf217a613d0b3ba3a38778443ba)
1/*
2 * Intel SHA Extensions optimized implementation of a SHA-256 update function
3 *
4 * This file is provided under a dual BSD/GPLv2 license.  When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2015 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * 	Sean Gulley <sean.m.gulley@intel.com>
22 * 	Tim Chen <tim.c.chen@linux.intel.com>
23 *
24 * BSD LICENSE
25 *
26 * Copyright(c) 2015 Intel Corporation.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 *
32 * 	* Redistributions of source code must retain the above copyright
33 * 	  notice, this list of conditions and the following disclaimer.
34 * 	* Redistributions in binary form must reproduce the above copyright
35 * 	  notice, this list of conditions and the following disclaimer in
36 * 	  the documentation and/or other materials provided with the
37 * 	  distribution.
38 * 	* Neither the name of Intel Corporation nor the names of its
39 * 	  contributors may be used to endorse or promote products derived
40 * 	  from this software without specific prior written permission.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
43 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
44 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
45 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
46 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
48 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
49 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
50 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
51 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
52 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53 *
54 */
55
56#include <linux/linkage.h>
57
/*
 * Register aliases used by sha256_ni_transform.
 */

/* Function arguments, in the order they are passed */
#define DIGEST_PTR	%rdi	/* arg 1: hash state, read and written back */
#define DATA_PTR	%rsi	/* arg 2: input data, advanced 64 bytes per block */
#define NUM_BLKS	%rdx	/* arg 3: block count, turned into end-of-data pointer */

/* Base address of the K256 round-constant table */
#define SHA256CONSTANTS	%rax

/* Message words plus round constants handed to sha256rnds2 */
#define MSG		%xmm0

/* Working hash state, kept in ABEF / CDGH word order inside the loop */
#define STATE0		%xmm1
#define STATE1		%xmm2

/* Message-schedule registers; MSGTMP4 is used as scratch */
#define MSGTMP0		%xmm3
#define MSGTMP1		%xmm4
#define MSGTMP2		%xmm5
#define MSGTMP3		%xmm6
#define MSGTMP4		%xmm7

/* pshufb control loaded from PSHUFFLE_BYTE_FLIP_MASK */
#define SHUF_MASK	%xmm8

/* Copy of the state taken at the top of each block, added back at the end */
#define ABEF_SAVE	%xmm9
#define CDGH_SAVE	%xmm10
77
/*
 * Intel SHA Extensions optimized implementation of a SHA-256 update function
 *
 * void sha256_ni_transform(uint32_t *digest, const void *data,
 *			    uint32_t numBlocks);
 *
 * digest:    pointer to the 32 bytes of hash state; read on entry and
 *	      overwritten with the updated state once all blocks are processed
 * data:      pointer to the input data (numBlocks * 64 bytes are read)
 * numBlocks: number of 64 byte blocks to process; when it is 0 the function
 *	      returns without reading or writing the digest
 *
 * The function only processes complete blocks, there is no functionality to
 * store partial blocks.  All message padding and hash value initialization
 * must be done outside the update function.
 *
 * Neither digest nor data has to be aligned (both are accessed with movdqu).
 *
 * Clobbers: %rax, %rdx, %rsi, %xmm0-%xmm10 and the flags.
 *
 * The indented lines in do_4rounds are instructions related to rounds
 * processing.  The non-indented lines are instructions related to the
 * message schedule.
 */

/*
 * 32-bit view of NUM_BLKS (%rdx): numBlocks is a 32-bit argument, so only
 * these bits are defined by the calling convention on entry.
 */
#define NUM_BLKS_32	%edx

/*
 * do_4rounds - emit SHA-256 rounds \i to \i+3 and the matching message
 * schedule work.
 *
 * \i:  number of the first round of the group (0, 4, ..., 60)
 * \m0: schedule register of this group.  For \i < 16 it is loaded here from
 *	the input block (and byte-swapped); otherwise it must already hold the
 *	four message words of these rounds.
 * \m1: schedule register of the next group; completed here with sha256msg2
 *	when 12 <= \i < 60.
 * \m2: not referenced; it is listed so that every invocation names the four
 *	schedule registers in rotation order.
 * \m3: schedule register of the previous group; it feeds the palignr above
 *	and is then primed with sha256msg1 for reuse when 4 <= \i < 52.
 *
 * Uses MSG and MSGTMP4 as scratch.  sha256rnds2 takes the message words plus
 * round constants implicitly from %xmm0, which is the MSG register.
 */
.macro do_4rounds	i, m0, m1, m2, m3
.if \i < 16
	movdqu		\i*4(DATA_PTR), MSG
	pshufb		SHUF_MASK, MSG		/* byte-swap each 32-bit word */
	movdqa		MSG, \m0
.else
	movdqa		\m0, MSG
.endif
		paddd		\i*4(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
.if \i >= 12 && \i < 60
	movdqa		\m0, MSGTMP4
	palignr		$4, \m3, MSGTMP4
	paddd		MSGTMP4, \m1
	sha256msg2	\m0, \m1
.endif
		/* move the upper two dwords down for the next two rounds */
		pshufd 		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
.if \i >= 4 && \i < 52
	sha256msg1	\m0, \m3
.endif
.endm

.text
.align 32
SYM_FUNC_START(sha256_ni_transform)

	/*
	 * numBlocks is 32 bits wide, so bits 63:32 of NUM_BLKS are undefined
	 * on entry.  Zero-extend it before doing 64-bit arithmetic on it.
	 */
	mov		NUM_BLKS_32, NUM_BLKS_32
	shl		$6, NUM_BLKS		/*  convert to bytes */
	jz		.Ldone_hash
	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */

	/*
	 * load initial hash values
	 * Need to reorder these appropriately
	 * DCBA, HGFE -> ABEF, CDGH
	 */
	movdqu		0*16(DIGEST_PTR), STATE0
	movdqu		1*16(DIGEST_PTR), STATE1

	pshufd		$0xB1, STATE0,  STATE0		/* CDAB */
	pshufd		$0x1B, STATE1,  STATE1		/* EFGH */
	movdqa		STATE0, MSGTMP4
	palignr		$8, STATE1,  STATE0		/* ABEF */
	pblendw		$0xF0, MSGTMP4, STATE1		/* CDGH */

	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
	lea		K256(%rip), SHA256CONSTANTS

.Lloop0:
	/* Save hash values for addition after rounds */
	movdqa		STATE0, ABEF_SAVE
	movdqa		STATE1, CDGH_SAVE

	/* Rounds 0-15: message words come from the input block */
	do_4rounds	0,  MSGTMP0, MSGTMP1, MSGTMP2, MSGTMP3
	do_4rounds	4,  MSGTMP1, MSGTMP2, MSGTMP3, MSGTMP0
	do_4rounds	8,  MSGTMP2, MSGTMP3, MSGTMP0, MSGTMP1
	do_4rounds	12, MSGTMP3, MSGTMP0, MSGTMP1, MSGTMP2

	/* Rounds 16-63: message words come from the message schedule */
	do_4rounds	16, MSGTMP0, MSGTMP1, MSGTMP2, MSGTMP3
	do_4rounds	20, MSGTMP1, MSGTMP2, MSGTMP3, MSGTMP0
	do_4rounds	24, MSGTMP2, MSGTMP3, MSGTMP0, MSGTMP1
	do_4rounds	28, MSGTMP3, MSGTMP0, MSGTMP1, MSGTMP2

	do_4rounds	32, MSGTMP0, MSGTMP1, MSGTMP2, MSGTMP3
	do_4rounds	36, MSGTMP1, MSGTMP2, MSGTMP3, MSGTMP0
	do_4rounds	40, MSGTMP2, MSGTMP3, MSGTMP0, MSGTMP1
	do_4rounds	44, MSGTMP3, MSGTMP0, MSGTMP1, MSGTMP2

	do_4rounds	48, MSGTMP0, MSGTMP1, MSGTMP2, MSGTMP3
	do_4rounds	52, MSGTMP1, MSGTMP2, MSGTMP3, MSGTMP0
	do_4rounds	56, MSGTMP2, MSGTMP3, MSGTMP0, MSGTMP1
	do_4rounds	60, MSGTMP3, MSGTMP0, MSGTMP1, MSGTMP2

	/* Add current hash values with previously saved */
	paddd		ABEF_SAVE, STATE0
	paddd		CDGH_SAVE, STATE1

	/* Increment data pointer and loop if more to process */
	add		$64, DATA_PTR
	cmp		NUM_BLKS, DATA_PTR
	jne		.Lloop0

	/* Write hash values back in the correct order */
	pshufd		$0x1B, STATE0,  STATE0		/* FEBA */
	pshufd		$0xB1, STATE1,  STATE1		/* DCHG */
	movdqa		STATE0, MSGTMP4
	pblendw		$0xF0, STATE1,  STATE0		/* DCBA */
	palignr		$8, MSGTMP4, STATE1		/* HGFE */

	movdqu		STATE0, 0*16(DIGEST_PTR)
	movdqu		STATE1, 1*16(DIGEST_PTR)

.Ldone_hash:

	ret
SYM_FUNC_END(sha256_ni_transform)
331
/*
 * SHA-256 round constants.  Each 16-byte row is added to the message words
 * of one group of four rounds, so row n serves rounds 4n to 4n+3.
 */
.section	.rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
	.long	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5	/* rounds  0-3  */
	.long	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5	/* rounds  4-7  */
	.long	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3	/* rounds  8-11 */
	.long	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174	/* rounds 12-15 */
	.long	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc	/* rounds 16-19 */
	.long	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da	/* rounds 20-23 */
	.long	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7	/* rounds 24-27 */
	.long	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967	/* rounds 28-31 */
	.long	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13	/* rounds 32-35 */
	.long	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85	/* rounds 36-39 */
	.long	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3	/* rounds 40-43 */
	.long	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070	/* rounds 44-47 */
	.long	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5	/* rounds 48-51 */
	.long	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3	/* rounds 52-55 */
	.long	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208	/* rounds 56-59 */
	.long	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2	/* rounds 60-63 */
351
/*
 * pshufb control that reverses the byte order inside each 32-bit word.
 * Written as four little-endian dwords: in memory the bytes are
 * 03 02 01 00, 07 06 05 04, 0b 0a 09 08, 0f 0e 0d 0c.
 */
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
	.long	0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
356