/* Source: /linux/lib/crypto/x86/sha256-ni-asm.S (revision 74750aa78de33794aa9ab55de15da04bd41d1ac8) */
/*
 * Intel SHA Extensions optimized implementation of a SHA-256 update function
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Contact Information:
 * 	Sean Gulley <sean.m.gulley@intel.com>
 * 	Tim Chen <tim.c.chen@linux.intel.com>
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 	* Redistributions of source code must retain the above copyright
 * 	  notice, this list of conditions and the following disclaimer.
 * 	* Redistributions in binary form must reproduce the above copyright
 * 	  notice, this list of conditions and the following disclaimer in
 * 	  the documentation and/or other materials provided with the
 * 	  distribution.
 * 	* Neither the name of Intel Corporation nor the names of its
 * 	  contributors may be used to endorse or promote products derived
 * 	  from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/linkage.h>
#include <linux/objtool.h>

/*
 * Register aliases used by do_4rounds and sha256_ni_transform.
 *
 * The three argument registers follow the order of the C prototype
 * (state, data, nblocks).
 */
#define STATE_PTR	%rdi	/* 1st arg */
#define DATA_PTR	%rsi	/* 2nd arg */
#define NUM_BLKS	%rdx	/* 3rd arg */

/*
 * Set to K256 + 32*4 by sha256_ni_transform; do_4rounds indexes it with
 * (i-32)*4 so that the effective address is K256 + i*4.
 */
#define SHA256CONSTANTS	%rax

#define MSG		%xmm0  /* sha256rnds2 implicit operand */
#define STATE0		%xmm1	/* ABEF between 4-round groups */
#define STATE1		%xmm2	/* CDGH between 4-round groups */
/* Message schedule registers; passed to do_4rounds in rotating order. */
#define MSG0		%xmm3
#define MSG1		%xmm4
#define MSG2		%xmm5
#define MSG3		%xmm6
#define TMP		%xmm7	/* scratch */

/* Loaded once from PSHUFFLE_BYTE_FLIP_MASK; pshufb control for input data. */
#define SHUF_MASK	%xmm8

/* Copies of STATE0/STATE1 taken at the top of each block iteration. */
#define ABEF_SAVE	%xmm9
#define CDGH_SAVE	%xmm10

/*
 * do_4rounds - run SHA-256 rounds i..i+3 and advance the message schedule
 *
 * i:	index of the first of the four rounds.  sha256_ni_transform invokes
 *	this with i = 0, 4, ..., 60.  It also selects the message words
 *	W[i..i+3] and the round constants K[i..i+3].
 * m0:	register holding W[i..i+3].  For i < 16 it is loaded here from
 *	byte offset i*4 of DATA_PTR and shuffled with SHUF_MASK.
 * m1:	for 12 <= i < 60, finished here into W[i+4..i+7].
 * m2:	not referenced by the macro body; accepted so that call sites can
 *	pass the four message registers in rotating order.
 * m3:	holds W[i-4..i-1] on entry.  It feeds the palignr below and, for
 *	4 <= i < 52, is overwritten by sha256msg1 with the partial sum that
 *	a later invocation completes into W[i+12..i+15].
 *
 * Expects SHA256CONSTANTS = K256 + 32*4, so (i-32)*4 is the offset of K[i].
 * With i in 0..60 the displacement stays within -128..112 (presumably chosen
 * so that it fits a one-byte displacement).
 *
 * Clobbers MSG and TMP and updates STATE0/STATE1.  Each sha256rnds2 performs
 * two rounds and the operands are swapped between the two uses, so STATE0 and
 * STATE1 keep their ABEF/CDGH roles across one invocation.
 */
.macro do_4rounds	i, m0, m1, m2, m3
.if \i < 16
	/* First 16 words come straight from the input block. */
	movdqu		\i*4(DATA_PTR), \m0
	pshufb		SHUF_MASK, \m0
.endif
	/* MSG = K[i..i+3] + W[i..i+3]; the low half feeds rounds i, i+1. */
	movdqa		(\i-32)*4(SHA256CONSTANTS), MSG
	paddd		\m0, MSG
	sha256rnds2	STATE0, STATE1
.if \i >= 12 && \i < 60
	/* TMP = W[i-3..i]: the last word of m3 followed by three of m0. */
	movdqa		\m0, TMP
	palignr		$4, \m3, TMP
	paddd		TMP, \m1
	sha256msg2	\m0, \m1
.endif
	/* Move the high half of MSG down for rounds i+2, i+3. */
	punpckhqdq	MSG, MSG
	sha256rnds2	STATE1, STATE0
.if \i >= 4 && \i < 52
	sha256msg1	\m0, \m3
.endif
.endm

/*
 * Intel SHA Extensions optimized implementation of a SHA-256 block function
 *
 * This function takes a pointer to the current SHA-256 state, a pointer to the
 * input data, and the number of 64-byte blocks to process.  Once all blocks
 * have been processed, the state is updated with the new state.  This function
 * only processes complete blocks.  State initialization, buffering of partial
 * blocks, and digest finalization are expected to be handled elsewhere.
 *
 * void sha256_ni_transform(u32 state[SHA256_STATE_WORDS],
 *			    const u8 *data, size_t nblocks);
 *
 * In:		STATE_PTR (%rdi) = state, DATA_PTR (%rsi) = data,
 *		NUM_BLKS (%rdx) = nblocks
 * Out:		state[0..7] updated in place; not read or written when
 *		nblocks == 0
 * Clobbers:	%rax, %rsi, %rdx, %xmm0-%xmm10, flags
 *
 * The state and the input data are accessed with movdqu only, so this code
 * places no 16-byte alignment requirement on either pointer.
 */
.text
SYM_FUNC_START(sha256_ni_transform)
	ANNOTATE_NOENDBR	# since this is called only via static_call

	/* shl sets ZF when the byte count is zero, i.e. nblocks == 0. */
	shl		$6, NUM_BLKS		/*  convert to bytes */
	jz		.Ldone_hash
	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */

	/*
	 * load initial hash values
	 * Need to reorder these appropriately
	 * DCBA, HGFE -> ABEF, CDGH
	 */
	movdqu		0*16(STATE_PTR), STATE0		/* DCBA */
	movdqu		1*16(STATE_PTR), STATE1		/* HGFE */

	movdqa		STATE0, TMP
	punpcklqdq	STATE1, STATE0			/* FEBA */
	punpckhqdq	TMP, STATE1			/* DCHG */
	pshufd		$0x1B, STATE0, STATE0		/* ABEF */
	pshufd		$0xB1, STATE1, STATE1		/* CDGH */

	/*
	 * Loop invariants: the byte-swap mask and a pointer into the middle of
	 * K256 (do_4rounds subtracts the 32-word bias again).
	 */
	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
	lea		K256+32*4(%rip), SHA256CONSTANTS

.Lloop0:
	/* Save hash values for addition after rounds */
	movdqa		STATE0, ABEF_SAVE
	movdqa		STATE1, CDGH_SAVE

	/*
	 * 4 outer iterations x 4 invocations x 4 rounds = 64 rounds.  The
	 * message registers shift by one position per invocation.
	 */
.irp i, 0, 16, 32, 48
	do_4rounds	(\i + 0),  MSG0, MSG1, MSG2, MSG3
	do_4rounds	(\i + 4),  MSG1, MSG2, MSG3, MSG0
	do_4rounds	(\i + 8),  MSG2, MSG3, MSG0, MSG1
	do_4rounds	(\i + 12), MSG3, MSG0, MSG1, MSG2
.endr

	/* Add current hash values with previously saved */
	paddd		ABEF_SAVE, STATE0
	paddd		CDGH_SAVE, STATE1

	/* Increment data pointer and loop if more to process */
	add		$64, DATA_PTR
	cmp		NUM_BLKS, DATA_PTR
	jne		.Lloop0

	/* Write hash values back in the correct order */
	movdqa		STATE0, TMP
	punpcklqdq	STATE1, STATE0			/* GHEF */
	punpckhqdq	TMP, STATE1			/* ABCD */
	pshufd		$0xB1, STATE0, STATE0		/* HGFE */
	pshufd		$0x1B, STATE1, STATE1		/* DCBA */

	movdqu		STATE1, 0*16(STATE_PTR)
	movdqu		STATE0, 1*16(STATE_PTR)

.Ldone_hash:

	RET
SYM_FUNC_END(sha256_ni_transform)

/*
 * SHA-256 round constants K[0..63], four 32-bit words per line.
 *
 * sha256_ni_transform keeps a pointer to K256 + 32*4 and do_4rounds reads
 * four constants at a time with movdqa, which needs 16-byte alignment; the
 * 64-byte alignment below satisfies that for every 16-byte row.
 */
.section	.rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

/*
 * pshufb control that reverses the byte order inside each 32-bit word
 * (result bytes 0..3 take source bytes 3..0, and so on for each word).
 * do_4rounds applies it to every 16 bytes of input it loads.  The mask is
 * loaded with movdqa, hence the 16-byte alignment.
 */
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x0c0d0e0f08090a0b0405060700010203