Lines Matching +full:a +full:- +full:c
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 * This is a SIMD SHA-1 implementation. It requires the Intel(R) Supplemental
17 …* http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorith…
19 * Copyright (C) 2010, Intel Corp.
62 /* we keep a window of 64 w[i]+K pre-calculated values in a circular buffer */
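A minimal C sketch of the circular-buffer idea described above (the 64-entry size follows the comment; the buffer name, storage and indexing are illustrative, and the real code fills it several entries at a time from XMM registers):

#include <stdint.h>

static uint32_t wK_buf[64];          /* pre-calculated w[i]+K values */
#define WK(i) (wK_buf[(i) % 64])     /* circular indexing: the producer runs ahead of the rounds */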
67 * This macro implements the SHA-1 function's body for a single 64-byte block
109 * This macro implements 80 rounds of SHA-1 for one 64-byte block
114 mov (HASH_PTR), A
116 mov 8(HASH_PTR), C
128 RR F1,A,B,C,D,E,0
129 RR F1,D,E,A,B,C,2
130 RR F1,B,C,D,E,A,4
131 RR F1,E,A,B,C,D,6
132 RR F1,C,D,E,A,B,8
134 RR F1,A,B,C,D,E,10
135 RR F1,D,E,A,B,C,12
136 RR F1,B,C,D,E,A,14
137 RR F1,E,A,B,C,D,16
138 RR F1,C,D,E,A,B,18
140 RR F2,A,B,C,D,E,20
141 RR F2,D,E,A,B,C,22
142 RR F2,B,C,D,E,A,24
143 RR F2,E,A,B,C,D,26
144 RR F2,C,D,E,A,B,28
146 RR F2,A,B,C,D,E,30
147 RR F2,D,E,A,B,C,32
148 RR F2,B,C,D,E,A,34
149 RR F2,E,A,B,C,D,36
150 RR F2,C,D,E,A,B,38
152 RR F3,A,B,C,D,E,40
153 RR F3,D,E,A,B,C,42
154 RR F3,B,C,D,E,A,44
155 RR F3,E,A,B,C,D,46
156 RR F3,C,D,E,A,B,48
158 RR F3,A,B,C,D,E,50
159 RR F3,D,E,A,B,C,52
160 RR F3,B,C,D,E,A,54
161 RR F3,E,A,B,C,D,56
162 RR F3,C,D,E,A,B,58
164 add $64, BUFFER_PTR # move to the next 64-byte block
168 RR F4,A,B,C,D,E,60
169 RR F4,D,E,A,B,C,62
170 RR F4,B,C,D,E,A,64
171 RR F4,E,A,B,C,D,66
172 RR F4,C,D,E,A,B,68
174 RR F4,A,B,C,D,E,70
175 RR F4,D,E,A,B,C,72
176 RR F4,B,C,D,E,A,74
177 RR F4,E,A,B,C,D,76
178 RR F4,C,D,E,A,B,78
180 UPDATE_HASH (HASH_PTR), A
182 UPDATE_HASH 8(HASH_PTR), C
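For orientation, here is a plain scalar C sketch of what the round sequence above computes for one block: 20 rounds each with F1, F2, F3 and F4, a rotation of the working variables after every round, and the final UPDATE_HASH adds. Helper names are illustrative; the real code renames registers instead of moving values and interleaves the W[] pre-calculation.

#include <stdint.h>

static uint32_t rol32(uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }

/* Scalar view of the 80 rounds above for one 64-byte block.
 * w[0..79] is the expanded message schedule. */
static void sha1_block(uint32_t hash[5], const uint32_t w[80])
{
    static const uint32_t K[4] = { 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 };
    uint32_t a = hash[0], b = hash[1], c = hash[2], d = hash[3], e = hash[4];

    for (int i = 0; i < 80; i++) {
        uint32_t f;

        if (i < 20)
            f = (b & c) | (~b & d);                 /* F1 */
        else if (i < 40)
            f = b ^ c ^ d;                          /* F2 */
        else if (i < 60)
            f = (b & c) | (b & d) | (c & d);        /* F3 */
        else
            f = b ^ c ^ d;                          /* F4 (== F2) */

        uint32_t t = rol32(a, 5) + f + e + w[i] + K[i / 20];
        e = d; d = c; c = rol32(b, 30); b = a; a = t;   /* rotate(a,b,c,d,e) */
    }

    hash[0] += a; hash[1] += b; hash[2] += c;           /* UPDATE_HASH */
    hash[3] += d; hash[4] += e;
}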
192 .set A, REG_A
194 .set C, REG_C
205 mov A, REG_A
209 .macro SWAP_REG_NAMES a, b
210 .set _T, \a
211 .set \a, \b
215 .macro F1 b, c, d
216 mov \c, T1
217 SWAP_REG_NAMES \c, T1
223 .macro F2 b, c, d
226 xor \c, T1
230 .macro F3 b, c, d
231 mov \c, T1
232 SWAP_REG_NAMES \c, T1
235 and \c, T2
240 .macro F4 b, c, d
241 F2 \b, \c, \d
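In C terms, the four F macros are the standard SHA-1 round functions (a minimal sketch; the function names below are illustrative, and the macros above use the usual branch-free refactorings of the same expressions):

#include <stdint.h>

/* F1, rounds 0-19: "choose" (d ^ (b & (c ^ d)) is an equivalent form) */
static uint32_t f1(uint32_t b, uint32_t c, uint32_t d) { return (b & c) | (~b & d); }

/* F2, rounds 20-39: parity */
static uint32_t f2(uint32_t b, uint32_t c, uint32_t d) { return b ^ c ^ d; }

/* F3, rounds 40-59: "majority" ((b & c) | (d & (b | c)) is an equivalent form) */
static uint32_t f3(uint32_t b, uint32_t c, uint32_t d) { return (b & c) | (b & d) | (c & d); }

/* F4, rounds 60-79: identical to F2, which is why the F4 macro simply expands F2 */
static uint32_t f4(uint32_t b, uint32_t c, uint32_t d) { return f2(b, c, d); }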
250 * RR does two rounds of SHA-1 back to back with W[] pre-calc
251 * t1 = F(b, c, d); e += w(i)
253 * t1 = F(a, b, c);
254 * d += t1; a <<= 5;
255 * e += a;
256 * t1 = e; a >>= 7;
260 .macro RR F, a, b, c, d, e, round
262 \F \b, \c, \d # t1 = F(b, c, d);
268 \F \a, \b, \c
270 rol $5, \a
271 add \a, \e
273 ror $7, \a # ((a <<r 5) >>r 7) => (a <<r 30)
281 # write: \a, \b
282 # rotate: \a<=\d, \b<=\e, \c<=\a, \d<=\b, \e<=\c
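A scalar C sketch of one RR invocation under the same assumptions (wK[] holds the pre-calculated w[i]+K values; names are illustrative). The assembly never moves the five working values: after the two rounds it renames registers, which is what the rotate comment above describes and why the round listing cycles through A..E in that order.

#include <stdint.h>

static uint32_t rol32(uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }

/* Two SHA-1 rounds back to back, as the RR comment describes. */
static void rr(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d, uint32_t *e,
               uint32_t (*F)(uint32_t, uint32_t, uint32_t),
               const uint32_t wK[], int round)
{
    *e += F(*b, *c, *d) + rol32(*a, 5) + wK[round];        /* round i   */
    *b = rol32(*b, 30);
    *d += F(*a, *b, *c) + rol32(*e, 5) + wK[round + 1];    /* round i+1 */
    *a = rol32(*a, 30);                                    /* rol 5 then ror 7 in the asm */
    /* the caller now passes (d, e, a, b, c) as the next (a, b, c, d, e),
     * mirroring the RR F1,A,B,C,D,E,0 / RR F1,D,E,A,B,C,2 / ... sequence above */
}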
299 .set i, ((\r) % 80) # pre-compute for the next iteration
306 .elseif (i < 80) // rounds 32-79
347 /* message scheduling pre-compute for rounds 0-15 */
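What the rounds 0-15 pre-compute amounts to, as a scalar C sketch (the assembly handles four words per XMM register and byte-swaps with a shuffle; the names below are illustrative and a little-endian host is assumed, as on x86):

#include <stdint.h>
#include <string.h>

/* Rounds 0-15: w[i] is simply the big-endian input word; K is pre-added so the
 * scalar rounds later need only one ALU add from the circular buffer. */
static void w_precalc_00_15(const unsigned char block[64], uint32_t w[16], uint32_t wK[16])
{
    const uint32_t K1 = 0x5a827999;        /* K for rounds 0-19 */

    for (int i = 0; i < 16; i++) {
        uint32_t v;

        memcpy(&v, block + 4 * i, 4);
        v = __builtin_bswap32(v);          /* big-endian load (byte shuffle in the asm) */
        w[i] = v;
        wK[i] = v + K1;
    }
}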
362 /* message scheduling pre-compute for rounds 16-31
364 * - calculating last 32 w[i] values in 8 XMM registers
365 * - pre-calculate K+w[i] values and store to mem, for later load by ALU add
368 * vectorization needs some "heavy lifting" for rounds 16-31 due to the w[i]->w[i-3]
369 * dependency, but improves for rounds 32-79
372 # blended scheduling of vector and scalar instruction streams, one 4-wide
376 palignr $8, W_minus_16, W # w[i-14]
378 psrldq $4, W_TMP1 # w[i-3]
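A scalar C sketch of the recurrence this stage evaluates. The reason it needs the blended vector/scalar approach is visible here: w[i-3] lies inside the same 4-word group being computed, so a 4-wide XMM operation cannot produce all four results independently (names are illustrative):

#include <stdint.h>

static uint32_t rol32(uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }

/* Rounds 16-31: the standard SHA-1 schedule, where w[i] depends on w[i-3]. */
static void w_precalc_16_31(uint32_t w[32], uint32_t wK[32])
{
    for (int i = 16; i < 32; i++) {
        uint32_t K = (i < 20) ? 0x5a827999 : 0x6ed9eba1;

        w[i] = rol32(w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16], 1);
        wK[i] = w[i] + K;
    }
}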
403 /* message scheduling pre-compute for rounds 32-79
405 * in SHA-1 specification: w[i] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) rol 1
406 * instead we use the equivalent: w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2
407 * which allows more efficient vectorization since the w[i]=>w[i-3] dependency is broken
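A scalar C sketch of the transformed recurrence. Because the nearest input is now w[i-6], every aligned group of four w[i] values depends only on words from earlier groups, so a single 4-wide XMM operation can produce all four at once (names are illustrative):

#include <stdint.h>

static uint32_t rol32(uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }

/* Rounds 32-79: the equivalent recurrence from the comment above. */
static void w_precalc_32_79(uint32_t w[80], uint32_t wK[80])
{
    static const uint32_t K[4] = { 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 };

    for (int i = 32; i < 80; i += 4) {
        /* the four iterations below are mutually independent (j - 6 < i) */
        for (int j = i; j < i + 4; j++) {
            w[j] = rol32(w[j - 6] ^ w[j - 16] ^ w[j - 28] ^ w[j - 32], 2);
            wK[j] = w[j] + K[j / 20];
        }
    }
}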
457 .macro xmm_mov a, b
458 movdqu \a,\b
464 * extern "C" void sha1_transform_ssse3(struct sha1_state *state,
501 vpalignr $8, W_minus_16, W_minus_12, W # w[i-14]
502 vpsrldq $4, W_minus_04, W_TMP1 # w[i-3]
545 .macro xmm_mov a, b
546 vmovdqu \a,\b
551 * extern "C" void sha1_transform_avx(struct sha1_state *state,