2 * Implement fast SHA-1 with AVX2 instructions. (x86_64)
4 * This file is provided under a dual BSD/GPLv2 license. When using or
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
47 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
59 * SHA-1 implementation with Intel(R) AVX2 instruction set extensions.
62 * Visit http://software.intel.com/en-us/articles/
63 * improving-the-performance-of-the-secure-hash-algorithm-1/ for details.
97 .set A, REG_A
101 .set E, REG_E
142 * Keep 2 iterations precalculated at a time:
143 * - 80 DWORDs per iteration * 2
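
One reading of "80 DWORDs per iteration * 2", as a minimal C sketch (the struct and helper names below are illustrative, not taken from this file): the scratch area holds the 80 precomputed K+w dwords for two blocks at once, so the rounds of one block can consume one bank while the schedule for the next block is written into the other.

    #include <stdint.h>

    /* Illustrative double buffer: 80 precomputed K+w dwords per block,
     * two blocks in flight at a time. */
    struct wk_buf {
        uint32_t bank[2][80];
    };

    /* Rounds for the current block read bank[cur] while the schedule
     * for the next block is written into bank[cur ^ 1]. */
    static inline uint32_t wk_read(const struct wk_buf *b, int cur, int t)
    {
        return b->bank[cur][t];
    }

    static inline void wk_write(struct wk_buf *b, int cur, int t, uint32_t v)
    {
        b->bank[cur ^ 1][t] = v;
    }
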
193 .if (i == 0) # Initialize and rotate registers
198 /* message scheduling pre-compute for rounds 0-15 */
199 .if ((i & 7) == 0)
205 .elseif ((i & 7) == 1)
206 vinsertf128 $1, ((i-1) * 2)(BUFFER_PTR2),\
208 .elseif ((i & 7) == 2)
210 .elseif ((i & 7) == 4)
212 .elseif ((i & 7) == 7)
213 vmovdqu WY_TMP, PRECALC_WK(i&~7)
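
In scalar C terms, this stage does roughly the following for rounds 0-15 (the assembly handles several words per instruction, pairs words from the two input buffers with vinsertf128, and does the byte swap with a shuffle; function and array names below are illustrative):

    #include <stdint.h>

    /* Big-endian load of message word t from a 64-byte block. */
    static uint32_t load_be32(const uint8_t *block, int t)
    {
        const uint8_t *p = block + 4 * t;

        return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
               ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
    }

    /* Rounds 0-15: w[t] is just the message word; the round constant is
     * folded in here so the round code only has to add wk[t]. */
    static void precalc_00_15(const uint8_t *block, uint32_t k,
                              uint32_t w[80], uint32_t wk[80])
    {
        for (int t = 0; t < 16; t++) {
            w[t]  = load_be32(block, t);
            wk[t] = w[t] + k;
        }
    }
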
221 * message scheduling pre-compute for rounds 16-31
223 * pre-calculate K+w[i] values and store to mem
226 * "brute force" vectorization for rounds 16-31 only
227 * due to w[i]->w[i-3] dependency
229 .if ((i & 7) == 0)
234 /* w[i-14] */
236 vpsrldq $4, WY_minus_04, WY_TMP /* w[i-3] */
237 .elseif ((i & 7) == 1)
240 .elseif ((i & 7) == 2)
243 .elseif ((i & 7) == 3)
246 .elseif ((i & 7) == 4)
249 .elseif ((i & 7) == 5)
252 .elseif ((i & 7) == 7)
255 vmovdqu WY_TMP, PRECALC_WK(i&~7)
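
For reference, the plain C form of the recurrence this stage vectorizes; because each new w[t] needs w[t-3], at most four words can be produced independently at a time, which is the "brute force" limitation the comment refers to. Names below are illustrative.

    #include <stdint.h>

    static inline uint32_t rol32(uint32_t x, int n)
    {
        return (x << n) | (x >> (32 - n));
    }

    /* Rounds 16-31: the specification's schedule recurrence.  Each w[t]
     * depends on w[t-3], hence the narrower vectorization in the
     * assembly.  (The assembly also adds the round constant K here.) */
    static void schedule_16_31(uint32_t w[80])
    {
        for (int t = 16; t < 32; t++)
            w[t] = rol32(w[t - 3] ^ w[t - 8] ^ w[t - 14] ^ w[t - 16], 1);
    }
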
263 * in the SHA-1 specification:
264 * w[i] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) rol 1
266 * while here the equivalent w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2 is used,
268 * so the w[i] -> w[i-3] dependency is broken and more efficient vectorization is possible
271 .if ((i & 7) == 0)
277 .elseif ((i & 7) == 1)
280 .elseif ((i & 7) == 2)
282 .elseif ((i & 7) == 3)
284 .elseif ((i & 7) == 4)
286 .elseif ((i & 7) == 5)
289 .elseif ((i & 7) == 7)
291 vmovdqu WY_TMP, PRECALC_WK(i&~7)
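
The rol-2 form is an exact rewrite of the specification's recurrence for t >= 32 (substitute the rule into itself once; rotation distributes over XOR and the intermediate terms cancel), which removes the w[t-3] dependency and lets eight schedule words be computed per 256-bit operation. A small self-checking C sketch, with illustrative names:

    #include <assert.h>
    #include <stdint.h>

    static inline uint32_t rol32(uint32_t x, int n)
    {
        return (x << n) | (x >> (32 - n));
    }

    /* For t >= 32, (w[t-6]^w[t-16]^w[t-28]^w[t-32]) rol 2 equals the
     * standard (w[t-3]^w[t-8]^w[t-14]^w[t-16]) rol 1, but no longer
     * depends on w[t-3]. */
    static void check_schedule_identity(const uint32_t msg[16])
    {
        uint32_t w[80];

        for (int t = 0; t < 16; t++)
            w[t] = msg[t];
        for (int t = 16; t < 80; t++)
            w[t] = rol32(w[t - 3] ^ w[t - 8] ^ w[t - 14] ^ w[t - 16], 1);
        for (int t = 32; t < 80; t++)
            assert(w[t] == rol32(w[t - 6] ^ w[t - 16] ^ w[t - 28] ^ w[t - 32], 2));
    }
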
301 .set K_XMM, 32*0
320 .set T_REG, E
321 .set E, D
325 .set TB, A
326 .set A, T_REG
352 .if (round_id == 0) /* Precalculate F for first round */
356 rorx $(32-30), B, B /* b>>>2 */
380 add WK(\r), E
382 andn C, A, T1 /* ~b&d */
383 lea (RE,RTB), E /* Add F from the previous round */
385 rorx $(32-5), A, TA /* T2 = A >>> 5 */
386 rorx $(32-30),A, TB /* b>>>2 for next round */
394 and B, A /* b&c */
395 xor T1, A /* F1 = (b&c) ^ (~b&d) */
397 lea (RE,RTA), E /* E += A >>> 5 */
401 add WK(\r), E
402 lea (RE,RTB), E /* Add F from the previous round */
405 rorx $(32-5), A, TA /* T2 = A >>> 5 */
407 rorx $(32-30), A, TB /* b>>>2 for next round */
412 xor B, A
415 add TA, E /* E += A >>> 5 */
418 xor C, A
423 add WK(\r), E
426 lea (RE,RTB), E /* Add F from the previous round */
429 or A, T1
431 rorx $(32-5), A, TA /* T2 = A >>> 5 */
432 rorx $(32-30), A, TB /* b>>>2 for next round */
438 and B, A
439 or T1, A
441 add TA, E /* E += A >>> 5 */
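
The andn/and/xor, xor/xor and or/and sequences in these fragments are the three classic SHA-1 boolean functions, and rorx provides the a>>>5 and b>>>2 rotations; the assembly avoids data movement by renaming registers each round and by computing F one round ahead ("Add F from the previous round"). A scalar C rendering of the same round, assuming the schedule word plus round constant is already available as wk; names are illustrative:

    #include <stdint.h>

    static inline uint32_t rol32(uint32_t x, int n)
    {
        return (x << n) | (x >> (32 - n));
    }

    /* Round functions: choose for rounds 0-19, parity for 20-39 and
     * 60-79, majority for 40-59. */
    static inline uint32_t f_ch(uint32_t b, uint32_t c, uint32_t d)     { return (b & c) ^ (~b & d); }
    static inline uint32_t f_parity(uint32_t b, uint32_t c, uint32_t d) { return b ^ c ^ d; }
    static inline uint32_t f_maj(uint32_t b, uint32_t c, uint32_t d)    { return (b & c) | (b & d) | (c & d); }

    /* One SHA-1 round, with wk = w[t] + K precomputed. */
    static inline void sha1_round(uint32_t *a, uint32_t *b, uint32_t *c,
                                  uint32_t *d, uint32_t *e,
                                  uint32_t f, uint32_t wk)
    {
        uint32_t t = rol32(*a, 5) + f + *e + wk;

        *e = *d;
        *d = *c;
        *c = rol32(*b, 30);
        *b = *a;
        *a = t;
    }
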
446 * %1 += (%2 >= %3) ? %4 : 0
448 .macro ADD_IF_GE a, b, c, d
449 mov \a, RTA
452 cmovge RTA, \a
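
From the visible lines (the first operand copied into a temporary, then a cmovge back into it), ADD_IF_GE looks like a branchless conditional add; the C reading below is inferred from the fragment and the comment above, so treat the exact operand roles as an assumption.

    #include <stdint.h>

    /* Assumed semantics of ADD_IF_GE a, b, c, d:
     *   tmp = a + d;  if (b >= c) a = tmp;
     * i.e. add d to a only when b >= c, with no branch. */
    static inline uint64_t add_if_ge(uint64_t a, int64_t b, int64_t c, uint64_t d)
    {
        uint64_t tmp = a + d;

        return (b >= c) ? tmp : a;
    }
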
456 * macro implements 80 rounds of SHA-1 for multiple blocks with s/w pipelining
462 mov (HASH_PTR), A
466 mov 16(HASH_PTR), E
473 .set i, 0
488 * we use the K_BASE value as a signal of the last block,
500 * rounds: 0,2,4,6,8
502 .set j, 0
535 UPDATE_HASH (HASH_PTR), A
539 UPDATE_HASH 16(HASH_PTR), E
549 * 0+80, 2+80, 4+80, 6+80, 8+80
553 .set j, 0
594 UPDATE_HASH (HASH_PTR), A
598 UPDATE_HASH 16(HASH_PTR), E
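
UPDATE_HASH folds the working variables back into the five state words behind HASH_PTR, the standard end-of-block step of SHA-1; in C terms, roughly:

    #include <stdint.h>

    /* Fold the working variables back into the 5-word hash state. */
    static void update_hash(uint32_t state[5],
                            uint32_t a, uint32_t b, uint32_t c,
                            uint32_t d, uint32_t e)
    {
        state[0] += a;
        state[1] += b;
        state[2] += c;
        state[3] += d;
        state[4] += e;
    }
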
601 mov A, TA
602 mov TB, A
604 mov E, C
619 * macro implements SHA-1 function's body for several 64-byte blocks
636 and $~(0x20-1), %rsp
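
The and with ~(0x20-1) masks off the low five bits of %rsp, rounding the stack pointer down to a 32-byte boundary, presumably so the scratch area used by the 256-bit stores is YMM-aligned. The same mask trick in C, with an illustrative helper name:

    #include <stdint.h>

    /* Round an address down to a 32-byte boundary (0x20 - 1 == 0x1f). */
    static inline uintptr_t align_down_32(uintptr_t p)
    {
        return p & ~(uintptr_t)(0x20 - 1);
    }
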
670 #define K1 0x5a827999
671 #define K2 0x6ed9eba1
672 #define K3 0x8f1bbcdc
673 #define K4 0xca62c1d6
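
K1-K4 are the four standard SHA-1 round constants, one per 20-round group. A small selector in C makes the mapping explicit (the helper name is illustrative):

    #include <stdint.h>

    #define K1 0x5a827999u  /* rounds  0-19 */
    #define K2 0x6ed9eba1u  /* rounds 20-39 */
    #define K3 0x8f1bbcdcu  /* rounds 40-59 */
    #define K4 0xca62c1d6u  /* rounds 60-79 */

    static inline uint32_t sha1_k(int t)
    {
        return t < 20 ? K1 : t < 40 ? K2 : t < 60 ? K3 : K4;
    }
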
687 .long 0x00010203
688 .long 0x04050607
689 .long 0x08090a0b
690 .long 0x0c0d0e0f
691 .long 0x00010203
692 .long 0x04050607
693 .long 0x08090a0b
694 .long 0x0c0d0e0f
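
The repeated 0x00010203 / 0x04050607 / 0x08090a0b / 0x0c0d0e0f pattern is a byte-shuffle control that reverses the bytes of each 32-bit word, duplicated so it covers both 128-bit lanes of a YMM register; it converts little-endian loads into the big-endian words SHA-1 operates on. The scalar equivalent:

    #include <stdint.h>

    /* What the shuffle mask above does to each 32-bit word: reverse its
     * bytes, i.e. a plain 32-bit byte swap. */
    static inline uint32_t bswap32(uint32_t x)
    {
        return (x >> 24) | ((x >> 8) & 0x0000ff00u) |
               ((x << 8) & 0x00ff0000u) | (x << 24);
    }
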