2 * Implement fast SHA-1 with AVX2 instructions. (x86_64)
4 * This file is provided under a dual BSD/GPLv2 license. When using or
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
47 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
59 * SHA-1 implementation with Intel(R) AVX2 instruction set extensions.
62 * Visit http://software.intel.com/en-us/articles/
63 * improving-the-performance-of-the-secure-hash-algorithm-1/ for details.
97 .set A, REG_A
101 .set E, REG_E
142 * Keep 2 iterations precalculated at a time:
143 * - 80 DWORDs per iteration * 2
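
One reading of "80 DWORDs per iteration * 2", as a minimal C sketch (the struct and helper names below are illustrative, not taken from this file): the scratch area holds the 80 precomputed K+w dwords for two blocks at once, so the rounds of one block can consume one bank while the schedule for the next block is written into the other.

    #include <stdint.h>

    /* Illustrative double buffer: 80 precomputed K+w dwords per block,
     * two blocks in flight at a time. */
    struct wk_buf {
        uint32_t bank[2][80];
    };

    /* Rounds for the current block read bank[cur] while the schedule
     * for the next block is written into bank[cur ^ 1]. */
    static inline uint32_t wk_read(const struct wk_buf *b, int cur, int t)
    {
        return b->bank[cur][t];
    }

    static inline void wk_write(struct wk_buf *b, int cur, int t, uint32_t v)
    {
        b->bank[cur ^ 1][t] = v;
    }
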
193 .if (i == 0) # Initialize and rotate registers
198 /* message scheduling pre-compute for rounds 0-15 */
199 .if ((i & 7) == 0)
205 .elseif ((i & 7) == 1)
206 vinsertf128 $1, ((i-1) * 2)(BUFFER_PTR2),\
208 .elseif ((i & 7) == 2)
210 .elseif ((i & 7) == 4)
212 .elseif ((i & 7) == 7)
213 vmovdqu WY_TMP, PRECALC_WK(i&~7)
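
In scalar C terms, this stage does roughly the following for rounds 0-15 (the assembly handles several words per instruction, pairs words from the two input buffers with vinsertf128, and does the byte swap with a shuffle; function and array names below are illustrative):

    #include <stdint.h>

    /* Big-endian load of message word t from a 64-byte block. */
    static uint32_t load_be32(const uint8_t *block, int t)
    {
        const uint8_t *p = block + 4 * t;

        return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
               ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
    }

    /* Rounds 0-15: w[t] is just the message word; the round constant is
     * folded in here so the round code only has to add wk[t]. */
    static void precalc_00_15(const uint8_t *block, uint32_t k,
                              uint32_t w[80], uint32_t wk[80])
    {
        for (int t = 0; t < 16; t++) {
            w[t]  = load_be32(block, t);
            wk[t] = w[t] + k;
        }
    }
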
221 * message scheduling pre-compute for rounds 16-31
223 * pre-calculate K+w[i] values and store to mem
226 * "brute force" vectorization for rounds 16-31 only
227 * due to w[i]->w[i-3] dependency
229 .if ((i & 7) == 0)
234 /* w[i-14] */
236 vpsrldq $4, WY_minus_04, WY_TMP /* w[i-3] */
237 .elseif ((i & 7) == 1)
240 .elseif ((i & 7) == 2)
243 .elseif ((i & 7) == 3)
246 .elseif ((i & 7) == 4)
249 .elseif ((i & 7) == 5)
252 .elseif ((i & 7) == 7)
255 vmovdqu WY_TMP, PRECALC_WK(i&~7)
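
For reference, the plain C form of the recurrence this stage vectorizes; because each new w[t] needs w[t-3], at most four words can be produced independently at a time, which is the "brute force" limitation the comment refers to. Names below are illustrative.

    #include <stdint.h>

    static inline uint32_t rol32(uint32_t x, int n)
    {
        return (x << n) | (x >> (32 - n));
    }

    /* Rounds 16-31: the specification's schedule recurrence.  Each w[t]
     * depends on w[t-3], hence the narrower vectorization in the
     * assembly.  (The assembly also adds the round constant K here.) */
    static void schedule_16_31(uint32_t w[80])
    {
        for (int t = 16; t < 32; t++)
            w[t] = rol32(w[t - 3] ^ w[t - 8] ^ w[t - 14] ^ w[t - 16], 1);
    }
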
263 * in the SHA-1 specification:
264 * w[i] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) rol 1
266 * while here the equivalent w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2 is used,
268 * so the w[i] -> w[i-3] dependency is broken and more efficient vectorization is possible
271 .if ((i & 7) == 0)
277 .elseif ((i & 7) == 1)
280 .elseif ((i & 7) == 2)
282 .elseif ((i & 7) == 3)
284 .elseif ((i & 7) == 4)
286 .elseif ((i & 7) == 5)
289 .elseif ((i & 7) == 7)
291 vmovdqu WY_TMP, PRECALC_WK(i&~7)
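
The rol-2 form is an exact rewrite of the specification's recurrence for t >= 32 (substitute the rule into itself once; rotation distributes over XOR and the intermediate terms cancel), which removes the w[t-3] dependency and lets eight schedule words be computed per 256-bit operation. A small self-checking C sketch, with illustrative names:

    #include <assert.h>
    #include <stdint.h>

    static inline uint32_t rol32(uint32_t x, int n)
    {
        return (x << n) | (x >> (32 - n));
    }

    /* For t >= 32, (w[t-6]^w[t-16]^w[t-28]^w[t-32]) rol 2 equals the
     * standard (w[t-3]^w[t-8]^w[t-14]^w[t-16]) rol 1, but no longer
     * depends on w[t-3]. */
    static void check_schedule_identity(const uint32_t msg[16])
    {
        uint32_t w[80];

        for (int t = 0; t < 16; t++)
            w[t] = msg[t];
        for (int t = 16; t < 80; t++)
            w[t] = rol32(w[t - 3] ^ w[t - 8] ^ w[t - 14] ^ w[t - 16], 1);
        for (int t = 32; t < 80; t++)
            assert(w[t] == rol32(w[t - 6] ^ w[t - 16] ^ w[t - 28] ^ w[t - 32], 2));
    }
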
301 .set K_XMM, 32*0
320 .set T_REG, E
321 .set E, D
325 .set TB, A
326 .set A, T_REG
352 .if (round_id == 0) /* Precalculate F for first round */
356 rorx $(32-30), B, B /* b>>>2 */
380 add WK(\r), E
382 andn C, A, T1 /* ~b&d */
383 lea (RE,RTB), E /* Add F from the previous round */
385 rorx $(32-5), A, TA /* T2 = A >>> 5 */
386 rorx $(32-30),A, TB /* b>>>2 for next round */
394 and B, A /* b&c */
395 xor T1, A /* F1 = (b&c) ^ (~b&d) */
397 lea (RE,RTA), E /* E += A >>> 5 */
401 add WK(\r), E
402 lea (RE,RTB), E /* Add F from the previous round */
405 rorx $(32-5), A, TA /* T2 = A >>> 5 */
407 rorx $(32-30), A, TB /* b>>>2 for next round */
412 xor B, A
415 add TA, E /* E += A >>> 5 */
418 xor C, A
423 add WK(\r), E
426 lea (RE,RTB), E /* Add F from the previous round */
429 or A, T1
431 rorx $(32-5), A, TA /* T2 = A >>> 5 */
432 rorx $(32-30), A, TB /* b>>>2 for next round */
438 and B, A
439 or T1, A
441 add TA, E /* E += A >>> 5 */
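
The andn/and/xor, xor/xor and or/and sequences in these fragments are the three classic SHA-1 boolean functions, and rorx provides the a>>>5 and b>>>2 rotations; the assembly avoids data movement by renaming registers each round and by computing F one round ahead ("Add F from the previous round"). A scalar C rendering of the same round, assuming the schedule word plus round constant is already available as wk; names are illustrative:

    #include <stdint.h>

    static inline uint32_t rol32(uint32_t x, int n)
    {
        return (x << n) | (x >> (32 - n));
    }

    /* Round functions: choose for rounds 0-19, parity for 20-39 and
     * 60-79, majority for 40-59. */
    static inline uint32_t f_ch(uint32_t b, uint32_t c, uint32_t d)     { return (b & c) ^ (~b & d); }
    static inline uint32_t f_parity(uint32_t b, uint32_t c, uint32_t d) { return b ^ c ^ d; }
    static inline uint32_t f_maj(uint32_t b, uint32_t c, uint32_t d)    { return (b & c) | (b & d) | (c & d); }

    /* One SHA-1 round, with wk = w[t] + K precomputed. */
    static inline void sha1_round(uint32_t *a, uint32_t *b, uint32_t *c,
                                  uint32_t *d, uint32_t *e,
                                  uint32_t f, uint32_t wk)
    {
        uint32_t t = rol32(*a, 5) + f + *e + wk;

        *e = *d;
        *d = *c;
        *c = rol32(*b, 30);
        *b = *a;
        *a = t;
    }
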
446 * %1 += (%2 >= %3) ? %4 : 0
448 .macro ADD_IF_GE a, b, c, d
449 mov \a, RTA
452 cmovge RTA, \a
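
From the visible lines (the first operand copied into a temporary, then a cmovge back into it), ADD_IF_GE looks like a branchless conditional add; the C reading below is inferred from the fragment and the comment above, so treat the exact operand roles as an assumption.

    #include <stdint.h>

    /* Assumed semantics of ADD_IF_GE a, b, c, d:
     *   tmp = a + d;  if (b >= c) a = tmp;
     * i.e. add d to a only when b >= c, with no branch. */
    static inline uint64_t add_if_ge(uint64_t a, int64_t b, int64_t c, uint64_t d)
    {
        uint64_t tmp = a + d;

        return (b >= c) ? tmp : a;
    }
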
456 * macro implements 80 rounds of SHA-1 for multiple blocks with s/w pipelining
462 mov (HASH_PTR), A
466 mov 16(HASH_PTR), E
473 .set i, 0
488 * we use the K_BASE value as a signal of the last block,
500 * rounds: 0,2,4,6,8
502 .set j, 0
535 UPDATE_HASH (HASH_PTR), A
539 UPDATE_HASH 16(HASH_PTR), E
549 * 0+80, 2+80, 4+80, 6+80, 8+80
553 .set j, 0
594 UPDATE_HASH (HASH_PTR), A
598 UPDATE_HASH 16(HASH_PTR), E
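
UPDATE_HASH folds the working variables back into the five state words behind HASH_PTR, the standard end-of-block step of SHA-1; in C terms, roughly:

    #include <stdint.h>

    /* Fold the working variables back into the 5-word hash state. */
    static void update_hash(uint32_t state[5],
                            uint32_t a, uint32_t b, uint32_t c,
                            uint32_t d, uint32_t e)
    {
        state[0] += a;
        state[1] += b;
        state[2] += c;
        state[3] += d;
        state[4] += e;
    }
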
601 mov A, TA
602 mov TB, A
604 mov E, C
619 * macro implements SHA-1 function's body for several 64-byte blocks
636 and $~(0x20-1), %rsp
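
The and with ~(0x20-1) masks off the low five bits of %rsp, rounding the stack pointer down to a 32-byte boundary, presumably so the scratch area used by the 256-bit stores is YMM-aligned. The same mask trick in C, with an illustrative helper name:

    #include <stdint.h>

    /* Round an address down to a 32-byte boundary (0x20 - 1 == 0x1f). */
    static inline uintptr_t align_down_32(uintptr_t p)
    {
        return p & ~(uintptr_t)(0x20 - 1);
    }
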
670 #define K1 0x5a827999
671 #define K2 0x6ed9eba1
672 #define K3 0x8f1bbcdc
673 #define K4 0xca62c1d6
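
K1-K4 are the four standard SHA-1 round constants, one per 20-round group. A small selector in C makes the mapping explicit (the helper name is illustrative):

    #include <stdint.h>

    #define K1 0x5a827999u  /* rounds  0-19 */
    #define K2 0x6ed9eba1u  /* rounds 20-39 */
    #define K3 0x8f1bbcdcu  /* rounds 40-59 */
    #define K4 0xca62c1d6u  /* rounds 60-79 */

    static inline uint32_t sha1_k(int t)
    {
        return t < 20 ? K1 : t < 40 ? K2 : t < 60 ? K3 : K4;
    }
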
687 .long 0x00010203
688 .long 0x04050607
689 .long 0x08090a0b
690 .long 0x0c0d0e0f
691 .long 0x00010203
692 .long 0x04050607
693 .long 0x08090a0b
694 .long 0x0c0d0e0f
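
The repeated 0x00010203 / 0x04050607 / 0x08090a0b / 0x0c0d0e0f pattern is a byte-shuffle control that reverses the bytes of each 32-bit word, duplicated so it covers both 128-bit lanes of a YMM register; it converts little-endian loads into the big-endian words SHA-1 operates on. The scalar equivalent:

    #include <stdint.h>

    /* What the shuffle mask above does to each 32-bit word: reverse its
     * bytes, i.e. a plain 32-bit byte swap. */
    static inline uint32_t bswap32(uint32_t x)
    {
        return (x >> 24) | ((x >> 8) & 0x0000ff00u) |
               ((x << 8) & 0x00ff0000u) | (x << 24);
    }
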