Lines Matching +full:- +full:set
2 * Implement fast SHA-1 with AVX2 instructions. (x86_64)
59 * SHA-1 implementation with Intel(R) AVX2 instruction set extensions.
62 *Visit http://software.intel.com/en-us/articles/
63 *and refer to improving-the-performance-of-the-secure-hash-algorithm-1/
97 .set A, REG_A
98 .set B, REG_B
99 .set C, REG_C
100 .set D, REG_D
101 .set E, REG_E
102 .set TB, REG_TB
103 .set TA, REG_TA
105 .set RA, REG_RA
106 .set RB, REG_RB
107 .set RC, REG_RC
108 .set RD, REG_RD
109 .set RE, REG_RE
111 .set RTA, REG_RTA
112 .set RTB, REG_RTB
114 .set T1, REG_T1
143 * - 80 DWORDs per iteration * 2
157 .set WY_00, WY0
158 .set WY_04, WY4
159 .set WY_08, WY08
160 .set WY_12, WY12
161 .set WY_16, WY16
162 .set WY_20, WY20
163 .set WY_24, WY24
164 .set WY_28, WY28
165 .set WY_32, WY_00
170 .set WY_32, WY_28
171 .set WY_28, WY_24
172 .set WY_24, WY_20
173 .set WY_20, WY_16
174 .set WY_16, WY_12
175 .set WY_12, WY_08
176 .set WY_08, WY_04
177 .set WY_04, WY_00
178 .set WY_00, WY_32
181 .set WY, WY_00
182 .set WY_minus_04, WY_04
183 .set WY_minus_08, WY_08
184 .set WY_minus_12, WY_12
185 .set WY_minus_16, WY_16
186 .set WY_minus_20, WY_20
187 .set WY_minus_24, WY_24
188 .set WY_minus_28, WY_28
189 .set WY_minus_32, WY
198 /* message scheduling pre-compute for rounds 0-15 */
206 vinsertf128 $1, ((i-1) * 2)(BUFFER_PTR2),\
221 * message scheduling pre-compute for rounds 16-31
223 * pre-calculate K+w[i] values and store to mem
226 * "brute force" vectorization for rounds 16-31 only
227 * due to w[i]->w[i-3] dependency
234 /* w[i-14] */
236 vpsrldq $4, WY_minus_04, WY_TMP /* w[i-3] */
263 * in SHA-1 specification:
264 * w[i] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) rol 1
266 * w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2
268 * since w[i]=>w[i-3] dependency is broken
298 .set i, \r
301 .set K_XMM, 32*0
303 .set K_XMM, 32*1
305 .set K_XMM, 32*2
307 .set K_XMM, 32*3
320 .set T_REG, E
321 .set E, D
322 .set D, C
323 .set C, B
324 .set B, TB
325 .set TB, A
326 .set A, T_REG
328 .set T_REG, RE
329 .set RE, RD
330 .set RD, RC
331 .set RC, RB
332 .set RB, RTB
333 .set RTB, RA
334 .set RA, T_REG
350 .set round_id, (\r % 80)
353 .set ROUND_FUNC, RND_F1
356 rorx $(32-30), B, B /* b>>>2 */
366 .set ROUND_FUNC, RND_F2
368 .set ROUND_FUNC, RND_F3
370 .set ROUND_FUNC, RND_F2
373 .set round_id, ( (\r+1) % 80)
385 rorx $(32-5), A, TA /* T2 = A >>> 5 */
386 rorx $(32-30),A, TB /* b>>>2 for next round */
405 rorx $(32-5), A, TA /* T2 = A >>> 5 */
407 rorx $(32-30), A, TB /* b>>>2 for next round */
431 rorx $(32-5), A, TA /* T2 = A >>> 5 */
432 rorx $(32-30), A, TB /* b>>>2 for next round */
456 * macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining
473 .set i, 0
476 .set i, i + 1
489 * it is set below by: cmovae BUFFER_PTR, K_BASE
502 .set j, 0
505 .set j, j+2
518 .set j, j+2
532 .set j, j+2
553 .set j, 0
556 .set j, j+2
566 .set j, j+2
576 .set j, j+2
591 .set j, j+2
619 * macro implements SHA-1 function's body for several 64-byte blocks
636 and $~(0x20-1), %rsp