# Implement fast SHA-256 with AVX1 instructions. (x86_64)
#
# This software is available to you under a choice of one of two
# licenses: GPL Version 2, or the OpenIB.org BSD license below.
# - Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
#
# This code is described in an Intel White-Paper:
# "Fast SHA-256 Implementations on Intel Architecture Processors"
#
# This code schedules 1 block at a time, with 4 lanes per block.
# addm: add reg to mem using a reg-mem add and store.
# MY_ROR: rotate \p2 right by \p1 bits, implemented as a
# double-precision shift left by (32 - \p1) of \p2 with itself.
shld $(32-(\p1)), \p2, \p2
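
MY_ROR relies on the identity ror(x, n) == rol(x, 32 - n): shld with both
operands the same register is a rotate left, so shifting left by (32 - \p1)
rotates right by \p1. A minimal C sketch of the same operation (ror32 is an
illustrative helper name, not part of this file):

#include <stdint.h>

/* Rotate right by n for 0 < n < 32; matches shld $(32-n), x, x. */
static inline uint32_t ror32(uint32_t x, unsigned n)
{
        return (x >> n) | (x << (32 - n));
}
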
SHUF_00BA = %xmm10 # shuffle xBxA -> 00BA
SHUF_DC00 = %xmm12 # shuffle xDxC -> DC00
e = %edx
a = %eax
_XMM_SAVE_SIZE = 0
_INP_END = 0
# Rotate values of symbols a...h
f = e
e = d
b = a
a = TMP_
## compute s0 four at a time and s1 two at a time
## compute W[-16] + W[-7] 4 at a time
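
The vector code below evaluates the standard FIPS 180-4 message-schedule
recurrence, four W values per iteration. A scalar C sketch of what is being
computed (the names W, sigma0, sigma1 are illustrative, not from this file):

#include <stdint.h>

static inline uint32_t ror32(uint32_t x, unsigned n)
{
        return (x >> n) | (x << (32 - n));
}

/* sigma0 matches the ror 7 ^ ror 18 ^ shr 3 pipeline on XTMP1..XTMP4;
 * sigma1 matches the ror 17 ^ ror 19 ^ shr 10 work done on the
 * {BBAA}/{DDCC} shuffles further down. */
static uint32_t sigma0(uint32_t x) { return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3); }
static uint32_t sigma1(uint32_t x) { return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); }

static void sha256_schedule(uint32_t W[64])
{
        /* W[0..15] hold the byte-swapped 64-byte message block. */
        for (int t = 16; t < 64; t++)
                W[t] = sigma1(W[t - 2]) + W[t - 7] + sigma0(W[t - 15]) + W[t - 16];
}
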
mov e, y0 # y0 = e
MY_ROR (25-11), y0 # y0 = e >> (25-11)
mov a, y1 # y1 = a
vpalignr $4, X2, X3, XTMP0 # XTMP0 = W[-7]
MY_ROR (22-13), y1 # y1 = a >> (22-13)
xor e, y0 # y0 = e ^ (e >> (25-11))
MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
xor a, y1 # y1 = a ^ (a >> (22-13))
vpaddd X0, XTMP0, XTMP0 # XTMP0 = W[-7] + W[-16]
xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
and e, y2 # y2 = (f^g)&e
MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
vpalignr $4, X0, X1, XTMP1 # XTMP1 = W[-15]
xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
MY_ROR 6, y0 # y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
xor g, y2 # y2 = CH = ((f^g)&e)^g
MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
mov a, y0 # y0 = a
mov a, y2 # y2 = a
vpsrld $7, XTMP1, XTMP2 # XTMP2 = W[-15] >> 7
or c, y0 # y0 = a|c
and c, y2 # y2 = a&c
vpslld $(32-7), XTMP1, XTMP3 # XTMP3 = W[-15] << (32-7)
and b, y0 # y0 = (a|c)&b
vpor XTMP2, XTMP3, XTMP3 # XTMP3 = W[-15] MY_ROR 7
or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
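
The scalar instructions interleaved above are one round of the compression
function; the "Rotate values of symbols" renaming replaces the usual
h=g, g=f, ... shuffle with assembler-time aliasing. A C sketch of a single
round under that reading (sha256_round and its names are illustrative):

#include <stdint.h>

static inline uint32_t ror32(uint32_t x, unsigned n)
{
        return (x >> n) | (x << (32 - n));
}

/* One round: y0 traces S1, y2 traces CH, y1 traces S0, and the final
 * or builds MAJ, exactly as the comments above walk through it. */
void sha256_round(uint32_t s[8], uint32_t k, uint32_t w)
{
        uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
        uint32_t e = s[4], f = s[5], g = s[6], h = s[7];
        uint32_t S1  = ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25);
        uint32_t ch  = ((f ^ g) & e) ^ g;
        uint32_t t1  = h + S1 + ch + k + w;
        uint32_t S0  = ror32(a, 2) ^ ror32(a, 13) ^ ror32(a, 22);
        uint32_t maj = ((a | c) & b) | (a & c);

        /* Rotate a..h, which the assembler-time renaming does for free. */
        s[7] = g; s[6] = f; s[5] = e; s[4] = d + t1;
        s[3] = c; s[2] = b; s[1] = a; s[0] = t1 + S0 + maj;
}
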
mov e, y0 # y0 = e
mov a, y1 # y1 = a
MY_ROR (25-11), y0 # y0 = e >> (25-11)
xor e, y0 # y0 = e ^ (e >> (25-11))
MY_ROR (22-13), y1 # y1 = a >> (22-13)
xor a, y1 # y1 = a ^ (a >> (22-13))
MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
vpsrld $3, XTMP1, XTMP4 # XTMP4 = W[-15] >> 3
MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
and e, y2 # y2 = (f^g)&e
MY_ROR 6, y0 # y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
vpslld $(32-18), XTMP1, XTMP1 # XTMP1 = W[-15] << (32-18)
xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
xor g, y2 # y2 = CH = ((f^g)&e)^g
MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
vpxor XTMP2, XTMP3, XTMP3 # XTMP3 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18
mov a, y0 # y0 = a
mov a, y2 # y2 = a
or c, y0 # y0 = a|c
and c, y2 # y2 = a&c
vpshufd $0b11111010, X3, XTMP2 # XTMP2 = W[-2] {BBAA}
and b, y0 # y0 = (a|c)&b
vpaddd XTMP1, XTMP0, XTMP0 # XTMP0 = W[-16] + W[-7] + s0
or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
mov e, y0 # y0 = e
mov a, y1 # y1 = a
MY_ROR (25-11), y0 # y0 = e >> (25-11)
xor e, y0 # y0 = e ^ (e >> (25-11))
MY_ROR (22-13), y1 # y1 = a >> (22-13)
xor a, y1 # y1 = a ^ (a >> (22-13))
MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
vpsrld $10, XTMP2, XTMP4 # XTMP4 = W[-2] >> 10 {BBAA}
vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] MY_ROR 19 {xBxA}
xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
and e, y2 # y2 = (f^g)&e
vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] MY_ROR 17 {xBxA}
MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
xor g, y2 # y2 = CH = ((f^g)&e)^g
MY_ROR 6, y0 # y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
mov a, y0 # y0 = a
mov a, y2 # y2 = a
or c, y0 # y0 = a|c
and c, y2 # y2 = a&c
vpaddd XTMP4, XTMP0, XTMP0 # XTMP0 = {..., ..., W[1], W[0]}
and b, y0 # y0 = (a|c)&b
vpshufd $0b01010000, XTMP0, XTMP2 # XTMP2 = W[-2] {DDCC}
or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
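
AVX1 has no vector rotate, so the code fakes a 32-bit rotate with 64-bit
shifts: vpshufd duplicates each W[-2] dword into both halves of a 64-bit
lane ({BBAA}, then {DDCC} below), after which vpsrlq $17 and $19 leave
genuine 32-bit rotations in the low dword of each lane ({xBxA}, {xDxC}).
The identity, sketched in C (illustrative helper name):

#include <stdint.h>

/* Duplicating x into both halves of a 64-bit lane makes a plain 64-bit
 * right shift by n (0 < n < 32) produce ror32(x, n) in the low 32 bits,
 * which is the vpshufd + vpsrlq trick above. */
static inline uint32_t ror32_via_u64(uint32_t x, unsigned n)
{
        uint64_t lane = ((uint64_t)x << 32) | x; /* [x:x], like {AA} */
        return (uint32_t)(lane >> n);
}
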
mov e, y0 # y0 = e
MY_ROR (25-11), y0 # y0 = e >> (25-11)
mov a, y1 # y1 = a
MY_ROR (22-13), y1 # y1 = a >> (22-13)
xor e, y0 # y0 = e ^ (e >> (25-11))
MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
vpsrld $10, XTMP2, XTMP5 # XTMP5 = W[-2] >> 10 {DDCC}
xor a, y1 # y1 = a ^ (a >> (22-13))
vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] MY_ROR 19 {xDxC}
xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
and e, y2 # y2 = (f^g)&e
MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] MY_ROR 17 {xDxC}
xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
MY_ROR 6, y0 # y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
xor g, y2 # y2 = CH = ((f^g)&e)^g
MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
mov a, y0 # y0 = a
mov a, y2 # y2 = a
or c, y0 # y0 = a|c
and c, y2 # y2 = a&c
vpaddd XTMP0, XTMP5, X0 # X0 = {W[3], W[2], W[1], W[0]}
and b, y0 # y0 = (a|c)&b
or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
mov e, y0 # y0 = e
MY_ROR (25-11), y0 # y0 = e >> (25-11)
mov a, y1 # y1 = a
xor e, y0 # y0 = e ^ (e >> (25-11))
MY_ROR (22-13), y1 # y1 = a >> (22-13)
xor a, y1 # y1 = a ^ (a >> (22-13))
MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
and e, y2 # y2 = (f^g)&e
xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
MY_ROR 6, y0 # y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
xor g, y2 # y2 = CH = ((f^g)&e)^g
MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
mov a, y0 # y0 = a
mov a, y2 # y2 = a
or c, y0 # y0 = a|c
and c, y2 # y2 = a&c
and b, y0 # y0 = (a|c)&b
or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
mov 4*0(CTX), a # a = digest word 0
mov 4*4(CTX), e # e = digest word 4
mov 4*7(CTX), h # h = digest word 7
COPY_XMM_AND_BSWAP X0, 0*16(INP), BYTE_FLIP_MASK # load and byte-swap input words 0..3
DO_ROUND 0
DO_ROUND 0
addm (4*0)(CTX),a # digest word 0 += a
addm (4*4)(CTX),e # digest word 4 += e
addm (4*7)(CTX),h # digest word 7 += h
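
The mov 4*i(CTX) loads and the addm stores bracket the Davies-Meyer
feed-forward: the working variables start from the current digest and are
added back into it after the 64 rounds. A C sketch of the per-block
skeleton, built on the sha256_round sketch above (all names illustrative):

#include <stdint.h>

void sha256_round(uint32_t s[8], uint32_t k, uint32_t w); /* sketch above */

static void sha256_block(uint32_t digest[8], const uint32_t W[64],
                         const uint32_t K[64])
{
        uint32_t s[8];

        for (int i = 0; i < 8; i++)
                s[i] = digest[i];            /* mov 4*i(CTX), a..h */
        for (int t = 0; t < 64; t++)
                sha256_round(s, K[t], W[t]); /* DO_ROUND */
        for (int i = 0; i < 8; i++)
                digest[i] += s[i];           /* addm (4*i)(CTX), a..h */
}
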
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
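
These round constants are not arbitrary: FIPS 180-4 defines K[t] as the
first 32 bits of the fractional part of the cube root of the t-th prime.
A quick C check of the first few entries (illustrative; compile with -lm):

#include <math.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        const unsigned primes[] = { 2, 3, 5, 7, 11, 13, 17, 19 };

        for (int i = 0; i < 8; i++) {
                double r = cbrt((double)primes[i]);
                r -= floor(r);                  /* fractional part */
                printf("0x%08x\n", (uint32_t)(r * 4294967296.0)); /* * 2^32 */
        }
        return 0; /* prints 0x428a2f98, 0x71374491, 0xb5c0fbcf, ... */
}
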
# byte-flip mask: swap the four bytes of each dword (input is big-endian)
.octa 0x0c0d0e0f08090a0b0405060700010203

# shuffle xBxA -> 00BA
.octa 0xFFFFFFFFFFFFFFFF0b0a090803020100

# shuffle xDxC -> DC00
.octa 0x0b0a090803020100FFFFFFFFFFFFFFFF
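
All three .octa constants are (v)pshufb control masks. The byte-flip mask
reverses the bytes within each dword; SHUF_00BA and SHUF_DC00 gather the
two valid dwords of an {xBxA} or {xDxC} vector and use 0xFF bytes (top bit
set) to zero the other half. A scalar C model of pshufb semantics
(pshufb16 is an illustrative helper, not part of this file):

#include <stdint.h>

/* (v)pshufb: output byte i is src[mask[i] & 0x0f], or 0 when mask[i]
 * has its top bit set, which is how the 0xFF bytes above zero the
 * unused lanes of 00BA / DC00. */
static void pshufb16(uint8_t out[16], const uint8_t src[16],
                     const uint8_t mask[16])
{
        for (int i = 0; i < 16; i++)
                out[i] = (mask[i] & 0x80) ? 0 : src[mask[i] & 0x0f];
}
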