
# Implement fast SHA-256 with SSSE3 instructions. (x86_64)
# This software is available to you under a choice of one of two
# - Redistributions of source code must retain the above
# - Redistributions in binary form must reproduce the above
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# This code is described in an Intel White-Paper:
# "Fast SHA-256 Implementations on Intel Architecture Processors"
# Add reg to mem using reg-mem add and store
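# A minimal sketch of the addm helper this comment describes, assuming the
# usual two-instruction read-modify-write idiom:
.macro addm p1 p2
	add \p1, \p2	# p2 += memory operand p1
	mov \p2, \p1	# store the sum back to memory
.endm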
SHUF_00BA = %xmm10 # shuffle xBxA -> 00BA
SHUF_DC00 = %xmm11 # shuffle xDxC -> DC00
e = %edx
a = %eax
_XMM_SAVE_SIZE = 0
_INP_END = 0
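# The two constants above belong to the stack-frame layout; a sketch of how
# such offset symbols typically compose (field order illustrative, not taken
# from this excerpt):
#
#   _INP_END = 0                            # saved end-of-input pointer
#   _INP     = _INP_END + _INP_END_SIZE     # saved input pointer
#   _XFER    = _INP + _INP_SIZE             # W[t]+K[t] words fed to rounds
#   STACK_SIZE = ... + _XMM_SAVE_SIZE       # _XMM_SAVE_SIZE is 0 on x86_64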
# Rotate values of symbols a...h
f = e
e = d
b = a
a = TMP_
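# Only part of the rotation appears above; the full renaming it implies,
# with each symbol taking its predecessor's register via a temporary:
#
#   TMP_ = h
#   h = g;  g = f;  f = e;  e = d
#   d = c;  c = b;  b = a;  a = TMP_
#
# Re-binding assembler symbols instead of moving data lets one round body
# be reused for every round with zero register-to-register copies.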
## compute s0 four at a time and s1 two at a time
## compute W[-16] + W[-7] 4 at a time
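## The message-schedule recurrence being vectorized here, per FIPS 180-4:
##   s0(x) = (x ror 7) ^ (x ror 18) ^ (x >> 3)
##   s1(x) = (x ror 17) ^ (x ror 19) ^ (x >> 10)
##   W[t]  = W[t-16] + s0(W[t-15]) + W[t-7] + s1(W[t-2])
## s1 depends on W[t-2], so only two of the four new words can be formed
## before their own s1 inputs exist; hence s0 in one step, s1 in two.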
mov e, y0 # y0 = e
ror $(25-11), y0 # y0 = e >> (25-11)
mov a, y1 # y1 = a
palignr $4, X2, XTMP0 # XTMP0 = W[-7]
ror $(22-13), y1 # y1 = a >> (22-13)
xor e, y0 # y0 = e ^ (e >> (25-11))
ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
xor a, y1 # y1 = a ^ (a >> (22-13))
paddd X0, XTMP0 # XTMP0 = W[-7] + W[-16]
xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
and e, y2 # y2 = (f^g)&e
ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
palignr $4, X0, XTMP1 # XTMP1 = W[-15]
xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
ror $6, y0 # y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
xor g, y2 # y2 = CH = ((f^g)&e)^g
movdqa XTMP1, XTMP2 # XTMP2 = W[-15]
ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
movdqa XTMP1, XTMP3 # XTMP3 = W[-15]
mov a, y0 # y0 = a
mov a, y2 # y2 = a
pslld $(32-7), XTMP1 # XTMP1 = W[-15] << (32-7)
or c, y0 # y0 = a|c
and c, y2 # y2 = a&c
psrld $7, XTMP2 # XTMP2 = W[-15] >> 7
and b, y0 # y0 = (a|c)&b
por XTMP2, XTMP1 # XTMP1 = W[-15] ror 7
or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
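# For reference, the scalar work interleaved above is one SHA-256 round
# (FIPS 180-4 notation; the ror-built S1/CH/S0/MAJ match the comments):
#   S1  = (e ror 6) ^ (e ror 11) ^ (e ror 25)
#   CH  = (e & f) ^ (~e & g)        # computed here as ((f^g)&e)^g
#   S0  = (a ror 2) ^ (a ror 13) ^ (a ror 22)
#   MAJ = (a&b) ^ (a&c) ^ (b&c)     # computed here as ((a|c)&b)|(a&c)
#   T1 = h + S1 + CH + K[t] + W[t];  T2 = S0 + MAJ
#   h..b take the old g..a values;  e = d + T1;  a = T1 + T2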
movdqa XTMP3, XTMP2 # XTMP2 = W[-15]
mov e, y0 # y0 = e
mov a, y1 # y1 = a
movdqa XTMP3, XTMP4 # XTMP4 = W[-15]
ror $(25-11), y0 # y0 = e >> (25-11)
xor e, y0 # y0 = e ^ (e >> (25-11))
ror $(22-13), y1 # y1 = a >> (22-13)
pslld $(32-18), XTMP3 # XTMP3 = W[-15] << (32-18)
xor a, y1 # y1 = a ^ (a >> (22-13))
ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
and e, y2 # y2 = (f^g)&e
ror $6, y0 # y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
xor g, y2 # y2 = CH = ((f^g)&e)^g
psrld $3, XTMP4 # XTMP4 = W[-15] >> 3
ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
pxor XTMP2, XTMP1 # XTMP1 = W[-15] ror 7 ^ W[-15] ror 18
mov a, y0 # y0 = a
mov a, y2 # y2 = a
or c, y0 # y0 = a|c
and c, y2 # y2 = a&c
pshufd $0b11111010, X3, XTMP2 # XTMP2 = W[-2] {BBAA}
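# pshufd's immediate packs four 2-bit source-dword selectors (lowest pair
# picks dest dword 0): $0b11111010 -> (3,3,2,2), so from X3 =
# {W[15],W[14],W[13],W[12]} it builds {W[15],W[15],W[14],W[14]}, i.e. the
# two W[-2] inputs each duplicated into a qword lane, labeled {BBAA}.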
and b, y0 # y0 = (a|c)&b
paddd XTMP1, XTMP0 # XTMP0 = W[-16] + W[-7] + s0
or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
movdqa XTMP2, XTMP3 # XTMP3 = W[-2] {BBAA}
mov e, y0 # y0 = e
mov a, y1 # y1 = a
ror $(25-11), y0 # y0 = e >> (25-11)
movdqa XTMP2, XTMP4 # XTMP4 = W[-2] {BBAA}
xor e, y0 # y0 = e ^ (e >> (25-11))
ror $(22-13), y1 # y1 = a >> (22-13)
xor a, y1 # y1 = a ^ (a >> (22-13))
ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
psrlq $17, XTMP2 # XTMP2 = W[-2] ror 17 {xBxA}
psrlq $19, XTMP3 # XTMP3 = W[-2] ror 19 {xBxA}
xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
and e, y2 # y2 = (f^g)&e
psrld $10, XTMP4 # XTMP4 = W[-2] >> 10 {BBAA}
ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
xor g, y2 # y2 = CH = ((f^g)&e)^g
ror $6, y0 # y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
mov a, y0 # y0 = a
mov a, y2 # y2 = a
or c, y0 # y0 = a|c
and c, y2 # y2 = a&c
paddd XTMP4, XTMP0 # XTMP0 = {..., ..., W[1], W[0]}
and b, y0 # y0 = (a|c)&b
pshufd $0b01010000, XTMP0, XTMP2 # XTMP2 = W[-2] {DDCC}
or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
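# The psrlq lines above implement 32-bit rotates with 64-bit shifts: each
# qword lane holds a word duplicated ({B,B} / {A,A}), so shifting the qword
# right by 17 leaves (x >> 17) | (x << 15) = x ror 17 in the low dword of
# the lane; only the x-marked dwords of {xBxA} hold garbage.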
movdqa XTMP2, XTMP3 # XTMP3 = W[-2] {DDCC}
mov e, y0 # y0 = e
ror $(25-11), y0 # y0 = e >> (25-11)
mov a, y1 # y1 = a
movdqa XTMP2, X0 # X0 = W[-2] {DDCC}
ror $(22-13), y1 # y1 = a >> (22-13)
xor e, y0 # y0 = e ^ (e >> (25-11))
ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
psrlq $17, XTMP2 # XTMP2 = W[-2] ror 17 {xDxC}
xor a, y1 # y1 = a ^ (a >> (22-13))
psrlq $19, XTMP3 # XTMP3 = W[-2] ror 19 {xDxC}
xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
and e, y2 # y2 = (f^g)&e
ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
psrld $10, X0 # X0 = W[-2] >> 10 {DDCC}
xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
ror $6, y0 # y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
xor g, y2 # y2 = CH = ((f^g)&e)^g
ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
mov a, y0 # y0 = a
mov a, y2 # y2 = a
or c, y0 # y0 = a|c
and c, y2 # y2 = a&c
paddd XTMP0, X0 # X0 = {W[3], W[2], W[1], W[0]}
and b, y0 # y0 = (a|c)&b
or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
mov e, y0 # y0 = e
ror $(25-11), y0 # y0 = e >> (25-11)
mov a, y1 # y1 = a
xor e, y0 # y0 = e ^ (e >> (25-11))
ror $(22-13), y1 # y1 = a >> (22-13)
xor a, y1 # y1 = a ^ (a >> (22-13))
ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
and e, y2 # y2 = (f^g)&e
xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
ror $6, y0 # y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
xor g, y2 # y2 = CH = ((f^g)&e)^g
ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
mov a, y0 # y0 = a
mov a, y2 # y2 = a
or c, y0 # y0 = a|c
and c, y2 # y2 = a&c
and b, y0 # y0 = (a|c)&b
or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
mov 4*0(CTX), a
mov 4*4(CTX), e
mov 4*7(CTX), h
COPY_XMM_AND_BSWAP X0, 0*16(INP), BYTE_FLIP_MASK
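# A plausible sketch of the COPY_XMM_AND_BSWAP helper used above, assuming
# it pairs an unaligned 16-byte load with a pshufb through the flip mask:
.macro COPY_XMM_AND_BSWAP p1 p2 p3
	movdqu \p2, \p1 # load 16 message bytes (may be unaligned)
	pshufb \p3, \p1 # byte-swap each dword to host order
.endm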
DO_ROUND 0
DO_ROUND 0
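# DO_ROUND's argument selects which dword of the XFER scratch area (holding
# W[t]+K[t]) feeds the round; the a..h symbol aliases are rotated between
# invocations, so the one macro body serves all the unrolled rounds.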
addm (4*0)(CTX),a
addm (4*4)(CTX),e
addm (4*7)(CTX),h
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
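# These are the standard SHA-256 round constants K[0..63]: the first 32
# bits of the fractional parts of the cube roots of the first 64 primes.
# E.g. cbrt(2) = 1.2599210499...; 0.2599210499 * 2^32 = 0x428a2f98 (K[0]).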
.octa 0x0c0d0e0f08090a0b0405060700010203
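# Under pshufb, destination byte i takes source byte mask[i]; reading this
# mask low byte first gives 03,02,01,00, 07,06,05,04, ..., which reverses
# the four bytes inside every dword, converting the big-endian message
# words to the little-endian order the arithmetic above expects.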
# shuffle xBxA -> 00BA
.octa 0xFFFFFFFFFFFFFFFF0b0a090803020100
# shuffle xDxC -> DC00
.octa 0x0b0a090803020100FFFFFFFFFFFFFFFF
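# pshufb zeroes any destination byte whose mask byte has bit 7 set, so the
# 0xFF qwords clear two dwords while the 00..0b bytes copy the A/B (or C/D)
# dwords into place: {x,B,x,A} -> {0,0,B,A} and {x,D,x,C} -> {D,C,0,0}.
# The two results can then be merged into the contiguous {D,C,B,A} block
# of four new schedule words.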