Lines Matching +full:- +full:2 +full:g
2 # Implement fast SHA-256 with SSSE3 instructions. (x86_64)
13 # General Public License (GPL) Version 2, available from the file
21 # - Redistributions of source code must retain the above
25 # - Redistributions in binary form must reproduce the above
41 # This code is described in an Intel White-Paper:
42 # "Fast SHA-256 Implementations on Intel Architecture Processors"
57 # Add reg to mem using reg-mem add and store
86 SHUF_00BA = %xmm10 # shuffle xBxA -> 00BA
87 SHUF_DC00 = %xmm11 # shuffle xDxC -> DC00
91 INP = %rsi # 2nd arg
103 g = %r10d define
137 h = g
138 g = f define
149 ## compute W[-16] + W[-7] 4 at a time
152 ror $(25-11), y0 # y0 = e >> (25-11)
154 palignr $4, X2, XTMP0 # XTMP0 = W[-7]
155 ror $(22-13), y1 # y1 = a >> (22-13)
156 xor e, y0 # y0 = e ^ (e >> (25-11))
158 ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
160 xor a, y1 # y1 = a ^ (a >> (22-13)
161 xor g, y2 # y2 = f^g
162 paddd X0, XTMP0 # XTMP0 = W[-7] + W[-16]
163 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
164 and e, y2 # y2 = (f^g)&e
165 ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
167 palignr $4, X0, XTMP1 # XTMP1 = W[-15]
168 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
170 xor g, y2 # y2 = CH = ((f^g)&e)^g
171 movdqa XTMP1, XTMP2 # XTMP2 = W[-15]
172 ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
175 movdqa XTMP1, XTMP3 # XTMP3 = W[-15]
179 pslld $(32-7), XTMP1 #
186 por XTMP2, XTMP1 # XTMP1 = W[-15] ror 7
191 movdqa XTMP3, XTMP2 # XTMP2 = W[-15]
194 movdqa XTMP3, XTMP4 # XTMP4 = W[-15]
195 ror $(25-11), y0 # y0 = e >> (25-11)
196 xor e, y0 # y0 = e ^ (e >> (25-11))
198 ror $(22-13), y1 # y1 = a >> (22-13)
199 pslld $(32-18), XTMP3 #
200 xor a, y1 # y1 = a ^ (a >> (22-13)
201 ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
202 xor g, y2 # y2 = f^g
204 ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
205 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
206 and e, y2 # y2 = (f^g)&e
209 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
210 xor g, y2 # y2 = CH = ((f^g)&e)^g
211 psrld $3, XTMP4 # XTMP4 = W[-15] >> 3
214 ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
215 pxor XTMP2, XTMP1 # XTMP1 = W[-15] ror 7 ^ W[-15] ror 18
224 pshufd $0b11111010, X3, XTMP2 # XTMP2 = W[-2] {BBAA}
227 paddd XTMP1, XTMP0 # XTMP0 = W[-16] + W[-7] + s0
232 movdqa XTMP2, XTMP3 # XTMP3 = W[-2] {BBAA}
235 ror $(25-11), y0 # y0 = e >> (25-11)
236 movdqa XTMP2, XTMP4 # XTMP4 = W[-2] {BBAA}
237 xor e, y0 # y0 = e ^ (e >> (25-11))
238 ror $(22-13), y1 # y1 = a >> (22-13)
240 xor a, y1 # y1 = a ^ (a >> (22-13)
241 ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
242 psrlq $17, XTMP2 # XTMP2 = W[-2] ror 17 {xBxA}
243 xor g, y2 # y2 = f^g
244 psrlq $19, XTMP3 # XTMP3 = W[-2] ror 19 {xBxA}
245 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
246 and e, y2 # y2 = (f^g)&e
247 psrld $10, XTMP4 # XTMP4 = W[-2] >> 10 {BBAA}
248 ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
249 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
250 xor g, y2 # y2 = CH = ((f^g)&e)^g
254 ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
255 add (2*4 + _XFER)(%rsp), y2 # y2 = k + w + S1 + CH
268 pshufd $0b01010000, XTMP0, XTMP2 # XTMP2 = W[-2] {BBAA}
273 movdqa XTMP2, XTMP3 # XTMP3 = W[-2] {DDCC}
275 ror $(25-11), y0 # y0 = e >> (25-11)
277 movdqa XTMP2, X0 # X0 = W[-2] {DDCC}
278 ror $(22-13), y1 # y1 = a >> (22-13)
279 xor e, y0 # y0 = e ^ (e >> (25-11))
281 ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
282 psrlq $17, XTMP2 # XTMP2 = W[-2] ror 17 {xDxC}
283 xor a, y1 # y1 = a ^ (a >> (22-13)
284 xor g, y2 # y2 = f^g
285 psrlq $19, XTMP3 # XTMP3 = W[-2] ror 19 {xDxC}
286 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25
287 and e, y2 # y2 = (f^g)&e
288 ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
289 psrld $10, X0 # X0 = W[-2] >> 10 {DDCC}
290 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22
291 ror $6, y0 # y0 = S1 = (e>>6) & (e>>11) ^ (e>>2
292 xor g, y2 # y2 = CH = ((f^g)&e)^g
294 ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>2
305 paddd XTMP0, X0 # X0 = {W[3], W[2], W[1], W[0]}
318 ror $(25-11), y0 # y0 = e >> (25-11)
320 xor e, y0 # y0 = e ^ (e >> (25-11))
321 ror $(22-13), y1 # y1 = a >> (22-13)
323 xor a, y1 # y1 = a ^ (a >> (22-13)
324 ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
325 xor g, y2 # y2 = f^g
326 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
327 ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
328 and e, y2 # y2 = (f^g)&e
329 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
331 xor g, y2 # y2 = CH = ((f^g)&e)^g
333 ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
373 mov 4*2(CTX), c
377 mov 4*6(CTX), g
390 COPY_XMM_AND_BSWAP X2, 2*16(INP), BYTE_FLIP_MASK
409 movdqa 2*16(TBL), XFER
423 mov $2, SRND
429 DO_ROUND 2
433 add $2*16, TBL
436 DO_ROUND 2
447 addm (4*2)(CTX),c
451 addm (4*6)(CTX),g
497 # shuffle xBxA -> 00BA
503 # shuffle xDxC -> DC00