Lines Matching +full:16 +full:g
104 g = %r10d define
115 _XFER_SIZE = 16
138 h = g
139 g = f define
150 ## compute W[-16] + W[-7] 4 at a time
162 xor g, y2 # y2 = f^g
163 paddd X0, XTMP0 # XTMP0 = W[-7] + W[-16]
165 and e, y2 # y2 = (f^g)&e
171 xor g, y2 # y2 = CH = ((f^g)&e)^g
203 xor g, y2 # y2 = f^g
207 and e, y2 # y2 = (f^g)&e
211 xor g, y2 # y2 = CH = ((f^g)&e)^g
228 paddd XTMP1, XTMP0 # XTMP0 = W[-16] + W[-7] + s0
244 xor g, y2 # y2 = f^g
247 and e, y2 # y2 = (f^g)&e
251 xor g, y2 # y2 = CH = ((f^g)&e)^g
285 xor g, y2 # y2 = f^g
288 and e, y2 # y2 = (f^g)&e
293 xor g, y2 # y2 = CH = ((f^g)&e)^g
326 xor g, y2 # y2 = f^g
329 and e, y2 # y2 = (f^g)&e
332 xor g, y2 # y2 = CH = ((f^g)&e)^g
383 mov 4*6(CTX), g
393 ## byte swap first 16 dwords
394 COPY_XMM_AND_BSWAP X0, 0*16(INP), BYTE_FLIP_MASK
395 COPY_XMM_AND_BSWAP X1, 1*16(INP), BYTE_FLIP_MASK
396 COPY_XMM_AND_BSWAP X2, 2*16(INP), BYTE_FLIP_MASK
397 COPY_XMM_AND_BSWAP X3, 3*16(INP), BYTE_FLIP_MASK
401 ## schedule 48 input dwords, by doing 3 rounds of 16 each
403 .align 16
410 movdqa 1*16(TBL), XFER
415 movdqa 2*16(TBL), XFER
420 movdqa 3*16(TBL), XFER
423 add $4*16, TBL
437 paddd 1*16(TBL), X1
439 add $2*16, TBL
457 addm (4*6)(CTX),g
498 .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
499 .align 16
503 .section .rodata.cst16._SHUF_00BA, "aM", @progbits, 16
504 .align 16
509 .section .rodata.cst16._SHUF_DC00, "aM", @progbits, 16
510 .align 16