Lines Matching +full:32 +full:k
161 addl \disp(%rsp, SRND), h # h = k + w + h # --
175 add h, d # d = k + w + h + d # --
189 vpslld $(32-7), XTMP1, XTMP3
191 add y1, h # h = k + w + h + S0 # --
193 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
197 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
209 addl offset(%rsp, SRND), h # h = k + w + h # --
224 add h, d # d = k + w + h + d # --
226 vpslld $(32-18), XTMP1, XTMP1
243 add y1, h # h = k + w + h + S0 # --
246 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
247 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
260 addl offset(%rsp, SRND), h # h = k + w + h # --
275 add h, d # d = k + w + h + d # --
295 add y1, h # h = k + w + h + S0 # --
296 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
297 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
310 addl offset(%rsp, SRND), h # h = k + w + h # --
324 add h, d # d = k + w + h + d # --
337 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
348 add y1, h # h = k + w + h + S0 # --
349 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
376 addl \disp(%rsp, SRND), h # h = k + w + h # --
386 add h, d # d = k + w + h + d # --
388 add y1, h # h = k + w + h + S0 # --
389 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
395 add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
415 addl offset(%rsp, SRND), h # h = k + w + h # --
425 add h, d # d = k + w + h + d # --
427 add y1, h # h = k + w + h + S0 # --
429 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
435 add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
455 addl offset(%rsp, SRND), h # h = k + w + h # --
465 add h, d # d = k + w + h + d # --
467 add y1, h # h = k + w + h + S0 # --
469 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
475 add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
495 addl offset(%rsp, SRND), h # h = k + w + h # --
505 add h, d # d = k + w + h + d # --
507 add y1, h # h = k + w + h + S0 # --
509 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
512 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
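
All of the add lines above come from the per-round macros of what appears to be the kernel's AVX2 SHA-256 transform (arch/x86/crypto/sha256-avx2-asm.S): each round folds the precomputed k + w value from the stack into h, then accumulates S1 + CH and S0, and the old_h variants show part of that accumulation being deferred into the previous round's h as the rounds are software-pipelined. For reference, a minimal plain-C sketch of one standard FIPS 180-4 round (helper names are mine; the real assembly rotates register roles instead of shuffling a state array):

    #include <stdint.h>

    /* FIPS 180-4 round primitives; "S0"/"S1"/"CH"/"MAJ" in the asm comments. */
    #define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
    #define CH(e, f, g)  (((e) & (f)) ^ (~(e) & (g)))
    #define MAJ(a, b, c) (((a) & (b)) ^ ((a) & (c)) ^ ((b) & (c)))
    #define S0(a)        (ROTR32(a, 2) ^ ROTR32(a, 13) ^ ROTR32(a, 22))
    #define S1(e)        (ROTR32(e, 6) ^ ROTR32(e, 11) ^ ROTR32(e, 25))

    /* One round; kw is the precomputed K[t] + W[t] that the assembly adds
     * with "addl offset(%rsp, SRND), h". */
    static void sha256_round(uint32_t s[8], uint32_t kw)
    {
        uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
        uint32_t e = s[4], f = s[5], g = s[6], h = s[7];

        uint32_t t1 = h + kw + S1(e) + CH(e, f, g); /* h = k + w + h + S1 + CH */
        uint32_t t2 = S0(a) + MAJ(a, b, c);

        s[7] = g; s[6] = f; s[5] = e;
        s[4] = d + t1;            /* "d = d + t1" in the comments above   */
        s[3] = c; s[2] = b; s[1] = a;
        s[0] = t1 + t2;           /* "h = t1 + S0" plus the MAJ term      */
    }
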
538 and $-32, %rsp # align rsp to a 32-byte boundary
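
The and with $-32 clears the low five bits of %rsp, rounding the stack pointer down to a 32-byte boundary; the aligned vmovdqa stores into the _XFER area later in the listing require that alignment. The same idiom in C:

    #include <stdint.h>

    /* Round an address down to a 32-byte boundary, as "and $-32, %rsp" does:
     * -32 is ~31, so the mask clears the five low bits. */
    static inline uintptr_t align_down_32(uintptr_t p)
    {
        return p & (uintptr_t)-32;
    }
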
566 VMOVDQ 0*32(INP), XTMP0
567 VMOVDQ 1*32(INP), XTMP1
568 VMOVDQ 2*32(INP), XTMP2
569 VMOVDQ 3*32(INP), XTMP3
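
These four 32-byte VMOVDQ loads fetch 4 * 32 = 128 bytes, i.e. two 64-byte SHA-256 input blocks: this implementation schedules two blocks at a time, roughly one block per 128-bit lane of each ymm register. A rough C outline of that consumption rate (function names are placeholders, not the kernel API):

    #include <stddef.h>
    #include <stdint.h>

    #define SHA256_BLOCK_SIZE 64

    /* Sketch of the outer-loop shape implied by the loads above: 128 bytes
     * (two blocks) are consumed per pass of the two-block path. */
    static void sha256_blocks_avx2_sketch(uint32_t state[8], const uint8_t *data,
                                          size_t nblocks)
    {
        while (nblocks >= 2) {
            /* two_block_transform(state, data);  placeholder for the asm body */
            data += 2 * SHA256_BLOCK_SIZE;
            nblocks -= 2;
        }
        if (nblocks) {
            /* a single trailing block takes a separate path in the assembly */
        }
        (void)state;
        (void)data;
    }
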
592 leaq K256+0*32(%rip), INP ## reuse INP as scratch reg
594 vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
595 FOUR_ROUNDS_AND_SCHED (_XFER + 0*32)
597 leaq K256+1*32(%rip), INP
599 vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
600 FOUR_ROUNDS_AND_SCHED (_XFER + 1*32)
602 leaq K256+2*32(%rip), INP
604 vmovdqa XFER, 2*32+_XFER(%rsp, SRND)
605 FOUR_ROUNDS_AND_SCHED (_XFER + 2*32)
607 leaq K256+3*32(%rip), INP
609 vmovdqa XFER, 3*32+_XFER(%rsp, SRND)
610 FOUR_ROUNDS_AND_SCHED (_XFER + 3*32)
612 add $4*32, SRND
613 cmp $3*4*32, SRND
618 leaq K256+0*32(%rip), INP
620 vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
621 DO_4ROUNDS (_XFER + 0*32)
623 leaq K256+1*32(%rip), INP
625 vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
626 DO_4ROUNDS (_XFER + 1*32)
627 add $2*32, SRND
632 cmp $4*4*32, SRND
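
SRND indexes the stack _XFER area in 32-byte steps, one slot per group of four rounds. Each pass of the first loop runs four FOUR_ROUNDS_AND_SCHED groups (16 rounds) and advances SRND by 4*32 = 128, stopping at 3*4*32 = 384, i.e. after 48 rounds; the second loop, which no longer needs message scheduling, runs two DO_4ROUNDS groups (8 rounds) per pass and advances SRND by 2*32 = 64 until 4*4*32 = 512. The accounting, as a worked check:

    loop 1:  SRND 0 -> 384 in steps of 128  =>  3 passes * 16 rounds = 48 rounds
    loop 2:  SRND 384 -> 512 in steps of 64 =>  2 passes *  8 rounds = 16 rounds
    total:   48 + 16 = 64 rounds, as SHA-256 requires
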
654 DO_4ROUNDS (_XFER + 0*32 + 16)
655 DO_4ROUNDS (_XFER + 1*32 + 16)
656 add $2*32, SRND
657 cmp $4*4*32, SRND
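
These last DO_4ROUNDS invocations read at _XFER + n*32 + 16 rather than _XFER + n*32: each 32-byte slot written by the earlier vmovdqa stores apparently holds w + k for four rounds of both interleaved blocks, the first block in the low 16 bytes and the second in the high 16 bytes, so the second block's rounds reuse the already-computed schedule. A small C view of that presumed slot layout:

    #include <stdint.h>

    /* Presumed layout of one 32-byte _XFER slot (low lane = first block,
     * high lane = second block), matching the "+ 16" offsets above. */
    struct xfer_slot {
        uint32_t block1_wk[4];  /* read by DO_4ROUNDS (_XFER + n*32)      */
        uint32_t block2_wk[4];  /* read by DO_4ROUNDS (_XFER + n*32 + 16) */
    };
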
759 .section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
760 .align 32
765 .section .rodata.cst32._SHUF_00BA, "aM", @progbits, 32
766 .align 32
771 .section .rodata.cst32._SHUF_DC00, "aM", @progbits, 32
772 .align 32
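
The .rodata.cst32 sections hold 32-byte constants, each aligned to 32 bytes so they can be loaded with aligned vector moves. PSHUFFLE_BYTE_FLIP_MASK byte-swaps each 32-bit dword of the input, since SHA-256 treats message words as big-endian while x86 loads are little-endian; _SHUF_00BA and _SHUF_DC00 rearrange dwords while the message schedule is being built. A scalar equivalent of the byte flip:

    #include <stdint.h>

    /* Scalar equivalent of the PSHUFFLE_BYTE_FLIP_MASK step: read a message
     * word as big-endian, regardless of host byte order. */
    static inline uint32_t load_be32(const uint8_t *p)
    {
        return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
               ((uint32_t)p[2] <<  8) |  (uint32_t)p[3];
    }
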