Lines Matching +full:4 +full:d
38 and x9,x9,#-4
115 and x10,x14,#-4 // final reduction
146 tst x14,#-4 // see if it's carried/borrowed
192 and x10,x14,#-4 // final reduction
219 str w13,[x0,#16*4] // s2
273 and x10,x14,#-4 // ... so reduce
307 .align 4
313 .align 4
364 sub x0,x0,#4
368 sub x0,x0,#4
371 bl poly1305_mult // r^4
372 sub x0,x0,#4
386 .align 4
443 ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x15],#64
444 ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x15],#64
445 ld1 {v8.4s},[x15]
472 movi v31.2d,#-1
476 ushr v31.2d,v31.2d,#38
480 .align 4
483 // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
484 // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
486 // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
487 // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
500 umull v23.2d,v14.2s,v7.s[2]
502 umull v22.2d,v14.2s,v5.s[2]
503 umull v21.2d,v14.2s,v3.s[2]
505 umull v20.2d,v14.2s,v1.s[2]
507 umull v19.2d,v14.2s,v0.s[2]
515 umlal v23.2d,v15.2s,v5.s[2]
517 umlal v22.2d,v15.2s,v3.s[2]
519 umlal v21.2d,v15.2s,v1.s[2]
521 umlal v20.2d,v15.2s,v0.s[2]
523 umlal v19.2d,v15.2s,v8.s[2]
526 umlal v23.2d,v16.2s,v3.s[2]
528 umlal v22.2d,v16.2s,v1.s[2]
530 umlal v21.2d,v16.2s,v0.s[2]
532 umlal v20.2d,v16.2s,v8.s[2]
534 umlal v19.2d,v16.2s,v6.s[2]
537 umlal v23.2d,v17.2s,v1.s[2]
539 umlal v22.2d,v17.2s,v0.s[2]
541 umlal v21.2d,v17.2s,v8.s[2]
543 umlal v20.2d,v17.2s,v6.s[2]
545 umlal v19.2d,v17.2s,v4.s[2]
550 umlal v23.2d,v18.2s,v0.s[2]
552 umlal v22.2d,v18.2s,v8.s[2]
554 umlal v21.2d,v18.2s,v6.s[2]
556 umlal v20.2d,v18.2s,v4.s[2]
558 umlal v19.2d,v18.2s,v2.s[2]
562 // (hash+inp[0:1])*r^4 and accumulate
566 umlal v22.2d,v11.2s,v1.s[0]
568 umlal v19.2d,v11.2s,v6.s[0]
570 umlal v23.2d,v11.2s,v3.s[0]
571 umlal v20.2d,v11.2s,v8.s[0]
572 umlal v21.2d,v11.2s,v0.s[0]
581 umlal v22.2d,v9.2s,v5.s[0]
582 umlal v23.2d,v9.2s,v7.s[0]
584 umlal v21.2d,v9.2s,v3.s[0]
586 umlal v19.2d,v9.2s,v0.s[0]
588 umlal v20.2d,v9.2s,v1.s[0]
593 umlal v22.2d,v10.2s,v3.s[0]
595 umlal v23.2d,v10.2s,v5.s[0]
597 umlal v19.2d,v10.2s,v8.s[0]
599 umlal v21.2d,v10.2s,v1.s[0]
601 umlal v20.2d,v10.2s,v0.s[0]
606 umlal v22.2d,v12.2s,v0.s[0]
608 umlal v19.2d,v12.2s,v4.s[0]
610 umlal v23.2d,v12.2s,v1.s[0]
612 umlal v20.2d,v12.2s,v6.s[0]
614 umlal v21.2d,v12.2s,v8.s[0]
617 umlal v22.2d,v13.2s,v8.s[0]
619 umlal v19.2d,v13.2s,v2.s[0]
621 umlal v23.2d,v13.2s,v0.s[0]
623 umlal v20.2d,v13.2s,v4.s[0]
625 umlal v21.2d,v13.2s,v6.s[0]
630 // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
635 ushr v29.2d,v22.2d,#26
636 xtn v27.2s,v22.2d
637 ushr v30.2d,v19.2d,#26
639 add v23.2d,v23.2d,v29.2d // h3 -> h4
641 add v20.2d,v20.2d,v30.2d // h0 -> h1
643 ushr v29.2d,v23.2d,#26
644 xtn v28.2s,v23.2d
645 ushr v30.2d,v20.2d,#26
646 xtn v25.2s,v20.2d
648 add v21.2d,v21.2d,v30.2d // h1 -> h2
650 add v19.2d,v19.2d,v29.2d
651 shl v29.2d,v29.2d,#2
652 shrn v30.2s,v21.2d,#26
653 xtn v26.2s,v21.2d
654 add v19.2d,v19.2d,v29.2d // h4 -> h0
659 shrn v29.2s,v19.2d,#26
660 xtn v24.2s,v19.2d
670 dup v16.2d,v16.d[0]
679 dup v16.2d,v11.d[0]
686 dup v14.2d,v14.d[0]
687 umull2 v19.2d,v16.4s,v6.4s
688 umull2 v22.2d,v16.4s,v1.4s
689 umull2 v23.2d,v16.4s,v3.4s
690 umull2 v21.2d,v16.4s,v0.4s
691 umull2 v20.2d,v16.4s,v8.4s
693 dup v15.2d,v15.d[0]
694 umlal2 v19.2d,v14.4s,v0.4s
695 umlal2 v21.2d,v14.4s,v3.4s
696 umlal2 v22.2d,v14.4s,v5.4s
697 umlal2 v23.2d,v14.4s,v7.4s
698 umlal2 v20.2d,v14.4s,v1.4s
700 dup v17.2d,v17.d[0]
701 umlal2 v19.2d,v15.4s,v8.4s
702 umlal2 v22.2d,v15.4s,v3.4s
703 umlal2 v21.2d,v15.4s,v1.4s
704 umlal2 v23.2d,v15.4s,v5.4s
705 umlal2 v20.2d,v15.4s,v0.4s
707 dup v18.2d,v18.d[0]
708 umlal2 v22.2d,v17.4s,v0.4s
709 umlal2 v23.2d,v17.4s,v1.4s
710 umlal2 v19.2d,v17.4s,v4.4s
711 umlal2 v20.2d,v17.4s,v6.4s
712 umlal2 v21.2d,v17.4s,v8.4s
714 umlal2 v22.2d,v18.4s,v8.4s
715 umlal2 v19.2d,v18.4s,v2.4s
716 umlal2 v23.2d,v18.4s,v0.4s
717 umlal2 v20.2d,v18.4s,v4.4s
718 umlal2 v21.2d,v18.4s,v6.4s
723 // (hash+inp[0:1])*r^4:r^3 and accumulate
726 umlal v22.2d,v11.2s,v1.2s
727 umlal v19.2d,v11.2s,v6.2s
728 umlal v23.2d,v11.2s,v3.2s
729 umlal v20.2d,v11.2s,v8.2s
730 umlal v21.2d,v11.2s,v0.2s
733 umlal v22.2d,v9.2s,v5.2s
734 umlal v19.2d,v9.2s,v0.2s
735 umlal v23.2d,v9.2s,v7.2s
736 umlal v20.2d,v9.2s,v1.2s
737 umlal v21.2d,v9.2s,v3.2s
740 umlal v22.2d,v10.2s,v3.2s
741 umlal v19.2d,v10.2s,v8.2s
742 umlal v23.2d,v10.2s,v5.2s
743 umlal v20.2d,v10.2s,v0.2s
744 umlal v21.2d,v10.2s,v1.2s
747 umlal v22.2d,v12.2s,v0.2s
748 umlal v19.2d,v12.2s,v4.2s
749 umlal v23.2d,v12.2s,v1.2s
750 umlal v20.2d,v12.2s,v6.2s
751 umlal v21.2d,v12.2s,v8.2s
753 umlal v22.2d,v13.2s,v8.2s
754 umlal v19.2d,v13.2s,v2.2s
755 umlal v23.2d,v13.2s,v0.2s
756 umlal v20.2d,v13.2s,v4.2s
757 umlal v21.2d,v13.2s,v6.2s
763 addp v22.2d,v22.2d,v22.2d
765 addp v19.2d,v19.2d,v19.2d
767 addp v23.2d,v23.2d,v23.2d
769 addp v20.2d,v20.2d,v20.2d
771 addp v21.2d,v21.2d,v21.2d
776 ushr v29.2d,v22.2d,#26
778 ushr v30.2d,v19.2d,#26
781 add v23.2d,v23.2d,v29.2d // h3 -> h4
782 add v20.2d,v20.2d,v30.2d // h0 -> h1
784 ushr v29.2d,v23.2d,#26
786 ushr v30.2d,v20.2d,#26
788 add v21.2d,v21.2d,v30.2d // h1 -> h2
790 add v19.2d,v19.2d,v29.2d
791 shl v29.2d,v29.2d,#2
792 ushr v30.2d,v21.2d,#26
794 add v19.2d,v19.2d,v29.2d // h4 -> h0
795 add v22.2d,v22.2d,v30.2d // h2 -> h3
797 ushr v29.2d,v19.2d,#26
799 ushr v30.2d,v22.2d,#26
801 add v20.2d,v20.2d,v29.2d // h0 -> h1
802 add v23.2d,v23.2d,v30.2d // h3 -> h4
841 and x12,x6,#-4 // ... so reduce
852 tst x14,#-4 // see if it's carried/borrowed