Lines Matching +full:2 +full:d

76 	add	x9,x8,x8,lsr#2	// s1 = r1 + (r1 >> 2)
117 add x10,x10,x14,lsr#2
138 ldp x4,x5,[x0] // load hash base 2^64
194 add x10,x10,x14,lsr#2
205 and x12,x4,#0x03ffffff // base 2^64 -> base 2^26
213 add w12,w13,w13,lsl#2 // r1*5
215 add w13,w14,w14,lsl#2 // r2*5
216 str w12,[x0,#16*2] // s1
218 add w14,w15,w15,lsl#2 // r3*5
221 add w15,w16,w16,lsl#2 // r4*5
251 ldp w10,w11,[x0] // load hash value base 2^26
260 add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64
271 add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
275 add x10,x10,x14,lsr#2
293 and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
302 stp w10,w11,[x0] // store hash value base 2^26
309 stp x4,x5,[x0] // store hash value base 2^64
317 ldp x4,x5,[x0] // load hash value base 2^64
325 add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
337 and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
357 add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
363 bl poly1305_mult // r^2
405 ldp x8,x12,[x16],#16 // inp[2:3] (or zero)
417 and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
453 and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
472 movi v31.2d,#-1
476 ushr v31.2d,v31.2d,#38
483 // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
484 // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
486 // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
487 // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
490 // Note that we start with inp[2:3]*r^2. This is because it
500 umull v23.2d,v14.2s,v7.s[2]
502 umull v22.2d,v14.2s,v5.s[2]
503 umull v21.2d,v14.2s,v3.s[2]
504 ldp x8,x12,[x16],#16 // inp[2:3] (or zero)
505 umull v20.2d,v14.2s,v1.s[2]
507 umull v19.2d,v14.2s,v0.s[2]
515 umlal v23.2d,v15.2s,v5.s[2]
516 and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
517 umlal v22.2d,v15.2s,v3.s[2]
519 umlal v21.2d,v15.2s,v1.s[2]
521 umlal v20.2d,v15.2s,v0.s[2]
523 umlal v19.2d,v15.2s,v8.s[2]
526 umlal v23.2d,v16.2s,v3.s[2]
528 umlal v22.2d,v16.2s,v1.s[2]
530 umlal v21.2d,v16.2s,v0.s[2]
532 umlal v20.2d,v16.2s,v8.s[2]
534 umlal v19.2d,v16.2s,v6.s[2]
537 umlal v23.2d,v17.2s,v1.s[2]
539 umlal v22.2d,v17.2s,v0.s[2]
541 umlal v21.2d,v17.2s,v8.s[2]
543 umlal v20.2d,v17.2s,v6.s[2]
545 umlal v19.2d,v17.2s,v4.s[2]
548 add v11.2s,v11.2s,v26.2s
550 umlal v23.2d,v18.2s,v0.s[2]
552 umlal v22.2d,v18.2s,v8.s[2]
554 umlal v21.2d,v18.2s,v6.s[2]
556 umlal v20.2d,v18.2s,v4.s[2]
558 umlal v19.2d,v18.2s,v2.s[2]
564 add v9.2s,v9.2s,v24.2s
566 umlal v22.2d,v11.2s,v1.s[0]
568 umlal v19.2d,v11.2s,v6.s[0]
570 umlal v23.2d,v11.2s,v3.s[0]
571 umlal v20.2d,v11.2s,v8.s[0]
572 umlal v21.2d,v11.2s,v0.s[0]
580 add v10.2s,v10.2s,v25.2s
581 umlal v22.2d,v9.2s,v5.s[0]
582 umlal v23.2d,v9.2s,v7.s[0]
583 and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
584 umlal v21.2d,v9.2s,v3.s[0]
586 umlal v19.2d,v9.2s,v0.s[0]
588 umlal v20.2d,v9.2s,v1.s[0]
591 add v12.2s,v12.2s,v27.2s
593 umlal v22.2d,v10.2s,v3.s[0]
595 umlal v23.2d,v10.2s,v5.s[0]
597 umlal v19.2d,v10.2s,v8.s[0]
599 umlal v21.2d,v10.2s,v1.s[0]
601 umlal v20.2d,v10.2s,v0.s[0]
604 add v13.2s,v13.2s,v28.2s
606 umlal v22.2d,v12.2s,v0.s[0]
608 umlal v19.2d,v12.2s,v4.s[0]
610 umlal v23.2d,v12.2s,v1.s[0]
612 umlal v20.2d,v12.2s,v6.s[0]
614 umlal v21.2d,v12.2s,v8.s[0]
617 umlal v22.2d,v13.2s,v8.s[0]
619 umlal v19.2d,v13.2s,v2.s[0]
621 umlal v23.2d,v13.2s,v0.s[0]
623 umlal v20.2d,v13.2s,v4.s[0]
625 umlal v21.2d,v13.2s,v6.s[0]
630 // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
635 ushr v29.2d,v22.2d,#26
636 xtn v27.2s,v22.2d
637 ushr v30.2d,v19.2d,#26
639 add v23.2d,v23.2d,v29.2d // h3 -> h4
640 bic v27.2s,#0xfc,lsl#24 // &=0x03ffffff
641 add v20.2d,v20.2d,v30.2d // h0 -> h1
643 ushr v29.2d,v23.2d,#26
644 xtn v28.2s,v23.2d
645 ushr v30.2d,v20.2d,#26
646 xtn v25.2s,v20.2d
647 bic v28.2s,#0xfc,lsl#24
648 add v21.2d,v21.2d,v30.2d // h1 -> h2
650 add v19.2d,v19.2d,v29.2d
651 shl v29.2d,v29.2d,#2
652 shrn v30.2s,v21.2d,#26
653 xtn v26.2s,v21.2d
654 add v19.2d,v19.2d,v29.2d // h4 -> h0
655 bic v25.2s,#0xfc,lsl#24
656 add v27.2s,v27.2s,v30.2s // h2 -> h3
657 bic v26.2s,#0xfc,lsl#24
659 shrn v29.2s,v19.2d,#26
660 xtn v24.2s,v19.2d
661 ushr v30.2s,v27.2s,#26
662 bic v27.2s,#0xfc,lsl#24
663 bic v24.2s,#0xfc,lsl#24
664 add v25.2s,v25.2s,v29.2s // h0 -> h1
665 add v28.2s,v28.2s,v30.2s // h3 -> h4
670 dup v16.2d,v16.d[0]
671 add v11.2s,v11.2s,v26.2s
674 // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
679 dup v16.2d,v11.d[0]
680 add v14.2s,v9.2s,v24.2s
681 add v17.2s,v12.2s,v27.2s
682 add v15.2s,v10.2s,v25.2s
683 add v18.2s,v13.2s,v28.2s
686 dup v14.2d,v14.d[0]
687 umull2 v19.2d,v16.4s,v6.4s
688 umull2 v22.2d,v16.4s,v1.4s
689 umull2 v23.2d,v16.4s,v3.4s
690 umull2 v21.2d,v16.4s,v0.4s
691 umull2 v20.2d,v16.4s,v8.4s
693 dup v15.2d,v15.d[0]
694 umlal2 v19.2d,v14.4s,v0.4s
695 umlal2 v21.2d,v14.4s,v3.4s
696 umlal2 v22.2d,v14.4s,v5.4s
697 umlal2 v23.2d,v14.4s,v7.4s
698 umlal2 v20.2d,v14.4s,v1.4s
700 dup v17.2d,v17.d[0]
701 umlal2 v19.2d,v15.4s,v8.4s
702 umlal2 v22.2d,v15.4s,v3.4s
703 umlal2 v21.2d,v15.4s,v1.4s
704 umlal2 v23.2d,v15.4s,v5.4s
705 umlal2 v20.2d,v15.4s,v0.4s
707 dup v18.2d,v18.d[0]
708 umlal2 v22.2d,v17.4s,v0.4s
709 umlal2 v23.2d,v17.4s,v1.4s
710 umlal2 v19.2d,v17.4s,v4.4s
711 umlal2 v20.2d,v17.4s,v6.4s
712 umlal2 v21.2d,v17.4s,v8.4s
714 umlal2 v22.2d,v18.4s,v8.4s
715 umlal2 v19.2d,v18.4s,v2.4s
716 umlal2 v23.2d,v18.4s,v0.4s
717 umlal2 v20.2d,v18.4s,v4.4s
718 umlal2 v21.2d,v18.4s,v6.4s
725 add v9.2s,v9.2s,v24.2s
726 umlal v22.2d,v11.2s,v1.2s
727 umlal v19.2d,v11.2s,v6.2s
728 umlal v23.2d,v11.2s,v3.2s
729 umlal v20.2d,v11.2s,v8.2s
730 umlal v21.2d,v11.2s,v0.2s
732 add v10.2s,v10.2s,v25.2s
733 umlal v22.2d,v9.2s,v5.2s
734 umlal v19.2d,v9.2s,v0.2s
735 umlal v23.2d,v9.2s,v7.2s
736 umlal v20.2d,v9.2s,v1.2s
737 umlal v21.2d,v9.2s,v3.2s
739 add v12.2s,v12.2s,v27.2s
740 umlal v22.2d,v10.2s,v3.2s
741 umlal v19.2d,v10.2s,v8.2s
742 umlal v23.2d,v10.2s,v5.2s
743 umlal v20.2d,v10.2s,v0.2s
744 umlal v21.2d,v10.2s,v1.2s
746 add v13.2s,v13.2s,v28.2s
747 umlal v22.2d,v12.2s,v0.2s
748 umlal v19.2d,v12.2s,v4.2s
749 umlal v23.2d,v12.2s,v1.2s
750 umlal v20.2d,v12.2s,v6.2s
751 umlal v21.2d,v12.2s,v8.2s
753 umlal v22.2d,v13.2s,v8.2s
754 umlal v19.2d,v13.2s,v2.2s
755 umlal v23.2d,v13.2s,v0.2s
756 umlal v20.2d,v13.2s,v4.2s
757 umlal v21.2d,v13.2s,v6.2s
763 addp v22.2d,v22.2d,v22.2d
765 addp v19.2d,v19.2d,v19.2d
767 addp v23.2d,v23.2d,v23.2d
769 addp v20.2d,v20.2d,v20.2d
771 addp v21.2d,v21.2d,v21.2d
776 ushr v29.2d,v22.2d,#26
778 ushr v30.2d,v19.2d,#26
781 add v23.2d,v23.2d,v29.2d // h3 -> h4
782 add v20.2d,v20.2d,v30.2d // h0 -> h1
784 ushr v29.2d,v23.2d,#26
786 ushr v30.2d,v20.2d,#26
788 add v21.2d,v21.2d,v30.2d // h1 -> h2
790 add v19.2d,v19.2d,v29.2d
791 shl v29.2d,v29.2d,#2
792 ushr v30.2d,v21.2d,#26
794 add v19.2d,v19.2d,v29.2d // h4 -> h0
795 add v22.2d,v22.2d,v30.2d // h2 -> h3
797 ushr v29.2d,v19.2d,#26
799 ushr v30.2d,v22.2d,#26
801 add v20.2d,v20.2d,v29.2d // h0 -> h1
802 add v23.2d,v23.2d,v30.2d // h3 -> h4
826 ldp w10,w11,[x0] // load hash value base 2^26
830 add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64
842 add x12,x12,x6,lsr#2
876 .align 2
877 .align 2