Lines Matching +full:0 +full:x19
33 add x29,sp,#0
34 stp x19,x20,[sp,#16]
38 ldr x9,[x2],#8 // bp[0]
40 ldp x7,x8,[x1],#16 // ap[0..1]
44 ldp x13,x14,[x3],#16 // np[0..1]
46 mul x6,x7,x9 // ap[0]*bp[0]
49 mul x10,x8,x9 // ap[1]*bp[0]
52 mul x15,x6,x4 // "tp[0]"*n0
55 // (*) mul x12,x13,x15 // np[0]*m1
81 mul x10,x8,x9 // ap[j]*bp[0]
105 adc x19,xzr,xzr // upmost overflow bit
111 ldr x23,[sp] // tp[0]
114 mul x6,x7,x9 // ap[0]*bp[i]
126 // (*) mul x12,x13,x15 // np[0]*m1
166 adcs x13,x17,x19
167 adc x19,xzr,xzr
174 adc x19,x19,xzr // upmost overflow bit
183 ldr x23,[sp] // tp[0]
185 ldr x14,[x3],#8 // np[0]
197 sbcs x19,x19,xzr // did it borrow?
200 ldr x23,[sp] // tp[0]
202 ldr x8,[x0],#8 // rp[0]
218 ldp x19,x20,[x29,#16]
280 umlal v6.2d,v28.2s,v0.s[0]
287 umlal v10.2d,v28.2s,v1.s[0]
290 st1 {v28.2s},[sp] // put aside smashed b[8*i+0]
295 umlal v6.2d,v29.2s,v2.s[0]
301 umlal v10.2d,v29.2s,v3.s[0]
309 ins v7.d[0],v16.d[0]
310 st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+0]
311 umlal v7.2d,v28.2s,v0.s[0]
319 umlal v11.2d,v28.2s,v1.s[0]
327 umlal v7.2d,v29.2s,v2.s[0]
333 umlal v11.2d,v29.2s,v3.s[0]
341 ins v8.d[0],v16.d[0]
343 umlal v8.2d,v28.2s,v0.s[0]
351 umlal v12.2d,v28.2s,v1.s[0]
359 umlal v8.2d,v29.2s,v2.s[0]
365 umlal v12.2d,v29.2s,v3.s[0]
373 ins v9.d[0],v16.d[0]
375 umlal v9.2d,v28.2s,v0.s[0]
383 umlal v13.2d,v28.2s,v1.s[0]
391 umlal v9.2d,v29.2s,v2.s[0]
397 umlal v13.2d,v29.2s,v3.s[0]
405 ins v10.d[0],v16.d[0]
407 umlal v10.2d,v28.2s,v0.s[0]
415 umlal v6.2d,v28.2s,v1.s[0]
423 umlal v10.2d,v29.2s,v2.s[0]
429 umlal v6.2d,v29.2s,v3.s[0]
437 ins v11.d[0],v16.d[0]
439 umlal v11.2d,v28.2s,v0.s[0]
447 umlal v7.2d,v28.2s,v1.s[0]
455 umlal v11.2d,v29.2s,v2.s[0]
461 umlal v7.2d,v29.2s,v3.s[0]
469 ins v12.d[0],v16.d[0]
471 umlal v12.2d,v28.2s,v0.s[0]
479 umlal v8.2d,v28.2s,v1.s[0]
487 umlal v12.2d,v29.2s,v2.s[0]
493 umlal v8.2d,v29.2s,v3.s[0]
501 ins v13.d[0],v16.d[0]
503 umlal v13.2d,v28.2s,v0.s[0]
511 umlal v9.2d,v28.2s,v1.s[0]
518 ld1 {v28.2s},[sp] // pull smashed b[8*i+0]
519 umlal v13.2d,v29.2s,v2.s[0]
527 umlal v9.2d,v29.2s,v3.s[0]
532 ins v13.d[1],v15.d[0]
544 umlal v6.2d,v28.2s,v0.s[0]
547 ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+0]
554 umlal v10.2d,v28.2s,v1.s[0]
559 umlal v6.2d,v29.2s,v2.s[0]
563 umlal v10.2d,v29.2s,v3.s[0]
568 umlal v7.2d,v28.2s,v0.s[0]
577 umlal v11.2d,v28.2s,v1.s[0]
582 umlal v7.2d,v29.2s,v2.s[0]
586 umlal v11.2d,v29.2s,v3.s[0]
591 umlal v8.2d,v28.2s,v0.s[0]
600 umlal v12.2d,v28.2s,v1.s[0]
605 umlal v8.2d,v29.2s,v2.s[0]
609 umlal v12.2d,v29.2s,v3.s[0]
614 umlal v9.2d,v28.2s,v0.s[0]
623 umlal v13.2d,v28.2s,v1.s[0]
628 umlal v9.2d,v29.2s,v2.s[0]
632 umlal v13.2d,v29.2s,v3.s[0]
637 umlal v10.2d,v28.2s,v0.s[0]
646 umlal v6.2d,v28.2s,v1.s[0]
651 umlal v10.2d,v29.2s,v2.s[0]
655 umlal v6.2d,v29.2s,v3.s[0]
660 umlal v11.2d,v28.2s,v0.s[0]
669 umlal v7.2d,v28.2s,v1.s[0]
674 umlal v11.2d,v29.2s,v2.s[0]
678 umlal v7.2d,v29.2s,v3.s[0]
683 umlal v12.2d,v28.2s,v0.s[0]
692 umlal v8.2d,v28.2s,v1.s[0]
697 umlal v12.2d,v29.2s,v2.s[0]
701 umlal v8.2d,v29.2s,v3.s[0]
706 umlal v13.2d,v28.2s,v0.s[0]
715 umlal v9.2d,v28.2s,v1.s[0]
722 umlal v13.2d,v29.2s,v2.s[0]
723 ld1 {v28.2s},[sp] // pull smashed b[8*i+0]
729 umlal v9.2d,v29.2s,v3.s[0]
766 ins v15.d[1],v14.d[0]
783 ins v15.d[1],v14.d[0]
787 st1 {v6.s}[0], [x7],#4
794 ins v15.d[1],v14.d[0]
796 st1 {v7.s}[0], [x7],#4
803 ins v15.d[1],v14.d[0]
805 st1 {v8.s}[0], [x7],#4
812 ins v15.d[1],v14.d[0]
814 st1 {v9.s}[0], [x7],#4
821 ins v15.d[1],v14.d[0]
823 st1 {v10.s}[0], [x7],#4
830 ins v15.d[1],v14.d[0]
832 st1 {v11.s}[0], [x7],#4
839 ins v15.d[1],v14.d[0]
841 st1 {v12.s}[0], [x7],#4
848 ins v15.d[1],v14.d[0]
851 st1 {v13.s}[0], [x7],#4
854 st1 {v15.s}[0], [x7],#4 // top-most bit
856 subs x1,sp,#0 // clear carry flag
937 add x29,sp,#0
938 stp x19,x20,[sp,#16]
945 ldp x6,x7,[x1,#8*0]
959 stp xzr,xzr,[x2,#8*0]
973 mov x19,xzr
987 // a[1]a[0] (i)
988 // a[2]a[0]
989 // a[3]a[0]
990 // a[4]a[0]
991 // a[5]a[0]
992 // a[6]a[0]
993 // a[7]a[0]
1016 mul x14,x7,x6 // lo(a[1..7]*a[0]) (i)
1020 adds x20,x20,x14 // t[1]+lo(a[1]*a[0])
1027 umulh x17,x7,x6 // hi(a[1..7]*a[0])
1034 stp x19,x20,[x2],#8*2 // t[0..1]
1035 adc x19,xzr,xzr // t[8]
1036 adds x21,x21,x17 // t[2]+lo(a[1]*a[0])
1048 adc x19,x19,x15
1061 adcs x19,x19,x17
1073 adcs x19,x19,x14
1084 adcs x19,x19,x15
1094 adcs x19,x19,x15
1103 adcs x19,x19,x15
1111 adds x19,x19,x14
1151 ldp x6,x7,[x2,#8*0]
1155 adds x19,x19,x6
1157 ldp x6,x7,[x1,#8*0]
1172 // a[8]a[0]
1173 // a[9]a[0]
1174 // a[a]a[0]
1175 // a[b]a[0]
1176 // a[c]a[0]
1177 // a[d]a[0]
1178 // a[e]a[0]
1179 // a[f]a[0]
1201 adds x19,x19,x14
1218 str x19,[x2],#8
1219 adds x19,x20,x14
1239 ldp x6,x7,[x2,#8*0]
1243 adds x19,x19,x6
1246 ldp x6,x7,[x1,#8*0]
1263 ldp x6,x7,[x0,#8*0]
1272 stp x19,x20,[x2,#8*0]
1273 ldp x19,x20,[x15,#8*0]
1285 // Now multiply above result by 2 and add a[n-1]*a[n-1]|...|a[0]*a[0]
1286 ldp x7,x9,[x14,#8*0] // recall that x14 is &a[0]
1292 stp x19,x20,[x2,#8*0]
1293 mul x19,x7,x7
1317 stp x19,x20,[x2,#8*0]
1337 adcs x19,x6,x17
1351 stp x19,x20,[x2,#8*0]
1358 ldp x19,x20,[sp,#8*0]
1361 ldp x6,x7,[x1,#8*0]
1369 mul x28,x4,x19 // t[0]*n0
1383 // (*) mul x14,x6,x28 // lo(n[0-7])*lo(t[0]*n0)
1387 str x28,[x2],#8 // put aside t[0]*n0 for tail processing
1389 // (*) adds xzr,x19,x14
1390 subs xzr,x19,#1 // (*)
1392 adcs x19,x20,x15
1399 umulh x14,x6,x28 // hi(n[0-7])*lo(t[0]*n0)
1407 adds x19,x19,x14
1415 mul x28,x4,x19 // next t[0]*n0
1422 ldp x14,x15,[x2,#8*0]
1426 adds x19,x19,x14
1440 ldp x6,x7,[x1,#8*0]
1454 adds x19,x19,x14
1471 str x19,[x2],#8
1472 adds x19,x20,x14
1489 ldp x6,x7,[x2,#8*0]
1498 adds x19,x19,x6
1500 ldp x6,x7,[x1,#8*0]
1521 adcs x14,x19,x6
1523 ldp x19,x20,[x0,#8*0]
1525 ldp x6,x7,[x16,#8*0] // recall that x16 is &n[0]
1536 mul x28,x4,x19
1537 stp x14,x15,[x2,#8*0]
1555 subs x14,x19,x6
1562 ldp x6,x7,[x1,#8*0]
1564 stp x14,x15,[x0,#8*0]
1574 ldp x19,x20,[x2,#8*0]
1581 sbcs x14,x19,x6
1590 ldp x6,x7,[x3,#8*0]
1592 stp x14,x15,[x0,#8*0]
1598 ldp x19,x20,[x1,#8*0]
1609 csel x14,x19,x6,lo
1610 stp xzr,xzr,[x2,#8*0]
1613 ldp x19,x20,[x1,#8*4]
1621 stp x14,x15,[x3,#8*0]
1624 stp xzr,xzr,[x1,#8*0]
1628 csel x14,x19,x6,lo
1629 stp xzr,xzr,[x2,#8*0]
1634 stp x14,x15,[x3,#8*0]
1643 // x19-7,x28 hold result, x6-7 hold modulus
1644 subs x6,x19,x6
1647 stp xzr,xzr,[sp,#8*0]
1664 csel x6,x19,x6,lo
1668 stp x6,x7,[x1,#8*0]
1678 ldp x19,x20,[x29,#16]
1696 add x29,sp,#0
1697 stp x19,x20,[sp,#16]
1712 ldr x24,[x2,#8*0] // b[0]
1713 ldp x6,x7,[x1,#8*0] // a[0..3]
1716 mov x19,xzr
1720 ldp x14,x15,[x3,#8*0] // n[0..3]
1724 mov x28,#0
1728 mul x10,x6,x24 // lo(a[0..3]*b[0])
1735 adds x19,x19,x10
1736 umulh x10,x6,x24 // hi(a[0..3]*b[0])
1738 mul x25,x19,x4 // t[0]*n0
1745 ldr x24,[x2,x28] // next b[i] (or b[0])
1747 // (*) mul x10,x14,x25 // lo(n[0..3]*t[0]*n0)
1748 str x25,[x26],#8 // put aside t[0]*n0 for tail processing
1755 // (*) adds xzr,x19,x10
1756 subs xzr,x19,#1 // (*)
1757 umulh x10,x14,x25 // hi(n[0..3]*t[0]*n0)
1758 adcs x19,x20,x11
1766 adds x19,x19,x10
1776 ldp x6,x7,[x1,#8*0] // a[4..7]
1779 ldr x25,[sp] // a[0]*n0
1780 ldp x14,x15,[x3,#8*0] // n[4..7]
1792 adds x19,x19,x10
1801 ldr x24,[x2,x28] // next b[i] (or b[0])
1803 mul x10,x14,x25 // lo(n[4..7]*a[0]*n0)
1810 adds x19,x19,x10
1811 umulh x10,x14,x25 // hi(n[4..7]*a[0]*n0)
1820 ldr x25,[sp,x28] // next t[0]*n0
1821 str x19,[x26],#8 // result!!!
1822 adds x19,x20,x10
1833 ldp x6,x7,[x1,#8*0]
1836 ldp x14,x15,[x3,#8*0]
1845 ldp x6,x7,[x11,#8*0] // a[0..3]
1850 stp x19,x20,[x26,#8*0] // result!!!
1851 ldp x19,x20,[sp,#8*4] // t[0..3]
1855 ldp x14,x15,[x3,#8*0] // n[0..3]
1863 mul x10,x6,x24 // lo(a[0..3]*b[4])
1870 adds x19,x19,x10
1871 umulh x10,x6,x24 // hi(a[0..3]*b[4])
1873 mul x25,x19,x4 // t[0]*n0
1883 str x25,[x26],#8 // put aside t[0]*n0 for tail processing
1885 mul x11,x15,x25 // lo(n[0..3]*t[0]*n0
1890 // (*) adds xzr,x19,x10
1891 subs xzr,x19,#1 // (*)
1892 umulh x10,x14,x25 // hi(n[0..3]*t[0]*n0
1893 adcs x19,x20,x11
1901 adds x19,x19,x10
1911 ldp x6,x7,[x1,#8*0] // a[4..7]
1914 adds x19,x19,x10
1920 ldr x25,[sp] // t[0]*n0
1921 ldp x14,x15,[x3,#8*0] // n[4..7]
1934 adds x19,x19,x10
1945 mul x10,x14,x25 // lo(n[4..7]*t[0]*n0)
1952 adds x19,x19,x10
1953 umulh x10,x14,x25 // hi(n[4..7]*t[0]*n0)
1961 ldr x25,[sp,x28] // next a[0]*n0
1963 str x19,[x26],#8 // result!!!
1964 adds x19,x20,x10
1978 ldp x6,x7,[x1,#8*0]
1981 adds x19,x19,x10
1986 ldp x14,x15,[x3,#8*0]
1994 adds x19,x19,x30
1999 stp x19,x20,[x26,#8*0] // result!!!
2001 ldp x19,x20,[sp,#8*4] // t[0..3]
2006 ldp x14,x15,[x11,#8*0] // n[0..3]
2012 ldp x6,x7,[x1,#8*0] // a[0..3]
2027 subs x10,x19,x14
2034 ldp x14,x15,[x3,#8*0]
2036 ldp x19,x20,[x26,#8*0]
2042 stp x10,x11,[x0,#8*0]
2043 sbcs x10,x19,x14
2052 ldp x6,x7,[x27,#8*0]
2054 stp x10,x11,[x0,#8*0]
2057 ldp x19,x20,[x1,#8*0]
2065 csel x10,x19,x6,lo
2066 stp xzr,xzr,[x26,#8*0]
2069 ldp x19,x20,[x1,#8*4]
2077 stp x10,x11,[x27,#8*0]
2082 csel x10,x19,x6,lo
2083 stp xzr,xzr,[x26,#8*0]
2090 stp x10,x11,[x27,#8*0]
2099 // x19-3,x0 hold result, x14-7 hold modulus
2100 subs x6,x19,x14
2103 stp xzr,xzr,[sp,#8*0]
2112 csel x6,x19,x6,lo
2116 stp x6,x7,[x1,#8*0]
2120 ldp x19,x20,[x29,#16]
2132 …9,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0