Lines Matching +full:3 +full:x12

15 	tst	x5,#3
28 tst x5,#3
39 sub x22,sp,x5,lsl#3
41 lsl x5,x5,#3
55 // (*) mul x12,x13,x15 // np[0]*m1
58 // (*) adds x12,x12,x6 // discarded
80 adds x12,x16,x13
85 adds x12,x12,x6
89 str x12,[x22],#8 // tp[j-1]
97 adds x12,x16,x13
101 adds x12,x12,x6
106 stp x12,x13,[x22]
126 // (*) mul x12,x13,x15 // np[0]*m1
129 // (*) adds x12,x12,x6
142 adds x12,x16,x13
152 adds x12,x12,x6
154 stur x12,[x22,#-16] // tp[j-1]
164 adds x12,x16,x13
172 adds x12,x12,x6
175 stp x12,x13,[x22,#-16]
285 umlal v9.2d,v28.2s,v0.s[3]
293 umlal v13.2d,v28.2s,v1.s[3]
300 umlal v9.2d,v29.2s,v2.s[3]
307 umlal v13.2d,v29.2s,v3.s[3]
317 umlal v10.2d,v28.2s,v0.s[3]
325 umlal v6.2d,v28.2s,v1.s[3]
332 umlal v10.2d,v29.2s,v2.s[3]
339 umlal v6.2d,v29.2s,v3.s[3]
349 umlal v11.2d,v28.2s,v0.s[3]
357 umlal v7.2d,v28.2s,v1.s[3]
364 umlal v11.2d,v29.2s,v2.s[3]
371 umlal v7.2d,v29.2s,v3.s[3]
381 umlal v12.2d,v28.2s,v0.s[3]
386 st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+3]
389 umlal v8.2d,v28.2s,v1.s[3]
396 umlal v12.2d,v29.2s,v2.s[3]
403 umlal v8.2d,v29.2s,v3.s[3]
406 st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+3]
413 umlal v13.2d,v28.2s,v0.s[3]
421 umlal v9.2d,v28.2s,v1.s[3]
428 umlal v13.2d,v29.2s,v2.s[3]
435 umlal v9.2d,v29.2s,v3.s[3]
445 umlal v6.2d,v28.2s,v0.s[3]
453 umlal v10.2d,v28.2s,v1.s[3]
460 umlal v6.2d,v29.2s,v2.s[3]
467 umlal v10.2d,v29.2s,v3.s[3]
477 umlal v7.2d,v28.2s,v0.s[3]
485 umlal v11.2d,v28.2s,v1.s[3]
492 umlal v7.2d,v29.2s,v2.s[3]
499 umlal v11.2d,v29.2s,v3.s[3]
509 umlal v8.2d,v28.2s,v0.s[3]
517 umlal v12.2d,v28.2s,v1.s[3]
526 umlal v8.2d,v29.2s,v2.s[3]
534 umlal v12.2d,v29.2s,v3.s[3]
550 umlal v9.2d,v28.2s,v0.s[3]
557 umlal v13.2d,v28.2s,v1.s[3]
562 umlal v9.2d,v29.2s,v2.s[3]
566 umlal v13.2d,v29.2s,v3.s[3]
576 umlal v10.2d,v28.2s,v0.s[3]
580 umlal v6.2d,v28.2s,v1.s[3]
585 umlal v10.2d,v29.2s,v2.s[3]
589 umlal v6.2d,v29.2s,v3.s[3]
599 umlal v11.2d,v28.2s,v0.s[3]
603 umlal v7.2d,v28.2s,v1.s[3]
604 ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+3]
608 umlal v11.2d,v29.2s,v2.s[3]
612 umlal v7.2d,v29.2s,v3.s[3]
617 ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+3]
622 umlal v12.2d,v28.2s,v0.s[3]
626 umlal v8.2d,v28.2s,v1.s[3]
631 umlal v12.2d,v29.2s,v2.s[3]
635 umlal v8.2d,v29.2s,v3.s[3]
645 umlal v13.2d,v28.2s,v0.s[3]
649 umlal v9.2d,v28.2s,v1.s[3]
654 umlal v13.2d,v29.2s,v2.s[3]
658 umlal v9.2d,v29.2s,v3.s[3]
668 umlal v6.2d,v28.2s,v0.s[3]
672 umlal v10.2d,v28.2s,v1.s[3]
677 umlal v6.2d,v29.2s,v2.s[3]
681 umlal v10.2d,v29.2s,v3.s[3]
691 umlal v7.2d,v28.2s,v0.s[3]
695 umlal v11.2d,v28.2s,v1.s[3]
700 umlal v7.2d,v29.2s,v2.s[3]
704 umlal v11.2d,v29.2s,v3.s[3]
714 umlal v8.2d,v28.2s,v0.s[3]
718 umlal v12.2d,v28.2s,v1.s[3]
728 umlal v8.2d,v29.2s,v2.s[3]
733 umlal v12.2d,v29.2s,v3.s[3]
880 mov x3,x2 // second 3/4th of frame
948 ldp x12,x13,[x1,#8*6]
951 lsl x5,x5,#3
989 // a[3]a[0]
995 // a[3]a[1]
1000 // a[3]a[2] (iii)
1005 // a[4]a[3] (iv)
1006 // a[5]a[3]
1007 // a[6]a[3]
1008 // a[7]a[3]
1023 mul x15,x12,x6
1039 umulh x14,x12,x6
1052 mul x16,x12,x7
1063 stp x21,x22,[x2],#8*2 // t[2..3]
1066 umulh x14,x12,x7
1070 mul x16,x9,x8 // lo(a[3..7]*a[2]) (iii)
1077 mul x15,x12,x8
1081 umulh x17,x9,x8 // hi(a[3..7]*a[2])
1087 umulh x16,x12,x8
1093 mul x14,x10,x9 // lo(a[4..7]*a[3]) (iv)
1097 mul x16,x12,x9
1102 umulh x14,x10,x9 // hi(a[4..7]*a[3])
1106 umulh x16,x12,x9
1114 mul x15,x12,x10
1121 umulh x14,x12,x10
1125 mul x16,x12,x11 // lo(a[6..7]*a[5]) (vi)
1130 umulh x14,x12,x11 // hi(a[6..7]*a[5])
1135 mul x16,x13,x12 // lo(a[7]*a[6]) (vii)
1137 umulh x17,x13,x12 // hi(a[7]*a[6])
1154 ldp x12,x13,[x2,#8*6]
1164 adcs x25,x25,x12
1167 ldp x12,x13,[x1,#8*6]
1184 // a[8]a[3]
1185 // a[f]a[3]........................
1206 mul x16,x12,x4
1224 umulh x16,x12,x4
1242 ldp x12,x13,[x2,#8*6]
1253 adcs x25,x25,x12
1256 ldp x12,x13,[x1,#8*6]
1269 ldp x12,x13,[x0,#8*6]
1290 ldp x17,x14,[sp,#8*3]
1314 mul x12,x13,x13
1324 adcs x25,x12,x15
1340 ldp x17,x14,[x2,#8*3]
1352 mul x12,x13,x13
1362 adcs x25,x12,x15
1370 ldp x12,x13,[x1,#8*6]
1395 mul x16,x12,x28
1412 umulh x16,x12,x28
1444 ldp x12,x13,[x1,#8*6]
1459 mul x16,x12,x4
1477 umulh x16,x12,x4
1494 ldp x12,x13,[x2,#8*6]
1507 adcs x25,x25,x12
1510 ldp x12,x13,[x1,#8*6]
1531 adcs x25,x25,x12
1533 ldp x12,x13,[x16,#8*6]
1569 sbcs x16,x25,x12
1572 ldp x12,x13,[x1,#8*6]
1597 sbcs x16,x25,x12
1656 sbcs x12,x25,x12
1672 csel x12,x25,x12,lo
1675 stp x12,x13,[x1,#8*6]
1703 sub x26,sp,x5,lsl#3
1704 lsl x5,x5,#3
1713 ldp x6,x7,[x1,#8*0] // a[0..3]
1720 ldp x14,x15,[x3,#8*0] // n[0..3]
1728 mul x10,x6,x24 // lo(a[0..3]*b[0])
1732 mul x12,x8,x24
1736 umulh x10,x6,x24 // hi(a[0..3]*b[0])
1739 adcs x21,x21,x12
1742 umulh x12,x8,x24
1747 // (*) mul x10,x14,x25 // lo(n[0..3]*t[0]*n0)
1751 adcs x22,x22,x12
1752 mul x12,x16,x25
1757 umulh x10,x14,x25 // hi(n[0..3]*t[0]*n0)
1760 adcs x20,x21,x12
1761 umulh x12,x16,x25
1769 adcs x21,x21,x12
1789 mul x12,x8,x24
1796 adcs x21,x21,x12
1797 umulh x12,x8,x24
1806 adcs x22,x22,x12
1807 mul x12,x16,x25
1814 adcs x21,x21,x12
1815 umulh x12,x16,x25
1825 adcs x21,x22,x12
1845 ldp x6,x7,[x11,#8*0] // a[0..3]
1851 ldp x19,x20,[sp,#8*4] // t[0..3]
1855 ldp x14,x15,[x3,#8*0] // n[0..3]
1863 mul x10,x6,x24 // lo(a[0..3]*b[4])
1867 mul x12,x8,x24
1871 umulh x10,x6,x24 // hi(a[0..3]*b[4])
1874 adcs x21,x21,x12
1877 umulh x12,x8,x24
1885 mul x11,x15,x25 // lo(n[0..3]*t[0]*n0
1886 adcs x22,x22,x12
1887 mul x12,x16,x25
1892 umulh x10,x14,x25 // hi(n[0..3]*t[0]*n0
1895 adcs x20,x21,x12
1896 umulh x12,x16,x25
1903 adcs x21,x21,x12
1910 ldp x12,x13,[x26,#8*6]
1916 adcs x21,x21,x12
1931 mul x12,x8,x24
1938 adcs x21,x21,x12
1939 umulh x12,x8,x24
1948 adcs x22,x22,x12
1949 mul x12,x16,x25
1956 adcs x21,x21,x12
1957 umulh x12,x16,x25
1967 adcs x21,x22,x12
1977 ldp x12,x13,[x26,#8*6]
1983 adcs x21,x21,x12
1993 ldp x12,x13,[x29,#96] // pull rp and &b[num]
2001 ldp x19,x20,[sp,#8*4] // t[0..3]
2006 ldp x14,x15,[x11,#8*0] // n[0..3]
2012 ldp x6,x7,[x1,#8*0] // a[0..3]
2025 mov x0,x12
2026 mov x27,x12 // x0 copy
2033 sbcs x12,x21,x16
2044 stp x12,x13,[x0,#8*2]
2049 sbcs x12,x21,x16
2056 stp x12,x13,[x0,#8*2]
2070 csel x12,x21,x8,lo
2078 stp x12,x13,[x27,#8*2]
2086 csel x12,x21,x8,lo
2087 stp xzr,xzr,[x26,#8*3]
2091 stp x12,x13,[x27,#8*2]
2099 // x19-3,x0 hold result, x14-7 hold modulus
2111 // x6-3 hold result-modulus