Lines Matching +full:4 +full:x2

38 	ldr	x9,[x2],#8		// bp[0]
109 ldr x9,[x2],#8 // bp[i]
241 .align 4
246 sub x7,x7,x5,lsl#4
267 ld1 {v0.4s,v1.4s},[x1],#32
269 ldr s30,[x4],#4
273 .align 4
275 ldr s28,[x2],#4 // *b++
276 uxtl v28.4s,v28.4h
278 ld1 {v2.4s,v3.4s},[x3],#32
292 uxtl v29.4s,v29.4h
294 ldr s28,[x2],#4 // *b++
297 uxtl v28.4s,v28.4h
324 uxtl v29.4s,v29.4h
326 ldr s28,[x2],#4 // *b++
329 uxtl v28.4s,v28.4h
356 uxtl v29.4s,v29.4h
358 ldr s28,[x2],#4 // *b++
361 uxtl v28.4s,v28.4h
388 uxtl v29.4s,v29.4h
390 ldr s28,[x2],#4 // *b++
393 uxtl v28.4s,v28.4h
418 st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+4]
420 uxtl v29.4s,v29.4h
422 ldr s28,[x2],#4 // *b++
425 uxtl v28.4s,v28.4h
438 st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+4]
452 uxtl v29.4s,v29.4h
454 ldr s28,[x2],#4 // *b++
457 uxtl v28.4s,v28.4h
484 uxtl v29.4s,v29.4h
486 ldr s28,[x2],#4 // *b++
489 uxtl v28.4s,v28.4h
516 uxtl v29.4s,v29.4h
520 ld1 {v0.4s,v1.4s},[x1],#32
541 .align 4
549 ld1 {v2.4s,v3.4s},[x3],#32
627 ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+4]
640 ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+4]
725 ld1 {v0.4s,v1.4s},[x1],#32
765 zip1 v6.4h,v5.4h,v6.4h
771 .align 4
782 zip1 v6.4h,v5.4h,v6.4h
787 st1 {v6.s}[0], [x7],#4
793 zip1 v7.4h,v5.4h,v7.4h
796 st1 {v7.s}[0], [x7],#4
802 zip1 v8.4h,v5.4h,v8.4h
805 st1 {v8.s}[0], [x7],#4
811 zip1 v9.4h,v5.4h,v9.4h
814 st1 {v9.s}[0], [x7],#4
820 zip1 v10.4h,v5.4h,v10.4h
823 st1 {v10.s}[0], [x7],#4
829 zip1 v11.4h,v5.4h,v11.4h
832 st1 {v11.s}[0], [x7],#4
838 zip1 v12.4h,v5.4h,v12.4h
841 st1 {v12.s}[0], [x7],#4
847 zip1 v13.4h,v5.4h,v13.4h
851 st1 {v13.s}[0], [x7],#4
854 st1 {v15.s}[0], [x7],#4 // top-most bit
857 add x2,sp,x5,lsl#2
868 sub x17,x2,x1
876 sub x11,x2,x11 // this is num*4
880 mov x3,x2 // second 3/4th of frame
913 sub x17,x2,x1 // preserves carry
931 cmp x1,x2
947 ldp x10,x11,[x1,#8*4]
950 sub x2,sp,x5,lsl#4
953 mov sp,x2 // alloca
959 stp xzr,xzr,[x2,#8*0]
960 stp xzr,xzr,[x2,#8*2]
961 stp xzr,xzr,[x2,#8*4]
962 stp xzr,xzr,[x2,#8*6]
964 stp xzr,xzr,[x2,#8*8]
965 stp xzr,xzr,[x2,#8*10]
966 stp xzr,xzr,[x2,#8*12]
967 stp xzr,xzr,[x2,#8*14]
968 add x2,x2,#8*16
981 mov x2,sp
985 .align 4
990 // a[4]a[0]
996 // a[4]a[1]
1001 // a[4]a[2]
1005 // a[4]a[3] (iv)
1009 // a[5]a[4] (v)
1010 // a[6]a[4]
1011 // a[7]a[4]
1034 stp x19,x20,[x2],#8*2 // t[0..1]
1063 stp x21,x22,[x2],#8*2 // t[2..3]
1088 stp x23,x24,[x2],#8*2 // t[4..5]
1093 mul x14,x10,x9 // lo(a[4..7]*a[3]) (iv)
1102 umulh x14,x10,x9 // hi(a[4..7]*a[3])
1109 stp x25,x26,[x2],#8*2 // t[6..7]
1112 mul x14,x11,x10 // lo(a[5..7]*a[4]) (v)
1119 umulh x17,x11,x10 // hi(a[5..7]*a[4])
1151 ldp x6,x7,[x2,#8*0]
1152 ldp x8,x9,[x2,#8*2]
1153 ldp x10,x11,[x2,#8*4]
1154 ldp x12,x13,[x2,#8*6]
1163 ldp x10,x11,[x1,#8*4]
1186 // a[8]a[4]
1187 // a[f]a[4]........................
1218 str x19,[x2],#8
1239 ldp x6,x7,[x2,#8*0]
1240 ldp x8,x9,[x2,#8*2]
1241 ldp x10,x11,[x2,#8*4]
1242 ldp x12,x13,[x2,#8*6]
1252 ldp x10,x11,[x1,#8*4]
1261 .align 4
1267 ldp x10,x11,[x0,#8*4]
1268 sub x15,x2,x14
1272 stp x19,x20,[x2,#8*0]
1274 stp x21,x22,[x2,#8*2]
1276 stp x23,x24,[x2,#8*4]
1277 ldp x23,x24,[x15,#8*4]
1278 stp x25,x26,[x2,#8*6]
1279 mov x2,x15
1283 .align 4
1289 add x1,x14,#8*4
1292 stp x19,x20,[x2,#8*0]
1294 stp x21,x22,[x2,#8*2]
1296 stp x23,x24,[x2,#8*4]
1298 stp x25,x26,[x2,#8*6]
1299 mov x2,sp
1303 sub x27,x5,#8*4
1308 sub x27,x27,#8*4
1310 ldp x15,x16,[x2,#8*5]
1317 stp x19,x20,[x2,#8*0]
1320 stp x21,x22,[x2,#8*2]
1322 ldp x17,x14,[x2,#8*7]
1327 ldp x15,x16,[x2,#8*9]
1333 stp x23,x24,[x2,#8*4]
1335 stp x25,x26,[x2,#8*6]
1336 add x2,x2,#8*8
1340 ldp x17,x14,[x2,#8*3]
1348 ldp x15,x16,[x2,#8*5]
1351 stp x19,x20,[x2,#8*0]
1354 stp x21,x22,[x2,#8*2]
1366 ldp x10,x11,[x1,#8*4]
1373 stp x23,x24,[x2,#8*4]
1374 ldp x23,x24,[sp,#8*4]
1375 stp x25,x26,[x2,#8*6]
1379 mov x2,sp
1387 str x28,[x2],#8 // put aside t[0]*n0 for tail processing
1422 ldp x14,x15,[x2,#8*0]
1423 ldp x16,x17,[x2,#8*2]
1424 mov x0,x2
1428 ldp x14,x15,[x2,#8*4]
1431 ldp x16,x17,[x2,#8*6]
1439 ldur x4,[x2,#-8*8]
1442 ldp x10,x11,[x1,#8*4]
1471 str x19,[x2],#8
1489 ldp x6,x7,[x2,#8*0]
1492 ldp x8,x9,[x2,#8*2]
1493 ldp x10,x11,[x2,#8*4]
1494 ldp x12,x13,[x2,#8*6]
1506 ldp x10,x11,[x1,#8*4]
1515 .align 4
1518 add x27,x2,#8*8 // end of current t[num] window
1530 ldp x10,x11,[x16,#8*4]
1537 stp x14,x15,[x2,#8*0]
1538 stp x21,x22,[x2,#8*2]
1540 stp x23,x24,[x2,#8*4]
1541 ldp x23,x24,[x0,#8*4]
1543 stp x25,x26,[x2,#8*6]
1544 mov x2,x0 // slide the window
1554 add x2,x2,#8*8
1570 ldp x10,x11,[x1,#8*4]
1574 ldp x19,x20,[x2,#8*0]
1576 ldp x21,x22,[x2,#8*2]
1577 ldp x23,x24,[x2,#8*4]
1578 ldp x25,x26,[x2,#8*6]
1579 add x2,x2,#8*8
1580 stp x14,x15,[x0,#8*4]
1588 mov x2,sp
1603 stp x14,x15,[x0,#8*4]
1606 sub x27,x5,#8*4
1608 sub x27,x27,#8*4
1610 stp xzr,xzr,[x2,#8*0]
1612 ldp x6,x7,[x3,#8*4]
1613 ldp x19,x20,[x1,#8*4]
1615 stp xzr,xzr,[x2,#8*2]
1616 add x2,x2,#8*4
1620 add x1,x1,#8*4
1623 add x3,x3,#8*4
1629 stp xzr,xzr,[x2,#8*0]
1631 stp xzr,xzr,[x2,#8*2]
1639 .align 4
1651 stp xzr,xzr,[sp,#8*4]
1674 stp x10,x11,[x1,#8*4]
1706 sub sp,x26,#8*4 // alloca
1708 add x10,x2,x5
1712 ldr x24,[x2,#8*0] // b[0]
1715 add x1,x1,#8*4
1722 adds x3,x3,#8*4 // clear carry bit
1745 ldr x24,[x2,x28] // next b[i] (or b[0])
1776 ldp x6,x7,[x1,#8*0] // a[4..7]
1778 add x1,x1,#8*4
1780 ldp x14,x15,[x3,#8*0] // n[4..7]
1782 add x3,x3,#8*4
1785 mul x10,x6,x24 // lo(a[4..7]*b[i])
1793 umulh x10,x6,x24 // hi(a[4..7]*b[i])
1801 ldr x24,[x2,x28] // next b[i] (or b[0])
1803 mul x10,x14,x25 // lo(n[4..7]*a[0]*n0)
1811 umulh x10,x14,x25 // hi(n[4..7]*a[0]*n0)
1835 add x1,x1,#8*4
1838 add x3,x3,#8*4
1843 ldr x24,[x2,#8*4]! // *++b
1848 add x1,x11,#8*4
1851 ldp x19,x20,[sp,#8*4] // t[0..3]
1858 adds x3,x3,#8*4 // clear carry bit
1861 .align 4
1863 mul x10,x6,x24 // lo(a[0..3]*b[4])
1871 umulh x10,x6,x24 // hi(a[0..3]*b[4])
1880 ldr x24,[x2,x28] // next b[i]
1909 ldp x10,x11,[x26,#8*4] // t[4..7]
1911 ldp x6,x7,[x1,#8*0] // a[4..7]
1913 add x1,x1,#8*4
1921 ldp x14,x15,[x3,#8*0] // n[4..7]
1923 add x3,x3,#8*4
1925 .align 4
1927 mul x10,x6,x24 // lo(a[4..7]*b[4])
1935 umulh x10,x6,x24 // hi(a[4..7]*b[4])
1943 ldr x24,[x2,x28] // next b[i]
1945 mul x10,x14,x25 // lo(n[4..7]*t[0]*n0)
1953 umulh x10,x14,x25 // hi(n[4..7]*t[0]*n0)
1976 ldp x10,x11,[x26,#8*4]
1980 add x1,x1,#8*4
1988 add x3,x3,#8*4
1991 .align 4
1995 add x2,x2,#8*4 // bp++
2001 ldp x19,x20,[sp,#8*4] // t[0..3]
2004 cmp x2,x13 // done yet?
2008 add x3,x11,#8*4
2011 ldr x24,[x2]
2014 adds x1,x1,#8*4 // clear carry bit
2019 .align 4
2030 sub x28,x5,#8*4
2035 sub x28,x28,#8*4
2039 add x3,x3,#8*4
2041 add x26,x26,#8*4
2045 add x0,x0,#8*4
2051 add x1,sp,#8*4
2062 sub x28,x5,#8*4
2064 sub x28,x28,#8*4
2068 ldp x6,x7,[x27,#8*4]
2069 ldp x19,x20,[x1,#8*4]
2072 add x26,x26,#8*4
2076 add x1,x1,#8*4
2079 add x27,x27,#8*4
2089 stp xzr,xzr,[x26,#8*4]
2095 .align 4
2107 stp xzr,xzr,[sp,#8*4]
2134 .align 4