Lines Matching full:s

372 	ld1	{v0.4s},[x5],#16
375 ld1 {v1.4s,v2.4s},[x3]
377 ld1 {v3.4s},[x4]
379 ld1 {v8.4s,v9.4s},[x5]
381 rev64 v0.4s,v0.4s
391 dup v16.4s,v0.s[0] // unpack key block
393 dup v20.4s,v0.s[1]
395 dup v24.4s,v0.s[2]
397 dup v28.4s,v0.s[3]
399 dup v17.4s,v1.s[0]
401 dup v21.4s,v1.s[1]
403 dup v25.4s,v1.s[2]
405 dup v29.4s,v1.s[3]
407 dup v19.4s,v3.s[0]
409 dup v23.4s,v3.s[1]
411 dup v27.4s,v3.s[2]
413 dup v31.4s,v3.s[3]
415 add v19.4s,v19.4s,v8.4s
417 dup v18.4s,v2.s[0]
419 dup v22.4s,v2.s[1]
421 dup v26.4s,v2.s[2]
423 dup v30.4s,v2.s[3]
429 add v16.4s,v16.4s,v17.4s
431 add v20.4s,v20.4s,v21.4s
433 add v24.4s,v24.4s,v25.4s
435 add v28.4s,v28.4s,v29.4s
453 add v18.4s,v18.4s,v19.4s
455 add v22.4s,v22.4s,v23.4s
457 add v26.4s,v26.4s,v27.4s
459 add v30.4s,v30.4s,v31.4s
469 ushr v17.4s,v4.4s,#20
471 ushr v21.4s,v5.4s,#20
473 ushr v25.4s,v6.4s,#20
475 ushr v29.4s,v7.4s,#20
477 sli v17.4s,v4.4s,#12
479 sli v21.4s,v5.4s,#12
481 sli v25.4s,v6.4s,#12
483 sli v29.4s,v7.4s,#12
485 add v16.4s,v16.4s,v17.4s
487 add v20.4s,v20.4s,v21.4s
489 add v24.4s,v24.4s,v25.4s
491 add v28.4s,v28.4s,v29.4s
509 add v18.4s,v18.4s,v19.4s
511 add v22.4s,v22.4s,v23.4s
513 add v26.4s,v26.4s,v27.4s
515 add v30.4s,v30.4s,v31.4s
525 ushr v17.4s,v4.4s,#25
526 ushr v21.4s,v5.4s,#25
527 ushr v25.4s,v6.4s,#25
528 ushr v29.4s,v7.4s,#25
529 sli v17.4s,v4.4s,#7
530 sli v21.4s,v5.4s,#7
531 sli v25.4s,v6.4s,#7
532 sli v29.4s,v7.4s,#7
533 add v16.4s,v16.4s,v21.4s
535 add v20.4s,v20.4s,v25.4s
537 add v24.4s,v24.4s,v29.4s
539 add v28.4s,v28.4s,v17.4s
557 add v26.4s,v26.4s,v31.4s
559 add v30.4s,v30.4s,v19.4s
561 add v18.4s,v18.4s,v23.4s
563 add v22.4s,v22.4s,v27.4s
573 ushr v21.4s,v4.4s,#20
575 ushr v25.4s,v5.4s,#20
577 ushr v29.4s,v6.4s,#20
579 ushr v17.4s,v7.4s,#20
581 sli v21.4s,v4.4s,#12
583 sli v25.4s,v5.4s,#12
585 sli v29.4s,v6.4s,#12
587 sli v17.4s,v7.4s,#12
589 add v16.4s,v16.4s,v21.4s
591 add v20.4s,v20.4s,v25.4s
593 add v24.4s,v24.4s,v29.4s
595 add v28.4s,v28.4s,v17.4s
613 add v26.4s,v26.4s,v31.4s
615 add v30.4s,v30.4s,v19.4s
617 add v18.4s,v18.4s,v23.4s
619 add v22.4s,v22.4s,v27.4s
629 ushr v21.4s,v4.4s,#25
630 ushr v25.4s,v5.4s,#25
631 ushr v29.4s,v6.4s,#25
632 ushr v17.4s,v7.4s,#25
633 sli v21.4s,v4.4s,#7
634 sli v25.4s,v5.4s,#7
635 sli v29.4s,v6.4s,#7
636 sli v17.4s,v7.4s,#7
639 add v19.4s,v19.4s,v8.4s
641 zip1 v4.4s,v16.4s,v20.4s // transpose data
642 zip1 v5.4s,v24.4s,v28.4s
643 zip2 v6.4s,v16.4s,v20.4s
644 zip2 v7.4s,v24.4s,v28.4s
650 zip1 v4.4s,v17.4s,v21.4s
651 zip1 v5.4s,v25.4s,v29.4s
652 zip2 v6.4s,v17.4s,v21.4s
653 zip2 v7.4s,v25.4s,v29.4s
659 zip1 v4.4s,v18.4s,v22.4s
661 zip1 v5.4s,v26.4s,v30.4s
663 zip2 v6.4s,v18.4s,v22.4s
665 zip2 v7.4s,v26.4s,v30.4s
676 zip1 v4.4s,v19.4s,v23.4s
678 zip1 v5.4s,v27.4s,v31.4s
680 zip2 v6.4s,v19.4s,v23.4s
682 zip2 v7.4s,v27.4s,v31.4s
698 add v16.4s,v16.4s,v0.4s // accumulate key block
702 add v17.4s,v17.4s,v1.4s
706 add v18.4s,v18.4s,v2.4s
710 add v19.4s,v19.4s,v3.4s
724 add v20.4s,v20.4s,v0.4s
726 add v21.4s,v21.4s,v1.4s
728 add v22.4s,v22.4s,v2.4s
730 add v23.4s,v23.4s,v3.4s
733 movi v4.4s,#5
740 add v8.4s,v8.4s,v4.4s // += 5
751 add v24.4s,v24.4s,v0.4s
752 add v25.4s,v25.4s,v1.4s
753 add v26.4s,v26.4s,v2.4s
754 add v27.4s,v27.4s,v3.4s
762 add v28.4s,v28.4s,v0.4s
763 add v29.4s,v29.4s,v1.4s
764 add v30.4s,v30.4s,v2.4s
765 add v31.4s,v31.4s,v3.4s
834 add v16.4s,v16.4s,v0.4s // accumulate key block
836 add v17.4s,v17.4s,v1.4s
838 add v18.4s,v18.4s,v2.4s
840 add v19.4s,v19.4s,v3.4s
855 add v16.4s,v20.4s,v0.4s
856 add v17.4s,v21.4s,v1.4s
858 add v18.4s,v22.4s,v2.4s
860 add v19.4s,v23.4s,v3.4s
871 add v16.4s,v24.4s,v0.4s
872 add v17.4s,v25.4s,v1.4s
874 add v18.4s,v26.4s,v2.4s
876 add v19.4s,v27.4s,v3.4s
887 add v16.4s,v28.4s,v0.4s
888 add v17.4s,v29.4s,v1.4s
889 add v18.4s,v30.4s,v2.4s
890 add v19.4s,v31.4s,v3.4s
946 ld1 {v0.4s},[x5],#16
949 ld1 {v1.4s,v2.4s},[x3]
951 ld1 {v3.4s},[x4]
952 ld1 {v7.s}[0],[x5]
955 rev64 v0.4s,v0.4s
963 add v3.4s,v3.4s,v7.4s // += 1
965 add v3.4s,v3.4s,v7.4s // not typo
967 add v4.4s,v3.4s,v7.4s
968 add v5.4s,v4.4s,v7.4s
969 add v6.4s,v5.4s,v7.4s
970 shl v7.4s,v7.4s,#2 // 1 -> 4
1010 add v27.4s,v11.4s,v7.4s // +4
1012 add v31.4s,v15.4s,v7.4s // +4
1024 ld1 {v6.4s},[x3]
1028 add v8.4s,v8.4s,v9.4s
1030 add v12.4s,v12.4s,v13.4s
1032 add v16.4s,v16.4s,v17.4s
1034 add v20.4s,v20.4s,v21.4s
1036 add v24.4s,v24.4s,v25.4s
1038 add v28.4s,v28.4s,v29.4s
1064 add v10.4s,v10.4s,v11.4s
1066 add v14.4s,v14.4s,v15.4s
1068 add v18.4s,v18.4s,v19.4s
1070 add v22.4s,v22.4s,v23.4s
1072 add v26.4s,v26.4s,v27.4s
1074 add v30.4s,v30.4s,v31.4s
1088 ushr v9.4s,v0.4s,#20
1090 ushr v13.4s,v1.4s,#20
1092 ushr v17.4s,v2.4s,#20
1094 ushr v21.4s,v3.4s,#20
1096 ushr v25.4s,v4.4s,#20
1098 ushr v29.4s,v5.4s,#20
1100 sli v9.4s,v0.4s,#12
1102 sli v13.4s,v1.4s,#12
1104 sli v17.4s,v2.4s,#12
1106 sli v21.4s,v3.4s,#12
1108 sli v25.4s,v4.4s,#12
1110 sli v29.4s,v5.4s,#12
1112 add v8.4s,v8.4s,v9.4s
1114 add v12.4s,v12.4s,v13.4s
1116 add v16.4s,v16.4s,v17.4s
1118 add v20.4s,v20.4s,v21.4s
1120 add v24.4s,v24.4s,v25.4s
1122 add v28.4s,v28.4s,v29.4s
1148 add v10.4s,v10.4s,v11.4s
1150 add v14.4s,v14.4s,v15.4s
1152 add v18.4s,v18.4s,v19.4s
1154 add v22.4s,v22.4s,v23.4s
1156 add v26.4s,v26.4s,v27.4s
1158 add v30.4s,v30.4s,v31.4s
1172 ushr v9.4s,v0.4s,#25
1174 ushr v13.4s,v1.4s,#25
1176 ushr v17.4s,v2.4s,#25
1178 ushr v21.4s,v3.4s,#25
1180 ushr v25.4s,v4.4s,#25
1182 ushr v29.4s,v5.4s,#25
1184 sli v9.4s,v0.4s,#7
1186 sli v13.4s,v1.4s,#7
1188 sli v17.4s,v2.4s,#7
1190 sli v21.4s,v3.4s,#7
1192 sli v25.4s,v4.4s,#7
1194 sli v29.4s,v5.4s,#7
1226 add v8.4s,v8.4s,v9.4s
1228 add v12.4s,v12.4s,v13.4s
1230 add v16.4s,v16.4s,v17.4s
1232 add v20.4s,v20.4s,v21.4s
1234 add v24.4s,v24.4s,v25.4s
1236 add v28.4s,v28.4s,v29.4s
1262 add v10.4s,v10.4s,v11.4s
1264 add v14.4s,v14.4s,v15.4s
1266 add v18.4s,v18.4s,v19.4s
1268 add v22.4s,v22.4s,v23.4s
1270 add v26.4s,v26.4s,v27.4s
1272 add v30.4s,v30.4s,v31.4s
1286 ushr v9.4s,v0.4s,#20
1288 ushr v13.4s,v1.4s,#20
1290 ushr v17.4s,v2.4s,#20
1292 ushr v21.4s,v3.4s,#20
1294 ushr v25.4s,v4.4s,#20
1296 ushr v29.4s,v5.4s,#20
1298 sli v9.4s,v0.4s,#12
1300 sli v13.4s,v1.4s,#12
1302 sli v17.4s,v2.4s,#12
1304 sli v21.4s,v3.4s,#12
1306 sli v25.4s,v4.4s,#12
1308 sli v29.4s,v5.4s,#12
1310 add v8.4s,v8.4s,v9.4s
1312 add v12.4s,v12.4s,v13.4s
1314 add v16.4s,v16.4s,v17.4s
1316 add v20.4s,v20.4s,v21.4s
1318 add v24.4s,v24.4s,v25.4s
1320 add v28.4s,v28.4s,v29.4s
1346 add v10.4s,v10.4s,v11.4s
1348 add v14.4s,v14.4s,v15.4s
1350 add v18.4s,v18.4s,v19.4s
1352 add v22.4s,v22.4s,v23.4s
1354 add v26.4s,v26.4s,v27.4s
1356 add v30.4s,v30.4s,v31.4s
1370 ushr v9.4s,v0.4s,#25
1372 ushr v13.4s,v1.4s,#25
1374 ushr v17.4s,v2.4s,#25
1376 ushr v21.4s,v3.4s,#25
1378 ushr v25.4s,v4.4s,#25
1380 ushr v29.4s,v5.4s,#25
1382 sli v9.4s,v0.4s,#7
1384 sli v13.4s,v1.4s,#7
1386 sli v17.4s,v2.4s,#7
1388 sli v21.4s,v3.4s,#7
1390 sli v25.4s,v4.4s,#7
1392 sli v29.4s,v5.4s,#7
1501 add v8.4s,v8.4s,v9.4s
1503 add v12.4s,v12.4s,v13.4s
1505 add v16.4s,v16.4s,v17.4s
1507 add v20.4s,v20.4s,v21.4s
1509 add v24.4s,v24.4s,v25.4s
1511 add v28.4s,v28.4s,v29.4s
1537 add v10.4s,v10.4s,v11.4s
1539 add v14.4s,v14.4s,v15.4s
1541 add v18.4s,v18.4s,v19.4s
1543 add v22.4s,v22.4s,v23.4s
1545 add v26.4s,v26.4s,v27.4s
1547 add v30.4s,v30.4s,v31.4s
1561 ushr v9.4s,v0.4s,#20
1563 ushr v13.4s,v1.4s,#20
1565 ushr v17.4s,v2.4s,#20
1567 ushr v21.4s,v3.4s,#20
1569 ushr v25.4s,v4.4s,#20
1571 ushr v29.4s,v5.4s,#20
1573 sli v9.4s,v0.4s,#12
1575 sli v13.4s,v1.4s,#12
1577 sli v17.4s,v2.4s,#12
1579 sli v21.4s,v3.4s,#12
1581 sli v25.4s,v4.4s,#12
1583 sli v29.4s,v5.4s,#12
1585 add v8.4s,v8.4s,v9.4s
1587 add v12.4s,v12.4s,v13.4s
1589 add v16.4s,v16.4s,v17.4s
1591 add v20.4s,v20.4s,v21.4s
1593 add v24.4s,v24.4s,v25.4s
1595 add v28.4s,v28.4s,v29.4s
1621 add v10.4s,v10.4s,v11.4s
1623 add v14.4s,v14.4s,v15.4s
1625 add v18.4s,v18.4s,v19.4s
1627 add v22.4s,v22.4s,v23.4s
1629 add v26.4s,v26.4s,v27.4s
1631 add v30.4s,v30.4s,v31.4s
1645 ushr v9.4s,v0.4s,#25
1647 ushr v13.4s,v1.4s,#25
1649 ushr v17.4s,v2.4s,#25
1651 ushr v21.4s,v3.4s,#25
1653 ushr v25.4s,v4.4s,#25
1655 ushr v29.4s,v5.4s,#25
1657 sli v9.4s,v0.4s,#7
1659 sli v13.4s,v1.4s,#7
1661 sli v17.4s,v2.4s,#7
1663 sli v21.4s,v3.4s,#7
1665 sli v25.4s,v4.4s,#7
1667 sli v29.4s,v5.4s,#7
1699 add v8.4s,v8.4s,v9.4s
1701 add v12.4s,v12.4s,v13.4s
1703 add v16.4s,v16.4s,v17.4s
1705 add v20.4s,v20.4s,v21.4s
1707 add v24.4s,v24.4s,v25.4s
1709 add v28.4s,v28.4s,v29.4s
1735 add v10.4s,v10.4s,v11.4s
1737 add v14.4s,v14.4s,v15.4s
1739 add v18.4s,v18.4s,v19.4s
1741 add v22.4s,v22.4s,v23.4s
1743 add v26.4s,v26.4s,v27.4s
1745 add v30.4s,v30.4s,v31.4s
1759 ushr v9.4s,v0.4s,#20
1761 ushr v13.4s,v1.4s,#20
1763 ushr v17.4s,v2.4s,#20
1765 ushr v21.4s,v3.4s,#20
1767 ushr v25.4s,v4.4s,#20
1769 ushr v29.4s,v5.4s,#20
1771 sli v9.4s,v0.4s,#12
1773 sli v13.4s,v1.4s,#12
1775 sli v17.4s,v2.4s,#12
1777 sli v21.4s,v3.4s,#12
1779 sli v25.4s,v4.4s,#12
1781 sli v29.4s,v5.4s,#12
1783 add v8.4s,v8.4s,v9.4s
1785 add v12.4s,v12.4s,v13.4s
1787 add v16.4s,v16.4s,v17.4s
1789 add v20.4s,v20.4s,v21.4s
1791 add v24.4s,v24.4s,v25.4s
1793 add v28.4s,v28.4s,v29.4s
1819 add v10.4s,v10.4s,v11.4s
1821 add v14.4s,v14.4s,v15.4s
1823 add v18.4s,v18.4s,v19.4s
1825 add v22.4s,v22.4s,v23.4s
1827 add v26.4s,v26.4s,v27.4s
1829 add v30.4s,v30.4s,v31.4s
1843 ushr v9.4s,v0.4s,#25
1845 ushr v13.4s,v1.4s,#25
1847 ushr v17.4s,v2.4s,#25
1849 ushr v21.4s,v3.4s,#25
1851 ushr v25.4s,v4.4s,#25
1853 ushr v29.4s,v5.4s,#25
1855 sli v9.4s,v0.4s,#7
1857 sli v13.4s,v1.4s,#7
1859 sli v17.4s,v2.4s,#7
1861 sli v21.4s,v3.4s,#7
1863 sli v25.4s,v4.4s,#7
1865 sli v29.4s,v5.4s,#7
1907 add v8.4s,v8.4s,v0.4s
1909 add v12.4s,v12.4s,v0.4s
1911 add v16.4s,v16.4s,v0.4s
1913 add v20.4s,v20.4s,v0.4s
1915 add v24.4s,v24.4s,v0.4s
1917 add v28.4s,v28.4s,v0.4s
1919 add v10.4s,v10.4s,v2.4s
1921 add v14.4s,v14.4s,v2.4s
1923 add v18.4s,v18.4s,v2.4s
1925 add v22.4s,v22.4s,v2.4s
1927 add v26.4s,v26.4s,v2.4s
1929 add v30.4s,v30.4s,v2.4s
1931 add v27.4s,v27.4s,v7.4s // +4
1933 add v31.4s,v31.4s,v7.4s // +4
1935 add v11.4s,v11.4s,v3.4s
1937 add v15.4s,v15.4s,v4.4s
1939 add v19.4s,v19.4s,v5.4s
1941 add v23.4s,v23.4s,v6.4s
1943 add v27.4s,v27.4s,v3.4s
1945 add v31.4s,v31.4s,v4.4s
1947 add v9.4s,v9.4s,v1.4s
1949 add v13.4s,v13.4s,v1.4s
1951 add v17.4s,v17.4s,v1.4s
1953 add v21.4s,v21.4s,v1.4s
1955 add v25.4s,v25.4s,v1.4s
1957 add v29.4s,v29.4s,v1.4s
2022 shl v8.4s,v7.4s,#1 // 4 -> 8
2029 add v3.4s,v3.4s,v8.4s // += 8
2030 add v4.4s,v4.4s,v8.4s
2031 add v5.4s,v5.4s,v8.4s
2032 add v6.4s,v6.4s,v8.4s
2037 ushr v7.4s,v7.4s,#1 // 4 -> 2
2052 sub v3.4s,v3.4s,v7.4s // -= 2
2053 ld1 {v8.4s,v9.4s},[x3]