Lines Matching +full:2 +full:d

62 	fmov	v1.d[1], x9                               //CTR block 1
63 rev w9, w12 //CTR block 2
65 fmov d2, x10 //CTR block 2
66 orr x9, x11, x9, lsl #32 //CTR block 2
67 add w12, w12, #1 //CTR block 2
69 fmov v2.d[1], x9 //CTR block 2
76 fmov v3.d[1], x9 //CTR block 3
87 aesmc v2.16b, v2.16b //AES block 2 - round 0
102 aesmc v2.16b, v2.16b //AES block 2 - round 1
103 trn2 v17.2d, v14.2d, v15.2d //h4l | h3l
115 trn1 v9.2d, v14.2d, v15.2d //h4h | h3h
118 aesmc v0.16b, v0.16b //AES block 0 - round 2
122 aesmc v1.16b, v1.16b //AES block 1 - round 2
129 aesmc v3.16b, v3.16b //AES block 3 - round 2
132 aesmc v2.16b, v2.16b //AES block 2 - round 2
142 aesmc v2.16b, v2.16b //AES block 2 - round 3
149 trn2 v16.2d, v12.2d, v13.2d //h2l | h1l
156 aesmc v2.16b, v2.16b //AES block 2 - round 4
166 aesmc v2.16b, v2.16b //AES block 2 - round 5
178 aesmc v2.16b, v2.16b //AES block 2 - round 6
179 trn1 v8.2d, v12.2d, v13.2d //h2h | h1h
197 aesmc v2.16b, v2.16b //AES block 2 - round 7
206 aesmc v2.16b, v2.16b //AES block 2 - round 8
214 aese v2.16b, v27.16b //AES block 2 - round 9
230 ldp x21, x22, [x0, #32] //AES block 2 - load plaintext
248 eor x21, x21, x13 //AES block 2 - round 10 low
252 eor x22, x22, x14 //AES block 2 - round 10 high
253 fmov v4.d[1], x7 //AES block 0 - mov high
259 fmov v5.d[1], x20 //AES block 1 - mov high
261 fmov d6, x21 //AES block 2 - mov low
265 fmov v6.d[1], x22 //AES block 2 - mov high
272 fmov v0.d[1], x9 //CTR block 4
281 fmov v1.d[1], x9 //CTR block 5
287 fmov v7.d[1], x24 //AES block 3 - mov high
291 eor v6.16b, v6.16b, v2.16b //AES block 2 - result
297 fmov v2.d[1], x9 //CTR block 6
299 st1 { v6.16b}, [x2], #16 //AES block 2 - store result
314 rev64 v6.16b, v6.16b //GHASH block 4k+2 (t0, t1, and t2 free)
326 fmov v3.d[1], x9 //CTR block 4k+3
330 mov d31, v6.d[1] //GHASH block 4k+2 - mid
334 mov d30, v5.d[1] //GHASH block 4k+1 - mid
344 pmull2 v28.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
345 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
356 mov d8, v4.d[1] //GHASH block 4k - mid
359 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
361 mov d10, v17.d[1] //GHASH block 4k - mid
364 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
366 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
370 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
376 pmull v28.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
378 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
381 pmull v30.1q, v30.1d, v17.1d //GHASH block 4k+1 - mid
383 pmull v29.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
384 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
386 pmull2 v8.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
390 mov d30, v7.d[1] //GHASH block 4k+3 - mid
397 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
404 pmull2 v4.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
408 eor v9.16b, v9.16b, v8.16b //GHASH block 4k+2 - high
410 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
412 pmull v29.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
415 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
416 eor v11.16b, v11.16b, v28.16b //GHASH block 4k+2 - low
422 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
438 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
447 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
469 fmov v4.d[1], x7 //AES block 4k+4 - mov high
489 fmov v5.d[1], x20 //AES block 4k+5 - mov high
493 fmov v7.d[1], x24 //AES block 4k+3 - mov high
512 fmov v6.d[1], x22 //AES block 4k+6 - mov high
522 fmov v0.d[1], x9 //CTR block 4k+8
534 pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
535 fmov v1.d[1], x9 //CTR block 4k+9
551 fmov v2.d[1], x9 //CTR block 4k+10
569 fmov v3.d[1], x9 //CTR block 4k+3
573 rev64 v6.16b, v6.16b //GHASH block 4k+2 (t0, t1, and t2 free)
575 pmull v29.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
580 pmull2 v28.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
584 mov d30, v5.d[1] //GHASH block 4k+1 - mid
586 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
587 mov d8, v4.d[1] //GHASH block 4k - mid
589 mov d31, v6.d[1] //GHASH block 4k+2 - mid
590 mov d10, v17.d[1] //GHASH block 4k - mid
598 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
599 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
604 pmull v30.1q, v30.1d, v17.1d //GHASH block 4k+1 - mid
607 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
611 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
617 mov d30, v7.d[1] //GHASH block 4k+3 - mid
623 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
625 pmull2 v8.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
628 pmull2 v4.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
630 pmull v28.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
634 eor v9.16b, v9.16b, v8.16b //GHASH block 4k+2 - high
637 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
639 pmull v29.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
643 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
644 eor v11.16b, v11.16b, v28.16b //GHASH block 4k+2 - low
647 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
649 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
650 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
656 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
672 pmull v28.1q, v9.1d, v8.1d
724 pmull v28.1q, v10.1d, v8.1d
767 fmov v4.d[1], x7 //AES block 4k+4 - mov high
795 ldp x6, x7, [x0], #16 //AES final-2 block - load input low & high
803 eor x7, x7, x14 //AES final-2 block - round 10 high
804 eor x6, x6, x13 //AES final-2 block - round 10 low
806 fmov d5, x6 //AES final-2 block - mov low
809 fmov v5.d[1], x7 //AES final-2 block - mov high
811 pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low
812 mov d22, v4.d[1] //GHASH final-3 block - mid
814 pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high
816 mov d10, v17.d[1] //GHASH final-3 block - mid
818 eor v5.16b, v5.16b, v1.16b //AES final-2 block - result
821 pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid
822 .L128_enc_blocks_more_than_2: //blocks left > 2
824 st1 { v5.16b}, [x2], #16 //AES final-2 block - store result
826 rev64 v4.16b, v5.16b //GHASH final-2 block
839 pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high
840 fmov v5.d[1], x7 //AES final-1 block - mov high
842 mov d22, v4.d[1] //GHASH final-2 block - mid
844 pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low
846 eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high
848 eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid
852 eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low
854 pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid
858 eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid
876 pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high
877 fmov v5.d[1], x7 //AES final block - mov high
879 mov d22, v4.d[1] //GHASH final-1 block - mid
881 pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low
887 ins v22.d[1], v22.d[0] //GHASH final-1 block - mid
889 pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid
917 fmov v0.d[1], x7
925 mov d8, v4.d[1] //GHASH final block - mid
927 pmull v21.1q, v4.1d, v12.1d //GHASH final block - low
936 pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high
938 pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid
953 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
961 pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
1029 fmov d2, x10 //CTR block 2
1046 fmov v1.d[1], x9 //CTR block 1
1047 rev w9, w12 //CTR block 2
1048 add w12, w12, #1 //CTR block 2
1052 orr x9, x11, x9, lsl #32 //CTR block 2
1054 fmov v2.d[1], x9 //CTR block 2
1061 fmov v3.d[1], x9 //CTR block 3
1069 aesmc v0.16b, v0.16b //AES block 0 - round 2
1073 aesmc v2.16b, v2.16b //AES block 2 - round 0
1084 aesmc v2.16b, v2.16b //AES block 2 - round 1
1087 aesmc v1.16b, v1.16b //AES block 1 - round 2
1103 aesmc v3.16b, v3.16b //AES block 3 - round 2
1106 aesmc v2.16b, v2.16b //AES block 2 - round 2
1116 aesmc v2.16b, v2.16b //AES block 2 - round 3
1129 aesmc v2.16b, v2.16b //AES block 2 - round 4
1138 aesmc v2.16b, v2.16b //AES block 2 - round 5
1156 aesmc v2.16b, v2.16b //AES block 2 - round 6
1157 trn1 v8.2d, v12.2d, v13.2d //h2h | h1h
1163 trn2 v16.2d, v12.2d, v13.2d //h2l | h1l
1170 aesmc v2.16b, v2.16b //AES block 2 - round 7
1181 trn2 v17.2d, v14.2d, v15.2d //h4l | h3l
1184 aesmc v2.16b, v2.16b //AES block 2 - round 8
1191 trn1 v9.2d, v14.2d, v15.2d //h4h | h3h
1193 aese v2.16b, v27.16b //AES block 2 - round 9
1207 ld1 {v6.16b}, [x0], #16 //AES block 2 - load ciphertext
1218 mov x19, v1.d[0] //AES block 1 - mov low
1220 mov x20, v1.d[1] //AES block 1 - mov high
1222 mov x6, v0.d[0] //AES block 0 - mov low
1225 mov x7, v0.d[1] //AES block 0 - mov high
1229 fmov v0.d[1], x9 //CTR block 4
1239 fmov v1.d[1], x9 //CTR block 5
1253 eor v2.16b, v6.16b, v2.16b //AES block 2 - result
1267 mov x21, v2.d[0] //AES block 4k+2 - mov low
1269 pmull2 v28.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
1270 mov x22, v2.d[1] //AES block 4k+2 - mov high
1276 rev64 v6.16b, v6.16b //GHASH block 4k+2
1277 fmov v2.d[1], x9 //CTR block 4k+6
1280 mov x23, v3.d[0] //AES block 4k+3 - mov low
1282 mov d30, v5.d[1] //GHASH block 4k+1 - mid
1288 pmull v29.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
1289 mov x24, v3.d[1] //AES block 4k+3 - mov high
1292 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
1297 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
1298 fmov v3.d[1], x9 //CTR block 4k+7
1302 mov d10, v17.d[1] //GHASH block 4k - mid
1304 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
1307 pmull v29.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
1311 mov d8, v4.d[1] //GHASH block 4k - mid
1320 pmull v28.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
1329 pmull v30.1q, v30.1d, v17.1d //GHASH block 4k+1 - mid
1330 eor x22, x22, x14 //AES block 4k+2 - round 10 high
1334 mov d31, v6.d[1] //GHASH block 4k+2 - mid
1338 eor v11.16b, v11.16b, v28.16b //GHASH block 4k+2 - low
1340 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
1343 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
1344 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
1347 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
1353 pmull2 v8.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
1357 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
1359 pmull2 v4.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
1363 mov d30, v7.d[1] //GHASH block 4k+3 - mid
1367 eor v9.16b, v9.16b, v8.16b //GHASH block 4k+2 - high
1369 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
1375 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
1380 eor x21, x21, x13 //AES block 4k+2 - round 10 low
1397 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
1401 stp x21, x22, [x2], #16 //AES block 4k+2 - store result
1403 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
1431 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
1465 mov x7, v0.d[1] //AES block 4k+4 - mov high
1469 mov x6, v0.d[0] //AES block 4k+4 - mov low
1475 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
1476 fmov v0.d[1], x9 //CTR block 4k+8
1490 mov x20, v1.d[1] //AES block 4k+5 - mov high
1496 mov x19, v1.d[0] //AES block 4k+5 - mov low
1505 fmov v1.d[1], x9 //CTR block 4k+9
1527 mov x21, v2.d[0] //AES block 4k+2 - mov low
1528 mov d30, v5.d[1] //GHASH block 4k+1 - mid
1536 mov x22, v2.d[1] //AES block 4k+2 - mov high
1540 rev64 v6.16b, v6.16b //GHASH block 4k+2
1544 fmov v2.d[1], x9 //CTR block 4k+6
1547 mov x23, v3.d[0] //AES block 4k+3 - mov low
1550 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
1551 mov d10, v17.d[1] //GHASH block 4k - mid
1552 mov x24, v3.d[1] //AES block 4k+3 - mov high
1556 mov d31, v6.d[1] //GHASH block 4k+2 - mid
1559 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
1562 pmull v29.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
1563 mov d8, v4.d[1] //GHASH block 4k - mid
1568 fmov v3.d[1], x9 //CTR block 4k+7
1570 pmull v30.1q, v30.1d, v17.1d //GHASH block 4k+1 - mid
1571 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
1579 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
1583 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
1585 pmull2 v28.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
1587 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
1590 pmull v29.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
1592 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
1597 pmull2 v4.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
1599 pmull2 v8.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
1600 mov d30, v7.d[1] //GHASH block 4k+3 - mid
1603 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
1604 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
1606 pmull v28.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
1608 eor v9.16b, v9.16b, v8.16b //GHASH block 4k+2 - high
1615 eor v11.16b, v11.16b, v28.16b //GHASH block 4k+2 - low
1618 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
1622 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
1627 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
1628 eor x21, x21, x13 //AES block 4k+2 - round 10 low
1668 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
1702 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
1723 eor x22, x22, x14 //AES block 4k+2 - round 10 high
1728 stp x21, x22, [x2], #16 //AES block 4k+2 - store result
1743 mov x7, v0.d[1] //AES block 4k+4 - mov high
1745 mov x6, v0.d[0] //AES block 4k+4 - mov low
1781 ld1 { v5.16b}, [x0], #16 //AES final-2 block - load ciphertext
1785 mov d10, v17.d[1] //GHASH final-3 block - mid
1787 eor v0.16b, v5.16b, v1.16b //AES final-2 block - result
1789 mov d22, v4.d[1] //GHASH final-3 block - mid
1790 mov x7, v0.d[1] //AES final-2 block - mov high
1792 pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low
1793 mov x6, v0.d[0] //AES final-2 block - mov low
1795 pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high
1800 eor x7, x7, x14 //AES final-2 block - round 10 high
1804 pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid
1805 eor x6, x6, x13 //AES final-2 block - round 10 low
1809 .L128_dec_blocks_more_than_2: //blocks left > 2
1811 rev64 v4.16b, v5.16b //GHASH final-2 block
1817 stp x6, x7, [x2], #16 //AES final-2 block - store result
1819 mov d22, v4.d[1] //GHASH final-2 block - mid
1821 pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low
1823 pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high
1824 mov x6, v0.d[0] //AES final-1 block - mov low
1826 mov x7, v0.d[1] //AES final-1 block - mov high
1827 eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid
1831 pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid
1837 eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low
1839 eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high
1841 eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid
1853 mov d22, v4.d[1] //GHASH final-1 block - mid
1860 mov x6, v0.d[0] //AES final block - mov low
1862 mov x7, v0.d[1] //AES final block - mov high
1863 ins v22.d[1], v22.d[0] //GHASH final-1 block - mid
1865 pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low
1867 pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high
1869 pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid
1904 mov v0.d[1], x10
1916 pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high
1917 mov d8, v4.d[1] //GHASH final block - mid
1922 pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid
1924 pmull v21.1q, v4.1d, v12.1d //GHASH final block - low
1944 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
1959 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
2032 fmov v1.d[1], x9 //CTR block 1
2033 rev w9, w12 //CTR block 2
2034 add w12, w12, #1 //CTR block 2
2036 fmov d2, x10 //CTR block 2
2037 orr x9, x11, x9, lsl #32 //CTR block 2
2039 fmov v2.d[1], x9 //CTR block 2
2045 fmov v3.d[1], x9 //CTR block 3
2068 aesmc v2.16b, v2.16b //AES block 2 - round 0
2082 aesmc v2.16b, v2.16b //AES block 2 - round 1
2092 aesmc v0.16b, v0.16b //AES block 0 - round 2
2095 aesmc v2.16b, v2.16b //AES block 2 - round 2
2098 aesmc v3.16b, v3.16b //AES block 3 - round 2
2102 trn1 v9.2d, v14.2d, v15.2d //h4h | h3h
2105 aesmc v2.16b, v2.16b //AES block 2 - round 3
2108 aesmc v1.16b, v1.16b //AES block 1 - round 2
2109 trn2 v17.2d, v14.2d, v15.2d //h4l | h3l
2124 aesmc v2.16b, v2.16b //AES block 2 - round 4
2136 aesmc v2.16b, v2.16b //AES block 2 - round 5
2145 aesmc v2.16b, v2.16b //AES block 2 - round 6
2161 trn2 v16.2d, v12.2d, v13.2d //h2l | h1l
2170 aesmc v2.16b, v2.16b //AES block 2 - round 7
2171 trn1 v8.2d, v12.2d, v13.2d //h2h | h1h
2180 aesmc v2.16b, v2.16b //AES block 2 - round 8
2189 aesmc v2.16b, v2.16b //AES block 2 - round 9
2198 aesmc v2.16b, v2.16b //AES block 2 - round 10
2214 aese v2.16b, v29.16b //AES block 2 - round 11
2234 ldp x21, x22, [x0, #32] //AES block 2 - load plaintext
2255 eor x22, x22, x14 //AES block 2 - round 12 high
2259 fmov v4.d[1], x7 //AES block 0 - mov high
2261 eor x21, x21, x13 //AES block 2 - round 12 low
2267 fmov v5.d[1], x20 //AES block 1 - mov high
2270 fmov d6, x21 //AES block 2 - mov low
2276 fmov v0.d[1], x9 //CTR block 4
2285 fmov v6.d[1], x22 //AES block 2 - mov high
2291 fmov v7.d[1], x24 //AES block 3 - mov high
2293 fmov v1.d[1], x9 //CTR block 5
2299 eor v6.16b, v6.16b, v2.16b //AES block 2 - result
2302 fmov v2.d[1], x9 //CTR block 6
2306 st1 { v6.16b}, [x2], #16 //AES block 2 - store result
2330 fmov v3.d[1], x9 //CTR block 4k+3
2332 pmull2 v30.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
2346 pmull v31.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
2354 rev64 v6.16b, v6.16b //GHASH block 4k+2 (t0, t1, and t2 free)
2360 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
2361 mov d8, v4.d[1] //GHASH block 4k - mid
2364 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
2378 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
2379 mov d31, v6.d[1] //GHASH block 4k+2 - mid
2381 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
2382 mov d4, v5.d[1] //GHASH block 4k+1 - mid
2385 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
2390 mov d10, v17.d[1] //GHASH block 4k - mid
2394 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
2395 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
2397 pmull2 v30.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
2406 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
2408 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
2416 eor v9.16b, v9.16b, v30.16b //GHASH block 4k+2 - high
2418 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
2421 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
2423 mov d30, v7.d[1] //GHASH block 4k+3 - mid
2425 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
2428 pmull v8.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
2444 eor v11.16b, v11.16b, v8.16b //GHASH block 4k+2 - low
2454 pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
2476 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
2480 fmov v5.d[1], x20 //AES block 4k+5 - mov high
2486 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
2492 fmov v4.d[1], x7 //AES block 4k+4 - mov high
2504 fmov v7.d[1], x24 //AES block 4k+3 - mov high
2506 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
2549 fmov v0.d[1], x9 //CTR block 4k+8
2552 pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
2553 fmov v6.d[1], x22 //AES block 4k+6 - mov high
2565 fmov v1.d[1], x9 //CTR block 4k+9
2580 fmov v2.d[1], x9 //CTR block 4k+10
2606 fmov v3.d[1], x9 //CTR block 4k+3
2608 mov d10, v17.d[1] //GHASH block 4k - mid
2612 rev64 v6.16b, v6.16b //GHASH block 4k+2 (t0, t1, and t2 free)
2614 pmull2 v30.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
2616 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
2617 mov d8, v4.d[1] //GHASH block 4k - mid
2619 pmull v31.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
2622 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
2625 mov d4, v5.d[1] //GHASH block 4k+1 - mid
2628 mov d31, v6.d[1] //GHASH block 4k+2 - mid
2634 pmull2 v30.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
2637 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
2644 eor v9.16b, v9.16b, v30.16b //GHASH block 4k+2 - high
2650 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
2651 mov d30, v7.d[1] //GHASH block 4k+3 - mid
2653 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
2654 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
2657 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
2659 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
2665 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
2667 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
2669 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
2672 pmull v8.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
2679 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
2682 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
2683 eor v11.16b, v11.16b, v8.16b //GHASH block 4k+2 - low
2690 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
2695 pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
2742 pmull v30.1q, v9.1d, v8.1d
2777 pmull v30.1q, v10.1d, v8.1d
2815 fmov v4.d[1], x7 //AES block 4k+4 - mov high
2845 ldp x6, x7, [x0], #16 //AES final-2 block - load input low & high
2852 eor x6, x6, x13 //AES final-2 block - round 12 low
2855 eor x7, x7, x14 //AES final-2 block - round 12 high
2856 fmov d5, x6 //AES final-2 block - mov low
2858 fmov v5.d[1], x7 //AES final-2 block - mov high
2860 mov d22, v4.d[1] //GHASH final-3 block - mid
2862 pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low
2864 mov d10, v17.d[1] //GHASH final-3 block - mid
2870 pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high
2872 pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid
2873 eor v5.16b, v5.16b, v1.16b //AES final-2 block - result
2874 .L192_enc_blocks_more_than_2: //blocks left > 2
2876 st1 { v5.16b}, [x2], #16 //AES final-2 block - store result
2878 rev64 v4.16b, v5.16b //GHASH final-2 block
2888 pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high
2889 mov d22, v4.d[1] //GHASH final-2 block - mid
2891 pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low
2896 fmov v5.d[1], x7 //AES final-1 block - mov high
2897 eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high
2898 eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid
2900 eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low
2902 pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid
2908 eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid
2924 mov d22, v4.d[1] //GHASH final-1 block - mid
2930 pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high
2931 fmov v5.d[1], x7 //AES final block - mov high
2933 ins v22.d[1], v22.d[0] //GHASH final-1 block - mid
2937 pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low
2939 pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid
2972 fmov v0.d[1], x7
2980 mov d8, v4.d[1] //GHASH final block - mid
2982 pmull v21.1q, v4.1d, v12.1d //GHASH final block - low
2984 pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high
2992 pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid
3005 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
3013 pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
3094 fmov v1.d[1], x9 //CTR block 1
3095 rev w9, w12 //CTR block 2
3096 add w12, w12, #1 //CTR block 2
3098 fmov d2, x10 //CTR block 2
3099 orr x9, x11, x9, lsl #32 //CTR block 2
3101 fmov v2.d[1], x9 //CTR block 2
3108 fmov v3.d[1], x9 //CTR block 3
3113 aesmc v0.16b, v0.16b //AES block 0 - round 2
3116 aesmc v2.16b, v2.16b //AES block 2 - round 0
3132 aesmc v2.16b, v2.16b //AES block 2 - round 1
3147 aesmc v2.16b, v2.16b //AES block 2 - round 2
3155 aesmc v1.16b, v1.16b //AES block 1 - round 2
3159 aesmc v3.16b, v3.16b //AES block 3 - round 2
3163 aesmc v2.16b, v2.16b //AES block 2 - round 3
3174 trn1 v9.2d, v14.2d, v15.2d //h4h | h3h
3182 trn2 v17.2d, v14.2d, v15.2d //h4l | h3l
3185 aesmc v2.16b, v2.16b //AES block 2 - round 4
3189 trn2 v16.2d, v12.2d, v13.2d //h2l | h1l
3199 aesmc v2.16b, v2.16b //AES block 2 - round 5
3208 aesmc v2.16b, v2.16b //AES block 2 - round 6
3217 aesmc v2.16b, v2.16b //AES block 2 - round 7
3226 aesmc v2.16b, v2.16b //AES block 2 - round 8
3235 aesmc v2.16b, v2.16b //AES block 2 - round 9
3258 trn1 v8.2d, v12.2d, v13.2d //h2h | h1h
3263 aesmc v2.16b, v2.16b //AES block 2 - round 10
3272 aese v2.16b, v29.16b //AES block 2 - round 11
3286 ld1 {v6.16b, v7.16b}, [x0], #32 //AES block 2,3 - load ciphertext
3288 mov x19, v1.d[0] //AES block 1 - mov low
3290 mov x20, v1.d[1] //AES block 1 - mov high
3292 mov x6, v0.d[0] //AES block 0 - mov low
3296 mov x7, v0.d[1] //AES block 0 - mov high
3307 fmov v0.d[1], x9 //CTR block 4
3317 fmov v1.d[1], x9 //CTR block 5
3333 eor v2.16b, v6.16b, v2.16b //AES block 2 - result
3341 pmull v31.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
3342 mov x21, v2.d[0] //AES block 4k+2 - mov low
3344 mov x22, v2.d[1] //AES block 4k+2 - mov high
3356 pmull2 v30.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
3357 fmov v2.d[1], x9 //CTR block 4k+6
3360 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
3361 mov x24, v3.d[1] //AES block 4k+3 - mov high
3365 mov x23, v3.d[0] //AES block 4k+3 - mov low
3367 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
3369 mov d8, v4.d[1] //GHASH block 4k - mid
3371 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
3372 mov d10, v17.d[1] //GHASH block 4k - mid
3379 fmov v3.d[1], x9 //CTR block 4k+7
3381 mov d4, v5.d[1] //GHASH block 4k+1 - mid
3387 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
3388 eor x22, x22, x14 //AES block 4k+2 - round 12 high
3396 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
3400 rev64 v6.16b, v6.16b //GHASH block 4k+2
3403 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
3405 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
3407 eor x21, x21, x13 //AES block 4k+2 - round 12 low
3418 mov d31, v6.d[1] //GHASH block 4k+2 - mid
3427 pmull2 v30.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
3428 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
3430 pmull v8.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
3435 eor v9.16b, v9.16b, v30.16b //GHASH block 4k+2 - high
3436 mov d30, v7.d[1] //GHASH block 4k+3 - mid
3441 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
3444 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
3452 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
3457 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
3458 eor v11.16b, v11.16b, v8.16b //GHASH block 4k+2 - low
3463 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
3473 pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
3477 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
3518 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
3545 stp x21, x22, [x2], #16 //AES block 4k+2 - store result
3567 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
3568 mov x19, v1.d[0] //AES block 4k+5 - mov low
3570 mov x6, v0.d[0] //AES block 4k+4 - mov low
3575 mov x7, v0.d[1] //AES block 4k+4 - mov high
3579 mov x20, v1.d[1] //AES block 4k+5 - mov high
3586 fmov v0.d[1], x9 //CTR block 4k+8
3602 fmov v1.d[1], x9 //CTR block 4k+9
3624 mov x22, v2.d[1] //AES block 4k+2 - mov high
3630 mov x21, v2.d[0] //AES block 4k+2 - mov low
3634 mov d10, v17.d[1] //GHASH block 4k - mid
3641 mov x23, v3.d[0] //AES block 4k+3 - mov low
3645 mov x24, v3.d[1] //AES block 4k+3 - mov high
3647 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
3648 mov d8, v4.d[1] //GHASH block 4k - mid
3652 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
3653 rev64 v6.16b, v6.16b //GHASH block 4k+2
3655 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
3656 fmov v2.d[1], x9 //CTR block 4k+6
3661 mov d4, v5.d[1] //GHASH block 4k+1 - mid
3663 pmull v31.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
3668 fmov v3.d[1], x9 //CTR block 4k+7
3671 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
3672 eor x21, x21, x13 //AES block 4k+2 - round 12 low
3676 pmull2 v30.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
3677 eor x22, x22, x14 //AES block 4k+2 - round 12 high
3683 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
3688 stp x21, x22, [x2], #16 //AES block 4k+2 - store result
3697 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
3700 pmull2 v30.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
3707 mov d31, v6.d[1] //GHASH block 4k+2 - mid
3714 eor v9.16b, v9.16b, v30.16b //GHASH block 4k+2 - high
3716 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
3718 pmull v8.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
3721 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
3722 mov d30, v7.d[1] //GHASH block 4k+3 - mid
3725 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
3726 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
3728 pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
3737 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
3738 eor v11.16b, v11.16b, v8.16b //GHASH block 4k+2 - low
3743 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
3746 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
3756 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
3761 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
3836 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
3868 mov x7, v0.d[1] //AES block 4k+4 - mov high
3870 mov x6, v0.d[0] //AES block 4k+4 - mov low
3907 ld1 { v5.16b}, [x0], #16 //AES final-2 block - load ciphertext
3913 eor v0.16b, v5.16b, v1.16b //AES final-2 block - result
3915 pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low
3916 mov x6, v0.d[0] //AES final-2 block - mov low
3917 mov d22, v4.d[1] //GHASH final-3 block - mid
3919 mov x7, v0.d[1] //AES final-2 block - mov high
3921 mov d10, v17.d[1] //GHASH final-3 block - mid
3924 pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high
3926 eor x6, x6, x13 //AES final-2 block - round 12 low
3932 pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid
3933 eor x7, x7, x14 //AES final-2 block - round 12 high
3937 .L192_dec_blocks_more_than_2: //blocks left > 2
3939 rev64 v4.16b, v5.16b //GHASH final-2 block
3948 mov d22, v4.d[1] //GHASH final-2 block - mid
3950 pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low
3952 stp x6, x7, [x2], #16 //AES final-2 block - store result
3954 eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid
3955 mov x7, v0.d[1] //AES final-1 block - mov high
3957 eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low
3958 mov x6, v0.d[0] //AES final-1 block - mov low
3960 pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high
3962 pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid
3964 eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high
3973 eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid
3981 mov d22, v4.d[1] //GHASH final-1 block - mid
3983 pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high
3992 pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low
3993 mov x7, v0.d[1] //AES final block - mov high
3995 ins v22.d[1], v22.d[0] //GHASH final-1 block - mid
3996 mov x6, v0.d[0] //AES final block - mov low
3998 pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid
4035 mov v0.d[1], x10
4052 pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high
4053 mov d8, v4.d[1] //GHASH final block - mid
4055 pmull v21.1q, v4.1d, v12.1d //GHASH final block - low
4061 pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid
4074 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
4084 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
4148 fmov d2, x10 //CTR block 2
4166 fmov v1.d[1], x9 //CTR block 1
4167 rev w9, w12 //CTR block 2
4168 add w12, w12, #1 //CTR block 2
4170 orr x9, x11, x9, lsl #32 //CTR block 2
4173 fmov v2.d[1], x9 //CTR block 2
4180 fmov v3.d[1], x9 //CTR block 3
4187 aesmc v0.16b, v0.16b //AES block 0 - round 2
4191 aesmc v2.16b, v2.16b //AES block 2 - round 0
4205 aesmc v2.16b, v2.16b //AES block 2 - round 1
4209 aesmc v1.16b, v1.16b //AES block 1 - round 2
4219 aesmc v2.16b, v2.16b //AES block 2 - round 2
4229 aesmc v3.16b, v3.16b //AES block 3 - round 2
4233 aesmc v2.16b, v2.16b //AES block 2 - round 3
4246 aesmc v2.16b, v2.16b //AES block 2 - round 4
4267 aesmc v2.16b, v2.16b //AES block 2 - round 5
4271 trn2 v17.2d, v14.2d, v15.2d //h4l | h3l
4284 aesmc v2.16b, v2.16b //AES block 2 - round 6
4289 trn1 v9.2d, v14.2d, v15.2d //h4h | h3h
4295 aesmc v2.16b, v2.16b //AES block 2 - round 7
4299 trn2 v16.2d, v12.2d, v13.2d //h2l | h1l
4305 aesmc v2.16b, v2.16b //AES block 2 - round 8
4314 aesmc v2.16b, v2.16b //AES block 2 - round 9
4329 aesmc v2.16b, v2.16b //AES block 2 - round 10
4338 aesmc v2.16b, v2.16b //AES block 2 - round 11
4347 aesmc v2.16b, v2.16b //AES block 2 - round 12
4356 aese v2.16b, v31.16b //AES block 2 - round 13
4357 trn1 v8.2d, v12.2d, v13.2d //h2h | h1h
4389 ldp x21, x22, [x0, #32] //AES block 2 - load plaintext
4407 fmov v4.d[1], x7 //AES block 0 - mov high
4410 eor x21, x21, x13 //AES block 2 - round 14 low
4411 fmov v5.d[1], x20 //AES block 1 - mov high
4413 fmov d6, x21 //AES block 2 - mov low
4418 eor x22, x22, x14 //AES block 2 - round 14 high
4420 fmov v6.d[1], x22 //AES block 2 - mov high
4425 fmov v0.d[1], x9 //CTR block 4
4433 fmov v1.d[1], x9 //CTR block 5
4437 fmov v7.d[1], x24 //AES block 3 - mov high
4439 eor v6.16b, v6.16b, v2.16b //AES block 2 - result
4446 fmov v2.d[1], x9 //CTR block 6
4447 st1 { v6.16b}, [x2], #16 //AES block 2 - store result
4471 fmov v3.d[1], x9 //CTR block 4k+3
4488 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
4492 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
4500 mov d10, v17.d[1] //GHASH block 4k - mid
4502 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
4504 mov d8, v4.d[1] //GHASH block 4k - mid
4513 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
4517 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
4523 pmull2 v4.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
4525 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
4526 rev64 v6.16b, v6.16b //GHASH block 4k+2 (t0, t1, and t2 free)
4528 pmull v8.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
4531 mov d4, v5.d[1] //GHASH block 4k+1 - mid
4537 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
4545 mov d8, v6.d[1] //GHASH block 4k+2 - mid
4556 eor v8.8b, v8.8b, v6.8b //GHASH block 4k+2 - mid
4561 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
4568 ins v8.d[1], v8.d[0] //GHASH block 4k+2 - mid
4583 pmull2 v4.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
4585 pmull v5.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
4590 pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
4591 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+2 - high
4602 mov d4, v7.d[1] //GHASH block 4k+3 - mid
4606 eor v11.16b, v11.16b, v5.16b //GHASH block 4k+2 - low
4608 pmull2 v8.1q, v8.2d, v16.2d //GHASH block 4k+2 - mid
4610 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
4619 eor v10.16b, v10.16b, v8.16b //GHASH block 4k+2 - mid
4629 pmull v4.1q, v4.1d, v16.1d //GHASH block 4k+3 - mid
4667 pmull v7.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
4700 fmov v4.d[1], x7 //AES block 4k+4 - mov high
4708 fmov v5.d[1], x20 //AES block 4k+5 - mov high
4713 fmov v6.d[1], x22 //AES block 4k+6 - mov high
4715 pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
4719 fmov v0.d[1], x9 //CTR block 4k+8
4729 fmov v1.d[1], x9 //CTR block 4k+9
4737 fmov v7.d[1], x24 //AES block 4k+7 - mov high
4748 fmov v2.d[1], x9 //CTR block 4k+10
4761 rev64 v6.16b, v6.16b //GHASH block 4k+2 (t0, t1, and t2 free)
4771 fmov v3.d[1], x9 //CTR block 4k+3
4784 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
4788 mov d10, v17.d[1] //GHASH block 4k - mid
4793 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
4794 mov d8, v4.d[1] //GHASH block 4k - mid
4796 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
4802 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
4806 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
4814 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
4816 pmull2 v4.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
4818 pmull v8.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
4821 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
4824 mov d4, v5.d[1] //GHASH block 4k+1 - mid
4834 mov d8, v6.d[1] //GHASH block 4k+2 - mid
4843 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
4844 eor v8.8b, v8.8b, v6.8b //GHASH block 4k+2 - mid
4847 pmull v5.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
4856 pmull2 v4.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
4858 eor v11.16b, v11.16b, v5.16b //GHASH block 4k+2 - low
4859 ins v8.d[1], v8.d[0] //GHASH block 4k+2 - mid
4864 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+2 - high
4865 mov d4, v7.d[1] //GHASH block 4k+3 - mid
4870 pmull2 v8.1q, v8.2d, v16.2d //GHASH block 4k+2 - mid
4874 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
4879 pmull v4.1q, v4.1d, v16.1d //GHASH block 4k+3 - mid
4880 eor v10.16b, v10.16b, v8.16b //GHASH block 4k+2 - mid
4913 pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
4930 pmull v4.1q, v9.1d, v8.1d
4975 pmull v4.1q, v10.1d, v8.1d
5011 fmov v4.d[1], x7 //AES block 4k+4 - mov high
5038 ldp x6, x7, [x0], #16 //AES final-2 block - load input low & high
5045 eor x6, x6, x13 //AES final-2 block - round 14 low
5048 eor x7, x7, x14 //AES final-2 block - round 14 high
5050 mov d22, v4.d[1] //GHASH final-3 block - mid
5051 fmov d5, x6 //AES final-2 block - mov low
5053 fmov v5.d[1], x7 //AES final-2 block - mov high
5058 mov d10, v17.d[1] //GHASH final-3 block - mid
5060 pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low
5062 pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high
5064 pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid
5065 eor v5.16b, v5.16b, v1.16b //AES final-2 block - result
5066 .L256_enc_blocks_more_than_2: //blocks left > 2
5068 st1 { v5.16b}, [x2], #16 //AES final-2 block - store result
5075 rev64 v4.16b, v5.16b //GHASH final-2 block
5083 fmov v5.d[1], x7 //AES final-1 block - mov high
5087 pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high
5088 mov d22, v4.d[1] //GHASH final-2 block - mid
5090 pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low
5092 eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid
5096 eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high
5098 pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid
5100 eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low
5102 eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid
5119 mov d22, v4.d[1] //GHASH final-1 block - mid
5121 pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high
5128 ins v22.d[1], v22.d[0] //GHASH final-1 block - mid
5131 fmov v5.d[1], x7 //AES final block - mov high
5133 pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid
5135 pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low
5162 fmov v0.d[1], x7
5172 pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high
5173 mov d8, v4.d[1] //GHASH final block - mid
5180 pmull v21.1q, v4.1d, v12.1d //GHASH final block - low
5185 pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid
5198 pmull v7.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
5206 pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
5289 fmov v1.d[1], x9 //CTR block 1
5290 rev w9, w12 //CTR block 2
5291 add w12, w12, #1 //CTR block 2
5293 fmov d2, x10 //CTR block 2
5294 orr x9, x11, x9, lsl #32 //CTR block 2
5296 fmov v2.d[1], x9 //CTR block 2
5302 fmov v3.d[1], x9 //CTR block 3
5333 aesmc v2.16b, v2.16b //AES block 2 - round 0
5346 aesmc v2.16b, v2.16b //AES block 2 - round 1
5354 aesmc v0.16b, v0.16b //AES block 0 - round 2
5360 aesmc v2.16b, v2.16b //AES block 2 - round 2
5364 aesmc v3.16b, v3.16b //AES block 3 - round 2
5370 aesmc v1.16b, v1.16b //AES block 1 - round 2
5380 aesmc v2.16b, v2.16b //AES block 2 - round 3
5389 aesmc v2.16b, v2.16b //AES block 2 - round 4
5404 aesmc v2.16b, v2.16b //AES block 2 - round 5
5416 aesmc v2.16b, v2.16b //AES block 2 - round 6
5431 aesmc v2.16b, v2.16b //AES block 2 - round 7
5443 aesmc v2.16b, v2.16b //AES block 2 - round 8
5459 aesmc v2.16b, v2.16b //AES block 2 - round 9
5468 aesmc v2.16b, v2.16b //AES block 2 - round 10
5477 aesmc v2.16b, v2.16b //AES block 2 - round 11
5479 trn1 v9.2d, v14.2d, v15.2d //h4h | h3h
5481 trn2 v17.2d, v14.2d, v15.2d //h4l | h3l
5483 trn1 v8.2d, v12.2d, v13.2d //h2h | h1h
5484 trn2 v16.2d, v12.2d, v13.2d //h2l | h1l
5493 aesmc v2.16b, v2.16b //AES block 2 - round 12
5501 aese v2.16b, v31.16b //AES block 2 - round 13
5517 ld1 {v6.16b}, [x0], #16 //AES block 2 - load ciphertext
5519 mov x7, v0.d[1] //AES block 0 - mov high
5521 mov x6, v0.d[0] //AES block 0 - mov low
5528 fmov v0.d[1], x9 //CTR block 4
5532 mov x19, v1.d[0] //AES block 1 - mov low
5535 mov x20, v1.d[1] //AES block 1 - mov high
5549 fmov v1.d[1], x9 //CTR block 5
5565 eor v2.16b, v6.16b, v2.16b //AES block 2 - result
5570 mov x21, v2.d[0] //AES block 4k+2 - mov low
5576 mov x22, v2.d[1] //AES block 4k+2 - mov high
5582 fmov v2.d[1], x9 //CTR block 4k+6
5588 mov x24, v3.d[1] //AES block 4k+3 - mov high
5592 mov x23, v3.d[0] //AES block 4k+3 - mov low
5594 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
5595 mov d8, v4.d[1] //GHASH block 4k - mid
5599 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
5604 fmov v3.d[1], x9 //CTR block 4k+7
5607 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
5612 eor x22, x22, x14 //AES block 4k+2 - round 14 high
5618 mov d10, v17.d[1] //GHASH block 4k - mid
5622 rev64 v6.16b, v6.16b //GHASH block 4k+2
5626 eor x21, x21, x13 //AES block 4k+2 - round 14 low
5631 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
5632 stp x21, x22, [x2], #16 //AES block 4k+2 - store result
5634 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
5636 pmull2 v4.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
5642 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
5647 pmull v8.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
5659 mov d4, v5.d[1] //GHASH block 4k+1 - mid
5670 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
5671 mov d8, v6.d[1] //GHASH block 4k+2 - mid
5677 pmull v5.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
5681 eor v8.8b, v8.8b, v6.8b //GHASH block 4k+2 - mid
5688 eor v11.16b, v11.16b, v5.16b //GHASH block 4k+2 - low
5690 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
5695 ins v8.d[1], v8.d[0] //GHASH block 4k+2 - mid
5711 pmull2 v4.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
5712 mov d6, v7.d[1] //GHASH block 4k+3 - mid
5717 pmull2 v8.1q, v8.2d, v16.2d //GHASH block 4k+2 - mid
5721 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+2 - high
5726 pmull v4.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
5728 eor v10.16b, v10.16b, v8.16b //GHASH block 4k+2 - mid
5730 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
5746 pmull v6.1q, v6.1d, v16.1d //GHASH block 4k+3 - mid
5767 pmull v7.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
5807 mov x7, v0.d[1] //AES block 4k+4 - mov high
5814 mov x6, v0.d[0] //AES block 4k+4 - mov low
5822 fmov v0.d[1], x9 //CTR block 4k+8
5824 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
5842 mov x20, v1.d[1] //AES block 4k+5 - mov high
5848 mov x19, v1.d[0] //AES block 4k+5 - mov low
5853 fmov v1.d[1], x9 //CTR block 4k+9
5880 mov x21, v2.d[0] //AES block 4k+2 - mov low
5885 mov x22, v2.d[1] //AES block 4k+2 - mov high
5891 fmov v2.d[1], x9 //CTR block 4k+6
5895 rev64 v6.16b, v6.16b //GHASH block 4k+2
5897 mov x23, v3.d[0] //AES block 4k+3 - mov low
5901 mov x24, v3.d[1] //AES block 4k+3 - mov high
5903 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
5904 mov d8, v4.d[1] //GHASH block 4k - mid
5907 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
5908 fmov v3.d[1], x9 //CTR block 4k+7
5912 mov d10, v17.d[1] //GHASH block 4k - mid
5918 pmull2 v4.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
5927 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
5930 pmull v8.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
5934 mov d4, v5.d[1] //GHASH block 4k+1 - mid
5937 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
5940 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
5944 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
5948 mov d8, v6.d[1] //GHASH block 4k+2 - mid
5951 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
5954 pmull v5.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
5961 eor v8.8b, v8.8b, v6.8b //GHASH block 4k+2 - mid
5963 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
5967 eor v11.16b, v11.16b, v5.16b //GHASH block 4k+2 - low
5972 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
5975 pmull2 v4.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
5979 ins v8.d[1], v8.d[0] //GHASH block 4k+2 - mid
5986 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+2 - high
5988 pmull v4.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
5992 mov d6, v7.d[1] //GHASH block 4k+3 - mid
5997 pmull2 v8.1q, v8.2d, v16.2d //GHASH block 4k+2 - mid
6008 eor v10.16b, v10.16b, v8.16b //GHASH block 4k+2 - mid
6021 pmull v6.1q, v6.1d, v16.1d //GHASH block 4k+3 - mid
6055 pmull v7.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
6076 eor x22, x22, x14 //AES block 4k+2 - round 14 high
6096 eor x21, x21, x13 //AES block 4k+2 - round 14 low
6104 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
6112 stp x21, x22, [x2], #16 //AES block 4k+2 - store result
6141 mov x6, v0.d[0] //AES block 4k+4 - mov low
6143 mov x7, v0.d[1] //AES block 4k+4 - mov high
6180 ld1 { v5.16b}, [x0], #16 //AES final-2 block - load ciphertext
6184 mov d10, v17.d[1] //GHASH final-3 block - mid
6188 eor v0.16b, v5.16b, v1.16b //AES final-2 block - result
6190 mov d22, v4.d[1] //GHASH final-3 block - mid
6192 mov x6, v0.d[0] //AES final-2 block - mov low
6194 mov x7, v0.d[1] //AES final-2 block - mov high
6200 pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high
6202 pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid
6203 eor x6, x6, x13 //AES final-2 block - round 14 low
6208 pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low
6209 eor x7, x7, x14 //AES final-2 block - round 14 high
6213 .L256_dec_blocks_more_than_2: //blocks left > 2
6215 rev64 v4.16b, v5.16b //GHASH final-2 block
6219 stp x6, x7, [x2], #16 //AES final-2 block - store result
6223 mov d22, v4.d[1] //GHASH final-2 block - mid
6225 pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low
6227 pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high
6229 eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid
6230 mov x6, v0.d[0] //AES final-1 block - mov low
6232 mov x7, v0.d[1] //AES final-1 block - mov high
6233 eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low
6236 pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid
6238 eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high
6244 eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid
6259 mov d22, v4.d[1] //GHASH final-1 block - mid
6263 pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high
6267 pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low
6268 mov x6, v0.d[0] //AES final block - mov low
6270 ins v22.d[1], v22.d[0] //GHASH final-1 block - mid
6272 mov x7, v0.d[1] //AES final block - mov high
6274 pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid
6310 mov v0.d[1], x10
6333 pmull v21.1q, v4.1d, v12.1d //GHASH final block - low
6335 mov d8, v4.d[1] //GHASH final block - mid
6339 pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high
6341 pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid
6356 pmull v7.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
6364 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
6394 .align 2
6395 .align 2