Lines Matching +full:1 +full:- +full:v0
1 /* Do not modify. This file is auto-generated from aes-gcm-armv8_64.pl. */
5 .arch armv8-a+crypto
13 stp x19, x20, [sp, #-112]!
41 sub x5, x5, #1 //byte_len - 1
48 fmov d1, x10 //CTR block 1
51 add w12, w12, #1 //increment rev_ctr32
55 rev w9, w12 //CTR block 1
56 add w12, w12, #1 //CTR block 1
59 orr x9, x11, x9, lsl #32 //CTR block 1
60 …ld1 { v0.16b}, [x16] //special case vector load initial counter so we …
62 fmov v1.d[1], x9 //CTR block 1
67 add w12, w12, #1 //CTR block 2
69 fmov v2.d[1], x9 //CTR block 2
75 add w12, w12, #1 //CTR block 3
76 fmov v3.d[1], x9 //CTR block 3
83 aesmc v1.16b, v1.16b //AES block 1 - round 0
87 aesmc v2.16b, v2.16b //AES block 2 - round 0
93 aese v0.16b, v18.16b
94 aesmc v0.16b, v0.16b //AES block 0 - round 0
98 aesmc v3.16b, v3.16b //AES block 3 - round 0
102 aesmc v2.16b, v2.16b //AES block 2 - round 1
105 aese v0.16b, v19.16b
106 aesmc v0.16b, v0.16b //AES block 0 - round 1
110 aesmc v1.16b, v1.16b //AES block 1 - round 1
114 aesmc v3.16b, v3.16b //AES block 3 - round 1
117 aese v0.16b, v20.16b
118 aesmc v0.16b, v0.16b //AES block 0 - round 2
122 aesmc v1.16b, v1.16b //AES block 1 - round 2
129 aesmc v3.16b, v3.16b //AES block 3 - round 2
132 aesmc v2.16b, v2.16b //AES block 2 - round 2
135 aese v0.16b, v21.16b
136 aesmc v0.16b, v0.16b //AES block 0 - round 3
139 aesmc v1.16b, v1.16b //AES block 1 - round 3
142 aesmc v2.16b, v2.16b //AES block 2 - round 3
146 aesmc v3.16b, v3.16b //AES block 3 - round 3
148 …and x5, x5, #0xffffffffffffffc0 //number of bytes to be processed in main loop (at least 1 byte…
152 aesmc v3.16b, v3.16b //AES block 3 - round 4
156 aesmc v2.16b, v2.16b //AES block 2 - round 4
159 aese v0.16b, v22.16b
160 aesmc v0.16b, v0.16b //AES block 0 - round 4
163 aesmc v3.16b, v3.16b //AES block 3 - round 5
166 aesmc v2.16b, v2.16b //AES block 2 - round 5
168 aese v0.16b, v23.16b
169 aesmc v0.16b, v0.16b //AES block 0 - round 5
172 aesmc v3.16b, v3.16b //AES block 3 - round 6
175 aesmc v1.16b, v1.16b //AES block 1 - round 4
178 aesmc v2.16b, v2.16b //AES block 2 - round 6
181 aese v0.16b, v24.16b
182 aesmc v0.16b, v0.16b //AES block 0 - round 6
185 aesmc v1.16b, v1.16b //AES block 1 - round 5
188 aesmc v3.16b, v3.16b //AES block 3 - round 7
190 aese v0.16b, v25.16b
191 aesmc v0.16b, v0.16b //AES block 0 - round 7
194 aesmc v1.16b, v1.16b //AES block 1 - round 6
197 aesmc v2.16b, v2.16b //AES block 2 - round 7
199 aese v0.16b, v26.16b
200 aesmc v0.16b, v0.16b //AES block 0 - round 8
203 aesmc v1.16b, v1.16b //AES block 1 - round 7
206 aesmc v2.16b, v2.16b //AES block 2 - round 8
209 aesmc v3.16b, v3.16b //AES block 3 - round 8
212 aesmc v1.16b, v1.16b //AES block 1 - round 8
214 aese v2.16b, v27.16b //AES block 2 - round 9
216 aese v0.16b, v27.16b //AES block 0 - round 9
220 aese v1.16b, v27.16b //AES block 1 - round 9
222 aese v3.16b, v27.16b //AES block 3 - round 9
225 ldp x6, x7, [x0, #0] //AES block 0 - load plaintext
230 ldp x21, x22, [x0, #32] //AES block 2 - load plaintext
235 ldp x19, x20, [x0, #16] //AES block 1 - load plaintext
240 ldp x23, x24, [x0, #48] //AES block 3 - load plaintext
245 eor x6, x6, x13 //AES block 0 - round 10 low
246 eor x7, x7, x14 //AES block 0 - round 10 high
248 eor x21, x21, x13 //AES block 2 - round 10 low
249 fmov d4, x6 //AES block 0 - mov low
251 eor x19, x19, x13 //AES block 1 - round 10 low
252 eor x22, x22, x14 //AES block 2 - round 10 high
253 fmov v4.d[1], x7 //AES block 0 - mov high
255 fmov d5, x19 //AES block 1 - mov low
256 eor x20, x20, x14 //AES block 1 - round 10 high
258 eor x23, x23, x13 //AES block 3 - round 10 low
259 fmov v5.d[1], x20 //AES block 1 - mov high
261 fmov d6, x21 //AES block 2 - mov low
262 eor x24, x24, x14 //AES block 3 - round 10 high
265 fmov v6.d[1], x22 //AES block 2 - mov high
268 eor v4.16b, v4.16b, v0.16b //AES block 0 - result
270 add w12, w12, #1 //CTR block 4
272 fmov v0.d[1], x9 //CTR block 4
275 eor v5.16b, v5.16b, v1.16b //AES block 1 - result
279 add w12, w12, #1 //CTR block 5
281 fmov v1.d[1], x9 //CTR block 5
283 fmov d7, x23 //AES block 3 - mov low
285 st1 { v4.16b}, [x2], #16 //AES block 0 - store result
287 fmov v7.d[1], x24 //AES block 3 - mov high
290 add w12, w12, #1 //CTR block 6
291 eor v6.16b, v6.16b, v2.16b //AES block 2 - result
292 st1 { v5.16b}, [x2], #16 //AES block 1 - store result
297 fmov v2.d[1], x9 //CTR block 6
299 st1 { v6.16b}, [x2], #16 //AES block 2 - store result
303 eor v7.16b, v7.16b, v3.16b //AES block 3 - result
304 st1 { v7.16b}, [x2], #16 //AES block 3 - store result
308 ldp x23, x24, [x0, #48] //AES block 4k+3 - load plaintext
317 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
321 rev64 v5.16b, v5.16b //GHASH block 4k+1 (t0 and t1 free)
324 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
325 add w12, w12, #1 //CTR block 4k+3
326 fmov v3.d[1], x9 //CTR block 4k+3
328 aese v0.16b, v18.16b
329 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
330 mov d31, v6.d[1] //GHASH block 4k+2 - mid
333 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
334 mov d30, v5.d[1] //GHASH block 4k+1 - mid
337 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
338 eor v4.16b, v4.16b, v11.16b //PRE 1
341 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
342 eor x24, x24, x14 //AES block 4k+3 - round 10 high
344 pmull2 v28.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
345 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
346 ldp x6, x7, [x0, #0] //AES block 4k+4 - load plaintext
351 aese v0.16b, v19.16b
352 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
355 eor v30.8b, v30.8b, v5.8b //GHASH block 4k+1 - mid
356 mov d8, v4.d[1] //GHASH block 4k - mid
359 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
360 add w12, w12, #1 //CTR block 4k+8
361 mov d10, v17.d[1] //GHASH block 4k - mid
363 aese v0.16b, v20.16b
364 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
366 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
367 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
370 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
372 aese v0.16b, v21.16b
373 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
374 eor v9.16b, v9.16b, v28.16b //GHASH block 4k+1 - high
376 pmull v28.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
378 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
381 pmull v30.1q, v30.1d, v17.1d //GHASH block 4k+1 - mid
383 pmull v29.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
384 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
386 pmull2 v8.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
387 eor x7, x7, x14 //AES block 4k+4 - round 10 high
389 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+1 - mid
390 mov d30, v7.d[1] //GHASH block 4k+3 - mid
393 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
394 eor v11.16b, v11.16b, v29.16b //GHASH block 4k+1 - low
397 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
398 eor x6, x6, x13 //AES block 4k+4 - round 10 low
401 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
402 eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid
404 pmull2 v4.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
407 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
408 eor v9.16b, v9.16b, v8.16b //GHASH block 4k+2 - high
410 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
412 pmull v29.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
415 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
416 eor v11.16b, v11.16b, v28.16b //GHASH block 4k+2 - low
419 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
422 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
425 aese v0.16b, v22.16b
426 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
427 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+3 - high
430 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
431 ldp x19, x20, [x0, #16] //AES block 4k+5 - load plaintext
437 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
438 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
440 aese v0.16b, v23.16b
441 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
442 ldp x21, x22, [x0, #32] //AES block 4k+6 - load plaintext
447 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
448 eor v11.16b, v11.16b, v29.16b //GHASH block 4k+3 - low
451 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
452 eor x19, x19, x13 //AES block 4k+5 - round 10 low
455 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
456 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid
459 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
460 eor x23, x23, x13 //AES block 4k+3 - round 10 low
463 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
464 eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
466 fmov d4, x6 //AES block 4k+4 - mov low
467 aese v0.16b, v24.16b
468 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
469 fmov v4.d[1], x7 //AES block 4k+4 - mov high
472 fmov d7, x23 //AES block 4k+3 - mov low
473 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
476 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
477 fmov d5, x19 //AES block 4k+5 - mov low
479 aese v0.16b, v25.16b
480 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
481 eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up
484 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
485 eor x20, x20, x14 //AES block 4k+5 - round 10 high
488 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
489 fmov v5.d[1], x20 //AES block 4k+5 - mov high
491 aese v0.16b, v26.16b
492 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
493 fmov v7.d[1], x24 //AES block 4k+3 - mov high
496 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
500 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
501 eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid
503 aese v0.16b, v27.16b //AES block 4k+4 - round 9
504 eor x21, x21, x13 //AES block 4k+6 - round 10 low
505 eor x22, x22, x14 //AES block 4k+6 - round 10 high
508 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
509 fmov d6, x21 //AES block 4k+6 - mov low
511 aese v1.16b, v27.16b //AES block 4k+5 - round 9
512 fmov v6.d[1], x22 //AES block 4k+6 - mov high
515 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
516 eor v4.16b, v4.16b, v0.16b //AES block 4k+4 - result
520 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
522 fmov v0.d[1], x9 //CTR block 4k+8
524 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
527 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
528 eor v5.16b, v5.16b, v1.16b //AES block 4k+5 - result
530 add w12, w12, #1 //CTR block 4k+9
534 pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
535 fmov v1.d[1], x9 //CTR block 4k+9
538 aese v2.16b, v27.16b //AES block 4k+6 - round 9
539 st1 { v4.16b}, [x2], #16 //AES block 4k+4 - store result
540 eor v6.16b, v6.16b, v2.16b //AES block 4k+6 - result
543 aese v3.16b, v27.16b //AES block 4k+7 - round 9
544 add w12, w12, #1 //CTR block 4k+10
545 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
548 eor v11.16b, v11.16b, v9.16b //MODULO - fold into low
549 st1 { v5.16b}, [x2], #16 //AES block 4k+5 - store result
551 fmov v2.d[1], x9 //CTR block 4k+10
552 st1 { v6.16b}, [x2], #16 //AES block 4k+6 - store result
556 eor v7.16b, v7.16b, v3.16b //AES block 4k+3 - result
558 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
559 st1 { v7.16b}, [x2], #16 //AES block 4k+3 - store result
565 rev64 v5.16b, v5.16b //GHASH block 4k+1 (t0 and t1 free)
568 add w12, w12, #1 //CTR block 4k+3
569 fmov v3.d[1], x9 //CTR block 4k+3
572 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
575 pmull v29.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
578 eor v4.16b, v4.16b, v11.16b //PRE 1
580 pmull2 v28.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
583 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
584 mov d30, v5.d[1] //GHASH block 4k+1 - mid
586 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
587 mov d8, v4.d[1] //GHASH block 4k - mid
589 mov d31, v6.d[1] //GHASH block 4k+2 - mid
590 mov d10, v17.d[1] //GHASH block 4k - mid
593 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
594 eor v30.8b, v30.8b, v5.8b //GHASH block 4k+1 - mid
596 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
598 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
599 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
602 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
604 pmull v30.1q, v30.1d, v17.1d //GHASH block 4k+1 - mid
605 eor v11.16b, v11.16b, v29.16b //GHASH block 4k+1 - low
607 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
609 aese v0.16b, v18.16b
610 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
611 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
614 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
616 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+1 - mid
617 mov d30, v7.d[1] //GHASH block 4k+3 - mid
619 aese v0.16b, v19.16b
620 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
621 eor v9.16b, v9.16b, v28.16b //GHASH block 4k+1 - high
623 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
625 pmull2 v8.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
626 eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid
628 pmull2 v4.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
630 pmull v28.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
633 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
634 eor v9.16b, v9.16b, v8.16b //GHASH block 4k+2 - high
636 aese v0.16b, v20.16b
637 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
639 pmull v29.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
643 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
644 eor v11.16b, v11.16b, v28.16b //GHASH block 4k+2 - low
647 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
649 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
650 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
653 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
656 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
657 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+3 - high
659 aese v0.16b, v21.16b
660 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
662 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid
666 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
667 eor v11.16b, v11.16b, v29.16b //GHASH block 4k+3 - low
669 aese v0.16b, v22.16b
670 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
672 pmull v28.1q, v9.1d, v8.1d
676 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
678 aese v0.16b, v23.16b
679 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
683 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
686 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
689 aese v0.16b, v24.16b
690 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
693 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
696 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
699 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
703 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
706 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
709 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
712 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
715 aese v0.16b, v25.16b
716 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
719 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
722 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
724 pmull v28.1q, v10.1d, v8.1d
727 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
731 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
733 aese v0.16b, v26.16b
734 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
738 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
740 aese v3.16b, v27.16b //AES block 4k+7 - round 9
743 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
745 aese v0.16b, v27.16b //AES block 4k+4 - round 9
747 aese v1.16b, v27.16b //AES block 4k+5 - round 9
750 aese v2.16b, v27.16b //AES block 4k+6 - round 9
754 ldp x6, x7, [x0], #16 //AES block 4k+4 - load plaintext
762 eor x6, x6, x13 //AES block 4k+4 - round 10 low
763 eor x7, x7, x14 //AES block 4k+4 - round 10 high
765 fmov d4, x6 //AES block 4k+4 - mov low
767 fmov v4.d[1], x7 //AES block 4k+4 - mov high
769 eor v5.16b, v4.16b, v0.16b //AES block 4k+4 - result
773 sub w12, w12, #1
787 sub w12, w12, #1
790 sub w12, w12, #1
793 st1 { v5.16b}, [x2], #16 //AES final-3 block - store result
795 ldp x6, x7, [x0], #16 //AES final-2 block - load input low & high
800 rev64 v4.16b, v5.16b //GHASH final-3 block
803 eor x7, x7, x14 //AES final-2 block - round 10 high
804 eor x6, x6, x13 //AES final-2 block - round 10 low
806 fmov d5, x6 //AES final-2 block - mov low
809 fmov v5.d[1], x7 //AES final-2 block - mov high
811 pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low
812 mov d22, v4.d[1] //GHASH final-3 block - mid
814 pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high
816 mov d10, v17.d[1] //GHASH final-3 block - mid
818 eor v5.16b, v5.16b, v1.16b //AES final-2 block - result
819 eor v22.8b, v22.8b, v4.8b //GHASH final-3 block - mid
821 pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid
824 st1 { v5.16b}, [x2], #16 //AES final-2 block - store result
826 rev64 v4.16b, v5.16b //GHASH final-2 block
827 ldp x6, x7, [x0], #16 //AES final-1 block - load input low & high
834 eor x6, x6, x13 //AES final-1 block - round 10 low
836 fmov d5, x6 //AES final-1 block - mov low
837 eor x7, x7, x14 //AES final-1 block - round 10 high
839 pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high
840 fmov v5.d[1], x7 //AES final-1 block - mov high
842 mov d22, v4.d[1] //GHASH final-2 block - mid
844 pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low
846 eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high
848 eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid
850 eor v5.16b, v5.16b, v2.16b //AES final-1 block - result
852 eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low
854 pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid
858 eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid
859 .L128_enc_blocks_more_than_1: //blocks left > 1
861 st1 { v5.16b}, [x2], #16 //AES final-1 block - store result
863 rev64 v4.16b, v5.16b //GHASH final-1 block
864 ldp x6, x7, [x0], #16 //AES final block - load input low & high
871 eor x7, x7, x14 //AES final block - round 10 high
872 eor x6, x6, x13 //AES final block - round 10 low
874 fmov d5, x6 //AES final block - mov low
876 pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high
877 fmov v5.d[1], x7 //AES final block - mov high
879 mov d22, v4.d[1] //GHASH final-1 block - mid
881 pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low
883 eor v22.8b, v22.8b, v4.8b //GHASH final-1 block - mid
885 eor v5.16b, v5.16b, v3.16b //AES final block - result
887 ins v22.d[1], v22.d[0] //GHASH final-1 block - mid
889 pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid
891 eor v11.16b, v11.16b, v21.16b //GHASH final-1 block - low
893 eor v9.16b, v9.16b, v20.16b //GHASH final-1 block - high
895 eor v10.16b, v10.16b, v22.16b //GHASH final-1 block - mid
897 .L128_enc_blocks_less_than_1: //blocks left <= 1
903 sub x1, x1, #128 //bit_length -= 128
905 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128])
917 fmov v0.d[1], x7
919 …and v5.16b, v5.16b, v0.16b //possibly partial last block has zeroes in …
925 mov d8, v4.d[1] //GHASH final block - mid
927 pmull v21.1q, v4.1d, v12.1d //GHASH final block - low
930 eor v8.8b, v8.8b, v4.8b //GHASH final block - mid
936 pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high
938 pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid
940 eor v11.16b, v11.16b, v21.16b //GHASH final block - low
942 eor v9.16b, v9.16b, v20.16b //GHASH final block - high
944 eor v10.16b, v10.16b, v8.16b //GHASH final block - mid
947 eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
951 eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up
953 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
955 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
957 eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid
959 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
961 pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
963 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
965 …bif v5.16b, v18.16b, v0.16b //insert existing bytes in top end of res…
967 eor v11.16b, v11.16b, v9.16b //MODULO - fold into low
972 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
989 .size aes_gcm_enc_128_kernel,.-aes_gcm_enc_128_kernel
996 stp x19, x20, [sp, #-112]!
1018 sub x5, x5, #1 //byte_len - 1
1021 …and x5, x5, #0xffffffffffffffc0 //number of bytes to be processed in main loop (at least 1 byte mu…
1022 …ld1 { v0.16b}, [x16] //special case vector load initial counter so we …
1035 fmov d1, x10 //CTR block 1
1036 add w12, w12, #1 //increment rev_ctr32
1038 aese v0.16b, v18.16b
1039 aesmc v0.16b, v0.16b //AES block 0 - round 0
1040 rev w9, w12 //CTR block 1
1042 orr x9, x11, x9, lsl #32 //CTR block 1
1044 add w12, w12, #1 //CTR block 1
1046 fmov v1.d[1], x9 //CTR block 1
1048 add w12, w12, #1 //CTR block 2
1050 aese v0.16b, v19.16b
1051 aesmc v0.16b, v0.16b //AES block 0 - round 1
1054 fmov v2.d[1], x9 //CTR block 2
1059 add w12, w12, #1 //CTR block 3
1061 fmov v3.d[1], x9 //CTR block 3
1065 aesmc v1.16b, v1.16b //AES block 1 - round 0
1068 aese v0.16b, v20.16b
1069 aesmc v0.16b, v0.16b //AES block 0 - round 2
1073 aesmc v2.16b, v2.16b //AES block 2 - round 0
1077 aesmc v1.16b, v1.16b //AES block 1 - round 1
1081 aesmc v3.16b, v3.16b //AES block 3 - round 0
1084 aesmc v2.16b, v2.16b //AES block 2 - round 1
1087 aesmc v1.16b, v1.16b //AES block 1 - round 2
1090 aesmc v3.16b, v3.16b //AES block 3 - round 1
1095 aese v0.16b, v21.16b
1096 aesmc v0.16b, v0.16b //AES block 0 - round 3
1100 aesmc v1.16b, v1.16b //AES block 1 - round 3
1103 aesmc v3.16b, v3.16b //AES block 3 - round 2
1106 aesmc v2.16b, v2.16b //AES block 2 - round 2
1110 aesmc v1.16b, v1.16b //AES block 1 - round 4
1113 aesmc v3.16b, v3.16b //AES block 3 - round 3
1116 aesmc v2.16b, v2.16b //AES block 2 - round 3
1121 aese v0.16b, v22.16b
1122 aesmc v0.16b, v0.16b //AES block 0 - round 4
1126 aesmc v1.16b, v1.16b //AES block 1 - round 5
1129 aesmc v2.16b, v2.16b //AES block 2 - round 4
1132 aesmc v3.16b, v3.16b //AES block 3 - round 4
1134 aese v0.16b, v23.16b
1135 aesmc v0.16b, v0.16b //AES block 0 - round 5
1138 aesmc v2.16b, v2.16b //AES block 2 - round 5
1144 aesmc v3.16b, v3.16b //AES block 3 - round 5
1146 aese v0.16b, v24.16b
1147 aesmc v0.16b, v0.16b //AES block 0 - round 6
1150 aesmc v1.16b, v1.16b //AES block 1 - round 6
1153 aesmc v3.16b, v3.16b //AES block 3 - round 6
1156 aesmc v2.16b, v2.16b //AES block 2 - round 6
1167 aesmc v1.16b, v1.16b //AES block 1 - round 7
1170 aesmc v2.16b, v2.16b //AES block 2 - round 7
1172 aese v0.16b, v25.16b
1173 aesmc v0.16b, v0.16b //AES block 0 - round 7
1177 aesmc v3.16b, v3.16b //AES block 3 - round 7
1180 aesmc v1.16b, v1.16b //AES block 1 - round 8
1184 aesmc v2.16b, v2.16b //AES block 2 - round 8
1187 aesmc v3.16b, v3.16b //AES block 3 - round 8
1189 aese v0.16b, v26.16b
1190 aesmc v0.16b, v0.16b //AES block 0 - round 8
1193 aese v2.16b, v27.16b //AES block 2 - round 9
1195 aese v3.16b, v27.16b //AES block 3 - round 9
1197 aese v0.16b, v27.16b //AES block 0 - round 9
1200 aese v1.16b, v27.16b //AES block 1 - round 9
1204 …ld1 {v4.16b, v5.16b}, [x0], #32 //AES block 0 - load ciphertext; AES block 1 - load …
1206 eor v1.16b, v5.16b, v1.16b //AES block 1 - result
1207 ld1 {v6.16b}, [x0], #16 //AES block 2 - load ciphertext
1209 eor v0.16b, v4.16b, v0.16b //AES block 0 - result
1214 add w12, w12, #1 //CTR block 4
1215 ld1 {v7.16b}, [x0], #16 //AES block 3 - load ciphertext
1217 rev64 v5.16b, v5.16b //GHASH block 1
1218 mov x19, v1.d[0] //AES block 1 - mov low
1220 mov x20, v1.d[1] //AES block 1 - mov high
1222 mov x6, v0.d[0] //AES block 0 - mov low
1225 mov x7, v0.d[1] //AES block 0 - mov high
1229 fmov v0.d[1], x9 //CTR block 4
1231 eor x19, x19, x13 //AES block 1 - round 10 low
1236 add w12, w12, #1 //CTR block 5
1239 fmov v1.d[1], x9 //CTR block 5
1241 add w12, w12, #1 //CTR block 6
1245 eor x20, x20, x14 //AES block 1 - round 10 high
1249 eor x6, x6, x13 //AES block 0 - round 10 low
1253 eor v2.16b, v6.16b, v2.16b //AES block 2 - result
1255 eor x7, x7, x14 //AES block 0 - round 10 high
1259 stp x6, x7, [x2], #16 //AES block 0 - store result
1261 stp x19, x20, [x2], #16 //AES block 1 - store result
1265 eor v3.16b, v7.16b, v3.16b //AES block 4k+3 - result
1267 mov x21, v2.d[0] //AES block 4k+2 - mov low
1269 pmull2 v28.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
1270 mov x22, v2.d[1] //AES block 4k+2 - mov high
1273 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
1277 fmov v2.d[1], x9 //CTR block 4k+6
1280 mov x23, v3.d[0] //AES block 4k+3 - mov low
1281 eor v4.16b, v4.16b, v11.16b //PRE 1
1282 mov d30, v5.d[1] //GHASH block 4k+1 - mid
1285 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
1288 pmull v29.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
1289 mov x24, v3.d[1] //AES block 4k+3 - mov high
1292 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
1294 eor v30.8b, v30.8b, v5.8b //GHASH block 4k+1 - mid
1297 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
1298 fmov v3.d[1], x9 //CTR block 4k+7
1301 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
1302 mov d10, v17.d[1] //GHASH block 4k - mid
1304 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
1305 eor v11.16b, v11.16b, v29.16b //GHASH block 4k+1 - low
1307 pmull v29.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
1310 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
1311 mov d8, v4.d[1] //GHASH block 4k - mid
1314 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
1315 eor v9.16b, v9.16b, v28.16b //GHASH block 4k+1 - high
1317 aese v0.16b, v18.16b
1318 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
1320 pmull v28.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
1321 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
1324 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
1325 eor x23, x23, x13 //AES block 4k+3 - round 10 low
1329 pmull v30.1q, v30.1d, v17.1d //GHASH block 4k+1 - mid
1330 eor x22, x22, x14 //AES block 4k+2 - round 10 high
1334 mov d31, v6.d[1] //GHASH block 4k+2 - mid
1336 aese v0.16b, v19.16b
1337 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
1338 eor v11.16b, v11.16b, v28.16b //GHASH block 4k+2 - low
1340 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
1343 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
1344 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
1346 aese v0.16b, v20.16b
1347 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
1350 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
1351 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+1 - mid
1353 pmull2 v8.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
1355 aese v0.16b, v21.16b
1356 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
1357 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
1359 pmull2 v4.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
1362 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
1363 mov d30, v7.d[1] //GHASH block 4k+3 - mid
1365 aese v0.16b, v22.16b
1366 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
1367 eor v9.16b, v9.16b, v8.16b //GHASH block 4k+2 - high
1369 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
1370 eor x24, x24, x14 //AES block 4k+3 - round 10 high
1375 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
1376 eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid
1379 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
1380 eor x21, x21, x13 //AES block 4k+2 - round 10 low
1384 aese v0.16b, v23.16b
1385 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
1389 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
1390 eor v11.16b, v11.16b, v29.16b //GHASH block 4k+3 - low
1393 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
1395 aese v0.16b, v24.16b
1396 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
1397 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
1400 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
1401 stp x21, x22, [x2], #16 //AES block 4k+2 - store result
1403 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
1404 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+3 - high
1405 ld1 {v4.16b}, [x0], #16 //AES block 4k+3 - load ciphertext
1408 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
1409 add w12, w12, #1 //CTR block 4k+7
1411 aese v0.16b, v25.16b
1412 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
1416 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
1417 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid
1420 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
1421 stp x23, x24, [x2], #16 //AES block 4k+3 - store result
1423 aese v0.16b, v26.16b
1424 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
1425 eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
1428 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
1431 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
1432 ld1 {v5.16b}, [x0], #16 //AES block 4k+4 - load ciphertext
1433 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
1435 aese v0.16b, v27.16b //AES block 4k+4 - round 9
1439 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
1440 eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up
1442 aese v1.16b, v27.16b //AES block 4k+5 - round 9
1445 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
1446 eor v0.16b, v4.16b, v0.16b //AES block 4k+4 - result
1449 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
1450 ld1 {v6.16b}, [x0], #16 //AES block 4k+5 - load ciphertext
1452 add w12, w12, #1 //CTR block 4k+8
1453 eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid
1454 eor v1.16b, v5.16b, v1.16b //AES block 4k+5 - result
1457 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
1458 ld1 {v7.16b}, [x0], #16 //AES block 4k+6 - load ciphertext
1461 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
1464 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
1465 mov x7, v0.d[1] //AES block 4k+4 - mov high
1468 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
1469 mov x6, v0.d[0] //AES block 4k+4 - mov low
1472 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
1475 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
1476 fmov v0.d[1], x9 //CTR block 4k+8
1479 aese v2.16b, v27.16b //AES block 4k+6 - round 9
1481 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
1484 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
1485 eor x7, x7, x14 //AES block 4k+4 - round 10 high
1489 eor v11.16b, v11.16b, v8.16b //MODULO - fold into low
1490 mov x20, v1.d[1] //AES block 4k+5 - mov high
1491 eor x6, x6, x13 //AES block 4k+4 - round 10 low
1495 eor v2.16b, v6.16b, v2.16b //AES block 4k+6 - result
1496 mov x19, v1.d[0] //AES block 4k+5 - mov low
1497 add w12, w12, #1 //CTR block 4k+9
1499 aese v3.16b, v27.16b //AES block 4k+7 - round 9
1504 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
1505 fmov v1.d[1], x9 //CTR block 4k+9
1508 add w12, w12, #1 //CTR block 4k+10
1510 eor x20, x20, x14 //AES block 4k+5 - round 10 high
1514 stp x6, x7, [x2], #16 //AES block 4k+4 - store result
1516 eor x19, x19, x13 //AES block 4k+5 - round 10 low
1520 stp x19, x20, [x2], #16 //AES block 4k+5 - store result
1527 mov x21, v2.d[0] //AES block 4k+2 - mov low
1528 mov d30, v5.d[1] //GHASH block 4k+1 - mid
1530 aese v0.16b, v18.16b
1531 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
1532 eor v3.16b, v7.16b, v3.16b //AES block 4k+3 - result
1535 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
1536 mov x22, v2.d[1] //AES block 4k+2 - mov high
1538 eor v4.16b, v4.16b, v11.16b //PRE 1
1542 aese v0.16b, v19.16b
1543 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
1544 fmov v2.d[1], x9 //CTR block 4k+6
1547 mov x23, v3.d[0] //AES block 4k+3 - mov low
1548 eor v30.8b, v30.8b, v5.8b //GHASH block 4k+1 - mid
1550 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
1551 mov d10, v17.d[1] //GHASH block 4k - mid
1552 mov x24, v3.d[1] //AES block 4k+3 - mov high
1555 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
1556 mov d31, v6.d[1] //GHASH block 4k+2 - mid
1558 aese v0.16b, v20.16b
1559 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
1562 pmull v29.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
1563 mov d8, v4.d[1] //GHASH block 4k - mid
1567 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
1568 fmov v3.d[1], x9 //CTR block 4k+7
1570 pmull v30.1q, v30.1d, v17.1d //GHASH block 4k+1 - mid
1571 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
1576 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
1577 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
1579 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
1582 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
1583 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
1585 pmull2 v28.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
1587 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
1588 eor v11.16b, v11.16b, v29.16b //GHASH block 4k+1 - low
1590 pmull v29.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
1592 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
1593 eor v9.16b, v9.16b, v28.16b //GHASH block 4k+1 - high
1595 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+1 - mid
1597 pmull2 v4.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
1599 pmull2 v8.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
1600 mov d30, v7.d[1] //GHASH block 4k+3 - mid
1603 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
1604 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
1606 pmull v28.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
1608 eor v9.16b, v9.16b, v8.16b //GHASH block 4k+2 - high
1612 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
1613 eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid
1615 eor v11.16b, v11.16b, v28.16b //GHASH block 4k+2 - low
1618 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
1619 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+3 - high
1622 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
1623 eor x23, x23, x13 //AES block 4k+3 - round 10 low
1627 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
1628 eor x21, x21, x13 //AES block 4k+2 - round 10 low
1632 eor v11.16b, v11.16b, v29.16b //GHASH block 4k+3 - low
1635 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
1638 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
1641 aese v0.16b, v21.16b
1642 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
1645 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
1646 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid
1649 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
1652 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
1653 eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
1656 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
1659 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
1662 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
1664 aese v0.16b, v22.16b
1665 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
1666 eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up
1668 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
1671 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
1672 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
1675 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
1677 aese v0.16b, v23.16b
1678 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
1679 eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid
1682 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
1685 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
1687 aese v0.16b, v24.16b
1688 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
1691 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
1692 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
1695 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
1697 aese v0.16b, v25.16b
1698 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
1700 aese v1.16b, v27.16b //AES block 4k+5 - round 9
1702 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
1703 eor x24, x24, x14 //AES block 4k+3 - round 10 high
1708 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
1709 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
1712 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
1714 aese v0.16b, v26.16b
1715 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
1716 eor v11.16b, v11.16b, v8.16b //MODULO - fold into low
1719 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
1722 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
1723 eor x22, x22, x14 //AES block 4k+2 - round 10 high
1727 aese v0.16b, v27.16b //AES block 4k+4 - round 9
1728 stp x21, x22, [x2], #16 //AES block 4k+2 - store result
1730 aese v2.16b, v27.16b //AES block 4k+6 - round 9
1731 add w12, w12, #1 //CTR block 4k+7
1732 stp x23, x24, [x2], #16 //AES block 4k+3 - store result
1734 aese v3.16b, v27.16b //AES block 4k+7 - round 9
1735 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
1739 ld1 { v5.16b}, [x0], #16 //AES block 4k+4 - load ciphertext
1741 eor v0.16b, v5.16b, v0.16b //AES block 4k+4 - result
1743 mov x7, v0.d[1] //AES block 4k+4 - mov high
1745 mov x6, v0.d[0] //AES block 4k+4 - mov low
1749 eor x7, x7, x14 //AES block 4k+4 - round 10 high
1754 eor x6, x6, x13 //AES block 4k+4 - round 10 low
1761 sub w12, w12, #1
1774 sub w12, w12, #1
1777 sub w12, w12, #1
1780 rev64 v4.16b, v5.16b //GHASH final-3 block
1781 ld1 { v5.16b}, [x0], #16 //AES final-2 block - load ciphertext
1785 mov d10, v17.d[1] //GHASH final-3 block - mid
1786 stp x6, x7, [x2], #16 //AES final-3 block - store result
1787 eor v0.16b, v5.16b, v1.16b //AES final-2 block - result
1789 mov d22, v4.d[1] //GHASH final-3 block - mid
1790 mov x7, v0.d[1] //AES final-2 block - mov high
1792 pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low
1793 mov x6, v0.d[0] //AES final-2 block - mov low
1795 pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high
1797 eor v22.8b, v22.8b, v4.8b //GHASH final-3 block - mid
1800 eor x7, x7, x14 //AES final-2 block - round 10 high
1804 pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid
1805 eor x6, x6, x13 //AES final-2 block - round 10 low
1811 rev64 v4.16b, v5.16b //GHASH final-2 block
1812 ld1 { v5.16b}, [x0], #16 //AES final-1 block - load ciphertext
1816 eor v0.16b, v5.16b, v2.16b //AES final-1 block - result
1817 stp x6, x7, [x2], #16 //AES final-2 block - store result
1819 mov d22, v4.d[1] //GHASH final-2 block - mid
1821 pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low
1823 pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high
1824 mov x6, v0.d[0] //AES final-1 block - mov low
1826 mov x7, v0.d[1] //AES final-1 block - mov high
1827 eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid
1831 pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid
1833 eor x6, x6, x13 //AES final-1 block - round 10 low
1837 eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low
1839 eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high
1841 eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid
1842 eor x7, x7, x14 //AES final-1 block - round 10 high
1846 .L128_dec_blocks_more_than_1: //blocks left > 1
// Decrypt tail: retire the "final-1" block and start the final block.
// NOTE: this is a filtered listing - the leading source line numbers skip, so
// instructions that are not shown sit between some of the lines below.
//  * v5 still holds the final-1 ciphertext block; rev64 byte-reverses each
//    64-bit half into v4 for GHASH, then v5 is reloaded from [x0] (post-inc 16).
//  * x6/x7 hold the final-1 plaintext computed before this label; they are
//    stored to [x2] and then refilled from v0 = ciphertext ^ v3, after which the
//    x13/x14 halves ("round 10" per the comments) are XORed in.
//  * GHASH: low/high products use v13, the mid (Karatsuba) product uses the
//    upper half of v16; results accumulate into v11 (low), v9 (high), v10 (mid).
//  * v20-v22 are used purely as scratch here.
1848 rev64 v4.16b, v5.16b //GHASH final-1 block
1850 ld1 { v5.16b}, [x0], #16 //AES final block - load ciphertext
1853 mov d22, v4.d[1] //GHASH final-1 block - mid
1855 eor v0.16b, v5.16b, v3.16b //AES final block - result
1857 eor v22.8b, v22.8b, v4.8b //GHASH final-1 block - mid
1859 stp x6, x7, [x2], #16 //AES final-1 block - store result
1860 mov x6, v0.d[0] //AES final block - mov low
1862 mov x7, v0.d[1] //AES final block - mov high
// Duplicate the folded (hi ^ lo) value into the upper lane so pmull2 can
// multiply it by the upper half of v16.
1863 ins v22.d[1], v22.d[0] //GHASH final-1 block - mid
1865 pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low
1867 pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high
1869 pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid
1872 eor v11.16b, v11.16b, v21.16b //GHASH final-1 block - low
1874 eor v9.16b, v9.16b, v20.16b //GHASH final-1 block - high
1875 eor x7, x7, x14 //AES final block - round 10 high
1879 eor x6, x6, x13 //AES final block - round 10 low
1883 eor v10.16b, v10.16b, v22.16b //GHASH final-1 block - mid
1884 .L128_dec_blocks_less_than_1: //blocks left <= 1
// Final (possibly partial) block of the 128-bit decrypt kernel, followed by
// the last GHASH modular reduction.
// NOTE: filtered listing - the leading source line numbers skip, so the mask
// construction, the rev64 of the masked block and the final stores are among
// the lines not shown here.
1890 sub x1, x1, #128 //bit_length -= 128
1892 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128])
1904 mov v0.d[1], x10 //upper 64 bits of the byte mask applied to v5 on the next line
1906 …and v5.16b, v5.16b, v0.16b //possibly partial last block has zeroes in …
// GHASH of the final block: low/high products use v12, the mid (Karatsuba)
// product uses the low half of v16. v4 is presumably rev64 of the masked v5,
// computed in a line not shown - confirm against the full source.
1916 pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high
1917 mov d8, v4.d[1] //GHASH final block - mid
1919 eor v8.8b, v8.8b, v4.8b //GHASH final block - mid
1920 eor v9.16b, v9.16b, v20.16b //GHASH final block - high
1922 pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid
1924 pmull v21.1q, v4.1d, v12.1d //GHASH final block - low
1934 eor v10.16b, v10.16b, v8.16b //GHASH final block - mid
1937 eor v11.16b, v11.16b, v21.16b //GHASH final block - low
// Reduction: fold high (v9) into mid (v10), then mid into low (v11); the
// reduced GHASH value ends up in v11.
// NOTE(review): v8 was scratch for the mid product above but is the
// multiplier below, so it must be reloaded with the reduction constant in one
// of the lines not shown (1925-1941) - confirm against the full source.
1942 eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
1944 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
1946 eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up
1953 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
1955 eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid
1957 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
1959 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
1960 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
1962 eor v11.16b, v11.16b, v8.16b //MODULO - fold into low
1964 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
// Symbol size = current location minus the function's start label.
1982 .size aes_gcm_dec_128_kernel,.-aes_gcm_dec_128_kernel
1989 stp x19, x20, [sp, #-112]! //open a 112-byte stack frame (pre-index writeback) and save x19/x20 at its base
// aes_gcm_enc_192_kernel - entry, counter setup and the first four blocks.
// NOTE: this is a filtered listing - the leading source line numbers skip, so
// the function label, the remaining register saves, the key loads and every
// aese for v1-v3 are among the lines not shown.
// Register roles that the visible lines establish:
//   x0       input pointer (plaintext is loaded from it)
//   x2       output pointer (results are stored through it, post-inc 16)
//   x5       byte length, reduced below to the main-loop byte count
//   w12      running 32-bit counter ("rev_ctr32")
//   x9       high 64 bits of the next counter block: x11 | (rev(w12) << 32)
//   v0-v3    the four counter blocks being encrypted
//   v18-v29  round keys 0..11 (per the round numbers in the aese comments)
//   x13/x14  low/high 64-bit values XORed into the data as "round 12"
//   v4-v7    data blocks: (plaintext ^ x13:x14) ^ AES state = result
//
// Counter blocks: block 0 is loaded whole from [x16]; block 1 gets x10 in its
// low half and x9 in its high half. Blocks 2 and 3 receive their high halves
// the same way (their low-half moves are not shown here).
2022 add w12, w12, #1 //increment rev_ctr32
2025 rev w9, w12 //CTR block 1
2026 add w12, w12, #1 //CTR block 1
2027 fmov d1, x10 //CTR block 1
2029 orr x9, x11, x9, lsl #32 //CTR block 1
2030 …ld1 { v0.16b}, [x16] //special case vector load initial counter so we c…
2032 fmov v1.d[1], x9 //CTR block 1
2034 add w12, w12, #1 //CTR block 2
2039 fmov v2.d[1], x9 //CTR block 2
2045 fmov v3.d[1], x9 //CTR block 3
// AES rounds for blocks 0..3. Each aese/aesmc pair is one full round; the
// comment on the aesmc names the block and round. Rounds 0..10 use v18..v28,
// and round 11 (v29) is an aese with no aesmc.
2051 aese v0.16b, v18.16b
2052 aesmc v0.16b, v0.16b //AES block 0 - round 0
2058 aesmc v3.16b, v3.16b //AES block 3 - round 0
2062 aesmc v1.16b, v1.16b //AES block 1 - round 0
2068 aesmc v2.16b, v2.16b //AES block 2 - round 0
2071 aese v0.16b, v19.16b
2072 aesmc v0.16b, v0.16b //AES block 0 - round 1
2076 aesmc v1.16b, v1.16b //AES block 1 - round 1
2082 aesmc v2.16b, v2.16b //AES block 2 - round 1
2086 aesmc v3.16b, v3.16b //AES block 3 - round 1
2091 aese v0.16b, v20.16b
2092 aesmc v0.16b, v0.16b //AES block 0 - round 2
2095 aesmc v2.16b, v2.16b //AES block 2 - round 2
2098 aesmc v3.16b, v3.16b //AES block 3 - round 2
2100 aese v0.16b, v21.16b
2101 aesmc v0.16b, v0.16b //AES block 0 - round 3
2105 aesmc v2.16b, v2.16b //AES block 2 - round 3
2108 aesmc v1.16b, v1.16b //AES block 1 - round 2
2111 aese v0.16b, v22.16b
2112 aesmc v0.16b, v0.16b //AES block 0 - round 4
2115 aesmc v3.16b, v3.16b //AES block 3 - round 3
2118 aesmc v1.16b, v1.16b //AES block 1 - round 3
2120 aese v0.16b, v23.16b
2121 aesmc v0.16b, v0.16b //AES block 0 - round 5
2124 aesmc v2.16b, v2.16b //AES block 2 - round 4
2127 aesmc v1.16b, v1.16b //AES block 1 - round 4
2129 aese v0.16b, v24.16b
2130 aesmc v0.16b, v0.16b //AES block 0 - round 6
2133 aesmc v3.16b, v3.16b //AES block 3 - round 4
2136 aesmc v2.16b, v2.16b //AES block 2 - round 5
2139 aesmc v1.16b, v1.16b //AES block 1 - round 5
2142 aesmc v3.16b, v3.16b //AES block 3 - round 5
2145 aesmc v2.16b, v2.16b //AES block 2 - round 6
2151 aesmc v1.16b, v1.16b //AES block 1 - round 6
2154 aesmc v3.16b, v3.16b //AES block 3 - round 6
2156 aese v0.16b, v25.16b
2157 aesmc v0.16b, v0.16b //AES block 0 - round 7
2160 aesmc v1.16b, v1.16b //AES block 1 - round 7
2164 aesmc v3.16b, v3.16b //AES block 3 - round 7
2166 aese v0.16b, v26.16b
2167 aesmc v0.16b, v0.16b //AES block 0 - round 8
2170 aesmc v2.16b, v2.16b //AES block 2 - round 7
2174 aesmc v1.16b, v1.16b //AES block 1 - round 8
2177 aesmc v3.16b, v3.16b //AES block 3 - round 8
2180 aesmc v2.16b, v2.16b //AES block 2 - round 8
2182 aese v0.16b, v27.16b
2183 aesmc v0.16b, v0.16b //AES block 0 - round 9
2186 aesmc v3.16b, v3.16b //AES block 3 - round 9
2189 aesmc v2.16b, v2.16b //AES block 2 - round 9
2192 aesmc v1.16b, v1.16b //AES block 1 - round 9
2194 aese v0.16b, v28.16b
2195 aesmc v0.16b, v0.16b //AES block 0 - round 10
2198 aesmc v2.16b, v2.16b //AES block 2 - round 10
2201 aesmc v1.16b, v1.16b //AES block 1 - round 10
2206 aesmc v3.16b, v3.16b //AES block 3 - round 10
// x5 = (byte_len - 1) rounded down to a multiple of 64. Subtracting 1 first
// keeps a length that is an exact multiple of 64 from being consumed entirely
// here, so some bytes are always left over.
2207 sub x5, x5, #1 //byte_len - 1
2210 …and x5, x5, #0xffffffffffffffc0 //number of bytes to be processed in main loop (at least 1 byte …
2214 aese v2.16b, v29.16b //AES block 2 - round 11
2218 aese v1.16b, v29.16b //AES block 1 - round 11
2221 aese v0.16b, v29.16b //AES block 0 - round 11
2222 add w12, w12, #1 //CTR block 3
2224 aese v3.16b, v29.16b //AES block 3 - round 11
// First 64 bytes of plaintext, loaded as four 64-bit register pairs. The
// "round 12" XOR is applied to the plaintext in general-purpose registers
// (x13 low, x14 high) before the pairs are moved into v4-v7 and XORed with
// the AES state.
2228 ldp x6, x7, [x0, #0] //AES block 0 - load plaintext
2234 ldp x21, x22, [x0, #32] //AES block 2 - load plaintext
2239 ldp x23, x24, [x0, #48] //AES block 3 - load plaintext
2244 ldp x19, x20, [x0, #16] //AES block 1 - load plaintext
2252 eor x6, x6, x13 //AES block 0 - round 12 low
2254 eor x7, x7, x14 //AES block 0 - round 12 high
2255 eor x22, x22, x14 //AES block 2 - round 12 high
2256 fmov d4, x6 //AES block 0 - mov low
2258 eor x24, x24, x14 //AES block 3 - round 12 high
2259 fmov v4.d[1], x7 //AES block 0 - mov high
2261 eor x21, x21, x13 //AES block 2 - round 12 low
2262 eor x19, x19, x13 //AES block 1 - round 12 low
2264 fmov d5, x19 //AES block 1 - mov low
2265 eor x20, x20, x14 //AES block 1 - round 12 high
2267 fmov v5.d[1], x20 //AES block 1 - mov high
2269 eor x23, x23, x13 //AES block 3 - round 12 low
2270 fmov d6, x21 //AES block 2 - mov low
// As each of v0-v2 is consumed by its result XOR, it is re-seeded at once
// with the high half of the next counter block (CTR blocks 4..6).
2272 add w12, w12, #1 //CTR block 4
2273 eor v4.16b, v4.16b, v0.16b //AES block 0 - result
2276 fmov v0.d[1], x9 //CTR block 4
2280 add w12, w12, #1 //CTR block 5
2282 fmov d7, x23 //AES block 3 - mov low
2283 st1 { v4.16b}, [x2], #16 //AES block 0 - store result
2285 fmov v6.d[1], x22 //AES block 2 - mov high
2287 eor v5.16b, v5.16b, v1.16b //AES block 1 - result
2289 st1 { v5.16b}, [x2], #16 //AES block 1 - store result
2291 fmov v7.d[1], x24 //AES block 3 - mov high
2293 fmov v1.d[1], x9 //CTR block 5
2298 add w12, w12, #1 //CTR block 6
2299 eor v6.16b, v6.16b, v2.16b //AES block 2 - result
2302 fmov v2.d[1], x9 //CTR block 6
2306 st1 { v6.16b}, [x2], #16 //AES block 2 - store result
2308 eor v7.16b, v7.16b, v3.16b //AES block 3 - result
2309 st1 { v7.16b}, [x2], #16 //AES block 3 - store result
// aes_gcm_enc_192_kernel - first "4k+N" pass. Presumably this is the
// steady-state main-loop body; the loop label and its branch are not visible
// in this filtered listing (the leading source line numbers skip) - confirm
// against the full source.
// Four interleaved streams of work are visible:
//   1. AES rounds 0..11 on counter blocks 4k+4..4k+7 held in v0-v3.
//   2. GHASH of the four previous result blocks in v4-v7: low/high products
//      against v15, v14, v13, v12 (blocks 4k, 4k+1, 4k+2, 4k+3) and mid
//      (Karatsuba) products against v17/v16, accumulated into v11 (low),
//      v9 (high) and v10 (mid).
//   3. The MODULO reduction, which folds v9 into v10 and then v10 into v11.
//   4. Loads of the next 64 plaintext bytes from x0, the x13/x14 "round 12"
//      XOR in general-purpose registers, and stores of the results through x2.
2314 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
2315 rev64 v5.16b, v5.16b //GHASH block 4k+1 (t0 and t1 free)
2318 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
2319 ldp x19, x20, [x0, #16] //AES block 4k+5 - load plaintext
2329 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
2330 fmov v3.d[1], x9 //CTR block 4k+3
2332 pmull2 v30.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
2334 ldp x21, x22, [x0, #32] //AES block 4k+6 - load plaintext
2339 aese v0.16b, v18.16b
2340 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
2341 ldp x23, x24, [x0, #48] //AES block 4k+3 - load plaintext
2346 pmull v31.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
// "PRE 1": the running GHASH value in v11 is XORed into block 4k before that
// block is multiplied, which chains this group of four onto the previous one.
2347 eor v4.16b, v4.16b, v11.16b //PRE 1
2350 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
2352 aese v0.16b, v19.16b
2353 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
2357 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
2358 eor x24, x24, x14 //AES block 4k+3 - round 12 high
2360 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
2361 mov d8, v4.d[1] //GHASH block 4k - mid
2363 aese v0.16b, v20.16b
2364 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
2367 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
2368 eor x21, x21, x13 //AES block 4k+6 - round 12 low
2370 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
2371 eor v11.16b, v11.16b, v31.16b //GHASH block 4k+1 - low
2373 aese v0.16b, v21.16b
2374 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
2375 eor x19, x19, x13 //AES block 4k+5 - round 12 low
2378 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
2379 mov d31, v6.d[1] //GHASH block 4k+2 - mid
2381 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
2382 mov d4, v5.d[1] //GHASH block 4k+1 - mid
2385 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
2388 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
2390 mov d10, v17.d[1] //GHASH block 4k - mid
2391 eor v9.16b, v9.16b, v30.16b //GHASH block 4k+1 - high
2394 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
2395 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
2397 pmull2 v30.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
2399 aese v0.16b, v22.16b
2400 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
2401 eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid
2404 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
2406 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
2407 eor x20, x20, x14 //AES block 4k+5 - round 12 high
2408 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
2410 aese v0.16b, v23.16b
2411 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
2412 add w12, w12, #1 //CTR block 4k+3
2415 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
2416 eor v9.16b, v9.16b, v30.16b //GHASH block 4k+2 - high
2418 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
2419 eor x22, x22, x14 //AES block 4k+6 - round 12 high
2421 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
2422 eor x23, x23, x13 //AES block 4k+3 - round 12 low
2423 mov d30, v7.d[1] //GHASH block 4k+3 - mid
2425 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
2428 pmull v8.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
2432 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
2433 eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid
2436 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
2437 ldp x6, x7, [x0, #0] //AES block 4k+4 - load plaintext
2442 aese v0.16b, v24.16b
2443 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
2444 eor v11.16b, v11.16b, v8.16b //GHASH block 4k+2 - low
2447 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
2451 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
2454 pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
2455 eor x7, x7, x14 //AES block 4k+4 - round 12 high
2456 eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid
2459 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
2460 eor x6, x6, x13 //AES block 4k+4 - round 12 low
2463 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
2467 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
2468 eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high
2470 aese v0.16b, v25.16b
2471 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
// From here on v4-v7 are refilled with the new plaintext (already XORed with
// x13/x14); by this point their old contents have been consumed by GHASH.
2472 fmov d5, x19 //AES block 4k+5 - mov low
2475 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
2476 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
2479 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
2480 fmov v5.d[1], x20 //AES block 4k+5 - mov high
2482 aese v0.16b, v26.16b
2483 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
2484 eor v11.16b, v11.16b, v6.16b //GHASH block 4k+3 - low
2486 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
2488 fmov d4, x6 //AES block 4k+4 - mov low
2491 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
2492 fmov v4.d[1], x7 //AES block 4k+4 - mov high
2495 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
2496 fmov d7, x23 //AES block 4k+3 - mov low
2498 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid
// Reduction of (v9 : v10 : v11).
// NOTE(review): v8 was GHASH scratch above but is the multiplier below, so it
// must be reloaded with the reduction constant in a line not shown - confirm.
2499 eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
2500 add w12, w12, #1 //CTR block 4k+8
2503 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
2504 fmov v7.d[1], x24 //AES block 4k+3 - mov high
2506 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
2507 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
2508 fmov d6, x21 //AES block 4k+6 - mov low
2511 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
2513 aese v0.16b, v27.16b
2514 aesmc v0.16b, v0.16b //AES block 4k+4 - round 9
2515 eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up
2518 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
2521 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
2524 aesmc v1.16b, v1.16b //AES block 4k+5 - round 9
2526 aese v0.16b, v28.16b
2527 aesmc v0.16b, v0.16b //AES block 4k+4 - round 10
2528 eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid
2531 aesmc v3.16b, v3.16b //AES block 4k+7 - round 9
2534 aesmc v2.16b, v2.16b //AES block 4k+6 - round 9
2536 aese v0.16b, v29.16b //AES block 4k+4 - round 11
2539 aesmc v1.16b, v1.16b //AES block 4k+5 - round 10
2540 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
2543 aesmc v2.16b, v2.16b //AES block 4k+6 - round 10
// Each AES state is XORed into its data block and stored, and the state
// register is re-seeded straight away with the next counter's high half.
2545 eor v4.16b, v4.16b, v0.16b //AES block 4k+4 - result
2548 aese v1.16b, v29.16b //AES block 4k+5 - round 11
2549 fmov v0.d[1], x9 //CTR block 4k+8
2552 pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
2553 fmov v6.d[1], x22 //AES block 4k+6 - mov high
2554 st1 { v4.16b}, [x2], #16 //AES block 4k+4 - store result
2557 aesmc v3.16b, v3.16b //AES block 4k+7 - round 10
2560 eor v5.16b, v5.16b, v1.16b //AES block 4k+5 - result
2561 add w12, w12, #1 //CTR block 4k+9
2564 aese v2.16b, v29.16b //AES block 4k+6 - round 11
2565 fmov v1.d[1], x9 //CTR block 4k+9
2568 add w12, w12, #1 //CTR block 4k+10
2569 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
2572 st1 { v5.16b}, [x2], #16 //AES block 4k+5 - store result
2573 eor v11.16b, v11.16b, v9.16b //MODULO - fold into low
2575 aese v3.16b, v29.16b //AES block 4k+7 - round 11
2576 eor v6.16b, v6.16b, v2.16b //AES block 4k+6 - result
2579 st1 { v6.16b}, [x2], #16 //AES block 4k+6 - store result
2580 fmov v2.d[1], x9 //CTR block 4k+10
2583 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
2586 eor v7.16b, v7.16b, v3.16b //AES block 4k+3 - result
2587 st1 { v7.16b}, [x2], #16 //AES block 4k+3 - store result
// aes_gcm_enc_192_kernel - second "4k+N" pass. It runs AES rounds 0..11 on
// v0-v3 and GHASHes the four result blocks in v4-v7, but no plaintext load or
// result store is visible in this span. Presumably this is the pre-tail step
// that prepares the AES state for the last blocks; its label is not visible in
// this filtered listing (the leading source line numbers skip) - confirm
// against the full source.
2591 aese v0.16b, v18.16b
2592 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
2597 add w12, w12, #1 //CTR block 4k+3
2600 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
2601 rev64 v5.16b, v5.16b //GHASH block 4k+1 (t0 and t1 free)
2604 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
2606 fmov v3.d[1], x9 //CTR block 4k+3
// "PRE 1": fold the running GHASH value (v11) into block 4k before multiplying.
2607 eor v4.16b, v4.16b, v11.16b //PRE 1
2608 mov d10, v17.d[1] //GHASH block 4k - mid
2611 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
2614 pmull2 v30.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
2616 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
2617 mov d8, v4.d[1] //GHASH block 4k - mid
2619 pmull v31.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
2622 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
2624 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
2625 mov d4, v5.d[1] //GHASH block 4k+1 - mid
2627 eor v11.16b, v11.16b, v31.16b //GHASH block 4k+1 - low
2628 mov d31, v6.d[1] //GHASH block 4k+2 - mid
2631 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
2632 eor v9.16b, v9.16b, v30.16b //GHASH block 4k+1 - high
2634 pmull2 v30.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
2636 eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid
2637 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
2640 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
2643 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
2644 eor v9.16b, v9.16b, v30.16b //GHASH block 4k+2 - high
2646 aese v0.16b, v19.16b
2647 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
2650 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
2651 mov d30, v7.d[1] //GHASH block 4k+3 - mid
2653 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
2654 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
2656 aese v0.16b, v20.16b
2657 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
2659 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
2660 eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid
2663 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
2665 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
2667 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
2669 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
2670 eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high
2672 pmull v8.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
2674 aese v0.16b, v21.16b
2675 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
2676 eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid
2679 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
2682 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
2683 eor v11.16b, v11.16b, v8.16b //GHASH block 4k+2 - low
2685 aese v0.16b, v22.16b
2686 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
2689 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
2690 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
2693 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
2695 pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
2699 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
2702 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
2705 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
2706 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid
2709 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
2712 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
2715 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
2716 eor v11.16b, v11.16b, v6.16b //GHASH block 4k+3 - low
2718 aese v0.16b, v23.16b
2719 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
2722 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
2726 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
2728 aese v0.16b, v24.16b
2729 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
2733 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
2736 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
2739 aese v0.16b, v25.16b
2740 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
// The two uncommented pmull-by-v8 instructions below have the same operands
// as the commented MODULO steps elsewhere in this file, except that the
// product goes to v30. The instructions that consume v30 are not shown.
// NOTE(review): v8 was GHASH scratch above, so it must be reloaded with the
// reduction constant in a line not shown - confirm against the full source.
2742 pmull v30.1q, v9.1d, v8.1d //MODULO - top 64b align with mid (product in v30 here)
2745 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
2748 aese v0.16b, v26.16b
2749 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
2752 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
2756 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
2759 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
2761 aese v0.16b, v27.16b
2762 aesmc v0.16b, v0.16b //AES block 4k+4 - round 9
2765 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
2769 aesmc v3.16b, v3.16b //AES block 4k+7 - round 9
2772 aesmc v1.16b, v1.16b //AES block 4k+5 - round 9
2775 aesmc v2.16b, v2.16b //AES block 4k+6 - round 9
2777 pmull v30.1q, v10.1d, v8.1d //MODULO - mid 64b align with low (product in v30 here)
2782 aesmc v3.16b, v3.16b //AES block 4k+7 - round 10
2784 aese v0.16b, v28.16b
2785 aesmc v0.16b, v0.16b //AES block 4k+4 - round 10
2788 aesmc v2.16b, v2.16b //AES block 4k+6 - round 10
2791 aesmc v1.16b, v1.16b //AES block 4k+5 - round 10
// Round 11 is an aese with no aesmc. The remaining "round 12" XOR is applied
// later to the data words via x13/x14.
2794 aese v0.16b, v29.16b //AES block 4k+4 - round 11
2796 aese v3.16b, v29.16b //AES block 4k+7 - round 11
2798 aese v2.16b, v29.16b //AES block 4k+6 - round 11
2800 aese v1.16b, v29.16b //AES block 4k+5 - round 11
// aes_gcm_enc_192_kernel - start of the tail (up to four remaining blocks).
// NOTE: filtered listing - the leading source line numbers skip, so the
// compares, branches and labels that pick how many blocks remain are not
// shown. Each result block is built in v5, stored, then byte-reversed into v4
// for GHASH. v20-v22 serve as GHASH scratch in this span.
2805 ldp x6, x7, [x0], #16 //AES block 4k+4 - load plaintext
2810 eor x6, x6, x13 //AES block 4k+4 - round 12 low
2811 eor x7, x7, x14 //AES block 4k+4 - round 12 high
2813 fmov d4, x6 //AES block 4k+4 - mov low
2815 fmov v4.d[1], x7 //AES block 4k+4 - mov high
2818 eor v5.16b, v4.16b, v0.16b //AES block 4k+4 - result
// NOTE(review): three separate counter decrements, well apart in the source
// numbering. Presumably each sits on a different "fewer blocks left" path and
// takes back a counter increment for a block that will not be used - confirm
// against the branches in the full source.
2823 sub w12, w12, #1
2834 sub w12, w12, #1
2840 sub w12, w12, #1
// final-3 block: store it, then GHASH it with v15 (low/high) and the upper
// half of v17 (mid). These products are written to v11/v9/v10 directly
// rather than XOR-accumulated.
2843 st1 { v5.16b}, [x2], #16 //AES final-3 block - store result
2845 ldp x6, x7, [x0], #16 //AES final-2 block - load input low & high
2850 rev64 v4.16b, v5.16b //GHASH final-3 block
2852 eor x6, x6, x13 //AES final-2 block - round 12 low
2855 eor x7, x7, x14 //AES final-2 block - round 12 high
2856 fmov d5, x6 //AES final-2 block - mov low
2858 fmov v5.d[1], x7 //AES final-2 block - mov high
2860 mov d22, v4.d[1] //GHASH final-3 block - mid
2862 pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low
2864 mov d10, v17.d[1] //GHASH final-3 block - mid
2866 eor v22.8b, v22.8b, v4.8b //GHASH final-3 block - mid
2870 pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high
2872 pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid
2873 eor v5.16b, v5.16b, v1.16b //AES final-2 block - result
// final-2 block: store it, then GHASH it with v14 (low/high) and the low
// half of v17 (mid), XOR-accumulating into v11/v9/v10.
2876 st1 { v5.16b}, [x2], #16 //AES final-2 block - store result
2878 rev64 v4.16b, v5.16b //GHASH final-2 block
2879 ldp x6, x7, [x0], #16 //AES final-1 block - load input low & high
2886 eor x7, x7, x14 //AES final-1 block - round 12 high
2888 pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high
2889 mov d22, v4.d[1] //GHASH final-2 block - mid
2891 pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low
2892 eor x6, x6, x13 //AES final-1 block - round 12 low
2894 fmov d5, x6 //AES final-1 block - mov low
2896 fmov v5.d[1], x7 //AES final-1 block - mov high
2897 eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high
2898 eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid
2900 eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low
2902 pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid
2906 eor v5.16b, v5.16b, v2.16b //AES final-1 block - result
2908 eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid
2909 .L192_enc_blocks_more_than_1: //blocks left > 1
// Encrypt tail: retire the "final-1" block and build the final block.
// NOTE: filtered listing - the leading source line numbers skip, so
// instructions that are not shown sit between some of the lines below.
//  * v5 holds the final-1 result on entry; it is stored through x2 and
//    byte-reversed per 64-bit half into v4 for GHASH.
//  * The final input block is loaded into x6/x7, XORed with x13/x14
//    ("round 12"), moved into v5 and XORed with the AES state in v3.
//  * GHASH of final-1: low/high products use v13, the mid (Karatsuba) product
//    uses the upper half of v16; results accumulate into v11/v9/v10.
2911 st1 { v5.16b}, [x2], #16 //AES final-1 block - store result
2913 ldp x6, x7, [x0], #16 //AES final block - load input low & high
2918 rev64 v4.16b, v5.16b //GHASH final-1 block
2920 eor x6, x6, x13 //AES final block - round 12 low
2924 mov d22, v4.d[1] //GHASH final-1 block - mid
2926 eor v22.8b, v22.8b, v4.8b //GHASH final-1 block - mid
2927 eor x7, x7, x14 //AES final block - round 12 high
2928 fmov d5, x6 //AES final block - mov low
2930 pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high
2931 fmov v5.d[1], x7 //AES final block - mov high
// Duplicate the folded (hi ^ lo) value into the upper lane so pmull2 can
// multiply it by the upper half of v16.
2933 ins v22.d[1], v22.d[0] //GHASH final-1 block - mid
2935 eor v9.16b, v9.16b, v20.16b //GHASH final-1 block - high
2937 pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low
2939 pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid
2941 eor v5.16b, v5.16b, v3.16b //AES final block - result
2943 eor v11.16b, v11.16b, v21.16b //GHASH final-1 block - low
2945 eor v10.16b, v10.16b, v22.16b //GHASH final-1 block - mid
2946 .L192_enc_blocks_less_than_1: //blocks left <= 1
// Final (possibly partial) block of the 192-bit encrypt kernel, followed by
// the last GHASH modular reduction.
// NOTE: filtered listing - the leading source line numbers skip, so the mask
// construction, the rev64 of the masked block, the load into v18 and the
// final stores are among the lines not shown here.
2956 sub x1, x1, #128 //bit_length -= 128
2959 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128])
2972 fmov v0.d[1], x7 //upper 64 bits of the byte mask used by the and/bif below
2974 …and v5.16b, v5.16b, v0.16b //possibly partial last block has zeroes in h…
// GHASH of the final block: low/high products use v12, the mid (Karatsuba)
// product uses the low half of v16. v4 is presumably rev64 of the masked v5,
// computed in a line not shown - confirm against the full source.
2980 mov d8, v4.d[1] //GHASH final block - mid
2982 pmull v21.1q, v4.1d, v12.1d //GHASH final block - low
2984 pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high
2986 eor v8.8b, v8.8b, v4.8b //GHASH final block - mid
2988 eor v11.16b, v11.16b, v21.16b //GHASH final block - low
2990 eor v9.16b, v9.16b, v20.16b //GHASH final block - high
2992 pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid
2994 eor v10.16b, v10.16b, v8.16b //GHASH final block - mid
2997 eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
// bif keeps v5 where the mask v0 is 1 and takes v18 where it is 0.
// NOTE(review): v18 was round key 0 earlier; presumably it is reloaded with
// the bytes already at the output location in a line not shown - confirm.
3001 …bif v5.16b, v18.16b, v0.16b //insert existing bytes in top end of resu…
// Reduction: fold high (v9) into mid (v10), then mid into low (v11); the
// reduced GHASH value ends up in v11.
// NOTE(review): v8 was scratch for the mid product above but is the
// multiplier below, so it must be reloaded with the reduction constant in a
// line not shown - confirm against the full source.
3003 eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up
3005 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
3007 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
3009 eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid
3011 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
3013 pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
3015 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
3017 eor v11.16b, v11.16b, v9.16b //MODULO - fold into low
3022 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
// Symbol size = current location minus the function's start label.
3040 .size aes_gcm_enc_192_kernel,.-aes_gcm_enc_192_kernel
3047 stp x19, x20, [sp, #-112]!
3068 …ld1 { v0.16b}, [x16] //special case vector load initial counter so we …
3081 fmov d1, x10 //CTR block 1
3083 add w12, w12, #1 //increment rev_ctr32
3086 aese v0.16b, v18.16b
3087 aesmc v0.16b, v0.16b //AES block 0 - round 0
3088 rev w9, w12 //CTR block 1
3090 add w12, w12, #1 //CTR block 1
3091 orr x9, x11, x9, lsl #32 //CTR block 1
3094 fmov v1.d[1], x9 //CTR block 1
3096 add w12, w12, #1 //CTR block 2
3101 fmov v2.d[1], x9 //CTR block 2
3104 aese v0.16b, v19.16b
3105 aesmc v0.16b, v0.16b //AES block 0 - round 1
3108 fmov v3.d[1], x9 //CTR block 3
3112 aese v0.16b, v20.16b
3113 aesmc v0.16b, v0.16b //AES block 0 - round 2
3116 aesmc v2.16b, v2.16b //AES block 2 - round 0
3120 aesmc v1.16b, v1.16b //AES block 1 - round 0
3126 aesmc v3.16b, v3.16b //AES block 3 - round 0
3132 aesmc v2.16b, v2.16b //AES block 2 - round 1
3138 aesmc v1.16b, v1.16b //AES block 1 - round 1
3141 aesmc v3.16b, v3.16b //AES block 3 - round 1
3147 aesmc v2.16b, v2.16b //AES block 2 - round 2
3150 aese v0.16b, v21.16b
3151 aesmc v0.16b, v0.16b //AES block 0 - round 3
3155 aesmc v1.16b, v1.16b //AES block 1 - round 2
3159 aesmc v3.16b, v3.16b //AES block 3 - round 2
3163 aesmc v2.16b, v2.16b //AES block 2 - round 3
3169 aesmc v1.16b, v1.16b //AES block 1 - round 3
3170 add w12, w12, #1 //CTR block 3
3173 aesmc v3.16b, v3.16b //AES block 3 - round 3
3176 aese v0.16b, v22.16b
3177 aesmc v0.16b, v0.16b //AES block 0 - round 4
3181 aesmc v1.16b, v1.16b //AES block 1 - round 4
3185 aesmc v2.16b, v2.16b //AES block 2 - round 4
3188 aesmc v3.16b, v3.16b //AES block 3 - round 4
3191 aese v0.16b, v23.16b
3192 aesmc v0.16b, v0.16b //AES block 0 - round 5
3196 aesmc v1.16b, v1.16b //AES block 1 - round 5
3199 aesmc v2.16b, v2.16b //AES block 2 - round 5
3202 aesmc v3.16b, v3.16b //AES block 3 - round 5
3204 aese v0.16b, v24.16b
3205 aesmc v0.16b, v0.16b //AES block 0 - round 6
3208 aesmc v2.16b, v2.16b //AES block 2 - round 6
3211 aesmc v3.16b, v3.16b //AES block 3 - round 6
3213 aese v0.16b, v25.16b
3214 aesmc v0.16b, v0.16b //AES block 0 - round 7
3217 aesmc v2.16b, v2.16b //AES block 2 - round 7
3220 aesmc v3.16b, v3.16b //AES block 3 - round 7
3223 aesmc v1.16b, v1.16b //AES block 1 - round 6
3226 aesmc v2.16b, v2.16b //AES block 2 - round 8
3229 aesmc v3.16b, v3.16b //AES block 3 - round 8
3232 aesmc v1.16b, v1.16b //AES block 1 - round 7
3235 aesmc v2.16b, v2.16b //AES block 2 - round 9
3238 aesmc v3.16b, v3.16b //AES block 3 - round 9
3241 aesmc v1.16b, v1.16b //AES block 1 - round 8
3242 sub x5, x5, #1 //byte_len - 1
3244 aese v0.16b, v26.16b
3245 aesmc v0.16b, v0.16b //AES block 0 - round 8
3246 …and x5, x5, #0xffffffffffffffc0 //number of bytes to be processed in main loop (at least 1 byte…
3249 aesmc v3.16b, v3.16b //AES block 3 - round 10
3253 aesmc v1.16b, v1.16b //AES block 1 - round 9
3256 aese v0.16b, v27.16b
3257 aesmc v0.16b, v0.16b //AES block 0 - round 9
3260 aese v3.16b, v29.16b //AES block 3 - round 11
3263 aesmc v2.16b, v2.16b //AES block 2 - round 10
3266 aesmc v1.16b, v1.16b //AES block 1 - round 10
3268 aese v0.16b, v28.16b
3269 aesmc v0.16b, v0.16b //AES block 0 - round 10
3272 aese v2.16b, v29.16b //AES block 2 - round 11
3274 aese v1.16b, v29.16b //AES block 1 - round 11
3277 aese v0.16b, v29.16b //AES block 0 - round 11
3280 ld1 {v4.16b, v5.16b}, [x0], #32 //AES block 0,1 - load ciphertext
3282 eor v1.16b, v5.16b, v1.16b //AES block 1 - result
3284 eor v0.16b, v4.16b, v0.16b //AES block 0 - result
3286 ld1 {v6.16b, v7.16b}, [x0], #32 //AES block 2,3 - load ciphertext
3288 mov x19, v1.d[0] //AES block 1 - mov low
3290 mov x20, v1.d[1] //AES block 1 - mov high
3292 mov x6, v0.d[0] //AES block 0 - mov low
3294 add w12, w12, #1 //CTR block 4
3296 mov x7, v0.d[1] //AES block 0 - mov high
3300 rev64 v5.16b, v5.16b //GHASH block 1
3303 eor x19, x19, x13 //AES block 1 - round 12 low
3307 fmov v0.d[1], x9 //CTR block 4
3312 eor x20, x20, x14 //AES block 1 - round 12 high
3316 add w12, w12, #1 //CTR block 5
3317 fmov v1.d[1], x9 //CTR block 5
3318 eor x6, x6, x13 //AES block 0 - round 12 low
3323 eor x7, x7, x14 //AES block 0 - round 12 high
3327 stp x6, x7, [x2], #16 //AES block 0 - store result
3330 stp x19, x20, [x2], #16 //AES block 1 - store result
3332 add w12, w12, #1 //CTR block 6
3333 eor v2.16b, v6.16b, v2.16b //AES block 2 - result
3338 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
3341 pmull v31.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
3342 mov x21, v2.d[0] //AES block 4k+2 - mov low
3344 mov x22, v2.d[1] //AES block 4k+2 - mov high
3345 eor v3.16b, v7.16b, v3.16b //AES block 4k+3 - result
3349 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
3352 aese v0.16b, v18.16b
3353 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
3354 eor v4.16b, v4.16b, v11.16b //PRE 1
3356 pmull2 v30.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
3357 fmov v2.d[1], x9 //CTR block 4k+6
3360 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
3361 mov x24, v3.d[1] //AES block 4k+3 - mov high
3363 aese v0.16b, v19.16b
3364 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
3365 mov x23, v3.d[0] //AES block 4k+3 - mov low
3367 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
3369 mov d8, v4.d[1] //GHASH block 4k - mid
3371 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
3372 mov d10, v17.d[1] //GHASH block 4k - mid
3376 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
3379 fmov v3.d[1], x9 //CTR block 4k+7
3380 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
3381 mov d4, v5.d[1] //GHASH block 4k+1 - mid
3384 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
3386 aese v0.16b, v20.16b
3387 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
3388 eor x22, x22, x14 //AES block 4k+2 - round 12 high
3393 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
3394 eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid
3396 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
3399 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
3403 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
3405 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
3406 eor v11.16b, v11.16b, v31.16b //GHASH block 4k+1 - low
3407 eor x21, x21, x13 //AES block 4k+2 - round 12 low
3412 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
3414 aese v0.16b, v21.16b
3415 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
3417 eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid
3418 mov d31, v6.d[1] //GHASH block 4k+2 - mid
3421 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
3422 eor v9.16b, v9.16b, v30.16b //GHASH block 4k+1 - high
3424 aese v0.16b, v22.16b
3425 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
3427 pmull2 v30.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
3428 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
3430 pmull v8.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
3432 aese v0.16b, v23.16b
3433 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
3435 eor v9.16b, v9.16b, v30.16b //GHASH block 4k+2 - high
3436 mov d30, v7.d[1] //GHASH block 4k+3 - mid
3439 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
3441 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
3444 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
3445 eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid
3448 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
3450 aese v0.16b, v24.16b
3451 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
3452 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
3455 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
3457 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
3458 eor v11.16b, v11.16b, v8.16b //GHASH block 4k+2 - low
3460 aese v0.16b, v25.16b
3461 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
3463 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
3464 eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high
3467 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
3469 aese v0.16b, v26.16b
3470 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
3473 pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
3476 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
3477 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
3480 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
3482 aese v0.16b, v27.16b
3483 aesmc v0.16b, v0.16b //AES block 4k+4 - round 9
3484 eor v11.16b, v11.16b, v6.16b //GHASH block 4k+3 - low
3487 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
3490 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
3491 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid
3493 aese v0.16b, v28.16b
3494 aesmc v0.16b, v0.16b //AES block 4k+4 - round 10
3497 aesmc v1.16b, v1.16b //AES block 4k+5 - round 9
3498 eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
3501 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
3504 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
3508 aesmc v1.16b, v1.16b //AES block 4k+5 - round 10
3511 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
3512 ld1 {v4.16b}, [x0], #16 //AES block 4k+4 - load ciphertext
3515 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
3516 eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up
3518 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
3519 ld1 {v5.16b}, [x0], #16 //AES block 4k+5 - load ciphertext
3520 eor x23, x23, x13 //AES block 4k+3 - round 12 low
3525 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
3526 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
3528 aese v0.16b, v29.16b //AES block 4k+4 - round 11
3529 add w12, w12, #1 //CTR block 4k+7
3532 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
3533 eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid
3536 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
3537 ld1 {v6.16b}, [x0], #16 //AES block 4k+6 - load ciphertext
3539 aese v1.16b, v29.16b //AES block 4k+5 - round 11
3540 ld1 {v7.16b}, [x0], #16 //AES block 4k+7 - load ciphertext
3544 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
3545 stp x21, x22, [x2], #16 //AES block 4k+2 - store result
3548 aesmc v2.16b, v2.16b //AES block 4k+6 - round 9
3549 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
3553 eor v0.16b, v4.16b, v0.16b //AES block 4k+4 - result
3554 eor x24, x24, x14 //AES block 4k+3 - round 12 high
3558 eor v1.16b, v5.16b, v1.16b //AES block 4k+5 - result
3561 aesmc v2.16b, v2.16b //AES block 4k+6 - round 10
3565 aesmc v3.16b, v3.16b //AES block 4k+7 - round 9
3567 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
3568 mov x19, v1.d[0] //AES block 4k+5 - mov low
3570 mov x6, v0.d[0] //AES block 4k+4 - mov low
3571 stp x23, x24, [x2], #16 //AES block 4k+3 - store result
3574 aese v2.16b, v29.16b //AES block 4k+6 - round 11
3575 mov x7, v0.d[1] //AES block 4k+4 - mov high
3578 aesmc v3.16b, v3.16b //AES block 4k+7 - round 10
3579 mov x20, v1.d[1] //AES block 4k+5 - mov high
3582 add w12, w12, #1 //CTR block 4k+8
3583 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
3585 eor v2.16b, v6.16b, v2.16b //AES block 4k+6 - result
3586 fmov v0.d[1], x9 //CTR block 4k+8
3589 eor x6, x6, x13 //AES block 4k+4 - round 12 low
3594 eor v11.16b, v11.16b, v8.16b //MODULO - fold into low
3597 add w12, w12, #1 //CTR block 4k+9
3598 eor x19, x19, x13 //AES block 4k+5 - round 12 low
3602 fmov v1.d[1], x9 //CTR block 4k+9
3604 eor x20, x20, x14 //AES block 4k+5 - round 12 high
3608 eor x7, x7, x14 //AES block 4k+4 - round 12 high
3612 stp x6, x7, [x2], #16 //AES block 4k+4 - store result
3613 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
3615 add w12, w12, #1 //CTR block 4k+10
3619 aese v3.16b, v29.16b //AES block 4k+7 - round 11
3620 stp x19, x20, [x2], #16 //AES block 4k+5 - store result
3624 mov x22, v2.d[1] //AES block 4k+2 - mov high
3626 eor v3.16b, v7.16b, v3.16b //AES block 4k+3 - result
3629 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
3630 mov x21, v2.d[0] //AES block 4k+2 - mov low
3632 aese v0.16b, v18.16b
3633 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
3634 mov d10, v17.d[1] //GHASH block 4k - mid
3636 eor v4.16b, v4.16b, v11.16b //PRE 1
3640 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
3641 mov x23, v3.d[0] //AES block 4k+3 - mov low
3643 aese v0.16b, v19.16b
3644 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
3645 mov x24, v3.d[1] //AES block 4k+3 - mov high
3647 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
3648 mov d8, v4.d[1] //GHASH block 4k - mid
3652 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
3655 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
3656 fmov v2.d[1], x9 //CTR block 4k+6
3660 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
3661 mov d4, v5.d[1] //GHASH block 4k+1 - mid
3663 pmull v31.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
3664 eor x24, x24, x14 //AES block 4k+3 - round 12 high
3668 fmov v3.d[1], x9 //CTR block 4k+7
3670 aese v0.16b, v20.16b
3671 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
3672 eor x21, x21, x13 //AES block 4k+2 - round 12 low
3676 pmull2 v30.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
3677 eor x22, x22, x14 //AES block 4k+2 - round 12 high
3681 eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid
3683 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
3684 eor x23, x23, x13 //AES block 4k+3 - round 12 low
3688 stp x21, x22, [x2], #16 //AES block 4k+2 - store result
3691 stp x23, x24, [x2], #16 //AES block 4k+3 - store result
3694 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
3695 eor v9.16b, v9.16b, v30.16b //GHASH block 4k+1 - high
3697 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
3698 add w12, w12, #1 //CTR block 4k+7
3700 pmull2 v30.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
3701 eor v11.16b, v11.16b, v31.16b //GHASH block 4k+1 - low
3704 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
3706 eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid
3707 mov d31, v6.d[1] //GHASH block 4k+2 - mid
3710 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
3713 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
3714 eor v9.16b, v9.16b, v30.16b //GHASH block 4k+2 - high
3716 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
3718 pmull v8.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
3721 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
3722 mov d30, v7.d[1] //GHASH block 4k+3 - mid
3725 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
3726 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
3728 pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
3730 aese v0.16b, v21.16b
3731 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
3732 eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid
3735 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
3737 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
3738 eor v11.16b, v11.16b, v8.16b //GHASH block 4k+2 - low
3740 aese v0.16b, v22.16b
3741 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
3743 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
3746 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
3749 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
3752 eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high
3754 aese v0.16b, v23.16b
3755 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
3756 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
3759 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
3761 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
3762 eor v11.16b, v11.16b, v6.16b //GHASH block 4k+3 - low
3764 aese v0.16b, v24.16b
3765 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
3768 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
3769 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid
3772 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
3774 aese v0.16b, v25.16b
3775 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
3776 eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
3779 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
3782 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
3783 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
3785 aese v0.16b, v26.16b
3786 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
3789 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
3790 eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up
3793 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
3796 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
3798 aese v0.16b, v27.16b
3799 aesmc v0.16b, v0.16b //AES block 4k+4 - round 9
3802 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
3805 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
3806 eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid
3808 aese v0.16b, v28.16b
3809 aesmc v0.16b, v0.16b //AES block 4k+4 - round 10
3812 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
3815 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
3818 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
3819 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
3822 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
3825 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
3828 aesmc v2.16b, v2.16b //AES block 4k+6 - round 9
3831 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
3834 aesmc v3.16b, v3.16b //AES block 4k+7 - round 9
3836 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
3839 aesmc v1.16b, v1.16b //AES block 4k+5 - round 9
3842 aesmc v2.16b, v2.16b //AES block 4k+6 - round 10
3845 aesmc v3.16b, v3.16b //AES block 4k+7 - round 10
3846 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
3849 aesmc v1.16b, v1.16b //AES block 4k+5 - round 10
3851 aese v0.16b, v29.16b
3852 eor v11.16b, v11.16b, v8.16b //MODULO - fold into low
3860 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
3864 ld1 { v5.16b}, [x0], #16 //AES block 4k+4 - load ciphertext
3866 eor v0.16b, v5.16b, v0.16b //AES block 4k+4 - result
3868 mov x7, v0.d[1] //AES block 4k+4 - mov high
3870 mov x6, v0.d[0] //AES block 4k+4 - mov low
3876 eor x7, x7, x14 //AES block 4k+4 - round 12 high
3880 eor x6, x6, x13 //AES block 4k+4 - round 12 low
3891 sub w12, w12, #1
3899 sub w12, w12, #1
3903 sub w12, w12, #1
3906 rev64 v4.16b, v5.16b //GHASH final-3 block
3907 ld1 { v5.16b}, [x0], #16 //AES final-2 block - load ciphertext
3909 stp x6, x7, [x2], #16 //AES final-3 block - store result
3913 eor v0.16b, v5.16b, v1.16b //AES final-2 block - result
3915 pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low
3916 mov x6, v0.d[0] //AES final-2 block - mov low
3917 mov d22, v4.d[1] //GHASH final-3 block - mid
3919 mov x7, v0.d[1] //AES final-2 block - mov high
3921 mov d10, v17.d[1] //GHASH final-3 block - mid
3922 eor v22.8b, v22.8b, v4.8b //GHASH final-3 block - mid
3924 pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high
3926 eor x6, x6, x13 //AES final-2 block - round 12 low
3932 pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid
3933 eor x7, x7, x14 //AES final-2 block - round 12 high
3939 rev64 v4.16b, v5.16b //GHASH final-2 block
3940 ld1 { v5.16b}, [x0], #16 //AES final-1 block - load ciphertext
3946 eor v0.16b, v5.16b, v2.16b //AES final-1 block - result
3948 mov d22, v4.d[1] //GHASH final-2 block - mid
3950 pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low
3952 stp x6, x7, [x2], #16 //AES final-2 block - store result
3954 eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid
3955 mov x7, v0.d[1] //AES final-1 block - mov high
3957 eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low
3958 mov x6, v0.d[0] //AES final-1 block - mov low
3960 pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high
3962 pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid
3964 eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high
3965 eor x7, x7, x14 //AES final-1 block - round 12 high
3969 eor x6, x6, x13 //AES final-1 block - round 12 low
3973 eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid
3974 .L192_dec_blocks_more_than_1: //blocks left > 1
3976 rev64 v4.16b, v5.16b //GHASH final-1 block
3979 ld1 { v5.16b}, [x0], #16 //AES final block - load ciphertext
3981 mov d22, v4.d[1] //GHASH final-1 block - mid
3983 pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high
3985 eor v0.16b, v5.16b, v3.16b //AES final block - result
3986 stp x6, x7, [x2], #16 //AES final-1 block - store result
3988 eor v22.8b, v22.8b, v4.8b //GHASH final-1 block - mid
3990 eor v9.16b, v9.16b, v20.16b //GHASH final-1 block - high
3992 pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low
3993 mov x7, v0.d[1] //AES final block - mov high
3995 ins v22.d[1], v22.d[0] //GHASH final-1 block - mid
3996 mov x6, v0.d[0] //AES final block - mov low
3998 pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid
4001 eor v11.16b, v11.16b, v21.16b //GHASH final-1 block - low
4002 eor x7, x7, x14 //AES final block - round 12 high
4006 eor x6, x6, x13 //AES final block - round 12 low
4010 eor v10.16b, v10.16b, v22.16b //GHASH final-1 block - mid
4011 .L192_dec_blocks_less_than_1: //blocks left <= 1
4017 sub x1, x1, #128 //bit_length -= 128
4019 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128])
4035 mov v0.d[1], x10
4042 …and v5.16b, v5.16b, v0.16b //possibly partial last block has zeroes in …
4052 pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high
4053 mov d8, v4.d[1] //GHASH final block - mid
4055 pmull v21.1q, v4.1d, v12.1d //GHASH final block - low
4057 eor v8.8b, v8.8b, v4.8b //GHASH final block - mid
4059 eor v9.16b, v9.16b, v20.16b //GHASH final block - high
4061 pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid
4063 eor v11.16b, v11.16b, v21.16b //GHASH final block - low
4065 eor v10.16b, v10.16b, v8.16b //GHASH final block - mid
4068 eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
4072 eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up
4074 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
4078 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
4080 eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid
4082 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
4084 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
4086 eor v11.16b, v11.16b, v8.16b //MODULO - fold into low
4088 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
4090 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
4108 .size aes_gcm_dec_192_kernel,.-aes_gcm_dec_192_kernel
4115 stp x19, x20, [sp, #-112]!
4138 …ld1 { v0.16b}, [x16] //special case vector load initial counter so we …
4139 sub x5, x5, #1 //byte_len - 1
4142 …and x5, x5, #0xffffffffffffffc0 //number of bytes to be processed in main loop (at least 1 byte mu…
4153 fmov d1, x10 //CTR block 1
4155 aese v0.16b, v18.16b
4156 aesmc v0.16b, v0.16b //AES block 0 - round 0
4157 add w12, w12, #1 //increment rev_ctr32
4159 rev w9, w12 //CTR block 1
4162 orr x9, x11, x9, lsl #32 //CTR block 1
4163 add w12, w12, #1 //CTR block 1
4166 fmov v1.d[1], x9 //CTR block 1
4168 add w12, w12, #1 //CTR block 2
4173 fmov v2.d[1], x9 //CTR block 2
4176 aese v0.16b, v19.16b
4177 aesmc v0.16b, v0.16b //AES block 0 - round 1
4180 fmov v3.d[1], x9 //CTR block 3
4183 aesmc v1.16b, v1.16b //AES block 1 - round 0
4186 aese v0.16b, v20.16b
4187 aesmc v0.16b, v0.16b //AES block 0 - round 2
4191 aesmc v2.16b, v2.16b //AES block 2 - round 0
4195 aesmc v1.16b, v1.16b //AES block 1 - round 1
4201 aesmc v3.16b, v3.16b //AES block 3 - round 0
4205 aesmc v2.16b, v2.16b //AES block 2 - round 1
4209 aesmc v1.16b, v1.16b //AES block 1 - round 2
4215 aesmc v3.16b, v3.16b //AES block 3 - round 1
4219 aesmc v2.16b, v2.16b //AES block 2 - round 2
4225 aesmc v1.16b, v1.16b //AES block 1 - round 3
4229 aesmc v3.16b, v3.16b //AES block 3 - round 2
4233 aesmc v2.16b, v2.16b //AES block 2 - round 3
4234 add w12, w12, #1 //CTR block 3
4236 aese v0.16b, v21.16b
4237 aesmc v0.16b, v0.16b //AES block 0 - round 3
4240 aesmc v3.16b, v3.16b //AES block 3 - round 3
4246 aesmc v2.16b, v2.16b //AES block 2 - round 4
4248 aese v0.16b, v22.16b
4249 aesmc v0.16b, v0.16b //AES block 0 - round 4
4252 aesmc v1.16b, v1.16b //AES block 1 - round 4
4255 aesmc v3.16b, v3.16b //AES block 3 - round 4
4257 aese v0.16b, v23.16b
4258 aesmc v0.16b, v0.16b //AES block 0 - round 5
4261 aesmc v1.16b, v1.16b //AES block 1 - round 5
4264 aesmc v3.16b, v3.16b //AES block 3 - round 5
4267 aesmc v2.16b, v2.16b //AES block 2 - round 5
4270 aesmc v1.16b, v1.16b //AES block 1 - round 6
4274 aesmc v3.16b, v3.16b //AES block 3 - round 6
4277 aese v0.16b, v24.16b
4278 aesmc v0.16b, v0.16b //AES block 0 - round 6
4284 aesmc v2.16b, v2.16b //AES block 2 - round 6
4288 aesmc v1.16b, v1.16b //AES block 1 - round 7
4291 aese v0.16b, v25.16b
4292 aesmc v0.16b, v0.16b //AES block 0 - round 7
4295 aesmc v2.16b, v2.16b //AES block 2 - round 7
4298 aesmc v3.16b, v3.16b //AES block 3 - round 7
4302 aesmc v1.16b, v1.16b //AES block 1 - round 8
4305 aesmc v2.16b, v2.16b //AES block 2 - round 8
4308 aesmc v3.16b, v3.16b //AES block 3 - round 8
4311 aesmc v1.16b, v1.16b //AES block 1 - round 9
4314 aesmc v2.16b, v2.16b //AES block 2 - round 9
4316 aese v0.16b, v26.16b
4317 aesmc v0.16b, v0.16b //AES block 0 - round 8
4320 aesmc v1.16b, v1.16b //AES block 1 - round 10
4323 aesmc v3.16b, v3.16b //AES block 3 - round 9
4325 aese v0.16b, v27.16b
4326 aesmc v0.16b, v0.16b //AES block 0 - round 9
4329 aesmc v2.16b, v2.16b //AES block 2 - round 10
4332 aesmc v3.16b, v3.16b //AES block 3 - round 10
4335 aesmc v1.16b, v1.16b //AES block 1 - round 11
4338 aesmc v2.16b, v2.16b //AES block 2 - round 11
4340 aese v0.16b, v28.16b
4341 aesmc v0.16b, v0.16b //AES block 0 - round 10
4344 aesmc v1.16b, v1.16b //AES block 1 - round 12
4347 aesmc v2.16b, v2.16b //AES block 2 - round 12
4349 aese v0.16b, v29.16b
4350 aesmc v0.16b, v0.16b //AES block 0 - round 11
4354 aesmc v3.16b, v3.16b //AES block 3 - round 11
4356 aese v2.16b, v31.16b //AES block 2 - round 13
4359 aese v0.16b, v30.16b
4360 aesmc v0.16b, v0.16b //AES block 0 - round 12
4363 aesmc v3.16b, v3.16b //AES block 3 - round 12
4365 aese v1.16b, v31.16b //AES block 1 - round 13
4367 aese v0.16b, v31.16b //AES block 0 - round 13
4369 aese v3.16b, v31.16b //AES block 3 - round 13
4373 ldp x19, x20, [x0, #16] //AES block 1 - load plaintext
4379 ldp x6, x7, [x0, #0] //AES block 0 - load plaintext
4384 ldp x23, x24, [x0, #48] //AES block 3 - load plaintext
4389 ldp x21, x22, [x0, #32] //AES block 2 - load plaintext
4396 eor x19, x19, x13 //AES block 1 - round 14 low
4397 eor x20, x20, x14 //AES block 1 - round 14 high
4399 fmov d5, x19 //AES block 1 - mov low
4400 eor x6, x6, x13 //AES block 0 - round 14 low
4402 eor x7, x7, x14 //AES block 0 - round 14 high
4403 eor x24, x24, x14 //AES block 3 - round 14 high
4404 fmov d4, x6 //AES block 0 - mov low
4407 fmov v4.d[1], x7 //AES block 0 - mov high
4408 eor x23, x23, x13 //AES block 3 - round 14 low
4410 eor x21, x21, x13 //AES block 2 - round 14 low
4411 fmov v5.d[1], x20 //AES block 1 - mov high
4413 fmov d6, x21 //AES block 2 - mov low
4414 add w12, w12, #1 //CTR block 4
4417 fmov d7, x23 //AES block 3 - mov low
4418 eor x22, x22, x14 //AES block 2 - round 14 high
4420 fmov v6.d[1], x22 //AES block 2 - mov high
4422 eor v4.16b, v4.16b, v0.16b //AES block 0 - result
4425 fmov v0.d[1], x9 //CTR block 4
4427 add w12, w12, #1 //CTR block 5
4429 eor v5.16b, v5.16b, v1.16b //AES block 1 - result
4433 fmov v1.d[1], x9 //CTR block 5
4435 st1 { v4.16b}, [x2], #16 //AES block 0 - store result
4437 fmov v7.d[1], x24 //AES block 3 - mov high
4439 eor v6.16b, v6.16b, v2.16b //AES block 2 - result
4441 st1 { v5.16b}, [x2], #16 //AES block 1 - store result
4443 add w12, w12, #1 //CTR block 6
4446 fmov v2.d[1], x9 //CTR block 6
4447 st1 { v6.16b}, [x2], #16 //AES block 2 - store result
4452 eor v7.16b, v7.16b, v3.16b //AES block 3 - result
4453 st1 { v7.16b}, [x2], #16 //AES block 3 - store result
4457 aese v0.16b, v18.16b
4458 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
4462 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
4466 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
4469 aese v0.16b, v19.16b
4470 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
4471 fmov v3.d[1], x9 //CTR block 4k+3
4474 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
4475 ldp x23, x24, [x0, #48] //AES block 4k+7 - load plaintext
4481 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
4482 ldp x21, x22, [x0, #32] //AES block 4k+6 - load plaintext
4487 aese v0.16b, v20.16b
4488 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
4489 eor v4.16b, v4.16b, v11.16b //PRE 1
4492 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
4495 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
4496 eor x23, x23, x13 //AES block 4k+7 - round 14 low
4498 aese v0.16b, v21.16b
4499 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
4500 mov d10, v17.d[1] //GHASH block 4k - mid
4502 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
4503 eor x22, x22, x14 //AES block 4k+6 - round 14 high
4504 mov d8, v4.d[1] //GHASH block 4k - mid
4507 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
4508 rev64 v5.16b, v5.16b //GHASH block 4k+1 (t0 and t1 free)
4510 aese v0.16b, v22.16b
4511 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
4513 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
4514 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
4517 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
4519 aese v0.16b, v23.16b
4520 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
4523 pmull2 v4.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
4525 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
4528 pmull v8.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
4530 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+1 - high
4531 mov d4, v5.d[1] //GHASH block 4k+1 - mid
4534 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
4537 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
4538 eor v11.16b, v11.16b, v8.16b //GHASH block 4k+1 - low
4541 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
4544 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
4545 mov d8, v6.d[1] //GHASH block 4k+2 - mid
4548 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
4549 eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid
4552 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
4554 aese v0.16b, v24.16b
4555 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
4556 eor v8.8b, v8.8b, v6.8b //GHASH block 4k+2 - mid
4559 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
4561 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
4563 aese v0.16b, v25.16b
4564 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
4567 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
4568 ins v8.d[1], v8.d[0] //GHASH block 4k+2 - mid
4571 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
4573 aese v0.16b, v26.16b
4574 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
4577 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
4580 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
4581 eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid
4583 pmull2 v4.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
4585 pmull v5.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
4588 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
4590 pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
4591 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+2 - high
4594 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
4595 ldp x19, x20, [x0, #16] //AES block 4k+5 - load plaintext
4601 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
4602 mov d4, v7.d[1] //GHASH block 4k+3 - mid
4605 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
4606 eor v11.16b, v11.16b, v5.16b //GHASH block 4k+2 - low
4608 pmull2 v8.1q, v8.2d, v16.2d //GHASH block 4k+2 - mid
4610 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
4611 eor v4.8b, v4.8b, v7.8b //GHASH block 4k+3 - mid
4614 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
4615 eor x19, x19, x13 //AES block 4k+5 - round 14 low
4618 aesmc v1.16b, v1.16b //AES block 4k+5 - round 9
4619 eor v10.16b, v10.16b, v8.16b //GHASH block 4k+2 - mid
4622 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
4623 eor x21, x21, x13 //AES block 4k+6 - round 14 low
4625 aese v0.16b, v27.16b
4626 aesmc v0.16b, v0.16b //AES block 4k+4 - round 9
4629 pmull v4.1q, v4.1d, v16.1d //GHASH block 4k+3 - mid
4630 eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high
4631 fmov d5, x19 //AES block 4k+5 - mov low
4634 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
4635 ldp x6, x7, [x0, #0] //AES block 4k+4 - load plaintext
4640 aese v0.16b, v28.16b
4641 aesmc v0.16b, v0.16b //AES block 4k+4 - round 10
4645 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
4646 eor v11.16b, v11.16b, v6.16b //GHASH block 4k+3 - low
4649 aesmc v2.16b, v2.16b //AES block 4k+6 - round 9
4652 aesmc v1.16b, v1.16b //AES block 4k+5 - round 10
4653 eor v10.16b, v10.16b, v4.16b //GHASH block 4k+3 - mid
4656 aesmc v3.16b, v3.16b //AES block 4k+7 - round 9
4657 add w12, w12, #1 //CTR block 4k+3
4659 aese v0.16b, v29.16b
4660 aesmc v0.16b, v0.16b //AES block 4k+4 - round 11
4661 eor v4.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
4664 aesmc v1.16b, v1.16b //AES block 4k+5 - round 11
4667 pmull v7.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
4669 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
4672 aesmc v2.16b, v2.16b //AES block 4k+6 - round 10
4673 eor x6, x6, x13 //AES block 4k+4 - round 14 low
4676 aesmc v1.16b, v1.16b //AES block 4k+5 - round 12
4677 eor v10.16b, v10.16b, v4.16b //MODULO - karatsuba tidy up
4680 aesmc v3.16b, v3.16b //AES block 4k+7 - round 10
4681 eor x7, x7, x14 //AES block 4k+4 - round 14 high
4683 fmov d4, x6 //AES block 4k+4 - mov low
4685 eor v7.16b, v9.16b, v7.16b //MODULO - fold into mid
4687 aese v0.16b, v30.16b
4688 aesmc v0.16b, v0.16b //AES block 4k+4 - round 12
4689 eor x20, x20, x14 //AES block 4k+5 - round 14 high
4692 aesmc v2.16b, v2.16b //AES block 4k+6 - round 11
4693 eor x24, x24, x14 //AES block 4k+7 - round 14 high
4696 aesmc v3.16b, v3.16b //AES block 4k+7 - round 11
4697 add w12, w12, #1 //CTR block 4k+8
4699 aese v0.16b, v31.16b //AES block 4k+4 - round 13
4700 fmov v4.d[1], x7 //AES block 4k+4 - mov high
4701 eor v10.16b, v10.16b, v7.16b //MODULO - fold into mid
4704 aesmc v2.16b, v2.16b //AES block 4k+6 - round 12
4705 fmov d7, x23 //AES block 4k+7 - mov low
4707 aese v1.16b, v31.16b //AES block 4k+5 - round 13
4708 fmov v5.d[1], x20 //AES block 4k+5 - mov high
4710 fmov d6, x21 //AES block 4k+6 - mov low
4713 fmov v6.d[1], x22 //AES block 4k+6 - mov high
4715 pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
4716 eor v4.16b, v4.16b, v0.16b //AES block 4k+4 - result
4719 fmov v0.d[1], x9 //CTR block 4k+8
4721 add w12, w12, #1 //CTR block 4k+9
4723 eor v5.16b, v5.16b, v1.16b //AES block 4k+5 - result
4728 aesmc v3.16b, v3.16b //AES block 4k+7 - round 12
4729 fmov v1.d[1], x9 //CTR block 4k+9
4731 aese v2.16b, v31.16b //AES block 4k+6 - round 13
4733 st1 { v4.16b}, [x2], #16 //AES block 4k+4 - store result
4736 eor v11.16b, v11.16b, v9.16b //MODULO - fold into low
4737 fmov v7.d[1], x24 //AES block 4k+7 - mov high
4739 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
4740 st1 { v5.16b}, [x2], #16 //AES block 4k+5 - store result
4741 add w12, w12, #1 //CTR block 4k+10
4743 aese v3.16b, v31.16b //AES block 4k+7 - round 13
4744 eor v6.16b, v6.16b, v2.16b //AES block 4k+6 - result
4747 st1 { v6.16b}, [x2], #16 //AES block 4k+6 - store result
4748 fmov v2.d[1], x9 //CTR block 4k+10
4751 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
4754 eor v7.16b, v7.16b, v3.16b //AES block 4k+7 - result
4755 st1 { v7.16b}, [x2], #16 //AES block 4k+7 - store result
4760 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
4764 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
4767 aese v0.16b, v18.16b
4768 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
4771 fmov v3.d[1], x9 //CTR block 4k+3
4775 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
4777 aese v0.16b, v19.16b
4778 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
4780 eor v4.16b, v4.16b, v11.16b //PRE 1
4781 rev64 v5.16b, v5.16b //GHASH block 4k+1 (t0 and t1 free)
4784 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
4787 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
4788 mov d10, v17.d[1] //GHASH block 4k - mid
4791 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
4793 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
4794 mov d8, v4.d[1] //GHASH block 4k - mid
4796 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
4799 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
4802 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
4803 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
4805 aese v0.16b, v20.16b
4806 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
4809 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
4812 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
4814 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
4816 pmull2 v4.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
4818 pmull v8.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
4821 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
4823 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+1 - high
4824 mov d4, v5.d[1] //GHASH block 4k+1 - mid
4826 aese v0.16b, v21.16b
4827 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
4828 eor v11.16b, v11.16b, v8.16b //GHASH block 4k+1 - low
4831 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
4833 eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid
4834 mov d8, v6.d[1] //GHASH block 4k+2 - mid
4836 aese v0.16b, v22.16b
4837 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
4841 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
4843 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
4844 eor v8.8b, v8.8b, v6.8b //GHASH block 4k+2 - mid
4845 add w12, w12, #1 //CTR block 4k+3
4847 pmull v5.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
4850 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
4853 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
4854 eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid
4856 pmull2 v4.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
4858 eor v11.16b, v11.16b, v5.16b //GHASH block 4k+2 - low
4859 ins v8.d[1], v8.d[0] //GHASH block 4k+2 - mid
4862 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
4864 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+2 - high
4865 mov d4, v7.d[1] //GHASH block 4k+3 - mid
4868 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
4870 pmull2 v8.1q, v8.2d, v16.2d //GHASH block 4k+2 - mid
4872 eor v4.8b, v4.8b, v7.8b //GHASH block 4k+3 - mid
4874 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
4877 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
4879 pmull v4.1q, v4.1d, v16.1d //GHASH block 4k+3 - mid
4880 eor v10.16b, v10.16b, v8.16b //GHASH block 4k+2 - mid
4882 aese v0.16b, v23.16b
4883 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
4886 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
4889 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
4891 aese v0.16b, v24.16b
4892 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
4896 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
4899 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
4900 eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high
4902 aese v0.16b, v25.16b
4903 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
4906 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
4910 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
4911 eor v10.16b, v10.16b, v4.16b //GHASH block 4k+3 - mid
4913 pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
4916 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
4919 aesmc v1.16b, v1.16b //AES block 4k+5 - round 9
4921 aese v0.16b, v26.16b
4922 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
4923 eor v11.16b, v11.16b, v6.16b //GHASH block 4k+3 - low
4926 aesmc v3.16b, v3.16b //AES block 4k+7 - round 9
4930 pmull v4.1q, v9.1d, v8.1d
4934 aesmc v3.16b, v3.16b //AES block 4k+7 - round 10
4937 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
4941 aesmc v1.16b, v1.16b //AES block 4k+5 - round 10
4943 aese v0.16b, v27.16b
4944 aesmc v0.16b, v0.16b //AES block 4k+4 - round 9
4947 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
4950 aesmc v1.16b, v1.16b //AES block 4k+5 - round 11
4953 aese v0.16b, v28.16b
4954 aesmc v0.16b, v0.16b //AES block 4k+4 - round 10
4957 aesmc v2.16b, v2.16b //AES block 4k+6 - round 9
4960 aesmc v1.16b, v1.16b //AES block 4k+5 - round 12
4962 aese v0.16b, v29.16b
4963 aesmc v0.16b, v0.16b //AES block 4k+4 - round 11
4967 aesmc v3.16b, v3.16b //AES block 4k+7 - round 11
4970 aesmc v2.16b, v2.16b //AES block 4k+6 - round 10
4972 aese v0.16b, v30.16b
4973 aesmc v0.16b, v0.16b //AES block 4k+4 - round 12
4975 pmull v4.1q, v10.1d, v8.1d
4978 aesmc v2.16b, v2.16b //AES block 4k+6 - round 11
4982 aesmc v3.16b, v3.16b //AES block 4k+7 - round 12
4984 aese v1.16b, v31.16b //AES block 4k+5 - round 13
4988 aesmc v2.16b, v2.16b //AES block 4k+6 - round 12
4990 aese v3.16b, v31.16b //AES block 4k+7 - round 13
4992 aese v0.16b, v31.16b //AES block 4k+4 - round 13
4994 aese v2.16b, v31.16b //AES block 4k+6 - round 13
5000 ldp x6, x7, [x0], #16 //AES block 4k+4 - load plaintext
5005 eor x6, x6, x13 //AES block 4k+4 - round 14 low
5006 eor x7, x7, x14 //AES block 4k+4 - round 14 high
5009 fmov d4, x6 //AES block 4k+4 - mov low
5011 fmov v4.d[1], x7 //AES block 4k+4 - mov high
5013 eor v5.16b, v4.16b, v0.16b //AES block 4k+4 - result
5021 sub w12, w12, #1
5028 sub w12, w12, #1
5033 sub w12, w12, #1
5036 st1 { v5.16b}, [x2], #16 //AES final-3 block - store result
5038 ldp x6, x7, [x0], #16 //AES final-2 block - load input low & high
5043 rev64 v4.16b, v5.16b //GHASH final-3 block
5045 eor x6, x6, x13 //AES final-2 block - round 14 low
5048 eor x7, x7, x14 //AES final-2 block - round 14 high
5050 mov d22, v4.d[1] //GHASH final-3 block - mid
5051 fmov d5, x6 //AES final-2 block - mov low
5053 fmov v5.d[1], x7 //AES final-2 block - mov high
5055 eor v22.8b, v22.8b, v4.8b //GHASH final-3 block - mid
5058 mov d10, v17.d[1] //GHASH final-3 block - mid
5060 pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low
5062 pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high
5064 pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid
5065 eor v5.16b, v5.16b, v1.16b //AES final-2 block - result
5068 st1 { v5.16b}, [x2], #16 //AES final-2 block - store result
5070 ldp x6, x7, [x0], #16 //AES final-1 block - load input low & high
5075 rev64 v4.16b, v5.16b //GHASH final-2 block
5077 eor x6, x6, x13 //AES final-1 block - round 14 low
5080 fmov d5, x6 //AES final-1 block - mov low
5081 eor x7, x7, x14 //AES final-1 block - round 14 high
5083 fmov v5.d[1], x7 //AES final-1 block - mov high
5087 pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high
5088 mov d22, v4.d[1] //GHASH final-2 block - mid
5090 pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low
5092 eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid
5094 eor v5.16b, v5.16b, v2.16b //AES final-1 block - result
5096 eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high
5098 pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid
5100 eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low
5102 eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid
5103 .L256_enc_blocks_more_than_1: //blocks left > 1
5105 st1 { v5.16b}, [x2], #16 //AES final-1 block - store result
5107 rev64 v4.16b, v5.16b //GHASH final-1 block
5109 ldp x6, x7, [x0], #16 //AES final block - load input low & high
5118 eor x6, x6, x13 //AES final block - round 14 low
5119 mov d22, v4.d[1] //GHASH final-1 block - mid
5121 pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high
5122 eor x7, x7, x14 //AES final block - round 14 high
5124 eor v22.8b, v22.8b, v4.8b //GHASH final-1 block - mid
5126 eor v9.16b, v9.16b, v20.16b //GHASH final-1 block - high
5128 ins v22.d[1], v22.d[0] //GHASH final-1 block - mid
5129 fmov d5, x6 //AES final block - mov low
5131 fmov v5.d[1], x7 //AES final block - mov high
5133 pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid
5135 pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low
5137 eor v5.16b, v5.16b, v3.16b //AES final block - result
5138 eor v10.16b, v10.16b, v22.16b //GHASH final-1 block - mid
5140 eor v11.16b, v11.16b, v21.16b //GHASH final-1 block - low
5141 .L256_enc_blocks_less_than_1: //blocks left <= 1
5146 sub x1, x1, #128 //bit_length -= 128
5148 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128])
5162 fmov v0.d[1], x7
5164 …and v5.16b, v5.16b, v0.16b //possibly partial last block has zeroes in h…
5170 …bif v5.16b, v18.16b, v0.16b //insert existing bytes in top end of resu…
5172 pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high
5173 mov d8, v4.d[1] //GHASH final block - mid
5180 pmull v21.1q, v4.1d, v12.1d //GHASH final block - low
5182 eor v9.16b, v9.16b, v20.16b //GHASH final block - high
5183 eor v8.8b, v8.8b, v4.8b //GHASH final block - mid
5185 pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid
5187 eor v11.16b, v11.16b, v21.16b //GHASH final block - low
5189 eor v10.16b, v10.16b, v8.16b //GHASH final block - mid
5192 eor v4.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
5196 eor v10.16b, v10.16b, v4.16b //MODULO - karatsuba tidy up
5198 pmull v7.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
5200 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
5202 eor v10.16b, v10.16b, v7.16b //MODULO - fold into mid
5204 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
5206 pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
5208 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
5213 eor v11.16b, v11.16b, v9.16b //MODULO - fold into low
5215 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
5233 .size aes_gcm_enc_256_kernel,.-aes_gcm_enc_256_kernel
5240 stp x19, x20, [sp, #-112]!
5263 sub x5, x5, #1 //byte_len - 1
5266 …and x5, x5, #0xffffffffffffffc0 //number of bytes to be processed in main loop (at least 1 byte mu…
5279 add w12, w12, #1 //increment rev_ctr32
5282 rev w9, w12 //CTR block 1
5283 add w12, w12, #1 //CTR block 1
5284 fmov d1, x10 //CTR block 1
5286 orr x9, x11, x9, lsl #32 //CTR block 1
5287 …ld1 { v0.16b}, [x16] //special case vector load initial counter so we …
5289 fmov v1.d[1], x9 //CTR block 1
5291 add w12, w12, #1 //CTR block 2
5296 fmov v2.d[1], x9 //CTR block 2
5302 fmov v3.d[1], x9 //CTR block 3
5303 add w12, w12, #1 //CTR block 3
5311 aese v0.16b, v18.16b
5312 aesmc v0.16b, v0.16b //AES block 0 - round 0
5319 aesmc v3.16b, v3.16b //AES block 3 - round 0
5326 aesmc v1.16b, v1.16b //AES block 1 - round 0
5333 aesmc v2.16b, v2.16b //AES block 2 - round 0
5336 aese v0.16b, v19.16b
5337 aesmc v0.16b, v0.16b //AES block 0 - round 1
5340 aesmc v1.16b, v1.16b //AES block 1 - round 1
5346 aesmc v2.16b, v2.16b //AES block 2 - round 1
5350 aesmc v3.16b, v3.16b //AES block 3 - round 1
5353 aese v0.16b, v20.16b
5354 aesmc v0.16b, v0.16b //AES block 0 - round 2
5360 aesmc v2.16b, v2.16b //AES block 2 - round 2
5364 aesmc v3.16b, v3.16b //AES block 3 - round 2
5366 aese v0.16b, v21.16b
5367 aesmc v0.16b, v0.16b //AES block 0 - round 3
5370 aesmc v1.16b, v1.16b //AES block 1 - round 2
5373 aesmc v3.16b, v3.16b //AES block 3 - round 3
5375 aese v0.16b, v22.16b
5376 aesmc v0.16b, v0.16b //AES block 0 - round 4
5380 aesmc v2.16b, v2.16b //AES block 2 - round 3
5383 aesmc v1.16b, v1.16b //AES block 1 - round 3
5386 aesmc v3.16b, v3.16b //AES block 3 - round 4
5389 aesmc v2.16b, v2.16b //AES block 2 - round 4
5392 aesmc v1.16b, v1.16b //AES block 1 - round 4
5395 aesmc v3.16b, v3.16b //AES block 3 - round 5
5397 aese v0.16b, v23.16b
5398 aesmc v0.16b, v0.16b //AES block 0 - round 5
5401 aesmc v1.16b, v1.16b //AES block 1 - round 5
5404 aesmc v2.16b, v2.16b //AES block 2 - round 5
5406 aese v0.16b, v24.16b
5407 aesmc v0.16b, v0.16b //AES block 0 - round 6
5410 aesmc v3.16b, v3.16b //AES block 3 - round 6
5413 aesmc v1.16b, v1.16b //AES block 1 - round 6
5416 aesmc v2.16b, v2.16b //AES block 2 - round 6
5418 aese v0.16b, v25.16b
5419 aesmc v0.16b, v0.16b //AES block 0 - round 7
5422 aesmc v1.16b, v1.16b //AES block 1 - round 7
5425 aesmc v3.16b, v3.16b //AES block 3 - round 7
5427 aese v0.16b, v26.16b
5428 aesmc v0.16b, v0.16b //AES block 0 - round 8
5431 aesmc v2.16b, v2.16b //AES block 2 - round 7
5434 aesmc v3.16b, v3.16b //AES block 3 - round 8
5437 aesmc v1.16b, v1.16b //AES block 1 - round 8
5439 aese v0.16b, v27.16b
5440 aesmc v0.16b, v0.16b //AES block 0 - round 9
5443 aesmc v2.16b, v2.16b //AES block 2 - round 8
5447 aesmc v1.16b, v1.16b //AES block 1 - round 9
5449 aese v0.16b, v28.16b
5450 aesmc v0.16b, v0.16b //AES block 0 - round 10
5453 aesmc v3.16b, v3.16b //AES block 3 - round 9
5456 aesmc v1.16b, v1.16b //AES block 1 - round 10
5459 aesmc v2.16b, v2.16b //AES block 2 - round 9
5462 aesmc v3.16b, v3.16b //AES block 3 - round 10
5464 aese v0.16b, v29.16b
5465 aesmc v0.16b, v0.16b //AES block 0 - round 11
5468 aesmc v2.16b, v2.16b //AES block 2 - round 10
5471 aesmc v3.16b, v3.16b //AES block 3 - round 11
5474 aesmc v1.16b, v1.16b //AES block 1 - round 11
5477 aesmc v2.16b, v2.16b //AES block 2 - round 11
5487 aesmc v1.16b, v1.16b //AES block 1 - round 12
5489 aese v0.16b, v30.16b
5490 aesmc v0.16b, v0.16b //AES block 0 - round 12
5493 aesmc v2.16b, v2.16b //AES block 2 - round 12
5496 aesmc v3.16b, v3.16b //AES block 3 - round 12
5499 aese v1.16b, v31.16b //AES block 1 - round 13
5501 aese v2.16b, v31.16b //AES block 2 - round 13
5504 aese v3.16b, v31.16b //AES block 3 - round 13
5506 aese v0.16b, v31.16b //AES block 0 - round 13
5509 ld1 {v4.16b, v5.16b}, [x0], #32 //AES block 0,1 - load ciphertext
5513 eor v0.16b, v4.16b, v0.16b //AES block 0 - result
5515 eor v1.16b, v5.16b, v1.16b //AES block 1 - result
5516 rev64 v5.16b, v5.16b //GHASH block 1
5517 ld1 {v6.16b}, [x0], #16 //AES block 2 - load ciphertext
5519 mov x7, v0.d[1] //AES block 0 - mov high
5521 mov x6, v0.d[0] //AES block 0 - mov low
5523 add w12, w12, #1 //CTR block 4
5528 fmov v0.d[1], x9 //CTR block 4
5530 add w12, w12, #1 //CTR block 5
5532 mov x19, v1.d[0] //AES block 1 - mov low
5535 mov x20, v1.d[1] //AES block 1 - mov high
5536 eor x7, x7, x14 //AES block 0 - round 14 high
5540 eor x6, x6, x13 //AES block 0 - round 14 low
5544 stp x6, x7, [x2], #16 //AES block 0 - store result
5547 ld1 {v7.16b}, [x0], #16 //AES block 3 - load ciphertext
5549 fmov v1.d[1], x9 //CTR block 5
5551 add w12, w12, #1 //CTR block 6
5553 eor x19, x19, x13 //AES block 1 - round 14 low
5559 eor x20, x20, x14 //AES block 1 - round 14 high
5563 stp x19, x20, [x2], #16 //AES block 1 - store result
5565 eor v2.16b, v6.16b, v2.16b //AES block 2 - result
5570 mov x21, v2.d[0] //AES block 4k+2 - mov low
5572 eor v3.16b, v7.16b, v3.16b //AES block 4k+3 - result
5574 aese v0.16b, v18.16b
5575 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
5576 mov x22, v2.d[1] //AES block 4k+2 - mov high
5579 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
5582 fmov v2.d[1], x9 //CTR block 4k+6
5583 eor v4.16b, v4.16b, v11.16b //PRE 1
5586 aese v0.16b, v19.16b
5587 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
5588 mov x24, v3.d[1] //AES block 4k+3 - mov high
5591 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
5592 mov x23, v3.d[0] //AES block 4k+3 - mov low
5594 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
5595 mov d8, v4.d[1] //GHASH block 4k - mid
5598 aese v0.16b, v20.16b
5599 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
5603 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
5604 fmov v3.d[1], x9 //CTR block 4k+7
5607 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
5608 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
5610 aese v0.16b, v21.16b
5611 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
5612 eor x22, x22, x14 //AES block 4k+2 - round 14 high
5617 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
5618 mov d10, v17.d[1] //GHASH block 4k - mid
5621 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
5625 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
5626 eor x21, x21, x13 //AES block 4k+2 - round 14 low
5631 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
5632 stp x21, x22, [x2], #16 //AES block 4k+2 - store result
5634 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
5636 pmull2 v4.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
5639 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
5642 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
5643 eor x23, x23, x13 //AES block 4k+3 - round 14 low
5647 pmull v8.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
5648 eor x24, x24, x14 //AES block 4k+3 - round 14 high
5652 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+1 - high
5655 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
5658 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
5659 mov d4, v5.d[1] //GHASH block 4k+1 - mid
5661 aese v0.16b, v22.16b
5662 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
5663 eor v11.16b, v11.16b, v8.16b //GHASH block 4k+1 - low
5666 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
5667 add w12, w12, #1 //CTR block 4k+7
5670 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
5671 mov d8, v6.d[1] //GHASH block 4k+2 - mid
5674 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
5675 eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid
5677 pmull v5.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
5680 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
5681 eor v8.8b, v8.8b, v6.8b //GHASH block 4k+2 - mid
5684 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
5686 aese v0.16b, v23.16b
5687 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
5688 eor v11.16b, v11.16b, v5.16b //GHASH block 4k+2 - low
5690 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
5694 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
5695 ins v8.d[1], v8.d[0] //GHASH block 4k+2 - mid
5697 aese v0.16b, v24.16b
5698 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
5699 add w12, w12, #1 //CTR block 4k+8
5702 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
5705 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
5706 eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid
5708 aese v0.16b, v25.16b
5709 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
5711 pmull2 v4.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
5712 mov d6, v7.d[1] //GHASH block 4k+3 - mid
5715 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
5717 pmull2 v8.1q, v8.2d, v16.2d //GHASH block 4k+2 - mid
5719 aese v0.16b, v26.16b
5720 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
5721 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+2 - high
5724 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
5726 pmull v4.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
5728 eor v10.16b, v10.16b, v8.16b //GHASH block 4k+2 - mid
5730 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
5732 aese v0.16b, v27.16b
5733 aesmc v0.16b, v0.16b //AES block 4k+4 - round 9
5734 eor v6.8b, v6.8b, v7.8b //GHASH block 4k+3 - mid
5737 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
5740 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
5741 eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high
5743 aese v0.16b, v28.16b
5744 aesmc v0.16b, v0.16b //AES block 4k+4 - round 10
5746 pmull v6.1q, v6.1d, v16.1d //GHASH block 4k+3 - mid
5750 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
5751 eor v11.16b, v11.16b, v4.16b //GHASH block 4k+3 - low
5753 aese v0.16b, v29.16b
5754 aesmc v0.16b, v0.16b //AES block 4k+4 - round 11
5757 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
5761 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
5762 eor v10.16b, v10.16b, v6.16b //GHASH block 4k+3 - mid
5764 aese v0.16b, v30.16b
5765 aesmc v0.16b, v0.16b //AES block 4k+4 - round 12
5767 pmull v7.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
5768 eor v6.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
5771 aesmc v1.16b, v1.16b //AES block 4k+5 - round 9
5772 ld1 {v4.16b}, [x0], #16 //AES block 4k+4 - load ciphertext
5774 aese v0.16b, v31.16b //AES block 4k+4 - round 13
5775 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
5778 aesmc v1.16b, v1.16b //AES block 4k+5 - round 10
5779 eor v10.16b, v10.16b, v6.16b //MODULO - karatsuba tidy up
5782 aesmc v2.16b, v2.16b //AES block 4k+6 - round 9
5783 ld1 {v5.16b}, [x0], #16 //AES block 4k+5 - load ciphertext
5786 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
5787 eor v0.16b, v4.16b, v0.16b //AES block 4k+4 - result
5790 aesmc v1.16b, v1.16b //AES block 4k+5 - round 11
5791 stp x23, x24, [x2], #16 //AES block 4k+3 - store result
5794 aesmc v2.16b, v2.16b //AES block 4k+6 - round 10
5795 eor v10.16b, v10.16b, v7.16b //MODULO - fold into mid
5798 aesmc v3.16b, v3.16b //AES block 4k+7 - round 9
5799 ld1 {v6.16b}, [x0], #16 //AES block 4k+6 - load ciphertext
5802 aesmc v1.16b, v1.16b //AES block 4k+5 - round 12
5803 ld1 {v7.16b}, [x0], #16 //AES block 4k+7 - load ciphertext
5806 aesmc v2.16b, v2.16b //AES block 4k+6 - round 11
5807 mov x7, v0.d[1] //AES block 4k+4 - mov high
5810 aesmc v3.16b, v3.16b //AES block 4k+7 - round 10
5811 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
5813 aese v1.16b, v31.16b //AES block 4k+5 - round 13
5814 mov x6, v0.d[0] //AES block 4k+4 - mov low
5817 aesmc v2.16b, v2.16b //AES block 4k+6 - round 12
5821 aesmc v3.16b, v3.16b //AES block 4k+7 - round 11
5822 fmov v0.d[1], x9 //CTR block 4k+8
5824 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
5825 eor v1.16b, v5.16b, v1.16b //AES block 4k+5 - result
5828 aese v2.16b, v31.16b //AES block 4k+6 - round 13
5832 add w12, w12, #1 //CTR block 4k+9
5834 eor x6, x6, x13 //AES block 4k+4 - round 14 low
5838 eor x7, x7, x14 //AES block 4k+4 - round 14 high
5842 mov x20, v1.d[1] //AES block 4k+5 - mov high
5843 eor v2.16b, v6.16b, v2.16b //AES block 4k+6 - result
5844 eor v11.16b, v11.16b, v8.16b //MODULO - fold into low
5847 aesmc v3.16b, v3.16b //AES block 4k+7 - round 12
5848 mov x19, v1.d[0] //AES block 4k+5 - mov low
5851 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
5853 fmov v1.d[1], x9 //CTR block 4k+9
5855 add w12, w12, #1 //CTR block 4k+10
5857 aese v3.16b, v31.16b //AES block 4k+7 - round 13
5861 eor x20, x20, x14 //AES block 4k+5 - round 14 high
5865 stp x6, x7, [x2], #16 //AES block 4k+4 - store result
5867 eor x19, x19, x13 //AES block 4k+5 - round 14 low
5871 stp x19, x20, [x2], #16 //AES block 4k+5 - store result
5874 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
5880 mov x21, v2.d[0] //AES block 4k+2 - mov low
5881 eor v3.16b, v7.16b, v3.16b //AES block 4k+3 - result
5883 aese v0.16b, v18.16b
5884 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
5885 mov x22, v2.d[1] //AES block 4k+2 - mov high
5888 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
5891 fmov v2.d[1], x9 //CTR block 4k+6
5893 eor v4.16b, v4.16b, v11.16b //PRE 1
5897 mov x23, v3.d[0] //AES block 4k+3 - mov low
5900 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
5901 mov x24, v3.d[1] //AES block 4k+3 - mov high
5903 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
5904 mov d8, v4.d[1] //GHASH block 4k - mid
5907 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
5908 fmov v3.d[1], x9 //CTR block 4k+7
5911 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
5912 mov d10, v17.d[1] //GHASH block 4k - mid
5914 aese v0.16b, v19.16b
5915 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
5916 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
5918 pmull2 v4.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
5921 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
5925 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
5927 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
5928 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+1 - high
5930 pmull v8.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
5933 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
5934 mov d4, v5.d[1] //GHASH block 4k+1 - mid
5936 aese v0.16b, v20.16b
5937 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
5940 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
5941 eor v11.16b, v11.16b, v8.16b //GHASH block 4k+1 - low
5944 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
5946 aese v0.16b, v21.16b
5947 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
5948 mov d8, v6.d[1] //GHASH block 4k+2 - mid
5951 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
5952 eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid
5954 pmull v5.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
5956 aese v0.16b, v22.16b
5957 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
5960 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
5961 eor v8.8b, v8.8b, v6.8b //GHASH block 4k+2 - mid
5963 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
5965 aese v0.16b, v23.16b
5966 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
5967 eor v11.16b, v11.16b, v5.16b //GHASH block 4k+2 - low
5970 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
5972 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
5973 eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid
5975 pmull2 v4.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
5978 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
5979 ins v8.d[1], v8.d[0] //GHASH block 4k+2 - mid
5982 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
5985 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
5986 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+2 - high
5988 pmull v4.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
5991 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
5992 mov d6, v7.d[1] //GHASH block 4k+3 - mid
5995 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
5997 pmull2 v8.1q, v8.2d, v16.2d //GHASH block 4k+2 - mid
6000 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
6001 eor v6.8b, v6.8b, v7.8b //GHASH block 4k+3 - mid
6004 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
6007 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
6008 eor v10.16b, v10.16b, v8.16b //GHASH block 4k+2 - mid
6011 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
6013 aese v0.16b, v24.16b
6014 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
6018 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
6019 eor v11.16b, v11.16b, v4.16b //GHASH block 4k+3 - low
6021 pmull v6.1q, v6.1d, v16.1d //GHASH block 4k+3 - mid
6024 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
6025 eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high
6028 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
6030 aese v0.16b, v25.16b
6031 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
6032 eor v10.16b, v10.16b, v6.16b //GHASH block 4k+3 - mid
6035 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
6038 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
6039 eor v6.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
6042 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
6044 aese v0.16b, v26.16b
6045 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
6049 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
6052 aesmc v1.16b, v1.16b //AES block 4k+5 - round 9
6053 eor v10.16b, v10.16b, v6.16b //MODULO - karatsuba tidy up
6055 pmull v7.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
6058 aesmc v2.16b, v2.16b //AES block 4k+6 - round 9
6059 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
6062 aesmc v3.16b, v3.16b //AES block 4k+7 - round 9
6064 aese v0.16b, v27.16b
6065 aesmc v0.16b, v0.16b //AES block 4k+4 - round 9
6066 eor v10.16b, v10.16b, v7.16b //MODULO - fold into mid
6069 aesmc v2.16b, v2.16b //AES block 4k+6 - round 10
6072 aesmc v3.16b, v3.16b //AES block 4k+7 - round 10
6074 aese v0.16b, v28.16b
6075 aesmc v0.16b, v0.16b //AES block 4k+4 - round 10
6076 eor x22, x22, x14 //AES block 4k+2 - round 14 high
6081 aesmc v1.16b, v1.16b //AES block 4k+5 - round 10
6082 eor x23, x23, x13 //AES block 4k+3 - round 14 low
6087 aesmc v2.16b, v2.16b //AES block 4k+6 - round 11
6088 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
6090 aese v0.16b, v29.16b
6091 aesmc v0.16b, v0.16b //AES block 4k+4 - round 11
6092 add w12, w12, #1 //CTR block 4k+7
6095 aesmc v1.16b, v1.16b //AES block 4k+5 - round 11
6096 eor x21, x21, x13 //AES block 4k+2 - round 14 low
6102 aesmc v2.16b, v2.16b //AES block 4k+6 - round 12
6104 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
6105 eor x24, x24, x14 //AES block 4k+3 - round 14 high
6111 aesmc v3.16b, v3.16b //AES block 4k+7 - round 11
6112 stp x21, x22, [x2], #16 //AES block 4k+2 - store result
6115 aesmc v1.16b, v1.16b //AES block 4k+5 - round 12
6116 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
6118 aese v0.16b, v30.16b
6119 aesmc v0.16b, v0.16b //AES block 4k+4 - round 12
6120 stp x23, x24, [x2], #16 //AES block 4k+3 - store result
6123 aesmc v3.16b, v3.16b //AES block 4k+7 - round 12
6124 eor v11.16b, v11.16b, v8.16b //MODULO - fold into low
6126 aese v1.16b, v31.16b //AES block 4k+5 - round 13
6128 aese v0.16b, v31.16b //AES block 4k+4 - round 13
6130 aese v3.16b, v31.16b //AES block 4k+7 - round 13
6132 aese v2.16b, v31.16b //AES block 4k+6 - round 13
6133 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
6137 ld1 { v5.16b}, [x0], #16 //AES block 4k+4 - load ciphertext
6139 eor v0.16b, v5.16b, v0.16b //AES block 4k+4 - result
6141 mov x6, v0.d[0] //AES block 4k+4 - mov low
6143 mov x7, v0.d[1] //AES block 4k+4 - mov high
6148 eor x6, x6, x13 //AES block 4k+4 - round 14 low
6153 eor x7, x7, x14 //AES block 4k+4 - round 14 high
6159 sub w12, w12, #1
6170 sub w12, w12, #1
6176 sub w12, w12, #1
6179 rev64 v4.16b, v5.16b //GHASH final-3 block
6180 ld1 { v5.16b}, [x0], #16 //AES final-2 block - load ciphertext
6182 stp x6, x7, [x2], #16 //AES final-3 block - store result
6184 mov d10, v17.d[1] //GHASH final-3 block - mid
6188 eor v0.16b, v5.16b, v1.16b //AES final-2 block - result
6190 mov d22, v4.d[1] //GHASH final-3 block - mid
6192 mov x6, v0.d[0] //AES final-2 block - mov low
6194 mov x7, v0.d[1] //AES final-2 block - mov high
6196 eor v22.8b, v22.8b, v4.8b //GHASH final-3 block - mid
6200 pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high
6202 pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid
6203 eor x6, x6, x13 //AES final-2 block - round 14 low
6208 pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low
6209 eor x7, x7, x14 //AES final-2 block - round 14 high
6215 rev64 v4.16b, v5.16b //GHASH final-2 block
6216 ld1 { v5.16b}, [x0], #16 //AES final-1 block - load ciphertext
6219 stp x6, x7, [x2], #16 //AES final-2 block - store result
6221 eor v0.16b, v5.16b, v2.16b //AES final-1 block - result
6223 mov d22, v4.d[1] //GHASH final-2 block - mid
6225 pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low
6227 pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high
6229 eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid
6230 mov x6, v0.d[0] //AES final-1 block - mov low
6232 mov x7, v0.d[1] //AES final-1 block - mov high
6233 eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low
6236 pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid
6238 eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high
6239 eor x6, x6, x13 //AES final-1 block - round 14 low
6244 eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid
6245 eor x7, x7, x14 //AES final-1 block - round 14 high
6249 .L256_dec_blocks_more_than_1: //blocks left > 1
6251 stp x6, x7, [x2], #16 //AES final-1 block - store result
6252 rev64 v4.16b, v5.16b //GHASH final-1 block
6254 ld1 { v5.16b}, [x0], #16 //AES final block - load ciphertext
6259 mov d22, v4.d[1] //GHASH final-1 block - mid
6261 eor v0.16b, v5.16b, v3.16b //AES final block - result
6263 pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high
6265 eor v22.8b, v22.8b, v4.8b //GHASH final-1 block - mid
6267 pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low
6268 mov x6, v0.d[0] //AES final block - mov low
6270 ins v22.d[1], v22.d[0] //GHASH final-1 block - mid
6272 mov x7, v0.d[1] //AES final block - mov high
6274 pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid
6275 eor x6, x6, x13 //AES final block - round 14 low
6279 eor v11.16b, v11.16b, v21.16b //GHASH final-1 block - low
6281 eor v9.16b, v9.16b, v20.16b //GHASH final-1 block - high
6283 eor v10.16b, v10.16b, v22.16b //GHASH final-1 block - mid
6284 eor x7, x7, x14 //AES final block - round 14 high
6288 .L256_dec_blocks_less_than_1: //blocks left <= 1
6293 sub x1, x1, #128 //bit_length -= 128
6297 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128])
6310 mov v0.d[1], x10
6327 …and v5.16b, v5.16b, v0.16b //possibly partial last block has zeroes in …
6333 pmull v21.1q, v4.1d, v12.1d //GHASH final block - low
6335 mov d8, v4.d[1] //GHASH final block - mid
6337 eor v8.8b, v8.8b, v4.8b //GHASH final block - mid
6339 pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high
6341 pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid
6343 eor v9.16b, v9.16b, v20.16b //GHASH final block - high
6345 eor v11.16b, v11.16b, v21.16b //GHASH final block - low
6347 eor v10.16b, v10.16b, v8.16b //GHASH final block - mid
6350 eor v6.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
6354 eor v10.16b, v10.16b, v6.16b //MODULO - karatsuba tidy up
6356 pmull v7.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
6358 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
6360 eor v10.16b, v10.16b, v7.16b //MODULO - fold into mid
6362 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
6364 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
6366 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
6368 eor v11.16b, v11.16b, v8.16b //MODULO - fold into low
6374 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
6392 .size aes_gcm_dec_256_kernel,.-aes_gcm_dec_256_kernel