Lines Matching +full:2 +full:- +full:4

1 /* Do not modify. This file is auto-generated from aes-gcm-armv8_64.pl. */
5 .arch armv8-a+crypto
9 .align 4
13 stp x19, x20, [sp, #-112]!
39 ld1 {v18.4s}, [x8], #16 //load rk0
41 sub x5, x5, #1 //byte_len - 1
53 ld1 {v19.4s}, [x8], #16 //load rk1
63 rev w9, w12 //CTR block 2
65 fmov d2, x10 //CTR block 2
66 orr x9, x11, x9, lsl #32 //CTR block 2
67 add w12, w12, #1 //CTR block 2
69 fmov v2.d[1], x9 //CTR block 2
73 ld1 {v20.4s}, [x8], #16 //load rk2
83 aesmc v1.16b, v1.16b //AES block 1 - round 0
84 ld1 {v21.4s}, [x8], #16 //load rk3
87 aesmc v2.16b, v2.16b //AES block 2 - round 0
94 aesmc v0.16b, v0.16b //AES block 0 - round 0
95 ld1 {v22.4s}, [x8], #16 //load rk4
98 aesmc v3.16b, v3.16b //AES block 3 - round 0
99 ld1 {v23.4s}, [x8], #16 //load rk5
102 aesmc v2.16b, v2.16b //AES block 2 - round 1
103 trn2 v17.2d, v14.2d, v15.2d //h4l | h3l
106 aesmc v0.16b, v0.16b //AES block 0 - round 1
107 ld1 {v24.4s}, [x8], #16 //load rk6
110 aesmc v1.16b, v1.16b //AES block 1 - round 1
111 ld1 {v25.4s}, [x8], #16 //load rk7
114 aesmc v3.16b, v3.16b //AES block 3 - round 1
115 trn1 v9.2d, v14.2d, v15.2d //h4h | h3h
118 aesmc v0.16b, v0.16b //AES block 0 - round 2
119 ld1 {v26.4s}, [x8], #16 //load rk8
122 aesmc v1.16b, v1.16b //AES block 1 - round 2
129 aesmc v3.16b, v3.16b //AES block 3 - round 2
132 aesmc v2.16b, v2.16b //AES block 2 - round 2
136 aesmc v0.16b, v0.16b //AES block 0 - round 3
139 aesmc v1.16b, v1.16b //AES block 1 - round 3
142 aesmc v2.16b, v2.16b //AES block 2 - round 3
143 ld1 {v27.4s}, [x8], #16 //load rk9
146 aesmc v3.16b, v3.16b //AES block 3 - round 3
149 trn2 v16.2d, v12.2d, v13.2d //h2l | h1l
152 aesmc v3.16b, v3.16b //AES block 3 - round 4
156 aesmc v2.16b, v2.16b //AES block 2 - round 4
157 cmp x0, x5 //check if we have <= 4 blocks
160 aesmc v0.16b, v0.16b //AES block 0 - round 4
163 aesmc v3.16b, v3.16b //AES block 3 - round 5
166 aesmc v2.16b, v2.16b //AES block 2 - round 5
169 aesmc v0.16b, v0.16b //AES block 0 - round 5
172 aesmc v3.16b, v3.16b //AES block 3 - round 6
175 aesmc v1.16b, v1.16b //AES block 1 - round 4
178 aesmc v2.16b, v2.16b //AES block 2 - round 6
179 trn1 v8.2d, v12.2d, v13.2d //h2h | h1h
182 aesmc v0.16b, v0.16b //AES block 0 - round 6
185 aesmc v1.16b, v1.16b //AES block 1 - round 5
188 aesmc v3.16b, v3.16b //AES block 3 - round 7
191 aesmc v0.16b, v0.16b //AES block 0 - round 7
194 aesmc v1.16b, v1.16b //AES block 1 - round 6
197 aesmc v2.16b, v2.16b //AES block 2 - round 7
200 aesmc v0.16b, v0.16b //AES block 0 - round 8
203 aesmc v1.16b, v1.16b //AES block 1 - round 7
206 aesmc v2.16b, v2.16b //AES block 2 - round 8
209 aesmc v3.16b, v3.16b //AES block 3 - round 8
212 aesmc v1.16b, v1.16b //AES block 1 - round 8
214 aese v2.16b, v27.16b //AES block 2 - round 9
216 aese v0.16b, v27.16b //AES block 0 - round 9
220 aese v1.16b, v27.16b //AES block 1 - round 9
222 aese v3.16b, v27.16b //AES block 3 - round 9
225 ldp x6, x7, [x0, #0] //AES block 0 - load plaintext
230 ldp x21, x22, [x0, #32] //AES block 2 - load plaintext
235 ldp x19, x20, [x0, #16] //AES block 1 - load plaintext
240 ldp x23, x24, [x0, #48] //AES block 3 - load plaintext
245 eor x6, x6, x13 //AES block 0 - round 10 low
246 eor x7, x7, x14 //AES block 0 - round 10 high
248 eor x21, x21, x13 //AES block 2 - round 10 low
249 fmov d4, x6 //AES block 0 - mov low
251 eor x19, x19, x13 //AES block 1 - round 10 low
252 eor x22, x22, x14 //AES block 2 - round 10 high
253 fmov v4.d[1], x7 //AES block 0 - mov high
255 fmov d5, x19 //AES block 1 - mov low
256 eor x20, x20, x14 //AES block 1 - round 10 high
258 eor x23, x23, x13 //AES block 3 - round 10 low
259 fmov v5.d[1], x20 //AES block 1 - mov high
261 fmov d6, x21 //AES block 2 - mov low
262 eor x24, x24, x14 //AES block 3 - round 10 high
263 rev w9, w12 //CTR block 4
265 fmov v6.d[1], x22 //AES block 2 - mov high
266 orr x9, x11, x9, lsl #32 //CTR block 4
268 eor v4.16b, v4.16b, v0.16b //AES block 0 - result
269 fmov d0, x10 //CTR block 4
270 add w12, w12, #1 //CTR block 4
272 fmov v0.d[1], x9 //CTR block 4
275 eor v5.16b, v5.16b, v1.16b //AES block 1 - result
283 fmov d7, x23 //AES block 3 - mov low
285 st1 { v4.16b}, [x2], #16 //AES block 0 - store result
287 fmov v7.d[1], x24 //AES block 3 - mov high
291 eor v6.16b, v6.16b, v2.16b //AES block 2 - result
292 st1 { v5.16b}, [x2], #16 //AES block 1 - store result
299 st1 { v6.16b}, [x2], #16 //AES block 2 - store result
303 eor v7.16b, v7.16b, v3.16b //AES block 3 - result
304 st1 { v7.16b}, [x2], #16 //AES block 3 - store result
308 ldp x23, x24, [x0, #48] //AES block 4k+3 - load plaintext
313 rev64 v4.16b, v4.16b //GHASH block 4k (only t0 is free)
314 rev64 v6.16b, v6.16b //GHASH block 4k+2 (t0, t1, and t2 free)
317 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
318 fmov d3, x10 //CTR block 4k+3
321 rev64 v5.16b, v5.16b //GHASH block 4k+1 (t0 and t1 free)
324 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
325 add w12, w12, #1 //CTR block 4k+3
326 fmov v3.d[1], x9 //CTR block 4k+3
329 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
330 mov d31, v6.d[1] //GHASH block 4k+2 - mid
333 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
334 mov d30, v5.d[1] //GHASH block 4k+1 - mid
337 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
341 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
342 eor x24, x24, x14 //AES block 4k+3 - round 10 high
344 pmull2 v28.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
345 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
346 ldp x6, x7, [x0, #0] //AES block 4k+4 - load plaintext
352 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
353 rev w9, w12 //CTR block 4k+8
355 eor v30.8b, v30.8b, v5.8b //GHASH block 4k+1 - mid
356 mov d8, v4.d[1] //GHASH block 4k - mid
357 orr x9, x11, x9, lsl #32 //CTR block 4k+8
359 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
360 add w12, w12, #1 //CTR block 4k+8
361 mov d10, v17.d[1] //GHASH block 4k - mid
364 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
366 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
367 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
370 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
373 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
374 eor v9.16b, v9.16b, v28.16b //GHASH block 4k+1 - high
376 pmull v28.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
378 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
379 rev64 v7.16b, v7.16b //GHASH block 4k+3 (t0, t1, t2 and t3 free)
381 pmull v30.1q, v30.1d, v17.1d //GHASH block 4k+1 - mid
383 pmull v29.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
384 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
386 pmull2 v8.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
387 eor x7, x7, x14 //AES block 4k+4 - round 10 high
389 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+1 - mid
390 mov d30, v7.d[1] //GHASH block 4k+3 - mid
393 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
394 eor v11.16b, v11.16b, v29.16b //GHASH block 4k+1 - low
397 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
398 eor x6, x6, x13 //AES block 4k+4 - round 10 low
401 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
402 eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid
404 pmull2 v4.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
407 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
408 eor v9.16b, v9.16b, v8.16b //GHASH block 4k+2 - high
410 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
412 pmull v29.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
415 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
416 eor v11.16b, v11.16b, v28.16b //GHASH block 4k+2 - low
419 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
422 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
426 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
427 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+3 - high
430 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
431 ldp x19, x20, [x0, #16] //AES block 4k+5 - load plaintext
437 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
438 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
441 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
442 ldp x21, x22, [x0, #32] //AES block 4k+6 - load plaintext
447 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
448 eor v11.16b, v11.16b, v29.16b //GHASH block 4k+3 - low
451 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
452 eor x19, x19, x13 //AES block 4k+5 - round 10 low
455 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
456 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid
459 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
460 eor x23, x23, x13 //AES block 4k+3 - round 10 low
463 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
464 eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
466 fmov d4, x6 //AES block 4k+4 - mov low
468 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
469 fmov v4.d[1], x7 //AES block 4k+4 - mov high
472 fmov d7, x23 //AES block 4k+3 - mov low
473 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
476 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
477 fmov d5, x19 //AES block 4k+5 - mov low
480 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
481 eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up
484 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
485 eor x20, x20, x14 //AES block 4k+5 - round 10 high
488 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
489 fmov v5.d[1], x20 //AES block 4k+5 - mov high
492 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
493 fmov v7.d[1], x24 //AES block 4k+3 - mov high
496 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
500 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
501 eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid
503 aese v0.16b, v27.16b //AES block 4k+4 - round 9
504 eor x21, x21, x13 //AES block 4k+6 - round 10 low
505 eor x22, x22, x14 //AES block 4k+6 - round 10 high
508 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
509 fmov d6, x21 //AES block 4k+6 - mov low
511 aese v1.16b, v27.16b //AES block 4k+5 - round 9
512 fmov v6.d[1], x22 //AES block 4k+6 - mov high
515 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
516 eor v4.16b, v4.16b, v0.16b //AES block 4k+4 - result
518 fmov d0, x10 //CTR block 4k+8
520 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
522 fmov v0.d[1], x9 //CTR block 4k+8
523 rev w9, w12 //CTR block 4k+9
524 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
527 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
528 eor v5.16b, v5.16b, v1.16b //AES block 4k+5 - result
530 add w12, w12, #1 //CTR block 4k+9
531 orr x9, x11, x9, lsl #32 //CTR block 4k+9
532 fmov d1, x10 //CTR block 4k+9
534 pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
535 fmov v1.d[1], x9 //CTR block 4k+9
536 rev w9, w12 //CTR block 4k+10
538 aese v2.16b, v27.16b //AES block 4k+6 - round 9
539 st1 { v4.16b}, [x2], #16 //AES block 4k+4 - store result
540 eor v6.16b, v6.16b, v2.16b //AES block 4k+6 - result
541 orr x9, x11, x9, lsl #32 //CTR block 4k+10
543 aese v3.16b, v27.16b //AES block 4k+7 - round 9
544 add w12, w12, #1 //CTR block 4k+10
545 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
546 fmov d2, x10 //CTR block 4k+10
548 eor v11.16b, v11.16b, v9.16b //MODULO - fold into low
549 st1 { v5.16b}, [x2], #16 //AES block 4k+5 - store result
551 fmov v2.d[1], x9 //CTR block 4k+10
552 st1 { v6.16b}, [x2], #16 //AES block 4k+6 - store result
553 rev w9, w12 //CTR block 4k+11
555 orr x9, x11, x9, lsl #32 //CTR block 4k+11
556 eor v7.16b, v7.16b, v3.16b //AES block 4k+3 - result
558 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
559 st1 { v7.16b}, [x2], #16 //AES block 4k+3 - store result
563 rev64 v4.16b, v4.16b //GHASH block 4k (only t0 is free)
564 fmov d3, x10 //CTR block 4k+3
565 rev64 v5.16b, v5.16b //GHASH block 4k+1 (t0 and t1 free)
568 add w12, w12, #1 //CTR block 4k+3
569 fmov v3.d[1], x9 //CTR block 4k+3
572 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
573 rev64 v6.16b, v6.16b //GHASH block 4k+2 (t0, t1, and t2 free)
575 pmull v29.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
577 rev64 v7.16b, v7.16b //GHASH block 4k+3 (t0, t1, t2 and t3 free)
580 pmull2 v28.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
583 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
584 mov d30, v5.d[1] //GHASH block 4k+1 - mid
586 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
587 mov d8, v4.d[1] //GHASH block 4k - mid
589 mov d31, v6.d[1] //GHASH block 4k+2 - mid
590 mov d10, v17.d[1] //GHASH block 4k - mid
593 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
594 eor v30.8b, v30.8b, v5.8b //GHASH block 4k+1 - mid
596 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
598 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
599 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
602 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
604 pmull v30.1q, v30.1d, v17.1d //GHASH block 4k+1 - mid
605 eor v11.16b, v11.16b, v29.16b //GHASH block 4k+1 - low
607 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
610 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
611 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
614 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
616 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+1 - mid
617 mov d30, v7.d[1] //GHASH block 4k+3 - mid
620 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
621 eor v9.16b, v9.16b, v28.16b //GHASH block 4k+1 - high
623 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
625 pmull2 v8.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
626 eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid
628 pmull2 v4.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
630 pmull v28.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
633 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
634 eor v9.16b, v9.16b, v8.16b //GHASH block 4k+2 - high
637 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
639 pmull v29.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
643 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
644 eor v11.16b, v11.16b, v28.16b //GHASH block 4k+2 - low
647 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
649 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
650 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
653 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
656 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
657 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+3 - high
660 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
662 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid
666 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
667 eor v11.16b, v11.16b, v29.16b //GHASH block 4k+3 - low
670 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
676 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
679 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
683 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
686 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
690 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
693 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
696 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
699 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
703 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
706 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
709 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
712 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
716 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
719 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
722 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
727 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
731 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
734 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
738 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
740 aese v3.16b, v27.16b //AES block 4k+7 - round 9
743 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
745 aese v0.16b, v27.16b //AES block 4k+4 - round 9
747 aese v1.16b, v27.16b //AES block 4k+5 - round 9
750 aese v2.16b, v27.16b //AES block 4k+6 - round 9
754 ldp x6, x7, [x0], #16 //AES block 4k+4 - load plaintext
762 eor x6, x6, x13 //AES block 4k+4 - round 10 low
763 eor x7, x7, x14 //AES block 4k+4 - round 10 high
765 fmov d4, x6 //AES block 4k+4 - mov low
767 fmov v4.d[1], x7 //AES block 4k+4 - mov high
769 eor v5.16b, v4.16b, v0.16b //AES block 4k+4 - result
793 st1 { v5.16b}, [x2], #16 //AES final-3 block - store result
795 ldp x6, x7, [x0], #16 //AES final-2 block - load input low & high
800 rev64 v4.16b, v5.16b //GHASH final-3 block
803 eor x7, x7, x14 //AES final-2 block - round 10 high
804 eor x6, x6, x13 //AES final-2 block - round 10 low
806 fmov d5, x6 //AES final-2 block - mov low
809 fmov v5.d[1], x7 //AES final-2 block - mov high
811 pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low
812 mov d22, v4.d[1] //GHASH final-3 block - mid
814 pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high
816 mov d10, v17.d[1] //GHASH final-3 block - mid
818 eor v5.16b, v5.16b, v1.16b //AES final-2 block - result
819 eor v22.8b, v22.8b, v4.8b //GHASH final-3 block - mid
821 pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid
822 .L128_enc_blocks_more_than_2: //blocks left > 2
824 st1 { v5.16b}, [x2], #16 //AES final-2 block - store result
826 rev64 v4.16b, v5.16b //GHASH final-2 block
827 ldp x6, x7, [x0], #16 //AES final-1 block - load input low & high
834 eor x6, x6, x13 //AES final-1 block - round 10 low
836 fmov d5, x6 //AES final-1 block - mov low
837 eor x7, x7, x14 //AES final-1 block - round 10 high
839 pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high
840 fmov v5.d[1], x7 //AES final-1 block - mov high
842 mov d22, v4.d[1] //GHASH final-2 block - mid
844 pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low
846 eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high
848 eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid
850 eor v5.16b, v5.16b, v2.16b //AES final-1 block - result
852 eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low
854 pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid
858 eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid
861 st1 { v5.16b}, [x2], #16 //AES final-1 block - store result
863 rev64 v4.16b, v5.16b //GHASH final-1 block
864 ldp x6, x7, [x0], #16 //AES final block - load input low & high
871 eor x7, x7, x14 //AES final block - round 10 high
872 eor x6, x6, x13 //AES final block - round 10 low
874 fmov d5, x6 //AES final block - mov low
876 pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high
877 fmov v5.d[1], x7 //AES final block - mov high
879 mov d22, v4.d[1] //GHASH final-1 block - mid
881 pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low
883 eor v22.8b, v22.8b, v4.8b //GHASH final-1 block - mid
885 eor v5.16b, v5.16b, v3.16b //AES final block - result
887 ins v22.d[1], v22.d[0] //GHASH final-1 block - mid
889 pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid
891 eor v11.16b, v11.16b, v21.16b //GHASH final-1 block - low
893 eor v9.16b, v9.16b, v20.16b //GHASH final-1 block - high
895 eor v10.16b, v10.16b, v22.16b //GHASH final-1 block - mid
903 sub x1, x1, #128 //bit_length -= 128
905 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128])
925 mov d8, v4.d[1] //GHASH final block - mid
927 pmull v21.1q, v4.1d, v12.1d //GHASH final block - low
930 eor v8.8b, v8.8b, v4.8b //GHASH final block - mid
936 pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high
938 pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid
940 eor v11.16b, v11.16b, v21.16b //GHASH final block - low
942 eor v9.16b, v9.16b, v20.16b //GHASH final block - high
944 eor v10.16b, v10.16b, v8.16b //GHASH final block - mid
947 eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
951 eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up
953 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
955 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
957 eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid
959 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
961 pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
963 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
967 eor v11.16b, v11.16b, v9.16b //MODULO - fold into low
972 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
989 .size aes_gcm_enc_128_kernel,.-aes_gcm_enc_128_kernel
992 .align 4
996 stp x19, x20, [sp, #-112]!
1018 sub x5, x5, #1 //byte_len - 1
1019 ld1 {v18.4s}, [x8], #16 //load rk0
1029 fmov d2, x10 //CTR block 2
1031 ld1 {v19.4s}, [x8], #16 //load rk1
1039 aesmc v0.16b, v0.16b //AES block 0 - round 0
1043 ld1 {v20.4s}, [x8], #16 //load rk2
1047 rev w9, w12 //CTR block 2
1048 add w12, w12, #1 //CTR block 2
1051 aesmc v0.16b, v0.16b //AES block 0 - round 1
1052 orr x9, x11, x9, lsl #32 //CTR block 2
1054 fmov v2.d[1], x9 //CTR block 2
1065 aesmc v1.16b, v1.16b //AES block 1 - round 0
1066 ld1 {v21.4s}, [x8], #16 //load rk3
1069 aesmc v0.16b, v0.16b //AES block 0 - round 2
1070 ld1 {v22.4s}, [x8], #16 //load rk4
1073 aesmc v2.16b, v2.16b //AES block 2 - round 0
1074 ld1 {v23.4s}, [x8], #16 //load rk5
1077 aesmc v1.16b, v1.16b //AES block 1 - round 1
1078 ld1 {v24.4s}, [x8], #16 //load rk6
1081 aesmc v3.16b, v3.16b //AES block 3 - round 0
1084 aesmc v2.16b, v2.16b //AES block 2 - round 1
1087 aesmc v1.16b, v1.16b //AES block 1 - round 2
1090 aesmc v3.16b, v3.16b //AES block 3 - round 1
1096 aesmc v0.16b, v0.16b //AES block 0 - round 3
1097 ld1 {v25.4s}, [x8], #16 //load rk7
1100 aesmc v1.16b, v1.16b //AES block 1 - round 3
1103 aesmc v3.16b, v3.16b //AES block 3 - round 2
1106 aesmc v2.16b, v2.16b //AES block 2 - round 2
1107 ld1 {v26.4s}, [x8], #16 //load rk8
1110 aesmc v1.16b, v1.16b //AES block 1 - round 4
1113 aesmc v3.16b, v3.16b //AES block 3 - round 3
1116 aesmc v2.16b, v2.16b //AES block 2 - round 3
1122 aesmc v0.16b, v0.16b //AES block 0 - round 4
1123 ld1 {v27.4s}, [x8], #16 //load rk9
1126 aesmc v1.16b, v1.16b //AES block 1 - round 5
1129 aesmc v2.16b, v2.16b //AES block 2 - round 4
1132 aesmc v3.16b, v3.16b //AES block 3 - round 4
1135 aesmc v0.16b, v0.16b //AES block 0 - round 5
1138 aesmc v2.16b, v2.16b //AES block 2 - round 5
1144 aesmc v3.16b, v3.16b //AES block 3 - round 5
1147 aesmc v0.16b, v0.16b //AES block 0 - round 6
1150 aesmc v1.16b, v1.16b //AES block 1 - round 6
1153 aesmc v3.16b, v3.16b //AES block 3 - round 6
1156 aesmc v2.16b, v2.16b //AES block 2 - round 6
1157 trn1 v8.2d, v12.2d, v13.2d //h2h | h1h
1163 trn2 v16.2d, v12.2d, v13.2d //h2l | h1l
1167 aesmc v1.16b, v1.16b //AES block 1 - round 7
1170 aesmc v2.16b, v2.16b //AES block 2 - round 7
1173 aesmc v0.16b, v0.16b //AES block 0 - round 7
1177 aesmc v3.16b, v3.16b //AES block 3 - round 7
1180 aesmc v1.16b, v1.16b //AES block 1 - round 8
1181 trn2 v17.2d, v14.2d, v15.2d //h4l | h3l
1184 aesmc v2.16b, v2.16b //AES block 2 - round 8
1187 aesmc v3.16b, v3.16b //AES block 3 - round 8
1190 aesmc v0.16b, v0.16b //AES block 0 - round 8
1191 trn1 v9.2d, v14.2d, v15.2d //h4h | h3h
1193 aese v2.16b, v27.16b //AES block 2 - round 9
1195 aese v3.16b, v27.16b //AES block 3 - round 9
1197 aese v0.16b, v27.16b //AES block 0 - round 9
1198 cmp x0, x5 //check if we have <= 4 blocks
1200 aese v1.16b, v27.16b //AES block 1 - round 9
1204 …ld1 {v4.16b, v5.16b}, [x0], #32 //AES block 0 - load ciphertext; AES block 1 - load …
1206 eor v1.16b, v5.16b, v1.16b //AES block 1 - result
1207 ld1 {v6.16b}, [x0], #16 //AES block 2 - load ciphertext
1209 eor v0.16b, v4.16b, v0.16b //AES block 0 - result
1211 rev w9, w12 //CTR block 4
1213 orr x9, x11, x9, lsl #32 //CTR block 4
1214 add w12, w12, #1 //CTR block 4
1215 ld1 {v7.16b}, [x0], #16 //AES block 3 - load ciphertext
1218 mov x19, v1.d[0] //AES block 1 - mov low
1220 mov x20, v1.d[1] //AES block 1 - mov high
1222 mov x6, v0.d[0] //AES block 0 - mov low
1225 mov x7, v0.d[1] //AES block 0 - mov high
1227 fmov d0, x10 //CTR block 4
1229 fmov v0.d[1], x9 //CTR block 4
1231 eor x19, x19, x13 //AES block 1 - round 10 low
1245 eor x20, x20, x14 //AES block 1 - round 10 high
1249 eor x6, x6, x13 //AES block 0 - round 10 low
1253 eor v2.16b, v6.16b, v2.16b //AES block 2 - result
1255 eor x7, x7, x14 //AES block 0 - round 10 high
1259 stp x6, x7, [x2], #16 //AES block 0 - store result
1261 stp x19, x20, [x2], #16 //AES block 1 - store result
1265 eor v3.16b, v7.16b, v3.16b //AES block 4k+3 - result
1267 mov x21, v2.d[0] //AES block 4k+2 - mov low
1269 pmull2 v28.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
1270 mov x22, v2.d[1] //AES block 4k+2 - mov high
1273 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
1274 fmov d2, x10 //CTR block 4k+6
1276 rev64 v6.16b, v6.16b //GHASH block 4k+2
1277 fmov v2.d[1], x9 //CTR block 4k+6
1278 rev w9, w12 //CTR block 4k+7
1280 mov x23, v3.d[0] //AES block 4k+3 - mov low
1282 mov d30, v5.d[1] //GHASH block 4k+1 - mid
1285 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
1286 rev64 v7.16b, v7.16b //GHASH block 4k+3
1288 pmull v29.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
1289 mov x24, v3.d[1] //AES block 4k+3 - mov high
1290 orr x9, x11, x9, lsl #32 //CTR block 4k+7
1292 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
1293 fmov d3, x10 //CTR block 4k+7
1294 eor v30.8b, v30.8b, v5.8b //GHASH block 4k+1 - mid
1297 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
1298 fmov v3.d[1], x9 //CTR block 4k+7
1301 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
1302 mov d10, v17.d[1] //GHASH block 4k - mid
1304 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
1305 eor v11.16b, v11.16b, v29.16b //GHASH block 4k+1 - low
1307 pmull v29.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
1310 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
1311 mov d8, v4.d[1] //GHASH block 4k - mid
1314 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
1315 eor v9.16b, v9.16b, v28.16b //GHASH block 4k+1 - high
1318 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
1320 pmull v28.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
1321 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
1324 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
1325 eor x23, x23, x13 //AES block 4k+3 - round 10 low
1329 pmull v30.1q, v30.1d, v17.1d //GHASH block 4k+1 - mid
1330 eor x22, x22, x14 //AES block 4k+2 - round 10 high
1334 mov d31, v6.d[1] //GHASH block 4k+2 - mid
1337 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
1338 eor v11.16b, v11.16b, v28.16b //GHASH block 4k+2 - low
1340 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
1343 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
1344 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
1347 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
1350 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
1351 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+1 - mid
1353 pmull2 v8.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
1356 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
1357 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
1359 pmull2 v4.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
1362 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
1363 mov d30, v7.d[1] //GHASH block 4k+3 - mid
1366 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
1367 eor v9.16b, v9.16b, v8.16b //GHASH block 4k+2 - high
1369 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
1370 eor x24, x24, x14 //AES block 4k+3 - round 10 high
1375 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
1376 eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid
1379 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
1380 eor x21, x21, x13 //AES block 4k+2 - round 10 low
1385 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
1389 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
1390 eor v11.16b, v11.16b, v29.16b //GHASH block 4k+3 - low
1393 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
1396 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
1397 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
1400 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
1401 stp x21, x22, [x2], #16 //AES block 4k+2 - store result
1403 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
1404 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+3 - high
1405 ld1 {v4.16b}, [x0], #16 //AES block 4k+3 - load ciphertext
1408 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
1409 add w12, w12, #1 //CTR block 4k+7
1412 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
1416 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
1417 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid
1420 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
1421 stp x23, x24, [x2], #16 //AES block 4k+3 - store result
1424 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
1425 eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
1428 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
1429 rev w9, w12 //CTR block 4k+8
1431 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
1432 ld1 {v5.16b}, [x0], #16 //AES block 4k+4 - load ciphertext
1433 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
1435 aese v0.16b, v27.16b //AES block 4k+4 - round 9
1436 orr x9, x11, x9, lsl #32 //CTR block 4k+8
1439 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
1440 eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up
1442 aese v1.16b, v27.16b //AES block 4k+5 - round 9
1445 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
1446 eor v0.16b, v4.16b, v0.16b //AES block 4k+4 - result
1449 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
1450 ld1 {v6.16b}, [x0], #16 //AES block 4k+5 - load ciphertext
1452 add w12, w12, #1 //CTR block 4k+8
1453 eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid
1454 eor v1.16b, v5.16b, v1.16b //AES block 4k+5 - result
1457 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
1458 ld1 {v7.16b}, [x0], #16 //AES block 4k+6 - load ciphertext
1461 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
1463 rev64 v5.16b, v5.16b //GHASH block 4k+5
1464 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
1465 mov x7, v0.d[1] //AES block 4k+4 - mov high
1468 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
1469 mov x6, v0.d[0] //AES block 4k+4 - mov low
1472 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
1473 fmov d0, x10 //CTR block 4k+8
1475 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
1476 fmov v0.d[1], x9 //CTR block 4k+8
1477 rev w9, w12 //CTR block 4k+9
1479 aese v2.16b, v27.16b //AES block 4k+6 - round 9
1480 orr x9, x11, x9, lsl #32 //CTR block 4k+9
1481 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
1484 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
1485 eor x7, x7, x14 //AES block 4k+4 - round 10 high
1489 eor v11.16b, v11.16b, v8.16b //MODULO - fold into low
1490 mov x20, v1.d[1] //AES block 4k+5 - mov high
1491 eor x6, x6, x13 //AES block 4k+4 - round 10 low
1495 eor v2.16b, v6.16b, v2.16b //AES block 4k+6 - result
1496 mov x19, v1.d[0] //AES block 4k+5 - mov low
1497 add w12, w12, #1 //CTR block 4k+9
1499 aese v3.16b, v27.16b //AES block 4k+7 - round 9
1500 fmov d1, x10 //CTR block 4k+9
1503 rev64 v4.16b, v4.16b //GHASH block 4k+4
1504 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
1505 fmov v1.d[1], x9 //CTR block 4k+9
1507 rev w9, w12 //CTR block 4k+10
1508 add w12, w12, #1 //CTR block 4k+10
1510 eor x20, x20, x14 //AES block 4k+5 - round 10 high
1514 stp x6, x7, [x2], #16 //AES block 4k+4 - store result
1516 eor x19, x19, x13 //AES block 4k+5 - round 10 low
1520 stp x19, x20, [x2], #16 //AES block 4k+5 - store result
1522 orr x9, x11, x9, lsl #32 //CTR block 4k+10
1527 mov x21, v2.d[0] //AES block 4k+2 - mov low
1528 mov d30, v5.d[1] //GHASH block 4k+1 - mid
1531 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
1532 eor v3.16b, v7.16b, v3.16b //AES block 4k+3 - result
1535 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
1536 mov x22, v2.d[1] //AES block 4k+2 - mov high
1539 fmov d2, x10 //CTR block 4k+6
1540 rev64 v6.16b, v6.16b //GHASH block 4k+2
1543 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
1544 fmov v2.d[1], x9 //CTR block 4k+6
1546 rev w9, w12 //CTR block 4k+7
1547 mov x23, v3.d[0] //AES block 4k+3 - mov low
1548 eor v30.8b, v30.8b, v5.8b //GHASH block 4k+1 - mid
1550 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
1551 mov d10, v17.d[1] //GHASH block 4k - mid
1552 mov x24, v3.d[1] //AES block 4k+3 - mov high
1555 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
1556 mov d31, v6.d[1] //GHASH block 4k+2 - mid
1559 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
1560 orr x9, x11, x9, lsl #32 //CTR block 4k+7
1562 pmull v29.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
1563 mov d8, v4.d[1] //GHASH block 4k - mid
1564 fmov d3, x10 //CTR block 4k+7
1567 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
1568 fmov v3.d[1], x9 //CTR block 4k+7
1570 pmull v30.1q, v30.1d, v17.1d //GHASH block 4k+1 - mid
1571 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
1573 rev64 v7.16b, v7.16b //GHASH block 4k+3
1576 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
1577 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
1579 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
1582 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
1583 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
1585 pmull2 v28.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
1587 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
1588 eor v11.16b, v11.16b, v29.16b //GHASH block 4k+1 - low
1590 pmull v29.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
1592 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
1593 eor v9.16b, v9.16b, v28.16b //GHASH block 4k+1 - high
1595 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+1 - mid
1597 pmull2 v4.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
1599 pmull2 v8.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
1600 mov d30, v7.d[1] //GHASH block 4k+3 - mid
1603 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
1604 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
1606 pmull v28.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
1608 eor v9.16b, v9.16b, v8.16b //GHASH block 4k+2 - high
1612 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
1613 eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid
1615 eor v11.16b, v11.16b, v28.16b //GHASH block 4k+2 - low
1618 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
1619 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+3 - high
1622 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
1623 eor x23, x23, x13 //AES block 4k+3 - round 10 low
1627 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
1628 eor x21, x21, x13 //AES block 4k+2 - round 10 low
1632 eor v11.16b, v11.16b, v29.16b //GHASH block 4k+3 - low
1635 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
1638 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
1642 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
1645 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
1646 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid
1649 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
1652 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
1653 eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
1656 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
1659 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
1662 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
1665 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
1666 eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up
1668 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
1671 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
1672 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
1675 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
1678 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
1679 eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid
1682 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
1685 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
1688 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
1691 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
1692 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
1695 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
1698 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
1700 aese v1.16b, v27.16b //AES block 4k+5 - round 9
1702 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
1703 eor x24, x24, x14 //AES block 4k+3 - round 10 high
1708 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
1709 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
1712 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
1715 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
1716 eor v11.16b, v11.16b, v8.16b //MODULO - fold into low
1719 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
1722 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
1723 eor x22, x22, x14 //AES block 4k+2 - round 10 high
1727 aese v0.16b, v27.16b //AES block 4k+4 - round 9
1728 stp x21, x22, [x2], #16 //AES block 4k+2 - store result
1730 aese v2.16b, v27.16b //AES block 4k+6 - round 9
1731 add w12, w12, #1 //CTR block 4k+7
1732 stp x23, x24, [x2], #16 //AES block 4k+3 - store result
1734 aese v3.16b, v27.16b //AES block 4k+7 - round 9
1735 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
1739 ld1 { v5.16b}, [x0], #16 //AES block 4k+4 - load ciphertext
1741 eor v0.16b, v5.16b, v0.16b //AES block 4k+4 - result
1743 mov x7, v0.d[1] //AES block 4k+4 - mov high
1745 mov x6, v0.d[0] //AES block 4k+4 - mov low
1749 eor x7, x7, x14 //AES block 4k+4 - round 10 high
1754 eor x6, x6, x13 //AES block 4k+4 - round 10 low
1780 rev64 v4.16b, v5.16b //GHASH final-3 block
1781 ld1 { v5.16b}, [x0], #16 //AES final-2 block - load ciphertext
1785 mov d10, v17.d[1] //GHASH final-3 block - mid
1786 stp x6, x7, [x2], #16 //AES final-3 block - store result
1787 eor v0.16b, v5.16b, v1.16b //AES final-2 block - result
1789 mov d22, v4.d[1] //GHASH final-3 block - mid
1790 mov x7, v0.d[1] //AES final-2 block - mov high
1792 pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low
1793 mov x6, v0.d[0] //AES final-2 block - mov low
1795 pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high
1797 eor v22.8b, v22.8b, v4.8b //GHASH final-3 block - mid
1800 eor x7, x7, x14 //AES final-2 block - round 10 high
1804 pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid
1805 eor x6, x6, x13 //AES final-2 block - round 10 low
1809 .L128_dec_blocks_more_than_2: //blocks left > 2
1811 rev64 v4.16b, v5.16b //GHASH final-2 block
1812 ld1 { v5.16b}, [x0], #16 //AES final-1 block - load ciphertext
1816 eor v0.16b, v5.16b, v2.16b //AES final-1 block - result
1817 stp x6, x7, [x2], #16 //AES final-2 block - store result
1819 mov d22, v4.d[1] //GHASH final-2 block - mid
1821 pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low
1823 pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high
1824 mov x6, v0.d[0] //AES final-1 block - mov low
1826 mov x7, v0.d[1] //AES final-1 block - mov high
1827 eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid
1831 pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid
1833 eor x6, x6, x13 //AES final-1 block - round 10 low
1837 eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low
1839 eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high
1841 eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid
1842 eor x7, x7, x14 //AES final-1 block - round 10 high
1848 rev64 v4.16b, v5.16b //GHASH final-1 block
1850 ld1 { v5.16b}, [x0], #16 //AES final block - load ciphertext
1853 mov d22, v4.d[1] //GHASH final-1 block - mid
1855 eor v0.16b, v5.16b, v3.16b //AES final block - result
1857 eor v22.8b, v22.8b, v4.8b //GHASH final-1 block - mid
1859 stp x6, x7, [x2], #16 //AES final-1 block - store result
1860 mov x6, v0.d[0] //AES final block - mov low
1862 mov x7, v0.d[1] //AES final block - mov high
1863 ins v22.d[1], v22.d[0] //GHASH final-1 block - mid
1865 pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low
1867 pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high
1869 pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid
1872 eor v11.16b, v11.16b, v21.16b //GHASH final-1 block - low
1874 eor v9.16b, v9.16b, v20.16b //GHASH final-1 block - high
1875 eor x7, x7, x14 //AES final block - round 10 high
1879 eor x6, x6, x13 //AES final block - round 10 low
1883 eor v10.16b, v10.16b, v22.16b //GHASH final-1 block - mid
1890 sub x1, x1, #128 //bit_length -= 128
1892 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128])
1916 pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high
1917 mov d8, v4.d[1] //GHASH final block - mid
1919 eor v8.8b, v8.8b, v4.8b //GHASH final block - mid
1920 eor v9.16b, v9.16b, v20.16b //GHASH final block - high
1922 pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid
1924 pmull v21.1q, v4.1d, v12.1d //GHASH final block - low
1934 eor v10.16b, v10.16b, v8.16b //GHASH final block - mid
1937 eor v11.16b, v11.16b, v21.16b //GHASH final block - low
1942 eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
1944 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
1946 eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up
1953 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
1955 eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid
1957 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
1959 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
1960 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
1962 eor v11.16b, v11.16b, v8.16b //MODULO - fold into low
1964 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
1982 .size aes_gcm_dec_128_kernel,.-aes_gcm_dec_128_kernel
1985 .align 4
1989 stp x19, x20, [sp, #-112]!
2009 ld1 {v18.4s}, [x8], #16 //load rk0
2011 ld1 {v19.4s}, [x8], #16 //load rk1
2013 ld1 {v20.4s}, [x8], #16 //load rk2
2016 ld1 {v21.4s}, [x8], #16 //load rk3
2019 ld1 {v22.4s}, [x8], #16 //load rk4
2033 rev w9, w12 //CTR block 2
2034 add w12, w12, #1 //CTR block 2
2036 fmov d2, x10 //CTR block 2
2037 orr x9, x11, x9, lsl #32 //CTR block 2
2039 fmov v2.d[1], x9 //CTR block 2
2043 ld1 {v23.4s}, [x8], #16 //load rk5
2047 ld1 {v24.4s}, [x8], #16 //load rk6
2049 ld1 {v25.4s}, [x8], #16 //load rk7
2052 aesmc v0.16b, v0.16b //AES block 0 - round 0
2058 aesmc v3.16b, v3.16b //AES block 3 - round 0
2059 ld1 {v26.4s}, [x8], #16 //load rk8
2062 aesmc v1.16b, v1.16b //AES block 1 - round 0
2068 aesmc v2.16b, v2.16b //AES block 2 - round 0
2069 ld1 {v27.4s}, [x8], #16 //load rk9
2072 aesmc v0.16b, v0.16b //AES block 0 - round 1
2073 ld1 {v28.4s}, [x8], #16 //load rk10
2076 aesmc v1.16b, v1.16b //AES block 1 - round 1
2082 aesmc v2.16b, v2.16b //AES block 2 - round 1
2083 ld1 {v29.4s}, [x8], #16 //load rk11
2086 aesmc v3.16b, v3.16b //AES block 3 - round 1
2092 aesmc v0.16b, v0.16b //AES block 0 - round 2
2095 aesmc v2.16b, v2.16b //AES block 2 - round 2
2098 aesmc v3.16b, v3.16b //AES block 3 - round 2
2101 aesmc v0.16b, v0.16b //AES block 0 - round 3
2102 trn1 v9.2d, v14.2d, v15.2d //h4h | h3h
2105 aesmc v2.16b, v2.16b //AES block 2 - round 3
2108 aesmc v1.16b, v1.16b //AES block 1 - round 2
2109 trn2 v17.2d, v14.2d, v15.2d //h4l | h3l
2112 aesmc v0.16b, v0.16b //AES block 0 - round 4
2115 aesmc v3.16b, v3.16b //AES block 3 - round 3
2118 aesmc v1.16b, v1.16b //AES block 1 - round 3
2121 aesmc v0.16b, v0.16b //AES block 0 - round 5
2124 aesmc v2.16b, v2.16b //AES block 2 - round 4
2127 aesmc v1.16b, v1.16b //AES block 1 - round 4
2130 aesmc v0.16b, v0.16b //AES block 0 - round 6
2133 aesmc v3.16b, v3.16b //AES block 3 - round 4
2136 aesmc v2.16b, v2.16b //AES block 2 - round 5
2139 aesmc v1.16b, v1.16b //AES block 1 - round 5
2142 aesmc v3.16b, v3.16b //AES block 3 - round 5
2145 aesmc v2.16b, v2.16b //AES block 2 - round 6
2151 aesmc v1.16b, v1.16b //AES block 1 - round 6
2154 aesmc v3.16b, v3.16b //AES block 3 - round 6
2157 aesmc v0.16b, v0.16b //AES block 0 - round 7
2160 aesmc v1.16b, v1.16b //AES block 1 - round 7
2161 trn2 v16.2d, v12.2d, v13.2d //h2l | h1l
2164 aesmc v3.16b, v3.16b //AES block 3 - round 7
2167 aesmc v0.16b, v0.16b //AES block 0 - round 8
2170 aesmc v2.16b, v2.16b //AES block 2 - round 7
2171 trn1 v8.2d, v12.2d, v13.2d //h2h | h1h
2174 aesmc v1.16b, v1.16b //AES block 1 - round 8
2177 aesmc v3.16b, v3.16b //AES block 3 - round 8
2180 aesmc v2.16b, v2.16b //AES block 2 - round 8
2183 aesmc v0.16b, v0.16b //AES block 0 - round 9
2186 aesmc v3.16b, v3.16b //AES block 3 - round 9
2189 aesmc v2.16b, v2.16b //AES block 2 - round 9
2192 aesmc v1.16b, v1.16b //AES block 1 - round 9
2195 aesmc v0.16b, v0.16b //AES block 0 - round 10
2198 aesmc v2.16b, v2.16b //AES block 2 - round 10
2201 aesmc v1.16b, v1.16b //AES block 1 - round 10
2206 aesmc v3.16b, v3.16b //AES block 3 - round 10
2207 sub x5, x5, #1 //byte_len - 1
2214 aese v2.16b, v29.16b //AES block 2 - round 11
2218 aese v1.16b, v29.16b //AES block 1 - round 11
2219 cmp x0, x5 //check if we have <= 4 blocks
2221 aese v0.16b, v29.16b //AES block 0 - round 11
2224 aese v3.16b, v29.16b //AES block 3 - round 11
2227 rev w9, w12 //CTR block 4
2228 ldp x6, x7, [x0, #0] //AES block 0 - load plaintext
2233 orr x9, x11, x9, lsl #32 //CTR block 4
2234 ldp x21, x22, [x0, #32] //AES block 2 - load plaintext
2239 ldp x23, x24, [x0, #48] //AES block 3 - load plaintext
2244 ldp x19, x20, [x0, #16] //AES block 1 - load plaintext
2252 eor x6, x6, x13 //AES block 0 - round 12 low
2254 eor x7, x7, x14 //AES block 0 - round 12 high
2255 eor x22, x22, x14 //AES block 2 - round 12 high
2256 fmov d4, x6 //AES block 0 - mov low
2258 eor x24, x24, x14 //AES block 3 - round 12 high
2259 fmov v4.d[1], x7 //AES block 0 - mov high
2261 eor x21, x21, x13 //AES block 2 - round 12 low
2262 eor x19, x19, x13 //AES block 1 - round 12 low
2264 fmov d5, x19 //AES block 1 - mov low
2265 eor x20, x20, x14 //AES block 1 - round 12 high
2267 fmov v5.d[1], x20 //AES block 1 - mov high
2269 eor x23, x23, x13 //AES block 3 - round 12 low
2270 fmov d6, x21 //AES block 2 - mov low
2272 add w12, w12, #1 //CTR block 4
2273 eor v4.16b, v4.16b, v0.16b //AES block 0 - result
2274 fmov d0, x10 //CTR block 4
2276 fmov v0.d[1], x9 //CTR block 4
2282 fmov d7, x23 //AES block 3 - mov low
2283 st1 { v4.16b}, [x2], #16 //AES block 0 - store result
2285 fmov v6.d[1], x22 //AES block 2 - mov high
2287 eor v5.16b, v5.16b, v1.16b //AES block 1 - result
2289 st1 { v5.16b}, [x2], #16 //AES block 1 - store result
2291 fmov v7.d[1], x24 //AES block 3 - mov high
2299 eor v6.16b, v6.16b, v2.16b //AES block 2 - result
2306 st1 { v6.16b}, [x2], #16 //AES block 2 - store result
2308 eor v7.16b, v7.16b, v3.16b //AES block 3 - result
2309 st1 { v7.16b}, [x2], #16 //AES block 3 - store result
2314 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
2315 rev64 v5.16b, v5.16b //GHASH block 4k+1 (t0 and t1 free)
2318 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
2319 ldp x19, x20, [x0, #16] //AES block 4k+5 - load plaintext
2325 fmov d3, x10 //CTR block 4k+3
2326 rev64 v4.16b, v4.16b //GHASH block 4k (only t0 is free)
2329 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
2330 fmov v3.d[1], x9 //CTR block 4k+3
2332 pmull2 v30.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
2333 rev64 v7.16b, v7.16b //GHASH block 4k+3 (t0, t1, t2 and t3 free)
2334 ldp x21, x22, [x0, #32] //AES block 4k+6 - load plaintext
2340 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
2341 ldp x23, x24, [x0, #48] //AES block 4k+3 - load plaintext
2346 pmull v31.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
2350 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
2353 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
2354 rev64 v6.16b, v6.16b //GHASH block 4k+2 (t0, t1, and t2 free)
2357 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
2358 eor x24, x24, x14 //AES block 4k+3 - round 12 high
2360 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
2361 mov d8, v4.d[1] //GHASH block 4k - mid
2364 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
2367 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
2368 eor x21, x21, x13 //AES block 4k+6 - round 12 low
2370 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
2371 eor v11.16b, v11.16b, v31.16b //GHASH block 4k+1 - low
2374 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
2375 eor x19, x19, x13 //AES block 4k+5 - round 12 low
2378 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
2379 mov d31, v6.d[1] //GHASH block 4k+2 - mid
2381 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
2382 mov d4, v5.d[1] //GHASH block 4k+1 - mid
2385 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
2388 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
2390 mov d10, v17.d[1] //GHASH block 4k - mid
2391 eor v9.16b, v9.16b, v30.16b //GHASH block 4k+1 - high
2394 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
2395 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
2397 pmull2 v30.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
2400 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
2401 eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid
2404 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
2406 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
2407 eor x20, x20, x14 //AES block 4k+5 - round 12 high
2408 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
2411 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
2412 add w12, w12, #1 //CTR block 4k+3
2415 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
2416 eor v9.16b, v9.16b, v30.16b //GHASH block 4k+2 - high
2418 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
2419 eor x22, x22, x14 //AES block 4k+6 - round 12 high
2421 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
2422 eor x23, x23, x13 //AES block 4k+3 - round 12 low
2423 mov d30, v7.d[1] //GHASH block 4k+3 - mid
2425 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
2426 rev w9, w12 //CTR block 4k+8
2428 pmull v8.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
2429 orr x9, x11, x9, lsl #32 //CTR block 4k+8
2432 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
2433 eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid
2436 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
2437 ldp x6, x7, [x0, #0] //AES block 4k+4 - load plaintext
2443 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
2444 eor v11.16b, v11.16b, v8.16b //GHASH block 4k+2 - low
2447 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
2451 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
2454 pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
2455 eor x7, x7, x14 //AES block 4k+4 - round 12 high
2456 eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid
2459 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
2460 eor x6, x6, x13 //AES block 4k+4 - round 12 low
2463 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
2467 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
2468 eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high
2471 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
2472 fmov d5, x19 //AES block 4k+5 - mov low
2475 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
2476 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
2479 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
2480 fmov v5.d[1], x20 //AES block 4k+5 - mov high
2483 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
2484 eor v11.16b, v11.16b, v6.16b //GHASH block 4k+3 - low
2486 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
2488 fmov d4, x6 //AES block 4k+4 - mov low
2491 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
2492 fmov v4.d[1], x7 //AES block 4k+4 - mov high
2495 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
2496 fmov d7, x23 //AES block 4k+3 - mov low
2498 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid
2499 eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
2500 add w12, w12, #1 //CTR block 4k+8
2503 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
2504 fmov v7.d[1], x24 //AES block 4k+3 - mov high
2506 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
2507 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
2508 fmov d6, x21 //AES block 4k+6 - mov low
2511 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
2514 aesmc v0.16b, v0.16b //AES block 4k+4 - round 9
2515 eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up
2518 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
2521 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
2524 aesmc v1.16b, v1.16b //AES block 4k+5 - round 9
2527 aesmc v0.16b, v0.16b //AES block 4k+4 - round 10
2528 eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid
2531 aesmc v3.16b, v3.16b //AES block 4k+7 - round 9
2534 aesmc v2.16b, v2.16b //AES block 4k+6 - round 9
2536 aese v0.16b, v29.16b //AES block 4k+4 - round 11
2539 aesmc v1.16b, v1.16b //AES block 4k+5 - round 10
2540 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
2543 aesmc v2.16b, v2.16b //AES block 4k+6 - round 10
2545 eor v4.16b, v4.16b, v0.16b //AES block 4k+4 - result
2546 fmov d0, x10 //CTR block 4k+8
2548 aese v1.16b, v29.16b //AES block 4k+5 - round 11
2549 fmov v0.d[1], x9 //CTR block 4k+8
2550 rev w9, w12 //CTR block 4k+9
2552 pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
2553 fmov v6.d[1], x22 //AES block 4k+6 - mov high
2554 st1 { v4.16b}, [x2], #16 //AES block 4k+4 - store result
2557 aesmc v3.16b, v3.16b //AES block 4k+7 - round 10
2558 orr x9, x11, x9, lsl #32 //CTR block 4k+9
2560 eor v5.16b, v5.16b, v1.16b //AES block 4k+5 - result
2561 add w12, w12, #1 //CTR block 4k+9
2562 fmov d1, x10 //CTR block 4k+9
2564 aese v2.16b, v29.16b //AES block 4k+6 - round 11
2565 fmov v1.d[1], x9 //CTR block 4k+9
2566 rev w9, w12 //CTR block 4k+10
2568 add w12, w12, #1 //CTR block 4k+10
2569 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
2570 orr x9, x11, x9, lsl #32 //CTR block 4k+10
2572 st1 { v5.16b}, [x2], #16 //AES block 4k+5 - store result
2573 eor v11.16b, v11.16b, v9.16b //MODULO - fold into low
2575 aese v3.16b, v29.16b //AES block 4k+7 - round 11
2576 eor v6.16b, v6.16b, v2.16b //AES block 4k+6 - result
2577 fmov d2, x10 //CTR block 4k+10
2579 st1 { v6.16b}, [x2], #16 //AES block 4k+6 - store result
2580 fmov v2.d[1], x9 //CTR block 4k+10
2581 rev w9, w12 //CTR block 4k+11
2583 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
2584 orr x9, x11, x9, lsl #32 //CTR block 4k+11
2586 eor v7.16b, v7.16b, v3.16b //AES block 4k+3 - result
2587 st1 { v7.16b}, [x2], #16 //AES block 4k+3 - store result
2592 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
2593 rev64 v4.16b, v4.16b //GHASH block 4k (only t0 is free)
2595 fmov d3, x10 //CTR block 4k+3
2597 add w12, w12, #1 //CTR block 4k+3
2600 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
2601 rev64 v5.16b, v5.16b //GHASH block 4k+1 (t0 and t1 free)
2604 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
2606 fmov v3.d[1], x9 //CTR block 4k+3
2608 mov d10, v17.d[1] //GHASH block 4k - mid
2611 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
2612 rev64 v6.16b, v6.16b //GHASH block 4k+2 (t0, t1, and t2 free)
2614 pmull2 v30.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
2616 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
2617 mov d8, v4.d[1] //GHASH block 4k - mid
2619 pmull v31.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
2620 rev64 v7.16b, v7.16b //GHASH block 4k+3 (t0, t1, t2 and t3 free)
2622 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
2624 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
2625 mov d4, v5.d[1] //GHASH block 4k+1 - mid
2627 eor v11.16b, v11.16b, v31.16b //GHASH block 4k+1 - low
2628 mov d31, v6.d[1] //GHASH block 4k+2 - mid
2631 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
2632 eor v9.16b, v9.16b, v30.16b //GHASH block 4k+1 - high
2634 pmull2 v30.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
2636 eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid
2637 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
2640 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
2643 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
2644 eor v9.16b, v9.16b, v30.16b //GHASH block 4k+2 - high
2647 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
2650 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
2651 mov d30, v7.d[1] //GHASH block 4k+3 - mid
2653 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
2654 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
2657 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
2659 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
2660 eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid
2663 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
2665 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
2667 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
2669 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
2670 eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high
2672 pmull v8.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
2675 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
2676 eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid
2679 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
2682 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
2683 eor v11.16b, v11.16b, v8.16b //GHASH block 4k+2 - low
2686 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
2689 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
2690 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
2693 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
2695 pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
2699 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
2702 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
2705 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
2706 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid
2709 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
2712 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
2715 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
2716 eor v11.16b, v11.16b, v6.16b //GHASH block 4k+3 - low
2719 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
2722 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
2726 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
2729 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
2733 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
2736 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
2740 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
2745 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
2749 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
2752 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
2756 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
2759 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
2762 aesmc v0.16b, v0.16b //AES block 4k+4 - round 9
2765 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
2769 aesmc v3.16b, v3.16b //AES block 4k+7 - round 9
2772 aesmc v1.16b, v1.16b //AES block 4k+5 - round 9
2775 aesmc v2.16b, v2.16b //AES block 4k+6 - round 9
2782 aesmc v3.16b, v3.16b //AES block 4k+7 - round 10
2785 aesmc v0.16b, v0.16b //AES block 4k+4 - round 10
2788 aesmc v2.16b, v2.16b //AES block 4k+6 - round 10
2791 aesmc v1.16b, v1.16b //AES block 4k+5 - round 10
2794 aese v0.16b, v29.16b //AES block 4k+4 - round 11
2796 aese v3.16b, v29.16b //AES block 4k+7 - round 11
2798 aese v2.16b, v29.16b //AES block 4k+6 - round 11
2800 aese v1.16b, v29.16b //AES block 4k+5 - round 11
2805 ldp x6, x7, [x0], #16 //AES block 4k+4 - load plaintext
2810 eor x6, x6, x13 //AES block 4k+4 - round 12 low
2811 eor x7, x7, x14 //AES block 4k+4 - round 12 high
2813 fmov d4, x6 //AES block 4k+4 - mov low
2815 fmov v4.d[1], x7 //AES block 4k+4 - mov high
2818 eor v5.16b, v4.16b, v0.16b //AES block 4k+4 - result
2843 st1 { v5.16b}, [x2], #16 //AES final-3 block - store result
2845 ldp x6, x7, [x0], #16 //AES final-2 block - load input low & high
2850 rev64 v4.16b, v5.16b //GHASH final-3 block
2852 eor x6, x6, x13 //AES final-2 block - round 12 low
2855 eor x7, x7, x14 //AES final-2 block - round 12 high
2856 fmov d5, x6 //AES final-2 block - mov low
2858 fmov v5.d[1], x7 //AES final-2 block - mov high
2860 mov d22, v4.d[1] //GHASH final-3 block - mid
2862 pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low
2864 mov d10, v17.d[1] //GHASH final-3 block - mid
2866 eor v22.8b, v22.8b, v4.8b //GHASH final-3 block - mid
2870 pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high
2872 pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid
2873 eor v5.16b, v5.16b, v1.16b //AES final-2 block - result
2874 .L192_enc_blocks_more_than_2: //blocks left > 2
2876 st1 { v5.16b}, [x2], #16 //AES final-2 block - store result
2878 rev64 v4.16b, v5.16b //GHASH final-2 block
2879 ldp x6, x7, [x0], #16 //AES final-1 block - load input low & high
2886 eor x7, x7, x14 //AES final-1 block - round 12 high
2888 pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high
2889 mov d22, v4.d[1] //GHASH final-2 block - mid
2891 pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low
2892 eor x6, x6, x13 //AES final-1 block - round 12 low
2894 fmov d5, x6 //AES final-1 block - mov low
2896 fmov v5.d[1], x7 //AES final-1 block - mov high
2897 eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high
2898 eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid
2900 eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low
2902 pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid
2906 eor v5.16b, v5.16b, v2.16b //AES final-1 block - result
2908 eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid
2911 st1 { v5.16b}, [x2], #16 //AES final-1 block - store result
2913 ldp x6, x7, [x0], #16 //AES final block - load input low & high
2918 rev64 v4.16b, v5.16b //GHASH final-1 block
2920 eor x6, x6, x13 //AES final block - round 12 low
2924 mov d22, v4.d[1] //GHASH final-1 block - mid
2926 eor v22.8b, v22.8b, v4.8b //GHASH final-1 block - mid
2927 eor x7, x7, x14 //AES final block - round 12 high
2928 fmov d5, x6 //AES final block - mov low
2930 pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high
2931 fmov v5.d[1], x7 //AES final block - mov high
2933 ins v22.d[1], v22.d[0] //GHASH final-1 block - mid
2935 eor v9.16b, v9.16b, v20.16b //GHASH final-1 block - high
2937 pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low
2939 pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid
2941 eor v5.16b, v5.16b, v3.16b //AES final block - result
2943 eor v11.16b, v11.16b, v21.16b //GHASH final-1 block - low
2945 eor v10.16b, v10.16b, v22.16b //GHASH final-1 block - mid
2956 sub x1, x1, #128 //bit_length -= 128
2959 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128])
2980 mov d8, v4.d[1] //GHASH final block - mid
2982 pmull v21.1q, v4.1d, v12.1d //GHASH final block - low
2984 pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high
2986 eor v8.8b, v8.8b, v4.8b //GHASH final block - mid
2988 eor v11.16b, v11.16b, v21.16b //GHASH final block - low
2990 eor v9.16b, v9.16b, v20.16b //GHASH final block - high
2992 pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid
2994 eor v10.16b, v10.16b, v8.16b //GHASH final block - mid
2997 eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
3003 eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up
3005 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
3007 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
3009 eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid
3011 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
3013 pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
3015 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
3017 eor v11.16b, v11.16b, v9.16b //MODULO - fold into low
3022 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
3040 .size aes_gcm_enc_192_kernel,.-aes_gcm_enc_192_kernel
// aes_gcm_dec_192_kernel (name taken from the .size directive that closes this block).
//
// NOTE(review): this is a filtered excerpt. The leading number on every line is the
// original file's line number and the sequence is not contiguous (e.g. 3047 -> 3070),
// so labels, branches and most of the aese instructions that pair with the aesmc
// lines below are not shown here. The notes describe only what the visible lines do.
//
// Register usage visible in this block:
//   x0        - input pointer, post-incremented by ld1 ("load ciphertext")
//   x2        - output pointer, post-incremented by stp ("store result")
//   x8        - round-key pointer; rk0..rk11 are loaded into v18..v29
//   x13, x14  - XORed into each output block as "round 12 low/high"
//               (where they are loaded is not in this excerpt)
//   x10, x11, w12, x9 - counter block: low 64 bits come from x10, high 64 bits are
//               x11 | (rev(w12) << 32); w12 is incremented once per block
//   v0..v3    - counter blocks being encrypted into keystream
//   v4..v7    - loaded ciphertext blocks, byte-reversed (rev64) for GHASH
//   v9/v10/v11 - GHASH accumulators labelled high / mid / low in the comments
//   v12..v15  - multiplicands for GHASH blocks 4k+3 .. 4k; v16/v17 are built
//               from them with trn2 (presumably the hash-key powers - the loads
//               are on omitted lines)
3043 .align 4
// Prologue: pre-decrement sp by 112 bytes and save x19/x20 at the new sp.
3047 stp x19, x20, [sp, #-112]!
// Setup: load the round keys, build counter block 2, and run the AES rounds for
// blocks 0..3. Rounds of different blocks are interleaved with key loads and the
// trn1/trn2 hash-key shuffles.
3070 ld1 {v18.4s}, [x8], #16 //load rk0
3074 ld1 {v19.4s}, [x8], #16 //load rk1
3084 ld1 {v20.4s}, [x8], #16 //load rk2
3087 aesmc v0.16b, v0.16b //AES block 0 - round 0
3092 ld1 {v21.4s}, [x8], #16 //load rk3
3095 rev w9, w12 //CTR block 2
3096 add w12, w12, #1 //CTR block 2
3098 fmov d2, x10 //CTR block 2
3099 orr x9, x11, x9, lsl #32 //CTR block 2
3101 fmov v2.d[1], x9 //CTR block 2
3105 aesmc v0.16b, v0.16b //AES block 0 - round 1
3110 ld1 {v22.4s}, [x8], #16 //load rk4
3113 aesmc v0.16b, v0.16b //AES block 0 - round 2
3116 aesmc v2.16b, v2.16b //AES block 2 - round 0
3117 ld1 {v23.4s}, [x8], #16 //load rk5
3120 aesmc v1.16b, v1.16b //AES block 1 - round 0
3126 aesmc v3.16b, v3.16b //AES block 3 - round 0
3132 aesmc v2.16b, v2.16b //AES block 2 - round 1
3138 aesmc v1.16b, v1.16b //AES block 1 - round 1
3141 aesmc v3.16b, v3.16b //AES block 3 - round 1
3147 aesmc v2.16b, v2.16b //AES block 2 - round 2
3148 ld1 {v24.4s}, [x8], #16 //load rk6
3151 aesmc v0.16b, v0.16b //AES block 0 - round 3
3152 ld1 {v25.4s}, [x8], #16 //load rk7
3155 aesmc v1.16b, v1.16b //AES block 1 - round 2
3156 ld1 {v26.4s}, [x8], #16 //load rk8
3159 aesmc v3.16b, v3.16b //AES block 3 - round 2
3160 ld1 {v27.4s}, [x8], #16 //load rk9
3163 aesmc v2.16b, v2.16b //AES block 2 - round 3
3169 aesmc v1.16b, v1.16b //AES block 1 - round 3
3173 aesmc v3.16b, v3.16b //AES block 3 - round 3
3174 trn1 v9.2d, v14.2d, v15.2d //h4h | h3h
3177 aesmc v0.16b, v0.16b //AES block 0 - round 4
3178 ld1 {v28.4s}, [x8], #16 //load rk10
3181 aesmc v1.16b, v1.16b //AES block 1 - round 4
3182 trn2 v17.2d, v14.2d, v15.2d //h4l | h3l
3185 aesmc v2.16b, v2.16b //AES block 2 - round 4
3188 aesmc v3.16b, v3.16b //AES block 3 - round 4
3189 trn2 v16.2d, v12.2d, v13.2d //h2l | h1l
3192 aesmc v0.16b, v0.16b //AES block 0 - round 5
3193 ld1 {v29.4s}, [x8], #16 //load rk11
3196 aesmc v1.16b, v1.16b //AES block 1 - round 5
3199 aesmc v2.16b, v2.16b //AES block 2 - round 5
3202 aesmc v3.16b, v3.16b //AES block 3 - round 5
3205 aesmc v0.16b, v0.16b //AES block 0 - round 6
3208 aesmc v2.16b, v2.16b //AES block 2 - round 6
3211 aesmc v3.16b, v3.16b //AES block 3 - round 6
3214 aesmc v0.16b, v0.16b //AES block 0 - round 7
3217 aesmc v2.16b, v2.16b //AES block 2 - round 7
3220 aesmc v3.16b, v3.16b //AES block 3 - round 7
3223 aesmc v1.16b, v1.16b //AES block 1 - round 6
3226 aesmc v2.16b, v2.16b //AES block 2 - round 8
3229 aesmc v3.16b, v3.16b //AES block 3 - round 8
3232 aesmc v1.16b, v1.16b //AES block 1 - round 7
3235 aesmc v2.16b, v2.16b //AES block 2 - round 9
3238 aesmc v3.16b, v3.16b //AES block 3 - round 9
3241 aesmc v1.16b, v1.16b //AES block 1 - round 8
3242 sub x5, x5, #1 //byte_len - 1
3245 aesmc v0.16b, v0.16b //AES block 0 - round 8
3249 aesmc v3.16b, v3.16b //AES block 3 - round 10
3253 aesmc v1.16b, v1.16b //AES block 1 - round 9
// Sets the flags for the "<= 4 blocks" decision; the branch that consumes them is
// not in this excerpt.
3254 cmp x0, x5 //check if we have <= 4 blocks
3257 aesmc v0.16b, v0.16b //AES block 0 - round 9
3258 trn1 v8.2d, v12.2d, v13.2d //h2h | h1h
// Round 11 uses rk11 (v29) and has no aesmc on the visible lines.
3260 aese v3.16b, v29.16b //AES block 3 - round 11
3263 aesmc v2.16b, v2.16b //AES block 2 - round 10
3266 aesmc v1.16b, v1.16b //AES block 1 - round 10
3269 aesmc v0.16b, v0.16b //AES block 0 - round 10
3272 aese v2.16b, v29.16b //AES block 2 - round 11
3274 aese v1.16b, v29.16b //AES block 1 - round 11
3277 aese v0.16b, v29.16b //AES block 0 - round 11
// First four blocks: load ciphertext into v4..v7, XOR with the keystream registers,
// move each 128-bit result into a GPR pair, XOR with x13/x14 and store to [x2].
// Blocks 0 and 1 are stored here; the x21..x24 results for blocks 2 and 3 are stored
// further down (lines tagged 4k+2 / 4k+3).
3280 ld1 {v4.16b, v5.16b}, [x0], #32 //AES block 0,1 - load ciphertext
3282 eor v1.16b, v5.16b, v1.16b //AES block 1 - result
3284 eor v0.16b, v4.16b, v0.16b //AES block 0 - result
3285 rev w9, w12 //CTR block 4
3286 ld1 {v6.16b, v7.16b}, [x0], #32 //AES block 2,3 - load ciphertext
3288 mov x19, v1.d[0] //AES block 1 - mov low
3290 mov x20, v1.d[1] //AES block 1 - mov high
3292 mov x6, v0.d[0] //AES block 0 - mov low
3293 orr x9, x11, x9, lsl #32 //CTR block 4
3294 add w12, w12, #1 //CTR block 4
3296 mov x7, v0.d[1] //AES block 0 - mov high
3299 fmov d0, x10 //CTR block 4
3303 eor x19, x19, x13 //AES block 1 - round 12 low
3307 fmov v0.d[1], x9 //CTR block 4
3312 eor x20, x20, x14 //AES block 1 - round 12 high
3318 eor x6, x6, x13 //AES block 0 - round 12 low
3323 eor x7, x7, x14 //AES block 0 - round 12 high
3327 stp x6, x7, [x2], #16 //AES block 0 - store result
3330 stp x19, x20, [x2], #16 //AES block 1 - store result
3333 eor v2.16b, v6.16b, v2.16b //AES block 2 - result
// From here the comments switch to 4k+N numbering: AES rounds for the next four
// counter blocks (4k+4..4k+7) are interleaved with GHASH of the four ciphertext
// blocks already loaded (4k..4k+3) and with the stores of results 4k+2 / 4k+3.
// NOTE(review): presumably the main loop body; its label and back-branch are on
// omitted lines - confirm against the full file.
3338 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
3341 pmull v31.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
3342 mov x21, v2.d[0] //AES block 4k+2 - mov low
3344 mov x22, v2.d[1] //AES block 4k+2 - mov high
3345 eor v3.16b, v7.16b, v3.16b //AES block 4k+3 - result
3346 rev64 v7.16b, v7.16b //GHASH block 4k+3
3349 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
3350 fmov d2, x10 //CTR block 4k+6
3353 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
3356 pmull2 v30.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
3357 fmov v2.d[1], x9 //CTR block 4k+6
3360 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
3361 mov x24, v3.d[1] //AES block 4k+3 - mov high
3364 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
3365 mov x23, v3.d[0] //AES block 4k+3 - mov low
3367 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
3368 fmov d3, x10 //CTR block 4k+7
3369 mov d8, v4.d[1] //GHASH block 4k - mid
3371 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
3372 mov d10, v17.d[1] //GHASH block 4k - mid
3373 rev w9, w12 //CTR block 4k+7
3376 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
3377 orr x9, x11, x9, lsl #32 //CTR block 4k+7
3379 fmov v3.d[1], x9 //CTR block 4k+7
3380 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
3381 mov d4, v5.d[1] //GHASH block 4k+1 - mid
3384 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
3387 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
3388 eor x22, x22, x14 //AES block 4k+2 - round 12 high
3393 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
3394 eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid
3396 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
3399 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
3400 rev64 v6.16b, v6.16b //GHASH block 4k+2
3403 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
3405 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
3406 eor v11.16b, v11.16b, v31.16b //GHASH block 4k+1 - low
3407 eor x21, x21, x13 //AES block 4k+2 - round 12 low
3412 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
3415 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
3417 eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid
3418 mov d31, v6.d[1] //GHASH block 4k+2 - mid
3421 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
3422 eor v9.16b, v9.16b, v30.16b //GHASH block 4k+1 - high
3425 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
3427 pmull2 v30.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
3428 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
3430 pmull v8.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
3433 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
3435 eor v9.16b, v9.16b, v30.16b //GHASH block 4k+2 - high
3436 mov d30, v7.d[1] //GHASH block 4k+3 - mid
3439 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
3441 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
3444 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
3445 eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid
3448 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
3451 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
3452 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
3455 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
3457 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
3458 eor v11.16b, v11.16b, v8.16b //GHASH block 4k+2 - low
3461 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
3463 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
3464 eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high
3467 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
3470 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
3473 pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
3476 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
3477 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
3480 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
3483 aesmc v0.16b, v0.16b //AES block 4k+4 - round 9
3484 eor v11.16b, v11.16b, v6.16b //GHASH block 4k+3 - low
3487 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
3490 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
3491 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid
3494 aesmc v0.16b, v0.16b //AES block 4k+4 - round 10
3497 aesmc v1.16b, v1.16b //AES block 4k+5 - round 9
// MODULO steps: fold the high/mid/low accumulators (v9/v10/v11) back into v11.
// v8 is the multiplier in the two pmull steps; the instruction that sets it to the
// reduction constant is not in this excerpt.
3498 eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
3501 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
3504 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
3508 aesmc v1.16b, v1.16b //AES block 4k+5 - round 10
3511 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
3512 ld1 {v4.16b}, [x0], #16 //AES block 4k+4 - load ciphertext
3515 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
3516 eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up
3518 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
3519 ld1 {v5.16b}, [x0], #16 //AES block 4k+5 - load ciphertext
3520 eor x23, x23, x13 //AES block 4k+3 - round 12 low
3525 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
3526 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
3528 aese v0.16b, v29.16b //AES block 4k+4 - round 11
3529 add w12, w12, #1 //CTR block 4k+7
3532 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
3533 eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid
3536 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
3537 ld1 {v6.16b}, [x0], #16 //AES block 4k+6 - load ciphertext
3539 aese v1.16b, v29.16b //AES block 4k+5 - round 11
3540 ld1 {v7.16b}, [x0], #16 //AES block 4k+7 - load ciphertext
3541 rev w9, w12 //CTR block 4k+8
3544 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
3545 stp x21, x22, [x2], #16 //AES block 4k+2 - store result
3548 aesmc v2.16b, v2.16b //AES block 4k+6 - round 9
3549 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
3553 eor v0.16b, v4.16b, v0.16b //AES block 4k+4 - result
3554 eor x24, x24, x14 //AES block 4k+3 - round 12 high
3558 eor v1.16b, v5.16b, v1.16b //AES block 4k+5 - result
3561 aesmc v2.16b, v2.16b //AES block 4k+6 - round 10
3562 orr x9, x11, x9, lsl #32 //CTR block 4k+8
3565 aesmc v3.16b, v3.16b //AES block 4k+7 - round 9
3567 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
3568 mov x19, v1.d[0] //AES block 4k+5 - mov low
3570 mov x6, v0.d[0] //AES block 4k+4 - mov low
3571 stp x23, x24, [x2], #16 //AES block 4k+3 - store result
3572 rev64 v5.16b, v5.16b //GHASH block 4k+5
3574 aese v2.16b, v29.16b //AES block 4k+6 - round 11
3575 mov x7, v0.d[1] //AES block 4k+4 - mov high
3578 aesmc v3.16b, v3.16b //AES block 4k+7 - round 10
3579 mov x20, v1.d[1] //AES block 4k+5 - mov high
3581 fmov d0, x10 //CTR block 4k+8
3582 add w12, w12, #1 //CTR block 4k+8
3583 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
3585 eor v2.16b, v6.16b, v2.16b //AES block 4k+6 - result
3586 fmov v0.d[1], x9 //CTR block 4k+8
3587 rev w9, w12 //CTR block 4k+9
3589 eor x6, x6, x13 //AES block 4k+4 - round 12 low
3593 orr x9, x11, x9, lsl #32 //CTR block 4k+9
3594 eor v11.16b, v11.16b, v8.16b //MODULO - fold into low
3596 fmov d1, x10 //CTR block 4k+9
3597 add w12, w12, #1 //CTR block 4k+9
3598 eor x19, x19, x13 //AES block 4k+5 - round 12 low
3602 fmov v1.d[1], x9 //CTR block 4k+9
3603 rev w9, w12 //CTR block 4k+10
3604 eor x20, x20, x14 //AES block 4k+5 - round 12 high
3608 eor x7, x7, x14 //AES block 4k+4 - round 12 high
3612 stp x6, x7, [x2], #16 //AES block 4k+4 - store result
3613 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
3615 add w12, w12, #1 //CTR block 4k+10
3616 rev64 v4.16b, v4.16b //GHASH block 4k+4
3617 orr x9, x11, x9, lsl #32 //CTR block 4k+10
3619 aese v3.16b, v29.16b //AES block 4k+7 - round 11
3620 stp x19, x20, [x2], #16 //AES block 4k+5 - store result
// The 4k+N schedule starts over here, but this copy contains no further ld1 from
// [x0] and no round-11 aese on the visible lines.
// NOTE(review): presumably a separate pre-tail path entered after the last full
// loop iteration; its label is on an omitted line - confirm against the full file.
3624 mov x22, v2.d[1] //AES block 4k+2 - mov high
3626 eor v3.16b, v7.16b, v3.16b //AES block 4k+3 - result
3629 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
3630 mov x21, v2.d[0] //AES block 4k+2 - mov low
3633 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
3634 mov d10, v17.d[1] //GHASH block 4k - mid
3637 fmov d2, x10 //CTR block 4k+6
3640 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
3641 mov x23, v3.d[0] //AES block 4k+3 - mov low
3644 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
3645 mov x24, v3.d[1] //AES block 4k+3 - mov high
3647 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
3648 mov d8, v4.d[1] //GHASH block 4k - mid
3649 fmov d3, x10 //CTR block 4k+7
3652 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
3653 rev64 v6.16b, v6.16b //GHASH block 4k+2
3655 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
3656 fmov v2.d[1], x9 //CTR block 4k+6
3657 rev w9, w12 //CTR block 4k+7
3659 orr x9, x11, x9, lsl #32 //CTR block 4k+7
3660 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
3661 mov d4, v5.d[1] //GHASH block 4k+1 - mid
3663 pmull v31.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
3664 eor x24, x24, x14 //AES block 4k+3 - round 12 high
3668 fmov v3.d[1], x9 //CTR block 4k+7
3671 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
3672 eor x21, x21, x13 //AES block 4k+2 - round 12 low
3676 pmull2 v30.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
3677 eor x22, x22, x14 //AES block 4k+2 - round 12 high
3681 eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid
3683 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
3684 eor x23, x23, x13 //AES block 4k+3 - round 12 low
3688 stp x21, x22, [x2], #16 //AES block 4k+2 - store result
3690 rev64 v7.16b, v7.16b //GHASH block 4k+3
3691 stp x23, x24, [x2], #16 //AES block 4k+3 - store result
3694 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
3695 eor v9.16b, v9.16b, v30.16b //GHASH block 4k+1 - high
3697 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
3698 add w12, w12, #1 //CTR block 4k+7
3700 pmull2 v30.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
3701 eor v11.16b, v11.16b, v31.16b //GHASH block 4k+1 - low
3704 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
3706 eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid
3707 mov d31, v6.d[1] //GHASH block 4k+2 - mid
3710 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
3713 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
3714 eor v9.16b, v9.16b, v30.16b //GHASH block 4k+2 - high
3716 eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid
3718 pmull v8.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
3721 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
3722 mov d30, v7.d[1] //GHASH block 4k+3 - mid
3725 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
3726 ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid
3728 pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
3731 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
3732 eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid
3735 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
3737 pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid
3738 eor v11.16b, v11.16b, v8.16b //GHASH block 4k+2 - low
3741 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
3743 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
3746 pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid
3749 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
3752 eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high
3755 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
3756 eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid
3759 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
3761 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
3762 eor v11.16b, v11.16b, v6.16b //GHASH block 4k+3 - low
3765 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
3768 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
3769 eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid
3772 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
3775 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
3776 eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
3779 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
3782 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
3783 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
3786 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
3789 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
3790 eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up
3793 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
3796 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
3799 aesmc v0.16b, v0.16b //AES block 4k+4 - round 9
3802 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
3805 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
3806 eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid
3809 aesmc v0.16b, v0.16b //AES block 4k+4 - round 10
3812 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
3815 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
3818 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
3819 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
3822 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
3825 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
3828 aesmc v2.16b, v2.16b //AES block 4k+6 - round 9
3831 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
3834 aesmc v3.16b, v3.16b //AES block 4k+7 - round 9
3836 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
3839 aesmc v1.16b, v1.16b //AES block 4k+5 - round 9
3842 aesmc v2.16b, v2.16b //AES block 4k+6 - round 10
3845 aesmc v3.16b, v3.16b //AES block 4k+7 - round 10
3846 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
3849 aesmc v1.16b, v1.16b //AES block 4k+5 - round 10
3852 eor v11.16b, v11.16b, v8.16b //MODULO - fold into low
3860 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
// Tail: the remaining blocks are handled one at a time. Each step loads one
// ciphertext block into v5, XORs it with the next keystream register (v0, then v1,
// v2, v3), XORs the result with x13/x14 in GPRs and stores it, while the previous
// ciphertext block (rev64 into v4) is folded into GHASH using v15, v14, v13, v12
// in turn. The branches that skip steps for shorter tails are on omitted lines.
3864 ld1 { v5.16b}, [x0], #16 //AES block 4k+4 - load ciphertext
3866 eor v0.16b, v5.16b, v0.16b //AES block 4k+4 - result
3868 mov x7, v0.d[1] //AES block 4k+4 - mov high
3870 mov x6, v0.d[0] //AES block 4k+4 - mov low
3876 eor x7, x7, x14 //AES block 4k+4 - round 12 high
3880 eor x6, x6, x13 //AES block 4k+4 - round 12 low
3906 rev64 v4.16b, v5.16b //GHASH final-3 block
3907 ld1 { v5.16b}, [x0], #16 //AES final-2 block - load ciphertext
3909 stp x6, x7, [x2], #16 //AES final-3 block - store result
3913 eor v0.16b, v5.16b, v1.16b //AES final-2 block - result
3915 pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low
3916 mov x6, v0.d[0] //AES final-2 block - mov low
// v20..v22 received rk2..rk4 during setup; from here on they are overwritten and
// used as GHASH temporaries.
3917 mov d22, v4.d[1] //GHASH final-3 block - mid
3919 mov x7, v0.d[1] //AES final-2 block - mov high
3921 mov d10, v17.d[1] //GHASH final-3 block - mid
3922 eor v22.8b, v22.8b, v4.8b //GHASH final-3 block - mid
3924 pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high
3926 eor x6, x6, x13 //AES final-2 block - round 12 low
3932 pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid
3933 eor x7, x7, x14 //AES final-2 block - round 12 high
3937 .L192_dec_blocks_more_than_2: //blocks left > 2
3939 rev64 v4.16b, v5.16b //GHASH final-2 block
3940 ld1 { v5.16b}, [x0], #16 //AES final-1 block - load ciphertext
3946 eor v0.16b, v5.16b, v2.16b //AES final-1 block - result
3948 mov d22, v4.d[1] //GHASH final-2 block - mid
3950 pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low
3952 stp x6, x7, [x2], #16 //AES final-2 block - store result
3954 eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid
3955 mov x7, v0.d[1] //AES final-1 block - mov high
3957 eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low
3958 mov x6, v0.d[0] //AES final-1 block - mov low
3960 pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high
3962 pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid
3964 eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high
3965 eor x7, x7, x14 //AES final-1 block - round 12 high
3969 eor x6, x6, x13 //AES final-1 block - round 12 low
3973 eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid
3976 rev64 v4.16b, v5.16b //GHASH final-1 block
3979 ld1 { v5.16b}, [x0], #16 //AES final block - load ciphertext
3981 mov d22, v4.d[1] //GHASH final-1 block - mid
3983 pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high
3985 eor v0.16b, v5.16b, v3.16b //AES final block - result
3986 stp x6, x7, [x2], #16 //AES final-1 block - store result
3988 eor v22.8b, v22.8b, v4.8b //GHASH final-1 block - mid
3990 eor v9.16b, v9.16b, v20.16b //GHASH final-1 block - high
3992 pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low
3993 mov x7, v0.d[1] //AES final block - mov high
3995 ins v22.d[1], v22.d[0] //GHASH final-1 block - mid
3996 mov x6, v0.d[0] //AES final block - mov low
3998 pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid
4001 eor v11.16b, v11.16b, v21.16b //GHASH final-1 block - low
4002 eor x7, x7, x14 //AES final block - round 12 high
4006 eor x6, x6, x13 //AES final block - round 12 low
4010 eor v10.16b, v10.16b, v22.16b //GHASH final-1 block - mid
// x1 becomes 128 minus the bit count of the last block. The code that uses it
// (original lines 4020-4051) is not in this excerpt.
4017 sub x1, x1, #128 //bit_length -= 128
4019 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128])
// GHASH of the final block (multiplicand v12 / v16), followed by the last modular
// reduction of v9/v10/v11 into v11. v8 is first overwritten as the "mid" temporary
// and is then the multiplier in the MODULO pmull steps, so it must be reloaded with
// the reduction constant in between - that instruction is on an omitted line.
4052 pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high
4053 mov d8, v4.d[1] //GHASH final block - mid
4055 pmull v21.1q, v4.1d, v12.1d //GHASH final block - low
4057 eor v8.8b, v8.8b, v4.8b //GHASH final block - mid
4059 eor v9.16b, v9.16b, v20.16b //GHASH final block - high
4061 pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid
4063 eor v11.16b, v11.16b, v21.16b //GHASH final block - low
4065 eor v10.16b, v10.16b, v8.16b //GHASH final block - mid
4068 eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
4072 eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up
4074 pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
4078 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
4080 eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid
4082 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
4084 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
4086 eor v11.16b, v11.16b, v8.16b //MODULO - fold into low
4088 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
4090 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
4108 .size aes_gcm_dec_192_kernel,.-aes_gcm_dec_192_kernel
4111 .align 4
4115 stp x19, x20, [sp, #-112]!
4139 sub x5, x5, #1 //byte_len - 1
4141 ld1 {v18.4s}, [x8], #16 //load rk0
4144 ld1 {v19.4s}, [x8], #16 //load rk1
4148 fmov d2, x10 //CTR block 2
4152 cmp x0, x5 //check if we have <= 4 blocks
4156 aesmc v0.16b, v0.16b //AES block 0 - round 0
4164 ld1 {v20.4s}, [x8], #16 //load rk2
4167 rev w9, w12 //CTR block 2
4168 add w12, w12, #1 //CTR block 2
4170 orr x9, x11, x9, lsl #32 //CTR block 2
4171 ld1 {v21.4s}, [x8], #16 //load rk3
4173 fmov v2.d[1], x9 //CTR block 2
4177 aesmc v0.16b, v0.16b //AES block 0 - round 1
4183 aesmc v1.16b, v1.16b //AES block 1 - round 0
4184 ld1 {v22.4s}, [x8], #16 //load rk4
4187 aesmc v0.16b, v0.16b //AES block 0 - round 2
4188 ld1 {v23.4s}, [x8], #16 //load rk5
4191 aesmc v2.16b, v2.16b //AES block 2 - round 0
4192 ld1 {v24.4s}, [x8], #16 //load rk6
4195 aesmc v1.16b, v1.16b //AES block 1 - round 1
4201 aesmc v3.16b, v3.16b //AES block 3 - round 0
4202 ld1 {v25.4s}, [x8], #16 //load rk7
4205 aesmc v2.16b, v2.16b //AES block 2 - round 1
4206 ld1 {v26.4s}, [x8], #16 //load rk8
4209 aesmc v1.16b, v1.16b //AES block 1 - round 2
4215 aesmc v3.16b, v3.16b //AES block 3 - round 1
4216 ld1 {v27.4s}, [x8], #16 //load rk9
4219 aesmc v2.16b, v2.16b //AES block 2 - round 2
4225 aesmc v1.16b, v1.16b //AES block 1 - round 3
4226 ld1 {v28.4s}, [x8], #16 //load rk10
4229 aesmc v3.16b, v3.16b //AES block 3 - round 2
4230 ld1 {v29.4s}, [x8], #16 //load rk11
4233 aesmc v2.16b, v2.16b //AES block 2 - round 3
4237 aesmc v0.16b, v0.16b //AES block 0 - round 3
4240 aesmc v3.16b, v3.16b //AES block 3 - round 3
4246 aesmc v2.16b, v2.16b //AES block 2 - round 4
4249 aesmc v0.16b, v0.16b //AES block 0 - round 4
4252 aesmc v1.16b, v1.16b //AES block 1 - round 4
4255 aesmc v3.16b, v3.16b //AES block 3 - round 4
4258 aesmc v0.16b, v0.16b //AES block 0 - round 5
4261 aesmc v1.16b, v1.16b //AES block 1 - round 5
4264 aesmc v3.16b, v3.16b //AES block 3 - round 5
4267 aesmc v2.16b, v2.16b //AES block 2 - round 5
4270 aesmc v1.16b, v1.16b //AES block 1 - round 6
4271 trn2 v17.2d, v14.2d, v15.2d //h4l | h3l
4274 aesmc v3.16b, v3.16b //AES block 3 - round 6
4275 ld1 {v30.4s}, [x8], #16 //load rk12
4278 aesmc v0.16b, v0.16b //AES block 0 - round 6
4284 aesmc v2.16b, v2.16b //AES block 2 - round 6
4285 ld1 {v31.4s}, [x8], #16 //load rk13
4288 aesmc v1.16b, v1.16b //AES block 1 - round 7
4289 trn1 v9.2d, v14.2d, v15.2d //h4h | h3h
4292 aesmc v0.16b, v0.16b //AES block 0 - round 7
4295 aesmc v2.16b, v2.16b //AES block 2 - round 7
4298 aesmc v3.16b, v3.16b //AES block 3 - round 7
4299 trn2 v16.2d, v12.2d, v13.2d //h2l | h1l
4302 aesmc v1.16b, v1.16b //AES block 1 - round 8
4305 aesmc v2.16b, v2.16b //AES block 2 - round 8
4308 aesmc v3.16b, v3.16b //AES block 3 - round 8
4311 aesmc v1.16b, v1.16b //AES block 1 - round 9
4314 aesmc v2.16b, v2.16b //AES block 2 - round 9
4317 aesmc v0.16b, v0.16b //AES block 0 - round 8
4320 aesmc v1.16b, v1.16b //AES block 1 - round 10
4323 aesmc v3.16b, v3.16b //AES block 3 - round 9
4326 aesmc v0.16b, v0.16b //AES block 0 - round 9
4329 aesmc v2.16b, v2.16b //AES block 2 - round 10
4332 aesmc v3.16b, v3.16b //AES block 3 - round 10
4335 aesmc v1.16b, v1.16b //AES block 1 - round 11
4338 aesmc v2.16b, v2.16b //AES block 2 - round 11
4341 aesmc v0.16b, v0.16b //AES block 0 - round 10
4344 aesmc v1.16b, v1.16b //AES block 1 - round 12
4347 aesmc v2.16b, v2.16b //AES block 2 - round 12
4350 aesmc v0.16b, v0.16b //AES block 0 - round 11
4354 aesmc v3.16b, v3.16b //AES block 3 - round 11
4356 aese v2.16b, v31.16b //AES block 2 - round 13
4357 trn1 v8.2d, v12.2d, v13.2d //h2h | h1h
4360 aesmc v0.16b, v0.16b //AES block 0 - round 12
4363 aesmc v3.16b, v3.16b //AES block 3 - round 12
4365 aese v1.16b, v31.16b //AES block 1 - round 13
4367 aese v0.16b, v31.16b //AES block 0 - round 13
4369 aese v3.16b, v31.16b //AES block 3 - round 13
4373 ldp x19, x20, [x0, #16] //AES block 1 - load plaintext
4378 rev w9, w12 //CTR block 4
4379 ldp x6, x7, [x0, #0] //AES block 0 - load plaintext
4384 ldp x23, x24, [x0, #48] //AES block 3 - load plaintext
4389 ldp x21, x22, [x0, #32] //AES block 2 - load plaintext
4396 eor x19, x19, x13 //AES block 1 - round 14 low
4397 eor x20, x20, x14 //AES block 1 - round 14 high
4399 fmov d5, x19 //AES block 1 - mov low
4400 eor x6, x6, x13 //AES block 0 - round 14 low
4402 eor x7, x7, x14 //AES block 0 - round 14 high
4403 eor x24, x24, x14 //AES block 3 - round 14 high
4404 fmov d4, x6 //AES block 0 - mov low
4407 fmov v4.d[1], x7 //AES block 0 - mov high
4408 eor x23, x23, x13 //AES block 3 - round 14 low
4410 eor x21, x21, x13 //AES block 2 - round 14 low
4411 fmov v5.d[1], x20 //AES block 1 - mov high
4413 fmov d6, x21 //AES block 2 - mov low
4414 add w12, w12, #1 //CTR block 4
4416 orr x9, x11, x9, lsl #32 //CTR block 4
4417 fmov d7, x23 //AES block 3 - mov low
4418 eor x22, x22, x14 //AES block 2 - round 14 high
4420 fmov v6.d[1], x22 //AES block 2 - mov high
4422 eor v4.16b, v4.16b, v0.16b //AES block 0 - result
4423 fmov d0, x10 //CTR block 4
4425 fmov v0.d[1], x9 //CTR block 4
4429 eor v5.16b, v5.16b, v1.16b //AES block 1 - result
4435 st1 { v4.16b}, [x2], #16 //AES block 0 - store result
4437 fmov v7.d[1], x24 //AES block 3 - mov high
4439 eor v6.16b, v6.16b, v2.16b //AES block 2 - result
4441 st1 { v5.16b}, [x2], #16 //AES block 1 - store result
4447 st1 { v6.16b}, [x2], #16 //AES block 2 - store result
4452 eor v7.16b, v7.16b, v3.16b //AES block 3 - result
4453 st1 { v7.16b}, [x2], #16 //AES block 3 - store result
4458 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
4459 rev64 v4.16b, v4.16b //GHASH block 4k (only t0 is free)
4462 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
4463 fmov d3, x10 //CTR block 4k+3
4466 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
4470 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
4471 fmov v3.d[1], x9 //CTR block 4k+3
4474 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
4475 ldp x23, x24, [x0, #48] //AES block 4k+7 - load plaintext
4481 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
4482 ldp x21, x22, [x0, #32] //AES block 4k+6 - load plaintext
4488 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
4492 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
4495 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
4496 eor x23, x23, x13 //AES block 4k+7 - round 14 low
4499 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
4500 mov d10, v17.d[1] //GHASH block 4k - mid
4502 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
4503 eor x22, x22, x14 //AES block 4k+6 - round 14 high
4504 mov d8, v4.d[1] //GHASH block 4k - mid
4507 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
4508 rev64 v5.16b, v5.16b //GHASH block 4k+1 (t0 and t1 free)
4511 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
4513 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
4514 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
4517 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
4520 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
4521 rev64 v7.16b, v7.16b //GHASH block 4k+3 (t0, t1, t2 and t3 free)
4523 pmull2 v4.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
4525 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
4526 rev64 v6.16b, v6.16b //GHASH block 4k+2 (t0, t1, and t2 free)
4528 pmull v8.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
4530 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+1 - high
4531 mov d4, v5.d[1] //GHASH block 4k+1 - mid
4534 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
4537 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
4538 eor v11.16b, v11.16b, v8.16b //GHASH block 4k+1 - low
4541 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
4544 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
4545 mov d8, v6.d[1] //GHASH block 4k+2 - mid
4548 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
4549 eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid
4552 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
4555 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
4556 eor v8.8b, v8.8b, v6.8b //GHASH block 4k+2 - mid
4559 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
4561 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
4564 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
4567 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
4568 ins v8.d[1], v8.d[0] //GHASH block 4k+2 - mid
4571 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
4574 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
4577 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
4580 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
4581 eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid
4583 pmull2 v4.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
4585 pmull v5.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
4588 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
4590 pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
4591 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+2 - high
4594 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
4595 ldp x19, x20, [x0, #16] //AES block 4k+5 - load plaintext
4601 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
4602 mov d4, v7.d[1] //GHASH block 4k+3 - mid
4605 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
4606 eor v11.16b, v11.16b, v5.16b //GHASH block 4k+2 - low
4608 pmull2 v8.1q, v8.2d, v16.2d //GHASH block 4k+2 - mid
4610 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
4611 eor v4.8b, v4.8b, v7.8b //GHASH block 4k+3 - mid
4614 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
4615 eor x19, x19, x13 //AES block 4k+5 - round 14 low
4618 aesmc v1.16b, v1.16b //AES block 4k+5 - round 9
4619 eor v10.16b, v10.16b, v8.16b //GHASH block 4k+2 - mid
4622 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
4623 eor x21, x21, x13 //AES block 4k+6 - round 14 low
4626 aesmc v0.16b, v0.16b //AES block 4k+4 - round 9
4629 pmull v4.1q, v4.1d, v16.1d //GHASH block 4k+3 - mid
4630 eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high
4631 fmov d5, x19 //AES block 4k+5 - mov low
4634 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
4635 ldp x6, x7, [x0, #0] //AES block 4k+4 - load plaintext
4641 aesmc v0.16b, v0.16b //AES block 4k+4 - round 10
4645 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
4646 eor v11.16b, v11.16b, v6.16b //GHASH block 4k+3 - low
4649 aesmc v2.16b, v2.16b //AES block 4k+6 - round 9
4652 aesmc v1.16b, v1.16b //AES block 4k+5 - round 10
4653 eor v10.16b, v10.16b, v4.16b //GHASH block 4k+3 - mid
4656 aesmc v3.16b, v3.16b //AES block 4k+7 - round 9
4657 add w12, w12, #1 //CTR block 4k+3
4660 aesmc v0.16b, v0.16b //AES block 4k+4 - round 11
4661 eor v4.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
4664 aesmc v1.16b, v1.16b //AES block 4k+5 - round 11
4667 pmull v7.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
4668 rev w9, w12 //CTR block 4k+8
4669 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
4672 aesmc v2.16b, v2.16b //AES block 4k+6 - round 10
4673 eor x6, x6, x13 //AES block 4k+4 - round 14 low
4676 aesmc v1.16b, v1.16b //AES block 4k+5 - round 12
4677 eor v10.16b, v10.16b, v4.16b //MODULO - karatsuba tidy up
4680 aesmc v3.16b, v3.16b //AES block 4k+7 - round 10
4681 eor x7, x7, x14 //AES block 4k+4 - round 14 high
4683 fmov d4, x6 //AES block 4k+4 - mov low
4684 orr x9, x11, x9, lsl #32 //CTR block 4k+8
4685 eor v7.16b, v9.16b, v7.16b //MODULO - fold into mid
4688 aesmc v0.16b, v0.16b //AES block 4k+4 - round 12
4689 eor x20, x20, x14 //AES block 4k+5 - round 14 high
4692 aesmc v2.16b, v2.16b //AES block 4k+6 - round 11
4693 eor x24, x24, x14 //AES block 4k+7 - round 14 high
4696 aesmc v3.16b, v3.16b //AES block 4k+7 - round 11
4697 add w12, w12, #1 //CTR block 4k+8
4699 aese v0.16b, v31.16b //AES block 4k+4 - round 13
4700 fmov v4.d[1], x7 //AES block 4k+4 - mov high
4701 eor v10.16b, v10.16b, v7.16b //MODULO - fold into mid
4704 aesmc v2.16b, v2.16b //AES block 4k+6 - round 12
4705 fmov d7, x23 //AES block 4k+7 - mov low
4707 aese v1.16b, v31.16b //AES block 4k+5 - round 13
4708 fmov v5.d[1], x20 //AES block 4k+5 - mov high
4710 fmov d6, x21 //AES block 4k+6 - mov low
4713 fmov v6.d[1], x22 //AES block 4k+6 - mov high
4715 pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
4716 eor v4.16b, v4.16b, v0.16b //AES block 4k+4 - result
4717 fmov d0, x10 //CTR block 4k+8
4719 fmov v0.d[1], x9 //CTR block 4k+8
4720 rev w9, w12 //CTR block 4k+9
4721 add w12, w12, #1 //CTR block 4k+9
4723 eor v5.16b, v5.16b, v1.16b //AES block 4k+5 - result
4724 fmov d1, x10 //CTR block 4k+9
4725 orr x9, x11, x9, lsl #32 //CTR block 4k+9
4728 aesmc v3.16b, v3.16b //AES block 4k+7 - round 12
4729 fmov v1.d[1], x9 //CTR block 4k+9
4731 aese v2.16b, v31.16b //AES block 4k+6 - round 13
4732 rev w9, w12 //CTR block 4k+10
4733 st1 { v4.16b}, [x2], #16 //AES block 4k+4 - store result
4735 orr x9, x11, x9, lsl #32 //CTR block 4k+10
4736 eor v11.16b, v11.16b, v9.16b //MODULO - fold into low
4737 fmov v7.d[1], x24 //AES block 4k+7 - mov high
4739 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
4740 st1 { v5.16b}, [x2], #16 //AES block 4k+5 - store result
4741 add w12, w12, #1 //CTR block 4k+10
4743 aese v3.16b, v31.16b //AES block 4k+7 - round 13
4744 eor v6.16b, v6.16b, v2.16b //AES block 4k+6 - result
4745 fmov d2, x10 //CTR block 4k+10
4747 st1 { v6.16b}, [x2], #16 //AES block 4k+6 - store result
4748 fmov v2.d[1], x9 //CTR block 4k+10
4749 rev w9, w12 //CTR block 4k+11
4751 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
4752 orr x9, x11, x9, lsl #32 //CTR block 4k+11
4754 eor v7.16b, v7.16b, v3.16b //AES block 4k+7 - result
4755 st1 { v7.16b}, [x2], #16 //AES block 4k+7 - store result
4760 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
4761 rev64 v6.16b, v6.16b //GHASH block 4k+2 (t0, t1, and t2 free)
4764 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
4765 fmov d3, x10 //CTR block 4k+3
4768 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
4769 rev64 v4.16b, v4.16b //GHASH block 4k (only t0 is free)
4771 fmov v3.d[1], x9 //CTR block 4k+3
4775 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
4778 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
4781 rev64 v5.16b, v5.16b //GHASH block 4k+1 (t0 and t1 free)
4784 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
4787 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
4788 mov d10, v17.d[1] //GHASH block 4k - mid
4791 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
4793 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
4794 mov d8, v4.d[1] //GHASH block 4k - mid
4796 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
4799 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
4802 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
4803 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
4806 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
4809 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
4812 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
4814 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
4816 pmull2 v4.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
4818 pmull v8.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
4821 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
4823 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+1 - high
4824 mov d4, v5.d[1] //GHASH block 4k+1 - mid
4827 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
4828 eor v11.16b, v11.16b, v8.16b //GHASH block 4k+1 - low
4831 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
4833 eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid
4834 mov d8, v6.d[1] //GHASH block 4k+2 - mid
4837 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
4838 rev64 v7.16b, v7.16b //GHASH block 4k+3 (t0, t1, t2 and t3 free)
4841 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
4843 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
4844 eor v8.8b, v8.8b, v6.8b //GHASH block 4k+2 - mid
4845 add w12, w12, #1 //CTR block 4k+3
4847 pmull v5.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
4850 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
4853 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
4854 eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid
4856 pmull2 v4.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
4858 eor v11.16b, v11.16b, v5.16b //GHASH block 4k+2 - low
4859 ins v8.d[1], v8.d[0] //GHASH block 4k+2 - mid
4862 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
4864 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+2 - high
4865 mov d4, v7.d[1] //GHASH block 4k+3 - mid
4868 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
4870 pmull2 v8.1q, v8.2d, v16.2d //GHASH block 4k+2 - mid
4872 eor v4.8b, v4.8b, v7.8b //GHASH block 4k+3 - mid
4874 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
4877 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
4879 pmull v4.1q, v4.1d, v16.1d //GHASH block 4k+3 - mid
4880 eor v10.16b, v10.16b, v8.16b //GHASH block 4k+2 - mid
4883 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
4886 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
4889 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
4892 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
4896 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
4899 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
4900 eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high
4903 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
4906 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
4910 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
4911 eor v10.16b, v10.16b, v4.16b //GHASH block 4k+3 - mid
4913 pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
4916 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
4919 aesmc v1.16b, v1.16b //AES block 4k+5 - round 9
4922 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
4923 eor v11.16b, v11.16b, v6.16b //GHASH block 4k+3 - low
4926 aesmc v3.16b, v3.16b //AES block 4k+7 - round 9
4934 aesmc v3.16b, v3.16b //AES block 4k+7 - round 10
4937 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
4941 aesmc v1.16b, v1.16b //AES block 4k+5 - round 10
4944 aesmc v0.16b, v0.16b //AES block 4k+4 - round 9
4947 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
4950 aesmc v1.16b, v1.16b //AES block 4k+5 - round 11
4954 aesmc v0.16b, v0.16b //AES block 4k+4 - round 10
4957 aesmc v2.16b, v2.16b //AES block 4k+6 - round 9
4960 aesmc v1.16b, v1.16b //AES block 4k+5 - round 12
4963 aesmc v0.16b, v0.16b //AES block 4k+4 - round 11
4967 aesmc v3.16b, v3.16b //AES block 4k+7 - round 11
4970 aesmc v2.16b, v2.16b //AES block 4k+6 - round 10
4973 aesmc v0.16b, v0.16b //AES block 4k+4 - round 12
4978 aesmc v2.16b, v2.16b //AES block 4k+6 - round 11
4982 aesmc v3.16b, v3.16b //AES block 4k+7 - round 12
4984 aese v1.16b, v31.16b //AES block 4k+5 - round 13
4988 aesmc v2.16b, v2.16b //AES block 4k+6 - round 12
4990 aese v3.16b, v31.16b //AES block 4k+7 - round 13
4992 aese v0.16b, v31.16b //AES block 4k+4 - round 13
4994 aese v2.16b, v31.16b //AES block 4k+6 - round 13
5000 ldp x6, x7, [x0], #16 //AES block 4k+4 - load plaintext
5005 eor x6, x6, x13 //AES block 4k+4 - round 14 low
5006 eor x7, x7, x14 //AES block 4k+4 - round 14 high
5009 fmov d4, x6 //AES block 4k+4 - mov low
5011 fmov v4.d[1], x7 //AES block 4k+4 - mov high
5013 eor v5.16b, v4.16b, v0.16b //AES block 4k+4 - result
5036 st1 { v5.16b}, [x2], #16 //AES final-3 block - store result
5038 ldp x6, x7, [x0], #16 //AES final-2 block - load input low & high
5043 rev64 v4.16b, v5.16b //GHASH final-3 block
5045 eor x6, x6, x13 //AES final-2 block - round 14 low
5048 eor x7, x7, x14 //AES final-2 block - round 14 high
5050 mov d22, v4.d[1] //GHASH final-3 block - mid
5051 fmov d5, x6 //AES final-2 block - mov low
5053 fmov v5.d[1], x7 //AES final-2 block - mov high
5055 eor v22.8b, v22.8b, v4.8b //GHASH final-3 block - mid
5058 mov d10, v17.d[1] //GHASH final-3 block - mid
5060 pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low
5062 pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high
5064 pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid
5065 eor v5.16b, v5.16b, v1.16b //AES final-2 block - result
5066 .L256_enc_blocks_more_than_2: //blocks left > 2
5068 st1 { v5.16b}, [x2], #16 //AES final-2 block - store result
5070 ldp x6, x7, [x0], #16 //AES final-1 block - load input low & high
5075 rev64 v4.16b, v5.16b //GHASH final-2 block
5077 eor x6, x6, x13 //AES final-1 block - round 14 low
5080 fmov d5, x6 //AES final-1 block - mov low
5081 eor x7, x7, x14 //AES final-1 block - round 14 high
5083 fmov v5.d[1], x7 //AES final-1 block - mov high
5087 pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high
5088 mov d22, v4.d[1] //GHASH final-2 block - mid
5090 pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low
5092 eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid
5094 eor v5.16b, v5.16b, v2.16b //AES final-1 block - result
5096 eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high
5098 pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid
5100 eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low
5102 eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid
5105 st1 { v5.16b}, [x2], #16 //AES final-1 block - store result
5107 rev64 v4.16b, v5.16b //GHASH final-1 block
5109 ldp x6, x7, [x0], #16 //AES final block - load input low & high
5118 eor x6, x6, x13 //AES final block - round 14 low
5119 mov d22, v4.d[1] //GHASH final-1 block - mid
5121 pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high
5122 eor x7, x7, x14 //AES final block - round 14 high
5124 eor v22.8b, v22.8b, v4.8b //GHASH final-1 block - mid
5126 eor v9.16b, v9.16b, v20.16b //GHASH final-1 block - high
5128 ins v22.d[1], v22.d[0] //GHASH final-1 block - mid
5129 fmov d5, x6 //AES final block - mov low
5131 fmov v5.d[1], x7 //AES final block - mov high
5133 pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid
5135 pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low
5137 eor v5.16b, v5.16b, v3.16b //AES final block - result
5138 eor v10.16b, v10.16b, v22.16b //GHASH final-1 block - mid
5140 eor v11.16b, v11.16b, v21.16b //GHASH final-1 block - low
5146 sub x1, x1, #128 //bit_length -= 128
5148 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128])
5172 pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high
5173 mov d8, v4.d[1] //GHASH final block - mid
5180 pmull v21.1q, v4.1d, v12.1d //GHASH final block - low
5182 eor v9.16b, v9.16b, v20.16b //GHASH final block - high
5183 eor v8.8b, v8.8b, v4.8b //GHASH final block - mid
5185 pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid
5187 eor v11.16b, v11.16b, v21.16b //GHASH final block - low
5189 eor v10.16b, v10.16b, v8.16b //GHASH final block - mid
5192 eor v4.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
5196 eor v10.16b, v10.16b, v4.16b //MODULO - karatsuba tidy up
5198 pmull v7.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
5200 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
5202 eor v10.16b, v10.16b, v7.16b //MODULO - fold into mid
5204 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
5206 pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
5208 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
5213 eor v11.16b, v11.16b, v9.16b //MODULO - fold into low
5215 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
5233 .size aes_gcm_enc_256_kernel,.-aes_gcm_enc_256_kernel
5236 .align 4
5240 stp x19, x20, [sp, #-112]!
5262 ld1 {v18.4s}, [x8], #16 //load rk0
5263 sub x5, x5, #1 //byte_len - 1
5265 ld1 {v19.4s}, [x8], #16 //load rk1
5269 ld1 {v20.4s}, [x8], #16 //load rk2
5272 ld1 {v21.4s}, [x8], #16 //load rk3
5275 ld1 {v22.4s}, [x8], #16 //load rk4
5290 rev w9, w12 //CTR block 2
5291 add w12, w12, #1 //CTR block 2
5293 fmov d2, x10 //CTR block 2
5294 orr x9, x11, x9, lsl #32 //CTR block 2
5296 fmov v2.d[1], x9 //CTR block 2
5300 ld1 {v23.4s}, [x8], #16 //load rk5
5305 ld1 {v24.4s}, [x8], #16 //load rk6
5307 ld1 {v25.4s}, [x8], #16 //load rk7
5309 ld1 {v26.4s}, [x8], #16 //load rk8
5312 aesmc v0.16b, v0.16b //AES block 0 - round 0
5319 aesmc v3.16b, v3.16b //AES block 3 - round 0
5326 aesmc v1.16b, v1.16b //AES block 1 - round 0
5333 aesmc v2.16b, v2.16b //AES block 2 - round 0
5334 ld1 {v27.4s}, [x8], #16 //load rk9
5337 aesmc v0.16b, v0.16b //AES block 0 - round 1
5340 aesmc v1.16b, v1.16b //AES block 1 - round 1
5346 aesmc v2.16b, v2.16b //AES block 2 - round 1
5347 ld1 {v28.4s}, [x8], #16 //load rk10
5350 aesmc v3.16b, v3.16b //AES block 3 - round 1
5351 ld1 {v29.4s}, [x8], #16 //load rk11
5354 aesmc v0.16b, v0.16b //AES block 0 - round 2
5360 aesmc v2.16b, v2.16b //AES block 2 - round 2
5361 ld1 {v30.4s}, [x8], #16 //load rk12
5364 aesmc v3.16b, v3.16b //AES block 3 - round 2
5367 aesmc v0.16b, v0.16b //AES block 0 - round 3
5370 aesmc v1.16b, v1.16b //AES block 1 - round 2
5373 aesmc v3.16b, v3.16b //AES block 3 - round 3
5376 aesmc v0.16b, v0.16b //AES block 0 - round 4
5377 cmp x0, x5 //check if we have <= 4 blocks
5380 aesmc v2.16b, v2.16b //AES block 2 - round 3
5383 aesmc v1.16b, v1.16b //AES block 1 - round 3
5386 aesmc v3.16b, v3.16b //AES block 3 - round 4
5389 aesmc v2.16b, v2.16b //AES block 2 - round 4
5392 aesmc v1.16b, v1.16b //AES block 1 - round 4
5395 aesmc v3.16b, v3.16b //AES block 3 - round 5
5398 aesmc v0.16b, v0.16b //AES block 0 - round 5
5401 aesmc v1.16b, v1.16b //AES block 1 - round 5
5404 aesmc v2.16b, v2.16b //AES block 2 - round 5
5407 aesmc v0.16b, v0.16b //AES block 0 - round 6
5410 aesmc v3.16b, v3.16b //AES block 3 - round 6
5413 aesmc v1.16b, v1.16b //AES block 1 - round 6
5416 aesmc v2.16b, v2.16b //AES block 2 - round 6
5419 aesmc v0.16b, v0.16b //AES block 0 - round 7
5422 aesmc v1.16b, v1.16b //AES block 1 - round 7
5425 aesmc v3.16b, v3.16b //AES block 3 - round 7
5428 aesmc v0.16b, v0.16b //AES block 0 - round 8
5431 aesmc v2.16b, v2.16b //AES block 2 - round 7
5434 aesmc v3.16b, v3.16b //AES block 3 - round 8
5437 aesmc v1.16b, v1.16b //AES block 1 - round 8
5440 aesmc v0.16b, v0.16b //AES block 0 - round 9
5443 aesmc v2.16b, v2.16b //AES block 2 - round 8
5444 ld1 {v31.4s}, [x8], #16 //load rk13
5447 aesmc v1.16b, v1.16b //AES block 1 - round 9
5450 aesmc v0.16b, v0.16b //AES block 0 - round 10
5453 aesmc v3.16b, v3.16b //AES block 3 - round 9
5456 aesmc v1.16b, v1.16b //AES block 1 - round 10
5459 aesmc v2.16b, v2.16b //AES block 2 - round 9
5462 aesmc v3.16b, v3.16b //AES block 3 - round 10
5465 aesmc v0.16b, v0.16b //AES block 0 - round 11
5468 aesmc v2.16b, v2.16b //AES block 2 - round 10
5471 aesmc v3.16b, v3.16b //AES block 3 - round 11
5474 aesmc v1.16b, v1.16b //AES block 1 - round 11
5477 aesmc v2.16b, v2.16b //AES block 2 - round 11
5479 trn1 v9.2d, v14.2d, v15.2d //h4h | h3h
5481 trn2 v17.2d, v14.2d, v15.2d //h4l | h3l
5483 trn1 v8.2d, v12.2d, v13.2d //h2h | h1h
5484 trn2 v16.2d, v12.2d, v13.2d //h2l | h1l
5487 aesmc v1.16b, v1.16b //AES block 1 - round 12
5490 aesmc v0.16b, v0.16b //AES block 0 - round 12
5493 aesmc v2.16b, v2.16b //AES block 2 - round 12
5496 aesmc v3.16b, v3.16b //AES block 3 - round 12
5499 aese v1.16b, v31.16b //AES block 1 - round 13
5501 aese v2.16b, v31.16b //AES block 2 - round 13
5504 aese v3.16b, v31.16b //AES block 3 - round 13
5506 aese v0.16b, v31.16b //AES block 0 - round 13
5509 ld1 {v4.16b, v5.16b}, [x0], #32 //AES block 0,1 - load ciphertext
5511 rev w9, w12 //CTR block 4
5513 eor v0.16b, v4.16b, v0.16b //AES block 0 - result
5515 eor v1.16b, v5.16b, v1.16b //AES block 1 - result
5517 ld1 {v6.16b}, [x0], #16 //AES block 2 - load ciphertext
5519 mov x7, v0.d[1] //AES block 0 - mov high
5521 mov x6, v0.d[0] //AES block 0 - mov low
5523 add w12, w12, #1 //CTR block 4
5525 fmov d0, x10 //CTR block 4
5526 orr x9, x11, x9, lsl #32 //CTR block 4
5528 fmov v0.d[1], x9 //CTR block 4
5532 mov x19, v1.d[0] //AES block 1 - mov low
5535 mov x20, v1.d[1] //AES block 1 - mov high
5536 eor x7, x7, x14 //AES block 0 - round 14 high
5540 eor x6, x6, x13 //AES block 0 - round 14 low
5544 stp x6, x7, [x2], #16 //AES block 0 - store result
5547 ld1 {v7.16b}, [x0], #16 //AES block 3 - load ciphertext
5553 eor x19, x19, x13 //AES block 1 - round 14 low
5559 eor x20, x20, x14 //AES block 1 - round 14 high
5563 stp x19, x20, [x2], #16 //AES block 1 - store result
5565 eor v2.16b, v6.16b, v2.16b //AES block 2 - result
5570 mov x21, v2.d[0] //AES block 4k+2 - mov low
5572 eor v3.16b, v7.16b, v3.16b //AES block 4k+3 - result
5575 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
5576 mov x22, v2.d[1] //AES block 4k+2 - mov high
5579 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
5580 fmov d2, x10 //CTR block 4k+6
5582 fmov v2.d[1], x9 //CTR block 4k+6
5584 rev w9, w12 //CTR block 4k+7
5587 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
5588 mov x24, v3.d[1] //AES block 4k+3 - mov high
5591 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
5592 mov x23, v3.d[0] //AES block 4k+3 - mov low
5594 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
5595 mov d8, v4.d[1] //GHASH block 4k - mid
5596 fmov d3, x10 //CTR block 4k+7
5599 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
5600 orr x9, x11, x9, lsl #32 //CTR block 4k+7
5603 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
5604 fmov v3.d[1], x9 //CTR block 4k+7
5607 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
5608 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
5611 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
5612 eor x22, x22, x14 //AES block 4k+2 - round 14 high
5617 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
5618 mov d10, v17.d[1] //GHASH block 4k - mid
5621 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
5622 rev64 v6.16b, v6.16b //GHASH block 4k+2
5625 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
5626 eor x21, x21, x13 //AES block 4k+2 - round 14 low
5631 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
5632 stp x21, x22, [x2], #16 //AES block 4k+2 - store result
5634 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
5636 pmull2 v4.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
5639 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
5640 rev64 v7.16b, v7.16b //GHASH block 4k+3
5642 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
5643 eor x23, x23, x13 //AES block 4k+3 - round 14 low
5647 pmull v8.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
5648 eor x24, x24, x14 //AES block 4k+3 - round 14 high
5652 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+1 - high
5655 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
5658 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
5659 mov d4, v5.d[1] //GHASH block 4k+1 - mid
5662 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
5663 eor v11.16b, v11.16b, v8.16b //GHASH block 4k+1 - low
5666 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
5667 add w12, w12, #1 //CTR block 4k+7
5670 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
5671 mov d8, v6.d[1] //GHASH block 4k+2 - mid
5674 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
5675 eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid
5677 pmull v5.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
5680 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
5681 eor v8.8b, v8.8b, v6.8b //GHASH block 4k+2 - mid
5684 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
5687 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
5688 eor v11.16b, v11.16b, v5.16b //GHASH block 4k+2 - low
5690 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
5691 rev w9, w12 //CTR block 4k+8
5694 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
5695 ins v8.d[1], v8.d[0] //GHASH block 4k+2 - mid
5698 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
5699 add w12, w12, #1 //CTR block 4k+8
5702 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
5705 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
5706 eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid
5709 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
5711 pmull2 v4.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
5712 mov d6, v7.d[1] //GHASH block 4k+3 - mid
5715 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
5717 pmull2 v8.1q, v8.2d, v16.2d //GHASH block 4k+2 - mid
5720 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
5721 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+2 - high
5724 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
5726 pmull v4.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
5727 orr x9, x11, x9, lsl #32 //CTR block 4k+8
5728 eor v10.16b, v10.16b, v8.16b //GHASH block 4k+2 - mid
5730 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
5733 aesmc v0.16b, v0.16b //AES block 4k+4 - round 9
5734 eor v6.8b, v6.8b, v7.8b //GHASH block 4k+3 - mid
5737 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
5740 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
5741 eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high
5744 aesmc v0.16b, v0.16b //AES block 4k+4 - round 10
5746 pmull v6.1q, v6.1d, v16.1d //GHASH block 4k+3 - mid
5750 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
5751 eor v11.16b, v11.16b, v4.16b //GHASH block 4k+3 - low
5754 aesmc v0.16b, v0.16b //AES block 4k+4 - round 11
5757 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
5761 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
5762 eor v10.16b, v10.16b, v6.16b //GHASH block 4k+3 - mid
5765 aesmc v0.16b, v0.16b //AES block 4k+4 - round 12
5767 pmull v7.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
5768 eor v6.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
5771 aesmc v1.16b, v1.16b //AES block 4k+5 - round 9
5772 ld1 {v4.16b}, [x0], #16 //AES block 4k+4 - load ciphertext
5774 aese v0.16b, v31.16b //AES block 4k+4 - round 13
5775 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
5778 aesmc v1.16b, v1.16b //AES block 4k+5 - round 10
5779 eor v10.16b, v10.16b, v6.16b //MODULO - karatsuba tidy up
5782 aesmc v2.16b, v2.16b //AES block 4k+6 - round 9
5783 ld1 {v5.16b}, [x0], #16 //AES block 4k+5 - load ciphertext
5786 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
5787 eor v0.16b, v4.16b, v0.16b //AES block 4k+4 - result
5790 aesmc v1.16b, v1.16b //AES block 4k+5 - round 11
5791 stp x23, x24, [x2], #16 //AES block 4k+3 - store result
5794 aesmc v2.16b, v2.16b //AES block 4k+6 - round 10
5795 eor v10.16b, v10.16b, v7.16b //MODULO - fold into mid
5798 aesmc v3.16b, v3.16b //AES block 4k+7 - round 9
5799 ld1 {v6.16b}, [x0], #16 //AES block 4k+6 - load ciphertext
5802 aesmc v1.16b, v1.16b //AES block 4k+5 - round 12
5803 ld1 {v7.16b}, [x0], #16 //AES block 4k+7 - load ciphertext
5806 aesmc v2.16b, v2.16b //AES block 4k+6 - round 11
5807 mov x7, v0.d[1] //AES block 4k+4 - mov high
5810 aesmc v3.16b, v3.16b //AES block 4k+7 - round 10
5811 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
5813 aese v1.16b, v31.16b //AES block 4k+5 - round 13
5814 mov x6, v0.d[0] //AES block 4k+4 - mov low
5817 aesmc v2.16b, v2.16b //AES block 4k+6 - round 12
5818 fmov d0, x10 //CTR block 4k+8
5821 aesmc v3.16b, v3.16b //AES block 4k+7 - round 11
5822 fmov v0.d[1], x9 //CTR block 4k+8
5824 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
5825 eor v1.16b, v5.16b, v1.16b //AES block 4k+5 - result
5826 rev w9, w12 //CTR block 4k+9
5828 aese v2.16b, v31.16b //AES block 4k+6 - round 13
5829 orr x9, x11, x9, lsl #32 //CTR block 4k+9
5832 add w12, w12, #1 //CTR block 4k+9
5834 eor x6, x6, x13 //AES block 4k+4 - round 14 low
5838 eor x7, x7, x14 //AES block 4k+4 - round 14 high
5842 mov x20, v1.d[1] //AES block 4k+5 - mov high
5843 eor v2.16b, v6.16b, v2.16b //AES block 4k+6 - result
5844 eor v11.16b, v11.16b, v8.16b //MODULO - fold into low
5847 aesmc v3.16b, v3.16b //AES block 4k+7 - round 12
5848 mov x19, v1.d[0] //AES block 4k+5 - mov low
5850 fmov d1, x10 //CTR block 4k+9
5851 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
5853 fmov v1.d[1], x9 //CTR block 4k+9
5854 rev w9, w12 //CTR block 4k+10
5855 add w12, w12, #1 //CTR block 4k+10
5857 aese v3.16b, v31.16b //AES block 4k+7 - round 13
5858 orr x9, x11, x9, lsl #32 //CTR block 4k+10
5860 rev64 v5.16b, v5.16b //GHASH block 4k+5
5861 eor x20, x20, x14 //AES block 4k+5 - round 14 high
5865 stp x6, x7, [x2], #16 //AES block 4k+4 - store result
5867 eor x19, x19, x13 //AES block 4k+5 - round 14 low
5871 stp x19, x20, [x2], #16 //AES block 4k+5 - store result
5873 rev64 v4.16b, v4.16b //GHASH block 4k+4
5874 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
5880 mov x21, v2.d[0] //AES block 4k+2 - mov low
5881 eor v3.16b, v7.16b, v3.16b //AES block 4k+3 - result
5884 aesmc v0.16b, v0.16b //AES block 4k+4 - round 0
5885 mov x22, v2.d[1] //AES block 4k+2 - mov high
5888 aesmc v1.16b, v1.16b //AES block 4k+5 - round 0
5889 fmov d2, x10 //CTR block 4k+6
5891 fmov v2.d[1], x9 //CTR block 4k+6
5892 rev w9, w12 //CTR block 4k+7
5895 rev64 v6.16b, v6.16b //GHASH block 4k+2
5896 orr x9, x11, x9, lsl #32 //CTR block 4k+7
5897 mov x23, v3.d[0] //AES block 4k+3 - mov low
5900 aesmc v1.16b, v1.16b //AES block 4k+5 - round 1
5901 mov x24, v3.d[1] //AES block 4k+3 - mov high
5903 pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low
5904 mov d8, v4.d[1] //GHASH block 4k - mid
5905 fmov d3, x10 //CTR block 4k+7
5907 pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high
5908 fmov v3.d[1], x9 //CTR block 4k+7
5911 aesmc v2.16b, v2.16b //AES block 4k+6 - round 0
5912 mov d10, v17.d[1] //GHASH block 4k - mid
5915 aesmc v0.16b, v0.16b //AES block 4k+4 - round 1
5916 eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid
5918 pmull2 v4.1q, v5.2d, v14.2d //GHASH block 4k+1 - high
5921 aesmc v2.16b, v2.16b //AES block 4k+6 - round 1
5922 rev64 v7.16b, v7.16b //GHASH block 4k+3
5925 aesmc v3.16b, v3.16b //AES block 4k+7 - round 0
5927 pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid
5928 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+1 - high
5930 pmull v8.1q, v5.1d, v14.1d //GHASH block 4k+1 - low
5933 aesmc v3.16b, v3.16b //AES block 4k+7 - round 1
5934 mov d4, v5.d[1] //GHASH block 4k+1 - mid
5937 aesmc v0.16b, v0.16b //AES block 4k+4 - round 2
5940 aesmc v1.16b, v1.16b //AES block 4k+5 - round 2
5941 eor v11.16b, v11.16b, v8.16b //GHASH block 4k+1 - low
5944 aesmc v2.16b, v2.16b //AES block 4k+6 - round 2
5947 aesmc v0.16b, v0.16b //AES block 4k+4 - round 3
5948 mov d8, v6.d[1] //GHASH block 4k+2 - mid
5951 aesmc v3.16b, v3.16b //AES block 4k+7 - round 2
5952 eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid
5954 pmull v5.1q, v6.1d, v13.1d //GHASH block 4k+2 - low
5957 aesmc v0.16b, v0.16b //AES block 4k+4 - round 4
5960 aesmc v3.16b, v3.16b //AES block 4k+7 - round 3
5961 eor v8.8b, v8.8b, v6.8b //GHASH block 4k+2 - mid
5963 pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid
5966 aesmc v0.16b, v0.16b //AES block 4k+4 - round 5
5967 eor v11.16b, v11.16b, v5.16b //GHASH block 4k+2 - low
5970 aesmc v3.16b, v3.16b //AES block 4k+7 - round 4
5972 pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high
5973 eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid
5975 pmull2 v4.1q, v6.2d, v13.2d //GHASH block 4k+2 - high
5978 aesmc v3.16b, v3.16b //AES block 4k+7 - round 5
5979 ins v8.d[1], v8.d[0] //GHASH block 4k+2 - mid
5982 aesmc v2.16b, v2.16b //AES block 4k+6 - round 3
5985 aesmc v1.16b, v1.16b //AES block 4k+5 - round 3
5986 eor v9.16b, v9.16b, v4.16b //GHASH block 4k+2 - high
5988 pmull v4.1q, v7.1d, v12.1d //GHASH block 4k+3 - low
5991 aesmc v2.16b, v2.16b //AES block 4k+6 - round 4
5992 mov d6, v7.d[1] //GHASH block 4k+3 - mid
5995 aesmc v1.16b, v1.16b //AES block 4k+5 - round 4
5997 pmull2 v8.1q, v8.2d, v16.2d //GHASH block 4k+2 - mid
6000 aesmc v2.16b, v2.16b //AES block 4k+6 - round 5
6001 eor v6.8b, v6.8b, v7.8b //GHASH block 4k+3 - mid
6004 aesmc v1.16b, v1.16b //AES block 4k+5 - round 5
6007 aesmc v3.16b, v3.16b //AES block 4k+7 - round 6
6008 eor v10.16b, v10.16b, v8.16b //GHASH block 4k+2 - mid
6011 aesmc v2.16b, v2.16b //AES block 4k+6 - round 6
6014 aesmc v0.16b, v0.16b //AES block 4k+4 - round 6
6018 aesmc v1.16b, v1.16b //AES block 4k+5 - round 6
6019 eor v11.16b, v11.16b, v4.16b //GHASH block 4k+3 - low
6021 pmull v6.1q, v6.1d, v16.1d //GHASH block 4k+3 - mid
6024 aesmc v3.16b, v3.16b //AES block 4k+7 - round 7
6025 eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high
6028 aesmc v1.16b, v1.16b //AES block 4k+5 - round 7
6031 aesmc v0.16b, v0.16b //AES block 4k+4 - round 7
6032 eor v10.16b, v10.16b, v6.16b //GHASH block 4k+3 - mid
6035 aesmc v3.16b, v3.16b //AES block 4k+7 - round 8
6038 aesmc v2.16b, v2.16b //AES block 4k+6 - round 7
6039 eor v6.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
6042 aesmc v1.16b, v1.16b //AES block 4k+5 - round 8
6045 aesmc v0.16b, v0.16b //AES block 4k+4 - round 8
6049 aesmc v2.16b, v2.16b //AES block 4k+6 - round 8
6052 aesmc v1.16b, v1.16b //AES block 4k+5 - round 9
6053 eor v10.16b, v10.16b, v6.16b //MODULO - karatsuba tidy up
6055 pmull v7.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
6058 aesmc v2.16b, v2.16b //AES block 4k+6 - round 9
6059 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
6062 aesmc v3.16b, v3.16b //AES block 4k+7 - round 9
6065 aesmc v0.16b, v0.16b //AES block 4k+4 - round 9
6066 eor v10.16b, v10.16b, v7.16b //MODULO - fold into mid
6069 aesmc v2.16b, v2.16b //AES block 4k+6 - round 10
6072 aesmc v3.16b, v3.16b //AES block 4k+7 - round 10
6075 aesmc v0.16b, v0.16b //AES block 4k+4 - round 10
6076 eor x22, x22, x14 //AES block 4k+2 - round 14 high
6081 aesmc v1.16b, v1.16b //AES block 4k+5 - round 10
6082 eor x23, x23, x13 //AES block 4k+3 - round 14 low
6087 aesmc v2.16b, v2.16b //AES block 4k+6 - round 11
6088 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
6091 aesmc v0.16b, v0.16b //AES block 4k+4 - round 11
6092 add w12, w12, #1 //CTR block 4k+7
6095 aesmc v1.16b, v1.16b //AES block 4k+5 - round 11
6096 eor x21, x21, x13 //AES block 4k+2 - round 14 low
6102 aesmc v2.16b, v2.16b //AES block 4k+6 - round 12
6104 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
6105 eor x24, x24, x14 //AES block 4k+3 - round 14 high
6111 aesmc v3.16b, v3.16b //AES block 4k+7 - round 11
6112 stp x21, x22, [x2], #16 //AES block 4k+2 - store result
6115 aesmc v1.16b, v1.16b //AES block 4k+5 - round 12
6116 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
6119 aesmc v0.16b, v0.16b //AES block 4k+4 - round 12
6120 stp x23, x24, [x2], #16 //AES block 4k+3 - store result
6123 aesmc v3.16b, v3.16b //AES block 4k+7 - round 12
6124 eor v11.16b, v11.16b, v8.16b //MODULO - fold into low
6126 aese v1.16b, v31.16b //AES block 4k+5 - round 13
6128 aese v0.16b, v31.16b //AES block 4k+4 - round 13
6130 aese v3.16b, v31.16b //AES block 4k+7 - round 13
6132 aese v2.16b, v31.16b //AES block 4k+6 - round 13
6133 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
6137 ld1 { v5.16b}, [x0], #16 //AES block 4k+4 - load ciphertext
6139 eor v0.16b, v5.16b, v0.16b //AES block 4k+4 - result
6141 mov x6, v0.d[0] //AES block 4k+4 - mov low
6143 mov x7, v0.d[1] //AES block 4k+4 - mov high
6148 eor x6, x6, x13 //AES block 4k+4 - round 14 low
6153 eor x7, x7, x14 //AES block 4k+4 - round 14 high
6179 rev64 v4.16b, v5.16b //GHASH final-3 block
6180 ld1 { v5.16b}, [x0], #16 //AES final-2 block - load ciphertext
6182 stp x6, x7, [x2], #16 //AES final-3 block - store result
6184 mov d10, v17.d[1] //GHASH final-3 block - mid
6188 eor v0.16b, v5.16b, v1.16b //AES final-2 block - result
6190 mov d22, v4.d[1] //GHASH final-3 block - mid
6192 mov x6, v0.d[0] //AES final-2 block - mov low
6194 mov x7, v0.d[1] //AES final-2 block - mov high
6196 eor v22.8b, v22.8b, v4.8b //GHASH final-3 block - mid
6200 pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high
6202 pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid
6203 eor x6, x6, x13 //AES final-2 block - round 14 low
6208 pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low
6209 eor x7, x7, x14 //AES final-2 block - round 14 high
6213 .L256_dec_blocks_more_than_2: //blocks left > 2
6215 rev64 v4.16b, v5.16b //GHASH final-2 block
6216 ld1 { v5.16b}, [x0], #16 //AES final-1 block - load ciphertext
6219 stp x6, x7, [x2], #16 //AES final-2 block - store result
6221 eor v0.16b, v5.16b, v2.16b //AES final-1 block - result
6223 mov d22, v4.d[1] //GHASH final-2 block - mid
6225 pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low
6227 pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high
6229 eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid
6230 mov x6, v0.d[0] //AES final-1 block - mov low
6232 mov x7, v0.d[1] //AES final-1 block - mov high
6233 eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low
6236 pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid
6238 eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high
6239 eor x6, x6, x13 //AES final-1 block - round 14 low
6244 eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid
6245 eor x7, x7, x14 //AES final-1 block - round 14 high
6251 stp x6, x7, [x2], #16 //AES final-1 block - store result
6252 rev64 v4.16b, v5.16b //GHASH final-1 block
6254 ld1 { v5.16b}, [x0], #16 //AES final block - load ciphertext
6259 mov d22, v4.d[1] //GHASH final-1 block - mid
6261 eor v0.16b, v5.16b, v3.16b //AES final block - result
6263 pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high
6265 eor v22.8b, v22.8b, v4.8b //GHASH final-1 block - mid
6267 pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low
6268 mov x6, v0.d[0] //AES final block - mov low
6270 ins v22.d[1], v22.d[0] //GHASH final-1 block - mid
6272 mov x7, v0.d[1] //AES final block - mov high
6274 pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid
6275 eor x6, x6, x13 //AES final block - round 14 low
6279 eor v11.16b, v11.16b, v21.16b //GHASH final-1 block - low
6281 eor v9.16b, v9.16b, v20.16b //GHASH final-1 block - high
6283 eor v10.16b, v10.16b, v22.16b //GHASH final-1 block - mid
6284 eor x7, x7, x14 //AES final block - round 14 high
6293 sub x1, x1, #128 //bit_length -= 128
6297 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128])
6333 pmull v21.1q, v4.1d, v12.1d //GHASH final block - low
6335 mov d8, v4.d[1] //GHASH final block - mid
6337 eor v8.8b, v8.8b, v4.8b //GHASH final block - mid
6339 pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high
6341 pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid
6343 eor v9.16b, v9.16b, v20.16b //GHASH final block - high
6345 eor v11.16b, v11.16b, v21.16b //GHASH final block - low
6347 eor v10.16b, v10.16b, v8.16b //GHASH final block - mid
6350 eor v6.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up
6354 eor v10.16b, v10.16b, v6.16b //MODULO - karatsuba tidy up
6356 pmull v7.1q, v9.1d, v8.1d //MODULO - top 64b align with mid
6358 ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment
6360 eor v10.16b, v10.16b, v7.16b //MODULO - fold into mid
6362 eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid
6364 pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low
6366 ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment
6368 eor v11.16b, v11.16b, v8.16b //MODULO - fold into low
6374 eor v11.16b, v11.16b, v10.16b //MODULO - fold into low
6392 .size aes_gcm_dec_256_kernel,.-aes_gcm_dec_256_kernel
6394 .align 2
6395 .align 2