/* Do not modify. This file is auto-generated from aesv8-armx.pl. */
#include "arm_arch.h"

#if __ARM_MAX_ARCH__>=7
.arch	armv8-a+crypto
.text
.align	5
// Key-schedule constants, read through x3 by aes_v8_set_encrypt_key:
//   row 0: 0x01 splat, loaded into v1 and doubled with shl after each use
//   row 1: tbl index mask, loaded into v2
//   row 2: 0x1b splat, loaded into v1 once the 128-bit loop has finished
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

// aes_v8_set_encrypt_key: expand a user key into an encryption key schedule.
//
// In:  x0 = pointer to the user key
//      w1 = key length in bits (128, 192 or 256)
//      x2 = pointer to the key schedule to fill
// Out: x0 =  0 on success
//           -1 if x0 or x2 is zero
//           -2 if w1 is below 128, above 256 or not a multiple of 64
//
// Round keys are stored upward from [x2]; the round count (10, 12 or 14)
// is stored as a 32-bit word 240 bytes after the start of the schedule.
// On the success path x2 is left pointing at that word and x12 holds the
// round count; aes_v8_set_decrypt_key depends on both when it enters
// through .Lenc_key.
//
// Scratch: w1, x3, w12, v0-v6, flags.
// x30 is never written here, so the epilogue reloads x29 only.
.globl	aes_v8_set_encrypt_key
.type	aes_v8_set_encrypt_key,%function
.align	5
aes_v8_set_encrypt_key:
.Lenc_key:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1
	cmp	x0,#0
	b.eq	.Lenc_key_abort
	cmp	x2,#0
	b.eq	.Lenc_key_abort
	mov	x3,#-2
	cmp	w1,#128
	b.lt	.Lenc_key_abort
	cmp	w1,#256
	b.gt	.Lenc_key_abort
	tst	w1,#0x3f
	b.ne	.Lenc_key_abort

	adr	x3,.Lrcon
	cmp	w1,#192		// flags consumed by the three-way branch below

	eor	v0.16b,v0.16b,v0.16b
	ld1	{v3.16b},[x0],#16
	mov	w1,#8		// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32

	b.lt	.Loop128
	b.eq	.L192
	b	.L256

// 128-bit key: eight looped steps, then two unrolled steps that take
// their constant from the third .Lrcon row.
.align	4
.Loop128:
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	b.ne	.Loop128

	ld1	{v1.4s},[x3]

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]
	add	x2,x2,#0x50	// x2 = start of schedule + 240

	mov	w12,#10
	b	.Ldone

// 192-bit key: the extra 8 key bytes live in v4; eight looped steps.
.align	4
.L192:
	ld1	{v4.8b},[x0],#8
	movi	v6.16b,#8			// borrow v6.16b
	st1	{v3.4s},[x2],#16
	sub	v2.16b,v2.16b,v6.16b	// adjust the mask

.Loop192:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
#ifdef __ARMEB__
	st1	{v4.4s},[x2],#16
	sub	x2,x2,#8
#else
	st1	{v4.8b},[x2],#8
#endif
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b

	dup	v5.4s,v3.s[3]
	eor	v5.16b,v5.16b,v4.16b
	eor	v6.16b,v6.16b,v1.16b
	ext	v4.16b,v0.16b,v4.16b,#12
	shl	v1.16b,v1.16b,#1
	eor	v4.16b,v4.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	eor	v4.16b,v4.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.ne	.Loop192

	mov	w12,#12
	add	x2,x2,#0x20	// x2 = start of schedule + 240
	b	.Ldone

// 256-bit key: second key half lives in v4; seven looped steps, leaving
// through the b.eq in the middle of the last one.
.align	4
.L256:
	ld1	{v4.16b},[x0]
	mov	w1,#7
	mov	w12,#14
	st1	{v3.4s},[x2],#16

.Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	.Ldone

	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	.Loop256

.Ldone:
	str	w12,[x2]	// round count at schedule + 240
	mov	x3,#0

.Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16
	ret
.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key

.globl	aes_v8_set_decrypt_key
.type	aes_v8_set_decrypt_key,%function
.align	5
188aes_v8_set_decrypt_key: 189.inst 0xd503233f // paciasp 190 stp x29,x30,[sp,#-16]! 191 add x29,sp,#0 192 bl .Lenc_key 193 194 cmp x0,#0 195 b.ne .Ldec_key_abort 196 197 sub x2,x2,#240 // restore original x2 198 mov x4,#-16 199 add x0,x2,x12,lsl#4 // end of key schedule 200 201 ld1 {v0.4s},[x2] 202 ld1 {v1.4s},[x0] 203 st1 {v0.4s},[x0],x4 204 st1 {v1.4s},[x2],#16 205 206.Loop_imc: 207 ld1 {v0.4s},[x2] 208 ld1 {v1.4s},[x0] 209 aesimc v0.16b,v0.16b 210 aesimc v1.16b,v1.16b 211 st1 {v0.4s},[x0],x4 212 st1 {v1.4s},[x2],#16 213 cmp x0,x2 214 b.hi .Loop_imc 215 216 ld1 {v0.4s},[x2] 217 aesimc v0.16b,v0.16b 218 st1 {v0.4s},[x0] 219 220 eor x0,x0,x0 // return value 221.Ldec_key_abort: 222 ldp x29,x30,[sp],#16 223.inst 0xd50323bf // autiasp 224 ret 225.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key 226.globl aes_v8_encrypt 227.type aes_v8_encrypt,%function 228.align 5 229aes_v8_encrypt: 230 ldr w3,[x2,#240] 231 ld1 {v0.4s},[x2],#16 232 ld1 {v2.16b},[x0] 233 sub w3,w3,#2 234 ld1 {v1.4s},[x2],#16 235 236.Loop_enc: 237 aese v2.16b,v0.16b 238 aesmc v2.16b,v2.16b 239 ld1 {v0.4s},[x2],#16 240 subs w3,w3,#2 241 aese v2.16b,v1.16b 242 aesmc v2.16b,v2.16b 243 ld1 {v1.4s},[x2],#16 244 b.gt .Loop_enc 245 246 aese v2.16b,v0.16b 247 aesmc v2.16b,v2.16b 248 ld1 {v0.4s},[x2] 249 aese v2.16b,v1.16b 250 eor v2.16b,v2.16b,v0.16b 251 252 st1 {v2.16b},[x1] 253 ret 254.size aes_v8_encrypt,.-aes_v8_encrypt 255.globl aes_v8_decrypt 256.type aes_v8_decrypt,%function 257.align 5 258aes_v8_decrypt: 259 ldr w3,[x2,#240] 260 ld1 {v0.4s},[x2],#16 261 ld1 {v2.16b},[x0] 262 sub w3,w3,#2 263 ld1 {v1.4s},[x2],#16 264 265.Loop_dec: 266 aesd v2.16b,v0.16b 267 aesimc v2.16b,v2.16b 268 ld1 {v0.4s},[x2],#16 269 subs w3,w3,#2 270 aesd v2.16b,v1.16b 271 aesimc v2.16b,v2.16b 272 ld1 {v1.4s},[x2],#16 273 b.gt .Loop_dec 274 275 aesd v2.16b,v0.16b 276 aesimc v2.16b,v2.16b 277 ld1 {v0.4s},[x2] 278 aesd v2.16b,v1.16b 279 eor v2.16b,v2.16b,v0.16b 280 281 st1 {v2.16b},[x1] 282 ret 283.size 
aes_v8_decrypt,.-aes_v8_decrypt 284.globl aes_v8_ecb_encrypt 285.type aes_v8_ecb_encrypt,%function 286.align 5 287aes_v8_ecb_encrypt: 288 subs x2,x2,#16 289 // Original input data size bigger than 16, jump to big size processing. 290 b.ne .Lecb_big_size 291 ld1 {v0.16b},[x0] 292 cmp w4,#0 // en- or decrypting? 293 ldr w5,[x3,#240] 294 ld1 {v5.4s,v6.4s},[x3],#32 // load key schedule... 295 296 b.eq .Lecb_small_dec 297 aese v0.16b,v5.16b 298 aesmc v0.16b,v0.16b 299 ld1 {v16.4s,v17.4s},[x3],#32 // load key schedule... 300 aese v0.16b,v6.16b 301 aesmc v0.16b,v0.16b 302 subs w5,w5,#10 // if rounds==10, jump to aes-128-ecb processing 303 b.eq .Lecb_128_enc 304.Lecb_round_loop: 305 aese v0.16b,v16.16b 306 aesmc v0.16b,v0.16b 307 ld1 {v16.4s},[x3],#16 // load key schedule... 308 aese v0.16b,v17.16b 309 aesmc v0.16b,v0.16b 310 ld1 {v17.4s},[x3],#16 // load key schedule... 311 subs w5,w5,#2 // bias 312 b.gt .Lecb_round_loop 313.Lecb_128_enc: 314 ld1 {v18.4s,v19.4s},[x3],#32 // load key schedule... 315 aese v0.16b,v16.16b 316 aesmc v0.16b,v0.16b 317 aese v0.16b,v17.16b 318 aesmc v0.16b,v0.16b 319 ld1 {v20.4s,v21.4s},[x3],#32 // load key schedule... 320 aese v0.16b,v18.16b 321 aesmc v0.16b,v0.16b 322 aese v0.16b,v19.16b 323 aesmc v0.16b,v0.16b 324 ld1 {v22.4s,v23.4s},[x3],#32 // load key schedule... 325 aese v0.16b,v20.16b 326 aesmc v0.16b,v0.16b 327 aese v0.16b,v21.16b 328 aesmc v0.16b,v0.16b 329 ld1 {v7.4s},[x3] 330 aese v0.16b,v22.16b 331 aesmc v0.16b,v0.16b 332 aese v0.16b,v23.16b 333 eor v0.16b,v0.16b,v7.16b 334 st1 {v0.16b},[x1] 335 b .Lecb_Final_abort 336.Lecb_small_dec: 337 aesd v0.16b,v5.16b 338 aesimc v0.16b,v0.16b 339 ld1 {v16.4s,v17.4s},[x3],#32 // load key schedule... 340 aesd v0.16b,v6.16b 341 aesimc v0.16b,v0.16b 342 subs w5,w5,#10 // bias 343 b.eq .Lecb_128_dec 344.Lecb_dec_round_loop: 345 aesd v0.16b,v16.16b 346 aesimc v0.16b,v0.16b 347 ld1 {v16.4s},[x3],#16 // load key schedule... 
348 aesd v0.16b,v17.16b 349 aesimc v0.16b,v0.16b 350 ld1 {v17.4s},[x3],#16 // load key schedule... 351 subs w5,w5,#2 // bias 352 b.gt .Lecb_dec_round_loop 353.Lecb_128_dec: 354 ld1 {v18.4s,v19.4s},[x3],#32 // load key schedule... 355 aesd v0.16b,v16.16b 356 aesimc v0.16b,v0.16b 357 aesd v0.16b,v17.16b 358 aesimc v0.16b,v0.16b 359 ld1 {v20.4s,v21.4s},[x3],#32 // load key schedule... 360 aesd v0.16b,v18.16b 361 aesimc v0.16b,v0.16b 362 aesd v0.16b,v19.16b 363 aesimc v0.16b,v0.16b 364 ld1 {v22.4s,v23.4s},[x3],#32 // load key schedule... 365 aesd v0.16b,v20.16b 366 aesimc v0.16b,v0.16b 367 aesd v0.16b,v21.16b 368 aesimc v0.16b,v0.16b 369 ld1 {v7.4s},[x3] 370 aesd v0.16b,v22.16b 371 aesimc v0.16b,v0.16b 372 aesd v0.16b,v23.16b 373 eor v0.16b,v0.16b,v7.16b 374 st1 {v0.16b},[x1] 375 b .Lecb_Final_abort 376.Lecb_big_size: 377 stp x29,x30,[sp,#-16]! 378 add x29,sp,#0 379 mov x8,#16 380 b.lo .Lecb_done 381 csel x8,xzr,x8,eq 382 383 cmp w4,#0 // en- or decrypting? 384 ldr w5,[x3,#240] 385 and x2,x2,#-16 386 ld1 {v0.16b},[x0],x8 387 388 ld1 {v16.4s,v17.4s},[x3] // load key schedule... 
389 sub w5,w5,#6 390 add x7,x3,x5,lsl#4 // pointer to last 7 round keys 391 sub w5,w5,#2 392 ld1 {v18.4s,v19.4s},[x7],#32 393 ld1 {v20.4s,v21.4s},[x7],#32 394 ld1 {v22.4s,v23.4s},[x7],#32 395 ld1 {v7.4s},[x7] 396 397 add x7,x3,#32 398 mov w6,w5 399 b.eq .Lecb_dec 400 401 ld1 {v1.16b},[x0],#16 402 subs x2,x2,#32 // bias 403 add w6,w5,#2 404 orr v3.16b,v1.16b,v1.16b 405 orr v24.16b,v1.16b,v1.16b 406 orr v1.16b,v0.16b,v0.16b 407 b.lo .Lecb_enc_tail 408 409 orr v1.16b,v3.16b,v3.16b 410 ld1 {v24.16b},[x0],#16 411 cmp x2,#32 412 b.lo .Loop3x_ecb_enc 413 414 ld1 {v25.16b},[x0],#16 415 ld1 {v26.16b},[x0],#16 416 sub x2,x2,#32 // bias 417 mov w6,w5 418 419.Loop5x_ecb_enc: 420 aese v0.16b,v16.16b 421 aesmc v0.16b,v0.16b 422 aese v1.16b,v16.16b 423 aesmc v1.16b,v1.16b 424 aese v24.16b,v16.16b 425 aesmc v24.16b,v24.16b 426 aese v25.16b,v16.16b 427 aesmc v25.16b,v25.16b 428 aese v26.16b,v16.16b 429 aesmc v26.16b,v26.16b 430 ld1 {v16.4s},[x7],#16 431 subs w6,w6,#2 432 aese v0.16b,v17.16b 433 aesmc v0.16b,v0.16b 434 aese v1.16b,v17.16b 435 aesmc v1.16b,v1.16b 436 aese v24.16b,v17.16b 437 aesmc v24.16b,v24.16b 438 aese v25.16b,v17.16b 439 aesmc v25.16b,v25.16b 440 aese v26.16b,v17.16b 441 aesmc v26.16b,v26.16b 442 ld1 {v17.4s},[x7],#16 443 b.gt .Loop5x_ecb_enc 444 445 aese v0.16b,v16.16b 446 aesmc v0.16b,v0.16b 447 aese v1.16b,v16.16b 448 aesmc v1.16b,v1.16b 449 aese v24.16b,v16.16b 450 aesmc v24.16b,v24.16b 451 aese v25.16b,v16.16b 452 aesmc v25.16b,v25.16b 453 aese v26.16b,v16.16b 454 aesmc v26.16b,v26.16b 455 cmp x2,#0x40 // because .Lecb_enc_tail4x 456 sub x2,x2,#0x50 457 458 aese v0.16b,v17.16b 459 aesmc v0.16b,v0.16b 460 aese v1.16b,v17.16b 461 aesmc v1.16b,v1.16b 462 aese v24.16b,v17.16b 463 aesmc v24.16b,v24.16b 464 aese v25.16b,v17.16b 465 aesmc v25.16b,v25.16b 466 aese v26.16b,v17.16b 467 aesmc v26.16b,v26.16b 468 csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo 469 mov x7,x3 470 471 aese v0.16b,v18.16b 472 aesmc v0.16b,v0.16b 473 aese v1.16b,v18.16b 474 aesmc 
v1.16b,v1.16b 475 aese v24.16b,v18.16b 476 aesmc v24.16b,v24.16b 477 aese v25.16b,v18.16b 478 aesmc v25.16b,v25.16b 479 aese v26.16b,v18.16b 480 aesmc v26.16b,v26.16b 481 add x0,x0,x6 // x0 is adjusted in such way that 482 // at exit from the loop v1.16b-v26.16b 483 // are loaded with last "words" 484 add x6,x2,#0x60 // because .Lecb_enc_tail4x 485 486 aese v0.16b,v19.16b 487 aesmc v0.16b,v0.16b 488 aese v1.16b,v19.16b 489 aesmc v1.16b,v1.16b 490 aese v24.16b,v19.16b 491 aesmc v24.16b,v24.16b 492 aese v25.16b,v19.16b 493 aesmc v25.16b,v25.16b 494 aese v26.16b,v19.16b 495 aesmc v26.16b,v26.16b 496 497 aese v0.16b,v20.16b 498 aesmc v0.16b,v0.16b 499 aese v1.16b,v20.16b 500 aesmc v1.16b,v1.16b 501 aese v24.16b,v20.16b 502 aesmc v24.16b,v24.16b 503 aese v25.16b,v20.16b 504 aesmc v25.16b,v25.16b 505 aese v26.16b,v20.16b 506 aesmc v26.16b,v26.16b 507 508 aese v0.16b,v21.16b 509 aesmc v0.16b,v0.16b 510 aese v1.16b,v21.16b 511 aesmc v1.16b,v1.16b 512 aese v24.16b,v21.16b 513 aesmc v24.16b,v24.16b 514 aese v25.16b,v21.16b 515 aesmc v25.16b,v25.16b 516 aese v26.16b,v21.16b 517 aesmc v26.16b,v26.16b 518 519 aese v0.16b,v22.16b 520 aesmc v0.16b,v0.16b 521 aese v1.16b,v22.16b 522 aesmc v1.16b,v1.16b 523 aese v24.16b,v22.16b 524 aesmc v24.16b,v24.16b 525 aese v25.16b,v22.16b 526 aesmc v25.16b,v25.16b 527 aese v26.16b,v22.16b 528 aesmc v26.16b,v26.16b 529 530 aese v0.16b,v23.16b 531 ld1 {v2.16b},[x0],#16 532 aese v1.16b,v23.16b 533 ld1 {v3.16b},[x0],#16 534 aese v24.16b,v23.16b 535 ld1 {v27.16b},[x0],#16 536 aese v25.16b,v23.16b 537 ld1 {v28.16b},[x0],#16 538 aese v26.16b,v23.16b 539 ld1 {v29.16b},[x0],#16 540 cbz x6,.Lecb_enc_tail4x 541 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 542 eor v4.16b,v7.16b,v0.16b 543 orr v0.16b,v2.16b,v2.16b 544 eor v5.16b,v7.16b,v1.16b 545 orr v1.16b,v3.16b,v3.16b 546 eor v17.16b,v7.16b,v24.16b 547 orr v24.16b,v27.16b,v27.16b 548 eor v30.16b,v7.16b,v25.16b 549 orr v25.16b,v28.16b,v28.16b 550 eor v31.16b,v7.16b,v26.16b 551 st1 {v4.16b},[x1],#16 
552 orr v26.16b,v29.16b,v29.16b 553 st1 {v5.16b},[x1],#16 554 mov w6,w5 555 st1 {v17.16b},[x1],#16 556 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 557 st1 {v30.16b},[x1],#16 558 st1 {v31.16b},[x1],#16 559 b.hs .Loop5x_ecb_enc 560 561 add x2,x2,#0x50 562 cbz x2,.Lecb_done 563 564 add w6,w5,#2 565 subs x2,x2,#0x30 566 orr v0.16b,v27.16b,v27.16b 567 orr v1.16b,v28.16b,v28.16b 568 orr v24.16b,v29.16b,v29.16b 569 b.lo .Lecb_enc_tail 570 571 b .Loop3x_ecb_enc 572 573.align 4 574.Lecb_enc_tail4x: 575 eor v5.16b,v7.16b,v1.16b 576 eor v17.16b,v7.16b,v24.16b 577 eor v30.16b,v7.16b,v25.16b 578 eor v31.16b,v7.16b,v26.16b 579 st1 {v5.16b},[x1],#16 580 st1 {v17.16b},[x1],#16 581 st1 {v30.16b},[x1],#16 582 st1 {v31.16b},[x1],#16 583 584 b .Lecb_done 585.align 4 586.Loop3x_ecb_enc: 587 aese v0.16b,v16.16b 588 aesmc v0.16b,v0.16b 589 aese v1.16b,v16.16b 590 aesmc v1.16b,v1.16b 591 aese v24.16b,v16.16b 592 aesmc v24.16b,v24.16b 593 ld1 {v16.4s},[x7],#16 594 subs w6,w6,#2 595 aese v0.16b,v17.16b 596 aesmc v0.16b,v0.16b 597 aese v1.16b,v17.16b 598 aesmc v1.16b,v1.16b 599 aese v24.16b,v17.16b 600 aesmc v24.16b,v24.16b 601 ld1 {v17.4s},[x7],#16 602 b.gt .Loop3x_ecb_enc 603 604 aese v0.16b,v16.16b 605 aesmc v0.16b,v0.16b 606 aese v1.16b,v16.16b 607 aesmc v1.16b,v1.16b 608 aese v24.16b,v16.16b 609 aesmc v24.16b,v24.16b 610 subs x2,x2,#0x30 611 csel x6,x2,x6,lo // x6, w6, is zero at this point 612 aese v0.16b,v17.16b 613 aesmc v0.16b,v0.16b 614 aese v1.16b,v17.16b 615 aesmc v1.16b,v1.16b 616 aese v24.16b,v17.16b 617 aesmc v24.16b,v24.16b 618 add x0,x0,x6 // x0 is adjusted in such way that 619 // at exit from the loop v1.16b-v24.16b 620 // are loaded with last "words" 621 mov x7,x3 622 aese v0.16b,v20.16b 623 aesmc v0.16b,v0.16b 624 aese v1.16b,v20.16b 625 aesmc v1.16b,v1.16b 626 aese v24.16b,v20.16b 627 aesmc v24.16b,v24.16b 628 ld1 {v2.16b},[x0],#16 629 aese v0.16b,v21.16b 630 aesmc v0.16b,v0.16b 631 aese v1.16b,v21.16b 632 aesmc v1.16b,v1.16b 633 aese v24.16b,v21.16b 634 aesmc 
v24.16b,v24.16b 635 ld1 {v3.16b},[x0],#16 636 aese v0.16b,v22.16b 637 aesmc v0.16b,v0.16b 638 aese v1.16b,v22.16b 639 aesmc v1.16b,v1.16b 640 aese v24.16b,v22.16b 641 aesmc v24.16b,v24.16b 642 ld1 {v27.16b},[x0],#16 643 aese v0.16b,v23.16b 644 aese v1.16b,v23.16b 645 aese v24.16b,v23.16b 646 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 647 add w6,w5,#2 648 eor v4.16b,v7.16b,v0.16b 649 eor v5.16b,v7.16b,v1.16b 650 eor v24.16b,v24.16b,v7.16b 651 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 652 st1 {v4.16b},[x1],#16 653 orr v0.16b,v2.16b,v2.16b 654 st1 {v5.16b},[x1],#16 655 orr v1.16b,v3.16b,v3.16b 656 st1 {v24.16b},[x1],#16 657 orr v24.16b,v27.16b,v27.16b 658 b.hs .Loop3x_ecb_enc 659 660 cmn x2,#0x30 661 b.eq .Lecb_done 662 nop 663 664.Lecb_enc_tail: 665 aese v1.16b,v16.16b 666 aesmc v1.16b,v1.16b 667 aese v24.16b,v16.16b 668 aesmc v24.16b,v24.16b 669 ld1 {v16.4s},[x7],#16 670 subs w6,w6,#2 671 aese v1.16b,v17.16b 672 aesmc v1.16b,v1.16b 673 aese v24.16b,v17.16b 674 aesmc v24.16b,v24.16b 675 ld1 {v17.4s},[x7],#16 676 b.gt .Lecb_enc_tail 677 678 aese v1.16b,v16.16b 679 aesmc v1.16b,v1.16b 680 aese v24.16b,v16.16b 681 aesmc v24.16b,v24.16b 682 aese v1.16b,v17.16b 683 aesmc v1.16b,v1.16b 684 aese v24.16b,v17.16b 685 aesmc v24.16b,v24.16b 686 aese v1.16b,v20.16b 687 aesmc v1.16b,v1.16b 688 aese v24.16b,v20.16b 689 aesmc v24.16b,v24.16b 690 cmn x2,#0x20 691 aese v1.16b,v21.16b 692 aesmc v1.16b,v1.16b 693 aese v24.16b,v21.16b 694 aesmc v24.16b,v24.16b 695 aese v1.16b,v22.16b 696 aesmc v1.16b,v1.16b 697 aese v24.16b,v22.16b 698 aesmc v24.16b,v24.16b 699 aese v1.16b,v23.16b 700 aese v24.16b,v23.16b 701 b.eq .Lecb_enc_one 702 eor v5.16b,v7.16b,v1.16b 703 eor v17.16b,v7.16b,v24.16b 704 st1 {v5.16b},[x1],#16 705 st1 {v17.16b},[x1],#16 706 b .Lecb_done 707 708.Lecb_enc_one: 709 eor v5.16b,v7.16b,v24.16b 710 st1 {v5.16b},[x1],#16 711 b .Lecb_done 712.align 5 713.Lecb_dec: 714 ld1 {v1.16b},[x0],#16 715 subs x2,x2,#32 // bias 716 add w6,w5,#2 717 orr v3.16b,v1.16b,v1.16b 718 
orr v24.16b,v1.16b,v1.16b 719 orr v1.16b,v0.16b,v0.16b 720 b.lo .Lecb_dec_tail 721 722 orr v1.16b,v3.16b,v3.16b 723 ld1 {v24.16b},[x0],#16 724 cmp x2,#32 725 b.lo .Loop3x_ecb_dec 726 727 ld1 {v25.16b},[x0],#16 728 ld1 {v26.16b},[x0],#16 729 sub x2,x2,#32 // bias 730 mov w6,w5 731 732.Loop5x_ecb_dec: 733 aesd v0.16b,v16.16b 734 aesimc v0.16b,v0.16b 735 aesd v1.16b,v16.16b 736 aesimc v1.16b,v1.16b 737 aesd v24.16b,v16.16b 738 aesimc v24.16b,v24.16b 739 aesd v25.16b,v16.16b 740 aesimc v25.16b,v25.16b 741 aesd v26.16b,v16.16b 742 aesimc v26.16b,v26.16b 743 ld1 {v16.4s},[x7],#16 744 subs w6,w6,#2 745 aesd v0.16b,v17.16b 746 aesimc v0.16b,v0.16b 747 aesd v1.16b,v17.16b 748 aesimc v1.16b,v1.16b 749 aesd v24.16b,v17.16b 750 aesimc v24.16b,v24.16b 751 aesd v25.16b,v17.16b 752 aesimc v25.16b,v25.16b 753 aesd v26.16b,v17.16b 754 aesimc v26.16b,v26.16b 755 ld1 {v17.4s},[x7],#16 756 b.gt .Loop5x_ecb_dec 757 758 aesd v0.16b,v16.16b 759 aesimc v0.16b,v0.16b 760 aesd v1.16b,v16.16b 761 aesimc v1.16b,v1.16b 762 aesd v24.16b,v16.16b 763 aesimc v24.16b,v24.16b 764 aesd v25.16b,v16.16b 765 aesimc v25.16b,v25.16b 766 aesd v26.16b,v16.16b 767 aesimc v26.16b,v26.16b 768 cmp x2,#0x40 // because .Lecb_tail4x 769 sub x2,x2,#0x50 770 771 aesd v0.16b,v17.16b 772 aesimc v0.16b,v0.16b 773 aesd v1.16b,v17.16b 774 aesimc v1.16b,v1.16b 775 aesd v24.16b,v17.16b 776 aesimc v24.16b,v24.16b 777 aesd v25.16b,v17.16b 778 aesimc v25.16b,v25.16b 779 aesd v26.16b,v17.16b 780 aesimc v26.16b,v26.16b 781 csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo 782 mov x7,x3 783 784 aesd v0.16b,v18.16b 785 aesimc v0.16b,v0.16b 786 aesd v1.16b,v18.16b 787 aesimc v1.16b,v1.16b 788 aesd v24.16b,v18.16b 789 aesimc v24.16b,v24.16b 790 aesd v25.16b,v18.16b 791 aesimc v25.16b,v25.16b 792 aesd v26.16b,v18.16b 793 aesimc v26.16b,v26.16b 794 add x0,x0,x6 // x0 is adjusted in such way that 795 // at exit from the loop v1.16b-v26.16b 796 // are loaded with last "words" 797 add x6,x2,#0x60 // because .Lecb_tail4x 798 799 aesd 
v0.16b,v19.16b 800 aesimc v0.16b,v0.16b 801 aesd v1.16b,v19.16b 802 aesimc v1.16b,v1.16b 803 aesd v24.16b,v19.16b 804 aesimc v24.16b,v24.16b 805 aesd v25.16b,v19.16b 806 aesimc v25.16b,v25.16b 807 aesd v26.16b,v19.16b 808 aesimc v26.16b,v26.16b 809 810 aesd v0.16b,v20.16b 811 aesimc v0.16b,v0.16b 812 aesd v1.16b,v20.16b 813 aesimc v1.16b,v1.16b 814 aesd v24.16b,v20.16b 815 aesimc v24.16b,v24.16b 816 aesd v25.16b,v20.16b 817 aesimc v25.16b,v25.16b 818 aesd v26.16b,v20.16b 819 aesimc v26.16b,v26.16b 820 821 aesd v0.16b,v21.16b 822 aesimc v0.16b,v0.16b 823 aesd v1.16b,v21.16b 824 aesimc v1.16b,v1.16b 825 aesd v24.16b,v21.16b 826 aesimc v24.16b,v24.16b 827 aesd v25.16b,v21.16b 828 aesimc v25.16b,v25.16b 829 aesd v26.16b,v21.16b 830 aesimc v26.16b,v26.16b 831 832 aesd v0.16b,v22.16b 833 aesimc v0.16b,v0.16b 834 aesd v1.16b,v22.16b 835 aesimc v1.16b,v1.16b 836 aesd v24.16b,v22.16b 837 aesimc v24.16b,v24.16b 838 aesd v25.16b,v22.16b 839 aesimc v25.16b,v25.16b 840 aesd v26.16b,v22.16b 841 aesimc v26.16b,v26.16b 842 843 aesd v0.16b,v23.16b 844 ld1 {v2.16b},[x0],#16 845 aesd v1.16b,v23.16b 846 ld1 {v3.16b},[x0],#16 847 aesd v24.16b,v23.16b 848 ld1 {v27.16b},[x0],#16 849 aesd v25.16b,v23.16b 850 ld1 {v28.16b},[x0],#16 851 aesd v26.16b,v23.16b 852 ld1 {v29.16b},[x0],#16 853 cbz x6,.Lecb_tail4x 854 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 855 eor v4.16b,v7.16b,v0.16b 856 orr v0.16b,v2.16b,v2.16b 857 eor v5.16b,v7.16b,v1.16b 858 orr v1.16b,v3.16b,v3.16b 859 eor v17.16b,v7.16b,v24.16b 860 orr v24.16b,v27.16b,v27.16b 861 eor v30.16b,v7.16b,v25.16b 862 orr v25.16b,v28.16b,v28.16b 863 eor v31.16b,v7.16b,v26.16b 864 st1 {v4.16b},[x1],#16 865 orr v26.16b,v29.16b,v29.16b 866 st1 {v5.16b},[x1],#16 867 mov w6,w5 868 st1 {v17.16b},[x1],#16 869 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 870 st1 {v30.16b},[x1],#16 871 st1 {v31.16b},[x1],#16 872 b.hs .Loop5x_ecb_dec 873 874 add x2,x2,#0x50 875 cbz x2,.Lecb_done 876 877 add w6,w5,#2 878 subs x2,x2,#0x30 879 orr 
v0.16b,v27.16b,v27.16b 880 orr v1.16b,v28.16b,v28.16b 881 orr v24.16b,v29.16b,v29.16b 882 b.lo .Lecb_dec_tail 883 884 b .Loop3x_ecb_dec 885 886.align 4 887.Lecb_tail4x: 888 eor v5.16b,v7.16b,v1.16b 889 eor v17.16b,v7.16b,v24.16b 890 eor v30.16b,v7.16b,v25.16b 891 eor v31.16b,v7.16b,v26.16b 892 st1 {v5.16b},[x1],#16 893 st1 {v17.16b},[x1],#16 894 st1 {v30.16b},[x1],#16 895 st1 {v31.16b},[x1],#16 896 897 b .Lecb_done 898.align 4 899.Loop3x_ecb_dec: 900 aesd v0.16b,v16.16b 901 aesimc v0.16b,v0.16b 902 aesd v1.16b,v16.16b 903 aesimc v1.16b,v1.16b 904 aesd v24.16b,v16.16b 905 aesimc v24.16b,v24.16b 906 ld1 {v16.4s},[x7],#16 907 subs w6,w6,#2 908 aesd v0.16b,v17.16b 909 aesimc v0.16b,v0.16b 910 aesd v1.16b,v17.16b 911 aesimc v1.16b,v1.16b 912 aesd v24.16b,v17.16b 913 aesimc v24.16b,v24.16b 914 ld1 {v17.4s},[x7],#16 915 b.gt .Loop3x_ecb_dec 916 917 aesd v0.16b,v16.16b 918 aesimc v0.16b,v0.16b 919 aesd v1.16b,v16.16b 920 aesimc v1.16b,v1.16b 921 aesd v24.16b,v16.16b 922 aesimc v24.16b,v24.16b 923 subs x2,x2,#0x30 924 csel x6,x2,x6,lo // x6, w6, is zero at this point 925 aesd v0.16b,v17.16b 926 aesimc v0.16b,v0.16b 927 aesd v1.16b,v17.16b 928 aesimc v1.16b,v1.16b 929 aesd v24.16b,v17.16b 930 aesimc v24.16b,v24.16b 931 add x0,x0,x6 // x0 is adjusted in such way that 932 // at exit from the loop v1.16b-v24.16b 933 // are loaded with last "words" 934 mov x7,x3 935 aesd v0.16b,v20.16b 936 aesimc v0.16b,v0.16b 937 aesd v1.16b,v20.16b 938 aesimc v1.16b,v1.16b 939 aesd v24.16b,v20.16b 940 aesimc v24.16b,v24.16b 941 ld1 {v2.16b},[x0],#16 942 aesd v0.16b,v21.16b 943 aesimc v0.16b,v0.16b 944 aesd v1.16b,v21.16b 945 aesimc v1.16b,v1.16b 946 aesd v24.16b,v21.16b 947 aesimc v24.16b,v24.16b 948 ld1 {v3.16b},[x0],#16 949 aesd v0.16b,v22.16b 950 aesimc v0.16b,v0.16b 951 aesd v1.16b,v22.16b 952 aesimc v1.16b,v1.16b 953 aesd v24.16b,v22.16b 954 aesimc v24.16b,v24.16b 955 ld1 {v27.16b},[x0],#16 956 aesd v0.16b,v23.16b 957 aesd v1.16b,v23.16b 958 aesd v24.16b,v23.16b 959 ld1 {v16.4s},[x7],#16 
// re-pre-load rndkey[0] 960 add w6,w5,#2 961 eor v4.16b,v7.16b,v0.16b 962 eor v5.16b,v7.16b,v1.16b 963 eor v24.16b,v24.16b,v7.16b 964 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 965 st1 {v4.16b},[x1],#16 966 orr v0.16b,v2.16b,v2.16b 967 st1 {v5.16b},[x1],#16 968 orr v1.16b,v3.16b,v3.16b 969 st1 {v24.16b},[x1],#16 970 orr v24.16b,v27.16b,v27.16b 971 b.hs .Loop3x_ecb_dec 972 973 cmn x2,#0x30 974 b.eq .Lecb_done 975 nop 976 977.Lecb_dec_tail: 978 aesd v1.16b,v16.16b 979 aesimc v1.16b,v1.16b 980 aesd v24.16b,v16.16b 981 aesimc v24.16b,v24.16b 982 ld1 {v16.4s},[x7],#16 983 subs w6,w6,#2 984 aesd v1.16b,v17.16b 985 aesimc v1.16b,v1.16b 986 aesd v24.16b,v17.16b 987 aesimc v24.16b,v24.16b 988 ld1 {v17.4s},[x7],#16 989 b.gt .Lecb_dec_tail 990 991 aesd v1.16b,v16.16b 992 aesimc v1.16b,v1.16b 993 aesd v24.16b,v16.16b 994 aesimc v24.16b,v24.16b 995 aesd v1.16b,v17.16b 996 aesimc v1.16b,v1.16b 997 aesd v24.16b,v17.16b 998 aesimc v24.16b,v24.16b 999 aesd v1.16b,v20.16b 1000 aesimc v1.16b,v1.16b 1001 aesd v24.16b,v20.16b 1002 aesimc v24.16b,v24.16b 1003 cmn x2,#0x20 1004 aesd v1.16b,v21.16b 1005 aesimc v1.16b,v1.16b 1006 aesd v24.16b,v21.16b 1007 aesimc v24.16b,v24.16b 1008 aesd v1.16b,v22.16b 1009 aesimc v1.16b,v1.16b 1010 aesd v24.16b,v22.16b 1011 aesimc v24.16b,v24.16b 1012 aesd v1.16b,v23.16b 1013 aesd v24.16b,v23.16b 1014 b.eq .Lecb_dec_one 1015 eor v5.16b,v7.16b,v1.16b 1016 eor v17.16b,v7.16b,v24.16b 1017 st1 {v5.16b},[x1],#16 1018 st1 {v17.16b},[x1],#16 1019 b .Lecb_done 1020 1021.Lecb_dec_one: 1022 eor v5.16b,v7.16b,v24.16b 1023 st1 {v5.16b},[x1],#16 1024 1025.Lecb_done: 1026 ldr x29,[sp],#16 1027.Lecb_Final_abort: 1028 ret 1029.size aes_v8_ecb_encrypt,.-aes_v8_ecb_encrypt 1030.globl aes_v8_cbc_encrypt 1031.type aes_v8_cbc_encrypt,%function 1032.align 5 1033aes_v8_cbc_encrypt: 1034 stp x29,x30,[sp,#-16]! 1035 add x29,sp,#0 1036 subs x2,x2,#16 1037 mov x8,#16 1038 b.lo .Lcbc_abort 1039 csel x8,xzr,x8,eq 1040 1041 cmp w5,#0 // en- or decrypting? 
1042 ldr w5,[x3,#240] 1043 and x2,x2,#-16 1044 ld1 {v6.16b},[x4] 1045 ld1 {v0.16b},[x0],x8 1046 1047 ld1 {v16.4s,v17.4s},[x3] // load key schedule... 1048 sub w5,w5,#6 1049 add x7,x3,x5,lsl#4 // pointer to last 7 round keys 1050 sub w5,w5,#2 1051 ld1 {v18.4s,v19.4s},[x7],#32 1052 ld1 {v20.4s,v21.4s},[x7],#32 1053 ld1 {v22.4s,v23.4s},[x7],#32 1054 ld1 {v7.4s},[x7] 1055 1056 add x7,x3,#32 1057 mov w6,w5 1058 b.eq .Lcbc_dec 1059 1060 cmp w5,#2 1061 eor v0.16b,v0.16b,v6.16b 1062 eor v5.16b,v16.16b,v7.16b 1063 b.eq .Lcbc_enc128 1064 1065 ld1 {v2.4s,v3.4s},[x7] 1066 add x7,x3,#16 1067 add x6,x3,#16*4 1068 add x12,x3,#16*5 1069 aese v0.16b,v16.16b 1070 aesmc v0.16b,v0.16b 1071 add x14,x3,#16*6 1072 add x3,x3,#16*7 1073 b .Lenter_cbc_enc 1074 1075.align 4 1076.Loop_cbc_enc: 1077 aese v0.16b,v16.16b 1078 aesmc v0.16b,v0.16b 1079 st1 {v6.16b},[x1],#16 1080.Lenter_cbc_enc: 1081 aese v0.16b,v17.16b 1082 aesmc v0.16b,v0.16b 1083 aese v0.16b,v2.16b 1084 aesmc v0.16b,v0.16b 1085 ld1 {v16.4s},[x6] 1086 cmp w5,#4 1087 aese v0.16b,v3.16b 1088 aesmc v0.16b,v0.16b 1089 ld1 {v17.4s},[x12] 1090 b.eq .Lcbc_enc192 1091 1092 aese v0.16b,v16.16b 1093 aesmc v0.16b,v0.16b 1094 ld1 {v16.4s},[x14] 1095 aese v0.16b,v17.16b 1096 aesmc v0.16b,v0.16b 1097 ld1 {v17.4s},[x3] 1098 nop 1099 1100.Lcbc_enc192: 1101 aese v0.16b,v16.16b 1102 aesmc v0.16b,v0.16b 1103 subs x2,x2,#16 1104 aese v0.16b,v17.16b 1105 aesmc v0.16b,v0.16b 1106 csel x8,xzr,x8,eq 1107 aese v0.16b,v18.16b 1108 aesmc v0.16b,v0.16b 1109 aese v0.16b,v19.16b 1110 aesmc v0.16b,v0.16b 1111 ld1 {v16.16b},[x0],x8 1112 aese v0.16b,v20.16b 1113 aesmc v0.16b,v0.16b 1114 eor v16.16b,v16.16b,v5.16b 1115 aese v0.16b,v21.16b 1116 aesmc v0.16b,v0.16b 1117 ld1 {v17.4s},[x7] // re-pre-load rndkey[1] 1118 aese v0.16b,v22.16b 1119 aesmc v0.16b,v0.16b 1120 aese v0.16b,v23.16b 1121 eor v6.16b,v0.16b,v7.16b 1122 b.hs .Loop_cbc_enc 1123 1124 st1 {v6.16b},[x1],#16 1125 b .Lcbc_done 1126 1127.align 5 1128.Lcbc_enc128: 1129 ld1 {v2.4s,v3.4s},[x7] 1130 aese 
v0.16b,v16.16b 1131 aesmc v0.16b,v0.16b 1132 b .Lenter_cbc_enc128 1133.Loop_cbc_enc128: 1134 aese v0.16b,v16.16b 1135 aesmc v0.16b,v0.16b 1136 st1 {v6.16b},[x1],#16 1137.Lenter_cbc_enc128: 1138 aese v0.16b,v17.16b 1139 aesmc v0.16b,v0.16b 1140 subs x2,x2,#16 1141 aese v0.16b,v2.16b 1142 aesmc v0.16b,v0.16b 1143 csel x8,xzr,x8,eq 1144 aese v0.16b,v3.16b 1145 aesmc v0.16b,v0.16b 1146 aese v0.16b,v18.16b 1147 aesmc v0.16b,v0.16b 1148 aese v0.16b,v19.16b 1149 aesmc v0.16b,v0.16b 1150 ld1 {v16.16b},[x0],x8 1151 aese v0.16b,v20.16b 1152 aesmc v0.16b,v0.16b 1153 aese v0.16b,v21.16b 1154 aesmc v0.16b,v0.16b 1155 aese v0.16b,v22.16b 1156 aesmc v0.16b,v0.16b 1157 eor v16.16b,v16.16b,v5.16b 1158 aese v0.16b,v23.16b 1159 eor v6.16b,v0.16b,v7.16b 1160 b.hs .Loop_cbc_enc128 1161 1162 st1 {v6.16b},[x1],#16 1163 b .Lcbc_done 1164.align 5 1165.Lcbc_dec: 1166 ld1 {v24.16b},[x0],#16 1167 subs x2,x2,#32 // bias 1168 add w6,w5,#2 1169 orr v3.16b,v0.16b,v0.16b 1170 orr v1.16b,v0.16b,v0.16b 1171 orr v27.16b,v24.16b,v24.16b 1172 b.lo .Lcbc_dec_tail 1173 1174 orr v1.16b,v24.16b,v24.16b 1175 ld1 {v24.16b},[x0],#16 1176 orr v2.16b,v0.16b,v0.16b 1177 orr v3.16b,v1.16b,v1.16b 1178 orr v27.16b,v24.16b,v24.16b 1179 cmp x2,#32 1180 b.lo .Loop3x_cbc_dec 1181 1182 ld1 {v25.16b},[x0],#16 1183 ld1 {v26.16b},[x0],#16 1184 sub x2,x2,#32 // bias 1185 mov w6,w5 1186 orr v28.16b,v25.16b,v25.16b 1187 orr v29.16b,v26.16b,v26.16b 1188 1189.Loop5x_cbc_dec: 1190 aesd v0.16b,v16.16b 1191 aesimc v0.16b,v0.16b 1192 aesd v1.16b,v16.16b 1193 aesimc v1.16b,v1.16b 1194 aesd v24.16b,v16.16b 1195 aesimc v24.16b,v24.16b 1196 aesd v25.16b,v16.16b 1197 aesimc v25.16b,v25.16b 1198 aesd v26.16b,v16.16b 1199 aesimc v26.16b,v26.16b 1200 ld1 {v16.4s},[x7],#16 1201 subs w6,w6,#2 1202 aesd v0.16b,v17.16b 1203 aesimc v0.16b,v0.16b 1204 aesd v1.16b,v17.16b 1205 aesimc v1.16b,v1.16b 1206 aesd v24.16b,v17.16b 1207 aesimc v24.16b,v24.16b 1208 aesd v25.16b,v17.16b 1209 aesimc v25.16b,v25.16b 1210 aesd v26.16b,v17.16b 1211 aesimc 
v26.16b,v26.16b 1212 ld1 {v17.4s},[x7],#16 1213 b.gt .Loop5x_cbc_dec 1214 1215 aesd v0.16b,v16.16b 1216 aesimc v0.16b,v0.16b 1217 aesd v1.16b,v16.16b 1218 aesimc v1.16b,v1.16b 1219 aesd v24.16b,v16.16b 1220 aesimc v24.16b,v24.16b 1221 aesd v25.16b,v16.16b 1222 aesimc v25.16b,v25.16b 1223 aesd v26.16b,v16.16b 1224 aesimc v26.16b,v26.16b 1225 cmp x2,#0x40 // because .Lcbc_tail4x 1226 sub x2,x2,#0x50 1227 1228 aesd v0.16b,v17.16b 1229 aesimc v0.16b,v0.16b 1230 aesd v1.16b,v17.16b 1231 aesimc v1.16b,v1.16b 1232 aesd v24.16b,v17.16b 1233 aesimc v24.16b,v24.16b 1234 aesd v25.16b,v17.16b 1235 aesimc v25.16b,v25.16b 1236 aesd v26.16b,v17.16b 1237 aesimc v26.16b,v26.16b 1238 csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo 1239 mov x7,x3 1240 1241 aesd v0.16b,v18.16b 1242 aesimc v0.16b,v0.16b 1243 aesd v1.16b,v18.16b 1244 aesimc v1.16b,v1.16b 1245 aesd v24.16b,v18.16b 1246 aesimc v24.16b,v24.16b 1247 aesd v25.16b,v18.16b 1248 aesimc v25.16b,v25.16b 1249 aesd v26.16b,v18.16b 1250 aesimc v26.16b,v26.16b 1251 add x0,x0,x6 // x0 is adjusted in such way that 1252 // at exit from the loop v1.16b-v26.16b 1253 // are loaded with last "words" 1254 add x6,x2,#0x60 // because .Lcbc_tail4x 1255 1256 aesd v0.16b,v19.16b 1257 aesimc v0.16b,v0.16b 1258 aesd v1.16b,v19.16b 1259 aesimc v1.16b,v1.16b 1260 aesd v24.16b,v19.16b 1261 aesimc v24.16b,v24.16b 1262 aesd v25.16b,v19.16b 1263 aesimc v25.16b,v25.16b 1264 aesd v26.16b,v19.16b 1265 aesimc v26.16b,v26.16b 1266 1267 aesd v0.16b,v20.16b 1268 aesimc v0.16b,v0.16b 1269 aesd v1.16b,v20.16b 1270 aesimc v1.16b,v1.16b 1271 aesd v24.16b,v20.16b 1272 aesimc v24.16b,v24.16b 1273 aesd v25.16b,v20.16b 1274 aesimc v25.16b,v25.16b 1275 aesd v26.16b,v20.16b 1276 aesimc v26.16b,v26.16b 1277 1278 aesd v0.16b,v21.16b 1279 aesimc v0.16b,v0.16b 1280 aesd v1.16b,v21.16b 1281 aesimc v1.16b,v1.16b 1282 aesd v24.16b,v21.16b 1283 aesimc v24.16b,v24.16b 1284 aesd v25.16b,v21.16b 1285 aesimc v25.16b,v25.16b 1286 aesd v26.16b,v21.16b 1287 aesimc v26.16b,v26.16b 
1288 1289 aesd v0.16b,v22.16b 1290 aesimc v0.16b,v0.16b 1291 aesd v1.16b,v22.16b 1292 aesimc v1.16b,v1.16b 1293 aesd v24.16b,v22.16b 1294 aesimc v24.16b,v24.16b 1295 aesd v25.16b,v22.16b 1296 aesimc v25.16b,v25.16b 1297 aesd v26.16b,v22.16b 1298 aesimc v26.16b,v26.16b 1299 1300 eor v4.16b,v6.16b,v7.16b 1301 aesd v0.16b,v23.16b 1302 eor v5.16b,v2.16b,v7.16b 1303 ld1 {v2.16b},[x0],#16 1304 aesd v1.16b,v23.16b 1305 eor v17.16b,v3.16b,v7.16b 1306 ld1 {v3.16b},[x0],#16 1307 aesd v24.16b,v23.16b 1308 eor v30.16b,v27.16b,v7.16b 1309 ld1 {v27.16b},[x0],#16 1310 aesd v25.16b,v23.16b 1311 eor v31.16b,v28.16b,v7.16b 1312 ld1 {v28.16b},[x0],#16 1313 aesd v26.16b,v23.16b 1314 orr v6.16b,v29.16b,v29.16b 1315 ld1 {v29.16b},[x0],#16 1316 cbz x6,.Lcbc_tail4x 1317 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 1318 eor v4.16b,v4.16b,v0.16b 1319 orr v0.16b,v2.16b,v2.16b 1320 eor v5.16b,v5.16b,v1.16b 1321 orr v1.16b,v3.16b,v3.16b 1322 eor v17.16b,v17.16b,v24.16b 1323 orr v24.16b,v27.16b,v27.16b 1324 eor v30.16b,v30.16b,v25.16b 1325 orr v25.16b,v28.16b,v28.16b 1326 eor v31.16b,v31.16b,v26.16b 1327 st1 {v4.16b},[x1],#16 1328 orr v26.16b,v29.16b,v29.16b 1329 st1 {v5.16b},[x1],#16 1330 mov w6,w5 1331 st1 {v17.16b},[x1],#16 1332 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 1333 st1 {v30.16b},[x1],#16 1334 st1 {v31.16b},[x1],#16 1335 b.hs .Loop5x_cbc_dec 1336 1337 add x2,x2,#0x50 1338 cbz x2,.Lcbc_done 1339 1340 add w6,w5,#2 1341 subs x2,x2,#0x30 1342 orr v0.16b,v27.16b,v27.16b 1343 orr v2.16b,v27.16b,v27.16b 1344 orr v1.16b,v28.16b,v28.16b 1345 orr v3.16b,v28.16b,v28.16b 1346 orr v24.16b,v29.16b,v29.16b 1347 orr v27.16b,v29.16b,v29.16b 1348 b.lo .Lcbc_dec_tail 1349 1350 b .Loop3x_cbc_dec 1351 1352.align 4 1353.Lcbc_tail4x: 1354 eor v5.16b,v4.16b,v1.16b 1355 eor v17.16b,v17.16b,v24.16b 1356 eor v30.16b,v30.16b,v25.16b 1357 eor v31.16b,v31.16b,v26.16b 1358 st1 {v5.16b},[x1],#16 1359 st1 {v17.16b},[x1],#16 1360 st1 {v30.16b},[x1],#16 1361 st1 {v31.16b},[x1],#16 1362 1363 b .Lcbc_done 
1364.align 4 1365.Loop3x_cbc_dec: 1366 aesd v0.16b,v16.16b 1367 aesimc v0.16b,v0.16b 1368 aesd v1.16b,v16.16b 1369 aesimc v1.16b,v1.16b 1370 aesd v24.16b,v16.16b 1371 aesimc v24.16b,v24.16b 1372 ld1 {v16.4s},[x7],#16 1373 subs w6,w6,#2 1374 aesd v0.16b,v17.16b 1375 aesimc v0.16b,v0.16b 1376 aesd v1.16b,v17.16b 1377 aesimc v1.16b,v1.16b 1378 aesd v24.16b,v17.16b 1379 aesimc v24.16b,v24.16b 1380 ld1 {v17.4s},[x7],#16 1381 b.gt .Loop3x_cbc_dec 1382 1383 aesd v0.16b,v16.16b 1384 aesimc v0.16b,v0.16b 1385 aesd v1.16b,v16.16b 1386 aesimc v1.16b,v1.16b 1387 aesd v24.16b,v16.16b 1388 aesimc v24.16b,v24.16b 1389 eor v4.16b,v6.16b,v7.16b 1390 subs x2,x2,#0x30 1391 eor v5.16b,v2.16b,v7.16b 1392 csel x6,x2,x6,lo // x6, w6, is zero at this point 1393 aesd v0.16b,v17.16b 1394 aesimc v0.16b,v0.16b 1395 aesd v1.16b,v17.16b 1396 aesimc v1.16b,v1.16b 1397 aesd v24.16b,v17.16b 1398 aesimc v24.16b,v24.16b 1399 eor v17.16b,v3.16b,v7.16b 1400 add x0,x0,x6 // x0 is adjusted in such way that 1401 // at exit from the loop v1.16b-v24.16b 1402 // are loaded with last "words" 1403 orr v6.16b,v27.16b,v27.16b 1404 mov x7,x3 1405 aesd v0.16b,v20.16b 1406 aesimc v0.16b,v0.16b 1407 aesd v1.16b,v20.16b 1408 aesimc v1.16b,v1.16b 1409 aesd v24.16b,v20.16b 1410 aesimc v24.16b,v24.16b 1411 ld1 {v2.16b},[x0],#16 1412 aesd v0.16b,v21.16b 1413 aesimc v0.16b,v0.16b 1414 aesd v1.16b,v21.16b 1415 aesimc v1.16b,v1.16b 1416 aesd v24.16b,v21.16b 1417 aesimc v24.16b,v24.16b 1418 ld1 {v3.16b},[x0],#16 1419 aesd v0.16b,v22.16b 1420 aesimc v0.16b,v0.16b 1421 aesd v1.16b,v22.16b 1422 aesimc v1.16b,v1.16b 1423 aesd v24.16b,v22.16b 1424 aesimc v24.16b,v24.16b 1425 ld1 {v27.16b},[x0],#16 1426 aesd v0.16b,v23.16b 1427 aesd v1.16b,v23.16b 1428 aesd v24.16b,v23.16b 1429 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 1430 add w6,w5,#2 1431 eor v4.16b,v4.16b,v0.16b 1432 eor v5.16b,v5.16b,v1.16b 1433 eor v24.16b,v24.16b,v17.16b 1434 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 1435 st1 {v4.16b},[x1],#16 1436 orr 
v0.16b,v2.16b,v2.16b			// operand list of the "orr" mnemonic that ends the preceding line
	// ---- remainder of aes_v8_cbc_encrypt (the function starts earlier in the file) ----
	st1	{v5.16b},[x1],#16
	orr	v1.16b,v3.16b,v3.16b
	st1	{v24.16b},[x1],#16
	orr	v24.16b,v27.16b,v27.16b
	b.hs	.Loop3x_cbc_dec

	cmn	x2,#0x30
	b.eq	.Lcbc_done
	nop

	// One or two blocks left: v1 and v24 are run through the decrypt rounds.
.Lcbc_dec_tail:
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lcbc_dec_tail

	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v20.16b
	aesimc	v24.16b,v24.16b
	cmn	x2,#0x20			// flags consumed by "b.eq .Lcbc_dec_one" below
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v21.16b
	aesimc	v24.16b,v24.16b
	eor	v5.16b,v6.16b,v7.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v22.16b
	aesimc	v24.16b,v24.16b
	eor	v17.16b,v3.16b,v7.16b
	aesd	v1.16b,v23.16b
	aesd	v24.16b,v23.16b
	b.eq	.Lcbc_dec_one
	eor	v5.16b,v5.16b,v1.16b
	eor	v17.16b,v17.16b,v24.16b
	orr	v6.16b,v27.16b,v27.16b
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	b	.Lcbc_done

.Lcbc_dec_one:
	eor	v5.16b,v5.16b,v24.16b
	orr	v6.16b,v27.16b,v27.16b
	st1	{v5.16b},[x1],#16

.Lcbc_done:
	st1	{v6.16b},[x4]			// store v6 back through x4
.Lcbc_abort:
	ldr	x29,[sp],#16			// pop the 16-byte frame; only x29 is reloaded
	ret
.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt

//----------------------------------------------------------------------
// aes_v8_ctr32_encrypt_blocks
//
// Register use, as read from the code below:
//   x0  input pointer, read 16 bytes per block
//   x1  output pointer, written 16 bytes per block
//   x2  number of 16-byte blocks to process
//   x3  key schedule; the round count is loaded from [x3,#240]
//   x4  16-byte counter block; the 32-bit word at offset 12 is the
//       counter that is stepped per block (byte-reversed with "rev"
//       unless __ARMEB__ is defined). Nothing is stored back via x4.
//
// Blocks are processed five at a time (.Loop5x_ctr32), then three at a
// time (.Loop3x_ctr32), then one or two in .Lctr32_tail.
//
// Scratch: w5, w6, x7, w8-w10, x12-w14, v0-v7, v16-v27, flags.
// A 16-byte frame holding x29/x30 is pushed; only x29 is reloaded on
// exit, and x30 is not written anywhere in this routine.
// NOTE(review): presumably the C-side prototype is
// (in, out, blocks, key, ivec) - confirm against the declaring header.
//----------------------------------------------------------------------
.globl	aes_v8_ctr32_encrypt_blocks
.type	aes_v8_ctr32_encrypt_blocks,%function
.align	5
aes_v8_ctr32_encrypt_blocks:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	ldr	w5,[x3,#240]			// w5 = round count

	ldr	w8, [x4, #12]			// w8 = 32-bit counter word
#ifdef __ARMEB__
	ld1	{v0.16b},[x4]
#else
	ld1	{v0.4s},[x4]
#endif
	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16
	cmp	x2,#2				// flags used by csel and b.ls below
	add	x7,x3,x5,lsl#4			// pointer to last 5 round keys
	sub	w5,w5,#2			// w5 = rounds-6, reload value for the round loops
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]			// v7 = last round key
	add	x7,x3,#32			// x7 -> third round key
	mov	w6,w5
	csel	x12,xzr,x12,lo			// fewer than 2 blocks: input step in the tail becomes 0
#ifndef __ARMEB__
	rev	w8, w8
#endif
	orr	v1.16b,v0.16b,v0.16b
	add	w10, w8, #1
	orr	v18.16b,v0.16b,v0.16b
	add	w8, w8, #2
	orr	v6.16b,v0.16b,v0.16b		// v6 keeps a copy of the counter block as a template
	rev	w10, w10
	mov	v1.s[3],w10			// v1 = counter+1
	b.ls	.Lctr32_tail			// at most 2 blocks
	rev	w12, w8
	sub	x2,x2,#3			// bias
	mov	v18.s[3],w12			// v18 = counter+2
	cmp	x2,#32
	b.lo	.Loop3x_ctr32			// 5x path only when (blocks-3) >= 32

	add	w13,w8,#1
	add	w14,w8,#2
	orr	v24.16b,v0.16b,v0.16b
	rev	w13,w13
	orr	v25.16b,v0.16b,v0.16b
	rev	w14,w14
	mov	v24.s[3],w13			// v24 = counter+3
	sub	x2,x2,#2			// bias
	mov	v25.s[3],w14			// v25 = counter+4
	add	w8,w8,#2
	b	.Loop5x_ctr32

.align	4
.Loop5x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v16.16b
	aesmc	v25.16b,v25.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v17.16b
	aesmc	v25.16b,v25.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop5x_ctr32

	mov	x7,x3
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v16.16b
	aesmc	v25.16b,v25.16b
	ld1	{v16.4s},[x7],#16		// re-pre-load rndkey[0]

	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v17.16b
	aesmc	v25.16b,v25.16b
	ld1	{v17.4s},[x7],#16		// re-pre-load rndkey[1]

	// The next five counter values are computed in w9,w10,w12,w13,w14
	// while the last rounds of the current five blocks are in flight.
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	add	w9,w8,#1
	add	w10,w8,#2
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	add	w12,w8,#3
	add	w13,w8,#4
	aese	v18.16b,v20.16b
	aesmc	v18.16b,v18.16b
	add	w14,w8,#5
	rev	w9,w9
	aese	v24.16b,v20.16b
	aesmc	v24.16b,v24.16b
	rev	w10,w10
	rev	w12,w12
	aese	v25.16b,v20.16b
	aesmc	v25.16b,v25.16b
	rev	w13,w13
	rev	w14,w14

	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v21.16b
	aesmc	v18.16b,v18.16b
	aese	v24.16b,v21.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v21.16b
	aesmc	v25.16b,v25.16b

	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	ld1	{v2.16b},[x0],#16
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0],#16
	aese	v18.16b,v22.16b
	aesmc	v18.16b,v18.16b
	ld1	{v19.16b},[x0],#16
	aese	v24.16b,v22.16b
	aesmc	v24.16b,v24.16b
	ld1	{v26.16b},[x0],#16
	aese	v25.16b,v22.16b
	aesmc	v25.16b,v25.16b
	ld1	{v27.16b},[x0],#16

	// The last round key (v7) is folded into the input blocks here
	// and the result is xor-ed with the aese output further down.
	aese	v0.16b,v23.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v1.16b,v23.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v18.16b,v23.16b
	eor	v19.16b,v19.16b,v7.16b
	aese	v24.16b,v23.16b
	eor	v26.16b,v26.16b,v7.16b
	aese	v25.16b,v23.16b
	eor	v27.16b,v27.16b,v7.16b

	eor	v2.16b,v2.16b,v0.16b
	orr	v0.16b,v6.16b,v6.16b		// reload counter template
	eor	v3.16b,v3.16b,v1.16b
	orr	v1.16b,v6.16b,v6.16b
	eor	v19.16b,v19.16b,v18.16b
	orr	v18.16b,v6.16b,v6.16b
	eor	v26.16b,v26.16b,v24.16b
	orr	v24.16b,v6.16b,v6.16b
	eor	v27.16b,v27.16b,v25.16b
	orr	v25.16b,v6.16b,v6.16b

	st1	{v2.16b},[x1],#16
	mov	v0.s[3],w9
	st1	{v3.16b},[x1],#16
	mov	v1.s[3],w10
	st1	{v19.16b},[x1],#16
	mov	v18.s[3],w12
	st1	{v26.16b},[x1],#16
	mov	v24.s[3],w13
	st1	{v27.16b},[x1],#16
	mov	v25.s[3],w14

	mov	w6,w5
	cbz	x2,.Lctr32_done

	add	w8,w8,#5
	subs	x2,x2,#5
	b.hs	.Loop5x_ctr32

	// Fewer than five blocks remain: undo the last step.
	add	x2,x2,#5
	sub	w8,w8,#5

	cmp	x2,#2
	mov	x12,#16
	csel	x12,xzr,x12,lo
	b.ls	.Lctr32_tail

	sub	x2,x2,#3			// bias
	add	w8,w8,#3
	b	.Loop3x_ctr32

.align	4
.Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_ctr32

	// From here the in-flight state moves to v4/v5/v17, and v0/v1/v18
	// are reloaded from the template (v6) for the next iteration.
	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16
	orr	v0.16b,v6.16b,v6.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	orr	v1.16b,v6.16b,v6.16b
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b
	orr	v18.16b,v6.16b,v6.16b
	add	w9,w8,#1
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	eor	v19.16b,v19.16b,v7.16b
	rev	w9,w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	mov	v0.s[3], w9
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	mov	v1.s[3], w10
	rev	w12,w8
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	mov	v18.s[3], w12
	subs	x2,x2,#3			// flags consumed by "b.hs .Loop3x_ctr32" below
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b
	ld1	{v16.4s},[x7],#16		// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16		// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	.Loop3x_ctr32

	adds	x2,x2,#3			// undo bias; zero means nothing left
	b.eq	.Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq			// one block left: input step 0

	// One or two blocks left, in v0 and v1.
.Lctr32_tail:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12		// x12 is 0 or 16, see the csel instructions above
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]			// with x12==0 this re-reads the same block
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	.Lctr32_done			// single block: skip the second store
	st1	{v3.16b},[x1]

.Lctr32_done:
	ldr	x29,[sp],#16
	ret
.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks

//----------------------------------------------------------------------
// aes_v8_xts_encrypt
//
// Register use, as read from the code below:
//   x0  input pointer
//   x1  output pointer
//   x2  length in bytes
//   x3  key schedule applied to the data blocks (rounds at [x3,#240])
//   x4  key schedule used to encrypt the IV into the first tweak
//       (rounds at [x4,#240])
//   x5  pointer to the 16-byte IV
//
// Length exactly 16: short path, no stack use, x3 and x4 are advanced.
// Any other length: .Lxts_enc_big_size, which saves x19-x22 and d8-d11
// in a 64-byte frame and restores them at .Lxts_abort. A length below
// 16 reaches .Lxts_abort without producing output.
//
// The tweak lives in v6 (and v8-v11 for the following blocks) and is
// mirrored in x10:x9. Each "extr/extr/and/eor" group shifts that
// 128-bit value left by one bit and xors 0x87 (held in w19) into the
// low byte when the bit shifted out was set.
//
// A non-zero (length & 15), kept in x21, is handled after the full
// blocks by the byte-swap loop at .composite_enc_loop.
// NOTE(review): presumably the C-side prototype is
// (in, out, length, key1, key2, iv) - confirm against the header.
//----------------------------------------------------------------------
.globl	aes_v8_xts_encrypt
.type	aes_v8_xts_encrypt,%function
.align	5
aes_v8_xts_encrypt:
	cmp	x2,#16
	// Original input data size bigger than 16, jump to big size processing.
	b.ne	.Lxts_enc_big_size
	// Encrypt the iv with key2, as the first XEX iv.
	ldr	w6,[x4,#240]
	ld1	{v0.4s},[x4],#16
	ld1	{v6.16b},[x5]
	sub	w6,w6,#2
	ld1	{v1.4s},[x4],#16

.Loop_enc_iv_enc:
	aese	v6.16b,v0.16b
	aesmc	v6.16b,v6.16b
	ld1	{v0.4s},[x4],#16
	subs	w6,w6,#2
	aese	v6.16b,v1.16b
	aesmc	v6.16b,v6.16b
	ld1	{v1.4s},[x4],#16
	b.gt	.Loop_enc_iv_enc

	aese	v6.16b,v0.16b
	aesmc	v6.16b,v6.16b
	ld1	{v0.4s},[x4]
	aese	v6.16b,v1.16b
	eor	v6.16b,v6.16b,v0.16b		// v6 = encrypted IV (first tweak)

	ld1	{v0.16b},[x0]
	eor	v0.16b,v6.16b,v0.16b		// input xor tweak

	ldr	w6,[x3,#240]
	ld1	{v28.4s,v29.4s},[x3],#32	// load key schedule...

	aese	v0.16b,v28.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s,v17.4s},[x3],#32	// load key schedule...
	aese	v0.16b,v29.16b
	aesmc	v0.16b,v0.16b
	subs	w6,w6,#10			// if rounds==10, jump to aes-128-xts processing
	b.eq	.Lxts_128_enc
.Lxts_enc_round_loop:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x3],#16		// load key schedule...
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x3],#16		// load key schedule...
	subs	w6,w6,#2			// bias
	b.gt	.Lxts_enc_round_loop
.Lxts_128_enc:
	ld1	{v18.4s,v19.4s},[x3],#32	// load key schedule...
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v20.4s,v21.4s},[x3],#32	// load key schedule...
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v22.4s,v23.4s},[x3],#32	// load key schedule...
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	ld1	{v7.4s},[x3]
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v23.16b
	eor	v0.16b,v0.16b,v7.16b		// last round key
	eor	v0.16b,v0.16b,v6.16b		// xor tweak again
	st1	{v0.16b},[x1]
	b	.Lxts_enc_final_abort		// nothing was pushed on this path

.align	4
.Lxts_enc_big_size:
	// Frame: [sp]=x19,x20  [sp,#16]=d10,d11  [sp,#32]=d8,d9  [sp,#48]=x21,x22
	stp	x19,x20,[sp,#-64]!
	stp	x21,x22,[sp,#48]
	stp	d8,d9,[sp,#32]
	stp	d10,d11,[sp,#16]

	// tailcnt store the tail value of length%16.
	and	x21,x2,#0xf
	and	x2,x2,#-16
	subs	x2,x2,#16
	mov	x8,#16
	b.lo	.Lxts_abort			// less than one full block
	csel	x8,xzr,x8,eq			// exactly one full block: input step 0

	// Firstly, encrypt the iv with key2, as the first iv of XEX.
	ldr	w6,[x4,#240]
	ld1	{v0.4s},[x4],#16
	ld1	{v6.16b},[x5]
	sub	w6,w6,#2
	ld1	{v1.4s},[x4],#16

.Loop_iv_enc:
	aese	v6.16b,v0.16b
	aesmc	v6.16b,v6.16b
	ld1	{v0.4s},[x4],#16
	subs	w6,w6,#2
	aese	v6.16b,v1.16b
	aesmc	v6.16b,v6.16b
	ld1	{v1.4s},[x4],#16
	b.gt	.Loop_iv_enc

	aese	v6.16b,v0.16b
	aesmc	v6.16b,v6.16b
	ld1	{v0.4s},[x4]
	aese	v6.16b,v1.16b
	eor	v6.16b,v6.16b,v0.16b

	// The iv for second block
	// x9- iv(low), x10 - iv(high)
	// the five ivs stored into, v6.16b,v8.16b,v9.16b,v10.16b,v11.16b
	fmov	x9,d6
	fmov	x10,v6.d[1]
	mov	w19,#0x87
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr#31
	eor	x9,x11,x9,lsl#1
	fmov	d8,x9
	fmov	v8.d[1],x10

	ldr	w5,[x3,#240]			// next starting point
	ld1	{v0.16b},[x0],x8

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#6
	add	x7,x3,x5,lsl#4			// pointer to last 7 round keys
	sub	w5,w5,#2
	ld1	{v18.4s,v19.4s},[x7],#32
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]

	add	x7,x3,#32
	mov	w6,w5

	// Encryption
.Lxts_enc:
	ld1	{v24.16b},[x0],#16
	subs	x2,x2,#32			// bias
	add	w6,w5,#2
	orr	v3.16b,v0.16b,v0.16b
	orr	v1.16b,v0.16b,v0.16b
	orr	v28.16b,v0.16b,v0.16b
	orr	v27.16b,v24.16b,v24.16b
	orr	v29.16b,v24.16b,v24.16b
	b.lo	.Lxts_inner_enc_tail
	eor	v0.16b,v0.16b,v6.16b		// before encryption, xor with iv
	eor	v24.16b,v24.16b,v8.16b

	// The iv for third block
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr#31
	eor	x9,x11,x9,lsl#1
	fmov	d9,x9
	fmov	v9.d[1],x10


	orr	v1.16b,v24.16b,v24.16b
	ld1	{v24.16b},[x0],#16
	orr	v2.16b,v0.16b,v0.16b
	orr	v3.16b,v1.16b,v1.16b
	eor	v27.16b,v24.16b,v9.16b		// the third block
	eor	v24.16b,v24.16b,v9.16b
	cmp	x2,#32
	b.lo	.Lxts_outer_enc_tail

	// The iv for fourth block
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr#31
	eor	x9,x11,x9,lsl#1
	fmov	d10,x9
	fmov	v10.d[1],x10

	ld1	{v25.16b},[x0],#16
	// The iv for fifth block
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr#31
	eor	x9,x11,x9,lsl#1
	fmov	d11,x9
	fmov	v11.d[1],x10

	ld1	{v26.16b},[x0],#16
	eor	v25.16b,v25.16b,v10.16b		// the fourth block
	eor	v26.16b,v26.16b,v11.16b
	sub	x2,x2,#32			// bias
	mov	w6,w5
	b	.Loop5x_xts_enc

.align	4
.Loop5x_xts_enc:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v16.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v16.16b
	aesmc	v26.16b,v26.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v17.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v17.16b
	aesmc	v26.16b,v26.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop5x_xts_enc

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v16.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v16.16b
	aesmc	v26.16b,v26.16b
	subs	x2,x2,#0x50			// because .Lxts_enc_tail4x

	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v17.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v17.16b
	aesmc	v26.16b,v26.16b
	csel	x6,xzr,x2,gt			// borrow x6, w6, "gt" is not typo
	mov	x7,x3

	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v18.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v18.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v18.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v18.16b
	aesmc	v26.16b,v26.16b
	add	x0,x0,x6			// x0 is adjusted in such way that
						// at exit from the loop v1.16b-v26.16b
						// are loaded with last "words"
	add	x6,x2,#0x60			// because .Lxts_enc_tail4x

	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v19.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v19.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v19.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v19.16b
	aesmc	v26.16b,v26.16b

	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v20.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v20.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v20.16b
	aesmc	v26.16b,v26.16b

	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v21.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v21.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v21.16b
	aesmc	v26.16b,v26.16b

	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v22.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v22.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v22.16b
	aesmc	v26.16b,v26.16b

	// v4,v5,v17,v30,v31 = last round key xor the tweak of each of the
	// five blocks; the tweaks for the next iteration are derived in
	// between.
	eor	v4.16b,v7.16b,v6.16b
	aese	v0.16b,v23.16b
	// The iv for first block of one iteration
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr#31
	eor	x9,x11,x9,lsl#1
	fmov	d6,x9
	fmov	v6.d[1],x10
	eor	v5.16b,v7.16b,v8.16b
	ld1	{v2.16b},[x0],#16
	aese	v1.16b,v23.16b
	// The iv for second block
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr#31
	eor	x9,x11,x9,lsl#1
	fmov	d8,x9
	fmov	v8.d[1],x10
	eor	v17.16b,v7.16b,v9.16b
	ld1	{v3.16b},[x0],#16
	aese	v24.16b,v23.16b
	// The iv for third block
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr#31
	eor	x9,x11,x9,lsl#1
	fmov	d9,x9
	fmov	v9.d[1],x10
	eor	v30.16b,v7.16b,v10.16b
	ld1	{v27.16b},[x0],#16
	aese	v25.16b,v23.16b
	// The iv for fourth block
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr#31
	eor	x9,x11,x9,lsl#1
	fmov	d10,x9
	fmov	v10.d[1],x10
	eor	v31.16b,v7.16b,v11.16b
	ld1	{v28.16b},[x0],#16
	aese	v26.16b,v23.16b

	// The iv for fifth block
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr #31
	eor	x9,x11,x9,lsl #1
	fmov	d11,x9
	fmov	v11.d[1],x10

	ld1	{v29.16b},[x0],#16
	cbz	x6,.Lxts_enc_tail4x
	ld1	{v16.4s},[x7],#16		// re-pre-load rndkey[0]
	eor	v4.16b,v4.16b,v0.16b
	eor	v0.16b,v2.16b,v6.16b
	eor	v5.16b,v5.16b,v1.16b
	eor	v1.16b,v3.16b,v8.16b
	eor	v17.16b,v17.16b,v24.16b
	eor	v24.16b,v27.16b,v9.16b
	eor	v30.16b,v30.16b,v25.16b
	eor	v25.16b,v28.16b,v10.16b
	eor	v31.16b,v31.16b,v26.16b
	st1	{v4.16b},[x1],#16
	eor	v26.16b,v29.16b,v11.16b
	st1	{v5.16b},[x1],#16
	mov	w6,w5
	st1	{v17.16b},[x1],#16
	ld1	{v17.4s},[x7],#16		// re-pre-load rndkey[1]
	st1	{v30.16b},[x1],#16
	st1	{v31.16b},[x1],#16
	b.hs	.Loop5x_xts_enc			// flags still from "subs x2,x2,#0x50"


	// If left 4 blocks, borrow the five block's processing.
	cmn	x2,#0x10
	b.ne	.Loop5x_enc_after
	orr	v11.16b,v10.16b,v10.16b
	orr	v10.16b,v9.16b,v9.16b
	orr	v9.16b,v8.16b,v8.16b
	orr	v8.16b,v6.16b,v6.16b
	fmov	x9,d11
	fmov	x10,v11.d[1]
	eor	v0.16b,v6.16b,v2.16b
	eor	v1.16b,v8.16b,v3.16b
	eor	v24.16b,v27.16b,v9.16b
	eor	v25.16b,v28.16b,v10.16b
	eor	v26.16b,v29.16b,v11.16b
	b.eq	.Loop5x_xts_enc			// always taken here: flags unchanged since the cmn above

.Loop5x_enc_after:
	add	x2,x2,#0x50
	cbz	x2,.Lxts_enc_done

	add	w6,w5,#2
	subs	x2,x2,#0x30
	b.lo	.Lxts_inner_enc_tail

	eor	v0.16b,v6.16b,v27.16b
	eor	v1.16b,v8.16b,v28.16b
	eor	v24.16b,v29.16b,v9.16b
	b	.Lxts_outer_enc_tail

.align	4
.Lxts_enc_tail4x:
	add	x0,x0,#16
	eor	v5.16b,v1.16b,v5.16b
	st1	{v5.16b},[x1],#16
	eor	v17.16b,v24.16b,v17.16b
	st1	{v17.16b},[x1],#16
	eor	v30.16b,v25.16b,v30.16b
	eor	v31.16b,v26.16b,v31.16b
	st1	{v30.16b,v31.16b},[x1],#32

	b	.Lxts_enc_done
.align	4
.Lxts_outer_enc_tail:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lxts_outer_enc_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	eor	v4.16b,v6.16b,v7.16b
	subs	x2,x2,#0x30
	// The iv for first block
	fmov	x9,d9
	fmov	x10,v9.d[1]
	//mov	w19,#0x87
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr#31
	eor	x9,x11,x9,lsl#1
	fmov	d6,x9
	fmov	v6.d[1],x10
	eor	v5.16b,v8.16b,v7.16b
	csel	x6,x2,x6,lo			// x6, w6, is zero at this point
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	eor	v17.16b,v9.16b,v7.16b

	add	x6,x6,#0x20
	add	x0,x0,x6
	mov	x7,x3

	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v20.16b
	aesmc	v24.16b,v24.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v21.16b
	aesmc	v24.16b,v24.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v22.16b
	aesmc	v24.16b,v24.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b
	aese	v24.16b,v23.16b
	ld1	{v27.16b},[x0],#16
	add	w6,w5,#2
	ld1	{v16.4s},[x7],#16		// re-pre-load rndkey[0]
	eor	v4.16b,v4.16b,v0.16b
	eor	v5.16b,v5.16b,v1.16b
	eor	v24.16b,v24.16b,v17.16b
	ld1	{v17.4s},[x7],#16		// re-pre-load rndkey[1]
	st1	{v4.16b},[x1],#16
	st1	{v5.16b},[x1],#16
	st1	{v24.16b},[x1],#16
	cmn	x2,#0x30
	b.eq	.Lxts_enc_done
.Lxts_encxor_one:
	orr	v28.16b,v3.16b,v3.16b
	orr	v29.16b,v27.16b,v27.16b
	nop

.Lxts_inner_enc_tail:
	cmn	x2,#0x10
	eor	v1.16b,v28.16b,v6.16b
	eor	v24.16b,v29.16b,v8.16b
	b.eq	.Lxts_enc_tail_loop
	eor	v24.16b,v29.16b,v6.16b
.Lxts_enc_tail_loop:
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lxts_enc_tail_loop

	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v20.16b
	aesmc	v24.16b,v24.16b
	cmn	x2,#0x20			// flags consumed by "b.eq .Lxts_enc_one" below
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v21.16b
	aesmc	v24.16b,v24.16b
	eor	v5.16b,v6.16b,v7.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v22.16b
	aesmc	v24.16b,v24.16b
	eor	v17.16b,v8.16b,v7.16b
	aese	v1.16b,v23.16b
	aese	v24.16b,v23.16b
	b.eq	.Lxts_enc_one
	eor	v5.16b,v5.16b,v1.16b
	st1	{v5.16b},[x1],#16
	eor	v17.16b,v17.16b,v24.16b
	orr	v6.16b,v8.16b,v8.16b
	st1	{v17.16b},[x1],#16
	// Advance the tweak once more; v6 is what the cipher-stealing
	// code at .Lxts_enc_done xors with.
	fmov	x9,d8
	fmov	x10,v8.d[1]
	mov	w19,#0x87
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr #31
	eor	x9,x11,x9,lsl #1
	fmov	d6,x9
	fmov	v6.d[1],x10
	b	.Lxts_enc_done

.Lxts_enc_one:
	eor	v5.16b,v5.16b,v24.16b
	orr	v6.16b,v6.16b,v6.16b		// self-move, v6 is unchanged
	st1	{v5.16b},[x1],#16
	fmov	x9,d6
	fmov	x10,v6.d[1]
	mov	w19,#0x87
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr #31
	eor	x9,x11,x9,lsl #1
	fmov	d6,x9
	fmov	v6.d[1],x10
	b	.Lxts_enc_done
.align	5
.Lxts_enc_done:
	// Process the tail block with cipher stealing.
	tst	x21,#0xf
	b.eq	.Lxts_abort			// no partial block

	mov	x20,x0
	mov	x13,x1
	sub	x1,x1,#16			// x1 -> last full block already written
	// For each of the x21 tail bytes (highest index first): copy the
	// byte of the last output block to the partial output at x13, and
	// replace it in the last output block with the input byte at x20.
.composite_enc_loop:
	subs	x21,x21,#1
	ldrb	w15,[x1,x21]
	ldrb	w14,[x20,x21]
	strb	w15,[x13,x21]
	strb	w14,[x1,x21]
	b.gt	.composite_enc_loop
.Lxts_enc_load_done:
	ld1	{v26.16b},[x1]
	eor	v26.16b,v26.16b,v6.16b

	// Encrypt the composite block to get the last second encrypted text block
	ldr	w6,[x3,#240]			// load key schedule...
	ld1	{v0.4s},[x3],#16
	sub	w6,w6,#2
	ld1	{v1.4s},[x3],#16		// load key schedule...
.Loop_final_enc:
	aese	v26.16b,v0.16b
	aesmc	v26.16b,v26.16b
	ld1	{v0.4s},[x3],#16
	subs	w6,w6,#2
	aese	v26.16b,v1.16b
	aesmc	v26.16b,v26.16b
	ld1	{v1.4s},[x3],#16
	b.gt	.Loop_final_enc

	aese	v26.16b,v0.16b
	aesmc	v26.16b,v26.16b
	ld1	{v0.4s},[x3]
	aese	v26.16b,v1.16b
	eor	v26.16b,v26.16b,v0.16b
	eor	v26.16b,v26.16b,v6.16b
	st1	{v26.16b},[x1]

.Lxts_abort:
	// Restore the registers saved at .Lxts_enc_big_size.
	ldp	x21,x22,[sp,#48]
	ldp	d8,d9,[sp,#32]
	ldp	d10,d11,[sp,#16]
	ldp	x19,x20,[sp],#64
.Lxts_enc_final_abort:
	ret
.size	aes_v8_xts_encrypt,.-aes_v8_xts_encrypt

	// ---- start of aes_v8_xts_decrypt (the body continues below) ----
.globl	aes_v8_xts_decrypt
.type	aes_v8_xts_decrypt,%function
.align	5
aes_v8_xts_decrypt:
	cmp	x2,#16
	// Original input data size bigger than 16, jump to big size processing.
	b.ne	.Lxts_dec_big_size
	// Encrypt the iv with key2, as the first XEX iv.
2503 ldr w6,[x4,#240] 2504 ld1 {v0.4s},[x4],#16 2505 ld1 {v6.16b},[x5] 2506 sub w6,w6,#2 2507 ld1 {v1.4s},[x4],#16 2508 2509.Loop_dec_small_iv_enc: 2510 aese v6.16b,v0.16b 2511 aesmc v6.16b,v6.16b 2512 ld1 {v0.4s},[x4],#16 2513 subs w6,w6,#2 2514 aese v6.16b,v1.16b 2515 aesmc v6.16b,v6.16b 2516 ld1 {v1.4s},[x4],#16 2517 b.gt .Loop_dec_small_iv_enc 2518 2519 aese v6.16b,v0.16b 2520 aesmc v6.16b,v6.16b 2521 ld1 {v0.4s},[x4] 2522 aese v6.16b,v1.16b 2523 eor v6.16b,v6.16b,v0.16b 2524 2525 ld1 {v0.16b},[x0] 2526 eor v0.16b,v6.16b,v0.16b 2527 2528 ldr w6,[x3,#240] 2529 ld1 {v28.4s,v29.4s},[x3],#32 // load key schedule... 2530 2531 aesd v0.16b,v28.16b 2532 aesimc v0.16b,v0.16b 2533 ld1 {v16.4s,v17.4s},[x3],#32 // load key schedule... 2534 aesd v0.16b,v29.16b 2535 aesimc v0.16b,v0.16b 2536 subs w6,w6,#10 // bias 2537 b.eq .Lxts_128_dec 2538.Lxts_dec_round_loop: 2539 aesd v0.16b,v16.16b 2540 aesimc v0.16b,v0.16b 2541 ld1 {v16.4s},[x3],#16 // load key schedule... 2542 aesd v0.16b,v17.16b 2543 aesimc v0.16b,v0.16b 2544 ld1 {v17.4s},[x3],#16 // load key schedule... 2545 subs w6,w6,#2 // bias 2546 b.gt .Lxts_dec_round_loop 2547.Lxts_128_dec: 2548 ld1 {v18.4s,v19.4s},[x3],#32 // load key schedule... 2549 aesd v0.16b,v16.16b 2550 aesimc v0.16b,v0.16b 2551 aesd v0.16b,v17.16b 2552 aesimc v0.16b,v0.16b 2553 ld1 {v20.4s,v21.4s},[x3],#32 // load key schedule... 2554 aesd v0.16b,v18.16b 2555 aesimc v0.16b,v0.16b 2556 aesd v0.16b,v19.16b 2557 aesimc v0.16b,v0.16b 2558 ld1 {v22.4s,v23.4s},[x3],#32 // load key schedule... 2559 aesd v0.16b,v20.16b 2560 aesimc v0.16b,v0.16b 2561 aesd v0.16b,v21.16b 2562 aesimc v0.16b,v0.16b 2563 ld1 {v7.4s},[x3] 2564 aesd v0.16b,v22.16b 2565 aesimc v0.16b,v0.16b 2566 aesd v0.16b,v23.16b 2567 eor v0.16b,v0.16b,v7.16b 2568 eor v0.16b,v6.16b,v0.16b 2569 st1 {v0.16b},[x1] 2570 b .Lxts_dec_final_abort 2571.Lxts_dec_big_size: 2572 stp x19,x20,[sp,#-64]! 
2573 stp x21,x22,[sp,#48] 2574 stp d8,d9,[sp,#32] 2575 stp d10,d11,[sp,#16] 2576 2577 and x21,x2,#0xf 2578 and x2,x2,#-16 2579 subs x2,x2,#16 2580 mov x8,#16 2581 b.lo .Lxts_dec_abort 2582 2583 // Encrypt the iv with key2, as the first XEX iv 2584 ldr w6,[x4,#240] 2585 ld1 {v0.4s},[x4],#16 2586 ld1 {v6.16b},[x5] 2587 sub w6,w6,#2 2588 ld1 {v1.4s},[x4],#16 2589 2590.Loop_dec_iv_enc: 2591 aese v6.16b,v0.16b 2592 aesmc v6.16b,v6.16b 2593 ld1 {v0.4s},[x4],#16 2594 subs w6,w6,#2 2595 aese v6.16b,v1.16b 2596 aesmc v6.16b,v6.16b 2597 ld1 {v1.4s},[x4],#16 2598 b.gt .Loop_dec_iv_enc 2599 2600 aese v6.16b,v0.16b 2601 aesmc v6.16b,v6.16b 2602 ld1 {v0.4s},[x4] 2603 aese v6.16b,v1.16b 2604 eor v6.16b,v6.16b,v0.16b 2605 2606 // The iv for second block 2607 // x9- iv(low), x10 - iv(high) 2608 // the five ivs stored into, v6.16b,v8.16b,v9.16b,v10.16b,v11.16b 2609 fmov x9,d6 2610 fmov x10,v6.d[1] 2611 mov w19,#0x87 2612 extr x22,x10,x10,#32 2613 extr x10,x10,x9,#63 2614 and w11,w19,w22,asr #31 2615 eor x9,x11,x9,lsl #1 2616 fmov d8,x9 2617 fmov v8.d[1],x10 2618 2619 ldr w5,[x3,#240] // load rounds number 2620 2621 // The iv for third block 2622 extr x22,x10,x10,#32 2623 extr x10,x10,x9,#63 2624 and w11,w19,w22,asr #31 2625 eor x9,x11,x9,lsl #1 2626 fmov d9,x9 2627 fmov v9.d[1],x10 2628 2629 ld1 {v16.4s,v17.4s},[x3] // load key schedule... 2630 sub w5,w5,#6 2631 add x7,x3,x5,lsl#4 // pointer to last 7 round keys 2632 sub w5,w5,#2 2633 ld1 {v18.4s,v19.4s},[x7],#32 // load key schedule... 
2634 ld1 {v20.4s,v21.4s},[x7],#32 2635 ld1 {v22.4s,v23.4s},[x7],#32 2636 ld1 {v7.4s},[x7] 2637 2638 // The iv for fourth block 2639 extr x22,x10,x10,#32 2640 extr x10,x10,x9,#63 2641 and w11,w19,w22,asr #31 2642 eor x9,x11,x9,lsl #1 2643 fmov d10,x9 2644 fmov v10.d[1],x10 2645 2646 add x7,x3,#32 2647 mov w6,w5 2648 b .Lxts_dec 2649 2650 // Decryption 2651.align 5 2652.Lxts_dec: 2653 tst x21,#0xf 2654 b.eq .Lxts_dec_begin 2655 subs x2,x2,#16 2656 csel x8,xzr,x8,eq 2657 ld1 {v0.16b},[x0],#16 2658 b.lo .Lxts_done 2659 sub x0,x0,#16 2660.Lxts_dec_begin: 2661 ld1 {v0.16b},[x0],x8 2662 subs x2,x2,#32 // bias 2663 add w6,w5,#2 2664 orr v3.16b,v0.16b,v0.16b 2665 orr v1.16b,v0.16b,v0.16b 2666 orr v28.16b,v0.16b,v0.16b 2667 ld1 {v24.16b},[x0],#16 2668 orr v27.16b,v24.16b,v24.16b 2669 orr v29.16b,v24.16b,v24.16b 2670 b.lo .Lxts_inner_dec_tail 2671 eor v0.16b,v0.16b,v6.16b // before decryt, xor with iv 2672 eor v24.16b,v24.16b,v8.16b 2673 2674 orr v1.16b,v24.16b,v24.16b 2675 ld1 {v24.16b},[x0],#16 2676 orr v2.16b,v0.16b,v0.16b 2677 orr v3.16b,v1.16b,v1.16b 2678 eor v27.16b,v24.16b,v9.16b // third block xox with third iv 2679 eor v24.16b,v24.16b,v9.16b 2680 cmp x2,#32 2681 b.lo .Lxts_outer_dec_tail 2682 2683 ld1 {v25.16b},[x0],#16 2684 2685 // The iv for fifth block 2686 extr x22,x10,x10,#32 2687 extr x10,x10,x9,#63 2688 and w11,w19,w22,asr #31 2689 eor x9,x11,x9,lsl #1 2690 fmov d11,x9 2691 fmov v11.d[1],x10 2692 2693 ld1 {v26.16b},[x0],#16 2694 eor v25.16b,v25.16b,v10.16b // the fourth block 2695 eor v26.16b,v26.16b,v11.16b 2696 sub x2,x2,#32 // bias 2697 mov w6,w5 2698 b .Loop5x_xts_dec 2699 2700.align 4 2701.Loop5x_xts_dec: 2702 aesd v0.16b,v16.16b 2703 aesimc v0.16b,v0.16b 2704 aesd v1.16b,v16.16b 2705 aesimc v1.16b,v1.16b 2706 aesd v24.16b,v16.16b 2707 aesimc v24.16b,v24.16b 2708 aesd v25.16b,v16.16b 2709 aesimc v25.16b,v25.16b 2710 aesd v26.16b,v16.16b 2711 aesimc v26.16b,v26.16b 2712 ld1 {v16.4s},[x7],#16 // load key schedule... 
2713 subs w6,w6,#2 2714 aesd v0.16b,v17.16b 2715 aesimc v0.16b,v0.16b 2716 aesd v1.16b,v17.16b 2717 aesimc v1.16b,v1.16b 2718 aesd v24.16b,v17.16b 2719 aesimc v24.16b,v24.16b 2720 aesd v25.16b,v17.16b 2721 aesimc v25.16b,v25.16b 2722 aesd v26.16b,v17.16b 2723 aesimc v26.16b,v26.16b 2724 ld1 {v17.4s},[x7],#16 // load key schedule... 2725 b.gt .Loop5x_xts_dec 2726 2727 aesd v0.16b,v16.16b 2728 aesimc v0.16b,v0.16b 2729 aesd v1.16b,v16.16b 2730 aesimc v1.16b,v1.16b 2731 aesd v24.16b,v16.16b 2732 aesimc v24.16b,v24.16b 2733 aesd v25.16b,v16.16b 2734 aesimc v25.16b,v25.16b 2735 aesd v26.16b,v16.16b 2736 aesimc v26.16b,v26.16b 2737 subs x2,x2,#0x50 // because .Lxts_dec_tail4x 2738 2739 aesd v0.16b,v17.16b 2740 aesimc v0.16b,v0.16b 2741 aesd v1.16b,v17.16b 2742 aesimc v1.16b,v1.16b 2743 aesd v24.16b,v17.16b 2744 aesimc v24.16b,v24.16b 2745 aesd v25.16b,v17.16b 2746 aesimc v25.16b,v25.16b 2747 aesd v26.16b,v17.16b 2748 aesimc v26.16b,v26.16b 2749 csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo 2750 mov x7,x3 2751 2752 aesd v0.16b,v18.16b 2753 aesimc v0.16b,v0.16b 2754 aesd v1.16b,v18.16b 2755 aesimc v1.16b,v1.16b 2756 aesd v24.16b,v18.16b 2757 aesimc v24.16b,v24.16b 2758 aesd v25.16b,v18.16b 2759 aesimc v25.16b,v25.16b 2760 aesd v26.16b,v18.16b 2761 aesimc v26.16b,v26.16b 2762 add x0,x0,x6 // x0 is adjusted in such way that 2763 // at exit from the loop v1.16b-v26.16b 2764 // are loaded with last "words" 2765 add x6,x2,#0x60 // because .Lxts_dec_tail4x 2766 2767 aesd v0.16b,v19.16b 2768 aesimc v0.16b,v0.16b 2769 aesd v1.16b,v19.16b 2770 aesimc v1.16b,v1.16b 2771 aesd v24.16b,v19.16b 2772 aesimc v24.16b,v24.16b 2773 aesd v25.16b,v19.16b 2774 aesimc v25.16b,v25.16b 2775 aesd v26.16b,v19.16b 2776 aesimc v26.16b,v26.16b 2777 2778 aesd v0.16b,v20.16b 2779 aesimc v0.16b,v0.16b 2780 aesd v1.16b,v20.16b 2781 aesimc v1.16b,v1.16b 2782 aesd v24.16b,v20.16b 2783 aesimc v24.16b,v24.16b 2784 aesd v25.16b,v20.16b 2785 aesimc v25.16b,v25.16b 2786 aesd v26.16b,v20.16b 2787 aesimc 
v26.16b,v26.16b 2788 2789 aesd v0.16b,v21.16b 2790 aesimc v0.16b,v0.16b 2791 aesd v1.16b,v21.16b 2792 aesimc v1.16b,v1.16b 2793 aesd v24.16b,v21.16b 2794 aesimc v24.16b,v24.16b 2795 aesd v25.16b,v21.16b 2796 aesimc v25.16b,v25.16b 2797 aesd v26.16b,v21.16b 2798 aesimc v26.16b,v26.16b 2799 2800 aesd v0.16b,v22.16b 2801 aesimc v0.16b,v0.16b 2802 aesd v1.16b,v22.16b 2803 aesimc v1.16b,v1.16b 2804 aesd v24.16b,v22.16b 2805 aesimc v24.16b,v24.16b 2806 aesd v25.16b,v22.16b 2807 aesimc v25.16b,v25.16b 2808 aesd v26.16b,v22.16b 2809 aesimc v26.16b,v26.16b 2810 2811 eor v4.16b,v7.16b,v6.16b 2812 aesd v0.16b,v23.16b 2813 // The iv for first block of next iteration. 2814 extr x22,x10,x10,#32 2815 extr x10,x10,x9,#63 2816 and w11,w19,w22,asr #31 2817 eor x9,x11,x9,lsl #1 2818 fmov d6,x9 2819 fmov v6.d[1],x10 2820 eor v5.16b,v7.16b,v8.16b 2821 ld1 {v2.16b},[x0],#16 2822 aesd v1.16b,v23.16b 2823 // The iv for second block 2824 extr x22,x10,x10,#32 2825 extr x10,x10,x9,#63 2826 and w11,w19,w22,asr #31 2827 eor x9,x11,x9,lsl #1 2828 fmov d8,x9 2829 fmov v8.d[1],x10 2830 eor v17.16b,v7.16b,v9.16b 2831 ld1 {v3.16b},[x0],#16 2832 aesd v24.16b,v23.16b 2833 // The iv for third block 2834 extr x22,x10,x10,#32 2835 extr x10,x10,x9,#63 2836 and w11,w19,w22,asr #31 2837 eor x9,x11,x9,lsl #1 2838 fmov d9,x9 2839 fmov v9.d[1],x10 2840 eor v30.16b,v7.16b,v10.16b 2841 ld1 {v27.16b},[x0],#16 2842 aesd v25.16b,v23.16b 2843 // The iv for fourth block 2844 extr x22,x10,x10,#32 2845 extr x10,x10,x9,#63 2846 and w11,w19,w22,asr #31 2847 eor x9,x11,x9,lsl #1 2848 fmov d10,x9 2849 fmov v10.d[1],x10 2850 eor v31.16b,v7.16b,v11.16b 2851 ld1 {v28.16b},[x0],#16 2852 aesd v26.16b,v23.16b 2853 2854 // The iv for fifth block 2855 extr x22,x10,x10,#32 2856 extr x10,x10,x9,#63 2857 and w11,w19,w22,asr #31 2858 eor x9,x11,x9,lsl #1 2859 fmov d11,x9 2860 fmov v11.d[1],x10 2861 2862 ld1 {v29.16b},[x0],#16 2863 cbz x6,.Lxts_dec_tail4x 2864 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 2865 eor 
v4.16b,v4.16b,v0.16b 2866 eor v0.16b,v2.16b,v6.16b 2867 eor v5.16b,v5.16b,v1.16b 2868 eor v1.16b,v3.16b,v8.16b 2869 eor v17.16b,v17.16b,v24.16b 2870 eor v24.16b,v27.16b,v9.16b 2871 eor v30.16b,v30.16b,v25.16b 2872 eor v25.16b,v28.16b,v10.16b 2873 eor v31.16b,v31.16b,v26.16b 2874 st1 {v4.16b},[x1],#16 2875 eor v26.16b,v29.16b,v11.16b 2876 st1 {v5.16b},[x1],#16 2877 mov w6,w5 2878 st1 {v17.16b},[x1],#16 2879 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 2880 st1 {v30.16b},[x1],#16 2881 st1 {v31.16b},[x1],#16 2882 b.hs .Loop5x_xts_dec 2883 2884 cmn x2,#0x10 2885 b.ne .Loop5x_dec_after 2886 // If x2(x2) equal to -0x10, the left blocks is 4. 2887 // After specially processing, utilize the five blocks processing again. 2888 // It will use the following IVs: v6.16b,v6.16b,v8.16b,v9.16b,v10.16b. 2889 orr v11.16b,v10.16b,v10.16b 2890 orr v10.16b,v9.16b,v9.16b 2891 orr v9.16b,v8.16b,v8.16b 2892 orr v8.16b,v6.16b,v6.16b 2893 fmov x9,d11 2894 fmov x10,v11.d[1] 2895 eor v0.16b,v6.16b,v2.16b 2896 eor v1.16b,v8.16b,v3.16b 2897 eor v24.16b,v27.16b,v9.16b 2898 eor v25.16b,v28.16b,v10.16b 2899 eor v26.16b,v29.16b,v11.16b 2900 b.eq .Loop5x_xts_dec 2901 2902.Loop5x_dec_after: 2903 add x2,x2,#0x50 2904 cbz x2,.Lxts_done 2905 2906 add w6,w5,#2 2907 subs x2,x2,#0x30 2908 b.lo .Lxts_inner_dec_tail 2909 2910 eor v0.16b,v6.16b,v27.16b 2911 eor v1.16b,v8.16b,v28.16b 2912 eor v24.16b,v29.16b,v9.16b 2913 b .Lxts_outer_dec_tail 2914 2915.align 4 2916.Lxts_dec_tail4x: 2917 add x0,x0,#16 2918 tst x21,#0xf 2919 eor v5.16b,v1.16b,v4.16b 2920 st1 {v5.16b},[x1],#16 2921 eor v17.16b,v24.16b,v17.16b 2922 st1 {v17.16b},[x1],#16 2923 eor v30.16b,v25.16b,v30.16b 2924 eor v31.16b,v26.16b,v31.16b 2925 st1 {v30.16b,v31.16b},[x1],#32 2926 2927 b.eq .Lxts_dec_abort 2928 ld1 {v0.16b},[x0],#16 2929 b .Lxts_done 2930.align 4 2931.Lxts_outer_dec_tail: 2932 aesd v0.16b,v16.16b 2933 aesimc v0.16b,v0.16b 2934 aesd v1.16b,v16.16b 2935 aesimc v1.16b,v1.16b 2936 aesd v24.16b,v16.16b 2937 aesimc v24.16b,v24.16b 2938 ld1 
{v16.4s},[x7],#16 2939 subs w6,w6,#2 2940 aesd v0.16b,v17.16b 2941 aesimc v0.16b,v0.16b 2942 aesd v1.16b,v17.16b 2943 aesimc v1.16b,v1.16b 2944 aesd v24.16b,v17.16b 2945 aesimc v24.16b,v24.16b 2946 ld1 {v17.4s},[x7],#16 2947 b.gt .Lxts_outer_dec_tail 2948 2949 aesd v0.16b,v16.16b 2950 aesimc v0.16b,v0.16b 2951 aesd v1.16b,v16.16b 2952 aesimc v1.16b,v1.16b 2953 aesd v24.16b,v16.16b 2954 aesimc v24.16b,v24.16b 2955 eor v4.16b,v6.16b,v7.16b 2956 subs x2,x2,#0x30 2957 // The iv for first block 2958 fmov x9,d9 2959 fmov x10,v9.d[1] 2960 mov w19,#0x87 2961 extr x22,x10,x10,#32 2962 extr x10,x10,x9,#63 2963 and w11,w19,w22,asr #31 2964 eor x9,x11,x9,lsl #1 2965 fmov d6,x9 2966 fmov v6.d[1],x10 2967 eor v5.16b,v8.16b,v7.16b 2968 csel x6,x2,x6,lo // x6, w6, is zero at this point 2969 aesd v0.16b,v17.16b 2970 aesimc v0.16b,v0.16b 2971 aesd v1.16b,v17.16b 2972 aesimc v1.16b,v1.16b 2973 aesd v24.16b,v17.16b 2974 aesimc v24.16b,v24.16b 2975 eor v17.16b,v9.16b,v7.16b 2976 // The iv for second block 2977 extr x22,x10,x10,#32 2978 extr x10,x10,x9,#63 2979 and w11,w19,w22,asr #31 2980 eor x9,x11,x9,lsl #1 2981 fmov d8,x9 2982 fmov v8.d[1],x10 2983 2984 add x6,x6,#0x20 2985 add x0,x0,x6 // x0 is adjusted to the last data 2986 2987 mov x7,x3 2988 2989 // The iv for third block 2990 extr x22,x10,x10,#32 2991 extr x10,x10,x9,#63 2992 and w11,w19,w22,asr #31 2993 eor x9,x11,x9,lsl #1 2994 fmov d9,x9 2995 fmov v9.d[1],x10 2996 2997 aesd v0.16b,v20.16b 2998 aesimc v0.16b,v0.16b 2999 aesd v1.16b,v20.16b 3000 aesimc v1.16b,v1.16b 3001 aesd v24.16b,v20.16b 3002 aesimc v24.16b,v24.16b 3003 aesd v0.16b,v21.16b 3004 aesimc v0.16b,v0.16b 3005 aesd v1.16b,v21.16b 3006 aesimc v1.16b,v1.16b 3007 aesd v24.16b,v21.16b 3008 aesimc v24.16b,v24.16b 3009 aesd v0.16b,v22.16b 3010 aesimc v0.16b,v0.16b 3011 aesd v1.16b,v22.16b 3012 aesimc v1.16b,v1.16b 3013 aesd v24.16b,v22.16b 3014 aesimc v24.16b,v24.16b 3015 ld1 {v27.16b},[x0],#16 3016 aesd v0.16b,v23.16b 3017 aesd v1.16b,v23.16b 3018 aesd v24.16b,v23.16b 
3019 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 3020 add w6,w5,#2 3021 eor v4.16b,v4.16b,v0.16b 3022 eor v5.16b,v5.16b,v1.16b 3023 eor v24.16b,v24.16b,v17.16b 3024 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 3025 st1 {v4.16b},[x1],#16 3026 st1 {v5.16b},[x1],#16 3027 st1 {v24.16b},[x1],#16 3028 3029 cmn x2,#0x30 3030 add x2,x2,#0x30 3031 b.eq .Lxts_done 3032 sub x2,x2,#0x30 3033 orr v28.16b,v3.16b,v3.16b 3034 orr v29.16b,v27.16b,v27.16b 3035 nop 3036 3037.Lxts_inner_dec_tail: 3038 // x2 == -0x10 means two blocks left. 3039 cmn x2,#0x10 3040 eor v1.16b,v28.16b,v6.16b 3041 eor v24.16b,v29.16b,v8.16b 3042 b.eq .Lxts_dec_tail_loop 3043 eor v24.16b,v29.16b,v6.16b 3044.Lxts_dec_tail_loop: 3045 aesd v1.16b,v16.16b 3046 aesimc v1.16b,v1.16b 3047 aesd v24.16b,v16.16b 3048 aesimc v24.16b,v24.16b 3049 ld1 {v16.4s},[x7],#16 3050 subs w6,w6,#2 3051 aesd v1.16b,v17.16b 3052 aesimc v1.16b,v1.16b 3053 aesd v24.16b,v17.16b 3054 aesimc v24.16b,v24.16b 3055 ld1 {v17.4s},[x7],#16 3056 b.gt .Lxts_dec_tail_loop 3057 3058 aesd v1.16b,v16.16b 3059 aesimc v1.16b,v1.16b 3060 aesd v24.16b,v16.16b 3061 aesimc v24.16b,v24.16b 3062 aesd v1.16b,v17.16b 3063 aesimc v1.16b,v1.16b 3064 aesd v24.16b,v17.16b 3065 aesimc v24.16b,v24.16b 3066 aesd v1.16b,v20.16b 3067 aesimc v1.16b,v1.16b 3068 aesd v24.16b,v20.16b 3069 aesimc v24.16b,v24.16b 3070 cmn x2,#0x20 3071 aesd v1.16b,v21.16b 3072 aesimc v1.16b,v1.16b 3073 aesd v24.16b,v21.16b 3074 aesimc v24.16b,v24.16b 3075 eor v5.16b,v6.16b,v7.16b 3076 aesd v1.16b,v22.16b 3077 aesimc v1.16b,v1.16b 3078 aesd v24.16b,v22.16b 3079 aesimc v24.16b,v24.16b 3080 eor v17.16b,v8.16b,v7.16b 3081 aesd v1.16b,v23.16b 3082 aesd v24.16b,v23.16b 3083 b.eq .Lxts_dec_one 3084 eor v5.16b,v5.16b,v1.16b 3085 eor v17.16b,v17.16b,v24.16b 3086 orr v6.16b,v9.16b,v9.16b 3087 orr v8.16b,v10.16b,v10.16b 3088 st1 {v5.16b},[x1],#16 3089 st1 {v17.16b},[x1],#16 3090 add x2,x2,#16 3091 b .Lxts_done 3092 3093.Lxts_dec_one: 3094 eor v5.16b,v5.16b,v24.16b 3095 orr v6.16b,v8.16b,v8.16b 
3096 orr v8.16b,v9.16b,v9.16b 3097 st1 {v5.16b},[x1],#16 3098 add x2,x2,#32 3099 3100.Lxts_done: 3101 tst x21,#0xf 3102 b.eq .Lxts_dec_abort 3103 // Processing the last two blocks with cipher stealing. 3104 mov x7,x3 3105 cbnz x2,.Lxts_dec_1st_done 3106 ld1 {v0.16b},[x0],#16 3107 3108 // Decrypt the last secod block to get the last plain text block 3109.Lxts_dec_1st_done: 3110 eor v26.16b,v0.16b,v8.16b 3111 ldr w6,[x3,#240] 3112 ld1 {v0.4s},[x3],#16 3113 sub w6,w6,#2 3114 ld1 {v1.4s},[x3],#16 3115.Loop_final_2nd_dec: 3116 aesd v26.16b,v0.16b 3117 aesimc v26.16b,v26.16b 3118 ld1 {v0.4s},[x3],#16 // load key schedule... 3119 subs w6,w6,#2 3120 aesd v26.16b,v1.16b 3121 aesimc v26.16b,v26.16b 3122 ld1 {v1.4s},[x3],#16 // load key schedule... 3123 b.gt .Loop_final_2nd_dec 3124 3125 aesd v26.16b,v0.16b 3126 aesimc v26.16b,v26.16b 3127 ld1 {v0.4s},[x3] 3128 aesd v26.16b,v1.16b 3129 eor v26.16b,v26.16b,v0.16b 3130 eor v26.16b,v26.16b,v8.16b 3131 st1 {v26.16b},[x1] 3132 3133 mov x20,x0 3134 add x13,x1,#16 3135 3136 // Composite the tailcnt "16 byte not aligned block" into the last second plain blocks 3137 // to get the last encrypted block. 3138.composite_dec_loop: 3139 subs x21,x21,#1 3140 ldrb w15,[x1,x21] 3141 ldrb w14,[x20,x21] 3142 strb w15,[x13,x21] 3143 strb w14,[x1,x21] 3144 b.gt .composite_dec_loop 3145.Lxts_dec_load_done: 3146 ld1 {v26.16b},[x1] 3147 eor v26.16b,v26.16b,v6.16b 3148 3149 // Decrypt the composite block to get the last second plain text block 3150 ldr w6,[x7,#240] 3151 ld1 {v0.4s},[x7],#16 3152 sub w6,w6,#2 3153 ld1 {v1.4s},[x7],#16 3154.Loop_final_dec: 3155 aesd v26.16b,v0.16b 3156 aesimc v26.16b,v26.16b 3157 ld1 {v0.4s},[x7],#16 // load key schedule... 3158 subs w6,w6,#2 3159 aesd v26.16b,v1.16b 3160 aesimc v26.16b,v26.16b 3161 ld1 {v1.4s},[x7],#16 // load key schedule... 
3162 b.gt .Loop_final_dec 3163 3164 aesd v26.16b,v0.16b 3165 aesimc v26.16b,v26.16b 3166 ld1 {v0.4s},[x7] 3167 aesd v26.16b,v1.16b 3168 eor v26.16b,v26.16b,v0.16b 3169 eor v26.16b,v26.16b,v6.16b 3170 st1 {v26.16b},[x1] 3171 3172.Lxts_dec_abort: 3173 ldp x21,x22,[sp,#48] 3174 ldp d8,d9,[sp,#32] 3175 ldp d10,d11,[sp,#16] 3176 ldp x19,x20,[sp],#64 3177 3178.Lxts_dec_final_abort: 3179 ret 3180.size aes_v8_xts_decrypt,.-aes_v8_xts_decrypt 3181#endif 3182