/* Do not modify. This file is auto-generated from aesv8-armx.pl. */
#include "arm_arch.h"

#if __ARM_MAX_ARCH__>=7
.arch	armv8-a+crypto
.text
.align	5
/*
 * Key-expansion constants consumed by aes_v8_set_encrypt_key, three rows of
 * four 32-bit words each:
 *   row 0: 0x01 splat  - starting round constant, doubled with shl per round
 *   row 1: tbl index vector ("rotate-n-splat")
 *   row 2: 0x1b splat  - round constant reloaded for the last two AES-128 keys
 */
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

/*
 * aes_v8_set_encrypt_key - expand a user key into an encryption key schedule.
 *
 * In:   x0  = user key (16, 24 or 32 bytes are read, depending on w1)
 *       w1  = key length in bits
 *       x2  = key schedule to fill
 * Out:  x0  =  0 on success
 *             -1 if x0 or x2 is zero
 *             -2 if w1 is below 128, above 256, or not a multiple of 64
 *
 * On success the round keys are written consecutively starting at the
 * original x2, and the round count (10, 12 or 14) is stored as a 32-bit word
 * at byte offset 240 from it.  x2 is left pointing at that offset and w12
 * holds the round count; aes_v8_set_decrypt_key enters at .Lenc_key and
 * relies on both.
 *
 * Scratch: x0 (advanced), w1 (reused as loop counter), x3, w12, v0-v6, flags.
 */
.globl	aes_v8_set_encrypt_key
.type	aes_v8_set_encrypt_key,%function
.align	5
aes_v8_set_encrypt_key:
.Lenc_key:
	AARCH64_VALID_CALL_TARGET
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1			// x3 carries the return code; -1 = null pointer
	cmp	x0,#0
	b.eq	.Lenc_key_abort
	cmp	x2,#0
	b.eq	.Lenc_key_abort
	mov	x3,#-2			// -2 = unsupported key length
	cmp	w1,#128
	b.lt	.Lenc_key_abort
	cmp	w1,#256
	b.gt	.Lenc_key_abort
	tst	w1,#0x3f		// must be a multiple of 64
	b.ne	.Lenc_key_abort

	adr	x3,.Lrcon
	cmp	w1,#192			// flags consumed by the three branches below

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0, used as zero key for aese and as ext filler
	ld1	{v3.16b},[x0],#16	// first 16 key bytes
	mov	w1,#8		// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = rcon row 0, v2 = tbl mask; x3 -> row 2

	b.lt	.Loop128
	b.eq	.L192
	b	.L256

.align	4
.Loop128:				// 8 iterations: store a round key, derive the next
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// next round constant
	eor	v3.16b,v3.16b,v6.16b
	b.ne	.Loop128

	ld1	{v1.4s},[x3]		// switch round constant to the 0x1b row

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]		// 11th round key at offset 160, no post-increment
	add	x2,x2,#0x50		// 160 + 0x50 = 240, the round-count slot

	mov	w12,#10
	b	.Ldone

.align	4
.L192:
	ld1	{v4.8b},[x0],#8		// remaining 8 key bytes
	movi	v6.16b,#8			// borrow v6.16b
	st1	{v3.4s},[x2],#16
	sub	v2.16b,v2.16b,v6.16b	// adjust the mask

.Loop192:				// 8 iterations, 24 bytes of schedule each
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
#ifdef __AARCH64EB__
	st1	{v4.4s},[x2],#16
	sub	x2,x2,#8		// net advance of 8 bytes, as in the #else path
#else
	st1	{v4.8b},[x2],#8
#endif
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b

	dup	v5.4s,v3.s[3]
	eor	v5.16b,v5.16b,v4.16b
	eor	v6.16b,v6.16b,v1.16b
	ext	v4.16b,v0.16b,v4.16b,#12
	shl	v1.16b,v1.16b,#1
	eor	v4.16b,v4.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	eor	v4.16b,v4.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.ne	.Loop192

	mov	w12,#12
	add	x2,x2,#0x20		// 16 + 8*24 = 208; + 0x20 = 240
	b	.Ldone

.align	4
.L256:
	ld1	{v4.16b},[x0]		// second 16 key bytes
	mov	w1,#7
	mov	w12,#14
	st1	{v3.4s},[x2],#16

.Loop256:				// 7 iterations, 32 bytes of schedule each
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	.Ldone			// flags still from subs: x2 is at offset 240 here

	dup	v6.4s,v3.s[3]	// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	.Loop256

.Ldone:
	str	w12,[x2]		// round count at offset 240
	mov	x3,#0

.Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16		// x30 was never modified, only x29 is reloaded
	ret
.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key

/*
 * aes_v8_set_decrypt_key - build a decryption key schedule.
 *
 * In:   x0 = user key, w1 = key length in bits, x2 = key schedule to fill
 *       (passed through unchanged to .Lenc_key)
 * Out:  x0 = 0 on success, otherwise the non-zero code from .Lenc_key
 *
 * The encryption schedule is generated first.  It is then reversed in place:
 * the first and last round keys are swapped as they are, every other pair is
 * swapped with aesimc applied to both, and the middle key gets aesimc in
 * place.
 *
 * Scratch: x2, x4, v0, v1, flags, plus everything .Lenc_key uses.
 */
.globl	aes_v8_set_decrypt_key
.type	aes_v8_set_decrypt_key,%function
.align	5
aes_v8_set_decrypt_key:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	bl	.Lenc_key

	cmp	x0,#0
	b.ne	.Ldec_key_abort

	sub	x2,x2,#240		// restore original x2
	mov	x4,#-16			// post-index step for walking x0 downwards
	add	x0,x2,x12,lsl#4	// end of key schedule

	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16

.Loop_imc:
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16
	cmp	x0,x2
	b.hi	.Loop_imc		// until the two cursors meet

	ld1	{v0.4s},[x2]		// middle round key
	aesimc	v0.16b,v0.16b
	st1	{v0.4s},[x0]

	eor	x0,x0,x0		// return value
.Ldec_key_abort:
	ldp	x29,x30,[sp],#16
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
/*
 * aes_v8_encrypt - encrypt a single 16-byte block.
 *
 * In:   x0 = input block, x1 = output block,
 *       x2 = key schedule (round count is read from byte offset 240)
 * Scratch: x2 (advanced through the schedule), w3, v0-v2, flags.
 */
.globl	aes_v8_encrypt
.type	aes_v8_encrypt,%function
.align	5
aes_v8_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]		// w3 = round count
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

.Loop_enc:				// two rounds per iteration
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_enc

	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aese	v2.16b,v1.16b		// final round has no aesmc
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size	aes_v8_encrypt,.-aes_v8_encrypt
/*
 * aes_v8_decrypt - decrypt a single 16-byte block.
 *
 * In:   x0 = input block, x1 = output block,
 *       x2 = key schedule (round count is read from byte offset 240)
 * Scratch: x2 (advanced through the schedule), w3, v0-v2, flags.
 */
.globl	aes_v8_decrypt
.type	aes_v8_decrypt,%function
.align	5
aes_v8_decrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]		// w3 = round count
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

.Loop_dec:				// two rounds per iteration
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aesd	v2.16b,v1.16b
	aesimc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_dec

	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aesd	v2.16b,v1.16b		// final round has no aesimc
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size	aes_v8_decrypt,.-aes_v8_decrypt
.globl	aes_v8_ecb_encrypt
.type	aes_v8_ecb_encrypt,%function
.align	5
aes_v8_ecb_encrypt:
	AARCH64_VALID_CALL_TARGET
	subs	x2,x2,#16
	// Original input data size bigger than 16, jump to big size processing.
	b.ne	.Lecb_big_size
	ld1	{v0.16b},[x0]
	cmp	w4,#0					// en- or decrypting?
	ldr	w5,[x3,#240]
	ld1	{v5.4s,v6.4s},[x3],#32			// load key schedule...

	b.eq	.Lecb_small_dec
	aese	v0.16b,v5.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s,v17.4s},[x3],#32			// load key schedule...
	aese	v0.16b,v6.16b
	aesmc	v0.16b,v0.16b
	subs	w5,w5,#10			// if rounds==10, jump to aes-128-ecb processing
	b.eq	.Lecb_128_enc
.Lecb_round_loop:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x3],#16				// load key schedule...
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x3],#16				// load key schedule...
	subs	w5,w5,#2			// bias
	b.gt	.Lecb_round_loop
.Lecb_128_enc:
	ld1	{v18.4s,v19.4s},[x3],#32		// load key schedule...
320c0855eaaSJohn Baldwin aese v0.16b,v16.16b 321c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 322c0855eaaSJohn Baldwin aese v0.16b,v17.16b 323c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 324c0855eaaSJohn Baldwin ld1 {v20.4s,v21.4s},[x3],#32 // load key schedule... 325c0855eaaSJohn Baldwin aese v0.16b,v18.16b 326c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 327c0855eaaSJohn Baldwin aese v0.16b,v19.16b 328c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 329c0855eaaSJohn Baldwin ld1 {v22.4s,v23.4s},[x3],#32 // load key schedule... 330c0855eaaSJohn Baldwin aese v0.16b,v20.16b 331c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 332c0855eaaSJohn Baldwin aese v0.16b,v21.16b 333c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 334c0855eaaSJohn Baldwin ld1 {v7.4s},[x3] 335c0855eaaSJohn Baldwin aese v0.16b,v22.16b 336c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 337c0855eaaSJohn Baldwin aese v0.16b,v23.16b 338c0855eaaSJohn Baldwin eor v0.16b,v0.16b,v7.16b 339c0855eaaSJohn Baldwin st1 {v0.16b},[x1] 340c0855eaaSJohn Baldwin b .Lecb_Final_abort 341c0855eaaSJohn Baldwin.Lecb_small_dec: 342c0855eaaSJohn Baldwin aesd v0.16b,v5.16b 343c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 344c0855eaaSJohn Baldwin ld1 {v16.4s,v17.4s},[x3],#32 // load key schedule... 345c0855eaaSJohn Baldwin aesd v0.16b,v6.16b 346c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 347c0855eaaSJohn Baldwin subs w5,w5,#10 // bias 348c0855eaaSJohn Baldwin b.eq .Lecb_128_dec 349c0855eaaSJohn Baldwin.Lecb_dec_round_loop: 350c0855eaaSJohn Baldwin aesd v0.16b,v16.16b 351c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 352c0855eaaSJohn Baldwin ld1 {v16.4s},[x3],#16 // load key schedule... 353c0855eaaSJohn Baldwin aesd v0.16b,v17.16b 354c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 355c0855eaaSJohn Baldwin ld1 {v17.4s},[x3],#16 // load key schedule... 356c0855eaaSJohn Baldwin subs w5,w5,#2 // bias 357c0855eaaSJohn Baldwin b.gt .Lecb_dec_round_loop 358c0855eaaSJohn Baldwin.Lecb_128_dec: 359c0855eaaSJohn Baldwin ld1 {v18.4s,v19.4s},[x3],#32 // load key schedule... 
360c0855eaaSJohn Baldwin aesd v0.16b,v16.16b 361c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 362c0855eaaSJohn Baldwin aesd v0.16b,v17.16b 363c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 364c0855eaaSJohn Baldwin ld1 {v20.4s,v21.4s},[x3],#32 // load key schedule... 365c0855eaaSJohn Baldwin aesd v0.16b,v18.16b 366c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 367c0855eaaSJohn Baldwin aesd v0.16b,v19.16b 368c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 369c0855eaaSJohn Baldwin ld1 {v22.4s,v23.4s},[x3],#32 // load key schedule... 370c0855eaaSJohn Baldwin aesd v0.16b,v20.16b 371c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 372c0855eaaSJohn Baldwin aesd v0.16b,v21.16b 373c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 374c0855eaaSJohn Baldwin ld1 {v7.4s},[x3] 375c0855eaaSJohn Baldwin aesd v0.16b,v22.16b 376c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 377c0855eaaSJohn Baldwin aesd v0.16b,v23.16b 378c0855eaaSJohn Baldwin eor v0.16b,v0.16b,v7.16b 379c0855eaaSJohn Baldwin st1 {v0.16b},[x1] 380c0855eaaSJohn Baldwin b .Lecb_Final_abort 381c0855eaaSJohn Baldwin.Lecb_big_size: 382c0855eaaSJohn Baldwin stp x29,x30,[sp,#-16]! 383c0855eaaSJohn Baldwin add x29,sp,#0 384c0855eaaSJohn Baldwin mov x8,#16 385c0855eaaSJohn Baldwin b.lo .Lecb_done 386c0855eaaSJohn Baldwin csel x8,xzr,x8,eq 387c0855eaaSJohn Baldwin 388c0855eaaSJohn Baldwin cmp w4,#0 // en- or decrypting? 389c0855eaaSJohn Baldwin ldr w5,[x3,#240] 390c0855eaaSJohn Baldwin and x2,x2,#-16 391c0855eaaSJohn Baldwin ld1 {v0.16b},[x0],x8 392c0855eaaSJohn Baldwin 393c0855eaaSJohn Baldwin ld1 {v16.4s,v17.4s},[x3] // load key schedule... 
394c0855eaaSJohn Baldwin sub w5,w5,#6 395c0855eaaSJohn Baldwin add x7,x3,x5,lsl#4 // pointer to last 7 round keys 396c0855eaaSJohn Baldwin sub w5,w5,#2 397c0855eaaSJohn Baldwin ld1 {v18.4s,v19.4s},[x7],#32 398c0855eaaSJohn Baldwin ld1 {v20.4s,v21.4s},[x7],#32 399c0855eaaSJohn Baldwin ld1 {v22.4s,v23.4s},[x7],#32 400c0855eaaSJohn Baldwin ld1 {v7.4s},[x7] 401c0855eaaSJohn Baldwin 402c0855eaaSJohn Baldwin add x7,x3,#32 403c0855eaaSJohn Baldwin mov w6,w5 404c0855eaaSJohn Baldwin b.eq .Lecb_dec 405c0855eaaSJohn Baldwin 406c0855eaaSJohn Baldwin ld1 {v1.16b},[x0],#16 407c0855eaaSJohn Baldwin subs x2,x2,#32 // bias 408c0855eaaSJohn Baldwin add w6,w5,#2 409c0855eaaSJohn Baldwin orr v3.16b,v1.16b,v1.16b 410c0855eaaSJohn Baldwin orr v24.16b,v1.16b,v1.16b 411c0855eaaSJohn Baldwin orr v1.16b,v0.16b,v0.16b 412c0855eaaSJohn Baldwin b.lo .Lecb_enc_tail 413c0855eaaSJohn Baldwin 414c0855eaaSJohn Baldwin orr v1.16b,v3.16b,v3.16b 415c0855eaaSJohn Baldwin ld1 {v24.16b},[x0],#16 416c0855eaaSJohn Baldwin cmp x2,#32 417c0855eaaSJohn Baldwin b.lo .Loop3x_ecb_enc 418c0855eaaSJohn Baldwin 419c0855eaaSJohn Baldwin ld1 {v25.16b},[x0],#16 420c0855eaaSJohn Baldwin ld1 {v26.16b},[x0],#16 421c0855eaaSJohn Baldwin sub x2,x2,#32 // bias 422c0855eaaSJohn Baldwin mov w6,w5 423c0855eaaSJohn Baldwin 424c0855eaaSJohn Baldwin.Loop5x_ecb_enc: 425c0855eaaSJohn Baldwin aese v0.16b,v16.16b 426c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 427c0855eaaSJohn Baldwin aese v1.16b,v16.16b 428c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 429c0855eaaSJohn Baldwin aese v24.16b,v16.16b 430c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 431c0855eaaSJohn Baldwin aese v25.16b,v16.16b 432c0855eaaSJohn Baldwin aesmc v25.16b,v25.16b 433c0855eaaSJohn Baldwin aese v26.16b,v16.16b 434c0855eaaSJohn Baldwin aesmc v26.16b,v26.16b 435c0855eaaSJohn Baldwin ld1 {v16.4s},[x7],#16 436c0855eaaSJohn Baldwin subs w6,w6,#2 437c0855eaaSJohn Baldwin aese v0.16b,v17.16b 438c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 439c0855eaaSJohn Baldwin aese 
v1.16b,v17.16b 440c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 441c0855eaaSJohn Baldwin aese v24.16b,v17.16b 442c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 443c0855eaaSJohn Baldwin aese v25.16b,v17.16b 444c0855eaaSJohn Baldwin aesmc v25.16b,v25.16b 445c0855eaaSJohn Baldwin aese v26.16b,v17.16b 446c0855eaaSJohn Baldwin aesmc v26.16b,v26.16b 447c0855eaaSJohn Baldwin ld1 {v17.4s},[x7],#16 448c0855eaaSJohn Baldwin b.gt .Loop5x_ecb_enc 449c0855eaaSJohn Baldwin 450c0855eaaSJohn Baldwin aese v0.16b,v16.16b 451c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 452c0855eaaSJohn Baldwin aese v1.16b,v16.16b 453c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 454c0855eaaSJohn Baldwin aese v24.16b,v16.16b 455c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 456c0855eaaSJohn Baldwin aese v25.16b,v16.16b 457c0855eaaSJohn Baldwin aesmc v25.16b,v25.16b 458c0855eaaSJohn Baldwin aese v26.16b,v16.16b 459c0855eaaSJohn Baldwin aesmc v26.16b,v26.16b 460c0855eaaSJohn Baldwin cmp x2,#0x40 // because .Lecb_enc_tail4x 461c0855eaaSJohn Baldwin sub x2,x2,#0x50 462c0855eaaSJohn Baldwin 463c0855eaaSJohn Baldwin aese v0.16b,v17.16b 464c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 465c0855eaaSJohn Baldwin aese v1.16b,v17.16b 466c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 467c0855eaaSJohn Baldwin aese v24.16b,v17.16b 468c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 469c0855eaaSJohn Baldwin aese v25.16b,v17.16b 470c0855eaaSJohn Baldwin aesmc v25.16b,v25.16b 471c0855eaaSJohn Baldwin aese v26.16b,v17.16b 472c0855eaaSJohn Baldwin aesmc v26.16b,v26.16b 473c0855eaaSJohn Baldwin csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo 474c0855eaaSJohn Baldwin mov x7,x3 475c0855eaaSJohn Baldwin 476c0855eaaSJohn Baldwin aese v0.16b,v18.16b 477c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 478c0855eaaSJohn Baldwin aese v1.16b,v18.16b 479c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 480c0855eaaSJohn Baldwin aese v24.16b,v18.16b 481c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 482c0855eaaSJohn Baldwin aese v25.16b,v18.16b 483c0855eaaSJohn Baldwin aesmc 
v25.16b,v25.16b 484c0855eaaSJohn Baldwin aese v26.16b,v18.16b 485c0855eaaSJohn Baldwin aesmc v26.16b,v26.16b 486c0855eaaSJohn Baldwin add x0,x0,x6 // x0 is adjusted in such way that 487c0855eaaSJohn Baldwin // at exit from the loop v1.16b-v26.16b 488c0855eaaSJohn Baldwin // are loaded with last "words" 489c0855eaaSJohn Baldwin add x6,x2,#0x60 // because .Lecb_enc_tail4x 490c0855eaaSJohn Baldwin 491c0855eaaSJohn Baldwin aese v0.16b,v19.16b 492c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 493c0855eaaSJohn Baldwin aese v1.16b,v19.16b 494c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 495c0855eaaSJohn Baldwin aese v24.16b,v19.16b 496c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 497c0855eaaSJohn Baldwin aese v25.16b,v19.16b 498c0855eaaSJohn Baldwin aesmc v25.16b,v25.16b 499c0855eaaSJohn Baldwin aese v26.16b,v19.16b 500c0855eaaSJohn Baldwin aesmc v26.16b,v26.16b 501c0855eaaSJohn Baldwin 502c0855eaaSJohn Baldwin aese v0.16b,v20.16b 503c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 504c0855eaaSJohn Baldwin aese v1.16b,v20.16b 505c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 506c0855eaaSJohn Baldwin aese v24.16b,v20.16b 507c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 508c0855eaaSJohn Baldwin aese v25.16b,v20.16b 509c0855eaaSJohn Baldwin aesmc v25.16b,v25.16b 510c0855eaaSJohn Baldwin aese v26.16b,v20.16b 511c0855eaaSJohn Baldwin aesmc v26.16b,v26.16b 512c0855eaaSJohn Baldwin 513c0855eaaSJohn Baldwin aese v0.16b,v21.16b 514c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 515c0855eaaSJohn Baldwin aese v1.16b,v21.16b 516c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 517c0855eaaSJohn Baldwin aese v24.16b,v21.16b 518c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 519c0855eaaSJohn Baldwin aese v25.16b,v21.16b 520c0855eaaSJohn Baldwin aesmc v25.16b,v25.16b 521c0855eaaSJohn Baldwin aese v26.16b,v21.16b 522c0855eaaSJohn Baldwin aesmc v26.16b,v26.16b 523c0855eaaSJohn Baldwin 524c0855eaaSJohn Baldwin aese v0.16b,v22.16b 525c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 526c0855eaaSJohn Baldwin aese v1.16b,v22.16b 
527c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 528c0855eaaSJohn Baldwin aese v24.16b,v22.16b 529c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 530c0855eaaSJohn Baldwin aese v25.16b,v22.16b 531c0855eaaSJohn Baldwin aesmc v25.16b,v25.16b 532c0855eaaSJohn Baldwin aese v26.16b,v22.16b 533c0855eaaSJohn Baldwin aesmc v26.16b,v26.16b 534c0855eaaSJohn Baldwin 535c0855eaaSJohn Baldwin aese v0.16b,v23.16b 536c0855eaaSJohn Baldwin ld1 {v2.16b},[x0],#16 537c0855eaaSJohn Baldwin aese v1.16b,v23.16b 538c0855eaaSJohn Baldwin ld1 {v3.16b},[x0],#16 539c0855eaaSJohn Baldwin aese v24.16b,v23.16b 540c0855eaaSJohn Baldwin ld1 {v27.16b},[x0],#16 541c0855eaaSJohn Baldwin aese v25.16b,v23.16b 542c0855eaaSJohn Baldwin ld1 {v28.16b},[x0],#16 543c0855eaaSJohn Baldwin aese v26.16b,v23.16b 544c0855eaaSJohn Baldwin ld1 {v29.16b},[x0],#16 545c0855eaaSJohn Baldwin cbz x6,.Lecb_enc_tail4x 546c0855eaaSJohn Baldwin ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 547c0855eaaSJohn Baldwin eor v4.16b,v7.16b,v0.16b 548c0855eaaSJohn Baldwin orr v0.16b,v2.16b,v2.16b 549c0855eaaSJohn Baldwin eor v5.16b,v7.16b,v1.16b 550c0855eaaSJohn Baldwin orr v1.16b,v3.16b,v3.16b 551c0855eaaSJohn Baldwin eor v17.16b,v7.16b,v24.16b 552c0855eaaSJohn Baldwin orr v24.16b,v27.16b,v27.16b 553c0855eaaSJohn Baldwin eor v30.16b,v7.16b,v25.16b 554c0855eaaSJohn Baldwin orr v25.16b,v28.16b,v28.16b 555c0855eaaSJohn Baldwin eor v31.16b,v7.16b,v26.16b 556c0855eaaSJohn Baldwin st1 {v4.16b},[x1],#16 557c0855eaaSJohn Baldwin orr v26.16b,v29.16b,v29.16b 558c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 559c0855eaaSJohn Baldwin mov w6,w5 560c0855eaaSJohn Baldwin st1 {v17.16b},[x1],#16 561c0855eaaSJohn Baldwin ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 562c0855eaaSJohn Baldwin st1 {v30.16b},[x1],#16 563c0855eaaSJohn Baldwin st1 {v31.16b},[x1],#16 564c0855eaaSJohn Baldwin b.hs .Loop5x_ecb_enc 565c0855eaaSJohn Baldwin 566c0855eaaSJohn Baldwin add x2,x2,#0x50 567c0855eaaSJohn Baldwin cbz x2,.Lecb_done 568c0855eaaSJohn Baldwin 569c0855eaaSJohn 
Baldwin add w6,w5,#2 570c0855eaaSJohn Baldwin subs x2,x2,#0x30 571c0855eaaSJohn Baldwin orr v0.16b,v27.16b,v27.16b 572c0855eaaSJohn Baldwin orr v1.16b,v28.16b,v28.16b 573c0855eaaSJohn Baldwin orr v24.16b,v29.16b,v29.16b 574c0855eaaSJohn Baldwin b.lo .Lecb_enc_tail 575c0855eaaSJohn Baldwin 576c0855eaaSJohn Baldwin b .Loop3x_ecb_enc 577c0855eaaSJohn Baldwin 578c0855eaaSJohn Baldwin.align 4 579c0855eaaSJohn Baldwin.Lecb_enc_tail4x: 580c0855eaaSJohn Baldwin eor v5.16b,v7.16b,v1.16b 581c0855eaaSJohn Baldwin eor v17.16b,v7.16b,v24.16b 582c0855eaaSJohn Baldwin eor v30.16b,v7.16b,v25.16b 583c0855eaaSJohn Baldwin eor v31.16b,v7.16b,v26.16b 584c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 585c0855eaaSJohn Baldwin st1 {v17.16b},[x1],#16 586c0855eaaSJohn Baldwin st1 {v30.16b},[x1],#16 587c0855eaaSJohn Baldwin st1 {v31.16b},[x1],#16 588c0855eaaSJohn Baldwin 589c0855eaaSJohn Baldwin b .Lecb_done 590c0855eaaSJohn Baldwin.align 4 591c0855eaaSJohn Baldwin.Loop3x_ecb_enc: 592c0855eaaSJohn Baldwin aese v0.16b,v16.16b 593c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 594c0855eaaSJohn Baldwin aese v1.16b,v16.16b 595c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 596c0855eaaSJohn Baldwin aese v24.16b,v16.16b 597c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 598c0855eaaSJohn Baldwin ld1 {v16.4s},[x7],#16 599c0855eaaSJohn Baldwin subs w6,w6,#2 600c0855eaaSJohn Baldwin aese v0.16b,v17.16b 601c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 602c0855eaaSJohn Baldwin aese v1.16b,v17.16b 603c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 604c0855eaaSJohn Baldwin aese v24.16b,v17.16b 605c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 606c0855eaaSJohn Baldwin ld1 {v17.4s},[x7],#16 607c0855eaaSJohn Baldwin b.gt .Loop3x_ecb_enc 608c0855eaaSJohn Baldwin 609c0855eaaSJohn Baldwin aese v0.16b,v16.16b 610c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 611c0855eaaSJohn Baldwin aese v1.16b,v16.16b 612c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 613c0855eaaSJohn Baldwin aese v24.16b,v16.16b 614c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 
615c0855eaaSJohn Baldwin subs x2,x2,#0x30 616c0855eaaSJohn Baldwin csel x6,x2,x6,lo // x6, w6, is zero at this point 617c0855eaaSJohn Baldwin aese v0.16b,v17.16b 618c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 619c0855eaaSJohn Baldwin aese v1.16b,v17.16b 620c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 621c0855eaaSJohn Baldwin aese v24.16b,v17.16b 622c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 623c0855eaaSJohn Baldwin add x0,x0,x6 // x0 is adjusted in such way that 624c0855eaaSJohn Baldwin // at exit from the loop v1.16b-v24.16b 625c0855eaaSJohn Baldwin // are loaded with last "words" 626c0855eaaSJohn Baldwin mov x7,x3 627c0855eaaSJohn Baldwin aese v0.16b,v20.16b 628c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 629c0855eaaSJohn Baldwin aese v1.16b,v20.16b 630c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 631c0855eaaSJohn Baldwin aese v24.16b,v20.16b 632c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 633c0855eaaSJohn Baldwin ld1 {v2.16b},[x0],#16 634c0855eaaSJohn Baldwin aese v0.16b,v21.16b 635c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 636c0855eaaSJohn Baldwin aese v1.16b,v21.16b 637c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 638c0855eaaSJohn Baldwin aese v24.16b,v21.16b 639c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 640c0855eaaSJohn Baldwin ld1 {v3.16b},[x0],#16 641c0855eaaSJohn Baldwin aese v0.16b,v22.16b 642c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 643c0855eaaSJohn Baldwin aese v1.16b,v22.16b 644c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 645c0855eaaSJohn Baldwin aese v24.16b,v22.16b 646c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 647c0855eaaSJohn Baldwin ld1 {v27.16b},[x0],#16 648c0855eaaSJohn Baldwin aese v0.16b,v23.16b 649c0855eaaSJohn Baldwin aese v1.16b,v23.16b 650c0855eaaSJohn Baldwin aese v24.16b,v23.16b 651c0855eaaSJohn Baldwin ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 652c0855eaaSJohn Baldwin add w6,w5,#2 653c0855eaaSJohn Baldwin eor v4.16b,v7.16b,v0.16b 654c0855eaaSJohn Baldwin eor v5.16b,v7.16b,v1.16b 655c0855eaaSJohn Baldwin eor v24.16b,v24.16b,v7.16b 656c0855eaaSJohn 
Baldwin ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 657c0855eaaSJohn Baldwin st1 {v4.16b},[x1],#16 658c0855eaaSJohn Baldwin orr v0.16b,v2.16b,v2.16b 659c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 660c0855eaaSJohn Baldwin orr v1.16b,v3.16b,v3.16b 661c0855eaaSJohn Baldwin st1 {v24.16b},[x1],#16 662c0855eaaSJohn Baldwin orr v24.16b,v27.16b,v27.16b 663c0855eaaSJohn Baldwin b.hs .Loop3x_ecb_enc 664c0855eaaSJohn Baldwin 665c0855eaaSJohn Baldwin cmn x2,#0x30 666c0855eaaSJohn Baldwin b.eq .Lecb_done 667c0855eaaSJohn Baldwin nop 668c0855eaaSJohn Baldwin 669c0855eaaSJohn Baldwin.Lecb_enc_tail: 670c0855eaaSJohn Baldwin aese v1.16b,v16.16b 671c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 672c0855eaaSJohn Baldwin aese v24.16b,v16.16b 673c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 674c0855eaaSJohn Baldwin ld1 {v16.4s},[x7],#16 675c0855eaaSJohn Baldwin subs w6,w6,#2 676c0855eaaSJohn Baldwin aese v1.16b,v17.16b 677c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 678c0855eaaSJohn Baldwin aese v24.16b,v17.16b 679c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 680c0855eaaSJohn Baldwin ld1 {v17.4s},[x7],#16 681c0855eaaSJohn Baldwin b.gt .Lecb_enc_tail 682c0855eaaSJohn Baldwin 683c0855eaaSJohn Baldwin aese v1.16b,v16.16b 684c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 685c0855eaaSJohn Baldwin aese v24.16b,v16.16b 686c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 687c0855eaaSJohn Baldwin aese v1.16b,v17.16b 688c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 689c0855eaaSJohn Baldwin aese v24.16b,v17.16b 690c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 691c0855eaaSJohn Baldwin aese v1.16b,v20.16b 692c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 693c0855eaaSJohn Baldwin aese v24.16b,v20.16b 694c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 695c0855eaaSJohn Baldwin cmn x2,#0x20 696c0855eaaSJohn Baldwin aese v1.16b,v21.16b 697c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 698c0855eaaSJohn Baldwin aese v24.16b,v21.16b 699c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 700c0855eaaSJohn Baldwin aese v1.16b,v22.16b 701c0855eaaSJohn 
Baldwin aesmc v1.16b,v1.16b 702c0855eaaSJohn Baldwin aese v24.16b,v22.16b 703c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 704c0855eaaSJohn Baldwin aese v1.16b,v23.16b 705c0855eaaSJohn Baldwin aese v24.16b,v23.16b 706c0855eaaSJohn Baldwin b.eq .Lecb_enc_one 707c0855eaaSJohn Baldwin eor v5.16b,v7.16b,v1.16b 708c0855eaaSJohn Baldwin eor v17.16b,v7.16b,v24.16b 709c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 710c0855eaaSJohn Baldwin st1 {v17.16b},[x1],#16 711c0855eaaSJohn Baldwin b .Lecb_done 712c0855eaaSJohn Baldwin 713c0855eaaSJohn Baldwin.Lecb_enc_one: 714c0855eaaSJohn Baldwin eor v5.16b,v7.16b,v24.16b 715c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 716c0855eaaSJohn Baldwin b .Lecb_done 717c0855eaaSJohn Baldwin.align 5 718c0855eaaSJohn Baldwin.Lecb_dec: 719c0855eaaSJohn Baldwin ld1 {v1.16b},[x0],#16 720c0855eaaSJohn Baldwin subs x2,x2,#32 // bias 721c0855eaaSJohn Baldwin add w6,w5,#2 722c0855eaaSJohn Baldwin orr v3.16b,v1.16b,v1.16b 723c0855eaaSJohn Baldwin orr v24.16b,v1.16b,v1.16b 724c0855eaaSJohn Baldwin orr v1.16b,v0.16b,v0.16b 725c0855eaaSJohn Baldwin b.lo .Lecb_dec_tail 726c0855eaaSJohn Baldwin 727c0855eaaSJohn Baldwin orr v1.16b,v3.16b,v3.16b 728c0855eaaSJohn Baldwin ld1 {v24.16b},[x0],#16 729c0855eaaSJohn Baldwin cmp x2,#32 730c0855eaaSJohn Baldwin b.lo .Loop3x_ecb_dec 731c0855eaaSJohn Baldwin 732c0855eaaSJohn Baldwin ld1 {v25.16b},[x0],#16 733c0855eaaSJohn Baldwin ld1 {v26.16b},[x0],#16 734c0855eaaSJohn Baldwin sub x2,x2,#32 // bias 735c0855eaaSJohn Baldwin mov w6,w5 736c0855eaaSJohn Baldwin 737c0855eaaSJohn Baldwin.Loop5x_ecb_dec: 738c0855eaaSJohn Baldwin aesd v0.16b,v16.16b 739c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 740c0855eaaSJohn Baldwin aesd v1.16b,v16.16b 741c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 742c0855eaaSJohn Baldwin aesd v24.16b,v16.16b 743c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 744c0855eaaSJohn Baldwin aesd v25.16b,v16.16b 745c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 746c0855eaaSJohn Baldwin aesd v26.16b,v16.16b 747c0855eaaSJohn 
Baldwin aesimc v26.16b,v26.16b 748c0855eaaSJohn Baldwin ld1 {v16.4s},[x7],#16 749c0855eaaSJohn Baldwin subs w6,w6,#2 750c0855eaaSJohn Baldwin aesd v0.16b,v17.16b 751c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 752c0855eaaSJohn Baldwin aesd v1.16b,v17.16b 753c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 754c0855eaaSJohn Baldwin aesd v24.16b,v17.16b 755c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 756c0855eaaSJohn Baldwin aesd v25.16b,v17.16b 757c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 758c0855eaaSJohn Baldwin aesd v26.16b,v17.16b 759c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 760c0855eaaSJohn Baldwin ld1 {v17.4s},[x7],#16 761c0855eaaSJohn Baldwin b.gt .Loop5x_ecb_dec 762c0855eaaSJohn Baldwin 763c0855eaaSJohn Baldwin aesd v0.16b,v16.16b 764c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 765c0855eaaSJohn Baldwin aesd v1.16b,v16.16b 766c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 767c0855eaaSJohn Baldwin aesd v24.16b,v16.16b 768c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 769c0855eaaSJohn Baldwin aesd v25.16b,v16.16b 770c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 771c0855eaaSJohn Baldwin aesd v26.16b,v16.16b 772c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 773c0855eaaSJohn Baldwin cmp x2,#0x40 // because .Lecb_tail4x 774c0855eaaSJohn Baldwin sub x2,x2,#0x50 775c0855eaaSJohn Baldwin 776c0855eaaSJohn Baldwin aesd v0.16b,v17.16b 777c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 778c0855eaaSJohn Baldwin aesd v1.16b,v17.16b 779c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 780c0855eaaSJohn Baldwin aesd v24.16b,v17.16b 781c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 782c0855eaaSJohn Baldwin aesd v25.16b,v17.16b 783c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 784c0855eaaSJohn Baldwin aesd v26.16b,v17.16b 785c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 786c0855eaaSJohn Baldwin csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo 787c0855eaaSJohn Baldwin mov x7,x3 788c0855eaaSJohn Baldwin 789c0855eaaSJohn Baldwin aesd v0.16b,v18.16b 790c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 
791c0855eaaSJohn Baldwin aesd v1.16b,v18.16b 792c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 793c0855eaaSJohn Baldwin aesd v24.16b,v18.16b 794c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 795c0855eaaSJohn Baldwin aesd v25.16b,v18.16b 796c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 797c0855eaaSJohn Baldwin aesd v26.16b,v18.16b 798c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 799c0855eaaSJohn Baldwin add x0,x0,x6 // x0 is adjusted in such way that 800c0855eaaSJohn Baldwin // at exit from the loop v1.16b-v26.16b 801c0855eaaSJohn Baldwin // are loaded with last "words" 802c0855eaaSJohn Baldwin add x6,x2,#0x60 // because .Lecb_tail4x 803c0855eaaSJohn Baldwin 804c0855eaaSJohn Baldwin aesd v0.16b,v19.16b 805c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 806c0855eaaSJohn Baldwin aesd v1.16b,v19.16b 807c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 808c0855eaaSJohn Baldwin aesd v24.16b,v19.16b 809c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 810c0855eaaSJohn Baldwin aesd v25.16b,v19.16b 811c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 812c0855eaaSJohn Baldwin aesd v26.16b,v19.16b 813c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 814c0855eaaSJohn Baldwin 815c0855eaaSJohn Baldwin aesd v0.16b,v20.16b 816c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 817c0855eaaSJohn Baldwin aesd v1.16b,v20.16b 818c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 819c0855eaaSJohn Baldwin aesd v24.16b,v20.16b 820c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 821c0855eaaSJohn Baldwin aesd v25.16b,v20.16b 822c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 823c0855eaaSJohn Baldwin aesd v26.16b,v20.16b 824c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 825c0855eaaSJohn Baldwin 826c0855eaaSJohn Baldwin aesd v0.16b,v21.16b 827c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 828c0855eaaSJohn Baldwin aesd v1.16b,v21.16b 829c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 830c0855eaaSJohn Baldwin aesd v24.16b,v21.16b 831c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 832c0855eaaSJohn Baldwin aesd v25.16b,v21.16b 833c0855eaaSJohn Baldwin aesimc 
v25.16b,v25.16b 834c0855eaaSJohn Baldwin aesd v26.16b,v21.16b 835c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 836c0855eaaSJohn Baldwin 837c0855eaaSJohn Baldwin aesd v0.16b,v22.16b 838c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 839c0855eaaSJohn Baldwin aesd v1.16b,v22.16b 840c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 841c0855eaaSJohn Baldwin aesd v24.16b,v22.16b 842c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 843c0855eaaSJohn Baldwin aesd v25.16b,v22.16b 844c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 845c0855eaaSJohn Baldwin aesd v26.16b,v22.16b 846c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 847c0855eaaSJohn Baldwin 848c0855eaaSJohn Baldwin aesd v0.16b,v23.16b 849c0855eaaSJohn Baldwin ld1 {v2.16b},[x0],#16 850c0855eaaSJohn Baldwin aesd v1.16b,v23.16b 851c0855eaaSJohn Baldwin ld1 {v3.16b},[x0],#16 852c0855eaaSJohn Baldwin aesd v24.16b,v23.16b 853c0855eaaSJohn Baldwin ld1 {v27.16b},[x0],#16 854c0855eaaSJohn Baldwin aesd v25.16b,v23.16b 855c0855eaaSJohn Baldwin ld1 {v28.16b},[x0],#16 856c0855eaaSJohn Baldwin aesd v26.16b,v23.16b 857c0855eaaSJohn Baldwin ld1 {v29.16b},[x0],#16 858c0855eaaSJohn Baldwin cbz x6,.Lecb_tail4x 859c0855eaaSJohn Baldwin ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 860c0855eaaSJohn Baldwin eor v4.16b,v7.16b,v0.16b 861c0855eaaSJohn Baldwin orr v0.16b,v2.16b,v2.16b 862c0855eaaSJohn Baldwin eor v5.16b,v7.16b,v1.16b 863c0855eaaSJohn Baldwin orr v1.16b,v3.16b,v3.16b 864c0855eaaSJohn Baldwin eor v17.16b,v7.16b,v24.16b 865c0855eaaSJohn Baldwin orr v24.16b,v27.16b,v27.16b 866c0855eaaSJohn Baldwin eor v30.16b,v7.16b,v25.16b 867c0855eaaSJohn Baldwin orr v25.16b,v28.16b,v28.16b 868c0855eaaSJohn Baldwin eor v31.16b,v7.16b,v26.16b 869c0855eaaSJohn Baldwin st1 {v4.16b},[x1],#16 870c0855eaaSJohn Baldwin orr v26.16b,v29.16b,v29.16b 871c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 872c0855eaaSJohn Baldwin mov w6,w5 873c0855eaaSJohn Baldwin st1 {v17.16b},[x1],#16 874c0855eaaSJohn Baldwin ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 875c0855eaaSJohn Baldwin st1 
{v30.16b},[x1],#16 876c0855eaaSJohn Baldwin st1 {v31.16b},[x1],#16 877c0855eaaSJohn Baldwin b.hs .Loop5x_ecb_dec 878c0855eaaSJohn Baldwin 879c0855eaaSJohn Baldwin add x2,x2,#0x50 880c0855eaaSJohn Baldwin cbz x2,.Lecb_done 881c0855eaaSJohn Baldwin 882c0855eaaSJohn Baldwin add w6,w5,#2 883c0855eaaSJohn Baldwin subs x2,x2,#0x30 884c0855eaaSJohn Baldwin orr v0.16b,v27.16b,v27.16b 885c0855eaaSJohn Baldwin orr v1.16b,v28.16b,v28.16b 886c0855eaaSJohn Baldwin orr v24.16b,v29.16b,v29.16b 887c0855eaaSJohn Baldwin b.lo .Lecb_dec_tail 888c0855eaaSJohn Baldwin 889c0855eaaSJohn Baldwin b .Loop3x_ecb_dec 890c0855eaaSJohn Baldwin 891c0855eaaSJohn Baldwin.align 4 892c0855eaaSJohn Baldwin.Lecb_tail4x: 893c0855eaaSJohn Baldwin eor v5.16b,v7.16b,v1.16b 894c0855eaaSJohn Baldwin eor v17.16b,v7.16b,v24.16b 895c0855eaaSJohn Baldwin eor v30.16b,v7.16b,v25.16b 896c0855eaaSJohn Baldwin eor v31.16b,v7.16b,v26.16b 897c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 898c0855eaaSJohn Baldwin st1 {v17.16b},[x1],#16 899c0855eaaSJohn Baldwin st1 {v30.16b},[x1],#16 900c0855eaaSJohn Baldwin st1 {v31.16b},[x1],#16 901c0855eaaSJohn Baldwin 902c0855eaaSJohn Baldwin b .Lecb_done 903c0855eaaSJohn Baldwin.align 4 904c0855eaaSJohn Baldwin.Loop3x_ecb_dec: 905c0855eaaSJohn Baldwin aesd v0.16b,v16.16b 906c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 907c0855eaaSJohn Baldwin aesd v1.16b,v16.16b 908c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 909c0855eaaSJohn Baldwin aesd v24.16b,v16.16b 910c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 911c0855eaaSJohn Baldwin ld1 {v16.4s},[x7],#16 912c0855eaaSJohn Baldwin subs w6,w6,#2 913c0855eaaSJohn Baldwin aesd v0.16b,v17.16b 914c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 915c0855eaaSJohn Baldwin aesd v1.16b,v17.16b 916c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 917c0855eaaSJohn Baldwin aesd v24.16b,v17.16b 918c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 919c0855eaaSJohn Baldwin ld1 {v17.4s},[x7],#16 920c0855eaaSJohn Baldwin b.gt .Loop3x_ecb_dec 921c0855eaaSJohn Baldwin 922c0855eaaSJohn 
Baldwin aesd v0.16b,v16.16b 923c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 924c0855eaaSJohn Baldwin aesd v1.16b,v16.16b 925c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 926c0855eaaSJohn Baldwin aesd v24.16b,v16.16b 927c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 928c0855eaaSJohn Baldwin subs x2,x2,#0x30 929c0855eaaSJohn Baldwin csel x6,x2,x6,lo // x6, w6, is zero at this point 930c0855eaaSJohn Baldwin aesd v0.16b,v17.16b 931c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 932c0855eaaSJohn Baldwin aesd v1.16b,v17.16b 933c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 934c0855eaaSJohn Baldwin aesd v24.16b,v17.16b 935c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 936c0855eaaSJohn Baldwin add x0,x0,x6 // x0 is adjusted in such way that 937c0855eaaSJohn Baldwin // at exit from the loop v1.16b-v24.16b 938c0855eaaSJohn Baldwin // are loaded with last "words" 939c0855eaaSJohn Baldwin mov x7,x3 940c0855eaaSJohn Baldwin aesd v0.16b,v20.16b 941c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 942c0855eaaSJohn Baldwin aesd v1.16b,v20.16b 943c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 944c0855eaaSJohn Baldwin aesd v24.16b,v20.16b 945c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 946c0855eaaSJohn Baldwin ld1 {v2.16b},[x0],#16 947c0855eaaSJohn Baldwin aesd v0.16b,v21.16b 948c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 949c0855eaaSJohn Baldwin aesd v1.16b,v21.16b 950c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 951c0855eaaSJohn Baldwin aesd v24.16b,v21.16b 952c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 953c0855eaaSJohn Baldwin ld1 {v3.16b},[x0],#16 954c0855eaaSJohn Baldwin aesd v0.16b,v22.16b 955c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 956c0855eaaSJohn Baldwin aesd v1.16b,v22.16b 957c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 958c0855eaaSJohn Baldwin aesd v24.16b,v22.16b 959c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 960c0855eaaSJohn Baldwin ld1 {v27.16b},[x0],#16 961c0855eaaSJohn Baldwin aesd v0.16b,v23.16b 962c0855eaaSJohn Baldwin aesd v1.16b,v23.16b 963c0855eaaSJohn Baldwin aesd v24.16b,v23.16b 
964c0855eaaSJohn Baldwin ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 965c0855eaaSJohn Baldwin add w6,w5,#2 966c0855eaaSJohn Baldwin eor v4.16b,v7.16b,v0.16b 967c0855eaaSJohn Baldwin eor v5.16b,v7.16b,v1.16b 968c0855eaaSJohn Baldwin eor v24.16b,v24.16b,v7.16b 969c0855eaaSJohn Baldwin ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 970c0855eaaSJohn Baldwin st1 {v4.16b},[x1],#16 971c0855eaaSJohn Baldwin orr v0.16b,v2.16b,v2.16b 972c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 973c0855eaaSJohn Baldwin orr v1.16b,v3.16b,v3.16b 974c0855eaaSJohn Baldwin st1 {v24.16b},[x1],#16 975c0855eaaSJohn Baldwin orr v24.16b,v27.16b,v27.16b 976c0855eaaSJohn Baldwin b.hs .Loop3x_ecb_dec 977c0855eaaSJohn Baldwin 978c0855eaaSJohn Baldwin cmn x2,#0x30 979c0855eaaSJohn Baldwin b.eq .Lecb_done 980c0855eaaSJohn Baldwin nop 981c0855eaaSJohn Baldwin 982c0855eaaSJohn Baldwin.Lecb_dec_tail: 983c0855eaaSJohn Baldwin aesd v1.16b,v16.16b 984c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 985c0855eaaSJohn Baldwin aesd v24.16b,v16.16b 986c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 987c0855eaaSJohn Baldwin ld1 {v16.4s},[x7],#16 988c0855eaaSJohn Baldwin subs w6,w6,#2 989c0855eaaSJohn Baldwin aesd v1.16b,v17.16b 990c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 991c0855eaaSJohn Baldwin aesd v24.16b,v17.16b 992c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 993c0855eaaSJohn Baldwin ld1 {v17.4s},[x7],#16 994c0855eaaSJohn Baldwin b.gt .Lecb_dec_tail 995c0855eaaSJohn Baldwin 996c0855eaaSJohn Baldwin aesd v1.16b,v16.16b 997c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 998c0855eaaSJohn Baldwin aesd v24.16b,v16.16b 999c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1000c0855eaaSJohn Baldwin aesd v1.16b,v17.16b 1001c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 1002c0855eaaSJohn Baldwin aesd v24.16b,v17.16b 1003c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1004c0855eaaSJohn Baldwin aesd v1.16b,v20.16b 1005c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 1006c0855eaaSJohn Baldwin aesd v24.16b,v20.16b 1007c0855eaaSJohn Baldwin aesimc 
v24.16b,v24.16b 1008c0855eaaSJohn Baldwin cmn x2,#0x20 1009c0855eaaSJohn Baldwin aesd v1.16b,v21.16b 1010c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 1011c0855eaaSJohn Baldwin aesd v24.16b,v21.16b 1012c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1013c0855eaaSJohn Baldwin aesd v1.16b,v22.16b 1014c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 1015c0855eaaSJohn Baldwin aesd v24.16b,v22.16b 1016c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1017c0855eaaSJohn Baldwin aesd v1.16b,v23.16b 1018c0855eaaSJohn Baldwin aesd v24.16b,v23.16b 1019c0855eaaSJohn Baldwin b.eq .Lecb_dec_one 1020c0855eaaSJohn Baldwin eor v5.16b,v7.16b,v1.16b 1021c0855eaaSJohn Baldwin eor v17.16b,v7.16b,v24.16b 1022c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 1023c0855eaaSJohn Baldwin st1 {v17.16b},[x1],#16 1024c0855eaaSJohn Baldwin b .Lecb_done 1025c0855eaaSJohn Baldwin 1026c0855eaaSJohn Baldwin.Lecb_dec_one: 1027c0855eaaSJohn Baldwin eor v5.16b,v7.16b,v24.16b 1028c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 1029c0855eaaSJohn Baldwin 1030c0855eaaSJohn Baldwin.Lecb_done: 1031c0855eaaSJohn Baldwin ldr x29,[sp],#16 1032c0855eaaSJohn Baldwin.Lecb_Final_abort: 1033c0855eaaSJohn Baldwin ret 1034c0855eaaSJohn Baldwin.size aes_v8_ecb_encrypt,.-aes_v8_ecb_encrypt 1035bc3d5698SJohn Baldwin.globl aes_v8_cbc_encrypt 1036bc3d5698SJohn Baldwin.type aes_v8_cbc_encrypt,%function 1037bc3d5698SJohn Baldwin.align 5 1038bc3d5698SJohn Baldwinaes_v8_cbc_encrypt: 1039bd9588bcSAndrew Turner AARCH64_VALID_CALL_TARGET 1040bd9588bcSAndrew Turner // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. 1041bc3d5698SJohn Baldwin stp x29,x30,[sp,#-16]! 1042bc3d5698SJohn Baldwin add x29,sp,#0 1043bc3d5698SJohn Baldwin subs x2,x2,#16 1044bc3d5698SJohn Baldwin mov x8,#16 1045bc3d5698SJohn Baldwin b.lo .Lcbc_abort 1046bc3d5698SJohn Baldwin csel x8,xzr,x8,eq 1047bc3d5698SJohn Baldwin 1048bc3d5698SJohn Baldwin cmp w5,#0 // en- or decrypting? 
1049bc3d5698SJohn Baldwin ldr w5,[x3,#240] 1050bc3d5698SJohn Baldwin and x2,x2,#-16 1051bc3d5698SJohn Baldwin ld1 {v6.16b},[x4] 1052bc3d5698SJohn Baldwin ld1 {v0.16b},[x0],x8 1053bc3d5698SJohn Baldwin 1054bc3d5698SJohn Baldwin ld1 {v16.4s,v17.4s},[x3] // load key schedule... 1055bc3d5698SJohn Baldwin sub w5,w5,#6 1056bc3d5698SJohn Baldwin add x7,x3,x5,lsl#4 // pointer to last 7 round keys 1057bc3d5698SJohn Baldwin sub w5,w5,#2 1058bc3d5698SJohn Baldwin ld1 {v18.4s,v19.4s},[x7],#32 1059bc3d5698SJohn Baldwin ld1 {v20.4s,v21.4s},[x7],#32 1060bc3d5698SJohn Baldwin ld1 {v22.4s,v23.4s},[x7],#32 1061bc3d5698SJohn Baldwin ld1 {v7.4s},[x7] 1062bc3d5698SJohn Baldwin 1063bc3d5698SJohn Baldwin add x7,x3,#32 1064bc3d5698SJohn Baldwin mov w6,w5 1065bc3d5698SJohn Baldwin b.eq .Lcbc_dec 1066bc3d5698SJohn Baldwin 1067bc3d5698SJohn Baldwin cmp w5,#2 1068bc3d5698SJohn Baldwin eor v0.16b,v0.16b,v6.16b 1069bc3d5698SJohn Baldwin eor v5.16b,v16.16b,v7.16b 1070bc3d5698SJohn Baldwin b.eq .Lcbc_enc128 1071bc3d5698SJohn Baldwin 1072bc3d5698SJohn Baldwin ld1 {v2.4s,v3.4s},[x7] 1073bc3d5698SJohn Baldwin add x7,x3,#16 1074bc3d5698SJohn Baldwin add x6,x3,#16*4 1075bc3d5698SJohn Baldwin add x12,x3,#16*5 1076bc3d5698SJohn Baldwin aese v0.16b,v16.16b 1077bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1078bc3d5698SJohn Baldwin add x14,x3,#16*6 1079bc3d5698SJohn Baldwin add x3,x3,#16*7 1080bc3d5698SJohn Baldwin b .Lenter_cbc_enc 1081bc3d5698SJohn Baldwin 1082bc3d5698SJohn Baldwin.align 4 1083bc3d5698SJohn Baldwin.Loop_cbc_enc: 1084bc3d5698SJohn Baldwin aese v0.16b,v16.16b 1085bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1086bc3d5698SJohn Baldwin st1 {v6.16b},[x1],#16 1087bc3d5698SJohn Baldwin.Lenter_cbc_enc: 1088bc3d5698SJohn Baldwin aese v0.16b,v17.16b 1089bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1090bc3d5698SJohn Baldwin aese v0.16b,v2.16b 1091bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1092bc3d5698SJohn Baldwin ld1 {v16.4s},[x6] 1093bc3d5698SJohn Baldwin cmp w5,#4 1094bc3d5698SJohn Baldwin aese 
v0.16b,v3.16b 1095bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1096bc3d5698SJohn Baldwin ld1 {v17.4s},[x12] 1097bc3d5698SJohn Baldwin b.eq .Lcbc_enc192 1098bc3d5698SJohn Baldwin 1099bc3d5698SJohn Baldwin aese v0.16b,v16.16b 1100bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1101bc3d5698SJohn Baldwin ld1 {v16.4s},[x14] 1102bc3d5698SJohn Baldwin aese v0.16b,v17.16b 1103bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1104bc3d5698SJohn Baldwin ld1 {v17.4s},[x3] 1105bc3d5698SJohn Baldwin nop 1106bc3d5698SJohn Baldwin 1107bc3d5698SJohn Baldwin.Lcbc_enc192: 1108bc3d5698SJohn Baldwin aese v0.16b,v16.16b 1109bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1110bc3d5698SJohn Baldwin subs x2,x2,#16 1111bc3d5698SJohn Baldwin aese v0.16b,v17.16b 1112bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1113bc3d5698SJohn Baldwin csel x8,xzr,x8,eq 1114bc3d5698SJohn Baldwin aese v0.16b,v18.16b 1115bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1116bc3d5698SJohn Baldwin aese v0.16b,v19.16b 1117bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1118bc3d5698SJohn Baldwin ld1 {v16.16b},[x0],x8 1119bc3d5698SJohn Baldwin aese v0.16b,v20.16b 1120bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1121bc3d5698SJohn Baldwin eor v16.16b,v16.16b,v5.16b 1122bc3d5698SJohn Baldwin aese v0.16b,v21.16b 1123bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1124bc3d5698SJohn Baldwin ld1 {v17.4s},[x7] // re-pre-load rndkey[1] 1125bc3d5698SJohn Baldwin aese v0.16b,v22.16b 1126bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1127bc3d5698SJohn Baldwin aese v0.16b,v23.16b 1128bc3d5698SJohn Baldwin eor v6.16b,v0.16b,v7.16b 1129bc3d5698SJohn Baldwin b.hs .Loop_cbc_enc 1130bc3d5698SJohn Baldwin 1131bc3d5698SJohn Baldwin st1 {v6.16b},[x1],#16 1132bc3d5698SJohn Baldwin b .Lcbc_done 1133bc3d5698SJohn Baldwin 1134bc3d5698SJohn Baldwin.align 5 1135bc3d5698SJohn Baldwin.Lcbc_enc128: 1136bc3d5698SJohn Baldwin ld1 {v2.4s,v3.4s},[x7] 1137bc3d5698SJohn Baldwin aese v0.16b,v16.16b 1138bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1139bc3d5698SJohn Baldwin b .Lenter_cbc_enc128 
1140bc3d5698SJohn Baldwin.Loop_cbc_enc128: 1141bc3d5698SJohn Baldwin aese v0.16b,v16.16b 1142bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1143bc3d5698SJohn Baldwin st1 {v6.16b},[x1],#16 1144bc3d5698SJohn Baldwin.Lenter_cbc_enc128: 1145bc3d5698SJohn Baldwin aese v0.16b,v17.16b 1146bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1147bc3d5698SJohn Baldwin subs x2,x2,#16 1148bc3d5698SJohn Baldwin aese v0.16b,v2.16b 1149bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1150bc3d5698SJohn Baldwin csel x8,xzr,x8,eq 1151bc3d5698SJohn Baldwin aese v0.16b,v3.16b 1152bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1153bc3d5698SJohn Baldwin aese v0.16b,v18.16b 1154bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1155bc3d5698SJohn Baldwin aese v0.16b,v19.16b 1156bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1157bc3d5698SJohn Baldwin ld1 {v16.16b},[x0],x8 1158bc3d5698SJohn Baldwin aese v0.16b,v20.16b 1159bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1160bc3d5698SJohn Baldwin aese v0.16b,v21.16b 1161bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1162bc3d5698SJohn Baldwin aese v0.16b,v22.16b 1163bc3d5698SJohn Baldwin aesmc v0.16b,v0.16b 1164bc3d5698SJohn Baldwin eor v16.16b,v16.16b,v5.16b 1165bc3d5698SJohn Baldwin aese v0.16b,v23.16b 1166bc3d5698SJohn Baldwin eor v6.16b,v0.16b,v7.16b 1167bc3d5698SJohn Baldwin b.hs .Loop_cbc_enc128 1168bc3d5698SJohn Baldwin 1169bc3d5698SJohn Baldwin st1 {v6.16b},[x1],#16 1170bc3d5698SJohn Baldwin b .Lcbc_done 1171bc3d5698SJohn Baldwin.align 5 1172bc3d5698SJohn Baldwin.Lcbc_dec: 1173c0855eaaSJohn Baldwin ld1 {v24.16b},[x0],#16 1174bc3d5698SJohn Baldwin subs x2,x2,#32 // bias 1175bc3d5698SJohn Baldwin add w6,w5,#2 1176bc3d5698SJohn Baldwin orr v3.16b,v0.16b,v0.16b 1177bc3d5698SJohn Baldwin orr v1.16b,v0.16b,v0.16b 1178c0855eaaSJohn Baldwin orr v27.16b,v24.16b,v24.16b 1179bc3d5698SJohn Baldwin b.lo .Lcbc_dec_tail 1180bc3d5698SJohn Baldwin 1181c0855eaaSJohn Baldwin orr v1.16b,v24.16b,v24.16b 1182c0855eaaSJohn Baldwin ld1 {v24.16b},[x0],#16 1183bc3d5698SJohn Baldwin orr v2.16b,v0.16b,v0.16b 
1184bc3d5698SJohn Baldwin orr v3.16b,v1.16b,v1.16b 1185c0855eaaSJohn Baldwin orr v27.16b,v24.16b,v24.16b 1186c0855eaaSJohn Baldwin cmp x2,#32 1187c0855eaaSJohn Baldwin b.lo .Loop3x_cbc_dec 1188bc3d5698SJohn Baldwin 1189c0855eaaSJohn Baldwin ld1 {v25.16b},[x0],#16 1190c0855eaaSJohn Baldwin ld1 {v26.16b},[x0],#16 1191c0855eaaSJohn Baldwin sub x2,x2,#32 // bias 1192c0855eaaSJohn Baldwin mov w6,w5 1193c0855eaaSJohn Baldwin orr v28.16b,v25.16b,v25.16b 1194c0855eaaSJohn Baldwin orr v29.16b,v26.16b,v26.16b 1195c0855eaaSJohn Baldwin 1196c0855eaaSJohn Baldwin.Loop5x_cbc_dec: 1197bc3d5698SJohn Baldwin aesd v0.16b,v16.16b 1198bc3d5698SJohn Baldwin aesimc v0.16b,v0.16b 1199bc3d5698SJohn Baldwin aesd v1.16b,v16.16b 1200bc3d5698SJohn Baldwin aesimc v1.16b,v1.16b 1201c0855eaaSJohn Baldwin aesd v24.16b,v16.16b 1202c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1203c0855eaaSJohn Baldwin aesd v25.16b,v16.16b 1204c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 1205c0855eaaSJohn Baldwin aesd v26.16b,v16.16b 1206c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 1207bc3d5698SJohn Baldwin ld1 {v16.4s},[x7],#16 1208bc3d5698SJohn Baldwin subs w6,w6,#2 1209bc3d5698SJohn Baldwin aesd v0.16b,v17.16b 1210bc3d5698SJohn Baldwin aesimc v0.16b,v0.16b 1211bc3d5698SJohn Baldwin aesd v1.16b,v17.16b 1212bc3d5698SJohn Baldwin aesimc v1.16b,v1.16b 1213c0855eaaSJohn Baldwin aesd v24.16b,v17.16b 1214c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1215c0855eaaSJohn Baldwin aesd v25.16b,v17.16b 1216c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 1217c0855eaaSJohn Baldwin aesd v26.16b,v17.16b 1218c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 1219c0855eaaSJohn Baldwin ld1 {v17.4s},[x7],#16 1220c0855eaaSJohn Baldwin b.gt .Loop5x_cbc_dec 1221c0855eaaSJohn Baldwin 1222c0855eaaSJohn Baldwin aesd v0.16b,v16.16b 1223c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 1224c0855eaaSJohn Baldwin aesd v1.16b,v16.16b 1225c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 1226c0855eaaSJohn Baldwin aesd v24.16b,v16.16b 1227c0855eaaSJohn Baldwin aesimc 
v24.16b,v24.16b 1228c0855eaaSJohn Baldwin aesd v25.16b,v16.16b 1229c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 1230c0855eaaSJohn Baldwin aesd v26.16b,v16.16b 1231c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 1232c0855eaaSJohn Baldwin cmp x2,#0x40 // because .Lcbc_tail4x 1233c0855eaaSJohn Baldwin sub x2,x2,#0x50 1234c0855eaaSJohn Baldwin 1235c0855eaaSJohn Baldwin aesd v0.16b,v17.16b 1236c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 1237c0855eaaSJohn Baldwin aesd v1.16b,v17.16b 1238c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 1239c0855eaaSJohn Baldwin aesd v24.16b,v17.16b 1240c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1241c0855eaaSJohn Baldwin aesd v25.16b,v17.16b 1242c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 1243c0855eaaSJohn Baldwin aesd v26.16b,v17.16b 1244c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 1245c0855eaaSJohn Baldwin csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo 1246c0855eaaSJohn Baldwin mov x7,x3 1247c0855eaaSJohn Baldwin 1248c0855eaaSJohn Baldwin aesd v0.16b,v18.16b 1249c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 1250c0855eaaSJohn Baldwin aesd v1.16b,v18.16b 1251c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 1252c0855eaaSJohn Baldwin aesd v24.16b,v18.16b 1253c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1254c0855eaaSJohn Baldwin aesd v25.16b,v18.16b 1255c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 1256c0855eaaSJohn Baldwin aesd v26.16b,v18.16b 1257c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 1258c0855eaaSJohn Baldwin add x0,x0,x6 // x0 is adjusted in such way that 1259c0855eaaSJohn Baldwin // at exit from the loop v1.16b-v26.16b 1260c0855eaaSJohn Baldwin // are loaded with last "words" 1261c0855eaaSJohn Baldwin add x6,x2,#0x60 // because .Lcbc_tail4x 1262c0855eaaSJohn Baldwin 1263c0855eaaSJohn Baldwin aesd v0.16b,v19.16b 1264c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 1265c0855eaaSJohn Baldwin aesd v1.16b,v19.16b 1266c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 1267c0855eaaSJohn Baldwin aesd v24.16b,v19.16b 1268c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 
1269c0855eaaSJohn Baldwin aesd v25.16b,v19.16b 1270c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 1271c0855eaaSJohn Baldwin aesd v26.16b,v19.16b 1272c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 1273c0855eaaSJohn Baldwin 1274c0855eaaSJohn Baldwin aesd v0.16b,v20.16b 1275c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 1276c0855eaaSJohn Baldwin aesd v1.16b,v20.16b 1277c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 1278c0855eaaSJohn Baldwin aesd v24.16b,v20.16b 1279c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1280c0855eaaSJohn Baldwin aesd v25.16b,v20.16b 1281c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 1282c0855eaaSJohn Baldwin aesd v26.16b,v20.16b 1283c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 1284c0855eaaSJohn Baldwin 1285c0855eaaSJohn Baldwin aesd v0.16b,v21.16b 1286c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 1287c0855eaaSJohn Baldwin aesd v1.16b,v21.16b 1288c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 1289c0855eaaSJohn Baldwin aesd v24.16b,v21.16b 1290c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1291c0855eaaSJohn Baldwin aesd v25.16b,v21.16b 1292c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 1293c0855eaaSJohn Baldwin aesd v26.16b,v21.16b 1294c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 1295c0855eaaSJohn Baldwin 1296c0855eaaSJohn Baldwin aesd v0.16b,v22.16b 1297c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 1298c0855eaaSJohn Baldwin aesd v1.16b,v22.16b 1299c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 1300c0855eaaSJohn Baldwin aesd v24.16b,v22.16b 1301c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1302c0855eaaSJohn Baldwin aesd v25.16b,v22.16b 1303c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 1304c0855eaaSJohn Baldwin aesd v26.16b,v22.16b 1305c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 1306c0855eaaSJohn Baldwin 1307c0855eaaSJohn Baldwin eor v4.16b,v6.16b,v7.16b 1308c0855eaaSJohn Baldwin aesd v0.16b,v23.16b 1309c0855eaaSJohn Baldwin eor v5.16b,v2.16b,v7.16b 1310c0855eaaSJohn Baldwin ld1 {v2.16b},[x0],#16 1311c0855eaaSJohn Baldwin aesd v1.16b,v23.16b 1312c0855eaaSJohn Baldwin eor 
v17.16b,v3.16b,v7.16b 1313c0855eaaSJohn Baldwin ld1 {v3.16b},[x0],#16 1314c0855eaaSJohn Baldwin aesd v24.16b,v23.16b 1315c0855eaaSJohn Baldwin eor v30.16b,v27.16b,v7.16b 1316c0855eaaSJohn Baldwin ld1 {v27.16b},[x0],#16 1317c0855eaaSJohn Baldwin aesd v25.16b,v23.16b 1318c0855eaaSJohn Baldwin eor v31.16b,v28.16b,v7.16b 1319c0855eaaSJohn Baldwin ld1 {v28.16b},[x0],#16 1320c0855eaaSJohn Baldwin aesd v26.16b,v23.16b 1321c0855eaaSJohn Baldwin orr v6.16b,v29.16b,v29.16b 1322c0855eaaSJohn Baldwin ld1 {v29.16b},[x0],#16 1323c0855eaaSJohn Baldwin cbz x6,.Lcbc_tail4x 1324c0855eaaSJohn Baldwin ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 1325c0855eaaSJohn Baldwin eor v4.16b,v4.16b,v0.16b 1326c0855eaaSJohn Baldwin orr v0.16b,v2.16b,v2.16b 1327c0855eaaSJohn Baldwin eor v5.16b,v5.16b,v1.16b 1328c0855eaaSJohn Baldwin orr v1.16b,v3.16b,v3.16b 1329c0855eaaSJohn Baldwin eor v17.16b,v17.16b,v24.16b 1330c0855eaaSJohn Baldwin orr v24.16b,v27.16b,v27.16b 1331c0855eaaSJohn Baldwin eor v30.16b,v30.16b,v25.16b 1332c0855eaaSJohn Baldwin orr v25.16b,v28.16b,v28.16b 1333c0855eaaSJohn Baldwin eor v31.16b,v31.16b,v26.16b 1334c0855eaaSJohn Baldwin st1 {v4.16b},[x1],#16 1335c0855eaaSJohn Baldwin orr v26.16b,v29.16b,v29.16b 1336c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 1337c0855eaaSJohn Baldwin mov w6,w5 1338c0855eaaSJohn Baldwin st1 {v17.16b},[x1],#16 1339c0855eaaSJohn Baldwin ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 1340c0855eaaSJohn Baldwin st1 {v30.16b},[x1],#16 1341c0855eaaSJohn Baldwin st1 {v31.16b},[x1],#16 1342c0855eaaSJohn Baldwin b.hs .Loop5x_cbc_dec 1343c0855eaaSJohn Baldwin 1344c0855eaaSJohn Baldwin add x2,x2,#0x50 1345c0855eaaSJohn Baldwin cbz x2,.Lcbc_done 1346c0855eaaSJohn Baldwin 1347c0855eaaSJohn Baldwin add w6,w5,#2 1348c0855eaaSJohn Baldwin subs x2,x2,#0x30 1349c0855eaaSJohn Baldwin orr v0.16b,v27.16b,v27.16b 1350c0855eaaSJohn Baldwin orr v2.16b,v27.16b,v27.16b 1351c0855eaaSJohn Baldwin orr v1.16b,v28.16b,v28.16b 1352c0855eaaSJohn Baldwin orr v3.16b,v28.16b,v28.16b 
1353c0855eaaSJohn Baldwin orr v24.16b,v29.16b,v29.16b 1354c0855eaaSJohn Baldwin orr v27.16b,v29.16b,v29.16b 1355c0855eaaSJohn Baldwin b.lo .Lcbc_dec_tail 1356c0855eaaSJohn Baldwin 1357c0855eaaSJohn Baldwin b .Loop3x_cbc_dec 1358c0855eaaSJohn Baldwin 1359c0855eaaSJohn Baldwin.align 4 1360c0855eaaSJohn Baldwin.Lcbc_tail4x: 1361c0855eaaSJohn Baldwin eor v5.16b,v4.16b,v1.16b 1362c0855eaaSJohn Baldwin eor v17.16b,v17.16b,v24.16b 1363c0855eaaSJohn Baldwin eor v30.16b,v30.16b,v25.16b 1364c0855eaaSJohn Baldwin eor v31.16b,v31.16b,v26.16b 1365c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 1366c0855eaaSJohn Baldwin st1 {v17.16b},[x1],#16 1367c0855eaaSJohn Baldwin st1 {v30.16b},[x1],#16 1368c0855eaaSJohn Baldwin st1 {v31.16b},[x1],#16 1369c0855eaaSJohn Baldwin 1370c0855eaaSJohn Baldwin b .Lcbc_done 1371c0855eaaSJohn Baldwin.align 4 1372c0855eaaSJohn Baldwin.Loop3x_cbc_dec: 1373c0855eaaSJohn Baldwin aesd v0.16b,v16.16b 1374c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 1375c0855eaaSJohn Baldwin aesd v1.16b,v16.16b 1376c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 1377c0855eaaSJohn Baldwin aesd v24.16b,v16.16b 1378c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1379c0855eaaSJohn Baldwin ld1 {v16.4s},[x7],#16 1380c0855eaaSJohn Baldwin subs w6,w6,#2 1381c0855eaaSJohn Baldwin aesd v0.16b,v17.16b 1382c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 1383c0855eaaSJohn Baldwin aesd v1.16b,v17.16b 1384c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 1385c0855eaaSJohn Baldwin aesd v24.16b,v17.16b 1386c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1387bc3d5698SJohn Baldwin ld1 {v17.4s},[x7],#16 1388bc3d5698SJohn Baldwin b.gt .Loop3x_cbc_dec 1389bc3d5698SJohn Baldwin 1390bc3d5698SJohn Baldwin aesd v0.16b,v16.16b 1391bc3d5698SJohn Baldwin aesimc v0.16b,v0.16b 1392bc3d5698SJohn Baldwin aesd v1.16b,v16.16b 1393bc3d5698SJohn Baldwin aesimc v1.16b,v1.16b 1394c0855eaaSJohn Baldwin aesd v24.16b,v16.16b 1395c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1396bc3d5698SJohn Baldwin eor v4.16b,v6.16b,v7.16b 1397bc3d5698SJohn 
Baldwin subs x2,x2,#0x30 1398bc3d5698SJohn Baldwin eor v5.16b,v2.16b,v7.16b 1399bc3d5698SJohn Baldwin csel x6,x2,x6,lo // x6, w6, is zero at this point 1400bc3d5698SJohn Baldwin aesd v0.16b,v17.16b 1401bc3d5698SJohn Baldwin aesimc v0.16b,v0.16b 1402bc3d5698SJohn Baldwin aesd v1.16b,v17.16b 1403bc3d5698SJohn Baldwin aesimc v1.16b,v1.16b 1404c0855eaaSJohn Baldwin aesd v24.16b,v17.16b 1405c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1406bc3d5698SJohn Baldwin eor v17.16b,v3.16b,v7.16b 1407bc3d5698SJohn Baldwin add x0,x0,x6 // x0 is adjusted in such way that 1408c0855eaaSJohn Baldwin // at exit from the loop v1.16b-v24.16b 1409bc3d5698SJohn Baldwin // are loaded with last "words" 1410c0855eaaSJohn Baldwin orr v6.16b,v27.16b,v27.16b 1411bc3d5698SJohn Baldwin mov x7,x3 1412bc3d5698SJohn Baldwin aesd v0.16b,v20.16b 1413bc3d5698SJohn Baldwin aesimc v0.16b,v0.16b 1414bc3d5698SJohn Baldwin aesd v1.16b,v20.16b 1415bc3d5698SJohn Baldwin aesimc v1.16b,v1.16b 1416c0855eaaSJohn Baldwin aesd v24.16b,v20.16b 1417c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1418bc3d5698SJohn Baldwin ld1 {v2.16b},[x0],#16 1419bc3d5698SJohn Baldwin aesd v0.16b,v21.16b 1420bc3d5698SJohn Baldwin aesimc v0.16b,v0.16b 1421bc3d5698SJohn Baldwin aesd v1.16b,v21.16b 1422bc3d5698SJohn Baldwin aesimc v1.16b,v1.16b 1423c0855eaaSJohn Baldwin aesd v24.16b,v21.16b 1424c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1425bc3d5698SJohn Baldwin ld1 {v3.16b},[x0],#16 1426bc3d5698SJohn Baldwin aesd v0.16b,v22.16b 1427bc3d5698SJohn Baldwin aesimc v0.16b,v0.16b 1428bc3d5698SJohn Baldwin aesd v1.16b,v22.16b 1429bc3d5698SJohn Baldwin aesimc v1.16b,v1.16b 1430c0855eaaSJohn Baldwin aesd v24.16b,v22.16b 1431c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1432c0855eaaSJohn Baldwin ld1 {v27.16b},[x0],#16 1433bc3d5698SJohn Baldwin aesd v0.16b,v23.16b 1434bc3d5698SJohn Baldwin aesd v1.16b,v23.16b 1435c0855eaaSJohn Baldwin aesd v24.16b,v23.16b 1436bc3d5698SJohn Baldwin ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 1437bc3d5698SJohn 
Baldwin add w6,w5,#2 1438bc3d5698SJohn Baldwin eor v4.16b,v4.16b,v0.16b 1439bc3d5698SJohn Baldwin eor v5.16b,v5.16b,v1.16b 1440c0855eaaSJohn Baldwin eor v24.16b,v24.16b,v17.16b 1441bc3d5698SJohn Baldwin ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 1442bc3d5698SJohn Baldwin st1 {v4.16b},[x1],#16 1443bc3d5698SJohn Baldwin orr v0.16b,v2.16b,v2.16b 1444bc3d5698SJohn Baldwin st1 {v5.16b},[x1],#16 1445bc3d5698SJohn Baldwin orr v1.16b,v3.16b,v3.16b 1446c0855eaaSJohn Baldwin st1 {v24.16b},[x1],#16 1447c0855eaaSJohn Baldwin orr v24.16b,v27.16b,v27.16b 1448bc3d5698SJohn Baldwin b.hs .Loop3x_cbc_dec 1449bc3d5698SJohn Baldwin 1450bc3d5698SJohn Baldwin cmn x2,#0x30 1451bc3d5698SJohn Baldwin b.eq .Lcbc_done 1452bc3d5698SJohn Baldwin nop 1453bc3d5698SJohn Baldwin 1454bc3d5698SJohn Baldwin.Lcbc_dec_tail: 1455bc3d5698SJohn Baldwin aesd v1.16b,v16.16b 1456bc3d5698SJohn Baldwin aesimc v1.16b,v1.16b 1457c0855eaaSJohn Baldwin aesd v24.16b,v16.16b 1458c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1459bc3d5698SJohn Baldwin ld1 {v16.4s},[x7],#16 1460bc3d5698SJohn Baldwin subs w6,w6,#2 1461bc3d5698SJohn Baldwin aesd v1.16b,v17.16b 1462bc3d5698SJohn Baldwin aesimc v1.16b,v1.16b 1463c0855eaaSJohn Baldwin aesd v24.16b,v17.16b 1464c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1465bc3d5698SJohn Baldwin ld1 {v17.4s},[x7],#16 1466bc3d5698SJohn Baldwin b.gt .Lcbc_dec_tail 1467bc3d5698SJohn Baldwin 1468bc3d5698SJohn Baldwin aesd v1.16b,v16.16b 1469bc3d5698SJohn Baldwin aesimc v1.16b,v1.16b 1470c0855eaaSJohn Baldwin aesd v24.16b,v16.16b 1471c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1472bc3d5698SJohn Baldwin aesd v1.16b,v17.16b 1473bc3d5698SJohn Baldwin aesimc v1.16b,v1.16b 1474c0855eaaSJohn Baldwin aesd v24.16b,v17.16b 1475c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1476bc3d5698SJohn Baldwin aesd v1.16b,v20.16b 1477bc3d5698SJohn Baldwin aesimc v1.16b,v1.16b 1478c0855eaaSJohn Baldwin aesd v24.16b,v20.16b 1479c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1480bc3d5698SJohn Baldwin cmn x2,#0x20 
1481bc3d5698SJohn Baldwin aesd v1.16b,v21.16b 1482bc3d5698SJohn Baldwin aesimc v1.16b,v1.16b 1483c0855eaaSJohn Baldwin aesd v24.16b,v21.16b 1484c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1485bc3d5698SJohn Baldwin eor v5.16b,v6.16b,v7.16b 1486bc3d5698SJohn Baldwin aesd v1.16b,v22.16b 1487bc3d5698SJohn Baldwin aesimc v1.16b,v1.16b 1488c0855eaaSJohn Baldwin aesd v24.16b,v22.16b 1489c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 1490bc3d5698SJohn Baldwin eor v17.16b,v3.16b,v7.16b 1491bc3d5698SJohn Baldwin aesd v1.16b,v23.16b 1492c0855eaaSJohn Baldwin aesd v24.16b,v23.16b 1493bc3d5698SJohn Baldwin b.eq .Lcbc_dec_one 1494bc3d5698SJohn Baldwin eor v5.16b,v5.16b,v1.16b 1495c0855eaaSJohn Baldwin eor v17.16b,v17.16b,v24.16b 1496c0855eaaSJohn Baldwin orr v6.16b,v27.16b,v27.16b 1497bc3d5698SJohn Baldwin st1 {v5.16b},[x1],#16 1498bc3d5698SJohn Baldwin st1 {v17.16b},[x1],#16 1499bc3d5698SJohn Baldwin b .Lcbc_done 1500bc3d5698SJohn Baldwin 1501bc3d5698SJohn Baldwin.Lcbc_dec_one: 1502c0855eaaSJohn Baldwin eor v5.16b,v5.16b,v24.16b 1503c0855eaaSJohn Baldwin orr v6.16b,v27.16b,v27.16b 1504bc3d5698SJohn Baldwin st1 {v5.16b},[x1],#16 1505bc3d5698SJohn Baldwin 1506bc3d5698SJohn Baldwin.Lcbc_done: 1507bc3d5698SJohn Baldwin st1 {v6.16b},[x4] 1508bc3d5698SJohn Baldwin.Lcbc_abort: 1509bc3d5698SJohn Baldwin ldr x29,[sp],#16 1510bc3d5698SJohn Baldwin ret 1511bc3d5698SJohn Baldwin.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt

// ---------------------------------------------------------------------
// aes_v8_ctr32_encrypt_blocks
//
// AES counter-mode: encrypts successive counter blocks with the Armv8
// aese/aesmc instructions and XORs the result into the input stream.
// Register roles are read off the code below; the mapping to a C
// prototype is not visible in this file and should be confirmed against
// the caller-side declaration.
//
// In:
//   x0  input pointer, advanced as blocks are loaded
//   x1  output pointer, advanced as blocks are stored
//   x2  number of 16-byte blocks to process
//   x3  key schedule; a 32-bit value is read from [x3,#240]
//       (presumably the round count -- confirm against the key setup)
//   x4  16-byte initial counter block; only read here, never stored back
//
// Scratch: x5-x10, x12-x14, v0-v7, v16-v27, condition flags.
// Stack:   one 16-byte frame holding x29/x30; only x29 is reloaded on
//          exit (no bl/blr is executed, so x30 is still the return
//          address).
//
// Vector register roles:
//   v6               copy of the initial counter block, used as a
//                    template whose lane s[3] is replaced per block
//   v0,v1,v18        counter blocks being encrypted (plus v24,v25 in
//                    the 5-block loop)
//   v16,v17          round keys streamed through x7, two per iteration
//   v20-v23, v7      the last five round keys, kept resident
//   v2,v3,v19,v26,v27  input blocks
//
// The 32-bit counter is the word at [x4,#12].  It is kept as a native
// integer in w8 and byte-reversed again each time it is inserted into
// lane s[3] of a counter block.
//
// Change relative to the previous text of this routine: a zero block
// count now returns immediately.  Previously x2 == 0 satisfied the
// "b.ls .Lctr32_tail" test and the tail path then loaded from x0 and
// stored to x1.  Behaviour for x2 >= 1 is unchanged.  The file header
// states this file is generated from aesv8-armx.pl, so the same guard
// should be mirrored in that generator.
// ---------------------------------------------------------------------
.globl	aes_v8_ctr32_encrypt_blocks
.type	aes_v8_ctr32_encrypt_blocks,%function
.align	5
aes_v8_ctr32_encrypt_blocks:
	AARCH64_VALID_CALL_TARGET
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	cbz	x2,.Lctr32_done		// zero blocks: pop the frame and return
	ldr	w5,[x3,#240]

	ldr	w8, [x4, #12]		// counter word, still in memory byte order
#ifdef __AARCH64EB__
	ld1	{v0.16b},[x4]
#else
	ld1	{v0.4s},[x4]
#endif
	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16			// tail-path input stride, see csel below
	cmp	x2,#2			// flags stay live until the b.ls below
	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
	sub	w5,w5,#2		// w5 = reload value for the round counter w6
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]
	add	x7,x3,#32		// x7 -> round key following v16/v17
	mov	w6,w5
	csel	x12,xzr,x12,lo		// single block: stride 0, so the tail's
					// second load re-reads the same block
#ifndef __AARCH64EB__
	rev	w8, w8			// little-endian build: make w8 a native integer
#endif
	orr	v1.16b,v0.16b,v0.16b	// v1  = copy of counter block
	add	w10, w8, #1
	orr	v18.16b,v0.16b,v0.16b	// v18 = copy of counter block
	add	w8, w8, #2
	orr	v6.16b,v0.16b,v0.16b	// v6  = template kept for later blocks
	rev	w10, w10
	mov	v1.s[3],w10		// v1 = counter + 1
	b.ls	.Lctr32_tail		// one or two blocks: v0/v1 are enough
	rev	w12, w8			// w12 is scratch from here on; the tail
					// stride is recomputed before it is used
	sub	x2,x2,#3		// bias
	mov	v18.s[3],w12		// v18 = counter + 2
	cmp	x2,#32
	b.lo	.Loop3x_ctr32		// biased count below 32: 3-block loop

	// Five-block path: add two more counter blocks (v24, v25).
	add	w13,w8,#1
	add	w14,w8,#2
	orr	v24.16b,v0.16b,v0.16b
	rev	w13,w13
	orr	v25.16b,v0.16b,v0.16b
	rev	w14,w14
	mov	v24.s[3],w13
	sub	x2,x2,#2		// bias
	mov	v25.s[3],w14
	add	w8,w8,#2		// w8 = counter value held in v25
	b	.Loop5x_ctr32		// branch directly to the aligned label

.align	4
.Loop5x_ctr32:
	// Two rounds per pass over five blocks; v16/v17 are refilled from x7.
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v16.16b
	aesmc	v25.16b,v25.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v17.16b
	aesmc	v25.16b,v25.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop5x_ctr32

	mov	x7,x3			// rewind the key pointer for the next batch
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v16.16b
	aesmc	v25.16b,v25.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]

	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v17.16b
	aesmc	v25.16b,v25.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]

	// The next five counter values (w8+1 .. w8+5) are prepared in
	// w9,w10,w12,w13,w14 in between the remaining rounds.
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	add	w9,w8,#1
	add	w10,w8,#2
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	add	w12,w8,#3
	add	w13,w8,#4
	aese	v18.16b,v20.16b
	aesmc	v18.16b,v18.16b
	add	w14,w8,#5
	rev	w9,w9
	aese	v24.16b,v20.16b
	aesmc	v24.16b,v24.16b
	rev	w10,w10
	rev	w12,w12
	aese	v25.16b,v20.16b
	aesmc	v25.16b,v25.16b
	rev	w13,w13
	rev	w14,w14

	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v21.16b
	aesmc	v18.16b,v18.16b
	aese	v24.16b,v21.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v21.16b
	aesmc	v25.16b,v25.16b

	// Input blocks are loaded while the v22 round is applied.
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	ld1	{v2.16b},[x0],#16
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0],#16
	aese	v18.16b,v22.16b
	aesmc	v18.16b,v18.16b
	ld1	{v19.16b},[x0],#16
	aese	v24.16b,v22.16b
	aesmc	v24.16b,v24.16b
	ld1	{v26.16b},[x0],#16
	aese	v25.16b,v22.16b
	aesmc	v25.16b,v25.16b
	ld1	{v27.16b},[x0],#16

	// Final aese (no aesmc); the last round key v7 is XORed into the
	// input blocks instead of into the cipher state.
	aese	v0.16b,v23.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v1.16b,v23.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v18.16b,v23.16b
	eor	v19.16b,v19.16b,v7.16b
	aese	v24.16b,v23.16b
	eor	v26.16b,v26.16b,v7.16b
	aese	v25.16b,v23.16b
	eor	v27.16b,v27.16b,v7.16b

	// Combine with the cipher state, then reset each state register
	// to the counter template v6.
	eor	v2.16b,v2.16b,v0.16b
	orr	v0.16b,v6.16b,v6.16b
	eor	v3.16b,v3.16b,v1.16b
	orr	v1.16b,v6.16b,v6.16b
	eor	v19.16b,v19.16b,v18.16b
	orr	v18.16b,v6.16b,v6.16b
	eor	v26.16b,v26.16b,v24.16b
	orr	v24.16b,v6.16b,v6.16b
	eor	v27.16b,v27.16b,v25.16b
	orr	v25.16b,v6.16b,v6.16b

	// Store five output blocks and install the next five counters.
	st1	{v2.16b},[x1],#16
	mov	v0.s[3],w9
	st1	{v3.16b},[x1],#16
	mov	v1.s[3],w10
	st1	{v19.16b},[x1],#16
	mov	v18.s[3],w12
	st1	{v26.16b},[x1],#16
	mov	v24.s[3],w13
	st1	{v27.16b},[x1],#16
	mov	v25.s[3],w14

	mov	w6,w5			// reload round counter
	cbz	x2,.Lctr32_done		// biased count hit zero: all blocks done

	add	w8,w8,#5
	subs	x2,x2,#5
	b.hs	.Loop5x_ctr32		// at least five more blocks

	// Fewer than five blocks left: undo the last adjustment so that
	// x2 is the plain remaining count (1..4) and w8 matches the
	// counters already placed in v0/v1/v18.
	add	x2,x2,#5
	sub	w8,w8,#5

	cmp	x2,#2
	mov	x12,#16
	csel	x12,xzr,x12,lo		// one block left: tail stride 0
	b.ls	.Lctr32_tail

	sub	x2,x2,#3		// bias
	add	w8,w8,#3		// w8 = counter value held in v18
	b	.Loop3x_ctr32

.align	4
.Loop3x_ctr32:
	// Two rounds per pass over three blocks.
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_ctr32

	// The in-flight states move to v4, v5 and v17 so that v0, v1 and
	// v18 can be reset from the template and given the next counters
	// while the remaining rounds complete.
	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16
	orr	v0.16b,v6.16b,v6.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	orr	v1.16b,v6.16b,v6.16b
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3			// rewind the key pointer
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b		// v17's key is consumed; reuse it as state
	orr	v18.16b,v6.16b,v6.16b
	add	w9,w8,#1
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b	// fold last round key into the input
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	eor	v19.16b,v19.16b,v7.16b
	rev	w9,w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	mov	v0.s[3], w9
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	mov	v1.s[3], w10
	rev	w12,w8
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	mov	v18.s[3], w12
	subs	x2,x2,#3		// flags consumed by the b.hs below; nothing
					// in between modifies them
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5			// reload round counter
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	.Loop3x_ctr32

	adds	x2,x2,#3		// remove the bias: x2 = blocks left (0..2)
	b.eq	.Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq		// one block left: tail stride 0

.Lctr32_tail:
	// One or two remaining blocks.  Both v0 and v1 are always
	// encrypted; the second result is stored only if x2 != 1.
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12	// x12 is 0 or 16, so the next load never
					// reaches past the last input block
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	.Lctr32_done		// exactly one block: skip the second store
	st1	{v3.16b},[x1]

.Lctr32_done:
	ldr	x29,[sp],#16		// restore x29 and release the frame
	ret
.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks

1863c0855eaaSJohn Baldwin.globl aes_v8_xts_encrypt 1864c0855eaaSJohn Baldwin.type aes_v8_xts_encrypt,%function 1865c0855eaaSJohn Baldwin.align 5 1866c0855eaaSJohn Baldwinaes_v8_xts_encrypt: 1867bd9588bcSAndrew Turner AARCH64_VALID_CALL_TARGET 1868c0855eaaSJohn Baldwin cmp x2,#16 1869c0855eaaSJohn Baldwin // Original input data size bigger than 16, jump to big size processing. 1870c0855eaaSJohn Baldwin b.ne .Lxts_enc_big_size 1871c0855eaaSJohn Baldwin // Encrypt the iv with key2, as the first XEX iv. 
1872c0855eaaSJohn Baldwin ldr w6,[x4,#240] 1873c0855eaaSJohn Baldwin ld1 {v0.4s},[x4],#16 1874c0855eaaSJohn Baldwin ld1 {v6.16b},[x5] 1875c0855eaaSJohn Baldwin sub w6,w6,#2 1876c0855eaaSJohn Baldwin ld1 {v1.4s},[x4],#16 1877c0855eaaSJohn Baldwin 1878c0855eaaSJohn Baldwin.Loop_enc_iv_enc: 1879c0855eaaSJohn Baldwin aese v6.16b,v0.16b 1880c0855eaaSJohn Baldwin aesmc v6.16b,v6.16b 1881c0855eaaSJohn Baldwin ld1 {v0.4s},[x4],#16 1882c0855eaaSJohn Baldwin subs w6,w6,#2 1883c0855eaaSJohn Baldwin aese v6.16b,v1.16b 1884c0855eaaSJohn Baldwin aesmc v6.16b,v6.16b 1885c0855eaaSJohn Baldwin ld1 {v1.4s},[x4],#16 1886c0855eaaSJohn Baldwin b.gt .Loop_enc_iv_enc 1887c0855eaaSJohn Baldwin 1888c0855eaaSJohn Baldwin aese v6.16b,v0.16b 1889c0855eaaSJohn Baldwin aesmc v6.16b,v6.16b 1890c0855eaaSJohn Baldwin ld1 {v0.4s},[x4] 1891c0855eaaSJohn Baldwin aese v6.16b,v1.16b 1892c0855eaaSJohn Baldwin eor v6.16b,v6.16b,v0.16b 1893c0855eaaSJohn Baldwin 1894c0855eaaSJohn Baldwin ld1 {v0.16b},[x0] 1895c0855eaaSJohn Baldwin eor v0.16b,v6.16b,v0.16b 1896c0855eaaSJohn Baldwin 1897c0855eaaSJohn Baldwin ldr w6,[x3,#240] 1898c0855eaaSJohn Baldwin ld1 {v28.4s,v29.4s},[x3],#32 // load key schedule... 1899c0855eaaSJohn Baldwin 1900c0855eaaSJohn Baldwin aese v0.16b,v28.16b 1901c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 1902c0855eaaSJohn Baldwin ld1 {v16.4s,v17.4s},[x3],#32 // load key schedule... 1903c0855eaaSJohn Baldwin aese v0.16b,v29.16b 1904c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 1905c0855eaaSJohn Baldwin subs w6,w6,#10 // if rounds==10, jump to aes-128-xts processing 1906c0855eaaSJohn Baldwin b.eq .Lxts_128_enc 1907c0855eaaSJohn Baldwin.Lxts_enc_round_loop: 1908c0855eaaSJohn Baldwin aese v0.16b,v16.16b 1909c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 1910c0855eaaSJohn Baldwin ld1 {v16.4s},[x3],#16 // load key schedule... 1911c0855eaaSJohn Baldwin aese v0.16b,v17.16b 1912c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 1913c0855eaaSJohn Baldwin ld1 {v17.4s},[x3],#16 // load key schedule... 
1914c0855eaaSJohn Baldwin subs w6,w6,#2 // bias 1915c0855eaaSJohn Baldwin b.gt .Lxts_enc_round_loop 1916c0855eaaSJohn Baldwin.Lxts_128_enc: 1917c0855eaaSJohn Baldwin ld1 {v18.4s,v19.4s},[x3],#32 // load key schedule... 1918c0855eaaSJohn Baldwin aese v0.16b,v16.16b 1919c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 1920c0855eaaSJohn Baldwin aese v0.16b,v17.16b 1921c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 1922c0855eaaSJohn Baldwin ld1 {v20.4s,v21.4s},[x3],#32 // load key schedule... 1923c0855eaaSJohn Baldwin aese v0.16b,v18.16b 1924c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 1925c0855eaaSJohn Baldwin aese v0.16b,v19.16b 1926c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 1927c0855eaaSJohn Baldwin ld1 {v22.4s,v23.4s},[x3],#32 // load key schedule... 1928c0855eaaSJohn Baldwin aese v0.16b,v20.16b 1929c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 1930c0855eaaSJohn Baldwin aese v0.16b,v21.16b 1931c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 1932c0855eaaSJohn Baldwin ld1 {v7.4s},[x3] 1933c0855eaaSJohn Baldwin aese v0.16b,v22.16b 1934c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 1935c0855eaaSJohn Baldwin aese v0.16b,v23.16b 1936c0855eaaSJohn Baldwin eor v0.16b,v0.16b,v7.16b 1937c0855eaaSJohn Baldwin eor v0.16b,v0.16b,v6.16b 1938c0855eaaSJohn Baldwin st1 {v0.16b},[x1] 1939c0855eaaSJohn Baldwin b .Lxts_enc_final_abort 1940c0855eaaSJohn Baldwin 1941c0855eaaSJohn Baldwin.align 4 1942c0855eaaSJohn Baldwin.Lxts_enc_big_size: 1943c0855eaaSJohn Baldwin stp x19,x20,[sp,#-64]! 1944c0855eaaSJohn Baldwin stp x21,x22,[sp,#48] 1945c0855eaaSJohn Baldwin stp d8,d9,[sp,#32] 1946c0855eaaSJohn Baldwin stp d10,d11,[sp,#16] 1947c0855eaaSJohn Baldwin 1948c0855eaaSJohn Baldwin // tailcnt store the tail value of length%16. 
1949c0855eaaSJohn Baldwin and x21,x2,#0xf 1950c0855eaaSJohn Baldwin and x2,x2,#-16 1951c0855eaaSJohn Baldwin subs x2,x2,#16 1952c0855eaaSJohn Baldwin mov x8,#16 1953c0855eaaSJohn Baldwin b.lo .Lxts_abort 1954c0855eaaSJohn Baldwin csel x8,xzr,x8,eq 1955c0855eaaSJohn Baldwin 1956c0855eaaSJohn Baldwin // Firstly, encrypt the iv with key2, as the first iv of XEX. 1957c0855eaaSJohn Baldwin ldr w6,[x4,#240] 1958c0855eaaSJohn Baldwin ld1 {v0.4s},[x4],#16 1959c0855eaaSJohn Baldwin ld1 {v6.16b},[x5] 1960c0855eaaSJohn Baldwin sub w6,w6,#2 1961c0855eaaSJohn Baldwin ld1 {v1.4s},[x4],#16 1962c0855eaaSJohn Baldwin 1963c0855eaaSJohn Baldwin.Loop_iv_enc: 1964c0855eaaSJohn Baldwin aese v6.16b,v0.16b 1965c0855eaaSJohn Baldwin aesmc v6.16b,v6.16b 1966c0855eaaSJohn Baldwin ld1 {v0.4s},[x4],#16 1967c0855eaaSJohn Baldwin subs w6,w6,#2 1968c0855eaaSJohn Baldwin aese v6.16b,v1.16b 1969c0855eaaSJohn Baldwin aesmc v6.16b,v6.16b 1970c0855eaaSJohn Baldwin ld1 {v1.4s},[x4],#16 1971c0855eaaSJohn Baldwin b.gt .Loop_iv_enc 1972c0855eaaSJohn Baldwin 1973c0855eaaSJohn Baldwin aese v6.16b,v0.16b 1974c0855eaaSJohn Baldwin aesmc v6.16b,v6.16b 1975c0855eaaSJohn Baldwin ld1 {v0.4s},[x4] 1976c0855eaaSJohn Baldwin aese v6.16b,v1.16b 1977c0855eaaSJohn Baldwin eor v6.16b,v6.16b,v0.16b 1978c0855eaaSJohn Baldwin 1979c0855eaaSJohn Baldwin // The iv for second block 1980c0855eaaSJohn Baldwin // x9- iv(low), x10 - iv(high) 1981c0855eaaSJohn Baldwin // the five ivs stored into, v6.16b,v8.16b,v9.16b,v10.16b,v11.16b 1982c0855eaaSJohn Baldwin fmov x9,d6 1983c0855eaaSJohn Baldwin fmov x10,v6.d[1] 1984c0855eaaSJohn Baldwin mov w19,#0x87 1985c0855eaaSJohn Baldwin extr x22,x10,x10,#32 1986c0855eaaSJohn Baldwin extr x10,x10,x9,#63 1987c0855eaaSJohn Baldwin and w11,w19,w22,asr#31 1988c0855eaaSJohn Baldwin eor x9,x11,x9,lsl#1 1989c0855eaaSJohn Baldwin fmov d8,x9 1990c0855eaaSJohn Baldwin fmov v8.d[1],x10 1991c0855eaaSJohn Baldwin 1992c0855eaaSJohn Baldwin ldr w5,[x3,#240] // next starting point 1993c0855eaaSJohn Baldwin 
ld1 {v0.16b},[x0],x8 1994c0855eaaSJohn Baldwin 1995c0855eaaSJohn Baldwin ld1 {v16.4s,v17.4s},[x3] // load key schedule... 1996c0855eaaSJohn Baldwin sub w5,w5,#6 1997c0855eaaSJohn Baldwin add x7,x3,x5,lsl#4 // pointer to last 7 round keys 1998c0855eaaSJohn Baldwin sub w5,w5,#2 1999c0855eaaSJohn Baldwin ld1 {v18.4s,v19.4s},[x7],#32 2000c0855eaaSJohn Baldwin ld1 {v20.4s,v21.4s},[x7],#32 2001c0855eaaSJohn Baldwin ld1 {v22.4s,v23.4s},[x7],#32 2002c0855eaaSJohn Baldwin ld1 {v7.4s},[x7] 2003c0855eaaSJohn Baldwin 2004c0855eaaSJohn Baldwin add x7,x3,#32 2005c0855eaaSJohn Baldwin mov w6,w5 2006c0855eaaSJohn Baldwin 2007c0855eaaSJohn Baldwin // Encryption 2008c0855eaaSJohn Baldwin.Lxts_enc: 2009c0855eaaSJohn Baldwin ld1 {v24.16b},[x0],#16 2010c0855eaaSJohn Baldwin subs x2,x2,#32 // bias 2011c0855eaaSJohn Baldwin add w6,w5,#2 2012c0855eaaSJohn Baldwin orr v3.16b,v0.16b,v0.16b 2013c0855eaaSJohn Baldwin orr v1.16b,v0.16b,v0.16b 2014c0855eaaSJohn Baldwin orr v28.16b,v0.16b,v0.16b 2015c0855eaaSJohn Baldwin orr v27.16b,v24.16b,v24.16b 2016c0855eaaSJohn Baldwin orr v29.16b,v24.16b,v24.16b 2017c0855eaaSJohn Baldwin b.lo .Lxts_inner_enc_tail 2018c0855eaaSJohn Baldwin eor v0.16b,v0.16b,v6.16b // before encryption, xor with iv 2019c0855eaaSJohn Baldwin eor v24.16b,v24.16b,v8.16b 2020c0855eaaSJohn Baldwin 2021c0855eaaSJohn Baldwin // The iv for third block 2022c0855eaaSJohn Baldwin extr x22,x10,x10,#32 2023c0855eaaSJohn Baldwin extr x10,x10,x9,#63 2024c0855eaaSJohn Baldwin and w11,w19,w22,asr#31 2025c0855eaaSJohn Baldwin eor x9,x11,x9,lsl#1 2026c0855eaaSJohn Baldwin fmov d9,x9 2027c0855eaaSJohn Baldwin fmov v9.d[1],x10 2028c0855eaaSJohn Baldwin 2029c0855eaaSJohn Baldwin 2030c0855eaaSJohn Baldwin orr v1.16b,v24.16b,v24.16b 2031c0855eaaSJohn Baldwin ld1 {v24.16b},[x0],#16 2032c0855eaaSJohn Baldwin orr v2.16b,v0.16b,v0.16b 2033c0855eaaSJohn Baldwin orr v3.16b,v1.16b,v1.16b 2034c0855eaaSJohn Baldwin eor v27.16b,v24.16b,v9.16b // the third block 2035c0855eaaSJohn Baldwin eor 
v24.16b,v24.16b,v9.16b 2036c0855eaaSJohn Baldwin cmp x2,#32 2037c0855eaaSJohn Baldwin b.lo .Lxts_outer_enc_tail 2038c0855eaaSJohn Baldwin 2039c0855eaaSJohn Baldwin // The iv for fourth block 2040c0855eaaSJohn Baldwin extr x22,x10,x10,#32 2041c0855eaaSJohn Baldwin extr x10,x10,x9,#63 2042c0855eaaSJohn Baldwin and w11,w19,w22,asr#31 2043c0855eaaSJohn Baldwin eor x9,x11,x9,lsl#1 2044c0855eaaSJohn Baldwin fmov d10,x9 2045c0855eaaSJohn Baldwin fmov v10.d[1],x10 2046c0855eaaSJohn Baldwin 2047c0855eaaSJohn Baldwin ld1 {v25.16b},[x0],#16 2048c0855eaaSJohn Baldwin // The iv for fifth block 2049c0855eaaSJohn Baldwin extr x22,x10,x10,#32 2050c0855eaaSJohn Baldwin extr x10,x10,x9,#63 2051c0855eaaSJohn Baldwin and w11,w19,w22,asr#31 2052c0855eaaSJohn Baldwin eor x9,x11,x9,lsl#1 2053c0855eaaSJohn Baldwin fmov d11,x9 2054c0855eaaSJohn Baldwin fmov v11.d[1],x10 2055c0855eaaSJohn Baldwin 2056c0855eaaSJohn Baldwin ld1 {v26.16b},[x0],#16 2057c0855eaaSJohn Baldwin eor v25.16b,v25.16b,v10.16b // the fourth block 2058c0855eaaSJohn Baldwin eor v26.16b,v26.16b,v11.16b 2059c0855eaaSJohn Baldwin sub x2,x2,#32 // bias 2060c0855eaaSJohn Baldwin mov w6,w5 2061c0855eaaSJohn Baldwin b .Loop5x_xts_enc 2062c0855eaaSJohn Baldwin 2063c0855eaaSJohn Baldwin.align 4 2064c0855eaaSJohn Baldwin.Loop5x_xts_enc: 2065c0855eaaSJohn Baldwin aese v0.16b,v16.16b 2066c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 2067c0855eaaSJohn Baldwin aese v1.16b,v16.16b 2068c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2069c0855eaaSJohn Baldwin aese v24.16b,v16.16b 2070c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2071c0855eaaSJohn Baldwin aese v25.16b,v16.16b 2072c0855eaaSJohn Baldwin aesmc v25.16b,v25.16b 2073c0855eaaSJohn Baldwin aese v26.16b,v16.16b 2074c0855eaaSJohn Baldwin aesmc v26.16b,v26.16b 2075c0855eaaSJohn Baldwin ld1 {v16.4s},[x7],#16 2076c0855eaaSJohn Baldwin subs w6,w6,#2 2077c0855eaaSJohn Baldwin aese v0.16b,v17.16b 2078c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 2079c0855eaaSJohn Baldwin aese v1.16b,v17.16b 
2080c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2081c0855eaaSJohn Baldwin aese v24.16b,v17.16b 2082c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2083c0855eaaSJohn Baldwin aese v25.16b,v17.16b 2084c0855eaaSJohn Baldwin aesmc v25.16b,v25.16b 2085c0855eaaSJohn Baldwin aese v26.16b,v17.16b 2086c0855eaaSJohn Baldwin aesmc v26.16b,v26.16b 2087c0855eaaSJohn Baldwin ld1 {v17.4s},[x7],#16 2088c0855eaaSJohn Baldwin b.gt .Loop5x_xts_enc 2089c0855eaaSJohn Baldwin 2090c0855eaaSJohn Baldwin aese v0.16b,v16.16b 2091c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 2092c0855eaaSJohn Baldwin aese v1.16b,v16.16b 2093c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2094c0855eaaSJohn Baldwin aese v24.16b,v16.16b 2095c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2096c0855eaaSJohn Baldwin aese v25.16b,v16.16b 2097c0855eaaSJohn Baldwin aesmc v25.16b,v25.16b 2098c0855eaaSJohn Baldwin aese v26.16b,v16.16b 2099c0855eaaSJohn Baldwin aesmc v26.16b,v26.16b 2100c0855eaaSJohn Baldwin subs x2,x2,#0x50 // because .Lxts_enc_tail4x 2101c0855eaaSJohn Baldwin 2102c0855eaaSJohn Baldwin aese v0.16b,v17.16b 2103c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 2104c0855eaaSJohn Baldwin aese v1.16b,v17.16b 2105c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2106c0855eaaSJohn Baldwin aese v24.16b,v17.16b 2107c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2108c0855eaaSJohn Baldwin aese v25.16b,v17.16b 2109c0855eaaSJohn Baldwin aesmc v25.16b,v25.16b 2110c0855eaaSJohn Baldwin aese v26.16b,v17.16b 2111c0855eaaSJohn Baldwin aesmc v26.16b,v26.16b 2112c0855eaaSJohn Baldwin csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo 2113c0855eaaSJohn Baldwin mov x7,x3 2114c0855eaaSJohn Baldwin 2115c0855eaaSJohn Baldwin aese v0.16b,v18.16b 2116c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 2117c0855eaaSJohn Baldwin aese v1.16b,v18.16b 2118c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2119c0855eaaSJohn Baldwin aese v24.16b,v18.16b 2120c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2121c0855eaaSJohn Baldwin aese v25.16b,v18.16b 2122c0855eaaSJohn Baldwin aesmc 
v25.16b,v25.16b 2123c0855eaaSJohn Baldwin aese v26.16b,v18.16b 2124c0855eaaSJohn Baldwin aesmc v26.16b,v26.16b 2125c0855eaaSJohn Baldwin add x0,x0,x6 // x0 is adjusted in such way that 2126c0855eaaSJohn Baldwin // at exit from the loop v1.16b-v26.16b 2127c0855eaaSJohn Baldwin // are loaded with last "words" 2128c0855eaaSJohn Baldwin add x6,x2,#0x60 // because .Lxts_enc_tail4x 2129c0855eaaSJohn Baldwin 2130c0855eaaSJohn Baldwin aese v0.16b,v19.16b 2131c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 2132c0855eaaSJohn Baldwin aese v1.16b,v19.16b 2133c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2134c0855eaaSJohn Baldwin aese v24.16b,v19.16b 2135c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2136c0855eaaSJohn Baldwin aese v25.16b,v19.16b 2137c0855eaaSJohn Baldwin aesmc v25.16b,v25.16b 2138c0855eaaSJohn Baldwin aese v26.16b,v19.16b 2139c0855eaaSJohn Baldwin aesmc v26.16b,v26.16b 2140c0855eaaSJohn Baldwin 2141c0855eaaSJohn Baldwin aese v0.16b,v20.16b 2142c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 2143c0855eaaSJohn Baldwin aese v1.16b,v20.16b 2144c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2145c0855eaaSJohn Baldwin aese v24.16b,v20.16b 2146c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2147c0855eaaSJohn Baldwin aese v25.16b,v20.16b 2148c0855eaaSJohn Baldwin aesmc v25.16b,v25.16b 2149c0855eaaSJohn Baldwin aese v26.16b,v20.16b 2150c0855eaaSJohn Baldwin aesmc v26.16b,v26.16b 2151c0855eaaSJohn Baldwin 2152c0855eaaSJohn Baldwin aese v0.16b,v21.16b 2153c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 2154c0855eaaSJohn Baldwin aese v1.16b,v21.16b 2155c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2156c0855eaaSJohn Baldwin aese v24.16b,v21.16b 2157c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2158c0855eaaSJohn Baldwin aese v25.16b,v21.16b 2159c0855eaaSJohn Baldwin aesmc v25.16b,v25.16b 2160c0855eaaSJohn Baldwin aese v26.16b,v21.16b 2161c0855eaaSJohn Baldwin aesmc v26.16b,v26.16b 2162c0855eaaSJohn Baldwin 2163c0855eaaSJohn Baldwin aese v0.16b,v22.16b 2164c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 2165c0855eaaSJohn 
Baldwin aese v1.16b,v22.16b 2166c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2167c0855eaaSJohn Baldwin aese v24.16b,v22.16b 2168c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2169c0855eaaSJohn Baldwin aese v25.16b,v22.16b 2170c0855eaaSJohn Baldwin aesmc v25.16b,v25.16b 2171c0855eaaSJohn Baldwin aese v26.16b,v22.16b 2172c0855eaaSJohn Baldwin aesmc v26.16b,v26.16b 2173c0855eaaSJohn Baldwin 2174c0855eaaSJohn Baldwin eor v4.16b,v7.16b,v6.16b 2175c0855eaaSJohn Baldwin aese v0.16b,v23.16b 2176c0855eaaSJohn Baldwin // The iv for first block of one iteration 2177c0855eaaSJohn Baldwin extr x22,x10,x10,#32 2178c0855eaaSJohn Baldwin extr x10,x10,x9,#63 2179c0855eaaSJohn Baldwin and w11,w19,w22,asr#31 2180c0855eaaSJohn Baldwin eor x9,x11,x9,lsl#1 2181c0855eaaSJohn Baldwin fmov d6,x9 2182c0855eaaSJohn Baldwin fmov v6.d[1],x10 2183c0855eaaSJohn Baldwin eor v5.16b,v7.16b,v8.16b 2184c0855eaaSJohn Baldwin ld1 {v2.16b},[x0],#16 2185c0855eaaSJohn Baldwin aese v1.16b,v23.16b 2186c0855eaaSJohn Baldwin // The iv for second block 2187c0855eaaSJohn Baldwin extr x22,x10,x10,#32 2188c0855eaaSJohn Baldwin extr x10,x10,x9,#63 2189c0855eaaSJohn Baldwin and w11,w19,w22,asr#31 2190c0855eaaSJohn Baldwin eor x9,x11,x9,lsl#1 2191c0855eaaSJohn Baldwin fmov d8,x9 2192c0855eaaSJohn Baldwin fmov v8.d[1],x10 2193c0855eaaSJohn Baldwin eor v17.16b,v7.16b,v9.16b 2194c0855eaaSJohn Baldwin ld1 {v3.16b},[x0],#16 2195c0855eaaSJohn Baldwin aese v24.16b,v23.16b 2196c0855eaaSJohn Baldwin // The iv for third block 2197c0855eaaSJohn Baldwin extr x22,x10,x10,#32 2198c0855eaaSJohn Baldwin extr x10,x10,x9,#63 2199c0855eaaSJohn Baldwin and w11,w19,w22,asr#31 2200c0855eaaSJohn Baldwin eor x9,x11,x9,lsl#1 2201c0855eaaSJohn Baldwin fmov d9,x9 2202c0855eaaSJohn Baldwin fmov v9.d[1],x10 2203c0855eaaSJohn Baldwin eor v30.16b,v7.16b,v10.16b 2204c0855eaaSJohn Baldwin ld1 {v27.16b},[x0],#16 2205c0855eaaSJohn Baldwin aese v25.16b,v23.16b 2206c0855eaaSJohn Baldwin // The iv for fourth block 2207c0855eaaSJohn Baldwin extr 
x22,x10,x10,#32 2208c0855eaaSJohn Baldwin extr x10,x10,x9,#63 2209c0855eaaSJohn Baldwin and w11,w19,w22,asr#31 2210c0855eaaSJohn Baldwin eor x9,x11,x9,lsl#1 2211c0855eaaSJohn Baldwin fmov d10,x9 2212c0855eaaSJohn Baldwin fmov v10.d[1],x10 2213c0855eaaSJohn Baldwin eor v31.16b,v7.16b,v11.16b 2214c0855eaaSJohn Baldwin ld1 {v28.16b},[x0],#16 2215c0855eaaSJohn Baldwin aese v26.16b,v23.16b 2216c0855eaaSJohn Baldwin 2217c0855eaaSJohn Baldwin // The iv for fifth block 2218c0855eaaSJohn Baldwin extr x22,x10,x10,#32 2219c0855eaaSJohn Baldwin extr x10,x10,x9,#63 2220c0855eaaSJohn Baldwin and w11,w19,w22,asr #31 2221c0855eaaSJohn Baldwin eor x9,x11,x9,lsl #1 2222c0855eaaSJohn Baldwin fmov d11,x9 2223c0855eaaSJohn Baldwin fmov v11.d[1],x10 2224c0855eaaSJohn Baldwin 2225c0855eaaSJohn Baldwin ld1 {v29.16b},[x0],#16 2226c0855eaaSJohn Baldwin cbz x6,.Lxts_enc_tail4x 2227c0855eaaSJohn Baldwin ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 2228c0855eaaSJohn Baldwin eor v4.16b,v4.16b,v0.16b 2229c0855eaaSJohn Baldwin eor v0.16b,v2.16b,v6.16b 2230c0855eaaSJohn Baldwin eor v5.16b,v5.16b,v1.16b 2231c0855eaaSJohn Baldwin eor v1.16b,v3.16b,v8.16b 2232c0855eaaSJohn Baldwin eor v17.16b,v17.16b,v24.16b 2233c0855eaaSJohn Baldwin eor v24.16b,v27.16b,v9.16b 2234c0855eaaSJohn Baldwin eor v30.16b,v30.16b,v25.16b 2235c0855eaaSJohn Baldwin eor v25.16b,v28.16b,v10.16b 2236c0855eaaSJohn Baldwin eor v31.16b,v31.16b,v26.16b 2237c0855eaaSJohn Baldwin st1 {v4.16b},[x1],#16 2238c0855eaaSJohn Baldwin eor v26.16b,v29.16b,v11.16b 2239c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 2240c0855eaaSJohn Baldwin mov w6,w5 2241c0855eaaSJohn Baldwin st1 {v17.16b},[x1],#16 2242c0855eaaSJohn Baldwin ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 2243c0855eaaSJohn Baldwin st1 {v30.16b},[x1],#16 2244c0855eaaSJohn Baldwin st1 {v31.16b},[x1],#16 2245c0855eaaSJohn Baldwin b.hs .Loop5x_xts_enc 2246c0855eaaSJohn Baldwin 2247c0855eaaSJohn Baldwin 2248c0855eaaSJohn Baldwin // If left 4 blocks, borrow the five block's processing. 
2249c0855eaaSJohn Baldwin cmn x2,#0x10 2250c0855eaaSJohn Baldwin b.ne .Loop5x_enc_after 2251c0855eaaSJohn Baldwin orr v11.16b,v10.16b,v10.16b 2252c0855eaaSJohn Baldwin orr v10.16b,v9.16b,v9.16b 2253c0855eaaSJohn Baldwin orr v9.16b,v8.16b,v8.16b 2254c0855eaaSJohn Baldwin orr v8.16b,v6.16b,v6.16b 2255c0855eaaSJohn Baldwin fmov x9,d11 2256c0855eaaSJohn Baldwin fmov x10,v11.d[1] 2257c0855eaaSJohn Baldwin eor v0.16b,v6.16b,v2.16b 2258c0855eaaSJohn Baldwin eor v1.16b,v8.16b,v3.16b 2259c0855eaaSJohn Baldwin eor v24.16b,v27.16b,v9.16b 2260c0855eaaSJohn Baldwin eor v25.16b,v28.16b,v10.16b 2261c0855eaaSJohn Baldwin eor v26.16b,v29.16b,v11.16b 2262c0855eaaSJohn Baldwin b.eq .Loop5x_xts_enc 2263c0855eaaSJohn Baldwin 2264c0855eaaSJohn Baldwin.Loop5x_enc_after: 2265c0855eaaSJohn Baldwin add x2,x2,#0x50 2266c0855eaaSJohn Baldwin cbz x2,.Lxts_enc_done 2267c0855eaaSJohn Baldwin 2268c0855eaaSJohn Baldwin add w6,w5,#2 2269c0855eaaSJohn Baldwin subs x2,x2,#0x30 2270c0855eaaSJohn Baldwin b.lo .Lxts_inner_enc_tail 2271c0855eaaSJohn Baldwin 2272c0855eaaSJohn Baldwin eor v0.16b,v6.16b,v27.16b 2273c0855eaaSJohn Baldwin eor v1.16b,v8.16b,v28.16b 2274c0855eaaSJohn Baldwin eor v24.16b,v29.16b,v9.16b 2275c0855eaaSJohn Baldwin b .Lxts_outer_enc_tail 2276c0855eaaSJohn Baldwin 2277c0855eaaSJohn Baldwin.align 4 2278c0855eaaSJohn Baldwin.Lxts_enc_tail4x: 2279c0855eaaSJohn Baldwin add x0,x0,#16 2280c0855eaaSJohn Baldwin eor v5.16b,v1.16b,v5.16b 2281c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 2282c0855eaaSJohn Baldwin eor v17.16b,v24.16b,v17.16b 2283c0855eaaSJohn Baldwin st1 {v17.16b},[x1],#16 2284c0855eaaSJohn Baldwin eor v30.16b,v25.16b,v30.16b 2285c0855eaaSJohn Baldwin eor v31.16b,v26.16b,v31.16b 2286c0855eaaSJohn Baldwin st1 {v30.16b,v31.16b},[x1],#32 2287c0855eaaSJohn Baldwin 2288c0855eaaSJohn Baldwin b .Lxts_enc_done 2289c0855eaaSJohn Baldwin.align 4 2290c0855eaaSJohn Baldwin.Lxts_outer_enc_tail: 2291c0855eaaSJohn Baldwin aese v0.16b,v16.16b 2292c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 
2293c0855eaaSJohn Baldwin aese v1.16b,v16.16b 2294c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2295c0855eaaSJohn Baldwin aese v24.16b,v16.16b 2296c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2297c0855eaaSJohn Baldwin ld1 {v16.4s},[x7],#16 2298c0855eaaSJohn Baldwin subs w6,w6,#2 2299c0855eaaSJohn Baldwin aese v0.16b,v17.16b 2300c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 2301c0855eaaSJohn Baldwin aese v1.16b,v17.16b 2302c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2303c0855eaaSJohn Baldwin aese v24.16b,v17.16b 2304c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2305c0855eaaSJohn Baldwin ld1 {v17.4s},[x7],#16 2306c0855eaaSJohn Baldwin b.gt .Lxts_outer_enc_tail 2307c0855eaaSJohn Baldwin 2308c0855eaaSJohn Baldwin aese v0.16b,v16.16b 2309c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 2310c0855eaaSJohn Baldwin aese v1.16b,v16.16b 2311c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2312c0855eaaSJohn Baldwin aese v24.16b,v16.16b 2313c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2314c0855eaaSJohn Baldwin eor v4.16b,v6.16b,v7.16b 2315c0855eaaSJohn Baldwin subs x2,x2,#0x30 2316c0855eaaSJohn Baldwin // The iv for first block 2317c0855eaaSJohn Baldwin fmov x9,d9 2318c0855eaaSJohn Baldwin fmov x10,v9.d[1] 2319c0855eaaSJohn Baldwin //mov w19,#0x87 2320c0855eaaSJohn Baldwin extr x22,x10,x10,#32 2321c0855eaaSJohn Baldwin extr x10,x10,x9,#63 2322c0855eaaSJohn Baldwin and w11,w19,w22,asr#31 2323c0855eaaSJohn Baldwin eor x9,x11,x9,lsl#1 2324c0855eaaSJohn Baldwin fmov d6,x9 2325c0855eaaSJohn Baldwin fmov v6.d[1],x10 2326c0855eaaSJohn Baldwin eor v5.16b,v8.16b,v7.16b 2327c0855eaaSJohn Baldwin csel x6,x2,x6,lo // x6, w6, is zero at this point 2328c0855eaaSJohn Baldwin aese v0.16b,v17.16b 2329c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 2330c0855eaaSJohn Baldwin aese v1.16b,v17.16b 2331c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2332c0855eaaSJohn Baldwin aese v24.16b,v17.16b 2333c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2334c0855eaaSJohn Baldwin eor v17.16b,v9.16b,v7.16b 2335c0855eaaSJohn Baldwin 2336c0855eaaSJohn 
Baldwin add x6,x6,#0x20 2337c0855eaaSJohn Baldwin add x0,x0,x6 2338c0855eaaSJohn Baldwin mov x7,x3 2339c0855eaaSJohn Baldwin 2340c0855eaaSJohn Baldwin aese v0.16b,v20.16b 2341c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 2342c0855eaaSJohn Baldwin aese v1.16b,v20.16b 2343c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2344c0855eaaSJohn Baldwin aese v24.16b,v20.16b 2345c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2346c0855eaaSJohn Baldwin aese v0.16b,v21.16b 2347c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 2348c0855eaaSJohn Baldwin aese v1.16b,v21.16b 2349c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2350c0855eaaSJohn Baldwin aese v24.16b,v21.16b 2351c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2352c0855eaaSJohn Baldwin aese v0.16b,v22.16b 2353c0855eaaSJohn Baldwin aesmc v0.16b,v0.16b 2354c0855eaaSJohn Baldwin aese v1.16b,v22.16b 2355c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2356c0855eaaSJohn Baldwin aese v24.16b,v22.16b 2357c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2358c0855eaaSJohn Baldwin aese v0.16b,v23.16b 2359c0855eaaSJohn Baldwin aese v1.16b,v23.16b 2360c0855eaaSJohn Baldwin aese v24.16b,v23.16b 2361c0855eaaSJohn Baldwin ld1 {v27.16b},[x0],#16 2362c0855eaaSJohn Baldwin add w6,w5,#2 2363c0855eaaSJohn Baldwin ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 2364c0855eaaSJohn Baldwin eor v4.16b,v4.16b,v0.16b 2365c0855eaaSJohn Baldwin eor v5.16b,v5.16b,v1.16b 2366c0855eaaSJohn Baldwin eor v24.16b,v24.16b,v17.16b 2367c0855eaaSJohn Baldwin ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 2368c0855eaaSJohn Baldwin st1 {v4.16b},[x1],#16 2369c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 2370c0855eaaSJohn Baldwin st1 {v24.16b},[x1],#16 2371c0855eaaSJohn Baldwin cmn x2,#0x30 2372c0855eaaSJohn Baldwin b.eq .Lxts_enc_done 2373c0855eaaSJohn Baldwin.Lxts_encxor_one: 2374c0855eaaSJohn Baldwin orr v28.16b,v3.16b,v3.16b 2375c0855eaaSJohn Baldwin orr v29.16b,v27.16b,v27.16b 2376c0855eaaSJohn Baldwin nop 2377c0855eaaSJohn Baldwin 2378c0855eaaSJohn Baldwin.Lxts_inner_enc_tail: 2379c0855eaaSJohn Baldwin 
cmn x2,#0x10 2380c0855eaaSJohn Baldwin eor v1.16b,v28.16b,v6.16b 2381c0855eaaSJohn Baldwin eor v24.16b,v29.16b,v8.16b 2382c0855eaaSJohn Baldwin b.eq .Lxts_enc_tail_loop 2383c0855eaaSJohn Baldwin eor v24.16b,v29.16b,v6.16b 2384c0855eaaSJohn Baldwin.Lxts_enc_tail_loop: 2385c0855eaaSJohn Baldwin aese v1.16b,v16.16b 2386c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2387c0855eaaSJohn Baldwin aese v24.16b,v16.16b 2388c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2389c0855eaaSJohn Baldwin ld1 {v16.4s},[x7],#16 2390c0855eaaSJohn Baldwin subs w6,w6,#2 2391c0855eaaSJohn Baldwin aese v1.16b,v17.16b 2392c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2393c0855eaaSJohn Baldwin aese v24.16b,v17.16b 2394c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2395c0855eaaSJohn Baldwin ld1 {v17.4s},[x7],#16 2396c0855eaaSJohn Baldwin b.gt .Lxts_enc_tail_loop 2397c0855eaaSJohn Baldwin 2398c0855eaaSJohn Baldwin aese v1.16b,v16.16b 2399c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2400c0855eaaSJohn Baldwin aese v24.16b,v16.16b 2401c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2402c0855eaaSJohn Baldwin aese v1.16b,v17.16b 2403c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2404c0855eaaSJohn Baldwin aese v24.16b,v17.16b 2405c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2406c0855eaaSJohn Baldwin aese v1.16b,v20.16b 2407c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2408c0855eaaSJohn Baldwin aese v24.16b,v20.16b 2409c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2410c0855eaaSJohn Baldwin cmn x2,#0x20 2411c0855eaaSJohn Baldwin aese v1.16b,v21.16b 2412c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2413c0855eaaSJohn Baldwin aese v24.16b,v21.16b 2414c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2415c0855eaaSJohn Baldwin eor v5.16b,v6.16b,v7.16b 2416c0855eaaSJohn Baldwin aese v1.16b,v22.16b 2417c0855eaaSJohn Baldwin aesmc v1.16b,v1.16b 2418c0855eaaSJohn Baldwin aese v24.16b,v22.16b 2419c0855eaaSJohn Baldwin aesmc v24.16b,v24.16b 2420c0855eaaSJohn Baldwin eor v17.16b,v8.16b,v7.16b 2421c0855eaaSJohn Baldwin aese v1.16b,v23.16b 2422c0855eaaSJohn 
Baldwin aese v24.16b,v23.16b 2423c0855eaaSJohn Baldwin b.eq .Lxts_enc_one 2424c0855eaaSJohn Baldwin eor v5.16b,v5.16b,v1.16b 2425c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 2426c0855eaaSJohn Baldwin eor v17.16b,v17.16b,v24.16b 2427c0855eaaSJohn Baldwin orr v6.16b,v8.16b,v8.16b 2428c0855eaaSJohn Baldwin st1 {v17.16b},[x1],#16 2429c0855eaaSJohn Baldwin fmov x9,d8 2430c0855eaaSJohn Baldwin fmov x10,v8.d[1] 2431c0855eaaSJohn Baldwin mov w19,#0x87 2432c0855eaaSJohn Baldwin extr x22,x10,x10,#32 2433c0855eaaSJohn Baldwin extr x10,x10,x9,#63 2434c0855eaaSJohn Baldwin and w11,w19,w22,asr #31 2435c0855eaaSJohn Baldwin eor x9,x11,x9,lsl #1 2436c0855eaaSJohn Baldwin fmov d6,x9 2437c0855eaaSJohn Baldwin fmov v6.d[1],x10 2438c0855eaaSJohn Baldwin b .Lxts_enc_done 2439c0855eaaSJohn Baldwin 2440c0855eaaSJohn Baldwin.Lxts_enc_one: 2441c0855eaaSJohn Baldwin eor v5.16b,v5.16b,v24.16b 2442c0855eaaSJohn Baldwin orr v6.16b,v6.16b,v6.16b 2443c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 2444c0855eaaSJohn Baldwin fmov x9,d6 2445c0855eaaSJohn Baldwin fmov x10,v6.d[1] 2446c0855eaaSJohn Baldwin mov w19,#0x87 2447c0855eaaSJohn Baldwin extr x22,x10,x10,#32 2448c0855eaaSJohn Baldwin extr x10,x10,x9,#63 2449c0855eaaSJohn Baldwin and w11,w19,w22,asr #31 2450c0855eaaSJohn Baldwin eor x9,x11,x9,lsl #1 2451c0855eaaSJohn Baldwin fmov d6,x9 2452c0855eaaSJohn Baldwin fmov v6.d[1],x10 2453c0855eaaSJohn Baldwin b .Lxts_enc_done 2454c0855eaaSJohn Baldwin.align 5 2455c0855eaaSJohn Baldwin.Lxts_enc_done: 2456c0855eaaSJohn Baldwin // Process the tail block with cipher stealing. 
2457c0855eaaSJohn Baldwin tst x21,#0xf 2458c0855eaaSJohn Baldwin b.eq .Lxts_abort 2459c0855eaaSJohn Baldwin 2460c0855eaaSJohn Baldwin mov x20,x0 2461c0855eaaSJohn Baldwin mov x13,x1 2462c0855eaaSJohn Baldwin sub x1,x1,#16 2463c0855eaaSJohn Baldwin.composite_enc_loop: 2464c0855eaaSJohn Baldwin subs x21,x21,#1 2465c0855eaaSJohn Baldwin ldrb w15,[x1,x21] 2466c0855eaaSJohn Baldwin ldrb w14,[x20,x21] 2467c0855eaaSJohn Baldwin strb w15,[x13,x21] 2468c0855eaaSJohn Baldwin strb w14,[x1,x21] 2469c0855eaaSJohn Baldwin b.gt .composite_enc_loop 2470c0855eaaSJohn Baldwin.Lxts_enc_load_done: 2471c0855eaaSJohn Baldwin ld1 {v26.16b},[x1] 2472c0855eaaSJohn Baldwin eor v26.16b,v26.16b,v6.16b 2473c0855eaaSJohn Baldwin 2474c0855eaaSJohn Baldwin // Encrypt the composite block to get the last second encrypted text block 2475c0855eaaSJohn Baldwin ldr w6,[x3,#240] // load key schedule... 2476c0855eaaSJohn Baldwin ld1 {v0.4s},[x3],#16 2477c0855eaaSJohn Baldwin sub w6,w6,#2 2478c0855eaaSJohn Baldwin ld1 {v1.4s},[x3],#16 // load key schedule... 
2479c0855eaaSJohn Baldwin.Loop_final_enc: 2480c0855eaaSJohn Baldwin aese v26.16b,v0.16b 2481c0855eaaSJohn Baldwin aesmc v26.16b,v26.16b 2482c0855eaaSJohn Baldwin ld1 {v0.4s},[x3],#16 2483c0855eaaSJohn Baldwin subs w6,w6,#2 2484c0855eaaSJohn Baldwin aese v26.16b,v1.16b 2485c0855eaaSJohn Baldwin aesmc v26.16b,v26.16b 2486c0855eaaSJohn Baldwin ld1 {v1.4s},[x3],#16 2487c0855eaaSJohn Baldwin b.gt .Loop_final_enc 2488c0855eaaSJohn Baldwin 2489c0855eaaSJohn Baldwin aese v26.16b,v0.16b 2490c0855eaaSJohn Baldwin aesmc v26.16b,v26.16b 2491c0855eaaSJohn Baldwin ld1 {v0.4s},[x3] 2492c0855eaaSJohn Baldwin aese v26.16b,v1.16b 2493c0855eaaSJohn Baldwin eor v26.16b,v26.16b,v0.16b 2494c0855eaaSJohn Baldwin eor v26.16b,v26.16b,v6.16b 2495c0855eaaSJohn Baldwin st1 {v26.16b},[x1] 2496c0855eaaSJohn Baldwin 2497c0855eaaSJohn Baldwin.Lxts_abort: 2498c0855eaaSJohn Baldwin ldp x21,x22,[sp,#48] 2499c0855eaaSJohn Baldwin ldp d8,d9,[sp,#32] 2500c0855eaaSJohn Baldwin ldp d10,d11,[sp,#16] 2501c0855eaaSJohn Baldwin ldp x19,x20,[sp],#64 2502c0855eaaSJohn Baldwin.Lxts_enc_final_abort: 2503c0855eaaSJohn Baldwin ret 2504c0855eaaSJohn Baldwin.size aes_v8_xts_encrypt,.-aes_v8_xts_encrypt 2505c0855eaaSJohn Baldwin.globl aes_v8_xts_decrypt 2506c0855eaaSJohn Baldwin.type aes_v8_xts_decrypt,%function 2507c0855eaaSJohn Baldwin.align 5 2508c0855eaaSJohn Baldwinaes_v8_xts_decrypt: 2509bd9588bcSAndrew Turner AARCH64_VALID_CALL_TARGET 2510c0855eaaSJohn Baldwin cmp x2,#16 2511c0855eaaSJohn Baldwin // Original input data size bigger than 16, jump to big size processing. 2512c0855eaaSJohn Baldwin b.ne .Lxts_dec_big_size 2513c0855eaaSJohn Baldwin // Encrypt the iv with key2, as the first XEX iv. 
2514c0855eaaSJohn Baldwin ldr w6,[x4,#240] 2515c0855eaaSJohn Baldwin ld1 {v0.4s},[x4],#16 2516c0855eaaSJohn Baldwin ld1 {v6.16b},[x5] 2517c0855eaaSJohn Baldwin sub w6,w6,#2 2518c0855eaaSJohn Baldwin ld1 {v1.4s},[x4],#16 2519c0855eaaSJohn Baldwin 2520c0855eaaSJohn Baldwin.Loop_dec_small_iv_enc: 2521c0855eaaSJohn Baldwin aese v6.16b,v0.16b 2522c0855eaaSJohn Baldwin aesmc v6.16b,v6.16b 2523c0855eaaSJohn Baldwin ld1 {v0.4s},[x4],#16 2524c0855eaaSJohn Baldwin subs w6,w6,#2 2525c0855eaaSJohn Baldwin aese v6.16b,v1.16b 2526c0855eaaSJohn Baldwin aesmc v6.16b,v6.16b 2527c0855eaaSJohn Baldwin ld1 {v1.4s},[x4],#16 2528c0855eaaSJohn Baldwin b.gt .Loop_dec_small_iv_enc 2529c0855eaaSJohn Baldwin 2530c0855eaaSJohn Baldwin aese v6.16b,v0.16b 2531c0855eaaSJohn Baldwin aesmc v6.16b,v6.16b 2532c0855eaaSJohn Baldwin ld1 {v0.4s},[x4] 2533c0855eaaSJohn Baldwin aese v6.16b,v1.16b 2534c0855eaaSJohn Baldwin eor v6.16b,v6.16b,v0.16b 2535c0855eaaSJohn Baldwin 2536c0855eaaSJohn Baldwin ld1 {v0.16b},[x0] 2537c0855eaaSJohn Baldwin eor v0.16b,v6.16b,v0.16b 2538c0855eaaSJohn Baldwin 2539c0855eaaSJohn Baldwin ldr w6,[x3,#240] 2540c0855eaaSJohn Baldwin ld1 {v28.4s,v29.4s},[x3],#32 // load key schedule... 2541c0855eaaSJohn Baldwin 2542c0855eaaSJohn Baldwin aesd v0.16b,v28.16b 2543c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2544c0855eaaSJohn Baldwin ld1 {v16.4s,v17.4s},[x3],#32 // load key schedule... 2545c0855eaaSJohn Baldwin aesd v0.16b,v29.16b 2546c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2547c0855eaaSJohn Baldwin subs w6,w6,#10 // bias 2548c0855eaaSJohn Baldwin b.eq .Lxts_128_dec 2549c0855eaaSJohn Baldwin.Lxts_dec_round_loop: 2550c0855eaaSJohn Baldwin aesd v0.16b,v16.16b 2551c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2552c0855eaaSJohn Baldwin ld1 {v16.4s},[x3],#16 // load key schedule... 2553c0855eaaSJohn Baldwin aesd v0.16b,v17.16b 2554c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2555c0855eaaSJohn Baldwin ld1 {v17.4s},[x3],#16 // load key schedule... 
2556c0855eaaSJohn Baldwin subs w6,w6,#2 // bias 2557c0855eaaSJohn Baldwin b.gt .Lxts_dec_round_loop 2558c0855eaaSJohn Baldwin.Lxts_128_dec: 2559c0855eaaSJohn Baldwin ld1 {v18.4s,v19.4s},[x3],#32 // load key schedule... 2560c0855eaaSJohn Baldwin aesd v0.16b,v16.16b 2561c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2562c0855eaaSJohn Baldwin aesd v0.16b,v17.16b 2563c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2564c0855eaaSJohn Baldwin ld1 {v20.4s,v21.4s},[x3],#32 // load key schedule... 2565c0855eaaSJohn Baldwin aesd v0.16b,v18.16b 2566c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2567c0855eaaSJohn Baldwin aesd v0.16b,v19.16b 2568c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2569c0855eaaSJohn Baldwin ld1 {v22.4s,v23.4s},[x3],#32 // load key schedule... 2570c0855eaaSJohn Baldwin aesd v0.16b,v20.16b 2571c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2572c0855eaaSJohn Baldwin aesd v0.16b,v21.16b 2573c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2574c0855eaaSJohn Baldwin ld1 {v7.4s},[x3] 2575c0855eaaSJohn Baldwin aesd v0.16b,v22.16b 2576c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2577c0855eaaSJohn Baldwin aesd v0.16b,v23.16b 2578c0855eaaSJohn Baldwin eor v0.16b,v0.16b,v7.16b 2579c0855eaaSJohn Baldwin eor v0.16b,v6.16b,v0.16b 2580c0855eaaSJohn Baldwin st1 {v0.16b},[x1] 2581c0855eaaSJohn Baldwin b .Lxts_dec_final_abort 2582c0855eaaSJohn Baldwin.Lxts_dec_big_size: 2583c0855eaaSJohn Baldwin stp x19,x20,[sp,#-64]! 
2584c0855eaaSJohn Baldwin stp x21,x22,[sp,#48] 2585c0855eaaSJohn Baldwin stp d8,d9,[sp,#32] 2586c0855eaaSJohn Baldwin stp d10,d11,[sp,#16] 2587c0855eaaSJohn Baldwin 2588c0855eaaSJohn Baldwin and x21,x2,#0xf 2589c0855eaaSJohn Baldwin and x2,x2,#-16 2590c0855eaaSJohn Baldwin subs x2,x2,#16 2591c0855eaaSJohn Baldwin mov x8,#16 2592c0855eaaSJohn Baldwin b.lo .Lxts_dec_abort 2593c0855eaaSJohn Baldwin 2594c0855eaaSJohn Baldwin // Encrypt the iv with key2, as the first XEX iv 2595c0855eaaSJohn Baldwin ldr w6,[x4,#240] 2596c0855eaaSJohn Baldwin ld1 {v0.4s},[x4],#16 2597c0855eaaSJohn Baldwin ld1 {v6.16b},[x5] 2598c0855eaaSJohn Baldwin sub w6,w6,#2 2599c0855eaaSJohn Baldwin ld1 {v1.4s},[x4],#16 2600c0855eaaSJohn Baldwin 2601c0855eaaSJohn Baldwin.Loop_dec_iv_enc: 2602c0855eaaSJohn Baldwin aese v6.16b,v0.16b 2603c0855eaaSJohn Baldwin aesmc v6.16b,v6.16b 2604c0855eaaSJohn Baldwin ld1 {v0.4s},[x4],#16 2605c0855eaaSJohn Baldwin subs w6,w6,#2 2606c0855eaaSJohn Baldwin aese v6.16b,v1.16b 2607c0855eaaSJohn Baldwin aesmc v6.16b,v6.16b 2608c0855eaaSJohn Baldwin ld1 {v1.4s},[x4],#16 2609c0855eaaSJohn Baldwin b.gt .Loop_dec_iv_enc 2610c0855eaaSJohn Baldwin 2611c0855eaaSJohn Baldwin aese v6.16b,v0.16b 2612c0855eaaSJohn Baldwin aesmc v6.16b,v6.16b 2613c0855eaaSJohn Baldwin ld1 {v0.4s},[x4] 2614c0855eaaSJohn Baldwin aese v6.16b,v1.16b 2615c0855eaaSJohn Baldwin eor v6.16b,v6.16b,v0.16b 2616c0855eaaSJohn Baldwin 2617c0855eaaSJohn Baldwin // The iv for second block 2618c0855eaaSJohn Baldwin // x9- iv(low), x10 - iv(high) 2619c0855eaaSJohn Baldwin // the five ivs stored into, v6.16b,v8.16b,v9.16b,v10.16b,v11.16b 2620c0855eaaSJohn Baldwin fmov x9,d6 2621c0855eaaSJohn Baldwin fmov x10,v6.d[1] 2622c0855eaaSJohn Baldwin mov w19,#0x87 2623c0855eaaSJohn Baldwin extr x22,x10,x10,#32 2624c0855eaaSJohn Baldwin extr x10,x10,x9,#63 2625c0855eaaSJohn Baldwin and w11,w19,w22,asr #31 2626c0855eaaSJohn Baldwin eor x9,x11,x9,lsl #1 2627c0855eaaSJohn Baldwin fmov d8,x9 2628c0855eaaSJohn Baldwin fmov 
v8.d[1],x10 2629c0855eaaSJohn Baldwin 2630c0855eaaSJohn Baldwin ldr w5,[x3,#240] // load rounds number 2631c0855eaaSJohn Baldwin 2632c0855eaaSJohn Baldwin // The iv for third block 2633c0855eaaSJohn Baldwin extr x22,x10,x10,#32 2634c0855eaaSJohn Baldwin extr x10,x10,x9,#63 2635c0855eaaSJohn Baldwin and w11,w19,w22,asr #31 2636c0855eaaSJohn Baldwin eor x9,x11,x9,lsl #1 2637c0855eaaSJohn Baldwin fmov d9,x9 2638c0855eaaSJohn Baldwin fmov v9.d[1],x10 2639c0855eaaSJohn Baldwin 2640c0855eaaSJohn Baldwin ld1 {v16.4s,v17.4s},[x3] // load key schedule... 2641c0855eaaSJohn Baldwin sub w5,w5,#6 2642c0855eaaSJohn Baldwin add x7,x3,x5,lsl#4 // pointer to last 7 round keys 2643c0855eaaSJohn Baldwin sub w5,w5,#2 2644c0855eaaSJohn Baldwin ld1 {v18.4s,v19.4s},[x7],#32 // load key schedule... 2645c0855eaaSJohn Baldwin ld1 {v20.4s,v21.4s},[x7],#32 2646c0855eaaSJohn Baldwin ld1 {v22.4s,v23.4s},[x7],#32 2647c0855eaaSJohn Baldwin ld1 {v7.4s},[x7] 2648c0855eaaSJohn Baldwin 2649c0855eaaSJohn Baldwin // The iv for fourth block 2650c0855eaaSJohn Baldwin extr x22,x10,x10,#32 2651c0855eaaSJohn Baldwin extr x10,x10,x9,#63 2652c0855eaaSJohn Baldwin and w11,w19,w22,asr #31 2653c0855eaaSJohn Baldwin eor x9,x11,x9,lsl #1 2654c0855eaaSJohn Baldwin fmov d10,x9 2655c0855eaaSJohn Baldwin fmov v10.d[1],x10 2656c0855eaaSJohn Baldwin 2657c0855eaaSJohn Baldwin add x7,x3,#32 2658c0855eaaSJohn Baldwin mov w6,w5 2659c0855eaaSJohn Baldwin b .Lxts_dec 2660c0855eaaSJohn Baldwin 2661c0855eaaSJohn Baldwin // Decryption 2662c0855eaaSJohn Baldwin.align 5 2663c0855eaaSJohn Baldwin.Lxts_dec: 2664c0855eaaSJohn Baldwin tst x21,#0xf 2665c0855eaaSJohn Baldwin b.eq .Lxts_dec_begin 2666c0855eaaSJohn Baldwin subs x2,x2,#16 2667c0855eaaSJohn Baldwin csel x8,xzr,x8,eq 2668c0855eaaSJohn Baldwin ld1 {v0.16b},[x0],#16 2669c0855eaaSJohn Baldwin b.lo .Lxts_done 2670c0855eaaSJohn Baldwin sub x0,x0,#16 2671c0855eaaSJohn Baldwin.Lxts_dec_begin: 2672c0855eaaSJohn Baldwin ld1 {v0.16b},[x0],x8 2673c0855eaaSJohn Baldwin subs x2,x2,#32 // 
bias 2674c0855eaaSJohn Baldwin add w6,w5,#2 2675c0855eaaSJohn Baldwin orr v3.16b,v0.16b,v0.16b 2676c0855eaaSJohn Baldwin orr v1.16b,v0.16b,v0.16b 2677c0855eaaSJohn Baldwin orr v28.16b,v0.16b,v0.16b 2678c0855eaaSJohn Baldwin ld1 {v24.16b},[x0],#16 2679c0855eaaSJohn Baldwin orr v27.16b,v24.16b,v24.16b 2680c0855eaaSJohn Baldwin orr v29.16b,v24.16b,v24.16b 2681c0855eaaSJohn Baldwin b.lo .Lxts_inner_dec_tail 2682c0855eaaSJohn Baldwin eor v0.16b,v0.16b,v6.16b // before decryt, xor with iv 2683c0855eaaSJohn Baldwin eor v24.16b,v24.16b,v8.16b 2684c0855eaaSJohn Baldwin 2685c0855eaaSJohn Baldwin orr v1.16b,v24.16b,v24.16b 2686c0855eaaSJohn Baldwin ld1 {v24.16b},[x0],#16 2687c0855eaaSJohn Baldwin orr v2.16b,v0.16b,v0.16b 2688c0855eaaSJohn Baldwin orr v3.16b,v1.16b,v1.16b 2689c0855eaaSJohn Baldwin eor v27.16b,v24.16b,v9.16b // third block xox with third iv 2690c0855eaaSJohn Baldwin eor v24.16b,v24.16b,v9.16b 2691c0855eaaSJohn Baldwin cmp x2,#32 2692c0855eaaSJohn Baldwin b.lo .Lxts_outer_dec_tail 2693c0855eaaSJohn Baldwin 2694c0855eaaSJohn Baldwin ld1 {v25.16b},[x0],#16 2695c0855eaaSJohn Baldwin 2696c0855eaaSJohn Baldwin // The iv for fifth block 2697c0855eaaSJohn Baldwin extr x22,x10,x10,#32 2698c0855eaaSJohn Baldwin extr x10,x10,x9,#63 2699c0855eaaSJohn Baldwin and w11,w19,w22,asr #31 2700c0855eaaSJohn Baldwin eor x9,x11,x9,lsl #1 2701c0855eaaSJohn Baldwin fmov d11,x9 2702c0855eaaSJohn Baldwin fmov v11.d[1],x10 2703c0855eaaSJohn Baldwin 2704c0855eaaSJohn Baldwin ld1 {v26.16b},[x0],#16 2705c0855eaaSJohn Baldwin eor v25.16b,v25.16b,v10.16b // the fourth block 2706c0855eaaSJohn Baldwin eor v26.16b,v26.16b,v11.16b 2707c0855eaaSJohn Baldwin sub x2,x2,#32 // bias 2708c0855eaaSJohn Baldwin mov w6,w5 2709c0855eaaSJohn Baldwin b .Loop5x_xts_dec 2710c0855eaaSJohn Baldwin 2711c0855eaaSJohn Baldwin.align 4 2712c0855eaaSJohn Baldwin.Loop5x_xts_dec: 2713c0855eaaSJohn Baldwin aesd v0.16b,v16.16b 2714c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2715c0855eaaSJohn Baldwin aesd v1.16b,v16.16b 
2716c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 2717c0855eaaSJohn Baldwin aesd v24.16b,v16.16b 2718c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 2719c0855eaaSJohn Baldwin aesd v25.16b,v16.16b 2720c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 2721c0855eaaSJohn Baldwin aesd v26.16b,v16.16b 2722c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 2723c0855eaaSJohn Baldwin ld1 {v16.4s},[x7],#16 // load key schedule... 2724c0855eaaSJohn Baldwin subs w6,w6,#2 2725c0855eaaSJohn Baldwin aesd v0.16b,v17.16b 2726c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2727c0855eaaSJohn Baldwin aesd v1.16b,v17.16b 2728c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 2729c0855eaaSJohn Baldwin aesd v24.16b,v17.16b 2730c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 2731c0855eaaSJohn Baldwin aesd v25.16b,v17.16b 2732c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 2733c0855eaaSJohn Baldwin aesd v26.16b,v17.16b 2734c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 2735c0855eaaSJohn Baldwin ld1 {v17.4s},[x7],#16 // load key schedule... 2736c0855eaaSJohn Baldwin b.gt .Loop5x_xts_dec 2737c0855eaaSJohn Baldwin 2738c0855eaaSJohn Baldwin aesd v0.16b,v16.16b 2739c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2740c0855eaaSJohn Baldwin aesd v1.16b,v16.16b 2741c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 2742c0855eaaSJohn Baldwin aesd v24.16b,v16.16b 2743c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 2744c0855eaaSJohn Baldwin aesd v25.16b,v16.16b 2745c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 2746c0855eaaSJohn Baldwin aesd v26.16b,v16.16b 2747c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 2748c0855eaaSJohn Baldwin subs x2,x2,#0x50 // because .Lxts_dec_tail4x 2749c0855eaaSJohn Baldwin 2750c0855eaaSJohn Baldwin aesd v0.16b,v17.16b 2751c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2752c0855eaaSJohn Baldwin aesd v1.16b,v17.16b 2753c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 2754c0855eaaSJohn Baldwin aesd v24.16b,v17.16b 2755c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 2756c0855eaaSJohn Baldwin aesd v25.16b,v17.16b 2757c0855eaaSJohn Baldwin aesimc 
v25.16b,v25.16b 2758c0855eaaSJohn Baldwin aesd v26.16b,v17.16b 2759c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 2760c0855eaaSJohn Baldwin csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo 2761c0855eaaSJohn Baldwin mov x7,x3 2762c0855eaaSJohn Baldwin 2763c0855eaaSJohn Baldwin aesd v0.16b,v18.16b 2764c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2765c0855eaaSJohn Baldwin aesd v1.16b,v18.16b 2766c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 2767c0855eaaSJohn Baldwin aesd v24.16b,v18.16b 2768c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 2769c0855eaaSJohn Baldwin aesd v25.16b,v18.16b 2770c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 2771c0855eaaSJohn Baldwin aesd v26.16b,v18.16b 2772c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 2773c0855eaaSJohn Baldwin add x0,x0,x6 // x0 is adjusted in such way that 2774c0855eaaSJohn Baldwin // at exit from the loop v1.16b-v26.16b 2775c0855eaaSJohn Baldwin // are loaded with last "words" 2776c0855eaaSJohn Baldwin add x6,x2,#0x60 // because .Lxts_dec_tail4x 2777c0855eaaSJohn Baldwin 2778c0855eaaSJohn Baldwin aesd v0.16b,v19.16b 2779c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2780c0855eaaSJohn Baldwin aesd v1.16b,v19.16b 2781c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 2782c0855eaaSJohn Baldwin aesd v24.16b,v19.16b 2783c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 2784c0855eaaSJohn Baldwin aesd v25.16b,v19.16b 2785c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 2786c0855eaaSJohn Baldwin aesd v26.16b,v19.16b 2787c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 2788c0855eaaSJohn Baldwin 2789c0855eaaSJohn Baldwin aesd v0.16b,v20.16b 2790c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2791c0855eaaSJohn Baldwin aesd v1.16b,v20.16b 2792c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 2793c0855eaaSJohn Baldwin aesd v24.16b,v20.16b 2794c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 2795c0855eaaSJohn Baldwin aesd v25.16b,v20.16b 2796c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 2797c0855eaaSJohn Baldwin aesd v26.16b,v20.16b 2798c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 
2799c0855eaaSJohn Baldwin 2800c0855eaaSJohn Baldwin aesd v0.16b,v21.16b 2801c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2802c0855eaaSJohn Baldwin aesd v1.16b,v21.16b 2803c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 2804c0855eaaSJohn Baldwin aesd v24.16b,v21.16b 2805c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 2806c0855eaaSJohn Baldwin aesd v25.16b,v21.16b 2807c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 2808c0855eaaSJohn Baldwin aesd v26.16b,v21.16b 2809c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 2810c0855eaaSJohn Baldwin 2811c0855eaaSJohn Baldwin aesd v0.16b,v22.16b 2812c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2813c0855eaaSJohn Baldwin aesd v1.16b,v22.16b 2814c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 2815c0855eaaSJohn Baldwin aesd v24.16b,v22.16b 2816c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 2817c0855eaaSJohn Baldwin aesd v25.16b,v22.16b 2818c0855eaaSJohn Baldwin aesimc v25.16b,v25.16b 2819c0855eaaSJohn Baldwin aesd v26.16b,v22.16b 2820c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 2821c0855eaaSJohn Baldwin 2822c0855eaaSJohn Baldwin eor v4.16b,v7.16b,v6.16b 2823c0855eaaSJohn Baldwin aesd v0.16b,v23.16b 2824c0855eaaSJohn Baldwin // The iv for first block of next iteration. 
2825c0855eaaSJohn Baldwin extr x22,x10,x10,#32 2826c0855eaaSJohn Baldwin extr x10,x10,x9,#63 2827c0855eaaSJohn Baldwin and w11,w19,w22,asr #31 2828c0855eaaSJohn Baldwin eor x9,x11,x9,lsl #1 2829c0855eaaSJohn Baldwin fmov d6,x9 2830c0855eaaSJohn Baldwin fmov v6.d[1],x10 2831c0855eaaSJohn Baldwin eor v5.16b,v7.16b,v8.16b 2832c0855eaaSJohn Baldwin ld1 {v2.16b},[x0],#16 2833c0855eaaSJohn Baldwin aesd v1.16b,v23.16b 2834c0855eaaSJohn Baldwin // The iv for second block 2835c0855eaaSJohn Baldwin extr x22,x10,x10,#32 2836c0855eaaSJohn Baldwin extr x10,x10,x9,#63 2837c0855eaaSJohn Baldwin and w11,w19,w22,asr #31 2838c0855eaaSJohn Baldwin eor x9,x11,x9,lsl #1 2839c0855eaaSJohn Baldwin fmov d8,x9 2840c0855eaaSJohn Baldwin fmov v8.d[1],x10 2841c0855eaaSJohn Baldwin eor v17.16b,v7.16b,v9.16b 2842c0855eaaSJohn Baldwin ld1 {v3.16b},[x0],#16 2843c0855eaaSJohn Baldwin aesd v24.16b,v23.16b 2844c0855eaaSJohn Baldwin // The iv for third block 2845c0855eaaSJohn Baldwin extr x22,x10,x10,#32 2846c0855eaaSJohn Baldwin extr x10,x10,x9,#63 2847c0855eaaSJohn Baldwin and w11,w19,w22,asr #31 2848c0855eaaSJohn Baldwin eor x9,x11,x9,lsl #1 2849c0855eaaSJohn Baldwin fmov d9,x9 2850c0855eaaSJohn Baldwin fmov v9.d[1],x10 2851c0855eaaSJohn Baldwin eor v30.16b,v7.16b,v10.16b 2852c0855eaaSJohn Baldwin ld1 {v27.16b},[x0],#16 2853c0855eaaSJohn Baldwin aesd v25.16b,v23.16b 2854c0855eaaSJohn Baldwin // The iv for fourth block 2855c0855eaaSJohn Baldwin extr x22,x10,x10,#32 2856c0855eaaSJohn Baldwin extr x10,x10,x9,#63 2857c0855eaaSJohn Baldwin and w11,w19,w22,asr #31 2858c0855eaaSJohn Baldwin eor x9,x11,x9,lsl #1 2859c0855eaaSJohn Baldwin fmov d10,x9 2860c0855eaaSJohn Baldwin fmov v10.d[1],x10 2861c0855eaaSJohn Baldwin eor v31.16b,v7.16b,v11.16b 2862c0855eaaSJohn Baldwin ld1 {v28.16b},[x0],#16 2863c0855eaaSJohn Baldwin aesd v26.16b,v23.16b 2864c0855eaaSJohn Baldwin 2865c0855eaaSJohn Baldwin // The iv for fifth block 2866c0855eaaSJohn Baldwin extr x22,x10,x10,#32 2867c0855eaaSJohn Baldwin extr 
x10,x10,x9,#63 2868c0855eaaSJohn Baldwin and w11,w19,w22,asr #31 2869c0855eaaSJohn Baldwin eor x9,x11,x9,lsl #1 2870c0855eaaSJohn Baldwin fmov d11,x9 2871c0855eaaSJohn Baldwin fmov v11.d[1],x10 2872c0855eaaSJohn Baldwin 2873c0855eaaSJohn Baldwin ld1 {v29.16b},[x0],#16 2874c0855eaaSJohn Baldwin cbz x6,.Lxts_dec_tail4x 2875c0855eaaSJohn Baldwin ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 2876c0855eaaSJohn Baldwin eor v4.16b,v4.16b,v0.16b 2877c0855eaaSJohn Baldwin eor v0.16b,v2.16b,v6.16b 2878c0855eaaSJohn Baldwin eor v5.16b,v5.16b,v1.16b 2879c0855eaaSJohn Baldwin eor v1.16b,v3.16b,v8.16b 2880c0855eaaSJohn Baldwin eor v17.16b,v17.16b,v24.16b 2881c0855eaaSJohn Baldwin eor v24.16b,v27.16b,v9.16b 2882c0855eaaSJohn Baldwin eor v30.16b,v30.16b,v25.16b 2883c0855eaaSJohn Baldwin eor v25.16b,v28.16b,v10.16b 2884c0855eaaSJohn Baldwin eor v31.16b,v31.16b,v26.16b 2885c0855eaaSJohn Baldwin st1 {v4.16b},[x1],#16 2886c0855eaaSJohn Baldwin eor v26.16b,v29.16b,v11.16b 2887c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 2888c0855eaaSJohn Baldwin mov w6,w5 2889c0855eaaSJohn Baldwin st1 {v17.16b},[x1],#16 2890c0855eaaSJohn Baldwin ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 2891c0855eaaSJohn Baldwin st1 {v30.16b},[x1],#16 2892c0855eaaSJohn Baldwin st1 {v31.16b},[x1],#16 2893c0855eaaSJohn Baldwin b.hs .Loop5x_xts_dec 2894c0855eaaSJohn Baldwin 2895c0855eaaSJohn Baldwin cmn x2,#0x10 2896c0855eaaSJohn Baldwin b.ne .Loop5x_dec_after 2897c0855eaaSJohn Baldwin // If x2(x2) equal to -0x10, the left blocks is 4. 2898c0855eaaSJohn Baldwin // After specially processing, utilize the five blocks processing again. 2899c0855eaaSJohn Baldwin // It will use the following IVs: v6.16b,v6.16b,v8.16b,v9.16b,v10.16b. 
2900c0855eaaSJohn Baldwin orr v11.16b,v10.16b,v10.16b 2901c0855eaaSJohn Baldwin orr v10.16b,v9.16b,v9.16b 2902c0855eaaSJohn Baldwin orr v9.16b,v8.16b,v8.16b 2903c0855eaaSJohn Baldwin orr v8.16b,v6.16b,v6.16b 2904c0855eaaSJohn Baldwin fmov x9,d11 2905c0855eaaSJohn Baldwin fmov x10,v11.d[1] 2906c0855eaaSJohn Baldwin eor v0.16b,v6.16b,v2.16b 2907c0855eaaSJohn Baldwin eor v1.16b,v8.16b,v3.16b 2908c0855eaaSJohn Baldwin eor v24.16b,v27.16b,v9.16b 2909c0855eaaSJohn Baldwin eor v25.16b,v28.16b,v10.16b 2910c0855eaaSJohn Baldwin eor v26.16b,v29.16b,v11.16b 2911c0855eaaSJohn Baldwin b.eq .Loop5x_xts_dec 2912c0855eaaSJohn Baldwin 2913c0855eaaSJohn Baldwin.Loop5x_dec_after: 2914c0855eaaSJohn Baldwin add x2,x2,#0x50 2915c0855eaaSJohn Baldwin cbz x2,.Lxts_done 2916c0855eaaSJohn Baldwin 2917c0855eaaSJohn Baldwin add w6,w5,#2 2918c0855eaaSJohn Baldwin subs x2,x2,#0x30 2919c0855eaaSJohn Baldwin b.lo .Lxts_inner_dec_tail 2920c0855eaaSJohn Baldwin 2921c0855eaaSJohn Baldwin eor v0.16b,v6.16b,v27.16b 2922c0855eaaSJohn Baldwin eor v1.16b,v8.16b,v28.16b 2923c0855eaaSJohn Baldwin eor v24.16b,v29.16b,v9.16b 2924c0855eaaSJohn Baldwin b .Lxts_outer_dec_tail 2925c0855eaaSJohn Baldwin 2926c0855eaaSJohn Baldwin.align 4 2927c0855eaaSJohn Baldwin.Lxts_dec_tail4x: 2928c0855eaaSJohn Baldwin add x0,x0,#16 2929c0855eaaSJohn Baldwin tst x21,#0xf 2930c0855eaaSJohn Baldwin eor v5.16b,v1.16b,v4.16b 2931c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 2932c0855eaaSJohn Baldwin eor v17.16b,v24.16b,v17.16b 2933c0855eaaSJohn Baldwin st1 {v17.16b},[x1],#16 2934c0855eaaSJohn Baldwin eor v30.16b,v25.16b,v30.16b 2935c0855eaaSJohn Baldwin eor v31.16b,v26.16b,v31.16b 2936c0855eaaSJohn Baldwin st1 {v30.16b,v31.16b},[x1],#32 2937c0855eaaSJohn Baldwin 2938c0855eaaSJohn Baldwin b.eq .Lxts_dec_abort 2939c0855eaaSJohn Baldwin ld1 {v0.16b},[x0],#16 2940c0855eaaSJohn Baldwin b .Lxts_done 2941c0855eaaSJohn Baldwin.align 4 2942c0855eaaSJohn Baldwin.Lxts_outer_dec_tail: 2943c0855eaaSJohn Baldwin aesd v0.16b,v16.16b 
2944c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2945c0855eaaSJohn Baldwin aesd v1.16b,v16.16b 2946c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 2947c0855eaaSJohn Baldwin aesd v24.16b,v16.16b 2948c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 2949c0855eaaSJohn Baldwin ld1 {v16.4s},[x7],#16 2950c0855eaaSJohn Baldwin subs w6,w6,#2 2951c0855eaaSJohn Baldwin aesd v0.16b,v17.16b 2952c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2953c0855eaaSJohn Baldwin aesd v1.16b,v17.16b 2954c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 2955c0855eaaSJohn Baldwin aesd v24.16b,v17.16b 2956c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 2957c0855eaaSJohn Baldwin ld1 {v17.4s},[x7],#16 2958c0855eaaSJohn Baldwin b.gt .Lxts_outer_dec_tail 2959c0855eaaSJohn Baldwin 2960c0855eaaSJohn Baldwin aesd v0.16b,v16.16b 2961c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2962c0855eaaSJohn Baldwin aesd v1.16b,v16.16b 2963c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 2964c0855eaaSJohn Baldwin aesd v24.16b,v16.16b 2965c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 2966c0855eaaSJohn Baldwin eor v4.16b,v6.16b,v7.16b 2967c0855eaaSJohn Baldwin subs x2,x2,#0x30 2968c0855eaaSJohn Baldwin // The iv for first block 2969c0855eaaSJohn Baldwin fmov x9,d9 2970c0855eaaSJohn Baldwin fmov x10,v9.d[1] 2971c0855eaaSJohn Baldwin mov w19,#0x87 2972c0855eaaSJohn Baldwin extr x22,x10,x10,#32 2973c0855eaaSJohn Baldwin extr x10,x10,x9,#63 2974c0855eaaSJohn Baldwin and w11,w19,w22,asr #31 2975c0855eaaSJohn Baldwin eor x9,x11,x9,lsl #1 2976c0855eaaSJohn Baldwin fmov d6,x9 2977c0855eaaSJohn Baldwin fmov v6.d[1],x10 2978c0855eaaSJohn Baldwin eor v5.16b,v8.16b,v7.16b 2979c0855eaaSJohn Baldwin csel x6,x2,x6,lo // x6, w6, is zero at this point 2980c0855eaaSJohn Baldwin aesd v0.16b,v17.16b 2981c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 2982c0855eaaSJohn Baldwin aesd v1.16b,v17.16b 2983c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 2984c0855eaaSJohn Baldwin aesd v24.16b,v17.16b 2985c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 2986c0855eaaSJohn Baldwin eor 
v17.16b,v9.16b,v7.16b 2987c0855eaaSJohn Baldwin // The iv for second block 2988c0855eaaSJohn Baldwin extr x22,x10,x10,#32 2989c0855eaaSJohn Baldwin extr x10,x10,x9,#63 2990c0855eaaSJohn Baldwin and w11,w19,w22,asr #31 2991c0855eaaSJohn Baldwin eor x9,x11,x9,lsl #1 2992c0855eaaSJohn Baldwin fmov d8,x9 2993c0855eaaSJohn Baldwin fmov v8.d[1],x10 2994c0855eaaSJohn Baldwin 2995c0855eaaSJohn Baldwin add x6,x6,#0x20 2996c0855eaaSJohn Baldwin add x0,x0,x6 // x0 is adjusted to the last data 2997c0855eaaSJohn Baldwin 2998c0855eaaSJohn Baldwin mov x7,x3 2999c0855eaaSJohn Baldwin 3000c0855eaaSJohn Baldwin // The iv for third block 3001c0855eaaSJohn Baldwin extr x22,x10,x10,#32 3002c0855eaaSJohn Baldwin extr x10,x10,x9,#63 3003c0855eaaSJohn Baldwin and w11,w19,w22,asr #31 3004c0855eaaSJohn Baldwin eor x9,x11,x9,lsl #1 3005c0855eaaSJohn Baldwin fmov d9,x9 3006c0855eaaSJohn Baldwin fmov v9.d[1],x10 3007c0855eaaSJohn Baldwin 3008c0855eaaSJohn Baldwin aesd v0.16b,v20.16b 3009c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 3010c0855eaaSJohn Baldwin aesd v1.16b,v20.16b 3011c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 3012c0855eaaSJohn Baldwin aesd v24.16b,v20.16b 3013c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 3014c0855eaaSJohn Baldwin aesd v0.16b,v21.16b 3015c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 3016c0855eaaSJohn Baldwin aesd v1.16b,v21.16b 3017c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 3018c0855eaaSJohn Baldwin aesd v24.16b,v21.16b 3019c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 3020c0855eaaSJohn Baldwin aesd v0.16b,v22.16b 3021c0855eaaSJohn Baldwin aesimc v0.16b,v0.16b 3022c0855eaaSJohn Baldwin aesd v1.16b,v22.16b 3023c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 3024c0855eaaSJohn Baldwin aesd v24.16b,v22.16b 3025c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 3026c0855eaaSJohn Baldwin ld1 {v27.16b},[x0],#16 3027c0855eaaSJohn Baldwin aesd v0.16b,v23.16b 3028c0855eaaSJohn Baldwin aesd v1.16b,v23.16b 3029c0855eaaSJohn Baldwin aesd v24.16b,v23.16b 3030c0855eaaSJohn Baldwin ld1 
{v16.4s},[x7],#16 // re-pre-load rndkey[0] 3031c0855eaaSJohn Baldwin add w6,w5,#2 3032c0855eaaSJohn Baldwin eor v4.16b,v4.16b,v0.16b 3033c0855eaaSJohn Baldwin eor v5.16b,v5.16b,v1.16b 3034c0855eaaSJohn Baldwin eor v24.16b,v24.16b,v17.16b 3035c0855eaaSJohn Baldwin ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 3036c0855eaaSJohn Baldwin st1 {v4.16b},[x1],#16 3037c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 3038c0855eaaSJohn Baldwin st1 {v24.16b},[x1],#16 3039c0855eaaSJohn Baldwin 3040c0855eaaSJohn Baldwin cmn x2,#0x30 3041c0855eaaSJohn Baldwin add x2,x2,#0x30 3042c0855eaaSJohn Baldwin b.eq .Lxts_done 3043c0855eaaSJohn Baldwin sub x2,x2,#0x30 3044c0855eaaSJohn Baldwin orr v28.16b,v3.16b,v3.16b 3045c0855eaaSJohn Baldwin orr v29.16b,v27.16b,v27.16b 3046c0855eaaSJohn Baldwin nop 3047c0855eaaSJohn Baldwin 3048c0855eaaSJohn Baldwin.Lxts_inner_dec_tail: 3049c0855eaaSJohn Baldwin // x2 == -0x10 means two blocks left. 3050c0855eaaSJohn Baldwin cmn x2,#0x10 3051c0855eaaSJohn Baldwin eor v1.16b,v28.16b,v6.16b 3052c0855eaaSJohn Baldwin eor v24.16b,v29.16b,v8.16b 3053c0855eaaSJohn Baldwin b.eq .Lxts_dec_tail_loop 3054c0855eaaSJohn Baldwin eor v24.16b,v29.16b,v6.16b 3055c0855eaaSJohn Baldwin.Lxts_dec_tail_loop: 3056c0855eaaSJohn Baldwin aesd v1.16b,v16.16b 3057c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 3058c0855eaaSJohn Baldwin aesd v24.16b,v16.16b 3059c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 3060c0855eaaSJohn Baldwin ld1 {v16.4s},[x7],#16 3061c0855eaaSJohn Baldwin subs w6,w6,#2 3062c0855eaaSJohn Baldwin aesd v1.16b,v17.16b 3063c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 3064c0855eaaSJohn Baldwin aesd v24.16b,v17.16b 3065c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 3066c0855eaaSJohn Baldwin ld1 {v17.4s},[x7],#16 3067c0855eaaSJohn Baldwin b.gt .Lxts_dec_tail_loop 3068c0855eaaSJohn Baldwin 3069c0855eaaSJohn Baldwin aesd v1.16b,v16.16b 3070c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 3071c0855eaaSJohn Baldwin aesd v24.16b,v16.16b 3072c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 
3073c0855eaaSJohn Baldwin aesd v1.16b,v17.16b 3074c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 3075c0855eaaSJohn Baldwin aesd v24.16b,v17.16b 3076c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 3077c0855eaaSJohn Baldwin aesd v1.16b,v20.16b 3078c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 3079c0855eaaSJohn Baldwin aesd v24.16b,v20.16b 3080c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 3081c0855eaaSJohn Baldwin cmn x2,#0x20 3082c0855eaaSJohn Baldwin aesd v1.16b,v21.16b 3083c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 3084c0855eaaSJohn Baldwin aesd v24.16b,v21.16b 3085c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 3086c0855eaaSJohn Baldwin eor v5.16b,v6.16b,v7.16b 3087c0855eaaSJohn Baldwin aesd v1.16b,v22.16b 3088c0855eaaSJohn Baldwin aesimc v1.16b,v1.16b 3089c0855eaaSJohn Baldwin aesd v24.16b,v22.16b 3090c0855eaaSJohn Baldwin aesimc v24.16b,v24.16b 3091c0855eaaSJohn Baldwin eor v17.16b,v8.16b,v7.16b 3092c0855eaaSJohn Baldwin aesd v1.16b,v23.16b 3093c0855eaaSJohn Baldwin aesd v24.16b,v23.16b 3094c0855eaaSJohn Baldwin b.eq .Lxts_dec_one 3095c0855eaaSJohn Baldwin eor v5.16b,v5.16b,v1.16b 3096c0855eaaSJohn Baldwin eor v17.16b,v17.16b,v24.16b 3097c0855eaaSJohn Baldwin orr v6.16b,v9.16b,v9.16b 3098c0855eaaSJohn Baldwin orr v8.16b,v10.16b,v10.16b 3099c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 3100c0855eaaSJohn Baldwin st1 {v17.16b},[x1],#16 3101c0855eaaSJohn Baldwin add x2,x2,#16 3102c0855eaaSJohn Baldwin b .Lxts_done 3103c0855eaaSJohn Baldwin 3104c0855eaaSJohn Baldwin.Lxts_dec_one: 3105c0855eaaSJohn Baldwin eor v5.16b,v5.16b,v24.16b 3106c0855eaaSJohn Baldwin orr v6.16b,v8.16b,v8.16b 3107c0855eaaSJohn Baldwin orr v8.16b,v9.16b,v9.16b 3108c0855eaaSJohn Baldwin st1 {v5.16b},[x1],#16 3109c0855eaaSJohn Baldwin add x2,x2,#32 3110c0855eaaSJohn Baldwin 3111c0855eaaSJohn Baldwin.Lxts_done: 3112c0855eaaSJohn Baldwin tst x21,#0xf 3113c0855eaaSJohn Baldwin b.eq .Lxts_dec_abort 3114c0855eaaSJohn Baldwin // Processing the last two blocks with cipher stealing. 
3115c0855eaaSJohn Baldwin mov x7,x3 3116c0855eaaSJohn Baldwin cbnz x2,.Lxts_dec_1st_done 3117c0855eaaSJohn Baldwin ld1 {v0.16b},[x0],#16 3118c0855eaaSJohn Baldwin 3119c0855eaaSJohn Baldwin // Decrypt the last secod block to get the last plain text block 3120c0855eaaSJohn Baldwin.Lxts_dec_1st_done: 3121c0855eaaSJohn Baldwin eor v26.16b,v0.16b,v8.16b 3122c0855eaaSJohn Baldwin ldr w6,[x3,#240] 3123c0855eaaSJohn Baldwin ld1 {v0.4s},[x3],#16 3124c0855eaaSJohn Baldwin sub w6,w6,#2 3125c0855eaaSJohn Baldwin ld1 {v1.4s},[x3],#16 3126c0855eaaSJohn Baldwin.Loop_final_2nd_dec: 3127c0855eaaSJohn Baldwin aesd v26.16b,v0.16b 3128c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 3129c0855eaaSJohn Baldwin ld1 {v0.4s},[x3],#16 // load key schedule... 3130c0855eaaSJohn Baldwin subs w6,w6,#2 3131c0855eaaSJohn Baldwin aesd v26.16b,v1.16b 3132c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 3133c0855eaaSJohn Baldwin ld1 {v1.4s},[x3],#16 // load key schedule... 3134c0855eaaSJohn Baldwin b.gt .Loop_final_2nd_dec 3135c0855eaaSJohn Baldwin 3136c0855eaaSJohn Baldwin aesd v26.16b,v0.16b 3137c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 3138c0855eaaSJohn Baldwin ld1 {v0.4s},[x3] 3139c0855eaaSJohn Baldwin aesd v26.16b,v1.16b 3140c0855eaaSJohn Baldwin eor v26.16b,v26.16b,v0.16b 3141c0855eaaSJohn Baldwin eor v26.16b,v26.16b,v8.16b 3142c0855eaaSJohn Baldwin st1 {v26.16b},[x1] 3143c0855eaaSJohn Baldwin 3144c0855eaaSJohn Baldwin mov x20,x0 3145c0855eaaSJohn Baldwin add x13,x1,#16 3146c0855eaaSJohn Baldwin 3147c0855eaaSJohn Baldwin // Composite the tailcnt "16 byte not aligned block" into the last second plain blocks 3148c0855eaaSJohn Baldwin // to get the last encrypted block. 
3149c0855eaaSJohn Baldwin.composite_dec_loop: 3150c0855eaaSJohn Baldwin subs x21,x21,#1 3151c0855eaaSJohn Baldwin ldrb w15,[x1,x21] 3152c0855eaaSJohn Baldwin ldrb w14,[x20,x21] 3153c0855eaaSJohn Baldwin strb w15,[x13,x21] 3154c0855eaaSJohn Baldwin strb w14,[x1,x21] 3155c0855eaaSJohn Baldwin b.gt .composite_dec_loop 3156c0855eaaSJohn Baldwin.Lxts_dec_load_done: 3157c0855eaaSJohn Baldwin ld1 {v26.16b},[x1] 3158c0855eaaSJohn Baldwin eor v26.16b,v26.16b,v6.16b 3159c0855eaaSJohn Baldwin 3160c0855eaaSJohn Baldwin // Decrypt the composite block to get the last second plain text block 3161c0855eaaSJohn Baldwin ldr w6,[x7,#240] 3162c0855eaaSJohn Baldwin ld1 {v0.4s},[x7],#16 3163c0855eaaSJohn Baldwin sub w6,w6,#2 3164c0855eaaSJohn Baldwin ld1 {v1.4s},[x7],#16 3165c0855eaaSJohn Baldwin.Loop_final_dec: 3166c0855eaaSJohn Baldwin aesd v26.16b,v0.16b 3167c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 3168c0855eaaSJohn Baldwin ld1 {v0.4s},[x7],#16 // load key schedule... 3169c0855eaaSJohn Baldwin subs w6,w6,#2 3170c0855eaaSJohn Baldwin aesd v26.16b,v1.16b 3171c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 3172c0855eaaSJohn Baldwin ld1 {v1.4s},[x7],#16 // load key schedule... 
3173c0855eaaSJohn Baldwin b.gt .Loop_final_dec 3174c0855eaaSJohn Baldwin 3175c0855eaaSJohn Baldwin aesd v26.16b,v0.16b 3176c0855eaaSJohn Baldwin aesimc v26.16b,v26.16b 3177c0855eaaSJohn Baldwin ld1 {v0.4s},[x7] 3178c0855eaaSJohn Baldwin aesd v26.16b,v1.16b 3179c0855eaaSJohn Baldwin eor v26.16b,v26.16b,v0.16b 3180c0855eaaSJohn Baldwin eor v26.16b,v26.16b,v6.16b 3181c0855eaaSJohn Baldwin st1 {v26.16b},[x1] 3182c0855eaaSJohn Baldwin 3183c0855eaaSJohn Baldwin.Lxts_dec_abort: 3184c0855eaaSJohn Baldwin ldp x21,x22,[sp,#48] 3185c0855eaaSJohn Baldwin ldp d8,d9,[sp,#32] 3186c0855eaaSJohn Baldwin ldp d10,d11,[sp,#16] 3187c0855eaaSJohn Baldwin ldp x19,x20,[sp],#64 3188c0855eaaSJohn Baldwin 3189c0855eaaSJohn Baldwin.Lxts_dec_final_abort: 3190c0855eaaSJohn Baldwin ret 3191c0855eaaSJohn Baldwin.size aes_v8_xts_decrypt,.-aes_v8_xts_decrypt 3192bc3d5698SJohn Baldwin#endif 3193