/* Do not modify. This file is auto-generated from aesv8-armx.pl. */
#include "arm_arch.h"

#if __ARM_MAX_ARCH__>=7
.arch armv7-a @ don't confuse not-so-latest binutils with armv8 :-)
.fpu neon
@ The crypto-extension instructions are emitted as raw bytes so that
@ assemblers without AES support can still build this file. The byte
@ order differs between the Thumb-2 and ARM encodings.
#ifdef __thumb2__
.syntax unified
.thumb
# define INST(a,b,c,d) .byte c,d|0xc,a,b
#else
.code 32
# define INST(a,b,c,d) .byte a,b,c,d
#endif

.text
.align 5
@ Constants consumed by the key-expansion code below:
@   q1 <- first row   (round constant, doubled each round with vshl.u8)
@   q2 <- second row  (vtbl byte-permutation mask)
@   third row         (0x1b, reloaded into q1 for the last two 128-bit rounds)
.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b

@ ----------------------------------------------------------------------
@ aes_v8_set_encrypt_key
@ In:   r0 = user key bytes
@       r1 = key length in bits
@       r2 = key schedule (output)
@ Out:  r0 =  0 on success
@            -1 if r0 or r2 is zero
@            -2 if r1 is below 128, above 256, or not a multiple of 64
@ The round count (10, 12 or 14) is stored at byte offset 240 of the
@ schedule. On the success path r2 points at that word and r12 holds the
@ round count; aes_v8_set_decrypt_key depends on both when it enters
@ through .Lenc_key.
@ Uses r1, r3, r12, q0-q3 and q8-q10 as scratch.
@ ----------------------------------------------------------------------
.globl aes_v8_set_encrypt_key
.type aes_v8_set_encrypt_key,%function
.align 5
aes_v8_set_encrypt_key:
.Lenc_key:
    mov     r3,#-1                      @ provisional result: bad pointer
    cmp     r0,#0
    beq     .Lenc_key_abort
    cmp     r2,#0
    beq     .Lenc_key_abort
    mov     r3,#-2                      @ provisional result: bad key length
    cmp     r1,#128
    blt     .Lenc_key_abort
    cmp     r1,#256
    bgt     .Lenc_key_abort
    tst     r1,#0x3f
    bne     .Lenc_key_abort

    adr     r3,.Lrcon
    cmp     r1,#192                     @ flags are consumed by blt/beq below

    veor    q0,q0,q0                    @ q0 = 0, used as the zero key and for vext shifts
    vld1.8  {q3},[r0]!                  @ first 16 key bytes
    mov     r1,#8                       @ reuse r1
    vld1.32 {q1,q2},[r3]!               @ q1 = rcon, q2 = permutation mask

    blt     .Loop128                    @ 128-bit key
    beq     .L192                       @ 192-bit key
    b       .L256                       @ 256-bit key

.align 4
.Loop128:
    vtbl.8  d20,{q3},d4
    vtbl.8  d21,{q3},d5
    vext.8  q9,q0,q3,#12
    vst1.32 {q3},[r2]!
    INST(0x00,0x43,0xf0,0xf3)           @ aese q10,q0
    subs    r1,r1,#1

    veor    q3,q3,q9
    vext.8  q9,q0,q9,#12
    veor    q3,q3,q9
    vext.8  q9,q0,q9,#12
    veor    q10,q10,q1
    veor    q3,q3,q9
    vshl.u8 q1,q1,#1                    @ next round constant
    veor    q3,q3,q10
    bne     .Loop128

    vld1.32 {q1},[r3]                   @ switch to the 0x1b constant row

    vtbl.8  d20,{q3},d4
    vtbl.8  d21,{q3},d5
    vext.8  q9,q0,q3,#12
    vst1.32 {q3},[r2]!
    INST(0x00,0x43,0xf0,0xf3)           @ aese q10,q0

    veor    q3,q3,q9
    vext.8  q9,q0,q9,#12
    veor    q3,q3,q9
    vext.8  q9,q0,q9,#12
    veor    q10,q10,q1
    veor    q3,q3,q9
    vshl.u8 q1,q1,#1
    veor    q3,q3,q10

    vtbl.8  d20,{q3},d4
    vtbl.8  d21,{q3},d5
    vext.8  q9,q0,q3,#12
    vst1.32 {q3},[r2]!
    INST(0x00,0x43,0xf0,0xf3)           @ aese q10,q0

    veor    q3,q3,q9
    vext.8  q9,q0,q9,#12
    veor    q3,q3,q9
    vext.8  q9,q0,q9,#12
    veor    q10,q10,q1
    veor    q3,q3,q9
    veor    q3,q3,q10
    vst1.32 {q3},[r2]                   @ last round key, no post-increment
    add     r2,r2,#0x50                 @ advance r2 to schedule offset 240

    mov     r12,#10                     @ round count
    b       .Ldone

.align 4
.L192:
    vld1.8  {d16},[r0]!                 @ remaining 8 key bytes
    vmov.i8 q10,#8                      @ borrow q10
    vst1.32 {q3},[r2]!
    vsub.i8 q2,q2,q10                   @ adjust the mask

.Loop192:
    vtbl.8  d20,{q8},d4
    vtbl.8  d21,{q8},d5
    vext.8  q9,q0,q3,#12
#ifdef __ARMEB__
    vst1.32 {q8},[r2]!
    sub     r2,r2,#8                    @ net advance of 8 bytes, as in the #else arm
#else
    vst1.32 {d16},[r2]!
#endif
    INST(0x00,0x43,0xf0,0xf3)           @ aese q10,q0
    subs    r1,r1,#1

    veor    q3,q3,q9
    vext.8  q9,q0,q9,#12
    veor    q3,q3,q9
    vext.8  q9,q0,q9,#12
    veor    q3,q3,q9

    vdup.32 q9,d7[1]
    veor    q9,q9,q8
    veor    q10,q10,q1
    vext.8  q8,q0,q8,#12
    vshl.u8 q1,q1,#1
    veor    q8,q8,q9
    veor    q3,q3,q10
    veor    q8,q8,q10
    vst1.32 {q3},[r2]!
    bne     .Loop192

    mov     r12,#12                     @ round count
    add     r2,r2,#0x20                 @ advance r2 to schedule offset 240
    b       .Ldone

.align 4
.L256:
    vld1.8  {q8},[r0]                   @ second 16 key bytes
    mov     r1,#7
    mov     r12,#14                     @ round count
    vst1.32 {q3},[r2]!

.Loop256:
    vtbl.8  d20,{q8},d4
    vtbl.8  d21,{q8},d5
    vext.8  q9,q0,q3,#12
    vst1.32 {q8},[r2]!
    INST(0x00,0x43,0xf0,0xf3)           @ aese q10,q0
    subs    r1,r1,#1                    @ flags are consumed by beq .Ldone below

    veor    q3,q3,q9
    vext.8  q9,q0,q9,#12
    veor    q3,q3,q9
    vext.8  q9,q0,q9,#12
    veor    q10,q10,q1
    veor    q3,q3,q9
    vshl.u8 q1,q1,#1
    veor    q3,q3,q10
    vst1.32 {q3},[r2]!
    beq     .Ldone

    vdup.32 q10,d7[1]
    vext.8  q9,q0,q8,#12
    INST(0x00,0x43,0xf0,0xf3)           @ aese q10,q0

    veor    q8,q8,q9
    vext.8  q9,q0,q9,#12
    veor    q8,q8,q9
    vext.8  q9,q0,q9,#12
    veor    q8,q8,q9

    veor    q8,q8,q10
    b       .Loop256

.Ldone:
    str     r12,[r2]                    @ store round count after the round keys
    mov     r3,#0

.Lenc_key_abort:
    mov     r0,r3                       @ return value

    bx      lr
.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key

@ ----------------------------------------------------------------------
@ aes_v8_set_decrypt_key
@ Same register arguments as aes_v8_set_encrypt_key. Builds the
@ encryption schedule through .Lenc_key, then reverses the order of the
@ round keys in place and applies aesimc to all but the outermost two.
@ Out:  r0 = 0 on success, otherwise the error from .Lenc_key.
@ ----------------------------------------------------------------------
.globl aes_v8_set_decrypt_key
.type aes_v8_set_decrypt_key,%function
.align 5
aes_v8_set_decrypt_key:
    stmdb   sp!,{r4,lr}
    bl      .Lenc_key

    cmp     r0,#0
    bne     .Ldec_key_abort

    sub     r2,r2,#240                  @ restore original r2
    mov     r4,#-16                     @ post-index step for walking backwards
    add     r0,r2,r12,lsl#4             @ end of key schedule

    vld1.32 {q0},[r2]                   @ swap first and last round keys
    vld1.32 {q1},[r0]
    vst1.32 {q0},[r0],r4
    vst1.32 {q1},[r2]!
@ (aes_v8_set_decrypt_key, continued)
@ r2 walks up from the front of the schedule and r0 walks down from the
@ back (r4 = -16). Each pass swaps one pair of round keys and applies
@ aesimc to both.

.Loop_imc:
    vld1.32 {q0},[r2]
    vld1.32 {q1},[r0]
    INST(0xc0,0x03,0xb0,0xf3)           @ aesimc q0,q0
    INST(0xc2,0x23,0xb0,0xf3)           @ aesimc q1,q1
    vst1.32 {q0},[r0],r4
    vst1.32 {q1},[r2]!
    cmp     r0,r2
    bhi     .Loop_imc

    vld1.32 {q0},[r2]                   @ middle round key: transform in place
    INST(0xc0,0x03,0xb0,0xf3)           @ aesimc q0,q0
    vst1.32 {q0},[r0]

    eor     r0,r0,r0                    @ return value
.Ldec_key_abort:
    ldmia   sp!,{r4,pc}
.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key

@ ----------------------------------------------------------------------
@ aes_v8_encrypt
@ Encrypts one 16-byte block.
@ In:   r0 = input block, r1 = output block, r2 = key schedule
@       (round count is read from offset 240 of the schedule)
@ Uses r2, r3 and q0-q2 as scratch; no stack use.
@ ----------------------------------------------------------------------
.globl aes_v8_encrypt
.type aes_v8_encrypt,%function
.align 5
aes_v8_encrypt:
    ldr     r3,[r2,#240]                @ r3 = round count
    vld1.32 {q0},[r2]!
    vld1.8  {q2},[r0]
    sub     r3,r3,#2
    vld1.32 {q1},[r2]!

.Loop_enc:                              @ two rounds per iteration
    INST(0x00,0x43,0xb0,0xf3)           @ aese q2,q0
    INST(0x84,0x43,0xb0,0xf3)           @ aesmc q2,q2
    vld1.32 {q0},[r2]!
    subs    r3,r3,#2
    INST(0x02,0x43,0xb0,0xf3)           @ aese q2,q1
    INST(0x84,0x43,0xb0,0xf3)           @ aesmc q2,q2
    vld1.32 {q1},[r2]!
    bgt     .Loop_enc

    INST(0x00,0x43,0xb0,0xf3)           @ aese q2,q0
    INST(0x84,0x43,0xb0,0xf3)           @ aesmc q2,q2
    vld1.32 {q0},[r2]                   @ last round key
    INST(0x02,0x43,0xb0,0xf3)           @ aese q2,q1
    veor    q2,q2,q0                    @ final round has no aesmc, only the key xor

    vst1.8  {q2},[r1]
    bx      lr
.size aes_v8_encrypt,.-aes_v8_encrypt

@ ----------------------------------------------------------------------
@ aes_v8_decrypt
@ Decrypts one 16-byte block. Same register interface and structure as
@ aes_v8_encrypt, using aesd/aesimc in place of aese/aesmc.
@ ----------------------------------------------------------------------
.globl aes_v8_decrypt
.type aes_v8_decrypt,%function
.align 5
aes_v8_decrypt:
    ldr     r3,[r2,#240]                @ r3 = round count
    vld1.32 {q0},[r2]!
    vld1.8  {q2},[r0]
    sub     r3,r3,#2
    vld1.32 {q1},[r2]!

.Loop_dec:                              @ two rounds per iteration
    INST(0x40,0x43,0xb0,0xf3)           @ aesd q2,q0
    INST(0xc4,0x43,0xb0,0xf3)           @ aesimc q2,q2
    vld1.32 {q0},[r2]!
    subs    r3,r3,#2
    INST(0x42,0x43,0xb0,0xf3)           @ aesd q2,q1
    INST(0xc4,0x43,0xb0,0xf3)           @ aesimc q2,q2
    vld1.32 {q1},[r2]!
    bgt     .Loop_dec

    INST(0x40,0x43,0xb0,0xf3)           @ aesd q2,q0
    INST(0xc4,0x43,0xb0,0xf3)           @ aesimc q2,q2
    vld1.32 {q0},[r2]                   @ last round key
    INST(0x42,0x43,0xb0,0xf3)           @ aesd q2,q1
    veor    q2,q2,q0

    vst1.8  {q2},[r1]
    bx      lr
.size aes_v8_decrypt,.-aes_v8_decrypt

@ ----------------------------------------------------------------------
@ aes_v8_ecb_encrypt
@ In:   r0 = input, r1 = output, r2 = length in bytes, r3 = key schedule
@       first stack word (loaded into r4) = direction; zero selects the
@       decrypt path, non-zero the encrypt path.
@       A second stack word is loaded into r5 but overwritten before use.
@ A length below 16 returns without writing output; otherwise the length
@ is rounded down to a multiple of 16 after the first block is counted.
@ Saves and restores r4-r8, lr and d8-d15.
@ ----------------------------------------------------------------------
.globl aes_v8_ecb_encrypt
.type aes_v8_ecb_encrypt,%function
.align 5
aes_v8_ecb_encrypt:
    mov     ip,sp
    stmdb   sp!,{r4,r5,r6,r7,r8,lr}
    vstmdb  sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
    ldmia   ip,{r4,r5}                  @ load remaining args
    subs    r2,r2,#16
    mov     r8,#16                      @ input post-increment step
    blo     .Lecb_done
    it      eq
    moveq   r8,#0                       @ exactly one block: do not advance r0

    cmp     r4,#0                       @ en- or decrypting?
    ldr     r5,[r3,#240]                @ r5 = round count
    and     r2,r2,#-16
    vld1.8  {q0},[r0],r8

    vld1.32 {q8,q9},[r3]                @ load key schedule...
    sub     r5,r5,#6
    add     r7,r3,r5,lsl#4              @ pointer to last 7 round keys
    sub     r5,r5,#2
    vld1.32 {q10,q11},[r7]!
    vld1.32 {q12,q13},[r7]!
    vld1.32 {q14,q15},[r7]!
@ (aes_v8_ecb_encrypt, continued)
@ Register roles in the loops below:
@   q8,q9    two-entry window onto the early round keys, refilled via r7
@   q12-q15  four of the last round keys, q7 the very last one
@   q0,q1,q10 the three data blocks in flight (q10 is reused for data
@            after having been loaded with a round key above)
@   r6       countdown over the windowed rounds, r5 its reload value minus 2
@ The flags from the earlier cmp r4,#0 are still live at beq .Lecb_dec.
    vld1.32 {q7},[r7]

    add     r7,r3,#32
    mov     r6,r5
    beq     .Lecb_dec

    vld1.8  {q1},[r0]!
    subs    r2,r2,#32                   @ bias
    add     r6,r5,#2
    vorr    q3,q1,q1
    vorr    q10,q1,q1
    vorr    q1,q0,q0
    blo     .Lecb_enc_tail              @ fewer than three blocks in total

    vorr    q1,q3,q3
    vld1.8  {q10},[r0]!
.Loop3x_ecb_enc:
    INST(0x20,0x03,0xb0,0xf3)           @ aese q0,q8
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    INST(0x20,0x23,0xb0,0xf3)           @ aese q1,q8
    INST(0x82,0x23,0xb0,0xf3)           @ aesmc q1,q1
    INST(0x20,0x43,0xf0,0xf3)           @ aese q10,q8
    INST(0xa4,0x43,0xf0,0xf3)           @ aesmc q10,q10
    vld1.32 {q8},[r7]!
    subs    r6,r6,#2
    INST(0x22,0x03,0xb0,0xf3)           @ aese q0,q9
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    INST(0x22,0x23,0xb0,0xf3)           @ aese q1,q9
    INST(0x82,0x23,0xb0,0xf3)           @ aesmc q1,q1
    INST(0x22,0x43,0xf0,0xf3)           @ aese q10,q9
    INST(0xa4,0x43,0xf0,0xf3)           @ aesmc q10,q10
    vld1.32 {q9},[r7]!
    bgt     .Loop3x_ecb_enc

    INST(0x20,0x03,0xb0,0xf3)           @ aese q0,q8
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    INST(0x20,0x23,0xb0,0xf3)           @ aese q1,q8
    INST(0x82,0x23,0xb0,0xf3)           @ aesmc q1,q1
    INST(0x20,0x43,0xf0,0xf3)           @ aese q10,q8
    INST(0xa4,0x43,0xf0,0xf3)           @ aesmc q10,q10
    subs    r2,r2,#0x30                 @ flags stay live until bhs below
    it      lo
    movlo   r6,r2                       @ r6 is zero at this point
    INST(0x22,0x03,0xb0,0xf3)           @ aese q0,q9
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    INST(0x22,0x23,0xb0,0xf3)           @ aese q1,q9
    INST(0x82,0x23,0xb0,0xf3)           @ aesmc q1,q1
    INST(0x22,0x43,0xf0,0xf3)           @ aese q10,q9
    INST(0xa4,0x43,0xf0,0xf3)           @ aesmc q10,q10
    add     r0,r0,r6                    @ r0 is adjusted in such way that
                                        @ at exit from the loop q1-q10
                                        @ are loaded with last "words"
    mov     r7,r3
    INST(0x28,0x03,0xb0,0xf3)           @ aese q0,q12
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    INST(0x28,0x23,0xb0,0xf3)           @ aese q1,q12
    INST(0x82,0x23,0xb0,0xf3)           @ aesmc q1,q1
    INST(0x28,0x43,0xf0,0xf3)           @ aese q10,q12
    INST(0xa4,0x43,0xf0,0xf3)           @ aesmc q10,q10
    vld1.8  {q2},[r0]!
    INST(0x2a,0x03,0xb0,0xf3)           @ aese q0,q13
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    INST(0x2a,0x23,0xb0,0xf3)           @ aese q1,q13
    INST(0x82,0x23,0xb0,0xf3)           @ aesmc q1,q1
    INST(0x2a,0x43,0xf0,0xf3)           @ aese q10,q13
    INST(0xa4,0x43,0xf0,0xf3)           @ aesmc q10,q10
    vld1.8  {q3},[r0]!
    INST(0x2c,0x03,0xb0,0xf3)           @ aese q0,q14
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    INST(0x2c,0x23,0xb0,0xf3)           @ aese q1,q14
    INST(0x82,0x23,0xb0,0xf3)           @ aesmc q1,q1
    INST(0x2c,0x43,0xf0,0xf3)           @ aese q10,q14
    INST(0xa4,0x43,0xf0,0xf3)           @ aesmc q10,q10
    vld1.8  {q11},[r0]!
    INST(0x2e,0x03,0xb0,0xf3)           @ aese q0,q15
    INST(0x2e,0x23,0xb0,0xf3)           @ aese q1,q15
    INST(0x2e,0x43,0xf0,0xf3)           @ aese q10,q15
    vld1.32 {q8},[r7]!                  @ re-pre-load rndkey[0]
    add     r6,r5,#2
    veor    q4,q7,q0
    veor    q5,q7,q1
    veor    q10,q10,q7
    vld1.32 {q9},[r7]!                  @ re-pre-load rndkey[1]
    vst1.8  {q4},[r1]!
    vorr    q0,q2,q2
    vst1.8  {q5},[r1]!
    vorr    q1,q3,q3
    vst1.8  {q10},[r1]!
    vorr    q10,q11,q11
    bhs     .Loop3x_ecb_enc

    cmn     r2,#0x30
    beq     .Lecb_done                  @ nothing left over
    nop

.Lecb_enc_tail:                         @ one or two remaining blocks in q1/q10
    INST(0x20,0x23,0xb0,0xf3)           @ aese q1,q8
    INST(0x82,0x23,0xb0,0xf3)           @ aesmc q1,q1
    INST(0x20,0x43,0xf0,0xf3)           @ aese q10,q8
    INST(0xa4,0x43,0xf0,0xf3)           @ aesmc q10,q10
    vld1.32 {q8},[r7]!
    subs    r6,r6,#2
    INST(0x22,0x23,0xb0,0xf3)           @ aese q1,q9
    INST(0x82,0x23,0xb0,0xf3)           @ aesmc q1,q1
    INST(0x22,0x43,0xf0,0xf3)           @ aese q10,q9
    INST(0xa4,0x43,0xf0,0xf3)           @ aesmc q10,q10
    vld1.32 {q9},[r7]!
    bgt     .Lecb_enc_tail

    INST(0x20,0x23,0xb0,0xf3)           @ aese q1,q8
    INST(0x82,0x23,0xb0,0xf3)           @ aesmc q1,q1
    INST(0x20,0x43,0xf0,0xf3)           @ aese q10,q8
    INST(0xa4,0x43,0xf0,0xf3)           @ aesmc q10,q10
    INST(0x22,0x23,0xb0,0xf3)           @ aese q1,q9
    INST(0x82,0x23,0xb0,0xf3)           @ aesmc q1,q1
    INST(0x22,0x43,0xf0,0xf3)           @ aese q10,q9
    INST(0xa4,0x43,0xf0,0xf3)           @ aesmc q10,q10
    INST(0x28,0x23,0xb0,0xf3)           @ aese q1,q12
    INST(0x82,0x23,0xb0,0xf3)           @ aesmc q1,q1
    INST(0x28,0x43,0xf0,0xf3)           @ aese q10,q12
    INST(0xa4,0x43,0xf0,0xf3)           @ aesmc q10,q10
    cmn     r2,#0x20                    @ flags are consumed by beq .Lecb_enc_one
    INST(0x2a,0x23,0xb0,0xf3)           @ aese q1,q13
    INST(0x82,0x23,0xb0,0xf3)           @ aesmc q1,q1
    INST(0x2a,0x43,0xf0,0xf3)           @ aese q10,q13
    INST(0xa4,0x43,0xf0,0xf3)           @ aesmc q10,q10
    INST(0x2c,0x23,0xb0,0xf3)           @ aese q1,q14
    INST(0x82,0x23,0xb0,0xf3)           @ aesmc q1,q1
    INST(0x2c,0x43,0xf0,0xf3)           @ aese q10,q14
    INST(0xa4,0x43,0xf0,0xf3)           @ aesmc q10,q10
    INST(0x2e,0x23,0xb0,0xf3)           @ aese q1,q15
    INST(0x2e,0x43,0xf0,0xf3)           @ aese q10,q15
    beq     .Lecb_enc_one
    veor    q5,q7,q1
    veor    q9,q7,q10
    vst1.8  {q5},[r1]!
    vst1.8  {q9},[r1]!
    b       .Lecb_done

.Lecb_enc_one:
    veor    q5,q7,q10
    vst1.8  {q5},[r1]!
    b       .Lecb_done
.align 5
.Lecb_dec:                              @ decrypt path, mirrors the encrypt path
    vld1.8  {q1},[r0]!
    subs    r2,r2,#32                   @ bias
    add     r6,r5,#2
    vorr    q3,q1,q1
    vorr    q10,q1,q1
    vorr    q1,q0,q0
    blo     .Lecb_dec_tail              @ fewer than three blocks in total

    vorr    q1,q3,q3
    vld1.8  {q10},[r0]!
.Loop3x_ecb_dec:
    INST(0x60,0x03,0xb0,0xf3)           @ aesd q0,q8
    INST(0xc0,0x03,0xb0,0xf3)           @ aesimc q0,q0
    INST(0x60,0x23,0xb0,0xf3)           @ aesd q1,q8
    INST(0xc2,0x23,0xb0,0xf3)           @ aesimc q1,q1
    INST(0x60,0x43,0xf0,0xf3)           @ aesd q10,q8
    INST(0xe4,0x43,0xf0,0xf3)           @ aesimc q10,q10
    vld1.32 {q8},[r7]!
    subs    r6,r6,#2
    INST(0x62,0x03,0xb0,0xf3)           @ aesd q0,q9
    INST(0xc0,0x03,0xb0,0xf3)           @ aesimc q0,q0
    INST(0x62,0x23,0xb0,0xf3)           @ aesd q1,q9
    INST(0xc2,0x23,0xb0,0xf3)           @ aesimc q1,q1
    INST(0x62,0x43,0xf0,0xf3)           @ aesd q10,q9
    INST(0xe4,0x43,0xf0,0xf3)           @ aesimc q10,q10
    vld1.32 {q9},[r7]!
    bgt     .Loop3x_ecb_dec

    INST(0x60,0x03,0xb0,0xf3)           @ aesd q0,q8
    INST(0xc0,0x03,0xb0,0xf3)           @ aesimc q0,q0
    INST(0x60,0x23,0xb0,0xf3)           @ aesd q1,q8
    INST(0xc2,0x23,0xb0,0xf3)           @ aesimc q1,q1
    INST(0x60,0x43,0xf0,0xf3)           @ aesd q10,q8
    INST(0xe4,0x43,0xf0,0xf3)           @ aesimc q10,q10
    subs    r2,r2,#0x30                 @ flags stay live until bhs below
    it      lo
    movlo   r6,r2                       @ r6 is zero at this point
    INST(0x62,0x03,0xb0,0xf3)           @ aesd q0,q9
    INST(0xc0,0x03,0xb0,0xf3)           @ aesimc q0,q0
    INST(0x62,0x23,0xb0,0xf3)           @ aesd q1,q9
    INST(0xc2,0x23,0xb0,0xf3)           @ aesimc q1,q1
    INST(0x62,0x43,0xf0,0xf3)           @ aesd q10,q9
    INST(0xe4,0x43,0xf0,0xf3)           @ aesimc q10,q10
    add     r0,r0,r6                    @ r0 is adjusted in such way that
                                        @ at exit from the loop q1-q10
                                        @ are loaded with last "words"
    mov     r7,r3
    INST(0x68,0x03,0xb0,0xf3)           @ aesd q0,q12
    INST(0xc0,0x03,0xb0,0xf3)           @ aesimc q0,q0
    INST(0x68,0x23,0xb0,0xf3)           @ aesd q1,q12
    INST(0xc2,0x23,0xb0,0xf3)           @ aesimc q1,q1
    INST(0x68,0x43,0xf0,0xf3)           @ aesd q10,q12
    INST(0xe4,0x43,0xf0,0xf3)           @ aesimc q10,q10
    vld1.8  {q2},[r0]!
    INST(0x6a,0x03,0xb0,0xf3)           @ aesd q0,q13
    INST(0xc0,0x03,0xb0,0xf3)           @ aesimc q0,q0
    INST(0x6a,0x23,0xb0,0xf3)           @ aesd q1,q13
    INST(0xc2,0x23,0xb0,0xf3)           @ aesimc q1,q1
    INST(0x6a,0x43,0xf0,0xf3)           @ aesd q10,q13
    INST(0xe4,0x43,0xf0,0xf3)           @ aesimc q10,q10
    vld1.8  {q3},[r0]!
    INST(0x6c,0x03,0xb0,0xf3)           @ aesd q0,q14
    INST(0xc0,0x03,0xb0,0xf3)           @ aesimc q0,q0
    INST(0x6c,0x23,0xb0,0xf3)           @ aesd q1,q14
    INST(0xc2,0x23,0xb0,0xf3)           @ aesimc q1,q1
    INST(0x6c,0x43,0xf0,0xf3)           @ aesd q10,q14
    INST(0xe4,0x43,0xf0,0xf3)           @ aesimc q10,q10
    vld1.8  {q11},[r0]!
    INST(0x6e,0x03,0xb0,0xf3)           @ aesd q0,q15
    INST(0x6e,0x23,0xb0,0xf3)           @ aesd q1,q15
    INST(0x6e,0x43,0xf0,0xf3)           @ aesd q10,q15
    vld1.32 {q8},[r7]!                  @ re-pre-load rndkey[0]
    add     r6,r5,#2
    veor    q4,q7,q0
    veor    q5,q7,q1
    veor    q10,q10,q7
    vld1.32 {q9},[r7]!                  @ re-pre-load rndkey[1]
    vst1.8  {q4},[r1]!
    vorr    q0,q2,q2
    vst1.8  {q5},[r1]!
    vorr    q1,q3,q3
    vst1.8  {q10},[r1]!
    vorr    q10,q11,q11
    bhs     .Loop3x_ecb_dec

    cmn     r2,#0x30
    beq     .Lecb_done                  @ nothing left over
    nop

.Lecb_dec_tail:                         @ one or two remaining blocks in q1/q10
    INST(0x60,0x23,0xb0,0xf3)           @ aesd q1,q8
    INST(0xc2,0x23,0xb0,0xf3)           @ aesimc q1,q1
    INST(0x60,0x43,0xf0,0xf3)           @ aesd q10,q8
    INST(0xe4,0x43,0xf0,0xf3)           @ aesimc q10,q10
    vld1.32 {q8},[r7]!
    subs    r6,r6,#2
    INST(0x62,0x23,0xb0,0xf3)           @ aesd q1,q9
    INST(0xc2,0x23,0xb0,0xf3)           @ aesimc q1,q1
    INST(0x62,0x43,0xf0,0xf3)           @ aesd q10,q9
    INST(0xe4,0x43,0xf0,0xf3)           @ aesimc q10,q10
    vld1.32 {q9},[r7]!
    bgt     .Lecb_dec_tail

    INST(0x60,0x23,0xb0,0xf3)           @ aesd q1,q8
    INST(0xc2,0x23,0xb0,0xf3)           @ aesimc q1,q1
    INST(0x60,0x43,0xf0,0xf3)           @ aesd q10,q8
    INST(0xe4,0x43,0xf0,0xf3)           @ aesimc q10,q10
    INST(0x62,0x23,0xb0,0xf3)           @ aesd q1,q9
    INST(0xc2,0x23,0xb0,0xf3)           @ aesimc q1,q1
    INST(0x62,0x43,0xf0,0xf3)           @ aesd q10,q9
    INST(0xe4,0x43,0xf0,0xf3)           @ aesimc q10,q10
    INST(0x68,0x23,0xb0,0xf3)           @ aesd q1,q12
    INST(0xc2,0x23,0xb0,0xf3)           @ aesimc q1,q1
    INST(0x68,0x43,0xf0,0xf3)           @ aesd q10,q12
    INST(0xe4,0x43,0xf0,0xf3)           @ aesimc q10,q10
    cmn     r2,#0x20                    @ flags are consumed by beq .Lecb_dec_one
    INST(0x6a,0x23,0xb0,0xf3)           @ aesd q1,q13
    INST(0xc2,0x23,0xb0,0xf3)           @ aesimc q1,q1
    INST(0x6a,0x43,0xf0,0xf3)           @ aesd q10,q13
    INST(0xe4,0x43,0xf0,0xf3)           @ aesimc q10,q10
    INST(0x6c,0x23,0xb0,0xf3)           @ aesd q1,q14
    INST(0xc2,0x23,0xb0,0xf3)           @ aesimc q1,q1
    INST(0x6c,0x43,0xf0,0xf3)           @ aesd q10,q14
    INST(0xe4,0x43,0xf0,0xf3)           @ aesimc q10,q10
    INST(0x6e,0x23,0xb0,0xf3)           @ aesd q1,q15
    INST(0x6e,0x43,0xf0,0xf3)           @ aesd q10,q15
    beq     .Lecb_dec_one
    veor    q5,q7,q1
    veor    q9,q7,q10
    vst1.8  {q5},[r1]!
    vst1.8  {q9},[r1]!
    b       .Lecb_done

.Lecb_dec_one:
    veor    q5,q7,q10
    vst1.8  {q5},[r1]!

.Lecb_done:
    vldmia  sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
    ldmia   sp!,{r4,r5,r6,r7,r8,pc}
.size aes_v8_ecb_encrypt,.-aes_v8_ecb_encrypt

@ ----------------------------------------------------------------------
@ aes_v8_cbc_encrypt
@ In:   r0 = input, r1 = output, r2 = length in bytes, r3 = key schedule
@       first stack word  (r4) = pointer to the 16-byte IV
@       second stack word (r5) = direction; zero selects the decrypt path
@ A length below 16 branches straight to .Lcbc_abort.
@ Saves r4-r8, lr and d8-d15 on entry.
@ ----------------------------------------------------------------------
.globl aes_v8_cbc_encrypt
.type aes_v8_cbc_encrypt,%function
.align 5
aes_v8_cbc_encrypt:
    mov     ip,sp
    stmdb   sp!,{r4,r5,r6,r7,r8,lr}
    vstmdb  sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
    ldmia   ip,{r4,r5}                  @ load remaining args
    subs    r2,r2,#16
    mov     r8,#16                      @ input post-increment step
    blo     .Lcbc_abort
    it      eq
    moveq   r8,#0                       @ exactly one block: do not advance r0

    cmp     r5,#0                       @ en- or decrypting?
@ (aes_v8_cbc_encrypt, continued: key preload and the encrypt paths)
@ Register roles on the encrypt paths:
@   q6       IV on entry, afterwards the most recent ciphertext block
@   q7       last round key; q5 = rndkey[0] ^ last round key
@   q0       block being encrypted
@   q8       next input block, pre-xored with q5 so that the veor with q7
@            folded into q0 leaves (input ^ previous ciphertext ^ rndkey[0])
@   r8       input post-increment, forced to 0 once the last block is loaded
@ The flags from the earlier cmp r5,#0 are still live at beq .Lcbc_dec.
    ldr     r5,[r3,#240]                @ r5 = round count
    and     r2,r2,#-16
    vld1.8  {q6},[r4]                   @ IV
    vld1.8  {q0},[r0],r8

    vld1.32 {q8,q9},[r3]                @ load key schedule...
    sub     r5,r5,#6
    add     r7,r3,r5,lsl#4              @ pointer to last 7 round keys
    sub     r5,r5,#2
    vld1.32 {q10,q11},[r7]!
    vld1.32 {q12,q13},[r7]!
    vld1.32 {q14,q15},[r7]!
    vld1.32 {q7},[r7]

    add     r7,r3,#32
    mov     r6,r5
    beq     .Lcbc_dec

    cmp     r5,#2                       @ r5 = rounds - 8, so 2 means 10 rounds
    veor    q0,q0,q6
    veor    q5,q8,q7
    beq     .Lcbc_enc128

    vld1.32 {q2,q3},[r7]                @ rndkey[2], rndkey[3]
    add     r7,r3,#16                   @ r7  -> rndkey[1]
    add     r6,r3,#16*4                 @ r6  -> rndkey[4]
    add     r12,r3,#16*5                @ r12 -> rndkey[5]
    INST(0x20,0x03,0xb0,0xf3)           @ aese q0,q8
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    add     r14,r3,#16*6                @ r14 -> rndkey[6]
    add     r3,r3,#16*7                 @ r3  -> rndkey[7]
    b       .Lenter_cbc_enc

.align 4
.Loop_cbc_enc:
    INST(0x20,0x03,0xb0,0xf3)           @ aese q0,q8
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    vst1.8  {q6},[r1]!                  @ emit previous ciphertext block
.Lenter_cbc_enc:
    INST(0x22,0x03,0xb0,0xf3)           @ aese q0,q9
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    INST(0x04,0x03,0xb0,0xf3)           @ aese q0,q2
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    vld1.32 {q8},[r6]
    cmp     r5,#4                       @ r5 = rounds - 8, so 4 means 12 rounds
    INST(0x06,0x03,0xb0,0xf3)           @ aese q0,q3
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    vld1.32 {q9},[r12]
    beq     .Lcbc_enc192

    INST(0x20,0x03,0xb0,0xf3)           @ aese q0,q8
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    vld1.32 {q8},[r14]
    INST(0x22,0x03,0xb0,0xf3)           @ aese q0,q9
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    vld1.32 {q9},[r3]
    nop

.Lcbc_enc192:
    INST(0x20,0x03,0xb0,0xf3)           @ aese q0,q8
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    subs    r2,r2,#16                   @ flags stay live until bhs below
    INST(0x22,0x03,0xb0,0xf3)           @ aese q0,q9
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    it      eq
    moveq   r8,#0                       @ last block: stop advancing r0
    INST(0x24,0x03,0xb0,0xf3)           @ aese q0,q10
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    INST(0x26,0x03,0xb0,0xf3)           @ aese q0,q11
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    vld1.8  {q8},[r0],r8                @ next input block
    INST(0x28,0x03,0xb0,0xf3)           @ aese q0,q12
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    veor    q8,q8,q5
    INST(0x2a,0x03,0xb0,0xf3)           @ aese q0,q13
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    vld1.32 {q9},[r7]                   @ re-pre-load rndkey[1]
    INST(0x2c,0x03,0xb0,0xf3)           @ aese q0,q14
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    INST(0x2e,0x03,0xb0,0xf3)           @ aese q0,q15
    veor    q6,q0,q7                    @ q6 = finished ciphertext block
    bhs     .Loop_cbc_enc

    vst1.8  {q6},[r1]!
    b       .Lcbc_done

.align 5
.Lcbc_enc128:
    vld1.32 {q2,q3},[r7]                @ rndkey[2], rndkey[3]
    INST(0x20,0x03,0xb0,0xf3)           @ aese q0,q8
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    b       .Lenter_cbc_enc128
.Loop_cbc_enc128:
    INST(0x20,0x03,0xb0,0xf3)           @ aese q0,q8
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    vst1.8  {q6},[r1]!                  @ emit previous ciphertext block
.Lenter_cbc_enc128:
    INST(0x22,0x03,0xb0,0xf3)           @ aese q0,q9
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    subs    r2,r2,#16                   @ flags stay live until bhs below
    INST(0x04,0x03,0xb0,0xf3)           @ aese q0,q2
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    it      eq
    moveq   r8,#0                       @ last block: stop advancing r0
    INST(0x06,0x03,0xb0,0xf3)           @ aese q0,q3
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    INST(0x24,0x03,0xb0,0xf3)           @ aese q0,q10
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    INST(0x26,0x03,0xb0,0xf3)           @ aese q0,q11
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    vld1.8  {q8},[r0],r8                @ next input block
    INST(0x28,0x03,0xb0,0xf3)           @ aese q0,q12
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    INST(0x2a,0x03,0xb0,0xf3)           @ aese q0,q13
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    INST(0x2c,0x03,0xb0,0xf3)           @ aese q0,q14
    INST(0x80,0x03,0xb0,0xf3)           @ aesmc q0,q0
    veor    q8,q8,q5
    INST(0x2e,0x03,0xb0,0xf3)           @ aese q0,q15
    veor    q6,q0,q7                    @ q6 = finished ciphertext block
    bhs     .Loop_cbc_enc128

    vst1.8  {q6},[r1]!
    b       .Lcbc_done
.align 5
.Lcbc_dec:
    vld1.8  {q10},[r0]!
747bc3d5698SJohn Baldwin subs r2,r2,#32 @ bias 748bc3d5698SJohn Baldwin add r6,r5,#2 749bc3d5698SJohn Baldwin vorr q3,q0,q0 750bc3d5698SJohn Baldwin vorr q1,q0,q0 751bc3d5698SJohn Baldwin vorr q11,q10,q10 752bc3d5698SJohn Baldwin blo .Lcbc_dec_tail 753bc3d5698SJohn Baldwin 754bc3d5698SJohn Baldwin vorr q1,q10,q10 755bc3d5698SJohn Baldwin vld1.8 {q10},[r0]! 756bc3d5698SJohn Baldwin vorr q2,q0,q0 757bc3d5698SJohn Baldwin vorr q3,q1,q1 758bc3d5698SJohn Baldwin vorr q11,q10,q10 759bc3d5698SJohn Baldwin.Loop3x_cbc_dec: 760*c0855eaaSJohn Baldwin INST(0x60,0x03,0xb0,0xf3) @ aesd q0,q8 761*c0855eaaSJohn Baldwin INST(0xc0,0x03,0xb0,0xf3) @ aesimc q0,q0 762*c0855eaaSJohn Baldwin INST(0x60,0x23,0xb0,0xf3) @ aesd q1,q8 763*c0855eaaSJohn Baldwin INST(0xc2,0x23,0xb0,0xf3) @ aesimc q1,q1 764*c0855eaaSJohn Baldwin INST(0x60,0x43,0xf0,0xf3) @ aesd q10,q8 765*c0855eaaSJohn Baldwin INST(0xe4,0x43,0xf0,0xf3) @ aesimc q10,q10 766bc3d5698SJohn Baldwin vld1.32 {q8},[r7]! 767bc3d5698SJohn Baldwin subs r6,r6,#2 768*c0855eaaSJohn Baldwin INST(0x62,0x03,0xb0,0xf3) @ aesd q0,q9 769*c0855eaaSJohn Baldwin INST(0xc0,0x03,0xb0,0xf3) @ aesimc q0,q0 770*c0855eaaSJohn Baldwin INST(0x62,0x23,0xb0,0xf3) @ aesd q1,q9 771*c0855eaaSJohn Baldwin INST(0xc2,0x23,0xb0,0xf3) @ aesimc q1,q1 772*c0855eaaSJohn Baldwin INST(0x62,0x43,0xf0,0xf3) @ aesd q10,q9 773*c0855eaaSJohn Baldwin INST(0xe4,0x43,0xf0,0xf3) @ aesimc q10,q10 774bc3d5698SJohn Baldwin vld1.32 {q9},[r7]! 
775bc3d5698SJohn Baldwin bgt .Loop3x_cbc_dec 776bc3d5698SJohn Baldwin 777*c0855eaaSJohn Baldwin INST(0x60,0x03,0xb0,0xf3) @ aesd q0,q8 778*c0855eaaSJohn Baldwin INST(0xc0,0x03,0xb0,0xf3) @ aesimc q0,q0 779*c0855eaaSJohn Baldwin INST(0x60,0x23,0xb0,0xf3) @ aesd q1,q8 780*c0855eaaSJohn Baldwin INST(0xc2,0x23,0xb0,0xf3) @ aesimc q1,q1 781*c0855eaaSJohn Baldwin INST(0x60,0x43,0xf0,0xf3) @ aesd q10,q8 782*c0855eaaSJohn Baldwin INST(0xe4,0x43,0xf0,0xf3) @ aesimc q10,q10 783bc3d5698SJohn Baldwin veor q4,q6,q7 784bc3d5698SJohn Baldwin subs r2,r2,#0x30 785bc3d5698SJohn Baldwin veor q5,q2,q7 786*c0855eaaSJohn Baldwin it lo 787bc3d5698SJohn Baldwin movlo r6,r2 @ r6, r6, is zero at this point 788*c0855eaaSJohn Baldwin INST(0x62,0x03,0xb0,0xf3) @ aesd q0,q9 789*c0855eaaSJohn Baldwin INST(0xc0,0x03,0xb0,0xf3) @ aesimc q0,q0 790*c0855eaaSJohn Baldwin INST(0x62,0x23,0xb0,0xf3) @ aesd q1,q9 791*c0855eaaSJohn Baldwin INST(0xc2,0x23,0xb0,0xf3) @ aesimc q1,q1 792*c0855eaaSJohn Baldwin INST(0x62,0x43,0xf0,0xf3) @ aesd q10,q9 793*c0855eaaSJohn Baldwin INST(0xe4,0x43,0xf0,0xf3) @ aesimc q10,q10 794bc3d5698SJohn Baldwin veor q9,q3,q7 795bc3d5698SJohn Baldwin add r0,r0,r6 @ r0 is adjusted in such way that 796bc3d5698SJohn Baldwin @ at exit from the loop q1-q10 797bc3d5698SJohn Baldwin @ are loaded with last "words" 798bc3d5698SJohn Baldwin vorr q6,q11,q11 799bc3d5698SJohn Baldwin mov r7,r3 800*c0855eaaSJohn Baldwin INST(0x68,0x03,0xb0,0xf3) @ aesd q0,q12 801*c0855eaaSJohn Baldwin INST(0xc0,0x03,0xb0,0xf3) @ aesimc q0,q0 802*c0855eaaSJohn Baldwin INST(0x68,0x23,0xb0,0xf3) @ aesd q1,q12 803*c0855eaaSJohn Baldwin INST(0xc2,0x23,0xb0,0xf3) @ aesimc q1,q1 804*c0855eaaSJohn Baldwin INST(0x68,0x43,0xf0,0xf3) @ aesd q10,q12 805*c0855eaaSJohn Baldwin INST(0xe4,0x43,0xf0,0xf3) @ aesimc q10,q10 806bc3d5698SJohn Baldwin vld1.8 {q2},[r0]! 
807*c0855eaaSJohn Baldwin INST(0x6a,0x03,0xb0,0xf3) @ aesd q0,q13 808*c0855eaaSJohn Baldwin INST(0xc0,0x03,0xb0,0xf3) @ aesimc q0,q0 809*c0855eaaSJohn Baldwin INST(0x6a,0x23,0xb0,0xf3) @ aesd q1,q13 810*c0855eaaSJohn Baldwin INST(0xc2,0x23,0xb0,0xf3) @ aesimc q1,q1 811*c0855eaaSJohn Baldwin INST(0x6a,0x43,0xf0,0xf3) @ aesd q10,q13 812*c0855eaaSJohn Baldwin INST(0xe4,0x43,0xf0,0xf3) @ aesimc q10,q10 813bc3d5698SJohn Baldwin vld1.8 {q3},[r0]! 814*c0855eaaSJohn Baldwin INST(0x6c,0x03,0xb0,0xf3) @ aesd q0,q14 815*c0855eaaSJohn Baldwin INST(0xc0,0x03,0xb0,0xf3) @ aesimc q0,q0 816*c0855eaaSJohn Baldwin INST(0x6c,0x23,0xb0,0xf3) @ aesd q1,q14 817*c0855eaaSJohn Baldwin INST(0xc2,0x23,0xb0,0xf3) @ aesimc q1,q1 818*c0855eaaSJohn Baldwin INST(0x6c,0x43,0xf0,0xf3) @ aesd q10,q14 819*c0855eaaSJohn Baldwin INST(0xe4,0x43,0xf0,0xf3) @ aesimc q10,q10 820bc3d5698SJohn Baldwin vld1.8 {q11},[r0]! 821*c0855eaaSJohn Baldwin INST(0x6e,0x03,0xb0,0xf3) @ aesd q0,q15 822*c0855eaaSJohn Baldwin INST(0x6e,0x23,0xb0,0xf3) @ aesd q1,q15 823*c0855eaaSJohn Baldwin INST(0x6e,0x43,0xf0,0xf3) @ aesd q10,q15 824bc3d5698SJohn Baldwin vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] 825bc3d5698SJohn Baldwin add r6,r5,#2 826bc3d5698SJohn Baldwin veor q4,q4,q0 827bc3d5698SJohn Baldwin veor q5,q5,q1 828bc3d5698SJohn Baldwin veor q10,q10,q9 829bc3d5698SJohn Baldwin vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] 830bc3d5698SJohn Baldwin vst1.8 {q4},[r1]! 831bc3d5698SJohn Baldwin vorr q0,q2,q2 832bc3d5698SJohn Baldwin vst1.8 {q5},[r1]! 833bc3d5698SJohn Baldwin vorr q1,q3,q3 834bc3d5698SJohn Baldwin vst1.8 {q10},[r1]! 
835bc3d5698SJohn Baldwin vorr q10,q11,q11 836bc3d5698SJohn Baldwin bhs .Loop3x_cbc_dec 837bc3d5698SJohn Baldwin 838bc3d5698SJohn Baldwin cmn r2,#0x30 839bc3d5698SJohn Baldwin beq .Lcbc_done 840bc3d5698SJohn Baldwin nop 841bc3d5698SJohn Baldwin 842bc3d5698SJohn Baldwin.Lcbc_dec_tail: 843*c0855eaaSJohn Baldwin INST(0x60,0x23,0xb0,0xf3) @ aesd q1,q8 844*c0855eaaSJohn Baldwin INST(0xc2,0x23,0xb0,0xf3) @ aesimc q1,q1 845*c0855eaaSJohn Baldwin INST(0x60,0x43,0xf0,0xf3) @ aesd q10,q8 846*c0855eaaSJohn Baldwin INST(0xe4,0x43,0xf0,0xf3) @ aesimc q10,q10 847bc3d5698SJohn Baldwin vld1.32 {q8},[r7]! 848bc3d5698SJohn Baldwin subs r6,r6,#2 849*c0855eaaSJohn Baldwin INST(0x62,0x23,0xb0,0xf3) @ aesd q1,q9 850*c0855eaaSJohn Baldwin INST(0xc2,0x23,0xb0,0xf3) @ aesimc q1,q1 851*c0855eaaSJohn Baldwin INST(0x62,0x43,0xf0,0xf3) @ aesd q10,q9 852*c0855eaaSJohn Baldwin INST(0xe4,0x43,0xf0,0xf3) @ aesimc q10,q10 853bc3d5698SJohn Baldwin vld1.32 {q9},[r7]! 854bc3d5698SJohn Baldwin bgt .Lcbc_dec_tail 855bc3d5698SJohn Baldwin 856*c0855eaaSJohn Baldwin INST(0x60,0x23,0xb0,0xf3) @ aesd q1,q8 857*c0855eaaSJohn Baldwin INST(0xc2,0x23,0xb0,0xf3) @ aesimc q1,q1 858*c0855eaaSJohn Baldwin INST(0x60,0x43,0xf0,0xf3) @ aesd q10,q8 859*c0855eaaSJohn Baldwin INST(0xe4,0x43,0xf0,0xf3) @ aesimc q10,q10 860*c0855eaaSJohn Baldwin INST(0x62,0x23,0xb0,0xf3) @ aesd q1,q9 861*c0855eaaSJohn Baldwin INST(0xc2,0x23,0xb0,0xf3) @ aesimc q1,q1 862*c0855eaaSJohn Baldwin INST(0x62,0x43,0xf0,0xf3) @ aesd q10,q9 863*c0855eaaSJohn Baldwin INST(0xe4,0x43,0xf0,0xf3) @ aesimc q10,q10 864*c0855eaaSJohn Baldwin INST(0x68,0x23,0xb0,0xf3) @ aesd q1,q12 865*c0855eaaSJohn Baldwin INST(0xc2,0x23,0xb0,0xf3) @ aesimc q1,q1 866*c0855eaaSJohn Baldwin INST(0x68,0x43,0xf0,0xf3) @ aesd q10,q12 867*c0855eaaSJohn Baldwin INST(0xe4,0x43,0xf0,0xf3) @ aesimc q10,q10 868bc3d5698SJohn Baldwin cmn r2,#0x20 869*c0855eaaSJohn Baldwin INST(0x6a,0x23,0xb0,0xf3) @ aesd q1,q13 870*c0855eaaSJohn Baldwin INST(0xc2,0x23,0xb0,0xf3) @ aesimc q1,q1 
871*c0855eaaSJohn Baldwin INST(0x6a,0x43,0xf0,0xf3) @ aesd q10,q13 872*c0855eaaSJohn Baldwin INST(0xe4,0x43,0xf0,0xf3) @ aesimc q10,q10 873bc3d5698SJohn Baldwin veor q5,q6,q7 874*c0855eaaSJohn Baldwin INST(0x6c,0x23,0xb0,0xf3) @ aesd q1,q14 875*c0855eaaSJohn Baldwin INST(0xc2,0x23,0xb0,0xf3) @ aesimc q1,q1 876*c0855eaaSJohn Baldwin INST(0x6c,0x43,0xf0,0xf3) @ aesd q10,q14 877*c0855eaaSJohn Baldwin INST(0xe4,0x43,0xf0,0xf3) @ aesimc q10,q10 878bc3d5698SJohn Baldwin veor q9,q3,q7 879*c0855eaaSJohn Baldwin INST(0x6e,0x23,0xb0,0xf3) @ aesd q1,q15 880*c0855eaaSJohn Baldwin INST(0x6e,0x43,0xf0,0xf3) @ aesd q10,q15 881bc3d5698SJohn Baldwin beq .Lcbc_dec_one 882bc3d5698SJohn Baldwin veor q5,q5,q1 883bc3d5698SJohn Baldwin veor q9,q9,q10 884bc3d5698SJohn Baldwin vorr q6,q11,q11 885bc3d5698SJohn Baldwin vst1.8 {q5},[r1]! 886bc3d5698SJohn Baldwin vst1.8 {q9},[r1]! 887bc3d5698SJohn Baldwin b .Lcbc_done 888bc3d5698SJohn Baldwin 889bc3d5698SJohn Baldwin.Lcbc_dec_one: 890bc3d5698SJohn Baldwin veor q5,q5,q10 891bc3d5698SJohn Baldwin vorr q6,q11,q11 892bc3d5698SJohn Baldwin vst1.8 {q5},[r1]! 

.Lcbc_done:
	vst1.8	{q6},[r4]
.Lcbc_abort:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt

@ ----------------------------------------------------------------------
@ aes_v8_ctr32_encrypt_blocks
@
@ Counter-mode encryption of 16-byte blocks: three blocks per main-loop
@ iteration, followed by a tail that handles one or two blocks.
@
@ In:	r0   = source pointer, 16 bytes consumed per block
@	r1   = destination pointer, 16 bytes written per block
@	r2   = number of 16-byte blocks
@	r3   = key schedule; the word at offset 240 is used as round count
@	[sp] = pointer to the 16-byte initial counter block
@ NOTE(review): presumably matches the C-side ctr32 prototype
@ (in, out, blocks, key, ivec) - confirm against the declaration.
@
@ Saved and restored: r4-r10, lr (returned through pc), d8-d15.
@ Modified without restore: r0, r1, r2, r12 and the remaining q registers.
@
@ Register roles inside the function:
@	r5      = rounds-6, reload value for r6
@	r6      = count of rounds still to run in the two-rounds-per-pass loops
@	r7      = walking pointer into the key schedule
@	r8      = 32-bit counter (bytes 12..15 of the counter block) as an integer
@	r9,r10  = byte-reversed counter values for insertion into q6
@	r12     = post-increment for the tail load, also counter scratch
@	q0,q1,q10 = counter blocks being encrypted
@	q2,q3,q11 = input blocks
@	q4,q5     = state of the first two blocks after the loop (3x path)
@	q6      = counter block template, top lane d13[1] holds the counter
@	q7      = last round key
@	q8,q9   = rolling pair of round keys (q9 doubles as state in 3x path)
@	q12-q15 = the four round keys preceding the last one
@ ----------------------------------------------------------------------
.globl	aes_v8_ctr32_encrypt_blocks
.type	aes_v8_ctr32_encrypt_blocks,%function
.align	5
aes_v8_ctr32_encrypt_blocks:
	mov	ip,sp				@ keep entry sp to reach the stack argument
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}	@ ABI specification says so
	ldr	r4, [ip]			@ load remaining arg
	ldr	r5,[r3,#240]			@ round count from the key schedule

	ldr	r8, [r4, #12]			@ last word of the counter block
#ifdef __ARMEB__
	vld1.8	{q0},[r4]
#else
	vld1.32	{q0},[r4]
#endif
	vld1.32	{q8,q9},[r3]			@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16				@ default tail post-increment
	cmp	r2,#2				@ flags are consumed by movlo and bls below
	add	r7,r3,r5,lsl#4			@ pointer to last 5 round keys
	sub	r5,r5,#2			@ r5 = rounds-6
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]			@ last round key
	add	r7,r3,#32			@ third round key, q8 and q9 hold the first two
	mov	r6,r5
	it	lo				@ IT block for the Thumb-2 build
	movlo	r12,#0				@ single block: tail must not advance r0
#ifndef __ARMEB__
	rev	r8, r8
#endif
	add	r10, r8, #1
	vorr	q6,q0,q0			@ q6 = counter block template
	rev	r10, r10
	vmov.32	d13[1],r10			@ template now carries counter+1
	add	r8, r8, #2
	vorr	q1,q6,q6			@ second counter block
	bls	.Lctr32_tail			@ one or two blocks: tail only
	rev	r12, r8
	vmov.32	d13[1],r12			@ template now carries counter+2
	sub	r2,r2,#3			@ bias
	vorr	q10,q6,q6			@ third counter block
	b	.Loop3x_ctr32

.align	4
@ Two rounds per pass on three blocks, r6 counts the rounds left here.
.Loop3x_ctr32:
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ctr32

	@ Remaining rounds. State moves to q4, q5 and q9 so that q0, q1 and
	@ q10 can be refilled with the next three counter blocks meanwhile.
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x83,0xb0,0xf3)	@ aesmc q4,q0
	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0xa3,0xb0,0xf3)	@ aesmc q5,q1
	vld1.8	{q2},[r0]!			@ first input block
	add	r9,r8,#1
	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.8	{q3},[r0]!			@ second input block
	rev	r9,r9
	INST(0x22,0x83,0xb0,0xf3)	@ aese q4,q9
	INST(0x88,0x83,0xb0,0xf3)	@ aesmc q4,q4
	INST(0x22,0xa3,0xb0,0xf3)	@ aese q5,q9
	INST(0x8a,0xa3,0xb0,0xf3)	@ aesmc q5,q5
	vld1.8	{q11},[r0]!			@ third input block
	mov	r7,r3				@ rewind key pointer for the next pass
	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
	INST(0xa4,0x23,0xf0,0xf3)	@ aesmc q9,q10
	INST(0x28,0x83,0xb0,0xf3)	@ aese q4,q12
	INST(0x88,0x83,0xb0,0xf3)	@ aesmc q4,q4
	INST(0x28,0xa3,0xb0,0xf3)	@ aese q5,q12
	INST(0x8a,0xa3,0xb0,0xf3)	@ aesmc q5,q5
	veor	q2,q2,q7			@ fold last round key into the input
	add	r10,r8,#2
	INST(0x28,0x23,0xf0,0xf3)	@ aese q9,q12
	INST(0xa2,0x23,0xf0,0xf3)	@ aesmc q9,q9
	veor	q3,q3,q7
	add	r8,r8,#3			@ counter advances by three per pass
	INST(0x2a,0x83,0xb0,0xf3)	@ aese q4,q13
	INST(0x88,0x83,0xb0,0xf3)	@ aesmc q4,q4
	INST(0x2a,0xa3,0xb0,0xf3)	@ aese q5,q13
	INST(0x8a,0xa3,0xb0,0xf3)	@ aesmc q5,q5
	veor	q11,q11,q7
	vmov.32	d13[1], r9
	INST(0x2a,0x23,0xf0,0xf3)	@ aese q9,q13
	INST(0xa2,0x23,0xf0,0xf3)	@ aesmc q9,q9
	vorr	q0,q6,q6			@ next first counter block
	rev	r10,r10
	INST(0x2c,0x83,0xb0,0xf3)	@ aese q4,q14
	INST(0x88,0x83,0xb0,0xf3)	@ aesmc q4,q4
	vmov.32	d13[1], r10
	rev	r12,r8
	INST(0x2c,0xa3,0xb0,0xf3)	@ aese q5,q14
	INST(0x8a,0xa3,0xb0,0xf3)	@ aesmc q5,q5
	vorr	q1,q6,q6			@ next second counter block
	vmov.32	d13[1], r12
	INST(0x2c,0x23,0xf0,0xf3)	@ aese q9,q14
	INST(0xa2,0x23,0xf0,0xf3)	@ aesmc q9,q9
	vorr	q10,q6,q6			@ next third counter block
	subs	r2,r2,#3			@ flags are consumed by bhs below
	INST(0x2e,0x83,0xb0,0xf3)	@ aese q4,q15
	INST(0x2e,0xa3,0xb0,0xf3)	@ aese q5,q15
	INST(0x2e,0x23,0xf0,0xf3)	@ aese q9,q15

	veor	q2,q2,q4
	vld1.32	{q8},[r7]!			@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5				@ reload the round counter
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!			@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	.Loop3x_ctr32

	adds	r2,r2,#3			@ undo the bias: r2 = blocks left (0..2)
	beq	.Lctr32_done
	cmp	r2,#1
	mov	r12,#16
	it	eq
	moveq	r12,#0				@ single block: tail must not advance r0

@ Tail: same round structure on two counter blocks (q0 and q1).
.Lctr32_tail:
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	.Lctr32_tail

	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12			@ r12 is 0 when only one block is left
	INST(0x28,0x03,0xb0,0xf3)	@ aese q0,q12
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x28,0x23,0xb0,0xf3)	@ aese q1,q12
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	vld1.8	{q3},[r0]			@ second block, or the same one again
	INST(0x2a,0x03,0xb0,0xf3)	@ aese q0,q13
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x2a,0x23,0xb0,0xf3)	@ aese q1,q13
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	veor	q2,q2,q7			@ fold last round key into the input
	INST(0x2c,0x03,0xb0,0xf3)	@ aese q0,q14
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x2c,0x23,0xb0,0xf3)	@ aese q1,q14
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	veor	q3,q3,q7
	INST(0x2e,0x03,0xb0,0xf3)	@ aese q0,q15
	INST(0x2e,0x23,0xb0,0xf3)	@ aese q1,q15

	cmp	r2,#1				@ flags are consumed by beq below
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	.Lctr32_done			@ one block: second result is not stored
	vst1.8	{q3},[r1]

.Lctr32_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
#endif