/* Do not modify. This file is auto-generated from bsaes-armv7.pl. */
@ Copyright 2012-2023 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the Apache License 2.0 (the "License"). You may not use
@ this file except in compliance with the License. You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html


@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Specific modes and adaptation for Linux kernel by Ard Biesheuvel
@ of Linaro.
@ ====================================================================

@ Bit-sliced AES for ARM NEON
@
@ February 2012.
@
@ This implementation is a direct adaptation of the bsaes-x86_64 module
@ for ARM NEON, except that this module is endian-neutral [in the sense
@ that it can be compiled for either endianness] by courtesy of vld1.8's
@ neutrality. The initial version doesn't implement an interface to
@ OpenSSL, only low-level primitives and unsupported entry points, just
@ enough to collect performance results, which for the Cortex-A8 core are:
@
@ encrypt	19.5 cycles per byte processed with 128-bit key
@ decrypt	22.1 cycles per byte processed with 128-bit key
@ key conv.	440  cycles per 128-bit key/0.18 of 8x block
@
@ Snapdragon S4 encrypts a byte in 17.6 cycles and decrypts in 19.7,
@ which is [much] worse than anticipated (for further details see
@ http://www.openssl.org/~appro/Snapdragon-S4.html).
@
@ Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code
@ manages in 20.0 cycles].
@
@ When comparing to x86_64 results keep in mind that the NEON unit is
@ [mostly] single-issue and thus can't [fully] benefit from
@ instruction-level parallelism. And when comparing to aes-armv4
@ results keep in mind the key schedule conversion overhead (see
@ bsaes-x86_64.pl for further details)...
@
@ <appro@openssl.org>

@ April-August 2013
@ Add CBC, CTR and XTS subroutines and adapt for kernel use; courtesy of Ard.
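
@ Note (editorial): the exported entry points below are assumed to follow
@ the usual OpenSSL bsaes calling conventions, which are not restated in
@ this generated file. Roughly:
@
@   void ossl_bsaes_cbc_encrypt(const unsigned char *in, unsigned char *out,
@                               size_t length, const AES_KEY *key,
@                               unsigned char ivec[16], int enc);
@   void ossl_bsaes_ctr32_encrypt_blocks(const unsigned char *in,
@                               unsigned char *out, size_t len,
@                               const AES_KEY *key,
@                               const unsigned char ivec[16]);
@
@ Per AAPCS the first four arguments arrive in r0-r3 and the remaining
@ ones on the stack; see the "1st arg on the stack" loads in the bodies.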

@ $output is the last argument if it looks like a file (it has an extension)
@ $flavour is the first argument if it doesn't look like a file
#ifndef __KERNEL__
# include "arm_arch.h"

# define VFP_ABI_PUSH	vstmdb	sp!,{d8-d15}
# define VFP_ABI_POP	vldmia	sp!,{d8-d15}
# define VFP_ABI_FRAME	0x40
#else
# define VFP_ABI_PUSH
# define VFP_ABI_POP
# define VFP_ABI_FRAME	0
# define BSAES_ASM_EXTENDED_KEY
# define XTS_CHAIN_TWEAK
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

#ifdef __thumb__
# define adrl adr
#endif

#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

.syntax	unified	@ ARMv7-capable assembler is expected to handle this
#if defined(__thumb2__) && !defined(__APPLE__)
.thumb
#else
.code	32
# undef __thumb2__
#endif

.text

.type	_bsaes_decrypt8,%function
.align	4
_bsaes_decrypt8:
	adr	r6,.
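	@ Internal helper, not part of the public ABI. As far as can be
	@ inferred from the callers below: r4 points at the bit-sliced key
	@ schedule, r5 holds the round count, and the eight 128-bit input
	@ blocks arrive in q0-q7; results come back in q0-q7 in the
	@ permuted block order consumed by the CBC/CTR code below.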
	vldmia	r4!, {q9}	@ round 0 key
#if defined(__thumb2__) || defined(__APPLE__)
	adr	r6,.LM0ISR
#else
	add	r6,r6,#.LM0ISR-_bsaes_decrypt8
#endif

	vldmia	r6!, {q8}	@ .LM0ISR
	veor	q10, q0, q9	@ xor with round0 key
	veor	q11, q1, q9
	vtbl.8	d0, {q10}, d16
	vtbl.8	d1, {q10}, d17
	veor	q12, q2, q9
	vtbl.8	d2, {q11}, d16
	vtbl.8	d3, {q11}, d17
	veor	q13, q3, q9
	vtbl.8	d4, {q12}, d16
	vtbl.8	d5, {q12}, d17
	veor	q14, q4, q9
	vtbl.8	d6, {q13}, d16
	vtbl.8	d7, {q13}, d17
	veor	q15, q5, q9
	vtbl.8	d8, {q14}, d16
	vtbl.8	d9, {q14}, d17
	veor	q10, q6, q9
	vtbl.8	d10, {q15}, d16
	vtbl.8	d11, {q15}, d17
	veor	q11, q7, q9
	vtbl.8	d12, {q10}, d16
	vtbl.8	d13, {q10}, d17
	vtbl.8	d14, {q11}, d16
	vtbl.8	d15, {q11}, d17
	vmov.i8	q8,#0x55	@ compose .LBS0
	vmov.i8	q9,#0x33	@ compose .LBS1
	vshr.u64	q10, q6, #1
	vshr.u64	q11, q4, #1
	veor	q10, q10, q7
	veor	q11, q11, q5
	vand	q10, q10, q8
	vand	q11, q11, q8
	veor	q7, q7, q10
	vshl.u64	q10, q10, #1
	veor	q5, q5, q11
	vshl.u64	q11, q11, #1
	veor	q6, q6, q10
	veor	q4, q4, q11
	vshr.u64	q10, q2, #1
	vshr.u64	q11, q0, #1
	veor	q10, q10, q3
	veor	q11, q11, q1
	vand	q10, q10, q8
	vand	q11, q11, q8
	veor	q3, q3, q10
	vshl.u64	q10, q10, #1
	veor	q1, q1, q11
	vshl.u64	q11, q11, #1
	veor	q2, q2, q10
	veor	q0, q0, q11
	vmov.i8	q8,#0x0f	@ compose .LBS2
	vshr.u64	q10, q5, #2
	vshr.u64	q11, q4, #2
	veor	q10, q10, q7
	veor	q11, q11, q6
	vand	q10, q10, q9
	vand	q11, q11, q9
	veor	q7, q7, q10
	vshl.u64	q10, q10, #2
	veor	q6, q6, q11
	vshl.u64	q11, q11, #2
	veor	q5, q5, q10
	veor	q4, q4, q11
	vshr.u64	q10, q1, #2
	vshr.u64	q11, q0, #2
	veor	q10, q10, q3
	veor	q11, q11, q2
	vand	q10, q10, q9
	vand	q11, q11, q9
	veor	q3, q3, q10
	vshl.u64	q10, q10, #2
	veor	q2, q2, q11
	vshl.u64	q11, q11, #2
	veor	q1, q1, q10
	veor	q0, q0, q11
	vshr.u64	q10, q3, #4
	vshr.u64	q11, q2, #4
	veor	q10, q10, q7
	veor	q11, q11, q6
	vand	q10, q10, q8
	vand	q11, q11, q8
	veor	q7, q7, q10
	vshl.u64	q10, q10, #4
	veor	q6, q6, q11
	vshl.u64	q11, q11, #4
	veor	q3, q3, q10
	veor	q2, q2, q11
	vshr.u64	q10, q1, #4
	vshr.u64	q11, q0, #4
	veor	q10, q10, q5
	veor	q11, q11, q4
	vand	q10, q10, q8
	vand	q11, q11, q8
	veor	q5, q5, q10
	vshl.u64	q10, q10, #4
	veor	q4, q4, q11
	vshl.u64	q11, q11, #4
	veor	q1, q1, q10
	veor	q0, q0, q11
	sub	r5,r5,#1
	b	.Ldec_sbox
.align	4
.Ldec_loop:
	vldmia	r4!, {q8,q9,q10,q11}
	veor	q8, q8, q0
	veor	q9, q9, q1
	vtbl.8	d0, {q8}, d24
	vtbl.8	d1, {q8}, d25
	vldmia	r4!, {q8}
	veor	q10, q10, q2
	vtbl.8	d2, {q9}, d24
	vtbl.8	d3, {q9}, d25
	vldmia	r4!, {q9}
	veor	q11, q11, q3
	vtbl.8	d4, {q10}, d24
	vtbl.8	d5, {q10}, d25
	vldmia	r4!, {q10}
	vtbl.8	d6, {q11}, d24
	vtbl.8	d7, {q11}, d25
	vldmia	r4!, {q11}
	veor	q8, q8, q4
	veor	q9, q9, q5
	vtbl.8	d8, {q8}, d24
	vtbl.8	d9, {q8}, d25
	veor	q10, q10, q6
	vtbl.8	d10, {q9}, d24
	vtbl.8	d11, {q9}, d25
	veor	q11, q11, q7
	vtbl.8	d12, {q10}, d24
	vtbl.8	d13, {q10}, d25
	vtbl.8	d14, {q11}, d24
	vtbl.8	d15, {q11}, d25
.Ldec_sbox:
	veor	q1, q1, q4
	veor	q3, q3, q4

	veor	q4, q4, q7
	veor	q1, q1, q6
	veor	q2, q2, q7
	veor	q6, q6, q4

	veor	q0, q0, q1
	veor	q2, q2, q5
	veor	q7, q7, q6
	veor	q3, q3, q0
	veor	q5, q5, q0
	veor	q1, q1, q3
	veor	q11, q3, q0
	veor	q10, q7, q4
	veor	q9, q1, q6
	veor	q13, q4, q0
	vmov	q8, q10
	veor	q12, q5, q2

	vorr	q10, q10, q9
	veor	q15, q11, q8
	vand	q14, q11, q12
	vorr	q11, q11, q12
	veor	q12, q12, q9
	vand	q8, q8, q9
	veor	q9, q6, q2
	vand	q15, q15, q12
	vand	q13, q13, q9
	veor	q9, q3, q7
	veor	q12, q1, q5
	veor	q11, q11, q13
	veor	q10, q10, q13
	vand	q13, q9, q12
	vorr	q9, q9, q12
	veor	q11, q11, q15
	veor	q8, q8, q13
	veor	q10, q10, q14
	veor	q9, q9, q15
	veor	q8, q8, q14
	vand	q12, q4, q6
	veor	q9, q9, q14
	vand	q13, q0, q2
	vand	q14, q7, q1
	vorr	q15, q3, q5
	veor	q11, q11, q12
	veor	q9, q9, q14
	veor	q8, q8, q15
	veor	q10, q10, q13

	@ Inv_GF16	0, 1, 2, 3, s0, s1, s2, s3

	@ new smaller inversion

	vand	q14, q11, q9
	vmov	q12, q8

	veor	q13, q10, q14
	veor	q15, q8, q14
	veor	q14, q8, q14	@ q14=q15

	vbsl	q13, q9, q8
	vbsl	q15, q11, q10
	veor	q11, q11, q10

	vbsl	q12, q13, q14
	vbsl	q8, q14, q13

	vand	q14, q12, q15
	veor	q9, q9, q8

	veor	q14, q14, q11
	veor	q12, q5, q2
	veor	q8, q1, q6
	veor	q10, q15, q14
	vand	q10, q10, q5
	veor	q5, q5, q1
	vand	q11, q1, q15
	vand	q5, q5, q14
	veor	q1, q11, q10
	veor	q5, q5, q11
	veor	q15, q15, q13
	veor	q14, q14, q9
	veor	q11, q15, q14
	veor	q10, q13, q9
	vand	q11, q11, q12
	vand	q10, q10, q2
	veor	q12, q12, q8
	veor	q2, q2, q6
	vand	q8, q8, q15
	vand	q6, q6, q13
	vand	q12, q12, q14
	vand	q2, q2, q9
	veor	q8, q8, q12
	veor	q2, q2, q6
	veor	q12, q12, q11
	veor	q6, q6, q10
	veor	q5, q5, q12
	veor	q2, q2, q12
	veor	q1, q1, q8
	veor	q6, q6, q8

	veor	q12, q3, q0
	veor	q8, q7, q4
	veor	q11, q15, q14
	veor	q10, q13, q9
	vand	q11, q11, q12
	vand	q10, q10, q0
	veor	q12, q12, q8
	veor	q0, q0, q4
	vand	q8, q8, q15
	vand	q4, q4, q13
	vand	q12, q12, q14
	vand	q0, q0, q9
	veor	q8, q8, q12
	veor	q0, q0, q4
	veor	q12, q12, q11
	veor	q4, q4, q10
	veor	q15, q15, q13
	veor	q14, q14, q9
	veor	q10, q15, q14
	vand	q10, q10, q3
	veor	q3, q3, q7
	vand	q11, q7, q15
	vand	q3, q3, q14
	veor	q7, q11, q10
	veor	q3, q3, q11
	veor	q3, q3, q12
	veor	q0, q0, q12
	veor	q7, q7, q8
	veor	q4, q4, q8
	veor	q1, q1, q7
	veor	q6, q6, q5

	veor	q4, q4, q1
	veor	q2, q2, q7
	veor	q5, q5, q7
	veor	q4, q4, q2
	veor	q7, q7, q0
	veor	q4, q4, q5
	veor	q3, q3, q6
	veor	q6, q6, q1
	veor	q3, q3, q4

	veor	q4, q4, q0
	veor	q7, q7, q3
	subs	r5,r5,#1
	bcc	.Ldec_done
	@ multiplication by 0x05-0x00-0x04-0x00
	vext.8	q8, q0, q0, #8
	vext.8	q14, q3, q3, #8
	vext.8	q15, q5, q5, #8
	veor	q8, q8, q0
	vext.8	q9, q1, q1, #8
	veor	q14, q14, q3
	vext.8	q10, q6, q6, #8
	veor	q15, q15, q5
	vext.8	q11, q4, q4, #8
	veor	q9, q9, q1
	vext.8	q12, q2, q2, #8
	veor	q10, q10, q6
	vext.8	q13, q7, q7, #8
	veor	q11, q11, q4
	veor	q12, q12, q2
	veor	q13, q13, q7

	veor	q0, q0, q14
	veor	q1, q1, q14
	veor	q6, q6, q8
	veor	q2, q2, q10
	veor	q4, q4, q9
	veor	q1, q1, q15
	veor	q6, q6, q15
	veor	q2, q2, q14
	veor	q7, q7, q11
	veor	q4, q4, q14
	veor	q3, q3, q12
	veor	q2, q2, q15
	veor	q7, q7, q15
	veor	q5, q5, q13
	vext.8	q8, q0, q0, #12	@ x0 <<< 32
	vext.8	q9, q1, q1, #12
	veor	q0, q0, q8	@ x0 ^ (x0 <<< 32)
	vext.8	q10, q6, q6, #12
	veor	q1, q1, q9
	vext.8	q11, q4, q4, #12
	veor	q6, q6, q10
	vext.8	q12, q2, q2, #12
	veor	q4, q4, q11
	vext.8	q13, q7, q7, #12
	veor	q2, q2, q12
	vext.8	q14, q3, q3, #12
	veor	q7, q7, q13
	vext.8	q15, q5, q5, #12
	veor	q3, q3, q14

	veor	q9, q9, q0
	veor	q5, q5, q15
	vext.8	q0, q0, q0, #8	@ (x0 ^ (x0 <<< 32)) <<< 64)
	veor	q10, q10, q1
	veor	q8, q8, q5
	veor	q9, q9, q5
	vext.8	q1, q1, q1, #8
	veor	q13, q13, q2
	veor	q0, q0, q8
	veor	q14, q14, q7
	veor	q1, q1, q9
	vext.8	q8, q2, q2, #8
	veor	q12, q12, q4
	vext.8	q9, q7, q7, #8
	veor	q15, q15, q3
	vext.8	q2, q4, q4, #8
	veor	q11, q11, q6
	vext.8	q7, q5, q5, #8
	veor	q12, q12, q5
	vext.8	q4, q3, q3, #8
	veor	q11, q11, q5
	vext.8	q3, q6, q6, #8
	veor	q5, q9, q13
	veor	q11, q11, q2
	veor	q7, q7, q15
	veor	q6, q4, q14
	veor	q4, q8, q12
	veor	q2, q3, q10
	vmov	q3, q11
	@ vmov	q5, q9
	vldmia	r6, {q12}	@ .LISR
	ite	eq	@ Thumb2 thing, sanity check in ARM
	addeq	r6,r6,#0x10
	bne	.Ldec_loop
	vldmia	r6, {q12}	@ .LISRM0
	b	.Ldec_loop
.align	4
.Ldec_done:
	vmov.i8	q8,#0x55	@ compose .LBS0
	vmov.i8	q9,#0x33	@ compose .LBS1
	vshr.u64	q10, q3, #1
	vshr.u64	q11, q2, #1
	veor	q10, q10, q5
	veor	q11, q11, q7
	vand	q10, q10, q8
	vand	q11, q11, q8
	veor	q5, q5, q10
	vshl.u64	q10, q10, #1
	veor	q7, q7, q11
	vshl.u64	q11, q11, #1
	veor	q3, q3, q10
	veor	q2, q2, q11
	vshr.u64	q10, q6, #1
	vshr.u64	q11, q0, #1
	veor	q10, q10, q4
	veor	q11, q11, q1
	vand	q10, q10, q8
	vand	q11, q11, q8
	veor	q4, q4, q10
	vshl.u64	q10, q10, #1
	veor	q1, q1, q11
	vshl.u64	q11, q11, #1
	veor	q6, q6, q10
	veor	q0, q0, q11
	vmov.i8	q8,#0x0f	@ compose .LBS2
	vshr.u64	q10, q7, #2
	vshr.u64	q11, q2, #2
	veor	q10, q10, q5
	veor	q11, q11, q3
	vand	q10, q10, q9
	vand	q11, q11, q9
	veor	q5, q5, q10
	vshl.u64	q10, q10, #2
	veor	q3, q3, q11
	vshl.u64	q11, q11, #2
	veor	q7, q7, q10
	veor	q2, q2, q11
	vshr.u64	q10, q1, #2
	vshr.u64	q11, q0, #2
	veor	q10, q10, q4
	veor	q11, q11, q6
	vand	q10, q10, q9
	vand	q11, q11, q9
	veor	q4, q4, q10
	vshl.u64	q10, q10, #2
	veor	q6, q6, q11
	vshl.u64	q11, q11, #2
	veor	q1, q1, q10
	veor	q0, q0, q11
	vshr.u64	q10, q4, #4
	vshr.u64	q11, q6, #4
	veor	q10, q10, q5
	veor	q11, q11, q3
	vand	q10, q10, q8
	vand	q11, q11, q8
	veor	q5, q5, q10
	vshl.u64	q10, q10, #4
	veor	q3, q3, q11
	vshl.u64	q11, q11, #4
	veor	q4, q4, q10
	veor	q6, q6, q11
	vshr.u64	q10, q1, #4
	vshr.u64	q11, q0, #4
	veor	q10, q10, q7
	veor	q11, q11, q2
	vand	q10, q10, q8
	vand	q11, q11, q8
	veor	q7, q7, q10
	vshl.u64	q10, q10, #4
	veor	q2, q2, q11
	vshl.u64	q11, q11, #4
	veor	q1, q1, q10
	veor	q0, q0, q11
	vldmia	r4, {q8}	@ last round key
	veor	q6, q6, q8
	veor	q4, q4, q8
	veor	q2, q2, q8
	veor	q7, q7, q8
	veor	q3, q3, q8
	veor	q5, q5, q8
	veor	q0, q0, q8
	veor	q1, q1, q8
	bx	lr
.size	_bsaes_decrypt8,.-_bsaes_decrypt8

.type	_bsaes_const,%object
.align	6
_bsaes_const:
.LM0ISR:	@ InvShiftRows constants
.quad	0x0a0e0206070b0f03, 0x0004080c0d010509
.LISR:
.quad	0x0504070602010003, 0x0f0e0d0c080b0a09
.LISRM0:
.quad	0x01040b0e0205080f, 0x0306090c00070a0d
.LM0SR:	@ ShiftRows constants
.quad	0x0a0e02060f03070b, 0x0004080c05090d01
.LSR:
.quad	0x0504070600030201, 0x0f0e0d0c0a09080b
.LSRM0:
.quad	0x0304090e00050a0f, 0x01060b0c0207080d
.LM0:
.quad	0x02060a0e03070b0f, 0x0004080c0105090d
.LREVM0SR:
.quad	0x090d01050c000408, 0x03070b0f060a0e02
.byte	66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	6
.size	_bsaes_const,.-_bsaes_const

.type	_bsaes_encrypt8,%function
.align	4
_bsaes_encrypt8:
	adr	r6,.
	vldmia	r4!, {q9}	@ round 0 key
#if defined(__thumb2__) || defined(__APPLE__)
	adr	r6,.LM0SR
#else
	sub	r6,r6,#_bsaes_encrypt8-.LM0SR
#endif

	vldmia	r6!, {q8}	@ .LM0SR
_bsaes_encrypt8_alt:
	veor	q10, q0, q9	@ xor with round0 key
	veor	q11, q1, q9
	vtbl.8	d0, {q10}, d16
	vtbl.8	d1, {q10}, d17
	veor	q12, q2, q9
	vtbl.8	d2, {q11}, d16
	vtbl.8	d3, {q11}, d17
	veor	q13, q3, q9
	vtbl.8	d4, {q12}, d16
	vtbl.8	d5, {q12}, d17
	veor	q14, q4, q9
	vtbl.8	d6, {q13}, d16
	vtbl.8	d7, {q13}, d17
	veor	q15, q5, q9
	vtbl.8	d8, {q14}, d16
	vtbl.8	d9, {q14}, d17
	veor	q10, q6, q9
	vtbl.8	d10, {q15}, d16
	vtbl.8	d11, {q15}, d17
	veor	q11, q7, q9
	vtbl.8	d12, {q10}, d16
	vtbl.8	d13, {q10}, d17
	vtbl.8	d14, {q11}, d16
	vtbl.8	d15, {q11}, d17
_bsaes_encrypt8_bitslice:
	vmov.i8	q8,#0x55	@ compose .LBS0
	vmov.i8	q9,#0x33	@ compose .LBS1
	vshr.u64	q10, q6, #1
	vshr.u64	q11, q4, #1
	veor	q10, q10, q7
	veor	q11, q11, q5
	vand	q10, q10, q8
	vand	q11, q11, q8
	veor	q7, q7, q10
	vshl.u64	q10, q10, #1
	veor	q5, q5, q11
	vshl.u64	q11, q11, #1
	veor	q6, q6, q10
	veor	q4, q4, q11
	vshr.u64	q10, q2, #1
	vshr.u64	q11, q0, #1
	veor	q10, q10, q3
	veor	q11, q11, q1
	vand	q10, q10, q8
	vand	q11, q11, q8
	veor	q3, q3, q10
	vshl.u64	q10, q10, #1
	veor	q1, q1, q11
	vshl.u64	q11, q11, #1
	veor	q2, q2, q10
	veor	q0, q0, q11
	vmov.i8	q8,#0x0f	@ compose .LBS2
	vshr.u64	q10, q5, #2
	vshr.u64	q11, q4, #2
	veor	q10, q10, q7
	veor	q11, q11, q6
	vand	q10, q10, q9
	vand	q11, q11, q9
	veor	q7, q7, q10
	vshl.u64	q10, q10, #2
	veor	q6, q6, q11
	vshl.u64	q11, q11, #2
	veor	q5, q5, q10
	veor	q4, q4, q11
	vshr.u64	q10, q1, #2
	vshr.u64	q11, q0, #2
	veor	q10, q10, q3
	veor	q11, q11, q2
	vand	q10, q10, q9
	vand	q11, q11, q9
	veor	q3, q3, q10
	vshl.u64	q10, q10, #2
	veor	q2, q2, q11
	vshl.u64	q11, q11, #2
	veor	q1, q1, q10
	veor	q0, q0, q11
	vshr.u64	q10, q3, #4
	vshr.u64	q11, q2, #4
	veor	q10, q10, q7
	veor	q11, q11, q6
	vand	q10, q10, q8
	vand	q11, q11, q8
	veor	q7, q7, q10
	vshl.u64	q10, q10, #4
	veor	q6, q6, q11
	vshl.u64	q11, q11, #4
	veor	q3, q3, q10
	veor	q2, q2, q11
	vshr.u64	q10, q1, #4
	vshr.u64	q11, q0, #4
	veor	q10, q10, q5
	veor	q11, q11, q4
	vand	q10, q10, q8
	vand	q11, q11, q8
	veor	q5, q5, q10
	vshl.u64	q10, q10, #4
	veor	q4, q4, q11
	vshl.u64	q11, q11, #4
	veor	q1, q1, q10
	veor	q0, q0, q11
	sub	r5,r5,#1
	b	.Lenc_sbox
.align	4
.Lenc_loop:
	vldmia	r4!, {q8,q9,q10,q11}
	veor	q8, q8, q0
	veor	q9, q9, q1
	vtbl.8	d0, {q8}, d24
	vtbl.8	d1, {q8}, d25
	vldmia	r4!, {q8}
	veor	q10, q10, q2
	vtbl.8	d2, {q9}, d24
	vtbl.8	d3, {q9}, d25
	vldmia	r4!, {q9}
	veor	q11, q11, q3
	vtbl.8	d4, {q10}, d24
	vtbl.8	d5, {q10}, d25
	vldmia	r4!, {q10}
	vtbl.8	d6, {q11}, d24
	vtbl.8	d7, {q11}, d25
	vldmia	r4!, {q11}
	veor	q8, q8, q4
	veor	q9, q9, q5
	vtbl.8	d8, {q8}, d24
	vtbl.8	d9, {q8}, d25
	veor	q10, q10, q6
	vtbl.8	d10, {q9}, d24
	vtbl.8	d11, {q9}, d25
	veor	q11, q11, q7
	vtbl.8	d12, {q10}, d24
	vtbl.8	d13, {q10}, d25
	vtbl.8	d14, {q11}, d24
	vtbl.8	d15, {q11}, d25
.Lenc_sbox:
	veor	q2, q2, q1
	veor	q5, q5, q6
	veor	q3, q3, q0
	veor	q6, q6, q2
	veor	q5, q5, q0

	veor	q6, q6, q3
	veor	q3, q3, q7
	veor	q7, q7, q5
	veor	q3, q3, q4
	veor	q4, q4, q5

	veor	q2, q2, q7
	veor	q3, q3, q1
	veor	q1, q1, q5
	veor	q11, q7, q4
	veor	q10, q1, q2
	veor	q9, q5, q3
	veor	q13, q2, q4
	vmov	q8, q10
	veor	q12, q6, q0

	vorr	q10, q10, q9
	veor	q15, q11, q8
	vand	q14, q11, q12
	vorr	q11, q11, q12
	veor	q12, q12, q9
	vand	q8, q8, q9
	veor	q9, q3, q0
	vand	q15, q15, q12
	vand	q13, q13, q9
	veor	q9, q7, q1
	veor	q12, q5, q6
	veor	q11, q11, q13
	veor	q10, q10, q13
	vand	q13, q9, q12
	vorr	q9, q9, q12
	veor	q11, q11, q15
	veor	q8, q8, q13
	veor	q10, q10, q14
	veor	q9, q9, q15
	veor	q8, q8, q14
	vand	q12, q2, q3
	veor	q9, q9, q14
	vand	q13, q4, q0
	vand	q14, q1, q5
	vorr	q15, q7, q6
	veor	q11, q11, q12
	veor	q9, q9, q14
	veor	q8, q8, q15
	veor	q10, q10, q13

	@ Inv_GF16	0, 1, 2, 3, s0, s1, s2, s3

	@ new smaller inversion

	vand	q14, q11, q9
	vmov	q12, q8

	veor	q13, q10, q14
	veor	q15, q8, q14
	veor	q14, q8, q14	@ q14=q15

	vbsl	q13, q9, q8
	vbsl	q15, q11, q10
	veor	q11, q11, q10

	vbsl	q12, q13, q14
	vbsl	q8, q14, q13

	vand	q14, q12, q15
	veor	q9, q9, q8

	veor	q14, q14, q11
	veor	q12, q6, q0
	veor	q8, q5, q3
	veor	q10, q15, q14
	vand	q10, q10, q6
	veor	q6, q6, q5
	vand	q11, q5, q15
	vand	q6, q6, q14
	veor	q5, q11, q10
	veor	q6, q6, q11
	veor	q15, q15, q13
	veor	q14, q14, q9
	veor	q11, q15, q14
	veor	q10, q13, q9
	vand	q11, q11, q12
	vand	q10, q10, q0
	veor	q12, q12, q8
	veor	q0, q0, q3
	vand	q8, q8, q15
	vand	q3, q3, q13
	vand	q12, q12, q14
	vand	q0, q0, q9
	veor	q8, q8, q12
	veor	q0, q0, q3
	veor	q12, q12, q11
	veor	q3, q3, q10
	veor	q6, q6, q12
	veor	q0, q0, q12
	veor	q5, q5, q8
	veor	q3, q3, q8

	veor	q12, q7, q4
	veor	q8, q1, q2
	veor	q11, q15, q14
	veor	q10, q13, q9
	vand	q11, q11, q12
	vand	q10, q10, q4
	veor	q12, q12, q8
	veor	q4, q4, q2
	vand	q8, q8, q15
	vand	q2, q2, q13
	vand	q12, q12, q14
	vand	q4, q4, q9
	veor	q8, q8, q12
	veor	q4, q4, q2
	veor	q12, q12, q11
	veor	q2, q2, q10
	veor	q15, q15, q13
	veor	q14, q14, q9
	veor	q10, q15, q14
	vand	q10, q10, q7
	veor	q7, q7, q1
	vand	q11, q1, q15
	vand	q7, q7, q14
	veor	q1, q11, q10
	veor	q7, q7, q11
	veor	q7, q7, q12
	veor	q4, q4, q12
	veor	q1, q1, q8
	veor	q2, q2, q8
	veor	q7, q7, q0
	veor	q1, q1, q6
	veor	q6, q6, q0
	veor	q4, q4, q7
	veor	q0, q0, q1

	veor	q1, q1, q5
	veor	q5, q5, q2
	veor	q2, q2, q3
	veor	q3, q3, q5
	veor	q4, q4, q5

	veor	q6, q6, q3
	subs	r5,r5,#1
	bcc	.Lenc_done
	vext.8	q8, q0, q0, #12	@ x0 <<< 32
	vext.8	q9, q1, q1, #12
	veor	q0, q0, q8	@ x0 ^ (x0 <<< 32)
	vext.8	q10, q4, q4, #12
	veor	q1, q1, q9
	vext.8	q11, q6, q6, #12
	veor	q4, q4, q10
	vext.8	q12, q3, q3, #12
	veor	q6, q6, q11
	vext.8	q13, q7, q7, #12
	veor	q3, q3, q12
	vext.8	q14, q2, q2, #12
	veor	q7, q7, q13
	vext.8	q15, q5, q5, #12
	veor	q2, q2, q14

	veor	q9, q9, q0
	veor	q5, q5, q15
	vext.8	q0, q0, q0, #8	@ (x0 ^ (x0 <<< 32)) <<< 64)
	veor	q10, q10, q1
	veor	q8, q8, q5
	veor	q9, q9, q5
	vext.8	q1, q1, q1, #8
	veor	q13, q13, q3
	veor	q0, q0, q8
	veor	q14, q14, q7
	veor	q1, q1, q9
	vext.8	q8, q3, q3, #8
	veor	q12, q12, q6
	vext.8	q9, q7, q7, #8
	veor	q15, q15, q2
	vext.8	q3, q6, q6, #8
	veor	q11, q11, q4
	vext.8	q7, q5, q5, #8
	veor	q12, q12, q5
	vext.8	q6, q2, q2, #8
	veor	q11, q11, q5
	vext.8	q2, q4, q4, #8
	veor	q5, q9, q13
	veor	q4, q8, q12
	veor	q3, q3, q11
	veor	q7, q7, q15
	veor	q6, q6, q14
	@ vmov	q4, q8
	veor	q2, q2, q10
	@ vmov	q5, q9
	vldmia	r6, {q12}	@ .LSR
	ite	eq	@ Thumb2 thing, sanity check in ARM
	addeq	r6,r6,#0x10
	bne	.Lenc_loop
	vldmia	r6, {q12}	@ .LSRM0
	b	.Lenc_loop
.align	4
.Lenc_done:
	vmov.i8	q8,#0x55	@ compose .LBS0
	vmov.i8	q9,#0x33	@ compose .LBS1
	vshr.u64	q10, q2, #1
	vshr.u64	q11, q3, #1
	veor	q10, q10, q5
	veor	q11, q11, q7
	vand	q10, q10, q8
	vand	q11, q11, q8
	veor	q5, q5, q10
	vshl.u64	q10, q10, #1
	veor	q7, q7, q11
	vshl.u64	q11, q11, #1
	veor	q2, q2, q10
	veor	q3, q3, q11
	vshr.u64	q10, q4, #1
	vshr.u64	q11, q0, #1
	veor	q10, q10, q6
	veor	q11, q11, q1
	vand	q10, q10, q8
	vand	q11, q11, q8
	veor	q6, q6, q10
	vshl.u64	q10, q10, #1
	veor	q1, q1, q11
	vshl.u64	q11, q11, #1
	veor	q4, q4, q10
	veor	q0, q0, q11
	vmov.i8	q8,#0x0f	@ compose .LBS2
	vshr.u64	q10, q7, #2
	vshr.u64	q11, q3, #2
	veor	q10, q10, q5
	veor	q11, q11, q2
	vand	q10, q10, q9
	vand	q11, q11, q9
	veor	q5, q5, q10
	vshl.u64	q10, q10, #2
	veor	q2, q2, q11
	vshl.u64	q11, q11, #2
	veor	q7, q7, q10
	veor	q3, q3, q11
	vshr.u64	q10, q1, #2
	vshr.u64	q11, q0, #2
	veor	q10, q10, q6
	veor	q11, q11, q4
	vand	q10, q10, q9
	vand	q11, q11, q9
	veor	q6, q6, q10
	vshl.u64	q10, q10, #2
	veor	q4, q4, q11
	vshl.u64	q11, q11, #2
	veor	q1, q1, q10
	veor	q0, q0, q11
	vshr.u64	q10, q6, #4
	vshr.u64	q11, q4, #4
	veor	q10, q10, q5
	veor	q11, q11, q2
	vand	q10, q10, q8
	vand	q11, q11, q8
	veor	q5, q5, q10
	vshl.u64	q10, q10, #4
	veor	q2, q2, q11
	vshl.u64	q11, q11, #4
	veor	q6, q6, q10
	veor	q4, q4, q11
	vshr.u64	q10, q1, #4
	vshr.u64	q11, q0, #4
	veor	q10, q10, q7
	veor	q11, q11, q3
	vand	q10, q10, q8
	vand	q11, q11, q8
	veor	q7, q7, q10
	vshl.u64	q10, q10, #4
	veor	q3, q3, q11
	vshl.u64	q11, q11, #4
	veor	q1, q1, q10
	veor	q0, q0, q11
	vldmia	r4, {q8}	@ last round key
	veor	q4, q4, q8
	veor	q6, q6, q8
	veor	q3, q3, q8
	veor	q7, q7, q8
	veor	q2, q2, q8
	veor	q5, q5, q8
	veor	q0, q0, q8
	veor	q1, q1, q8
	bx	lr
.size	_bsaes_encrypt8,.-_bsaes_encrypt8
.type	_bsaes_key_convert,%function
.align	4
_bsaes_key_convert:
	adr	r6,.
	vld1.8	{q7}, [r4]!	@ load round 0 key
#if defined(__thumb2__) || defined(__APPLE__)
	adr	r6,.LM0
#else
	sub	r6,r6,#_bsaes_key_convert-.LM0
#endif
	vld1.8	{q15}, [r4]!	@ load round 1 key

	vmov.i8	q8, #0x01	@ bit masks
	vmov.i8	q9, #0x02
	vmov.i8	q10, #0x04
	vmov.i8	q11, #0x08
	vmov.i8	q12, #0x10
	vmov.i8	q13, #0x20
	vldmia	r6, {q14}	@ .LM0

#ifdef __ARMEL__
	vrev32.8	q7, q7
	vrev32.8	q15, q15
#endif
	sub	r5,r5,#1
	vstmia	r12!, {q7}	@ save round 0 key
	b	.Lkey_loop

.align	4
.Lkey_loop:
	vtbl.8	d14,{q15},d28
	vtbl.8	d15,{q15},d29
	vmov.i8	q6, #0x40
	vmov.i8	q15, #0x80

	vtst.8	q0, q7, q8
	vtst.8	q1, q7, q9
	vtst.8	q2, q7, q10
	vtst.8	q3, q7, q11
	vtst.8	q4, q7, q12
	vtst.8	q5, q7, q13
	vtst.8	q6, q7, q6
	vtst.8	q7, q7, q15
	vld1.8	{q15}, [r4]!	@ load next round key
	vmvn	q0, q0	@ "pnot"
	vmvn	q1, q1
	vmvn	q5, q5
	vmvn	q6, q6
#ifdef __ARMEL__
	vrev32.8	q15, q15
#endif
	subs	r5,r5,#1
	vstmia	r12!,{q0,q1,q2,q3,q4,q5,q6,q7}	@ write bit-sliced round key
	bne	.Lkey_loop

	vmov.i8	q7,#0x63	@ compose .L63
	@ don't save last round key
	bx	lr
.size	_bsaes_key_convert,.-_bsaes_key_convert



.globl	ossl_bsaes_cbc_encrypt
.type	ossl_bsaes_cbc_encrypt,%function
.align	5
ossl_bsaes_cbc_encrypt:
#ifndef __KERNEL__
	cmp	r2, #128
#ifndef __thumb__
	blo	AES_cbc_encrypt
#else
	bhs	.Lcbc_do_bsaes
	b	AES_cbc_encrypt
.Lcbc_do_bsaes:
#endif
#endif

	@ it is up to the caller to make sure we are called with enc == 0

	mov	ip, sp
	stmdb	sp!, {r4,r5,r6,r7,r8,r9,r10, lr}
	VFP_ABI_PUSH
	ldr	r8, [ip]	@ IV is 1st arg on the stack
	mov	r2, r2, lsr#4	@ len in 16 byte blocks
	sub	sp, #0x10	@ scratch space to carry over the IV
	mov	r9, sp	@ save sp

	ldr	r10, [r3, #240]	@ get # of rounds
#ifndef BSAES_ASM_EXTENDED_KEY
	@ allocate the key schedule on the stack
	sub	r12, sp, r10, lsl#7	@ 128 bytes per inner round key
	add	r12, #96	@ size of bit-sliced key schedule

	@ populate the key schedule
	mov	r4, r3	@ pass key
	mov	r5, r10	@ pass # of rounds
	mov	sp, r12	@ sp is sp
	bl	_bsaes_key_convert
	vldmia	sp, {q6}
	vstmia	r12, {q15}	@ save last round key
	veor	q7, q7, q6	@ fix up round 0 key
	vstmia	sp, {q7}
#else
	ldr	r12, [r3, #244]
	eors	r12, #1
	beq	0f

	@ populate the key schedule
	str	r12, [r3, #244]
	mov	r4, r3	@ pass key
	mov	r5, r10	@ pass # of rounds
	add	r12, r3, #248	@ pass key schedule
	bl	_bsaes_key_convert
	add	r4, r3, #248
	vldmia	r4, {q6}
	vstmia	r12, {q15}	@ save last round key
	veor	q7, q7, q6	@ fix up round 0 key
	vstmia	r4, {q7}
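	@ Editorial note: in this extended-key path the bit-sliced schedule
	@ lives in the caller's key structure (offset 248) and the flag at
	@ [r3,#244] records that conversion has been done; "0:" below is
	@ entered directly on later calls once that flag is already clear.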

.align	2
0:
#endif

	vld1.8	{q15}, [r8]	@ load IV
	b	.Lcbc_dec_loop

.align	4
.Lcbc_dec_loop:
	subs	r2, r2, #0x8
	bmi	.Lcbc_dec_loop_finish

	vld1.8	{q0,q1}, [r0]!	@ load input
	vld1.8	{q2,q3}, [r0]!
#ifndef BSAES_ASM_EXTENDED_KEY
	mov	r4, sp	@ pass the key
#else
	add	r4, r3, #248
#endif
	vld1.8	{q4,q5}, [r0]!
	mov	r5, r10
	vld1.8	{q6,q7}, [r0]
	sub	r0, r0, #0x60
	vstmia	r9, {q15}	@ put aside IV

	bl	_bsaes_decrypt8

	vldmia	r9, {q14}	@ reload IV
	vld1.8	{q8,q9}, [r0]!	@ reload input
	veor	q0, q0, q14	@ ^= IV
	vld1.8	{q10,q11}, [r0]!
	veor	q1, q1, q8
	veor	q6, q6, q9
	vld1.8	{q12,q13}, [r0]!
	veor	q4, q4, q10
	veor	q2, q2, q11
	vld1.8	{q14,q15}, [r0]!
	veor	q7, q7, q12
	vst1.8	{q0,q1}, [r1]!	@ write output
	veor	q3, q3, q13
	vst1.8	{q6}, [r1]!
	veor	q5, q5, q14
	vst1.8	{q4}, [r1]!
	vst1.8	{q2}, [r1]!
	vst1.8	{q7}, [r1]!
	vst1.8	{q3}, [r1]!
	vst1.8	{q5}, [r1]!

	b	.Lcbc_dec_loop

.Lcbc_dec_loop_finish:
	adds	r2, r2, #8
	beq	.Lcbc_dec_done

	vld1.8	{q0}, [r0]!	@ load input
	cmp	r2, #2
	blo	.Lcbc_dec_one
	vld1.8	{q1}, [r0]!
#ifndef BSAES_ASM_EXTENDED_KEY
	mov	r4, sp	@ pass the key
#else
	add	r4, r3, #248
#endif
	mov	r5, r10
	vstmia	r9, {q15}	@ put aside IV
	beq	.Lcbc_dec_two
	vld1.8	{q2}, [r0]!
	cmp	r2, #4
	blo	.Lcbc_dec_three
	vld1.8	{q3}, [r0]!
	beq	.Lcbc_dec_four
	vld1.8	{q4}, [r0]!
	cmp	r2, #6
	blo	.Lcbc_dec_five
	vld1.8	{q5}, [r0]!
	beq	.Lcbc_dec_six
	vld1.8	{q6}, [r0]!
	sub	r0, r0, #0x70

	bl	_bsaes_decrypt8

	vldmia	r9, {q14}	@ reload IV
	vld1.8	{q8,q9}, [r0]!	@ reload input
	veor	q0, q0, q14	@ ^= IV
	vld1.8	{q10,q11}, [r0]!
	veor	q1, q1, q8
	veor	q6, q6, q9
	vld1.8	{q12,q13}, [r0]!
	veor	q4, q4, q10
	veor	q2, q2, q11
	vld1.8	{q15}, [r0]!
	veor	q7, q7, q12
	vst1.8	{q0,q1}, [r1]!	@ write output
	veor	q3, q3, q13
	vst1.8	{q6}, [r1]!
	vst1.8	{q4}, [r1]!
	vst1.8	{q2}, [r1]!
	vst1.8	{q7}, [r1]!
	vst1.8	{q3}, [r1]!
	b	.Lcbc_dec_done
.align	4
.Lcbc_dec_six:
	sub	r0, r0, #0x60
	bl	_bsaes_decrypt8
	vldmia	r9,{q14}	@ reload IV
	vld1.8	{q8,q9}, [r0]!	@ reload input
	veor	q0, q0, q14	@ ^= IV
	vld1.8	{q10,q11}, [r0]!
	veor	q1, q1, q8
	veor	q6, q6, q9
	vld1.8	{q12}, [r0]!
	veor	q4, q4, q10
	veor	q2, q2, q11
	vld1.8	{q15}, [r0]!
	veor	q7, q7, q12
	vst1.8	{q0,q1}, [r1]!	@ write output
	vst1.8	{q6}, [r1]!
	vst1.8	{q4}, [r1]!
	vst1.8	{q2}, [r1]!
	vst1.8	{q7}, [r1]!
	b	.Lcbc_dec_done
.align	4
.Lcbc_dec_five:
	sub	r0, r0, #0x50
	bl	_bsaes_decrypt8
	vldmia	r9, {q14}	@ reload IV
	vld1.8	{q8,q9}, [r0]!	@ reload input
	veor	q0, q0, q14	@ ^= IV
	vld1.8	{q10,q11}, [r0]!
	veor	q1, q1, q8
	veor	q6, q6, q9
	vld1.8	{q15}, [r0]!
	veor	q4, q4, q10
	vst1.8	{q0,q1}, [r1]!	@ write output
	veor	q2, q2, q11
	vst1.8	{q6}, [r1]!
	vst1.8	{q4}, [r1]!
	vst1.8	{q2}, [r1]!
	b	.Lcbc_dec_done
.align	4
.Lcbc_dec_four:
	sub	r0, r0, #0x40
	bl	_bsaes_decrypt8
	vldmia	r9, {q14}	@ reload IV
	vld1.8	{q8,q9}, [r0]!	@ reload input
	veor	q0, q0, q14	@ ^= IV
	vld1.8	{q10}, [r0]!
	veor	q1, q1, q8
	veor	q6, q6, q9
	vld1.8	{q15}, [r0]!
	veor	q4, q4, q10
	vst1.8	{q0,q1}, [r1]!	@ write output
	vst1.8	{q6}, [r1]!
1287bc3d5698SJohn Baldwin vst1.8 {q4}, [r1]! 1288bc3d5698SJohn Baldwin b .Lcbc_dec_done 1289bc3d5698SJohn Baldwin.align 4 1290bc3d5698SJohn Baldwin.Lcbc_dec_three: 1291bc3d5698SJohn Baldwin sub r0, r0, #0x30 1292bc3d5698SJohn Baldwin bl _bsaes_decrypt8 1293bc3d5698SJohn Baldwin vldmia r9, {q14} @ reload IV 1294bc3d5698SJohn Baldwin vld1.8 {q8,q9}, [r0]! @ reload input 1295bc3d5698SJohn Baldwin veor q0, q0, q14 @ ^= IV 1296bc3d5698SJohn Baldwin vld1.8 {q15}, [r0]! 1297bc3d5698SJohn Baldwin veor q1, q1, q8 1298bc3d5698SJohn Baldwin veor q6, q6, q9 1299bc3d5698SJohn Baldwin vst1.8 {q0,q1}, [r1]! @ write output 1300bc3d5698SJohn Baldwin vst1.8 {q6}, [r1]! 1301bc3d5698SJohn Baldwin b .Lcbc_dec_done 1302bc3d5698SJohn Baldwin.align 4 1303bc3d5698SJohn Baldwin.Lcbc_dec_two: 1304bc3d5698SJohn Baldwin sub r0, r0, #0x20 1305bc3d5698SJohn Baldwin bl _bsaes_decrypt8 1306bc3d5698SJohn Baldwin vldmia r9, {q14} @ reload IV 1307bc3d5698SJohn Baldwin vld1.8 {q8}, [r0]! @ reload input 1308bc3d5698SJohn Baldwin veor q0, q0, q14 @ ^= IV 1309bc3d5698SJohn Baldwin vld1.8 {q15}, [r0]! @ reload input 1310bc3d5698SJohn Baldwin veor q1, q1, q8 1311bc3d5698SJohn Baldwin vst1.8 {q0,q1}, [r1]! @ write output 1312bc3d5698SJohn Baldwin b .Lcbc_dec_done 1313bc3d5698SJohn Baldwin.align 4 1314bc3d5698SJohn Baldwin.Lcbc_dec_one: 1315bc3d5698SJohn Baldwin sub r0, r0, #0x10 1316bc3d5698SJohn Baldwin mov r10, r1 @ save original out pointer 1317bc3d5698SJohn Baldwin mov r1, r9 @ use the iv scratch space as out buffer 1318bc3d5698SJohn Baldwin mov r2, r3 1319bc3d5698SJohn Baldwin vmov q4,q15 @ just in case ensure that IV 1320bc3d5698SJohn Baldwin vmov q5,q0 @ and input are preserved 1321bc3d5698SJohn Baldwin bl AES_decrypt 1322bc3d5698SJohn Baldwin vld1.8 {q0}, [r9] @ load result 1323bc3d5698SJohn Baldwin veor q0, q0, q4 @ ^= IV 1324bc3d5698SJohn Baldwin vmov q15, q5 @ q5 holds input 1325bc3d5698SJohn Baldwin vst1.8 {q0}, [r10] @ write output 1326bc3d5698SJohn Baldwin 1327bc3d5698SJohn Baldwin.Lcbc_dec_done: 1328bc3d5698SJohn Baldwin#ifndef BSAES_ASM_EXTENDED_KEY 1329bc3d5698SJohn Baldwin vmov.i32 q0, #0 1330bc3d5698SJohn Baldwin vmov.i32 q1, #0 1331bc3d5698SJohn Baldwin.Lcbc_dec_bzero:@ wipe key schedule [if any] 1332bc3d5698SJohn Baldwin vstmia sp!, {q0,q1} 1333bc3d5698SJohn Baldwin cmp sp, r9 1334bc3d5698SJohn Baldwin bne .Lcbc_dec_bzero 1335bc3d5698SJohn Baldwin#endif 1336bc3d5698SJohn Baldwin 1337bc3d5698SJohn Baldwin mov sp, r9 1338bc3d5698SJohn Baldwin add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb 1339bc3d5698SJohn Baldwin vst1.8 {q15}, [r8] @ return IV 1340bc3d5698SJohn Baldwin VFP_ABI_POP 1341bc3d5698SJohn Baldwin ldmia sp!, {r4,r5,r6,r7,r8,r9,r10, pc} 1342*c0855eaaSJohn Baldwin.size ossl_bsaes_cbc_encrypt,.-ossl_bsaes_cbc_encrypt 1343bc3d5698SJohn Baldwin 1344*c0855eaaSJohn Baldwin.globl ossl_bsaes_ctr32_encrypt_blocks 1345*c0855eaaSJohn Baldwin.type ossl_bsaes_ctr32_encrypt_blocks,%function 1346bc3d5698SJohn Baldwin.align 5 1347*c0855eaaSJohn Baldwinossl_bsaes_ctr32_encrypt_blocks: 1348bc3d5698SJohn Baldwin cmp r2, #8 @ use plain AES for 1349bc3d5698SJohn Baldwin blo .Lctr_enc_short @ small sizes 1350bc3d5698SJohn Baldwin 1351bc3d5698SJohn Baldwin mov ip, sp 1352bc3d5698SJohn Baldwin stmdb sp!, {r4,r5,r6,r7,r8,r9,r10, lr} 1353bc3d5698SJohn Baldwin VFP_ABI_PUSH 1354bc3d5698SJohn Baldwin ldr r8, [ip] @ ctr is 1st arg on the stack 1355bc3d5698SJohn Baldwin sub sp, sp, #0x10 @ scratch space to carry over the ctr 1356bc3d5698SJohn Baldwin mov r9, sp @ save sp 1357bc3d5698SJohn Baldwin 1358bc3d5698SJohn Baldwin ldr 
r10, [r3, #240] @ get # of rounds 1359bc3d5698SJohn Baldwin#ifndef BSAES_ASM_EXTENDED_KEY 1360bc3d5698SJohn Baldwin @ allocate the key schedule on the stack 1361bc3d5698SJohn Baldwin sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key 1362bc3d5698SJohn Baldwin add r12, #96 @ size of bit-sliced key schedule 1363bc3d5698SJohn Baldwin 1364bc3d5698SJohn Baldwin @ populate the key schedule 1365bc3d5698SJohn Baldwin mov r4, r3 @ pass key 1366bc3d5698SJohn Baldwin mov r5, r10 @ pass # of rounds 1367bc3d5698SJohn Baldwin mov sp, r12 @ sp is sp 1368bc3d5698SJohn Baldwin bl _bsaes_key_convert 1369bc3d5698SJohn Baldwin veor q7,q7,q15 @ fix up last round key 1370bc3d5698SJohn Baldwin vstmia r12, {q7} @ save last round key 1371bc3d5698SJohn Baldwin 1372bc3d5698SJohn Baldwin vld1.8 {q0}, [r8] @ load counter 1373bc3d5698SJohn Baldwin#ifdef __APPLE__ 1374bc3d5698SJohn Baldwin mov r8, #:lower16:(.LREVM0SR-.LM0) 1375bc3d5698SJohn Baldwin add r8, r6, r8 1376bc3d5698SJohn Baldwin#else 1377bc3d5698SJohn Baldwin add r8, r6, #.LREVM0SR-.LM0 @ borrow r8 1378bc3d5698SJohn Baldwin#endif 1379bc3d5698SJohn Baldwin vldmia sp, {q4} @ load round0 key 1380bc3d5698SJohn Baldwin#else 1381bc3d5698SJohn Baldwin ldr r12, [r3, #244] 1382bc3d5698SJohn Baldwin eors r12, #1 1383bc3d5698SJohn Baldwin beq 0f 1384bc3d5698SJohn Baldwin 1385bc3d5698SJohn Baldwin @ populate the key schedule 1386bc3d5698SJohn Baldwin str r12, [r3, #244] 1387bc3d5698SJohn Baldwin mov r4, r3 @ pass key 1388bc3d5698SJohn Baldwin mov r5, r10 @ pass # of rounds 1389bc3d5698SJohn Baldwin add r12, r3, #248 @ pass key schedule 1390bc3d5698SJohn Baldwin bl _bsaes_key_convert 1391bc3d5698SJohn Baldwin veor q7,q7,q15 @ fix up last round key 1392bc3d5698SJohn Baldwin vstmia r12, {q7} @ save last round key 1393bc3d5698SJohn Baldwin 1394bc3d5698SJohn Baldwin.align 2 1395454c425dSMark Johnston0: add r12, r3, #248 1396bc3d5698SJohn Baldwin vld1.8 {q0}, [r8] @ load counter 1397e415d255SJung-uk Kim add r8, r6, #.LREVM0SR-.LM0 @ borrow r8 1398bc3d5698SJohn Baldwin vldmia r12, {q4} @ load round0 key 1399bc3d5698SJohn Baldwin sub sp, #0x10 @ place for adjusted round0 key 1400bc3d5698SJohn Baldwin#endif 1401bc3d5698SJohn Baldwin 1402bc3d5698SJohn Baldwin vmov.i32 q8,#1 @ compose 1<<96 1403bc3d5698SJohn Baldwin veor q9,q9,q9 1404bc3d5698SJohn Baldwin vrev32.8 q0,q0 1405bc3d5698SJohn Baldwin vext.8 q8,q9,q8,#4 1406bc3d5698SJohn Baldwin vrev32.8 q4,q4 1407bc3d5698SJohn Baldwin vadd.u32 q9,q8,q8 @ compose 2<<96 1408bc3d5698SJohn Baldwin vstmia sp, {q4} @ save adjusted round0 key 1409bc3d5698SJohn Baldwin b .Lctr_enc_loop 1410bc3d5698SJohn Baldwin 1411bc3d5698SJohn Baldwin.align 4 1412bc3d5698SJohn Baldwin.Lctr_enc_loop: 1413bc3d5698SJohn Baldwin vadd.u32 q10, q8, q9 @ compose 3<<96 1414bc3d5698SJohn Baldwin vadd.u32 q1, q0, q8 @ +1 1415bc3d5698SJohn Baldwin vadd.u32 q2, q0, q9 @ +2 1416bc3d5698SJohn Baldwin vadd.u32 q3, q0, q10 @ +3 1417bc3d5698SJohn Baldwin vadd.u32 q4, q1, q10 1418bc3d5698SJohn Baldwin vadd.u32 q5, q2, q10 1419bc3d5698SJohn Baldwin vadd.u32 q6, q3, q10 1420bc3d5698SJohn Baldwin vadd.u32 q7, q4, q10 1421bc3d5698SJohn Baldwin vadd.u32 q10, q5, q10 @ next counter 1422bc3d5698SJohn Baldwin 1423bc3d5698SJohn Baldwin @ Borrow prologue from _bsaes_encrypt8 to use the opportunity 1424bc3d5698SJohn Baldwin @ to flip byte order in 32-bit counter 1425bc3d5698SJohn Baldwin 1426bc3d5698SJohn Baldwin vldmia sp, {q9} @ load round0 key 1427bc3d5698SJohn Baldwin#ifndef BSAES_ASM_EXTENDED_KEY 1428bc3d5698SJohn Baldwin add r4, sp, #0x10 @ pass next round key 
1429bc3d5698SJohn Baldwin#else 1430bc3d5698SJohn Baldwin add r4, r3, #264 1431bc3d5698SJohn Baldwin#endif 1432bc3d5698SJohn Baldwin vldmia r8, {q8} @ .LREVM0SR 1433bc3d5698SJohn Baldwin mov r5, r10 @ pass rounds 1434bc3d5698SJohn Baldwin vstmia r9, {q10} @ save next counter 1435bc3d5698SJohn Baldwin#ifdef __APPLE__ 1436bc3d5698SJohn Baldwin mov r6, #:lower16:(.LREVM0SR-.LSR) 1437bc3d5698SJohn Baldwin sub r6, r8, r6 1438bc3d5698SJohn Baldwin#else 1439bc3d5698SJohn Baldwin sub r6, r8, #.LREVM0SR-.LSR @ pass constants 1440bc3d5698SJohn Baldwin#endif 1441bc3d5698SJohn Baldwin 1442bc3d5698SJohn Baldwin bl _bsaes_encrypt8_alt 1443bc3d5698SJohn Baldwin 1444bc3d5698SJohn Baldwin subs r2, r2, #8 1445bc3d5698SJohn Baldwin blo .Lctr_enc_loop_done 1446bc3d5698SJohn Baldwin 1447bc3d5698SJohn Baldwin vld1.8 {q8,q9}, [r0]! @ load input 1448bc3d5698SJohn Baldwin vld1.8 {q10,q11}, [r0]! 1449bc3d5698SJohn Baldwin veor q0, q8 1450bc3d5698SJohn Baldwin veor q1, q9 1451bc3d5698SJohn Baldwin vld1.8 {q12,q13}, [r0]! 1452bc3d5698SJohn Baldwin veor q4, q10 1453bc3d5698SJohn Baldwin veor q6, q11 1454bc3d5698SJohn Baldwin vld1.8 {q14,q15}, [r0]! 1455bc3d5698SJohn Baldwin veor q3, q12 1456bc3d5698SJohn Baldwin vst1.8 {q0,q1}, [r1]! @ write output 1457bc3d5698SJohn Baldwin veor q7, q13 1458bc3d5698SJohn Baldwin veor q2, q14 1459bc3d5698SJohn Baldwin vst1.8 {q4}, [r1]! 1460bc3d5698SJohn Baldwin veor q5, q15 1461bc3d5698SJohn Baldwin vst1.8 {q6}, [r1]! 1462bc3d5698SJohn Baldwin vmov.i32 q8, #1 @ compose 1<<96 1463bc3d5698SJohn Baldwin vst1.8 {q3}, [r1]! 1464bc3d5698SJohn Baldwin veor q9, q9, q9 1465bc3d5698SJohn Baldwin vst1.8 {q7}, [r1]! 1466bc3d5698SJohn Baldwin vext.8 q8, q9, q8, #4 1467bc3d5698SJohn Baldwin vst1.8 {q2}, [r1]! 1468bc3d5698SJohn Baldwin vadd.u32 q9,q8,q8 @ compose 2<<96 1469bc3d5698SJohn Baldwin vst1.8 {q5}, [r1]! 1470bc3d5698SJohn Baldwin vldmia r9, {q0} @ load counter 1471bc3d5698SJohn Baldwin 1472bc3d5698SJohn Baldwin bne .Lctr_enc_loop 1473bc3d5698SJohn Baldwin b .Lctr_enc_done 1474bc3d5698SJohn Baldwin 1475bc3d5698SJohn Baldwin.align 4 1476bc3d5698SJohn Baldwin.Lctr_enc_loop_done: 1477bc3d5698SJohn Baldwin add r2, r2, #8 1478bc3d5698SJohn Baldwin vld1.8 {q8}, [r0]! @ load input 1479bc3d5698SJohn Baldwin veor q0, q8 1480bc3d5698SJohn Baldwin vst1.8 {q0}, [r1]! @ write output 1481bc3d5698SJohn Baldwin cmp r2, #2 1482bc3d5698SJohn Baldwin blo .Lctr_enc_done 1483bc3d5698SJohn Baldwin vld1.8 {q9}, [r0]! 1484bc3d5698SJohn Baldwin veor q1, q9 1485bc3d5698SJohn Baldwin vst1.8 {q1}, [r1]! 1486bc3d5698SJohn Baldwin beq .Lctr_enc_done 1487bc3d5698SJohn Baldwin vld1.8 {q10}, [r0]! 1488bc3d5698SJohn Baldwin veor q4, q10 1489bc3d5698SJohn Baldwin vst1.8 {q4}, [r1]! 1490bc3d5698SJohn Baldwin cmp r2, #4 1491bc3d5698SJohn Baldwin blo .Lctr_enc_done 1492bc3d5698SJohn Baldwin vld1.8 {q11}, [r0]! 1493bc3d5698SJohn Baldwin veor q6, q11 1494bc3d5698SJohn Baldwin vst1.8 {q6}, [r1]! 1495bc3d5698SJohn Baldwin beq .Lctr_enc_done 1496bc3d5698SJohn Baldwin vld1.8 {q12}, [r0]! 1497bc3d5698SJohn Baldwin veor q3, q12 1498bc3d5698SJohn Baldwin vst1.8 {q3}, [r1]! 1499bc3d5698SJohn Baldwin cmp r2, #6 1500bc3d5698SJohn Baldwin blo .Lctr_enc_done 1501bc3d5698SJohn Baldwin vld1.8 {q13}, [r0]! 1502bc3d5698SJohn Baldwin veor q7, q13 1503bc3d5698SJohn Baldwin vst1.8 {q7}, [r1]! 1504bc3d5698SJohn Baldwin beq .Lctr_enc_done 1505bc3d5698SJohn Baldwin vld1.8 {q14}, [r0] 1506bc3d5698SJohn Baldwin veor q2, q14 1507bc3d5698SJohn Baldwin vst1.8 {q2}, [r1]! 
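@ fewer than eight blocks remained; _bsaes_encrypt8_alt returns the
@ keystream in the register order q0,q1,q4,q6,q3,q7,q2(,q5), which is why
@ the tail above XORs and stores in that order; .Lctr_enc_done below wipes
@ the stack-resident copy of the key schedule before returning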
1508bc3d5698SJohn Baldwin 1509bc3d5698SJohn Baldwin.Lctr_enc_done: 1510bc3d5698SJohn Baldwin vmov.i32 q0, #0 1511bc3d5698SJohn Baldwin vmov.i32 q1, #0 1512bc3d5698SJohn Baldwin#ifndef BSAES_ASM_EXTENDED_KEY 1513bc3d5698SJohn Baldwin.Lctr_enc_bzero:@ wipe key schedule [if any] 1514bc3d5698SJohn Baldwin vstmia sp!, {q0,q1} 1515bc3d5698SJohn Baldwin cmp sp, r9 1516bc3d5698SJohn Baldwin bne .Lctr_enc_bzero 1517bc3d5698SJohn Baldwin#else 1518bc3d5698SJohn Baldwin vstmia sp, {q0,q1} 1519bc3d5698SJohn Baldwin#endif 1520bc3d5698SJohn Baldwin 1521bc3d5698SJohn Baldwin mov sp, r9 1522bc3d5698SJohn Baldwin add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb 1523bc3d5698SJohn Baldwin VFP_ABI_POP 1524bc3d5698SJohn Baldwin ldmia sp!, {r4,r5,r6,r7,r8,r9,r10, pc} @ return 1525bc3d5698SJohn Baldwin 1526bc3d5698SJohn Baldwin.align 4 1527bc3d5698SJohn Baldwin.Lctr_enc_short: 1528bc3d5698SJohn Baldwin ldr ip, [sp] @ ctr pointer is passed on stack 1529bc3d5698SJohn Baldwin stmdb sp!, {r4,r5,r6,r7,r8, lr} 1530bc3d5698SJohn Baldwin 1531bc3d5698SJohn Baldwin mov r4, r0 @ copy arguments 1532bc3d5698SJohn Baldwin mov r5, r1 1533bc3d5698SJohn Baldwin mov r6, r2 1534bc3d5698SJohn Baldwin mov r7, r3 1535bc3d5698SJohn Baldwin ldr r8, [ip, #12] @ load counter .LSW 1536bc3d5698SJohn Baldwin vld1.8 {q1}, [ip] @ load whole counter value 1537bc3d5698SJohn Baldwin#ifdef __ARMEL__ 1538bc3d5698SJohn Baldwin rev r8, r8 1539bc3d5698SJohn Baldwin#endif 1540bc3d5698SJohn Baldwin sub sp, sp, #0x10 1541bc3d5698SJohn Baldwin vst1.8 {q1}, [sp] @ copy counter value 1542bc3d5698SJohn Baldwin sub sp, sp, #0x10 1543bc3d5698SJohn Baldwin 1544bc3d5698SJohn Baldwin.Lctr_enc_short_loop: 1545bc3d5698SJohn Baldwin add r0, sp, #0x10 @ input counter value 1546bc3d5698SJohn Baldwin mov r1, sp @ output on the stack 1547bc3d5698SJohn Baldwin mov r2, r7 @ key 1548bc3d5698SJohn Baldwin 1549bc3d5698SJohn Baldwin bl AES_encrypt 1550bc3d5698SJohn Baldwin 1551bc3d5698SJohn Baldwin vld1.8 {q0}, [r4]! @ load input 1552bc3d5698SJohn Baldwin vld1.8 {q1}, [sp] @ load encrypted counter 1553bc3d5698SJohn Baldwin add r8, r8, #1 1554bc3d5698SJohn Baldwin#ifdef __ARMEL__ 1555bc3d5698SJohn Baldwin rev r0, r8 1556bc3d5698SJohn Baldwin str r0, [sp, #0x1c] @ next counter value 1557bc3d5698SJohn Baldwin#else 1558bc3d5698SJohn Baldwin str r8, [sp, #0x1c] @ next counter value 1559bc3d5698SJohn Baldwin#endif 1560bc3d5698SJohn Baldwin veor q0,q0,q1 1561bc3d5698SJohn Baldwin vst1.8 {q0}, [r5]! 
@ store output 1562bc3d5698SJohn Baldwin subs r6, r6, #1 1563bc3d5698SJohn Baldwin bne .Lctr_enc_short_loop 1564bc3d5698SJohn Baldwin 1565bc3d5698SJohn Baldwin vmov.i32 q0, #0 1566bc3d5698SJohn Baldwin vmov.i32 q1, #0 1567bc3d5698SJohn Baldwin vstmia sp!, {q0,q1} 1568bc3d5698SJohn Baldwin 1569bc3d5698SJohn Baldwin ldmia sp!, {r4,r5,r6,r7,r8, pc} 1570*c0855eaaSJohn Baldwin.size ossl_bsaes_ctr32_encrypt_blocks,.-ossl_bsaes_ctr32_encrypt_blocks 1571*c0855eaaSJohn Baldwin.globl ossl_bsaes_xts_encrypt 1572*c0855eaaSJohn Baldwin.type ossl_bsaes_xts_encrypt,%function 1573bc3d5698SJohn Baldwin.align 4 1574*c0855eaaSJohn Baldwinossl_bsaes_xts_encrypt: 1575bc3d5698SJohn Baldwin mov ip, sp 1576bc3d5698SJohn Baldwin stmdb sp!, {r4,r5,r6,r7,r8,r9,r10, lr} @ 0x20 1577bc3d5698SJohn Baldwin VFP_ABI_PUSH 1578bc3d5698SJohn Baldwin mov r6, sp @ future r3 1579bc3d5698SJohn Baldwin 1580bc3d5698SJohn Baldwin mov r7, r0 1581bc3d5698SJohn Baldwin mov r8, r1 1582bc3d5698SJohn Baldwin mov r9, r2 1583bc3d5698SJohn Baldwin mov r10, r3 1584bc3d5698SJohn Baldwin 1585bc3d5698SJohn Baldwin sub r0, sp, #0x10 @ 0x10 1586bc3d5698SJohn Baldwin bic r0, #0xf @ align at 16 bytes 1587bc3d5698SJohn Baldwin mov sp, r0 1588bc3d5698SJohn Baldwin 1589bc3d5698SJohn Baldwin#ifdef XTS_CHAIN_TWEAK 1590bc3d5698SJohn Baldwin ldr r0, [ip] @ pointer to input tweak 1591bc3d5698SJohn Baldwin#else 1592bc3d5698SJohn Baldwin @ generate initial tweak 1593bc3d5698SJohn Baldwin ldr r0, [ip, #4] @ iv[] 1594bc3d5698SJohn Baldwin mov r1, sp 1595bc3d5698SJohn Baldwin ldr r2, [ip, #0] @ key2 1596bc3d5698SJohn Baldwin bl AES_encrypt 1597bc3d5698SJohn Baldwin mov r0,sp @ pointer to initial tweak 1598bc3d5698SJohn Baldwin#endif 1599bc3d5698SJohn Baldwin 1600bc3d5698SJohn Baldwin ldr r1, [r10, #240] @ get # of rounds 1601bc3d5698SJohn Baldwin mov r3, r6 1602bc3d5698SJohn Baldwin#ifndef BSAES_ASM_EXTENDED_KEY 1603bc3d5698SJohn Baldwin @ allocate the key schedule on the stack 1604bc3d5698SJohn Baldwin sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key 1605bc3d5698SJohn Baldwin @ add r12, #96 @ size of bit-sliced key schedule 1606bc3d5698SJohn Baldwin sub r12, #48 @ place for tweak[9] 1607bc3d5698SJohn Baldwin 1608bc3d5698SJohn Baldwin @ populate the key schedule 1609bc3d5698SJohn Baldwin mov r4, r10 @ pass key 1610bc3d5698SJohn Baldwin mov r5, r1 @ pass # of rounds 1611bc3d5698SJohn Baldwin mov sp, r12 1612bc3d5698SJohn Baldwin add r12, #0x90 @ pass key schedule 1613bc3d5698SJohn Baldwin bl _bsaes_key_convert 1614bc3d5698SJohn Baldwin veor q7, q7, q15 @ fix up last round key 1615bc3d5698SJohn Baldwin vstmia r12, {q7} @ save last round key 1616bc3d5698SJohn Baldwin#else 1617bc3d5698SJohn Baldwin ldr r12, [r10, #244] 1618bc3d5698SJohn Baldwin eors r12, #1 1619bc3d5698SJohn Baldwin beq 0f 1620bc3d5698SJohn Baldwin 1621bc3d5698SJohn Baldwin str r12, [r10, #244] 1622bc3d5698SJohn Baldwin mov r4, r10 @ pass key 1623bc3d5698SJohn Baldwin mov r5, r1 @ pass # of rounds 1624bc3d5698SJohn Baldwin add r12, r10, #248 @ pass key schedule 1625bc3d5698SJohn Baldwin bl _bsaes_key_convert 1626bc3d5698SJohn Baldwin veor q7, q7, q15 @ fix up last round key 1627bc3d5698SJohn Baldwin vstmia r12, {q7} 1628bc3d5698SJohn Baldwin 1629bc3d5698SJohn Baldwin.align 2 1630454c425dSMark Johnston0: sub sp, #0x90 @ place for tweak[9] 1631bc3d5698SJohn Baldwin#endif 1632bc3d5698SJohn Baldwin 1633bc3d5698SJohn Baldwin vld1.8 {q8}, [r0] @ initial tweak 1634bc3d5698SJohn Baldwin adr r2, .Lxts_magic 1635bc3d5698SJohn Baldwin 1636bc3d5698SJohn Baldwin subs r9, #0x80 1637bc3d5698SJohn Baldwin blo 
.Lxts_enc_short 1638bc3d5698SJohn Baldwin b .Lxts_enc_loop 1639bc3d5698SJohn Baldwin 1640bc3d5698SJohn Baldwin.align 4 1641bc3d5698SJohn Baldwin.Lxts_enc_loop: 1642bc3d5698SJohn Baldwin vldmia r2, {q5} @ load XTS magic 1643bc3d5698SJohn Baldwin vshr.s64 q6, q8, #63 1644bc3d5698SJohn Baldwin mov r0, sp 1645bc3d5698SJohn Baldwin vand q6, q6, q5 1646bc3d5698SJohn Baldwin vadd.u64 q9, q8, q8 1647bc3d5698SJohn Baldwin vst1.64 {q8}, [r0,:128]! 1648bc3d5698SJohn Baldwin vswp d13,d12 1649bc3d5698SJohn Baldwin vshr.s64 q7, q9, #63 1650bc3d5698SJohn Baldwin veor q9, q9, q6 1651bc3d5698SJohn Baldwin vand q7, q7, q5 1652bc3d5698SJohn Baldwin vadd.u64 q10, q9, q9 1653bc3d5698SJohn Baldwin vst1.64 {q9}, [r0,:128]! 1654bc3d5698SJohn Baldwin vswp d15,d14 1655bc3d5698SJohn Baldwin vshr.s64 q6, q10, #63 1656bc3d5698SJohn Baldwin veor q10, q10, q7 1657bc3d5698SJohn Baldwin vand q6, q6, q5 1658bc3d5698SJohn Baldwin vld1.8 {q0}, [r7]! 1659bc3d5698SJohn Baldwin vadd.u64 q11, q10, q10 1660bc3d5698SJohn Baldwin vst1.64 {q10}, [r0,:128]! 1661bc3d5698SJohn Baldwin vswp d13,d12 1662bc3d5698SJohn Baldwin vshr.s64 q7, q11, #63 1663bc3d5698SJohn Baldwin veor q11, q11, q6 1664bc3d5698SJohn Baldwin vand q7, q7, q5 1665bc3d5698SJohn Baldwin vld1.8 {q1}, [r7]! 1666bc3d5698SJohn Baldwin veor q0, q0, q8 1667bc3d5698SJohn Baldwin vadd.u64 q12, q11, q11 1668bc3d5698SJohn Baldwin vst1.64 {q11}, [r0,:128]! 1669bc3d5698SJohn Baldwin vswp d15,d14 1670bc3d5698SJohn Baldwin vshr.s64 q6, q12, #63 1671bc3d5698SJohn Baldwin veor q12, q12, q7 1672bc3d5698SJohn Baldwin vand q6, q6, q5 1673bc3d5698SJohn Baldwin vld1.8 {q2}, [r7]! 1674bc3d5698SJohn Baldwin veor q1, q1, q9 1675bc3d5698SJohn Baldwin vadd.u64 q13, q12, q12 1676bc3d5698SJohn Baldwin vst1.64 {q12}, [r0,:128]! 1677bc3d5698SJohn Baldwin vswp d13,d12 1678bc3d5698SJohn Baldwin vshr.s64 q7, q13, #63 1679bc3d5698SJohn Baldwin veor q13, q13, q6 1680bc3d5698SJohn Baldwin vand q7, q7, q5 1681bc3d5698SJohn Baldwin vld1.8 {q3}, [r7]! 1682bc3d5698SJohn Baldwin veor q2, q2, q10 1683bc3d5698SJohn Baldwin vadd.u64 q14, q13, q13 1684bc3d5698SJohn Baldwin vst1.64 {q13}, [r0,:128]! 1685bc3d5698SJohn Baldwin vswp d15,d14 1686bc3d5698SJohn Baldwin vshr.s64 q6, q14, #63 1687bc3d5698SJohn Baldwin veor q14, q14, q7 1688bc3d5698SJohn Baldwin vand q6, q6, q5 1689bc3d5698SJohn Baldwin vld1.8 {q4}, [r7]! 1690bc3d5698SJohn Baldwin veor q3, q3, q11 1691bc3d5698SJohn Baldwin vadd.u64 q15, q14, q14 1692bc3d5698SJohn Baldwin vst1.64 {q14}, [r0,:128]! 1693bc3d5698SJohn Baldwin vswp d13,d12 1694bc3d5698SJohn Baldwin vshr.s64 q7, q15, #63 1695bc3d5698SJohn Baldwin veor q15, q15, q6 1696bc3d5698SJohn Baldwin vand q7, q7, q5 1697bc3d5698SJohn Baldwin vld1.8 {q5}, [r7]! 1698bc3d5698SJohn Baldwin veor q4, q4, q12 1699bc3d5698SJohn Baldwin vadd.u64 q8, q15, q15 1700bc3d5698SJohn Baldwin vst1.64 {q15}, [r0,:128]! 1701bc3d5698SJohn Baldwin vswp d15,d14 1702bc3d5698SJohn Baldwin veor q8, q8, q7 1703bc3d5698SJohn Baldwin vst1.64 {q8}, [r0,:128] @ next round tweak 1704bc3d5698SJohn Baldwin 1705bc3d5698SJohn Baldwin vld1.8 {q6,q7}, [r7]! 
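@ the eight tweaks for this batch now sit in the scratch area at sp; each
@ was derived from the previous one by a GF(2^128) multiply-by-x using
@ .Lxts_magic (1, 0x87):
@   T[i+1] = (T[i] << 1) ^ (0x87 if bit 127 of T[i] was set, else 0)
@ the blocks are XORed with their tweaks, run through _bsaes_encrypt8,
@ then XORed with the same tweaks again before being written out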
1706bc3d5698SJohn Baldwin veor q5, q5, q13 1707bc3d5698SJohn Baldwin#ifndef BSAES_ASM_EXTENDED_KEY 1708bc3d5698SJohn Baldwin add r4, sp, #0x90 @ pass key schedule 1709bc3d5698SJohn Baldwin#else 1710bc3d5698SJohn Baldwin add r4, r10, #248 @ pass key schedule 1711bc3d5698SJohn Baldwin#endif 1712bc3d5698SJohn Baldwin veor q6, q6, q14 1713bc3d5698SJohn Baldwin mov r5, r1 @ pass rounds 1714bc3d5698SJohn Baldwin veor q7, q7, q15 1715bc3d5698SJohn Baldwin mov r0, sp 1716bc3d5698SJohn Baldwin 1717bc3d5698SJohn Baldwin bl _bsaes_encrypt8 1718bc3d5698SJohn Baldwin 1719bc3d5698SJohn Baldwin vld1.64 {q8,q9}, [r0,:128]! 1720bc3d5698SJohn Baldwin vld1.64 {q10,q11}, [r0,:128]! 1721bc3d5698SJohn Baldwin veor q0, q0, q8 1722bc3d5698SJohn Baldwin vld1.64 {q12,q13}, [r0,:128]! 1723bc3d5698SJohn Baldwin veor q1, q1, q9 1724bc3d5698SJohn Baldwin veor q8, q4, q10 1725bc3d5698SJohn Baldwin vst1.8 {q0,q1}, [r8]! 1726bc3d5698SJohn Baldwin veor q9, q6, q11 1727bc3d5698SJohn Baldwin vld1.64 {q14,q15}, [r0,:128]! 1728bc3d5698SJohn Baldwin veor q10, q3, q12 1729bc3d5698SJohn Baldwin vst1.8 {q8,q9}, [r8]! 1730bc3d5698SJohn Baldwin veor q11, q7, q13 1731bc3d5698SJohn Baldwin veor q12, q2, q14 1732bc3d5698SJohn Baldwin vst1.8 {q10,q11}, [r8]! 1733bc3d5698SJohn Baldwin veor q13, q5, q15 1734bc3d5698SJohn Baldwin vst1.8 {q12,q13}, [r8]! 1735bc3d5698SJohn Baldwin 1736bc3d5698SJohn Baldwin vld1.64 {q8}, [r0,:128] @ next round tweak 1737bc3d5698SJohn Baldwin 1738bc3d5698SJohn Baldwin subs r9, #0x80 1739bc3d5698SJohn Baldwin bpl .Lxts_enc_loop 1740bc3d5698SJohn Baldwin 1741bc3d5698SJohn Baldwin.Lxts_enc_short: 1742bc3d5698SJohn Baldwin adds r9, #0x70 1743bc3d5698SJohn Baldwin bmi .Lxts_enc_done 1744bc3d5698SJohn Baldwin 1745bc3d5698SJohn Baldwin vldmia r2, {q5} @ load XTS magic 1746bc3d5698SJohn Baldwin vshr.s64 q7, q8, #63 1747bc3d5698SJohn Baldwin mov r0, sp 1748bc3d5698SJohn Baldwin vand q7, q7, q5 1749bc3d5698SJohn Baldwin vadd.u64 q9, q8, q8 1750bc3d5698SJohn Baldwin vst1.64 {q8}, [r0,:128]! 1751bc3d5698SJohn Baldwin vswp d15,d14 1752bc3d5698SJohn Baldwin vshr.s64 q6, q9, #63 1753bc3d5698SJohn Baldwin veor q9, q9, q7 1754bc3d5698SJohn Baldwin vand q6, q6, q5 1755bc3d5698SJohn Baldwin vadd.u64 q10, q9, q9 1756bc3d5698SJohn Baldwin vst1.64 {q9}, [r0,:128]! 1757bc3d5698SJohn Baldwin vswp d13,d12 1758bc3d5698SJohn Baldwin vshr.s64 q7, q10, #63 1759bc3d5698SJohn Baldwin veor q10, q10, q6 1760bc3d5698SJohn Baldwin vand q7, q7, q5 1761bc3d5698SJohn Baldwin vld1.8 {q0}, [r7]! 1762bc3d5698SJohn Baldwin subs r9, #0x10 1763bc3d5698SJohn Baldwin bmi .Lxts_enc_1 1764bc3d5698SJohn Baldwin vadd.u64 q11, q10, q10 1765bc3d5698SJohn Baldwin vst1.64 {q10}, [r0,:128]! 1766bc3d5698SJohn Baldwin vswp d15,d14 1767bc3d5698SJohn Baldwin vshr.s64 q6, q11, #63 1768bc3d5698SJohn Baldwin veor q11, q11, q7 1769bc3d5698SJohn Baldwin vand q6, q6, q5 1770bc3d5698SJohn Baldwin vld1.8 {q1}, [r7]! 1771bc3d5698SJohn Baldwin subs r9, #0x10 1772bc3d5698SJohn Baldwin bmi .Lxts_enc_2 1773bc3d5698SJohn Baldwin veor q0, q0, q8 1774bc3d5698SJohn Baldwin vadd.u64 q12, q11, q11 1775bc3d5698SJohn Baldwin vst1.64 {q11}, [r0,:128]! 1776bc3d5698SJohn Baldwin vswp d13,d12 1777bc3d5698SJohn Baldwin vshr.s64 q7, q12, #63 1778bc3d5698SJohn Baldwin veor q12, q12, q6 1779bc3d5698SJohn Baldwin vand q7, q7, q5 1780bc3d5698SJohn Baldwin vld1.8 {q2}, [r7]! 1781bc3d5698SJohn Baldwin subs r9, #0x10 1782bc3d5698SJohn Baldwin bmi .Lxts_enc_3 1783bc3d5698SJohn Baldwin veor q1, q1, q9 1784bc3d5698SJohn Baldwin vadd.u64 q13, q12, q12 1785bc3d5698SJohn Baldwin vst1.64 {q12}, [r0,:128]! 
1786bc3d5698SJohn Baldwin vswp d15,d14 1787bc3d5698SJohn Baldwin vshr.s64 q6, q13, #63 1788bc3d5698SJohn Baldwin veor q13, q13, q7 1789bc3d5698SJohn Baldwin vand q6, q6, q5 1790bc3d5698SJohn Baldwin vld1.8 {q3}, [r7]! 1791bc3d5698SJohn Baldwin subs r9, #0x10 1792bc3d5698SJohn Baldwin bmi .Lxts_enc_4 1793bc3d5698SJohn Baldwin veor q2, q2, q10 1794bc3d5698SJohn Baldwin vadd.u64 q14, q13, q13 1795bc3d5698SJohn Baldwin vst1.64 {q13}, [r0,:128]! 1796bc3d5698SJohn Baldwin vswp d13,d12 1797bc3d5698SJohn Baldwin vshr.s64 q7, q14, #63 1798bc3d5698SJohn Baldwin veor q14, q14, q6 1799bc3d5698SJohn Baldwin vand q7, q7, q5 1800bc3d5698SJohn Baldwin vld1.8 {q4}, [r7]! 1801bc3d5698SJohn Baldwin subs r9, #0x10 1802bc3d5698SJohn Baldwin bmi .Lxts_enc_5 1803bc3d5698SJohn Baldwin veor q3, q3, q11 1804bc3d5698SJohn Baldwin vadd.u64 q15, q14, q14 1805bc3d5698SJohn Baldwin vst1.64 {q14}, [r0,:128]! 1806bc3d5698SJohn Baldwin vswp d15,d14 1807bc3d5698SJohn Baldwin vshr.s64 q6, q15, #63 1808bc3d5698SJohn Baldwin veor q15, q15, q7 1809bc3d5698SJohn Baldwin vand q6, q6, q5 1810bc3d5698SJohn Baldwin vld1.8 {q5}, [r7]! 1811bc3d5698SJohn Baldwin subs r9, #0x10 1812bc3d5698SJohn Baldwin bmi .Lxts_enc_6 1813bc3d5698SJohn Baldwin veor q4, q4, q12 1814bc3d5698SJohn Baldwin sub r9, #0x10 1815bc3d5698SJohn Baldwin vst1.64 {q15}, [r0,:128] @ next round tweak 1816bc3d5698SJohn Baldwin 1817bc3d5698SJohn Baldwin vld1.8 {q6}, [r7]! 1818bc3d5698SJohn Baldwin veor q5, q5, q13 1819bc3d5698SJohn Baldwin#ifndef BSAES_ASM_EXTENDED_KEY 1820bc3d5698SJohn Baldwin add r4, sp, #0x90 @ pass key schedule 1821bc3d5698SJohn Baldwin#else 1822bc3d5698SJohn Baldwin add r4, r10, #248 @ pass key schedule 1823bc3d5698SJohn Baldwin#endif 1824bc3d5698SJohn Baldwin veor q6, q6, q14 1825bc3d5698SJohn Baldwin mov r5, r1 @ pass rounds 1826bc3d5698SJohn Baldwin mov r0, sp 1827bc3d5698SJohn Baldwin 1828bc3d5698SJohn Baldwin bl _bsaes_encrypt8 1829bc3d5698SJohn Baldwin 1830bc3d5698SJohn Baldwin vld1.64 {q8,q9}, [r0,:128]! 1831bc3d5698SJohn Baldwin vld1.64 {q10,q11}, [r0,:128]! 1832bc3d5698SJohn Baldwin veor q0, q0, q8 1833bc3d5698SJohn Baldwin vld1.64 {q12,q13}, [r0,:128]! 1834bc3d5698SJohn Baldwin veor q1, q1, q9 1835bc3d5698SJohn Baldwin veor q8, q4, q10 1836bc3d5698SJohn Baldwin vst1.8 {q0,q1}, [r8]! 1837bc3d5698SJohn Baldwin veor q9, q6, q11 1838bc3d5698SJohn Baldwin vld1.64 {q14}, [r0,:128]! 1839bc3d5698SJohn Baldwin veor q10, q3, q12 1840bc3d5698SJohn Baldwin vst1.8 {q8,q9}, [r8]! 1841bc3d5698SJohn Baldwin veor q11, q7, q13 1842bc3d5698SJohn Baldwin veor q12, q2, q14 1843bc3d5698SJohn Baldwin vst1.8 {q10,q11}, [r8]! 1844bc3d5698SJohn Baldwin vst1.8 {q12}, [r8]! 1845bc3d5698SJohn Baldwin 1846bc3d5698SJohn Baldwin vld1.64 {q8}, [r0,:128] @ next round tweak 1847bc3d5698SJohn Baldwin b .Lxts_enc_done 1848bc3d5698SJohn Baldwin.align 4 1849bc3d5698SJohn Baldwin.Lxts_enc_6: 1850bc3d5698SJohn Baldwin veor q4, q4, q12 1851bc3d5698SJohn Baldwin#ifndef BSAES_ASM_EXTENDED_KEY 1852bc3d5698SJohn Baldwin add r4, sp, #0x90 @ pass key schedule 1853bc3d5698SJohn Baldwin#else 1854bc3d5698SJohn Baldwin add r4, r10, #248 @ pass key schedule 1855bc3d5698SJohn Baldwin#endif 1856bc3d5698SJohn Baldwin veor q5, q5, q13 1857bc3d5698SJohn Baldwin mov r5, r1 @ pass rounds 1858bc3d5698SJohn Baldwin mov r0, sp 1859bc3d5698SJohn Baldwin 1860bc3d5698SJohn Baldwin bl _bsaes_encrypt8 1861bc3d5698SJohn Baldwin 1862bc3d5698SJohn Baldwin vld1.64 {q8,q9}, [r0,:128]! 1863bc3d5698SJohn Baldwin vld1.64 {q10,q11}, [r0,:128]! 
1864bc3d5698SJohn Baldwin veor q0, q0, q8 1865bc3d5698SJohn Baldwin vld1.64 {q12,q13}, [r0,:128]! 1866bc3d5698SJohn Baldwin veor q1, q1, q9 1867bc3d5698SJohn Baldwin veor q8, q4, q10 1868bc3d5698SJohn Baldwin vst1.8 {q0,q1}, [r8]! 1869bc3d5698SJohn Baldwin veor q9, q6, q11 1870bc3d5698SJohn Baldwin veor q10, q3, q12 1871bc3d5698SJohn Baldwin vst1.8 {q8,q9}, [r8]! 1872bc3d5698SJohn Baldwin veor q11, q7, q13 1873bc3d5698SJohn Baldwin vst1.8 {q10,q11}, [r8]! 1874bc3d5698SJohn Baldwin 1875bc3d5698SJohn Baldwin vld1.64 {q8}, [r0,:128] @ next round tweak 1876bc3d5698SJohn Baldwin b .Lxts_enc_done 1877bc3d5698SJohn Baldwin 1878bc3d5698SJohn Baldwin@ put this in range for both ARM and Thumb mode adr instructions 1879bc3d5698SJohn Baldwin.align 5 1880bc3d5698SJohn Baldwin.Lxts_magic: 1881bc3d5698SJohn Baldwin.quad 1, 0x87 1882bc3d5698SJohn Baldwin 1883bc3d5698SJohn Baldwin.align 5 1884bc3d5698SJohn Baldwin.Lxts_enc_5: 1885bc3d5698SJohn Baldwin veor q3, q3, q11 1886bc3d5698SJohn Baldwin#ifndef BSAES_ASM_EXTENDED_KEY 1887bc3d5698SJohn Baldwin add r4, sp, #0x90 @ pass key schedule 1888bc3d5698SJohn Baldwin#else 1889bc3d5698SJohn Baldwin add r4, r10, #248 @ pass key schedule 1890bc3d5698SJohn Baldwin#endif 1891bc3d5698SJohn Baldwin veor q4, q4, q12 1892bc3d5698SJohn Baldwin mov r5, r1 @ pass rounds 1893bc3d5698SJohn Baldwin mov r0, sp 1894bc3d5698SJohn Baldwin 1895bc3d5698SJohn Baldwin bl _bsaes_encrypt8 1896bc3d5698SJohn Baldwin 1897bc3d5698SJohn Baldwin vld1.64 {q8,q9}, [r0,:128]! 1898bc3d5698SJohn Baldwin vld1.64 {q10,q11}, [r0,:128]! 1899bc3d5698SJohn Baldwin veor q0, q0, q8 1900bc3d5698SJohn Baldwin vld1.64 {q12}, [r0,:128]! 1901bc3d5698SJohn Baldwin veor q1, q1, q9 1902bc3d5698SJohn Baldwin veor q8, q4, q10 1903bc3d5698SJohn Baldwin vst1.8 {q0,q1}, [r8]! 1904bc3d5698SJohn Baldwin veor q9, q6, q11 1905bc3d5698SJohn Baldwin veor q10, q3, q12 1906bc3d5698SJohn Baldwin vst1.8 {q8,q9}, [r8]! 1907bc3d5698SJohn Baldwin vst1.8 {q10}, [r8]! 1908bc3d5698SJohn Baldwin 1909bc3d5698SJohn Baldwin vld1.64 {q8}, [r0,:128] @ next round tweak 1910bc3d5698SJohn Baldwin b .Lxts_enc_done 1911bc3d5698SJohn Baldwin.align 4 1912bc3d5698SJohn Baldwin.Lxts_enc_4: 1913bc3d5698SJohn Baldwin veor q2, q2, q10 1914bc3d5698SJohn Baldwin#ifndef BSAES_ASM_EXTENDED_KEY 1915bc3d5698SJohn Baldwin add r4, sp, #0x90 @ pass key schedule 1916bc3d5698SJohn Baldwin#else 1917bc3d5698SJohn Baldwin add r4, r10, #248 @ pass key schedule 1918bc3d5698SJohn Baldwin#endif 1919bc3d5698SJohn Baldwin veor q3, q3, q11 1920bc3d5698SJohn Baldwin mov r5, r1 @ pass rounds 1921bc3d5698SJohn Baldwin mov r0, sp 1922bc3d5698SJohn Baldwin 1923bc3d5698SJohn Baldwin bl _bsaes_encrypt8 1924bc3d5698SJohn Baldwin 1925bc3d5698SJohn Baldwin vld1.64 {q8,q9}, [r0,:128]! 1926bc3d5698SJohn Baldwin vld1.64 {q10,q11}, [r0,:128]! 1927bc3d5698SJohn Baldwin veor q0, q0, q8 1928bc3d5698SJohn Baldwin veor q1, q1, q9 1929bc3d5698SJohn Baldwin veor q8, q4, q10 1930bc3d5698SJohn Baldwin vst1.8 {q0,q1}, [r8]! 1931bc3d5698SJohn Baldwin veor q9, q6, q11 1932bc3d5698SJohn Baldwin vst1.8 {q8,q9}, [r8]! 
1933bc3d5698SJohn Baldwin 1934bc3d5698SJohn Baldwin vld1.64 {q8}, [r0,:128] @ next round tweak 1935bc3d5698SJohn Baldwin b .Lxts_enc_done 1936bc3d5698SJohn Baldwin.align 4 1937bc3d5698SJohn Baldwin.Lxts_enc_3: 1938bc3d5698SJohn Baldwin veor q1, q1, q9 1939bc3d5698SJohn Baldwin#ifndef BSAES_ASM_EXTENDED_KEY 1940bc3d5698SJohn Baldwin add r4, sp, #0x90 @ pass key schedule 1941bc3d5698SJohn Baldwin#else 1942bc3d5698SJohn Baldwin add r4, r10, #248 @ pass key schedule 1943bc3d5698SJohn Baldwin#endif 1944bc3d5698SJohn Baldwin veor q2, q2, q10 1945bc3d5698SJohn Baldwin mov r5, r1 @ pass rounds 1946bc3d5698SJohn Baldwin mov r0, sp 1947bc3d5698SJohn Baldwin 1948bc3d5698SJohn Baldwin bl _bsaes_encrypt8 1949bc3d5698SJohn Baldwin 1950bc3d5698SJohn Baldwin vld1.64 {q8,q9}, [r0,:128]! 1951bc3d5698SJohn Baldwin vld1.64 {q10}, [r0,:128]! 1952bc3d5698SJohn Baldwin veor q0, q0, q8 1953bc3d5698SJohn Baldwin veor q1, q1, q9 1954bc3d5698SJohn Baldwin veor q8, q4, q10 1955bc3d5698SJohn Baldwin vst1.8 {q0,q1}, [r8]! 1956bc3d5698SJohn Baldwin vst1.8 {q8}, [r8]! 1957bc3d5698SJohn Baldwin 1958bc3d5698SJohn Baldwin vld1.64 {q8}, [r0,:128] @ next round tweak 1959bc3d5698SJohn Baldwin b .Lxts_enc_done 1960bc3d5698SJohn Baldwin.align 4 1961bc3d5698SJohn Baldwin.Lxts_enc_2: 1962bc3d5698SJohn Baldwin veor q0, q0, q8 1963bc3d5698SJohn Baldwin#ifndef BSAES_ASM_EXTENDED_KEY 1964bc3d5698SJohn Baldwin add r4, sp, #0x90 @ pass key schedule 1965bc3d5698SJohn Baldwin#else 1966bc3d5698SJohn Baldwin add r4, r10, #248 @ pass key schedule 1967bc3d5698SJohn Baldwin#endif 1968bc3d5698SJohn Baldwin veor q1, q1, q9 1969bc3d5698SJohn Baldwin mov r5, r1 @ pass rounds 1970bc3d5698SJohn Baldwin mov r0, sp 1971bc3d5698SJohn Baldwin 1972bc3d5698SJohn Baldwin bl _bsaes_encrypt8 1973bc3d5698SJohn Baldwin 1974bc3d5698SJohn Baldwin vld1.64 {q8,q9}, [r0,:128]! 1975bc3d5698SJohn Baldwin veor q0, q0, q8 1976bc3d5698SJohn Baldwin veor q1, q1, q9 1977bc3d5698SJohn Baldwin vst1.8 {q0,q1}, [r8]! 1978bc3d5698SJohn Baldwin 1979bc3d5698SJohn Baldwin vld1.64 {q8}, [r0,:128] @ next round tweak 1980bc3d5698SJohn Baldwin b .Lxts_enc_done 1981bc3d5698SJohn Baldwin.align 4 1982bc3d5698SJohn Baldwin.Lxts_enc_1: 1983bc3d5698SJohn Baldwin mov r0, sp 1984bc3d5698SJohn Baldwin veor q0, q0, q8 1985bc3d5698SJohn Baldwin mov r1, sp 1986bc3d5698SJohn Baldwin vst1.8 {q0}, [sp,:128] 1987bc3d5698SJohn Baldwin mov r2, r10 1988bc3d5698SJohn Baldwin mov r4, r3 @ preserve fp 1989bc3d5698SJohn Baldwin 1990bc3d5698SJohn Baldwin bl AES_encrypt 1991bc3d5698SJohn Baldwin 1992bc3d5698SJohn Baldwin vld1.8 {q0}, [sp,:128] 1993bc3d5698SJohn Baldwin veor q0, q0, q8 1994bc3d5698SJohn Baldwin vst1.8 {q0}, [r8]! 
1995bc3d5698SJohn Baldwin mov r3, r4 1996bc3d5698SJohn Baldwin 1997bc3d5698SJohn Baldwin vmov q8, q9 @ next round tweak 1998bc3d5698SJohn Baldwin 1999bc3d5698SJohn Baldwin.Lxts_enc_done: 2000bc3d5698SJohn Baldwin#ifndef XTS_CHAIN_TWEAK 2001bc3d5698SJohn Baldwin adds r9, #0x10 2002bc3d5698SJohn Baldwin beq .Lxts_enc_ret 2003bc3d5698SJohn Baldwin sub r6, r8, #0x10 2004bc3d5698SJohn Baldwin 2005bc3d5698SJohn Baldwin.Lxts_enc_steal: 2006bc3d5698SJohn Baldwin ldrb r0, [r7], #1 2007bc3d5698SJohn Baldwin ldrb r1, [r8, #-0x10] 2008bc3d5698SJohn Baldwin strb r0, [r8, #-0x10] 2009bc3d5698SJohn Baldwin strb r1, [r8], #1 2010bc3d5698SJohn Baldwin 2011bc3d5698SJohn Baldwin subs r9, #1 2012bc3d5698SJohn Baldwin bhi .Lxts_enc_steal 2013bc3d5698SJohn Baldwin 2014bc3d5698SJohn Baldwin vld1.8 {q0}, [r6] 2015bc3d5698SJohn Baldwin mov r0, sp 2016bc3d5698SJohn Baldwin veor q0, q0, q8 2017bc3d5698SJohn Baldwin mov r1, sp 2018bc3d5698SJohn Baldwin vst1.8 {q0}, [sp,:128] 2019bc3d5698SJohn Baldwin mov r2, r10 2020bc3d5698SJohn Baldwin mov r4, r3 @ preserve fp 2021bc3d5698SJohn Baldwin 2022bc3d5698SJohn Baldwin bl AES_encrypt 2023bc3d5698SJohn Baldwin 2024bc3d5698SJohn Baldwin vld1.8 {q0}, [sp,:128] 2025bc3d5698SJohn Baldwin veor q0, q0, q8 2026bc3d5698SJohn Baldwin vst1.8 {q0}, [r6] 2027bc3d5698SJohn Baldwin mov r3, r4 2028bc3d5698SJohn Baldwin#endif 2029bc3d5698SJohn Baldwin 2030bc3d5698SJohn Baldwin.Lxts_enc_ret: 2031bc3d5698SJohn Baldwin bic r0, r3, #0xf 2032bc3d5698SJohn Baldwin vmov.i32 q0, #0 2033bc3d5698SJohn Baldwin vmov.i32 q1, #0 2034bc3d5698SJohn Baldwin#ifdef XTS_CHAIN_TWEAK 2035bc3d5698SJohn Baldwin ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak 2036bc3d5698SJohn Baldwin#endif 2037bc3d5698SJohn Baldwin.Lxts_enc_bzero:@ wipe key schedule [if any] 2038bc3d5698SJohn Baldwin vstmia sp!, {q0,q1} 2039bc3d5698SJohn Baldwin cmp sp, r0 2040bc3d5698SJohn Baldwin bne .Lxts_enc_bzero 2041bc3d5698SJohn Baldwin 2042bc3d5698SJohn Baldwin mov sp, r3 2043bc3d5698SJohn Baldwin#ifdef XTS_CHAIN_TWEAK 2044bc3d5698SJohn Baldwin vst1.8 {q8}, [r1] 2045bc3d5698SJohn Baldwin#endif 2046bc3d5698SJohn Baldwin VFP_ABI_POP 2047bc3d5698SJohn Baldwin ldmia sp!, {r4,r5,r6,r7,r8,r9,r10, pc} @ return 2048bc3d5698SJohn Baldwin 2049*c0855eaaSJohn Baldwin.size ossl_bsaes_xts_encrypt,.-ossl_bsaes_xts_encrypt 2050bc3d5698SJohn Baldwin 2051*c0855eaaSJohn Baldwin.globl ossl_bsaes_xts_decrypt 2052*c0855eaaSJohn Baldwin.type ossl_bsaes_xts_decrypt,%function 2053bc3d5698SJohn Baldwin.align 4 2054*c0855eaaSJohn Baldwinossl_bsaes_xts_decrypt: 2055bc3d5698SJohn Baldwin mov ip, sp 2056bc3d5698SJohn Baldwin stmdb sp!, {r4,r5,r6,r7,r8,r9,r10, lr} @ 0x20 2057bc3d5698SJohn Baldwin VFP_ABI_PUSH 2058bc3d5698SJohn Baldwin mov r6, sp @ future r3 2059bc3d5698SJohn Baldwin 2060bc3d5698SJohn Baldwin mov r7, r0 2061bc3d5698SJohn Baldwin mov r8, r1 2062bc3d5698SJohn Baldwin mov r9, r2 2063bc3d5698SJohn Baldwin mov r10, r3 2064bc3d5698SJohn Baldwin 2065bc3d5698SJohn Baldwin sub r0, sp, #0x10 @ 0x10 2066bc3d5698SJohn Baldwin bic r0, #0xf @ align at 16 bytes 2067bc3d5698SJohn Baldwin mov sp, r0 2068bc3d5698SJohn Baldwin 2069bc3d5698SJohn Baldwin#ifdef XTS_CHAIN_TWEAK 2070bc3d5698SJohn Baldwin ldr r0, [ip] @ pointer to input tweak 2071bc3d5698SJohn Baldwin#else 2072bc3d5698SJohn Baldwin @ generate initial tweak 2073bc3d5698SJohn Baldwin ldr r0, [ip, #4] @ iv[] 2074bc3d5698SJohn Baldwin mov r1, sp 2075bc3d5698SJohn Baldwin ldr r2, [ip, #0] @ key2 2076bc3d5698SJohn Baldwin bl AES_encrypt 2077bc3d5698SJohn Baldwin mov r0, sp @ pointer to initial tweak 2078bc3d5698SJohn 
Baldwin#endif 2079bc3d5698SJohn Baldwin 2080bc3d5698SJohn Baldwin ldr r1, [r10, #240] @ get # of rounds 2081bc3d5698SJohn Baldwin mov r3, r6 2082bc3d5698SJohn Baldwin#ifndef BSAES_ASM_EXTENDED_KEY 2083bc3d5698SJohn Baldwin @ allocate the key schedule on the stack 2084bc3d5698SJohn Baldwin sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key 2085bc3d5698SJohn Baldwin @ add r12, #96 @ size of bit-sliced key schedule 2086bc3d5698SJohn Baldwin sub r12, #48 @ place for tweak[9] 2087bc3d5698SJohn Baldwin 2088bc3d5698SJohn Baldwin @ populate the key schedule 2089bc3d5698SJohn Baldwin mov r4, r10 @ pass key 2090bc3d5698SJohn Baldwin mov r5, r1 @ pass # of rounds 2091bc3d5698SJohn Baldwin mov sp, r12 2092bc3d5698SJohn Baldwin add r12, #0x90 @ pass key schedule 2093bc3d5698SJohn Baldwin bl _bsaes_key_convert 2094bc3d5698SJohn Baldwin add r4, sp, #0x90 2095bc3d5698SJohn Baldwin vldmia r4, {q6} 2096bc3d5698SJohn Baldwin vstmia r12, {q15} @ save last round key 2097bc3d5698SJohn Baldwin veor q7, q7, q6 @ fix up round 0 key 2098bc3d5698SJohn Baldwin vstmia r4, {q7} 2099bc3d5698SJohn Baldwin#else 2100bc3d5698SJohn Baldwin ldr r12, [r10, #244] 2101bc3d5698SJohn Baldwin eors r12, #1 2102bc3d5698SJohn Baldwin beq 0f 2103bc3d5698SJohn Baldwin 2104bc3d5698SJohn Baldwin str r12, [r10, #244] 2105bc3d5698SJohn Baldwin mov r4, r10 @ pass key 2106bc3d5698SJohn Baldwin mov r5, r1 @ pass # of rounds 2107bc3d5698SJohn Baldwin add r12, r10, #248 @ pass key schedule 2108bc3d5698SJohn Baldwin bl _bsaes_key_convert 2109bc3d5698SJohn Baldwin add r4, r10, #248 2110bc3d5698SJohn Baldwin vldmia r4, {q6} 2111bc3d5698SJohn Baldwin vstmia r12, {q15} @ save last round key 2112bc3d5698SJohn Baldwin veor q7, q7, q6 @ fix up round 0 key 2113bc3d5698SJohn Baldwin vstmia r4, {q7} 2114bc3d5698SJohn Baldwin 2115bc3d5698SJohn Baldwin.align 2 2116454c425dSMark Johnston0: sub sp, #0x90 @ place for tweak[9] 2117bc3d5698SJohn Baldwin#endif 2118bc3d5698SJohn Baldwin vld1.8 {q8}, [r0] @ initial tweak 2119bc3d5698SJohn Baldwin adr r2, .Lxts_magic 2120bc3d5698SJohn Baldwin 2121bc3d5698SJohn Baldwin#ifndef XTS_CHAIN_TWEAK 2122bc3d5698SJohn Baldwin tst r9, #0xf @ if not multiple of 16 2123bc3d5698SJohn Baldwin it ne @ Thumb2 thing, sanity check in ARM 2124bc3d5698SJohn Baldwin subne r9, #0x10 @ subtract another 16 bytes 2125bc3d5698SJohn Baldwin#endif 2126bc3d5698SJohn Baldwin subs r9, #0x80 2127bc3d5698SJohn Baldwin 2128bc3d5698SJohn Baldwin blo .Lxts_dec_short 2129bc3d5698SJohn Baldwin b .Lxts_dec_loop 2130bc3d5698SJohn Baldwin 2131bc3d5698SJohn Baldwin.align 4 2132bc3d5698SJohn Baldwin.Lxts_dec_loop: 2133bc3d5698SJohn Baldwin vldmia r2, {q5} @ load XTS magic 2134bc3d5698SJohn Baldwin vshr.s64 q6, q8, #63 2135bc3d5698SJohn Baldwin mov r0, sp 2136bc3d5698SJohn Baldwin vand q6, q6, q5 2137bc3d5698SJohn Baldwin vadd.u64 q9, q8, q8 2138bc3d5698SJohn Baldwin vst1.64 {q8}, [r0,:128]! 2139bc3d5698SJohn Baldwin vswp d13,d12 2140bc3d5698SJohn Baldwin vshr.s64 q7, q9, #63 2141bc3d5698SJohn Baldwin veor q9, q9, q6 2142bc3d5698SJohn Baldwin vand q7, q7, q5 2143bc3d5698SJohn Baldwin vadd.u64 q10, q9, q9 2144bc3d5698SJohn Baldwin vst1.64 {q9}, [r0,:128]! 2145bc3d5698SJohn Baldwin vswp d15,d14 2146bc3d5698SJohn Baldwin vshr.s64 q6, q10, #63 2147bc3d5698SJohn Baldwin veor q10, q10, q7 2148bc3d5698SJohn Baldwin vand q6, q6, q5 2149bc3d5698SJohn Baldwin vld1.8 {q0}, [r7]! 2150bc3d5698SJohn Baldwin vadd.u64 q11, q10, q10 2151bc3d5698SJohn Baldwin vst1.64 {q10}, [r0,:128]! 
2152bc3d5698SJohn Baldwin vswp d13,d12 2153bc3d5698SJohn Baldwin vshr.s64 q7, q11, #63 2154bc3d5698SJohn Baldwin veor q11, q11, q6 2155bc3d5698SJohn Baldwin vand q7, q7, q5 2156bc3d5698SJohn Baldwin vld1.8 {q1}, [r7]! 2157bc3d5698SJohn Baldwin veor q0, q0, q8 2158bc3d5698SJohn Baldwin vadd.u64 q12, q11, q11 2159bc3d5698SJohn Baldwin vst1.64 {q11}, [r0,:128]! 2160bc3d5698SJohn Baldwin vswp d15,d14 2161bc3d5698SJohn Baldwin vshr.s64 q6, q12, #63 2162bc3d5698SJohn Baldwin veor q12, q12, q7 2163bc3d5698SJohn Baldwin vand q6, q6, q5 2164bc3d5698SJohn Baldwin vld1.8 {q2}, [r7]! 2165bc3d5698SJohn Baldwin veor q1, q1, q9 2166bc3d5698SJohn Baldwin vadd.u64 q13, q12, q12 2167bc3d5698SJohn Baldwin vst1.64 {q12}, [r0,:128]! 2168bc3d5698SJohn Baldwin vswp d13,d12 2169bc3d5698SJohn Baldwin vshr.s64 q7, q13, #63 2170bc3d5698SJohn Baldwin veor q13, q13, q6 2171bc3d5698SJohn Baldwin vand q7, q7, q5 2172bc3d5698SJohn Baldwin vld1.8 {q3}, [r7]! 2173bc3d5698SJohn Baldwin veor q2, q2, q10 2174bc3d5698SJohn Baldwin vadd.u64 q14, q13, q13 2175bc3d5698SJohn Baldwin vst1.64 {q13}, [r0,:128]! 2176bc3d5698SJohn Baldwin vswp d15,d14 2177bc3d5698SJohn Baldwin vshr.s64 q6, q14, #63 2178bc3d5698SJohn Baldwin veor q14, q14, q7 2179bc3d5698SJohn Baldwin vand q6, q6, q5 2180bc3d5698SJohn Baldwin vld1.8 {q4}, [r7]! 2181bc3d5698SJohn Baldwin veor q3, q3, q11 2182bc3d5698SJohn Baldwin vadd.u64 q15, q14, q14 2183bc3d5698SJohn Baldwin vst1.64 {q14}, [r0,:128]! 2184bc3d5698SJohn Baldwin vswp d13,d12 2185bc3d5698SJohn Baldwin vshr.s64 q7, q15, #63 2186bc3d5698SJohn Baldwin veor q15, q15, q6 2187bc3d5698SJohn Baldwin vand q7, q7, q5 2188bc3d5698SJohn Baldwin vld1.8 {q5}, [r7]! 2189bc3d5698SJohn Baldwin veor q4, q4, q12 2190bc3d5698SJohn Baldwin vadd.u64 q8, q15, q15 2191bc3d5698SJohn Baldwin vst1.64 {q15}, [r0,:128]! 2192bc3d5698SJohn Baldwin vswp d15,d14 2193bc3d5698SJohn Baldwin veor q8, q8, q7 2194bc3d5698SJohn Baldwin vst1.64 {q8}, [r0,:128] @ next round tweak 2195bc3d5698SJohn Baldwin 2196bc3d5698SJohn Baldwin vld1.8 {q6,q7}, [r7]! 2197bc3d5698SJohn Baldwin veor q5, q5, q13 2198bc3d5698SJohn Baldwin#ifndef BSAES_ASM_EXTENDED_KEY 2199bc3d5698SJohn Baldwin add r4, sp, #0x90 @ pass key schedule 2200bc3d5698SJohn Baldwin#else 2201bc3d5698SJohn Baldwin add r4, r10, #248 @ pass key schedule 2202bc3d5698SJohn Baldwin#endif 2203bc3d5698SJohn Baldwin veor q6, q6, q14 2204bc3d5698SJohn Baldwin mov r5, r1 @ pass rounds 2205bc3d5698SJohn Baldwin veor q7, q7, q15 2206bc3d5698SJohn Baldwin mov r0, sp 2207bc3d5698SJohn Baldwin 2208bc3d5698SJohn Baldwin bl _bsaes_decrypt8 2209bc3d5698SJohn Baldwin 2210bc3d5698SJohn Baldwin vld1.64 {q8,q9}, [r0,:128]! 2211bc3d5698SJohn Baldwin vld1.64 {q10,q11}, [r0,:128]! 2212bc3d5698SJohn Baldwin veor q0, q0, q8 2213bc3d5698SJohn Baldwin vld1.64 {q12,q13}, [r0,:128]! 2214bc3d5698SJohn Baldwin veor q1, q1, q9 2215bc3d5698SJohn Baldwin veor q8, q6, q10 2216bc3d5698SJohn Baldwin vst1.8 {q0,q1}, [r8]! 2217bc3d5698SJohn Baldwin veor q9, q4, q11 2218bc3d5698SJohn Baldwin vld1.64 {q14,q15}, [r0,:128]! 2219bc3d5698SJohn Baldwin veor q10, q2, q12 2220bc3d5698SJohn Baldwin vst1.8 {q8,q9}, [r8]! 2221bc3d5698SJohn Baldwin veor q11, q7, q13 2222bc3d5698SJohn Baldwin veor q12, q3, q14 2223bc3d5698SJohn Baldwin vst1.8 {q10,q11}, [r8]! 2224bc3d5698SJohn Baldwin veor q13, q5, q15 2225bc3d5698SJohn Baldwin vst1.8 {q12,q13}, [r8]! 
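@ _bsaes_decrypt8 hands the eight blocks back permuted as
@ q0,q1,q6,q4,q2,q7,q3,q5; the veor/vst1.8 sequence above undoes that
@ permutation while XORing each block with its tweak, so the plaintext is
@ written out in the original order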
2226bc3d5698SJohn Baldwin 2227bc3d5698SJohn Baldwin vld1.64 {q8}, [r0,:128] @ next round tweak 2228bc3d5698SJohn Baldwin 2229bc3d5698SJohn Baldwin subs r9, #0x80 2230bc3d5698SJohn Baldwin bpl .Lxts_dec_loop 2231bc3d5698SJohn Baldwin 2232bc3d5698SJohn Baldwin.Lxts_dec_short: 2233bc3d5698SJohn Baldwin adds r9, #0x70 2234bc3d5698SJohn Baldwin bmi .Lxts_dec_done 2235bc3d5698SJohn Baldwin 2236bc3d5698SJohn Baldwin vldmia r2, {q5} @ load XTS magic 2237bc3d5698SJohn Baldwin vshr.s64 q7, q8, #63 2238bc3d5698SJohn Baldwin mov r0, sp 2239bc3d5698SJohn Baldwin vand q7, q7, q5 2240bc3d5698SJohn Baldwin vadd.u64 q9, q8, q8 2241bc3d5698SJohn Baldwin vst1.64 {q8}, [r0,:128]! 2242bc3d5698SJohn Baldwin vswp d15,d14 2243bc3d5698SJohn Baldwin vshr.s64 q6, q9, #63 2244bc3d5698SJohn Baldwin veor q9, q9, q7 2245bc3d5698SJohn Baldwin vand q6, q6, q5 2246bc3d5698SJohn Baldwin vadd.u64 q10, q9, q9 2247bc3d5698SJohn Baldwin vst1.64 {q9}, [r0,:128]! 2248bc3d5698SJohn Baldwin vswp d13,d12 2249bc3d5698SJohn Baldwin vshr.s64 q7, q10, #63 2250bc3d5698SJohn Baldwin veor q10, q10, q6 2251bc3d5698SJohn Baldwin vand q7, q7, q5 2252bc3d5698SJohn Baldwin vld1.8 {q0}, [r7]! 2253bc3d5698SJohn Baldwin subs r9, #0x10 2254bc3d5698SJohn Baldwin bmi .Lxts_dec_1 2255bc3d5698SJohn Baldwin vadd.u64 q11, q10, q10 2256bc3d5698SJohn Baldwin vst1.64 {q10}, [r0,:128]! 2257bc3d5698SJohn Baldwin vswp d15,d14 2258bc3d5698SJohn Baldwin vshr.s64 q6, q11, #63 2259bc3d5698SJohn Baldwin veor q11, q11, q7 2260bc3d5698SJohn Baldwin vand q6, q6, q5 2261bc3d5698SJohn Baldwin vld1.8 {q1}, [r7]! 2262bc3d5698SJohn Baldwin subs r9, #0x10 2263bc3d5698SJohn Baldwin bmi .Lxts_dec_2 2264bc3d5698SJohn Baldwin veor q0, q0, q8 2265bc3d5698SJohn Baldwin vadd.u64 q12, q11, q11 2266bc3d5698SJohn Baldwin vst1.64 {q11}, [r0,:128]! 2267bc3d5698SJohn Baldwin vswp d13,d12 2268bc3d5698SJohn Baldwin vshr.s64 q7, q12, #63 2269bc3d5698SJohn Baldwin veor q12, q12, q6 2270bc3d5698SJohn Baldwin vand q7, q7, q5 2271bc3d5698SJohn Baldwin vld1.8 {q2}, [r7]! 2272bc3d5698SJohn Baldwin subs r9, #0x10 2273bc3d5698SJohn Baldwin bmi .Lxts_dec_3 2274bc3d5698SJohn Baldwin veor q1, q1, q9 2275bc3d5698SJohn Baldwin vadd.u64 q13, q12, q12 2276bc3d5698SJohn Baldwin vst1.64 {q12}, [r0,:128]! 2277bc3d5698SJohn Baldwin vswp d15,d14 2278bc3d5698SJohn Baldwin vshr.s64 q6, q13, #63 2279bc3d5698SJohn Baldwin veor q13, q13, q7 2280bc3d5698SJohn Baldwin vand q6, q6, q5 2281bc3d5698SJohn Baldwin vld1.8 {q3}, [r7]! 2282bc3d5698SJohn Baldwin subs r9, #0x10 2283bc3d5698SJohn Baldwin bmi .Lxts_dec_4 2284bc3d5698SJohn Baldwin veor q2, q2, q10 2285bc3d5698SJohn Baldwin vadd.u64 q14, q13, q13 2286bc3d5698SJohn Baldwin vst1.64 {q13}, [r0,:128]! 2287bc3d5698SJohn Baldwin vswp d13,d12 2288bc3d5698SJohn Baldwin vshr.s64 q7, q14, #63 2289bc3d5698SJohn Baldwin veor q14, q14, q6 2290bc3d5698SJohn Baldwin vand q7, q7, q5 2291bc3d5698SJohn Baldwin vld1.8 {q4}, [r7]! 2292bc3d5698SJohn Baldwin subs r9, #0x10 2293bc3d5698SJohn Baldwin bmi .Lxts_dec_5 2294bc3d5698SJohn Baldwin veor q3, q3, q11 2295bc3d5698SJohn Baldwin vadd.u64 q15, q14, q14 2296bc3d5698SJohn Baldwin vst1.64 {q14}, [r0,:128]! 2297bc3d5698SJohn Baldwin vswp d15,d14 2298bc3d5698SJohn Baldwin vshr.s64 q6, q15, #63 2299bc3d5698SJohn Baldwin veor q15, q15, q7 2300bc3d5698SJohn Baldwin vand q6, q6, q5 2301bc3d5698SJohn Baldwin vld1.8 {q5}, [r7]! 
2302bc3d5698SJohn Baldwin subs r9, #0x10 2303bc3d5698SJohn Baldwin bmi .Lxts_dec_6 2304bc3d5698SJohn Baldwin veor q4, q4, q12 2305bc3d5698SJohn Baldwin sub r9, #0x10 2306bc3d5698SJohn Baldwin vst1.64 {q15}, [r0,:128] @ next round tweak 2307bc3d5698SJohn Baldwin 2308bc3d5698SJohn Baldwin vld1.8 {q6}, [r7]! 2309bc3d5698SJohn Baldwin veor q5, q5, q13 2310bc3d5698SJohn Baldwin#ifndef BSAES_ASM_EXTENDED_KEY 2311bc3d5698SJohn Baldwin add r4, sp, #0x90 @ pass key schedule 2312bc3d5698SJohn Baldwin#else 2313bc3d5698SJohn Baldwin add r4, r10, #248 @ pass key schedule 2314bc3d5698SJohn Baldwin#endif 2315bc3d5698SJohn Baldwin veor q6, q6, q14 2316bc3d5698SJohn Baldwin mov r5, r1 @ pass rounds 2317bc3d5698SJohn Baldwin mov r0, sp 2318bc3d5698SJohn Baldwin 2319bc3d5698SJohn Baldwin bl _bsaes_decrypt8 2320bc3d5698SJohn Baldwin 2321bc3d5698SJohn Baldwin vld1.64 {q8,q9}, [r0,:128]! 2322bc3d5698SJohn Baldwin vld1.64 {q10,q11}, [r0,:128]! 2323bc3d5698SJohn Baldwin veor q0, q0, q8 2324bc3d5698SJohn Baldwin vld1.64 {q12,q13}, [r0,:128]! 2325bc3d5698SJohn Baldwin veor q1, q1, q9 2326bc3d5698SJohn Baldwin veor q8, q6, q10 2327bc3d5698SJohn Baldwin vst1.8 {q0,q1}, [r8]! 2328bc3d5698SJohn Baldwin veor q9, q4, q11 2329bc3d5698SJohn Baldwin vld1.64 {q14}, [r0,:128]! 2330bc3d5698SJohn Baldwin veor q10, q2, q12 2331bc3d5698SJohn Baldwin vst1.8 {q8,q9}, [r8]! 2332bc3d5698SJohn Baldwin veor q11, q7, q13 2333bc3d5698SJohn Baldwin veor q12, q3, q14 2334bc3d5698SJohn Baldwin vst1.8 {q10,q11}, [r8]! 2335bc3d5698SJohn Baldwin vst1.8 {q12}, [r8]! 2336bc3d5698SJohn Baldwin 2337bc3d5698SJohn Baldwin vld1.64 {q8}, [r0,:128] @ next round tweak 2338bc3d5698SJohn Baldwin b .Lxts_dec_done 2339bc3d5698SJohn Baldwin.align 4 2340bc3d5698SJohn Baldwin.Lxts_dec_6: 2341bc3d5698SJohn Baldwin vst1.64 {q14}, [r0,:128] @ next round tweak 2342bc3d5698SJohn Baldwin 2343bc3d5698SJohn Baldwin veor q4, q4, q12 2344bc3d5698SJohn Baldwin#ifndef BSAES_ASM_EXTENDED_KEY 2345bc3d5698SJohn Baldwin add r4, sp, #0x90 @ pass key schedule 2346bc3d5698SJohn Baldwin#else 2347bc3d5698SJohn Baldwin add r4, r10, #248 @ pass key schedule 2348bc3d5698SJohn Baldwin#endif 2349bc3d5698SJohn Baldwin veor q5, q5, q13 2350bc3d5698SJohn Baldwin mov r5, r1 @ pass rounds 2351bc3d5698SJohn Baldwin mov r0, sp 2352bc3d5698SJohn Baldwin 2353bc3d5698SJohn Baldwin bl _bsaes_decrypt8 2354bc3d5698SJohn Baldwin 2355bc3d5698SJohn Baldwin vld1.64 {q8,q9}, [r0,:128]! 2356bc3d5698SJohn Baldwin vld1.64 {q10,q11}, [r0,:128]! 2357bc3d5698SJohn Baldwin veor q0, q0, q8 2358bc3d5698SJohn Baldwin vld1.64 {q12,q13}, [r0,:128]! 2359bc3d5698SJohn Baldwin veor q1, q1, q9 2360bc3d5698SJohn Baldwin veor q8, q6, q10 2361bc3d5698SJohn Baldwin vst1.8 {q0,q1}, [r8]! 2362bc3d5698SJohn Baldwin veor q9, q4, q11 2363bc3d5698SJohn Baldwin veor q10, q2, q12 2364bc3d5698SJohn Baldwin vst1.8 {q8,q9}, [r8]! 2365bc3d5698SJohn Baldwin veor q11, q7, q13 2366bc3d5698SJohn Baldwin vst1.8 {q10,q11}, [r8]! 
2367bc3d5698SJohn Baldwin 2368bc3d5698SJohn Baldwin vld1.64 {q8}, [r0,:128] @ next round tweak 2369bc3d5698SJohn Baldwin b .Lxts_dec_done 2370bc3d5698SJohn Baldwin.align 4 2371bc3d5698SJohn Baldwin.Lxts_dec_5: 2372bc3d5698SJohn Baldwin veor q3, q3, q11 2373bc3d5698SJohn Baldwin#ifndef BSAES_ASM_EXTENDED_KEY 2374bc3d5698SJohn Baldwin add r4, sp, #0x90 @ pass key schedule 2375bc3d5698SJohn Baldwin#else 2376bc3d5698SJohn Baldwin add r4, r10, #248 @ pass key schedule 2377bc3d5698SJohn Baldwin#endif 2378bc3d5698SJohn Baldwin veor q4, q4, q12 2379bc3d5698SJohn Baldwin mov r5, r1 @ pass rounds 2380bc3d5698SJohn Baldwin mov r0, sp 2381bc3d5698SJohn Baldwin 2382bc3d5698SJohn Baldwin bl _bsaes_decrypt8 2383bc3d5698SJohn Baldwin 2384bc3d5698SJohn Baldwin vld1.64 {q8,q9}, [r0,:128]! 2385bc3d5698SJohn Baldwin vld1.64 {q10,q11}, [r0,:128]! 2386bc3d5698SJohn Baldwin veor q0, q0, q8 2387bc3d5698SJohn Baldwin vld1.64 {q12}, [r0,:128]! 2388bc3d5698SJohn Baldwin veor q1, q1, q9 2389bc3d5698SJohn Baldwin veor q8, q6, q10 2390bc3d5698SJohn Baldwin vst1.8 {q0,q1}, [r8]! 2391bc3d5698SJohn Baldwin veor q9, q4, q11 2392bc3d5698SJohn Baldwin veor q10, q2, q12 2393bc3d5698SJohn Baldwin vst1.8 {q8,q9}, [r8]! 2394bc3d5698SJohn Baldwin vst1.8 {q10}, [r8]! 2395bc3d5698SJohn Baldwin 2396bc3d5698SJohn Baldwin vld1.64 {q8}, [r0,:128] @ next round tweak 2397bc3d5698SJohn Baldwin b .Lxts_dec_done 2398bc3d5698SJohn Baldwin.align 4 2399bc3d5698SJohn Baldwin.Lxts_dec_4: 2400bc3d5698SJohn Baldwin veor q2, q2, q10 2401bc3d5698SJohn Baldwin#ifndef BSAES_ASM_EXTENDED_KEY 2402bc3d5698SJohn Baldwin add r4, sp, #0x90 @ pass key schedule 2403bc3d5698SJohn Baldwin#else 2404bc3d5698SJohn Baldwin add r4, r10, #248 @ pass key schedule 2405bc3d5698SJohn Baldwin#endif 2406bc3d5698SJohn Baldwin veor q3, q3, q11 2407bc3d5698SJohn Baldwin mov r5, r1 @ pass rounds 2408bc3d5698SJohn Baldwin mov r0, sp 2409bc3d5698SJohn Baldwin 2410bc3d5698SJohn Baldwin bl _bsaes_decrypt8 2411bc3d5698SJohn Baldwin 2412bc3d5698SJohn Baldwin vld1.64 {q8,q9}, [r0,:128]! 2413bc3d5698SJohn Baldwin vld1.64 {q10,q11}, [r0,:128]! 2414bc3d5698SJohn Baldwin veor q0, q0, q8 2415bc3d5698SJohn Baldwin veor q1, q1, q9 2416bc3d5698SJohn Baldwin veor q8, q6, q10 2417bc3d5698SJohn Baldwin vst1.8 {q0,q1}, [r8]! 2418bc3d5698SJohn Baldwin veor q9, q4, q11 2419bc3d5698SJohn Baldwin vst1.8 {q8,q9}, [r8]! 2420bc3d5698SJohn Baldwin 2421bc3d5698SJohn Baldwin vld1.64 {q8}, [r0,:128] @ next round tweak 2422bc3d5698SJohn Baldwin b .Lxts_dec_done 2423bc3d5698SJohn Baldwin.align 4 2424bc3d5698SJohn Baldwin.Lxts_dec_3: 2425bc3d5698SJohn Baldwin veor q1, q1, q9 2426bc3d5698SJohn Baldwin#ifndef BSAES_ASM_EXTENDED_KEY 2427bc3d5698SJohn Baldwin add r4, sp, #0x90 @ pass key schedule 2428bc3d5698SJohn Baldwin#else 2429bc3d5698SJohn Baldwin add r4, r10, #248 @ pass key schedule 2430bc3d5698SJohn Baldwin#endif 2431bc3d5698SJohn Baldwin veor q2, q2, q10 2432bc3d5698SJohn Baldwin mov r5, r1 @ pass rounds 2433bc3d5698SJohn Baldwin mov r0, sp 2434bc3d5698SJohn Baldwin 2435bc3d5698SJohn Baldwin bl _bsaes_decrypt8 2436bc3d5698SJohn Baldwin 2437bc3d5698SJohn Baldwin vld1.64 {q8,q9}, [r0,:128]! 2438bc3d5698SJohn Baldwin vld1.64 {q10}, [r0,:128]! 2439bc3d5698SJohn Baldwin veor q0, q0, q8 2440bc3d5698SJohn Baldwin veor q1, q1, q9 2441bc3d5698SJohn Baldwin veor q8, q6, q10 2442bc3d5698SJohn Baldwin vst1.8 {q0,q1}, [r8]! 2443bc3d5698SJohn Baldwin vst1.8 {q8}, [r8]! 
2444bc3d5698SJohn Baldwin 2445bc3d5698SJohn Baldwin vld1.64 {q8}, [r0,:128] @ next round tweak 2446bc3d5698SJohn Baldwin b .Lxts_dec_done 2447bc3d5698SJohn Baldwin.align 4 2448bc3d5698SJohn Baldwin.Lxts_dec_2: 2449bc3d5698SJohn Baldwin veor q0, q0, q8 2450bc3d5698SJohn Baldwin#ifndef BSAES_ASM_EXTENDED_KEY 2451bc3d5698SJohn Baldwin add r4, sp, #0x90 @ pass key schedule 2452bc3d5698SJohn Baldwin#else 2453bc3d5698SJohn Baldwin add r4, r10, #248 @ pass key schedule 2454bc3d5698SJohn Baldwin#endif 2455bc3d5698SJohn Baldwin veor q1, q1, q9 2456bc3d5698SJohn Baldwin mov r5, r1 @ pass rounds 2457bc3d5698SJohn Baldwin mov r0, sp 2458bc3d5698SJohn Baldwin 2459bc3d5698SJohn Baldwin bl _bsaes_decrypt8 2460bc3d5698SJohn Baldwin 2461bc3d5698SJohn Baldwin vld1.64 {q8,q9}, [r0,:128]! 2462bc3d5698SJohn Baldwin veor q0, q0, q8 2463bc3d5698SJohn Baldwin veor q1, q1, q9 2464bc3d5698SJohn Baldwin vst1.8 {q0,q1}, [r8]! 2465bc3d5698SJohn Baldwin 2466bc3d5698SJohn Baldwin vld1.64 {q8}, [r0,:128] @ next round tweak 2467bc3d5698SJohn Baldwin b .Lxts_dec_done 2468bc3d5698SJohn Baldwin.align 4 2469bc3d5698SJohn Baldwin.Lxts_dec_1: 2470bc3d5698SJohn Baldwin mov r0, sp 2471bc3d5698SJohn Baldwin veor q0, q0, q8 2472bc3d5698SJohn Baldwin mov r1, sp 2473bc3d5698SJohn Baldwin vst1.8 {q0}, [sp,:128] 2474bc3d5698SJohn Baldwin mov r5, r2 @ preserve magic 2475bc3d5698SJohn Baldwin mov r2, r10 2476bc3d5698SJohn Baldwin mov r4, r3 @ preserve fp 2477bc3d5698SJohn Baldwin 2478bc3d5698SJohn Baldwin bl AES_decrypt 2479bc3d5698SJohn Baldwin 2480bc3d5698SJohn Baldwin vld1.8 {q0}, [sp,:128] 2481bc3d5698SJohn Baldwin veor q0, q0, q8 2482bc3d5698SJohn Baldwin vst1.8 {q0}, [r8]! 2483bc3d5698SJohn Baldwin mov r3, r4 2484bc3d5698SJohn Baldwin mov r2, r5 2485bc3d5698SJohn Baldwin 2486bc3d5698SJohn Baldwin vmov q8, q9 @ next round tweak 2487bc3d5698SJohn Baldwin 2488bc3d5698SJohn Baldwin.Lxts_dec_done: 2489bc3d5698SJohn Baldwin#ifndef XTS_CHAIN_TWEAK 2490bc3d5698SJohn Baldwin adds r9, #0x10 2491bc3d5698SJohn Baldwin beq .Lxts_dec_ret 2492bc3d5698SJohn Baldwin 2493bc3d5698SJohn Baldwin @ calculate one round of extra tweak for the stolen ciphertext 2494bc3d5698SJohn Baldwin vldmia r2, {q5} 2495bc3d5698SJohn Baldwin vshr.s64 q6, q8, #63 2496bc3d5698SJohn Baldwin vand q6, q6, q5 2497bc3d5698SJohn Baldwin vadd.u64 q9, q8, q8 2498bc3d5698SJohn Baldwin vswp d13,d12 2499bc3d5698SJohn Baldwin veor q9, q9, q6 2500bc3d5698SJohn Baldwin 2501bc3d5698SJohn Baldwin @ perform the final decryption with the last tweak value 2502bc3d5698SJohn Baldwin vld1.8 {q0}, [r7]! 
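@ ciphertext stealing: the last full ciphertext block is decrypted with the
@ extra tweak in q9; the trailing partial block is then stitched together
@ byte-by-byte with the tail of that result and decrypted with the previous
@ tweak in q8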
2503bc3d5698SJohn Baldwin mov r0, sp 2504bc3d5698SJohn Baldwin veor q0, q0, q9 2505bc3d5698SJohn Baldwin mov r1, sp 2506bc3d5698SJohn Baldwin vst1.8 {q0}, [sp,:128] 2507bc3d5698SJohn Baldwin mov r2, r10 2508bc3d5698SJohn Baldwin mov r4, r3 @ preserve fp 2509bc3d5698SJohn Baldwin 2510bc3d5698SJohn Baldwin bl AES_decrypt 2511bc3d5698SJohn Baldwin 2512bc3d5698SJohn Baldwin vld1.8 {q0}, [sp,:128] 2513bc3d5698SJohn Baldwin veor q0, q0, q9 2514bc3d5698SJohn Baldwin vst1.8 {q0}, [r8] 2515bc3d5698SJohn Baldwin 2516bc3d5698SJohn Baldwin mov r6, r8 2517bc3d5698SJohn Baldwin.Lxts_dec_steal: 2518bc3d5698SJohn Baldwin ldrb r1, [r8] 2519bc3d5698SJohn Baldwin ldrb r0, [r7], #1 2520bc3d5698SJohn Baldwin strb r1, [r8, #0x10] 2521bc3d5698SJohn Baldwin strb r0, [r8], #1 2522bc3d5698SJohn Baldwin 2523bc3d5698SJohn Baldwin subs r9, #1 2524bc3d5698SJohn Baldwin bhi .Lxts_dec_steal 2525bc3d5698SJohn Baldwin 2526bc3d5698SJohn Baldwin vld1.8 {q0}, [r6] 2527bc3d5698SJohn Baldwin mov r0, sp 2528bc3d5698SJohn Baldwin veor q0, q8 2529bc3d5698SJohn Baldwin mov r1, sp 2530bc3d5698SJohn Baldwin vst1.8 {q0}, [sp,:128] 2531bc3d5698SJohn Baldwin mov r2, r10 2532bc3d5698SJohn Baldwin 2533bc3d5698SJohn Baldwin bl AES_decrypt 2534bc3d5698SJohn Baldwin 2535bc3d5698SJohn Baldwin vld1.8 {q0}, [sp,:128] 2536bc3d5698SJohn Baldwin veor q0, q0, q8 2537bc3d5698SJohn Baldwin vst1.8 {q0}, [r6] 2538bc3d5698SJohn Baldwin mov r3, r4 2539bc3d5698SJohn Baldwin#endif 2540bc3d5698SJohn Baldwin 2541bc3d5698SJohn Baldwin.Lxts_dec_ret: 2542bc3d5698SJohn Baldwin bic r0, r3, #0xf 2543bc3d5698SJohn Baldwin vmov.i32 q0, #0 2544bc3d5698SJohn Baldwin vmov.i32 q1, #0 2545bc3d5698SJohn Baldwin#ifdef XTS_CHAIN_TWEAK 2546bc3d5698SJohn Baldwin ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak 2547bc3d5698SJohn Baldwin#endif 2548bc3d5698SJohn Baldwin.Lxts_dec_bzero:@ wipe key schedule [if any] 2549bc3d5698SJohn Baldwin vstmia sp!, {q0,q1} 2550bc3d5698SJohn Baldwin cmp sp, r0 2551bc3d5698SJohn Baldwin bne .Lxts_dec_bzero 2552bc3d5698SJohn Baldwin 2553bc3d5698SJohn Baldwin mov sp, r3 2554bc3d5698SJohn Baldwin#ifdef XTS_CHAIN_TWEAK 2555bc3d5698SJohn Baldwin vst1.8 {q8}, [r1] 2556bc3d5698SJohn Baldwin#endif 2557bc3d5698SJohn Baldwin VFP_ABI_POP 2558bc3d5698SJohn Baldwin ldmia sp!, {r4,r5,r6,r7,r8,r9,r10, pc} @ return 2559bc3d5698SJohn Baldwin 2560*c0855eaaSJohn Baldwin.size ossl_bsaes_xts_decrypt,.-ossl_bsaes_xts_decrypt 2561bc3d5698SJohn Baldwin#endif 2562