1bc3d5698SJohn Baldwin/* Do not modify. This file is auto-generated from sha512-armv4.pl. */ 2bc3d5698SJohn Baldwin@ Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved. 3bc3d5698SJohn Baldwin@ 4*c0855eaaSJohn Baldwin@ Licensed under the Apache License 2.0 (the "License"). You may not use 5bc3d5698SJohn Baldwin@ this file except in compliance with the License. You can obtain a copy 6bc3d5698SJohn Baldwin@ in the file LICENSE in the source distribution or at 7bc3d5698SJohn Baldwin@ https://www.openssl.org/source/license.html 8bc3d5698SJohn Baldwin 9bc3d5698SJohn Baldwin 10bc3d5698SJohn Baldwin@ ==================================================================== 11bc3d5698SJohn Baldwin@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 12bc3d5698SJohn Baldwin@ project. The module is, however, dual licensed under OpenSSL and 13bc3d5698SJohn Baldwin@ CRYPTOGAMS licenses depending on where you obtain it. For further 14bc3d5698SJohn Baldwin@ details see http://www.openssl.org/~appro/cryptogams/. 15bc3d5698SJohn Baldwin@ 16bc3d5698SJohn Baldwin@ Permission to use under GPL terms is granted. 17bc3d5698SJohn Baldwin@ ==================================================================== 18bc3d5698SJohn Baldwin 19bc3d5698SJohn Baldwin@ SHA512 block procedure for ARMv4. September 2007. 20bc3d5698SJohn Baldwin 21bc3d5698SJohn Baldwin@ This code is ~4.5 (four and a half) times faster than code generated 22bc3d5698SJohn Baldwin@ by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue 23bc3d5698SJohn Baldwin@ Xscale PXA250 core]. 24bc3d5698SJohn Baldwin@ 25bc3d5698SJohn Baldwin@ July 2010. 26bc3d5698SJohn Baldwin@ 27bc3d5698SJohn Baldwin@ Rescheduling for dual-issue pipeline resulted in 6% improvement on 28bc3d5698SJohn Baldwin@ Cortex A8 core and ~40 cycles per processed byte. 29bc3d5698SJohn Baldwin 30bc3d5698SJohn Baldwin@ February 2011. 31bc3d5698SJohn Baldwin@ 32bc3d5698SJohn Baldwin@ Profiler-assisted and platform-specific optimization resulted in 7% 33bc3d5698SJohn Baldwin@ improvement on Coxtex A8 core and ~38 cycles per byte. 34bc3d5698SJohn Baldwin 35bc3d5698SJohn Baldwin@ March 2011. 36bc3d5698SJohn Baldwin@ 37bc3d5698SJohn Baldwin@ Add NEON implementation. On Cortex A8 it was measured to process 38bc3d5698SJohn Baldwin@ one byte in 23.3 cycles or ~60% faster than integer-only code. 39bc3d5698SJohn Baldwin 40bc3d5698SJohn Baldwin@ August 2012. 41bc3d5698SJohn Baldwin@ 42bc3d5698SJohn Baldwin@ Improve NEON performance by 12% on Snapdragon S4. In absolute 43bc3d5698SJohn Baldwin@ terms it's 22.6 cycles per byte, which is disappointing result. 44bc3d5698SJohn Baldwin@ Technical writers asserted that 3-way S4 pipeline can sustain 45bc3d5698SJohn Baldwin@ multiple NEON instructions per cycle, but dual NEON issue could 46bc3d5698SJohn Baldwin@ not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html 47bc3d5698SJohn Baldwin@ for further details. On side note Cortex-A15 processes one byte in 48bc3d5698SJohn Baldwin@ 16 cycles. 49bc3d5698SJohn Baldwin 50bc3d5698SJohn Baldwin@ Byte order [in]dependence. ========================================= 51bc3d5698SJohn Baldwin@ 52bc3d5698SJohn Baldwin@ Originally caller was expected to maintain specific *dword* order in 53bc3d5698SJohn Baldwin@ h[0-7], namely with most significant dword at *lower* address, which 54bc3d5698SJohn Baldwin@ was reflected in below two parameters as 0 and 4. Now caller is 55bc3d5698SJohn Baldwin@ expected to maintain native byte order for whole 64-bit values. 
56bc3d5698SJohn Baldwin#ifndef __KERNEL__ 57bc3d5698SJohn Baldwin# include "arm_arch.h" 58bc3d5698SJohn Baldwin# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} 59bc3d5698SJohn Baldwin# define VFP_ABI_POP vldmia sp!,{d8-d15} 60bc3d5698SJohn Baldwin#else 61bc3d5698SJohn Baldwin# define __ARM_ARCH__ __LINUX_ARM_ARCH__ 62bc3d5698SJohn Baldwin# define __ARM_MAX_ARCH__ 7 63bc3d5698SJohn Baldwin# define VFP_ABI_PUSH 64bc3d5698SJohn Baldwin# define VFP_ABI_POP 65bc3d5698SJohn Baldwin#endif 66bc3d5698SJohn Baldwin 67bc3d5698SJohn Baldwin#ifdef __ARMEL__ 68bc3d5698SJohn Baldwin# define LO 0 69bc3d5698SJohn Baldwin# define HI 4 70bc3d5698SJohn Baldwin# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1 71bc3d5698SJohn Baldwin#else 72bc3d5698SJohn Baldwin# define HI 0 73bc3d5698SJohn Baldwin# define LO 4 74bc3d5698SJohn Baldwin# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1 75bc3d5698SJohn Baldwin#endif 76bc3d5698SJohn Baldwin 77bc3d5698SJohn Baldwin#if defined(__thumb2__) 78bc3d5698SJohn Baldwin.syntax unified 79bc3d5698SJohn Baldwin.thumb 80bc3d5698SJohn Baldwin# define adrl adr 81bc3d5698SJohn Baldwin#else 82bc3d5698SJohn Baldwin.code 32 83bc3d5698SJohn Baldwin#endif 84bc3d5698SJohn Baldwin 85*c0855eaaSJohn Baldwin.text 86*c0855eaaSJohn Baldwin 87bc3d5698SJohn Baldwin.type K512,%object 88bc3d5698SJohn Baldwin.align 5 89bc3d5698SJohn BaldwinK512: 90bc3d5698SJohn Baldwin WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd) 91bc3d5698SJohn Baldwin WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc) 92bc3d5698SJohn Baldwin WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019) 93bc3d5698SJohn Baldwin WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118) 94bc3d5698SJohn Baldwin WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe) 95bc3d5698SJohn Baldwin WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2) 96bc3d5698SJohn Baldwin WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1) 97bc3d5698SJohn Baldwin WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694) 98bc3d5698SJohn Baldwin WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3) 99bc3d5698SJohn Baldwin WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65) 100bc3d5698SJohn Baldwin WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483) 101bc3d5698SJohn Baldwin WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5) 102bc3d5698SJohn Baldwin WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210) 103bc3d5698SJohn Baldwin WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4) 104bc3d5698SJohn Baldwin WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725) 105bc3d5698SJohn Baldwin WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70) 106bc3d5698SJohn Baldwin WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926) 107bc3d5698SJohn Baldwin WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df) 108bc3d5698SJohn Baldwin WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8) 109bc3d5698SJohn Baldwin WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b) 110bc3d5698SJohn Baldwin WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001) 111bc3d5698SJohn Baldwin WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30) 112bc3d5698SJohn Baldwin WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910) 113bc3d5698SJohn Baldwin WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8) 114bc3d5698SJohn Baldwin WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53) 115bc3d5698SJohn Baldwin WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8) 116bc3d5698SJohn Baldwin WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb) 117bc3d5698SJohn Baldwin WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3) 118bc3d5698SJohn 
Baldwin WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60) 119bc3d5698SJohn Baldwin WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec) 120bc3d5698SJohn Baldwin WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9) 121bc3d5698SJohn Baldwin WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b) 122bc3d5698SJohn Baldwin WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207) 123bc3d5698SJohn Baldwin WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178) 124bc3d5698SJohn Baldwin WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6) 125bc3d5698SJohn Baldwin WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b) 126bc3d5698SJohn Baldwin WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493) 127bc3d5698SJohn Baldwin WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c) 128bc3d5698SJohn Baldwin WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) 129bc3d5698SJohn Baldwin WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) 130bc3d5698SJohn Baldwin.size K512,.-K512 131bc3d5698SJohn Baldwin#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) 132bc3d5698SJohn Baldwin.LOPENSSL_armcap: 133*c0855eaaSJohn Baldwin# ifdef _WIN32 134*c0855eaaSJohn Baldwin.word OPENSSL_armcap_P 135*c0855eaaSJohn Baldwin# else 136bc3d5698SJohn Baldwin.word OPENSSL_armcap_P-.Lsha512_block_data_order 137*c0855eaaSJohn Baldwin# endif 138bc3d5698SJohn Baldwin.skip 32-4 139bc3d5698SJohn Baldwin#else 140bc3d5698SJohn Baldwin.skip 32 141bc3d5698SJohn Baldwin#endif 142bc3d5698SJohn Baldwin 143bc3d5698SJohn Baldwin.globl sha512_block_data_order 144bc3d5698SJohn Baldwin.type sha512_block_data_order,%function 145bc3d5698SJohn Baldwinsha512_block_data_order: 146bc3d5698SJohn Baldwin.Lsha512_block_data_order: 147bc3d5698SJohn Baldwin#if __ARM_ARCH__<7 && !defined(__thumb2__) 148bc3d5698SJohn Baldwin sub r3,pc,#8 @ sha512_block_data_order 149bc3d5698SJohn Baldwin#else 150bc3d5698SJohn Baldwin adr r3,.Lsha512_block_data_order 151bc3d5698SJohn Baldwin#endif 152bc3d5698SJohn Baldwin#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) 153bc3d5698SJohn Baldwin ldr r12,.LOPENSSL_armcap 154*c0855eaaSJohn Baldwin# if !defined(_WIN32) 155bc3d5698SJohn Baldwin ldr r12,[r3,r12] @ OPENSSL_armcap_P 156*c0855eaaSJohn Baldwin# endif 157*c0855eaaSJohn Baldwin# if defined(__APPLE__) || defined(_WIN32) 158bc3d5698SJohn Baldwin ldr r12,[r12] 159bc3d5698SJohn Baldwin# endif 160bc3d5698SJohn Baldwin tst r12,#ARMV7_NEON 161bc3d5698SJohn Baldwin bne .LNEON 162bc3d5698SJohn Baldwin#endif 163bc3d5698SJohn Baldwin add r2,r1,r2,lsl#7 @ len to point at the end of inp 164bc3d5698SJohn Baldwin stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} 165bc3d5698SJohn Baldwin sub r14,r3,#672 @ K512 166bc3d5698SJohn Baldwin sub sp,sp,#9*8 167bc3d5698SJohn Baldwin 168bc3d5698SJohn Baldwin ldr r7,[r0,#32+LO] 169bc3d5698SJohn Baldwin ldr r8,[r0,#32+HI] 170bc3d5698SJohn Baldwin ldr r9, [r0,#48+LO] 171bc3d5698SJohn Baldwin ldr r10, [r0,#48+HI] 172bc3d5698SJohn Baldwin ldr r11, [r0,#56+LO] 173bc3d5698SJohn Baldwin ldr r12, [r0,#56+HI] 174bc3d5698SJohn Baldwin.Loop: 175bc3d5698SJohn Baldwin str r9, [sp,#48+0] 176bc3d5698SJohn Baldwin str r10, [sp,#48+4] 177bc3d5698SJohn Baldwin str r11, [sp,#56+0] 178bc3d5698SJohn Baldwin str r12, [sp,#56+4] 179bc3d5698SJohn Baldwin ldr r5,[r0,#0+LO] 180bc3d5698SJohn Baldwin ldr r6,[r0,#0+HI] 181bc3d5698SJohn Baldwin ldr r3,[r0,#8+LO] 182bc3d5698SJohn Baldwin ldr r4,[r0,#8+HI] 183bc3d5698SJohn Baldwin ldr r9, [r0,#16+LO] 184bc3d5698SJohn Baldwin ldr r10, [r0,#16+HI] 185bc3d5698SJohn Baldwin ldr r11, [r0,#24+LO] 186bc3d5698SJohn Baldwin ldr r12, [r0,#24+HI] 187bc3d5698SJohn 
Baldwin str r3,[sp,#8+0] 188bc3d5698SJohn Baldwin str r4,[sp,#8+4] 189bc3d5698SJohn Baldwin str r9, [sp,#16+0] 190bc3d5698SJohn Baldwin str r10, [sp,#16+4] 191bc3d5698SJohn Baldwin str r11, [sp,#24+0] 192bc3d5698SJohn Baldwin str r12, [sp,#24+4] 193bc3d5698SJohn Baldwin ldr r3,[r0,#40+LO] 194bc3d5698SJohn Baldwin ldr r4,[r0,#40+HI] 195bc3d5698SJohn Baldwin str r3,[sp,#40+0] 196bc3d5698SJohn Baldwin str r4,[sp,#40+4] 197bc3d5698SJohn Baldwin 198bc3d5698SJohn Baldwin.L00_15: 199bc3d5698SJohn Baldwin#if __ARM_ARCH__<7 200bc3d5698SJohn Baldwin ldrb r3,[r1,#7] 201bc3d5698SJohn Baldwin ldrb r9, [r1,#6] 202bc3d5698SJohn Baldwin ldrb r10, [r1,#5] 203bc3d5698SJohn Baldwin ldrb r11, [r1,#4] 204bc3d5698SJohn Baldwin ldrb r4,[r1,#3] 205bc3d5698SJohn Baldwin ldrb r12, [r1,#2] 206bc3d5698SJohn Baldwin orr r3,r3,r9,lsl#8 207bc3d5698SJohn Baldwin ldrb r9, [r1,#1] 208bc3d5698SJohn Baldwin orr r3,r3,r10,lsl#16 209bc3d5698SJohn Baldwin ldrb r10, [r1],#8 210bc3d5698SJohn Baldwin orr r3,r3,r11,lsl#24 211bc3d5698SJohn Baldwin orr r4,r4,r12,lsl#8 212bc3d5698SJohn Baldwin orr r4,r4,r9,lsl#16 213bc3d5698SJohn Baldwin orr r4,r4,r10,lsl#24 214bc3d5698SJohn Baldwin#else 215bc3d5698SJohn Baldwin ldr r3,[r1,#4] 216bc3d5698SJohn Baldwin ldr r4,[r1],#8 217bc3d5698SJohn Baldwin#ifdef __ARMEL__ 218bc3d5698SJohn Baldwin rev r3,r3 219bc3d5698SJohn Baldwin rev r4,r4 220bc3d5698SJohn Baldwin#endif 221bc3d5698SJohn Baldwin#endif 222bc3d5698SJohn Baldwin @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) 223bc3d5698SJohn Baldwin @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 224bc3d5698SJohn Baldwin @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 225bc3d5698SJohn Baldwin mov r9,r7,lsr#14 226bc3d5698SJohn Baldwin str r3,[sp,#64+0] 227bc3d5698SJohn Baldwin mov r10,r8,lsr#14 228bc3d5698SJohn Baldwin str r4,[sp,#64+4] 229bc3d5698SJohn Baldwin eor r9,r9,r8,lsl#18 230bc3d5698SJohn Baldwin ldr r11,[sp,#56+0] @ h.lo 231bc3d5698SJohn Baldwin eor r10,r10,r7,lsl#18 232bc3d5698SJohn Baldwin ldr r12,[sp,#56+4] @ h.hi 233bc3d5698SJohn Baldwin eor r9,r9,r7,lsr#18 234bc3d5698SJohn Baldwin eor r10,r10,r8,lsr#18 235bc3d5698SJohn Baldwin eor r9,r9,r8,lsl#14 236bc3d5698SJohn Baldwin eor r10,r10,r7,lsl#14 237bc3d5698SJohn Baldwin eor r9,r9,r8,lsr#9 238bc3d5698SJohn Baldwin eor r10,r10,r7,lsr#9 239bc3d5698SJohn Baldwin eor r9,r9,r7,lsl#23 240bc3d5698SJohn Baldwin eor r10,r10,r8,lsl#23 @ Sigma1(e) 241bc3d5698SJohn Baldwin adds r3,r3,r9 242bc3d5698SJohn Baldwin ldr r9,[sp,#40+0] @ f.lo 243bc3d5698SJohn Baldwin adc r4,r4,r10 @ T += Sigma1(e) 244bc3d5698SJohn Baldwin ldr r10,[sp,#40+4] @ f.hi 245bc3d5698SJohn Baldwin adds r3,r3,r11 246bc3d5698SJohn Baldwin ldr r11,[sp,#48+0] @ g.lo 247bc3d5698SJohn Baldwin adc r4,r4,r12 @ T += h 248bc3d5698SJohn Baldwin ldr r12,[sp,#48+4] @ g.hi 249bc3d5698SJohn Baldwin 250bc3d5698SJohn Baldwin eor r9,r9,r11 251bc3d5698SJohn Baldwin str r7,[sp,#32+0] 252bc3d5698SJohn Baldwin eor r10,r10,r12 253bc3d5698SJohn Baldwin str r8,[sp,#32+4] 254bc3d5698SJohn Baldwin and r9,r9,r7 255bc3d5698SJohn Baldwin str r5,[sp,#0+0] 256bc3d5698SJohn Baldwin and r10,r10,r8 257bc3d5698SJohn Baldwin str r6,[sp,#0+4] 258bc3d5698SJohn Baldwin eor r9,r9,r11 259bc3d5698SJohn Baldwin ldr r11,[r14,#LO] @ K[i].lo 260bc3d5698SJohn Baldwin eor r10,r10,r12 @ Ch(e,f,g) 261bc3d5698SJohn Baldwin ldr r12,[r14,#HI] @ K[i].hi 262bc3d5698SJohn Baldwin 263bc3d5698SJohn Baldwin adds r3,r3,r9 264bc3d5698SJohn Baldwin ldr r7,[sp,#24+0] @ d.lo 265bc3d5698SJohn Baldwin adc r4,r4,r10 @ T += Ch(e,f,g) 266bc3d5698SJohn Baldwin ldr r8,[sp,#24+4] @ d.hi 
267bc3d5698SJohn Baldwin adds r3,r3,r11 268bc3d5698SJohn Baldwin and r9,r11,#0xff 269bc3d5698SJohn Baldwin adc r4,r4,r12 @ T += K[i] 270bc3d5698SJohn Baldwin adds r7,r7,r3 271bc3d5698SJohn Baldwin ldr r11,[sp,#8+0] @ b.lo 272bc3d5698SJohn Baldwin adc r8,r8,r4 @ d += T 273bc3d5698SJohn Baldwin teq r9,#148 274bc3d5698SJohn Baldwin 275bc3d5698SJohn Baldwin ldr r12,[sp,#16+0] @ c.lo 276bc3d5698SJohn Baldwin#ifdef __thumb2__ 277bc3d5698SJohn Baldwin it eq @ Thumb2 thing, sanity check in ARM 278bc3d5698SJohn Baldwin#endif 279bc3d5698SJohn Baldwin orreq r14,r14,#1 280bc3d5698SJohn Baldwin @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) 281bc3d5698SJohn Baldwin @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 282bc3d5698SJohn Baldwin @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 283bc3d5698SJohn Baldwin mov r9,r5,lsr#28 284bc3d5698SJohn Baldwin mov r10,r6,lsr#28 285bc3d5698SJohn Baldwin eor r9,r9,r6,lsl#4 286bc3d5698SJohn Baldwin eor r10,r10,r5,lsl#4 287bc3d5698SJohn Baldwin eor r9,r9,r6,lsr#2 288bc3d5698SJohn Baldwin eor r10,r10,r5,lsr#2 289bc3d5698SJohn Baldwin eor r9,r9,r5,lsl#30 290bc3d5698SJohn Baldwin eor r10,r10,r6,lsl#30 291bc3d5698SJohn Baldwin eor r9,r9,r6,lsr#7 292bc3d5698SJohn Baldwin eor r10,r10,r5,lsr#7 293bc3d5698SJohn Baldwin eor r9,r9,r5,lsl#25 294bc3d5698SJohn Baldwin eor r10,r10,r6,lsl#25 @ Sigma0(a) 295bc3d5698SJohn Baldwin adds r3,r3,r9 296bc3d5698SJohn Baldwin and r9,r5,r11 297bc3d5698SJohn Baldwin adc r4,r4,r10 @ T += Sigma0(a) 298bc3d5698SJohn Baldwin 299bc3d5698SJohn Baldwin ldr r10,[sp,#8+4] @ b.hi 300bc3d5698SJohn Baldwin orr r5,r5,r11 301bc3d5698SJohn Baldwin ldr r11,[sp,#16+4] @ c.hi 302bc3d5698SJohn Baldwin and r5,r5,r12 303bc3d5698SJohn Baldwin and r12,r6,r10 304bc3d5698SJohn Baldwin orr r6,r6,r10 305bc3d5698SJohn Baldwin orr r5,r5,r9 @ Maj(a,b,c).lo 306bc3d5698SJohn Baldwin and r6,r6,r11 307bc3d5698SJohn Baldwin adds r5,r5,r3 308bc3d5698SJohn Baldwin orr r6,r6,r12 @ Maj(a,b,c).hi 309bc3d5698SJohn Baldwin sub sp,sp,#8 310bc3d5698SJohn Baldwin adc r6,r6,r4 @ h += T 311bc3d5698SJohn Baldwin tst r14,#1 312bc3d5698SJohn Baldwin add r14,r14,#8 313bc3d5698SJohn Baldwin tst r14,#1 314bc3d5698SJohn Baldwin beq .L00_15 315bc3d5698SJohn Baldwin ldr r9,[sp,#184+0] 316bc3d5698SJohn Baldwin ldr r10,[sp,#184+4] 317bc3d5698SJohn Baldwin bic r14,r14,#1 318bc3d5698SJohn Baldwin.L16_79: 319bc3d5698SJohn Baldwin @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) 320bc3d5698SJohn Baldwin @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25 321bc3d5698SJohn Baldwin @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7 322bc3d5698SJohn Baldwin mov r3,r9,lsr#1 323bc3d5698SJohn Baldwin ldr r11,[sp,#80+0] 324bc3d5698SJohn Baldwin mov r4,r10,lsr#1 325bc3d5698SJohn Baldwin ldr r12,[sp,#80+4] 326bc3d5698SJohn Baldwin eor r3,r3,r10,lsl#31 327bc3d5698SJohn Baldwin eor r4,r4,r9,lsl#31 328bc3d5698SJohn Baldwin eor r3,r3,r9,lsr#8 329bc3d5698SJohn Baldwin eor r4,r4,r10,lsr#8 330bc3d5698SJohn Baldwin eor r3,r3,r10,lsl#24 331bc3d5698SJohn Baldwin eor r4,r4,r9,lsl#24 332bc3d5698SJohn Baldwin eor r3,r3,r9,lsr#7 333bc3d5698SJohn Baldwin eor r4,r4,r10,lsr#7 334bc3d5698SJohn Baldwin eor r3,r3,r10,lsl#25 335bc3d5698SJohn Baldwin 336bc3d5698SJohn Baldwin @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6)) 337bc3d5698SJohn Baldwin @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26 338bc3d5698SJohn Baldwin @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6 339bc3d5698SJohn Baldwin mov r9,r11,lsr#19 340bc3d5698SJohn Baldwin mov r10,r12,lsr#19 341bc3d5698SJohn Baldwin eor r9,r9,r12,lsl#13 342bc3d5698SJohn Baldwin eor 
r10,r10,r11,lsl#13 343bc3d5698SJohn Baldwin eor r9,r9,r12,lsr#29 344bc3d5698SJohn Baldwin eor r10,r10,r11,lsr#29 345bc3d5698SJohn Baldwin eor r9,r9,r11,lsl#3 346bc3d5698SJohn Baldwin eor r10,r10,r12,lsl#3 347bc3d5698SJohn Baldwin eor r9,r9,r11,lsr#6 348bc3d5698SJohn Baldwin eor r10,r10,r12,lsr#6 349bc3d5698SJohn Baldwin ldr r11,[sp,#120+0] 350bc3d5698SJohn Baldwin eor r9,r9,r12,lsl#26 351bc3d5698SJohn Baldwin 352bc3d5698SJohn Baldwin ldr r12,[sp,#120+4] 353bc3d5698SJohn Baldwin adds r3,r3,r9 354bc3d5698SJohn Baldwin ldr r9,[sp,#192+0] 355bc3d5698SJohn Baldwin adc r4,r4,r10 356bc3d5698SJohn Baldwin 357bc3d5698SJohn Baldwin ldr r10,[sp,#192+4] 358bc3d5698SJohn Baldwin adds r3,r3,r11 359bc3d5698SJohn Baldwin adc r4,r4,r12 360bc3d5698SJohn Baldwin adds r3,r3,r9 361bc3d5698SJohn Baldwin adc r4,r4,r10 362bc3d5698SJohn Baldwin @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) 363bc3d5698SJohn Baldwin @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 364bc3d5698SJohn Baldwin @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 365bc3d5698SJohn Baldwin mov r9,r7,lsr#14 366bc3d5698SJohn Baldwin str r3,[sp,#64+0] 367bc3d5698SJohn Baldwin mov r10,r8,lsr#14 368bc3d5698SJohn Baldwin str r4,[sp,#64+4] 369bc3d5698SJohn Baldwin eor r9,r9,r8,lsl#18 370bc3d5698SJohn Baldwin ldr r11,[sp,#56+0] @ h.lo 371bc3d5698SJohn Baldwin eor r10,r10,r7,lsl#18 372bc3d5698SJohn Baldwin ldr r12,[sp,#56+4] @ h.hi 373bc3d5698SJohn Baldwin eor r9,r9,r7,lsr#18 374bc3d5698SJohn Baldwin eor r10,r10,r8,lsr#18 375bc3d5698SJohn Baldwin eor r9,r9,r8,lsl#14 376bc3d5698SJohn Baldwin eor r10,r10,r7,lsl#14 377bc3d5698SJohn Baldwin eor r9,r9,r8,lsr#9 378bc3d5698SJohn Baldwin eor r10,r10,r7,lsr#9 379bc3d5698SJohn Baldwin eor r9,r9,r7,lsl#23 380bc3d5698SJohn Baldwin eor r10,r10,r8,lsl#23 @ Sigma1(e) 381bc3d5698SJohn Baldwin adds r3,r3,r9 382bc3d5698SJohn Baldwin ldr r9,[sp,#40+0] @ f.lo 383bc3d5698SJohn Baldwin adc r4,r4,r10 @ T += Sigma1(e) 384bc3d5698SJohn Baldwin ldr r10,[sp,#40+4] @ f.hi 385bc3d5698SJohn Baldwin adds r3,r3,r11 386bc3d5698SJohn Baldwin ldr r11,[sp,#48+0] @ g.lo 387bc3d5698SJohn Baldwin adc r4,r4,r12 @ T += h 388bc3d5698SJohn Baldwin ldr r12,[sp,#48+4] @ g.hi 389bc3d5698SJohn Baldwin 390bc3d5698SJohn Baldwin eor r9,r9,r11 391bc3d5698SJohn Baldwin str r7,[sp,#32+0] 392bc3d5698SJohn Baldwin eor r10,r10,r12 393bc3d5698SJohn Baldwin str r8,[sp,#32+4] 394bc3d5698SJohn Baldwin and r9,r9,r7 395bc3d5698SJohn Baldwin str r5,[sp,#0+0] 396bc3d5698SJohn Baldwin and r10,r10,r8 397bc3d5698SJohn Baldwin str r6,[sp,#0+4] 398bc3d5698SJohn Baldwin eor r9,r9,r11 399bc3d5698SJohn Baldwin ldr r11,[r14,#LO] @ K[i].lo 400bc3d5698SJohn Baldwin eor r10,r10,r12 @ Ch(e,f,g) 401bc3d5698SJohn Baldwin ldr r12,[r14,#HI] @ K[i].hi 402bc3d5698SJohn Baldwin 403bc3d5698SJohn Baldwin adds r3,r3,r9 404bc3d5698SJohn Baldwin ldr r7,[sp,#24+0] @ d.lo 405bc3d5698SJohn Baldwin adc r4,r4,r10 @ T += Ch(e,f,g) 406bc3d5698SJohn Baldwin ldr r8,[sp,#24+4] @ d.hi 407bc3d5698SJohn Baldwin adds r3,r3,r11 408bc3d5698SJohn Baldwin and r9,r11,#0xff 409bc3d5698SJohn Baldwin adc r4,r4,r12 @ T += K[i] 410bc3d5698SJohn Baldwin adds r7,r7,r3 411bc3d5698SJohn Baldwin ldr r11,[sp,#8+0] @ b.lo 412bc3d5698SJohn Baldwin adc r8,r8,r4 @ d += T 413bc3d5698SJohn Baldwin teq r9,#23 414bc3d5698SJohn Baldwin 415bc3d5698SJohn Baldwin ldr r12,[sp,#16+0] @ c.lo 416bc3d5698SJohn Baldwin#ifdef __thumb2__ 417bc3d5698SJohn Baldwin it eq @ Thumb2 thing, sanity check in ARM 418bc3d5698SJohn Baldwin#endif 419bc3d5698SJohn Baldwin orreq r14,r14,#1 420bc3d5698SJohn Baldwin @ Sigma0(x) (ROTR((x),28) ^ 
ROTR((x),34) ^ ROTR((x),39)) 421bc3d5698SJohn Baldwin @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 422bc3d5698SJohn Baldwin @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 423bc3d5698SJohn Baldwin mov r9,r5,lsr#28 424bc3d5698SJohn Baldwin mov r10,r6,lsr#28 425bc3d5698SJohn Baldwin eor r9,r9,r6,lsl#4 426bc3d5698SJohn Baldwin eor r10,r10,r5,lsl#4 427bc3d5698SJohn Baldwin eor r9,r9,r6,lsr#2 428bc3d5698SJohn Baldwin eor r10,r10,r5,lsr#2 429bc3d5698SJohn Baldwin eor r9,r9,r5,lsl#30 430bc3d5698SJohn Baldwin eor r10,r10,r6,lsl#30 431bc3d5698SJohn Baldwin eor r9,r9,r6,lsr#7 432bc3d5698SJohn Baldwin eor r10,r10,r5,lsr#7 433bc3d5698SJohn Baldwin eor r9,r9,r5,lsl#25 434bc3d5698SJohn Baldwin eor r10,r10,r6,lsl#25 @ Sigma0(a) 435bc3d5698SJohn Baldwin adds r3,r3,r9 436bc3d5698SJohn Baldwin and r9,r5,r11 437bc3d5698SJohn Baldwin adc r4,r4,r10 @ T += Sigma0(a) 438bc3d5698SJohn Baldwin 439bc3d5698SJohn Baldwin ldr r10,[sp,#8+4] @ b.hi 440bc3d5698SJohn Baldwin orr r5,r5,r11 441bc3d5698SJohn Baldwin ldr r11,[sp,#16+4] @ c.hi 442bc3d5698SJohn Baldwin and r5,r5,r12 443bc3d5698SJohn Baldwin and r12,r6,r10 444bc3d5698SJohn Baldwin orr r6,r6,r10 445bc3d5698SJohn Baldwin orr r5,r5,r9 @ Maj(a,b,c).lo 446bc3d5698SJohn Baldwin and r6,r6,r11 447bc3d5698SJohn Baldwin adds r5,r5,r3 448bc3d5698SJohn Baldwin orr r6,r6,r12 @ Maj(a,b,c).hi 449bc3d5698SJohn Baldwin sub sp,sp,#8 450bc3d5698SJohn Baldwin adc r6,r6,r4 @ h += T 451bc3d5698SJohn Baldwin tst r14,#1 452bc3d5698SJohn Baldwin add r14,r14,#8 453bc3d5698SJohn Baldwin#ifdef __thumb2__ 454bc3d5698SJohn Baldwin ittt eq @ Thumb2 thing, sanity check in ARM 455bc3d5698SJohn Baldwin#endif 456bc3d5698SJohn Baldwin ldreq r9,[sp,#184+0] 457bc3d5698SJohn Baldwin ldreq r10,[sp,#184+4] 458bc3d5698SJohn Baldwin beq .L16_79 459bc3d5698SJohn Baldwin bic r14,r14,#1 460bc3d5698SJohn Baldwin 461bc3d5698SJohn Baldwin ldr r3,[sp,#8+0] 462bc3d5698SJohn Baldwin ldr r4,[sp,#8+4] 463bc3d5698SJohn Baldwin ldr r9, [r0,#0+LO] 464bc3d5698SJohn Baldwin ldr r10, [r0,#0+HI] 465bc3d5698SJohn Baldwin ldr r11, [r0,#8+LO] 466bc3d5698SJohn Baldwin ldr r12, [r0,#8+HI] 467bc3d5698SJohn Baldwin adds r9,r5,r9 468bc3d5698SJohn Baldwin str r9, [r0,#0+LO] 469bc3d5698SJohn Baldwin adc r10,r6,r10 470bc3d5698SJohn Baldwin str r10, [r0,#0+HI] 471bc3d5698SJohn Baldwin adds r11,r3,r11 472bc3d5698SJohn Baldwin str r11, [r0,#8+LO] 473bc3d5698SJohn Baldwin adc r12,r4,r12 474bc3d5698SJohn Baldwin str r12, [r0,#8+HI] 475bc3d5698SJohn Baldwin 476bc3d5698SJohn Baldwin ldr r5,[sp,#16+0] 477bc3d5698SJohn Baldwin ldr r6,[sp,#16+4] 478bc3d5698SJohn Baldwin ldr r3,[sp,#24+0] 479bc3d5698SJohn Baldwin ldr r4,[sp,#24+4] 480bc3d5698SJohn Baldwin ldr r9, [r0,#16+LO] 481bc3d5698SJohn Baldwin ldr r10, [r0,#16+HI] 482bc3d5698SJohn Baldwin ldr r11, [r0,#24+LO] 483bc3d5698SJohn Baldwin ldr r12, [r0,#24+HI] 484bc3d5698SJohn Baldwin adds r9,r5,r9 485bc3d5698SJohn Baldwin str r9, [r0,#16+LO] 486bc3d5698SJohn Baldwin adc r10,r6,r10 487bc3d5698SJohn Baldwin str r10, [r0,#16+HI] 488bc3d5698SJohn Baldwin adds r11,r3,r11 489bc3d5698SJohn Baldwin str r11, [r0,#24+LO] 490bc3d5698SJohn Baldwin adc r12,r4,r12 491bc3d5698SJohn Baldwin str r12, [r0,#24+HI] 492bc3d5698SJohn Baldwin 493bc3d5698SJohn Baldwin ldr r3,[sp,#40+0] 494bc3d5698SJohn Baldwin ldr r4,[sp,#40+4] 495bc3d5698SJohn Baldwin ldr r9, [r0,#32+LO] 496bc3d5698SJohn Baldwin ldr r10, [r0,#32+HI] 497bc3d5698SJohn Baldwin ldr r11, [r0,#40+LO] 498bc3d5698SJohn Baldwin ldr r12, [r0,#40+HI] 499bc3d5698SJohn Baldwin adds r7,r7,r9 500bc3d5698SJohn Baldwin str r7,[r0,#32+LO] 501bc3d5698SJohn Baldwin 
adc r8,r8,r10 502bc3d5698SJohn Baldwin str r8,[r0,#32+HI] 503bc3d5698SJohn Baldwin adds r11,r3,r11 504bc3d5698SJohn Baldwin str r11, [r0,#40+LO] 505bc3d5698SJohn Baldwin adc r12,r4,r12 506bc3d5698SJohn Baldwin str r12, [r0,#40+HI] 507bc3d5698SJohn Baldwin 508bc3d5698SJohn Baldwin ldr r5,[sp,#48+0] 509bc3d5698SJohn Baldwin ldr r6,[sp,#48+4] 510bc3d5698SJohn Baldwin ldr r3,[sp,#56+0] 511bc3d5698SJohn Baldwin ldr r4,[sp,#56+4] 512bc3d5698SJohn Baldwin ldr r9, [r0,#48+LO] 513bc3d5698SJohn Baldwin ldr r10, [r0,#48+HI] 514bc3d5698SJohn Baldwin ldr r11, [r0,#56+LO] 515bc3d5698SJohn Baldwin ldr r12, [r0,#56+HI] 516bc3d5698SJohn Baldwin adds r9,r5,r9 517bc3d5698SJohn Baldwin str r9, [r0,#48+LO] 518bc3d5698SJohn Baldwin adc r10,r6,r10 519bc3d5698SJohn Baldwin str r10, [r0,#48+HI] 520bc3d5698SJohn Baldwin adds r11,r3,r11 521bc3d5698SJohn Baldwin str r11, [r0,#56+LO] 522bc3d5698SJohn Baldwin adc r12,r4,r12 523bc3d5698SJohn Baldwin str r12, [r0,#56+HI] 524bc3d5698SJohn Baldwin 525bc3d5698SJohn Baldwin add sp,sp,#640 526bc3d5698SJohn Baldwin sub r14,r14,#640 527bc3d5698SJohn Baldwin 528bc3d5698SJohn Baldwin teq r1,r2 529bc3d5698SJohn Baldwin bne .Loop 530bc3d5698SJohn Baldwin 531bc3d5698SJohn Baldwin add sp,sp,#8*9 @ destroy frame 532bc3d5698SJohn Baldwin#if __ARM_ARCH__>=5 533bc3d5698SJohn Baldwin ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} 534bc3d5698SJohn Baldwin#else 535bc3d5698SJohn Baldwin ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} 536bc3d5698SJohn Baldwin tst lr,#1 537bc3d5698SJohn Baldwin moveq pc,lr @ be binary compatible with V4, yet 538bc3d5698SJohn Baldwin.word 0xe12fff1e @ interoperable with Thumb ISA:-) 539bc3d5698SJohn Baldwin#endif 540bc3d5698SJohn Baldwin.size sha512_block_data_order,.-sha512_block_data_order 541bc3d5698SJohn Baldwin#if __ARM_MAX_ARCH__>=7 542bc3d5698SJohn Baldwin.arch armv7-a 543bc3d5698SJohn Baldwin.fpu neon 544bc3d5698SJohn Baldwin 545bc3d5698SJohn Baldwin.globl sha512_block_data_order_neon 546bc3d5698SJohn Baldwin.type sha512_block_data_order_neon,%function 547bc3d5698SJohn Baldwin.align 4 548bc3d5698SJohn Baldwinsha512_block_data_order_neon: 549bc3d5698SJohn Baldwin.LNEON: 550bc3d5698SJohn Baldwin dmb @ errata #451034 on early Cortex A8 551bc3d5698SJohn Baldwin add r2,r1,r2,lsl#7 @ len to point at the end of inp 552bc3d5698SJohn Baldwin adr r3,K512 553bc3d5698SJohn Baldwin VFP_ABI_PUSH 554bc3d5698SJohn Baldwin vldmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ load context 555bc3d5698SJohn Baldwin.Loop_neon: 556bc3d5698SJohn Baldwin vshr.u64 d24,d20,#14 @ 0 557bc3d5698SJohn Baldwin#if 0<16 558bc3d5698SJohn Baldwin vld1.64 {d0},[r1]! @ handles unaligned 559bc3d5698SJohn Baldwin#endif 560bc3d5698SJohn Baldwin vshr.u64 d25,d20,#18 561bc3d5698SJohn Baldwin#if 0>0 562bc3d5698SJohn Baldwin vadd.i64 d16,d30 @ h+=Maj from the past 563bc3d5698SJohn Baldwin#endif 564bc3d5698SJohn Baldwin vshr.u64 d26,d20,#41 565bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! 
@ K[i++] 566bc3d5698SJohn Baldwin vsli.64 d24,d20,#50 567bc3d5698SJohn Baldwin vsli.64 d25,d20,#46 568bc3d5698SJohn Baldwin vmov d29,d20 569bc3d5698SJohn Baldwin vsli.64 d26,d20,#23 570bc3d5698SJohn Baldwin#if 0<16 && defined(__ARMEL__) 571bc3d5698SJohn Baldwin vrev64.8 d0,d0 572bc3d5698SJohn Baldwin#endif 573bc3d5698SJohn Baldwin veor d25,d24 574bc3d5698SJohn Baldwin vbsl d29,d21,d22 @ Ch(e,f,g) 575bc3d5698SJohn Baldwin vshr.u64 d24,d16,#28 576bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 577bc3d5698SJohn Baldwin vadd.i64 d27,d29,d23 578bc3d5698SJohn Baldwin vshr.u64 d25,d16,#34 579bc3d5698SJohn Baldwin vsli.64 d24,d16,#36 580bc3d5698SJohn Baldwin vadd.i64 d27,d26 581bc3d5698SJohn Baldwin vshr.u64 d26,d16,#39 582bc3d5698SJohn Baldwin vadd.i64 d28,d0 583bc3d5698SJohn Baldwin vsli.64 d25,d16,#30 584bc3d5698SJohn Baldwin veor d30,d16,d17 585bc3d5698SJohn Baldwin vsli.64 d26,d16,#25 586bc3d5698SJohn Baldwin veor d23,d24,d25 587bc3d5698SJohn Baldwin vadd.i64 d27,d28 588bc3d5698SJohn Baldwin vbsl d30,d18,d17 @ Maj(a,b,c) 589bc3d5698SJohn Baldwin veor d23,d26 @ Sigma0(a) 590bc3d5698SJohn Baldwin vadd.i64 d19,d27 591bc3d5698SJohn Baldwin vadd.i64 d30,d27 592bc3d5698SJohn Baldwin @ vadd.i64 d23,d30 593bc3d5698SJohn Baldwin vshr.u64 d24,d19,#14 @ 1 594bc3d5698SJohn Baldwin#if 1<16 595bc3d5698SJohn Baldwin vld1.64 {d1},[r1]! @ handles unaligned 596bc3d5698SJohn Baldwin#endif 597bc3d5698SJohn Baldwin vshr.u64 d25,d19,#18 598bc3d5698SJohn Baldwin#if 1>0 599bc3d5698SJohn Baldwin vadd.i64 d23,d30 @ h+=Maj from the past 600bc3d5698SJohn Baldwin#endif 601bc3d5698SJohn Baldwin vshr.u64 d26,d19,#41 602bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! @ K[i++] 603bc3d5698SJohn Baldwin vsli.64 d24,d19,#50 604bc3d5698SJohn Baldwin vsli.64 d25,d19,#46 605bc3d5698SJohn Baldwin vmov d29,d19 606bc3d5698SJohn Baldwin vsli.64 d26,d19,#23 607bc3d5698SJohn Baldwin#if 1<16 && defined(__ARMEL__) 608bc3d5698SJohn Baldwin vrev64.8 d1,d1 609bc3d5698SJohn Baldwin#endif 610bc3d5698SJohn Baldwin veor d25,d24 611bc3d5698SJohn Baldwin vbsl d29,d20,d21 @ Ch(e,f,g) 612bc3d5698SJohn Baldwin vshr.u64 d24,d23,#28 613bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 614bc3d5698SJohn Baldwin vadd.i64 d27,d29,d22 615bc3d5698SJohn Baldwin vshr.u64 d25,d23,#34 616bc3d5698SJohn Baldwin vsli.64 d24,d23,#36 617bc3d5698SJohn Baldwin vadd.i64 d27,d26 618bc3d5698SJohn Baldwin vshr.u64 d26,d23,#39 619bc3d5698SJohn Baldwin vadd.i64 d28,d1 620bc3d5698SJohn Baldwin vsli.64 d25,d23,#30 621bc3d5698SJohn Baldwin veor d30,d23,d16 622bc3d5698SJohn Baldwin vsli.64 d26,d23,#25 623bc3d5698SJohn Baldwin veor d22,d24,d25 624bc3d5698SJohn Baldwin vadd.i64 d27,d28 625bc3d5698SJohn Baldwin vbsl d30,d17,d16 @ Maj(a,b,c) 626bc3d5698SJohn Baldwin veor d22,d26 @ Sigma0(a) 627bc3d5698SJohn Baldwin vadd.i64 d18,d27 628bc3d5698SJohn Baldwin vadd.i64 d30,d27 629bc3d5698SJohn Baldwin @ vadd.i64 d22,d30 630bc3d5698SJohn Baldwin vshr.u64 d24,d18,#14 @ 2 631bc3d5698SJohn Baldwin#if 2<16 632bc3d5698SJohn Baldwin vld1.64 {d2},[r1]! @ handles unaligned 633bc3d5698SJohn Baldwin#endif 634bc3d5698SJohn Baldwin vshr.u64 d25,d18,#18 635bc3d5698SJohn Baldwin#if 2>0 636bc3d5698SJohn Baldwin vadd.i64 d22,d30 @ h+=Maj from the past 637bc3d5698SJohn Baldwin#endif 638bc3d5698SJohn Baldwin vshr.u64 d26,d18,#41 639bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! 
@ K[i++] 640bc3d5698SJohn Baldwin vsli.64 d24,d18,#50 641bc3d5698SJohn Baldwin vsli.64 d25,d18,#46 642bc3d5698SJohn Baldwin vmov d29,d18 643bc3d5698SJohn Baldwin vsli.64 d26,d18,#23 644bc3d5698SJohn Baldwin#if 2<16 && defined(__ARMEL__) 645bc3d5698SJohn Baldwin vrev64.8 d2,d2 646bc3d5698SJohn Baldwin#endif 647bc3d5698SJohn Baldwin veor d25,d24 648bc3d5698SJohn Baldwin vbsl d29,d19,d20 @ Ch(e,f,g) 649bc3d5698SJohn Baldwin vshr.u64 d24,d22,#28 650bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 651bc3d5698SJohn Baldwin vadd.i64 d27,d29,d21 652bc3d5698SJohn Baldwin vshr.u64 d25,d22,#34 653bc3d5698SJohn Baldwin vsli.64 d24,d22,#36 654bc3d5698SJohn Baldwin vadd.i64 d27,d26 655bc3d5698SJohn Baldwin vshr.u64 d26,d22,#39 656bc3d5698SJohn Baldwin vadd.i64 d28,d2 657bc3d5698SJohn Baldwin vsli.64 d25,d22,#30 658bc3d5698SJohn Baldwin veor d30,d22,d23 659bc3d5698SJohn Baldwin vsli.64 d26,d22,#25 660bc3d5698SJohn Baldwin veor d21,d24,d25 661bc3d5698SJohn Baldwin vadd.i64 d27,d28 662bc3d5698SJohn Baldwin vbsl d30,d16,d23 @ Maj(a,b,c) 663bc3d5698SJohn Baldwin veor d21,d26 @ Sigma0(a) 664bc3d5698SJohn Baldwin vadd.i64 d17,d27 665bc3d5698SJohn Baldwin vadd.i64 d30,d27 666bc3d5698SJohn Baldwin @ vadd.i64 d21,d30 667bc3d5698SJohn Baldwin vshr.u64 d24,d17,#14 @ 3 668bc3d5698SJohn Baldwin#if 3<16 669bc3d5698SJohn Baldwin vld1.64 {d3},[r1]! @ handles unaligned 670bc3d5698SJohn Baldwin#endif 671bc3d5698SJohn Baldwin vshr.u64 d25,d17,#18 672bc3d5698SJohn Baldwin#if 3>0 673bc3d5698SJohn Baldwin vadd.i64 d21,d30 @ h+=Maj from the past 674bc3d5698SJohn Baldwin#endif 675bc3d5698SJohn Baldwin vshr.u64 d26,d17,#41 676bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! @ K[i++] 677bc3d5698SJohn Baldwin vsli.64 d24,d17,#50 678bc3d5698SJohn Baldwin vsli.64 d25,d17,#46 679bc3d5698SJohn Baldwin vmov d29,d17 680bc3d5698SJohn Baldwin vsli.64 d26,d17,#23 681bc3d5698SJohn Baldwin#if 3<16 && defined(__ARMEL__) 682bc3d5698SJohn Baldwin vrev64.8 d3,d3 683bc3d5698SJohn Baldwin#endif 684bc3d5698SJohn Baldwin veor d25,d24 685bc3d5698SJohn Baldwin vbsl d29,d18,d19 @ Ch(e,f,g) 686bc3d5698SJohn Baldwin vshr.u64 d24,d21,#28 687bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 688bc3d5698SJohn Baldwin vadd.i64 d27,d29,d20 689bc3d5698SJohn Baldwin vshr.u64 d25,d21,#34 690bc3d5698SJohn Baldwin vsli.64 d24,d21,#36 691bc3d5698SJohn Baldwin vadd.i64 d27,d26 692bc3d5698SJohn Baldwin vshr.u64 d26,d21,#39 693bc3d5698SJohn Baldwin vadd.i64 d28,d3 694bc3d5698SJohn Baldwin vsli.64 d25,d21,#30 695bc3d5698SJohn Baldwin veor d30,d21,d22 696bc3d5698SJohn Baldwin vsli.64 d26,d21,#25 697bc3d5698SJohn Baldwin veor d20,d24,d25 698bc3d5698SJohn Baldwin vadd.i64 d27,d28 699bc3d5698SJohn Baldwin vbsl d30,d23,d22 @ Maj(a,b,c) 700bc3d5698SJohn Baldwin veor d20,d26 @ Sigma0(a) 701bc3d5698SJohn Baldwin vadd.i64 d16,d27 702bc3d5698SJohn Baldwin vadd.i64 d30,d27 703bc3d5698SJohn Baldwin @ vadd.i64 d20,d30 704bc3d5698SJohn Baldwin vshr.u64 d24,d16,#14 @ 4 705bc3d5698SJohn Baldwin#if 4<16 706bc3d5698SJohn Baldwin vld1.64 {d4},[r1]! @ handles unaligned 707bc3d5698SJohn Baldwin#endif 708bc3d5698SJohn Baldwin vshr.u64 d25,d16,#18 709bc3d5698SJohn Baldwin#if 4>0 710bc3d5698SJohn Baldwin vadd.i64 d20,d30 @ h+=Maj from the past 711bc3d5698SJohn Baldwin#endif 712bc3d5698SJohn Baldwin vshr.u64 d26,d16,#41 713bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! 
@ K[i++] 714bc3d5698SJohn Baldwin vsli.64 d24,d16,#50 715bc3d5698SJohn Baldwin vsli.64 d25,d16,#46 716bc3d5698SJohn Baldwin vmov d29,d16 717bc3d5698SJohn Baldwin vsli.64 d26,d16,#23 718bc3d5698SJohn Baldwin#if 4<16 && defined(__ARMEL__) 719bc3d5698SJohn Baldwin vrev64.8 d4,d4 720bc3d5698SJohn Baldwin#endif 721bc3d5698SJohn Baldwin veor d25,d24 722bc3d5698SJohn Baldwin vbsl d29,d17,d18 @ Ch(e,f,g) 723bc3d5698SJohn Baldwin vshr.u64 d24,d20,#28 724bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 725bc3d5698SJohn Baldwin vadd.i64 d27,d29,d19 726bc3d5698SJohn Baldwin vshr.u64 d25,d20,#34 727bc3d5698SJohn Baldwin vsli.64 d24,d20,#36 728bc3d5698SJohn Baldwin vadd.i64 d27,d26 729bc3d5698SJohn Baldwin vshr.u64 d26,d20,#39 730bc3d5698SJohn Baldwin vadd.i64 d28,d4 731bc3d5698SJohn Baldwin vsli.64 d25,d20,#30 732bc3d5698SJohn Baldwin veor d30,d20,d21 733bc3d5698SJohn Baldwin vsli.64 d26,d20,#25 734bc3d5698SJohn Baldwin veor d19,d24,d25 735bc3d5698SJohn Baldwin vadd.i64 d27,d28 736bc3d5698SJohn Baldwin vbsl d30,d22,d21 @ Maj(a,b,c) 737bc3d5698SJohn Baldwin veor d19,d26 @ Sigma0(a) 738bc3d5698SJohn Baldwin vadd.i64 d23,d27 739bc3d5698SJohn Baldwin vadd.i64 d30,d27 740bc3d5698SJohn Baldwin @ vadd.i64 d19,d30 741bc3d5698SJohn Baldwin vshr.u64 d24,d23,#14 @ 5 742bc3d5698SJohn Baldwin#if 5<16 743bc3d5698SJohn Baldwin vld1.64 {d5},[r1]! @ handles unaligned 744bc3d5698SJohn Baldwin#endif 745bc3d5698SJohn Baldwin vshr.u64 d25,d23,#18 746bc3d5698SJohn Baldwin#if 5>0 747bc3d5698SJohn Baldwin vadd.i64 d19,d30 @ h+=Maj from the past 748bc3d5698SJohn Baldwin#endif 749bc3d5698SJohn Baldwin vshr.u64 d26,d23,#41 750bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! @ K[i++] 751bc3d5698SJohn Baldwin vsli.64 d24,d23,#50 752bc3d5698SJohn Baldwin vsli.64 d25,d23,#46 753bc3d5698SJohn Baldwin vmov d29,d23 754bc3d5698SJohn Baldwin vsli.64 d26,d23,#23 755bc3d5698SJohn Baldwin#if 5<16 && defined(__ARMEL__) 756bc3d5698SJohn Baldwin vrev64.8 d5,d5 757bc3d5698SJohn Baldwin#endif 758bc3d5698SJohn Baldwin veor d25,d24 759bc3d5698SJohn Baldwin vbsl d29,d16,d17 @ Ch(e,f,g) 760bc3d5698SJohn Baldwin vshr.u64 d24,d19,#28 761bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 762bc3d5698SJohn Baldwin vadd.i64 d27,d29,d18 763bc3d5698SJohn Baldwin vshr.u64 d25,d19,#34 764bc3d5698SJohn Baldwin vsli.64 d24,d19,#36 765bc3d5698SJohn Baldwin vadd.i64 d27,d26 766bc3d5698SJohn Baldwin vshr.u64 d26,d19,#39 767bc3d5698SJohn Baldwin vadd.i64 d28,d5 768bc3d5698SJohn Baldwin vsli.64 d25,d19,#30 769bc3d5698SJohn Baldwin veor d30,d19,d20 770bc3d5698SJohn Baldwin vsli.64 d26,d19,#25 771bc3d5698SJohn Baldwin veor d18,d24,d25 772bc3d5698SJohn Baldwin vadd.i64 d27,d28 773bc3d5698SJohn Baldwin vbsl d30,d21,d20 @ Maj(a,b,c) 774bc3d5698SJohn Baldwin veor d18,d26 @ Sigma0(a) 775bc3d5698SJohn Baldwin vadd.i64 d22,d27 776bc3d5698SJohn Baldwin vadd.i64 d30,d27 777bc3d5698SJohn Baldwin @ vadd.i64 d18,d30 778bc3d5698SJohn Baldwin vshr.u64 d24,d22,#14 @ 6 779bc3d5698SJohn Baldwin#if 6<16 780bc3d5698SJohn Baldwin vld1.64 {d6},[r1]! @ handles unaligned 781bc3d5698SJohn Baldwin#endif 782bc3d5698SJohn Baldwin vshr.u64 d25,d22,#18 783bc3d5698SJohn Baldwin#if 6>0 784bc3d5698SJohn Baldwin vadd.i64 d18,d30 @ h+=Maj from the past 785bc3d5698SJohn Baldwin#endif 786bc3d5698SJohn Baldwin vshr.u64 d26,d22,#41 787bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! 
@ K[i++] 788bc3d5698SJohn Baldwin vsli.64 d24,d22,#50 789bc3d5698SJohn Baldwin vsli.64 d25,d22,#46 790bc3d5698SJohn Baldwin vmov d29,d22 791bc3d5698SJohn Baldwin vsli.64 d26,d22,#23 792bc3d5698SJohn Baldwin#if 6<16 && defined(__ARMEL__) 793bc3d5698SJohn Baldwin vrev64.8 d6,d6 794bc3d5698SJohn Baldwin#endif 795bc3d5698SJohn Baldwin veor d25,d24 796bc3d5698SJohn Baldwin vbsl d29,d23,d16 @ Ch(e,f,g) 797bc3d5698SJohn Baldwin vshr.u64 d24,d18,#28 798bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 799bc3d5698SJohn Baldwin vadd.i64 d27,d29,d17 800bc3d5698SJohn Baldwin vshr.u64 d25,d18,#34 801bc3d5698SJohn Baldwin vsli.64 d24,d18,#36 802bc3d5698SJohn Baldwin vadd.i64 d27,d26 803bc3d5698SJohn Baldwin vshr.u64 d26,d18,#39 804bc3d5698SJohn Baldwin vadd.i64 d28,d6 805bc3d5698SJohn Baldwin vsli.64 d25,d18,#30 806bc3d5698SJohn Baldwin veor d30,d18,d19 807bc3d5698SJohn Baldwin vsli.64 d26,d18,#25 808bc3d5698SJohn Baldwin veor d17,d24,d25 809bc3d5698SJohn Baldwin vadd.i64 d27,d28 810bc3d5698SJohn Baldwin vbsl d30,d20,d19 @ Maj(a,b,c) 811bc3d5698SJohn Baldwin veor d17,d26 @ Sigma0(a) 812bc3d5698SJohn Baldwin vadd.i64 d21,d27 813bc3d5698SJohn Baldwin vadd.i64 d30,d27 814bc3d5698SJohn Baldwin @ vadd.i64 d17,d30 815bc3d5698SJohn Baldwin vshr.u64 d24,d21,#14 @ 7 816bc3d5698SJohn Baldwin#if 7<16 817bc3d5698SJohn Baldwin vld1.64 {d7},[r1]! @ handles unaligned 818bc3d5698SJohn Baldwin#endif 819bc3d5698SJohn Baldwin vshr.u64 d25,d21,#18 820bc3d5698SJohn Baldwin#if 7>0 821bc3d5698SJohn Baldwin vadd.i64 d17,d30 @ h+=Maj from the past 822bc3d5698SJohn Baldwin#endif 823bc3d5698SJohn Baldwin vshr.u64 d26,d21,#41 824bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! @ K[i++] 825bc3d5698SJohn Baldwin vsli.64 d24,d21,#50 826bc3d5698SJohn Baldwin vsli.64 d25,d21,#46 827bc3d5698SJohn Baldwin vmov d29,d21 828bc3d5698SJohn Baldwin vsli.64 d26,d21,#23 829bc3d5698SJohn Baldwin#if 7<16 && defined(__ARMEL__) 830bc3d5698SJohn Baldwin vrev64.8 d7,d7 831bc3d5698SJohn Baldwin#endif 832bc3d5698SJohn Baldwin veor d25,d24 833bc3d5698SJohn Baldwin vbsl d29,d22,d23 @ Ch(e,f,g) 834bc3d5698SJohn Baldwin vshr.u64 d24,d17,#28 835bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 836bc3d5698SJohn Baldwin vadd.i64 d27,d29,d16 837bc3d5698SJohn Baldwin vshr.u64 d25,d17,#34 838bc3d5698SJohn Baldwin vsli.64 d24,d17,#36 839bc3d5698SJohn Baldwin vadd.i64 d27,d26 840bc3d5698SJohn Baldwin vshr.u64 d26,d17,#39 841bc3d5698SJohn Baldwin vadd.i64 d28,d7 842bc3d5698SJohn Baldwin vsli.64 d25,d17,#30 843bc3d5698SJohn Baldwin veor d30,d17,d18 844bc3d5698SJohn Baldwin vsli.64 d26,d17,#25 845bc3d5698SJohn Baldwin veor d16,d24,d25 846bc3d5698SJohn Baldwin vadd.i64 d27,d28 847bc3d5698SJohn Baldwin vbsl d30,d19,d18 @ Maj(a,b,c) 848bc3d5698SJohn Baldwin veor d16,d26 @ Sigma0(a) 849bc3d5698SJohn Baldwin vadd.i64 d20,d27 850bc3d5698SJohn Baldwin vadd.i64 d30,d27 851bc3d5698SJohn Baldwin @ vadd.i64 d16,d30 852bc3d5698SJohn Baldwin vshr.u64 d24,d20,#14 @ 8 853bc3d5698SJohn Baldwin#if 8<16 854bc3d5698SJohn Baldwin vld1.64 {d8},[r1]! @ handles unaligned 855bc3d5698SJohn Baldwin#endif 856bc3d5698SJohn Baldwin vshr.u64 d25,d20,#18 857bc3d5698SJohn Baldwin#if 8>0 858bc3d5698SJohn Baldwin vadd.i64 d16,d30 @ h+=Maj from the past 859bc3d5698SJohn Baldwin#endif 860bc3d5698SJohn Baldwin vshr.u64 d26,d20,#41 861bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! 
@ K[i++] 862bc3d5698SJohn Baldwin vsli.64 d24,d20,#50 863bc3d5698SJohn Baldwin vsli.64 d25,d20,#46 864bc3d5698SJohn Baldwin vmov d29,d20 865bc3d5698SJohn Baldwin vsli.64 d26,d20,#23 866bc3d5698SJohn Baldwin#if 8<16 && defined(__ARMEL__) 867bc3d5698SJohn Baldwin vrev64.8 d8,d8 868bc3d5698SJohn Baldwin#endif 869bc3d5698SJohn Baldwin veor d25,d24 870bc3d5698SJohn Baldwin vbsl d29,d21,d22 @ Ch(e,f,g) 871bc3d5698SJohn Baldwin vshr.u64 d24,d16,#28 872bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 873bc3d5698SJohn Baldwin vadd.i64 d27,d29,d23 874bc3d5698SJohn Baldwin vshr.u64 d25,d16,#34 875bc3d5698SJohn Baldwin vsli.64 d24,d16,#36 876bc3d5698SJohn Baldwin vadd.i64 d27,d26 877bc3d5698SJohn Baldwin vshr.u64 d26,d16,#39 878bc3d5698SJohn Baldwin vadd.i64 d28,d8 879bc3d5698SJohn Baldwin vsli.64 d25,d16,#30 880bc3d5698SJohn Baldwin veor d30,d16,d17 881bc3d5698SJohn Baldwin vsli.64 d26,d16,#25 882bc3d5698SJohn Baldwin veor d23,d24,d25 883bc3d5698SJohn Baldwin vadd.i64 d27,d28 884bc3d5698SJohn Baldwin vbsl d30,d18,d17 @ Maj(a,b,c) 885bc3d5698SJohn Baldwin veor d23,d26 @ Sigma0(a) 886bc3d5698SJohn Baldwin vadd.i64 d19,d27 887bc3d5698SJohn Baldwin vadd.i64 d30,d27 888bc3d5698SJohn Baldwin @ vadd.i64 d23,d30 889bc3d5698SJohn Baldwin vshr.u64 d24,d19,#14 @ 9 890bc3d5698SJohn Baldwin#if 9<16 891bc3d5698SJohn Baldwin vld1.64 {d9},[r1]! @ handles unaligned 892bc3d5698SJohn Baldwin#endif 893bc3d5698SJohn Baldwin vshr.u64 d25,d19,#18 894bc3d5698SJohn Baldwin#if 9>0 895bc3d5698SJohn Baldwin vadd.i64 d23,d30 @ h+=Maj from the past 896bc3d5698SJohn Baldwin#endif 897bc3d5698SJohn Baldwin vshr.u64 d26,d19,#41 898bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! @ K[i++] 899bc3d5698SJohn Baldwin vsli.64 d24,d19,#50 900bc3d5698SJohn Baldwin vsli.64 d25,d19,#46 901bc3d5698SJohn Baldwin vmov d29,d19 902bc3d5698SJohn Baldwin vsli.64 d26,d19,#23 903bc3d5698SJohn Baldwin#if 9<16 && defined(__ARMEL__) 904bc3d5698SJohn Baldwin vrev64.8 d9,d9 905bc3d5698SJohn Baldwin#endif 906bc3d5698SJohn Baldwin veor d25,d24 907bc3d5698SJohn Baldwin vbsl d29,d20,d21 @ Ch(e,f,g) 908bc3d5698SJohn Baldwin vshr.u64 d24,d23,#28 909bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 910bc3d5698SJohn Baldwin vadd.i64 d27,d29,d22 911bc3d5698SJohn Baldwin vshr.u64 d25,d23,#34 912bc3d5698SJohn Baldwin vsli.64 d24,d23,#36 913bc3d5698SJohn Baldwin vadd.i64 d27,d26 914bc3d5698SJohn Baldwin vshr.u64 d26,d23,#39 915bc3d5698SJohn Baldwin vadd.i64 d28,d9 916bc3d5698SJohn Baldwin vsli.64 d25,d23,#30 917bc3d5698SJohn Baldwin veor d30,d23,d16 918bc3d5698SJohn Baldwin vsli.64 d26,d23,#25 919bc3d5698SJohn Baldwin veor d22,d24,d25 920bc3d5698SJohn Baldwin vadd.i64 d27,d28 921bc3d5698SJohn Baldwin vbsl d30,d17,d16 @ Maj(a,b,c) 922bc3d5698SJohn Baldwin veor d22,d26 @ Sigma0(a) 923bc3d5698SJohn Baldwin vadd.i64 d18,d27 924bc3d5698SJohn Baldwin vadd.i64 d30,d27 925bc3d5698SJohn Baldwin @ vadd.i64 d22,d30 926bc3d5698SJohn Baldwin vshr.u64 d24,d18,#14 @ 10 927bc3d5698SJohn Baldwin#if 10<16 928bc3d5698SJohn Baldwin vld1.64 {d10},[r1]! @ handles unaligned 929bc3d5698SJohn Baldwin#endif 930bc3d5698SJohn Baldwin vshr.u64 d25,d18,#18 931bc3d5698SJohn Baldwin#if 10>0 932bc3d5698SJohn Baldwin vadd.i64 d22,d30 @ h+=Maj from the past 933bc3d5698SJohn Baldwin#endif 934bc3d5698SJohn Baldwin vshr.u64 d26,d18,#41 935bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! 
@ K[i++] 936bc3d5698SJohn Baldwin vsli.64 d24,d18,#50 937bc3d5698SJohn Baldwin vsli.64 d25,d18,#46 938bc3d5698SJohn Baldwin vmov d29,d18 939bc3d5698SJohn Baldwin vsli.64 d26,d18,#23 940bc3d5698SJohn Baldwin#if 10<16 && defined(__ARMEL__) 941bc3d5698SJohn Baldwin vrev64.8 d10,d10 942bc3d5698SJohn Baldwin#endif 943bc3d5698SJohn Baldwin veor d25,d24 944bc3d5698SJohn Baldwin vbsl d29,d19,d20 @ Ch(e,f,g) 945bc3d5698SJohn Baldwin vshr.u64 d24,d22,#28 946bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 947bc3d5698SJohn Baldwin vadd.i64 d27,d29,d21 948bc3d5698SJohn Baldwin vshr.u64 d25,d22,#34 949bc3d5698SJohn Baldwin vsli.64 d24,d22,#36 950bc3d5698SJohn Baldwin vadd.i64 d27,d26 951bc3d5698SJohn Baldwin vshr.u64 d26,d22,#39 952bc3d5698SJohn Baldwin vadd.i64 d28,d10 953bc3d5698SJohn Baldwin vsli.64 d25,d22,#30 954bc3d5698SJohn Baldwin veor d30,d22,d23 955bc3d5698SJohn Baldwin vsli.64 d26,d22,#25 956bc3d5698SJohn Baldwin veor d21,d24,d25 957bc3d5698SJohn Baldwin vadd.i64 d27,d28 958bc3d5698SJohn Baldwin vbsl d30,d16,d23 @ Maj(a,b,c) 959bc3d5698SJohn Baldwin veor d21,d26 @ Sigma0(a) 960bc3d5698SJohn Baldwin vadd.i64 d17,d27 961bc3d5698SJohn Baldwin vadd.i64 d30,d27 962bc3d5698SJohn Baldwin @ vadd.i64 d21,d30 963bc3d5698SJohn Baldwin vshr.u64 d24,d17,#14 @ 11 964bc3d5698SJohn Baldwin#if 11<16 965bc3d5698SJohn Baldwin vld1.64 {d11},[r1]! @ handles unaligned 966bc3d5698SJohn Baldwin#endif 967bc3d5698SJohn Baldwin vshr.u64 d25,d17,#18 968bc3d5698SJohn Baldwin#if 11>0 969bc3d5698SJohn Baldwin vadd.i64 d21,d30 @ h+=Maj from the past 970bc3d5698SJohn Baldwin#endif 971bc3d5698SJohn Baldwin vshr.u64 d26,d17,#41 972bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! @ K[i++] 973bc3d5698SJohn Baldwin vsli.64 d24,d17,#50 974bc3d5698SJohn Baldwin vsli.64 d25,d17,#46 975bc3d5698SJohn Baldwin vmov d29,d17 976bc3d5698SJohn Baldwin vsli.64 d26,d17,#23 977bc3d5698SJohn Baldwin#if 11<16 && defined(__ARMEL__) 978bc3d5698SJohn Baldwin vrev64.8 d11,d11 979bc3d5698SJohn Baldwin#endif 980bc3d5698SJohn Baldwin veor d25,d24 981bc3d5698SJohn Baldwin vbsl d29,d18,d19 @ Ch(e,f,g) 982bc3d5698SJohn Baldwin vshr.u64 d24,d21,#28 983bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 984bc3d5698SJohn Baldwin vadd.i64 d27,d29,d20 985bc3d5698SJohn Baldwin vshr.u64 d25,d21,#34 986bc3d5698SJohn Baldwin vsli.64 d24,d21,#36 987bc3d5698SJohn Baldwin vadd.i64 d27,d26 988bc3d5698SJohn Baldwin vshr.u64 d26,d21,#39 989bc3d5698SJohn Baldwin vadd.i64 d28,d11 990bc3d5698SJohn Baldwin vsli.64 d25,d21,#30 991bc3d5698SJohn Baldwin veor d30,d21,d22 992bc3d5698SJohn Baldwin vsli.64 d26,d21,#25 993bc3d5698SJohn Baldwin veor d20,d24,d25 994bc3d5698SJohn Baldwin vadd.i64 d27,d28 995bc3d5698SJohn Baldwin vbsl d30,d23,d22 @ Maj(a,b,c) 996bc3d5698SJohn Baldwin veor d20,d26 @ Sigma0(a) 997bc3d5698SJohn Baldwin vadd.i64 d16,d27 998bc3d5698SJohn Baldwin vadd.i64 d30,d27 999bc3d5698SJohn Baldwin @ vadd.i64 d20,d30 1000bc3d5698SJohn Baldwin vshr.u64 d24,d16,#14 @ 12 1001bc3d5698SJohn Baldwin#if 12<16 1002bc3d5698SJohn Baldwin vld1.64 {d12},[r1]! @ handles unaligned 1003bc3d5698SJohn Baldwin#endif 1004bc3d5698SJohn Baldwin vshr.u64 d25,d16,#18 1005bc3d5698SJohn Baldwin#if 12>0 1006bc3d5698SJohn Baldwin vadd.i64 d20,d30 @ h+=Maj from the past 1007bc3d5698SJohn Baldwin#endif 1008bc3d5698SJohn Baldwin vshr.u64 d26,d16,#41 1009bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! 
@ K[i++] 1010bc3d5698SJohn Baldwin vsli.64 d24,d16,#50 1011bc3d5698SJohn Baldwin vsli.64 d25,d16,#46 1012bc3d5698SJohn Baldwin vmov d29,d16 1013bc3d5698SJohn Baldwin vsli.64 d26,d16,#23 1014bc3d5698SJohn Baldwin#if 12<16 && defined(__ARMEL__) 1015bc3d5698SJohn Baldwin vrev64.8 d12,d12 1016bc3d5698SJohn Baldwin#endif 1017bc3d5698SJohn Baldwin veor d25,d24 1018bc3d5698SJohn Baldwin vbsl d29,d17,d18 @ Ch(e,f,g) 1019bc3d5698SJohn Baldwin vshr.u64 d24,d20,#28 1020bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 1021bc3d5698SJohn Baldwin vadd.i64 d27,d29,d19 1022bc3d5698SJohn Baldwin vshr.u64 d25,d20,#34 1023bc3d5698SJohn Baldwin vsli.64 d24,d20,#36 1024bc3d5698SJohn Baldwin vadd.i64 d27,d26 1025bc3d5698SJohn Baldwin vshr.u64 d26,d20,#39 1026bc3d5698SJohn Baldwin vadd.i64 d28,d12 1027bc3d5698SJohn Baldwin vsli.64 d25,d20,#30 1028bc3d5698SJohn Baldwin veor d30,d20,d21 1029bc3d5698SJohn Baldwin vsli.64 d26,d20,#25 1030bc3d5698SJohn Baldwin veor d19,d24,d25 1031bc3d5698SJohn Baldwin vadd.i64 d27,d28 1032bc3d5698SJohn Baldwin vbsl d30,d22,d21 @ Maj(a,b,c) 1033bc3d5698SJohn Baldwin veor d19,d26 @ Sigma0(a) 1034bc3d5698SJohn Baldwin vadd.i64 d23,d27 1035bc3d5698SJohn Baldwin vadd.i64 d30,d27 1036bc3d5698SJohn Baldwin @ vadd.i64 d19,d30 1037bc3d5698SJohn Baldwin vshr.u64 d24,d23,#14 @ 13 1038bc3d5698SJohn Baldwin#if 13<16 1039bc3d5698SJohn Baldwin vld1.64 {d13},[r1]! @ handles unaligned 1040bc3d5698SJohn Baldwin#endif 1041bc3d5698SJohn Baldwin vshr.u64 d25,d23,#18 1042bc3d5698SJohn Baldwin#if 13>0 1043bc3d5698SJohn Baldwin vadd.i64 d19,d30 @ h+=Maj from the past 1044bc3d5698SJohn Baldwin#endif 1045bc3d5698SJohn Baldwin vshr.u64 d26,d23,#41 1046bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! @ K[i++] 1047bc3d5698SJohn Baldwin vsli.64 d24,d23,#50 1048bc3d5698SJohn Baldwin vsli.64 d25,d23,#46 1049bc3d5698SJohn Baldwin vmov d29,d23 1050bc3d5698SJohn Baldwin vsli.64 d26,d23,#23 1051bc3d5698SJohn Baldwin#if 13<16 && defined(__ARMEL__) 1052bc3d5698SJohn Baldwin vrev64.8 d13,d13 1053bc3d5698SJohn Baldwin#endif 1054bc3d5698SJohn Baldwin veor d25,d24 1055bc3d5698SJohn Baldwin vbsl d29,d16,d17 @ Ch(e,f,g) 1056bc3d5698SJohn Baldwin vshr.u64 d24,d19,#28 1057bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 1058bc3d5698SJohn Baldwin vadd.i64 d27,d29,d18 1059bc3d5698SJohn Baldwin vshr.u64 d25,d19,#34 1060bc3d5698SJohn Baldwin vsli.64 d24,d19,#36 1061bc3d5698SJohn Baldwin vadd.i64 d27,d26 1062bc3d5698SJohn Baldwin vshr.u64 d26,d19,#39 1063bc3d5698SJohn Baldwin vadd.i64 d28,d13 1064bc3d5698SJohn Baldwin vsli.64 d25,d19,#30 1065bc3d5698SJohn Baldwin veor d30,d19,d20 1066bc3d5698SJohn Baldwin vsli.64 d26,d19,#25 1067bc3d5698SJohn Baldwin veor d18,d24,d25 1068bc3d5698SJohn Baldwin vadd.i64 d27,d28 1069bc3d5698SJohn Baldwin vbsl d30,d21,d20 @ Maj(a,b,c) 1070bc3d5698SJohn Baldwin veor d18,d26 @ Sigma0(a) 1071bc3d5698SJohn Baldwin vadd.i64 d22,d27 1072bc3d5698SJohn Baldwin vadd.i64 d30,d27 1073bc3d5698SJohn Baldwin @ vadd.i64 d18,d30 1074bc3d5698SJohn Baldwin vshr.u64 d24,d22,#14 @ 14 1075bc3d5698SJohn Baldwin#if 14<16 1076bc3d5698SJohn Baldwin vld1.64 {d14},[r1]! @ handles unaligned 1077bc3d5698SJohn Baldwin#endif 1078bc3d5698SJohn Baldwin vshr.u64 d25,d22,#18 1079bc3d5698SJohn Baldwin#if 14>0 1080bc3d5698SJohn Baldwin vadd.i64 d18,d30 @ h+=Maj from the past 1081bc3d5698SJohn Baldwin#endif 1082bc3d5698SJohn Baldwin vshr.u64 d26,d22,#41 1083bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! 
@ K[i++] 1084bc3d5698SJohn Baldwin vsli.64 d24,d22,#50 1085bc3d5698SJohn Baldwin vsli.64 d25,d22,#46 1086bc3d5698SJohn Baldwin vmov d29,d22 1087bc3d5698SJohn Baldwin vsli.64 d26,d22,#23 1088bc3d5698SJohn Baldwin#if 14<16 && defined(__ARMEL__) 1089bc3d5698SJohn Baldwin vrev64.8 d14,d14 1090bc3d5698SJohn Baldwin#endif 1091bc3d5698SJohn Baldwin veor d25,d24 1092bc3d5698SJohn Baldwin vbsl d29,d23,d16 @ Ch(e,f,g) 1093bc3d5698SJohn Baldwin vshr.u64 d24,d18,#28 1094bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 1095bc3d5698SJohn Baldwin vadd.i64 d27,d29,d17 1096bc3d5698SJohn Baldwin vshr.u64 d25,d18,#34 1097bc3d5698SJohn Baldwin vsli.64 d24,d18,#36 1098bc3d5698SJohn Baldwin vadd.i64 d27,d26 1099bc3d5698SJohn Baldwin vshr.u64 d26,d18,#39 1100bc3d5698SJohn Baldwin vadd.i64 d28,d14 1101bc3d5698SJohn Baldwin vsli.64 d25,d18,#30 1102bc3d5698SJohn Baldwin veor d30,d18,d19 1103bc3d5698SJohn Baldwin vsli.64 d26,d18,#25 1104bc3d5698SJohn Baldwin veor d17,d24,d25 1105bc3d5698SJohn Baldwin vadd.i64 d27,d28 1106bc3d5698SJohn Baldwin vbsl d30,d20,d19 @ Maj(a,b,c) 1107bc3d5698SJohn Baldwin veor d17,d26 @ Sigma0(a) 1108bc3d5698SJohn Baldwin vadd.i64 d21,d27 1109bc3d5698SJohn Baldwin vadd.i64 d30,d27 1110bc3d5698SJohn Baldwin @ vadd.i64 d17,d30 1111bc3d5698SJohn Baldwin vshr.u64 d24,d21,#14 @ 15 1112bc3d5698SJohn Baldwin#if 15<16 1113bc3d5698SJohn Baldwin vld1.64 {d15},[r1]! @ handles unaligned 1114bc3d5698SJohn Baldwin#endif 1115bc3d5698SJohn Baldwin vshr.u64 d25,d21,#18 1116bc3d5698SJohn Baldwin#if 15>0 1117bc3d5698SJohn Baldwin vadd.i64 d17,d30 @ h+=Maj from the past 1118bc3d5698SJohn Baldwin#endif 1119bc3d5698SJohn Baldwin vshr.u64 d26,d21,#41 1120bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! @ K[i++] 1121bc3d5698SJohn Baldwin vsli.64 d24,d21,#50 1122bc3d5698SJohn Baldwin vsli.64 d25,d21,#46 1123bc3d5698SJohn Baldwin vmov d29,d21 1124bc3d5698SJohn Baldwin vsli.64 d26,d21,#23 1125bc3d5698SJohn Baldwin#if 15<16 && defined(__ARMEL__) 1126bc3d5698SJohn Baldwin vrev64.8 d15,d15 1127bc3d5698SJohn Baldwin#endif 1128bc3d5698SJohn Baldwin veor d25,d24 1129bc3d5698SJohn Baldwin vbsl d29,d22,d23 @ Ch(e,f,g) 1130bc3d5698SJohn Baldwin vshr.u64 d24,d17,#28 1131bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 1132bc3d5698SJohn Baldwin vadd.i64 d27,d29,d16 1133bc3d5698SJohn Baldwin vshr.u64 d25,d17,#34 1134bc3d5698SJohn Baldwin vsli.64 d24,d17,#36 1135bc3d5698SJohn Baldwin vadd.i64 d27,d26 1136bc3d5698SJohn Baldwin vshr.u64 d26,d17,#39 1137bc3d5698SJohn Baldwin vadd.i64 d28,d15 1138bc3d5698SJohn Baldwin vsli.64 d25,d17,#30 1139bc3d5698SJohn Baldwin veor d30,d17,d18 1140bc3d5698SJohn Baldwin vsli.64 d26,d17,#25 1141bc3d5698SJohn Baldwin veor d16,d24,d25 1142bc3d5698SJohn Baldwin vadd.i64 d27,d28 1143bc3d5698SJohn Baldwin vbsl d30,d19,d18 @ Maj(a,b,c) 1144bc3d5698SJohn Baldwin veor d16,d26 @ Sigma0(a) 1145bc3d5698SJohn Baldwin vadd.i64 d20,d27 1146bc3d5698SJohn Baldwin vadd.i64 d30,d27 1147bc3d5698SJohn Baldwin @ vadd.i64 d16,d30 1148bc3d5698SJohn Baldwin mov r12,#4 1149bc3d5698SJohn Baldwin.L16_79_neon: 1150bc3d5698SJohn Baldwin subs r12,#1 1151bc3d5698SJohn Baldwin vshr.u64 q12,q7,#19 1152bc3d5698SJohn Baldwin vshr.u64 q13,q7,#61 1153bc3d5698SJohn Baldwin vadd.i64 d16,d30 @ h+=Maj from the past 1154bc3d5698SJohn Baldwin vshr.u64 q15,q7,#6 1155bc3d5698SJohn Baldwin vsli.64 q12,q7,#45 1156bc3d5698SJohn Baldwin vext.8 q14,q0,q1,#8 @ X[i+1] 1157bc3d5698SJohn Baldwin vsli.64 q13,q7,#3 1158bc3d5698SJohn Baldwin veor q15,q12 1159bc3d5698SJohn Baldwin vshr.u64 q12,q14,#1 1160bc3d5698SJohn Baldwin veor q15,q13 @ sigma1(X[i+14]) 
1161bc3d5698SJohn Baldwin vshr.u64 q13,q14,#8 1162bc3d5698SJohn Baldwin vadd.i64 q0,q15 1163bc3d5698SJohn Baldwin vshr.u64 q15,q14,#7 1164bc3d5698SJohn Baldwin vsli.64 q12,q14,#63 1165bc3d5698SJohn Baldwin vsli.64 q13,q14,#56 1166bc3d5698SJohn Baldwin vext.8 q14,q4,q5,#8 @ X[i+9] 1167bc3d5698SJohn Baldwin veor q15,q12 1168bc3d5698SJohn Baldwin vshr.u64 d24,d20,#14 @ from NEON_00_15 1169bc3d5698SJohn Baldwin vadd.i64 q0,q14 1170bc3d5698SJohn Baldwin vshr.u64 d25,d20,#18 @ from NEON_00_15 1171bc3d5698SJohn Baldwin veor q15,q13 @ sigma0(X[i+1]) 1172bc3d5698SJohn Baldwin vshr.u64 d26,d20,#41 @ from NEON_00_15 1173bc3d5698SJohn Baldwin vadd.i64 q0,q15 1174bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! @ K[i++] 1175bc3d5698SJohn Baldwin vsli.64 d24,d20,#50 1176bc3d5698SJohn Baldwin vsli.64 d25,d20,#46 1177bc3d5698SJohn Baldwin vmov d29,d20 1178bc3d5698SJohn Baldwin vsli.64 d26,d20,#23 1179bc3d5698SJohn Baldwin#if 16<16 && defined(__ARMEL__) 1180bc3d5698SJohn Baldwin vrev64.8 , 1181bc3d5698SJohn Baldwin#endif 1182bc3d5698SJohn Baldwin veor d25,d24 1183bc3d5698SJohn Baldwin vbsl d29,d21,d22 @ Ch(e,f,g) 1184bc3d5698SJohn Baldwin vshr.u64 d24,d16,#28 1185bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 1186bc3d5698SJohn Baldwin vadd.i64 d27,d29,d23 1187bc3d5698SJohn Baldwin vshr.u64 d25,d16,#34 1188bc3d5698SJohn Baldwin vsli.64 d24,d16,#36 1189bc3d5698SJohn Baldwin vadd.i64 d27,d26 1190bc3d5698SJohn Baldwin vshr.u64 d26,d16,#39 1191bc3d5698SJohn Baldwin vadd.i64 d28,d0 1192bc3d5698SJohn Baldwin vsli.64 d25,d16,#30 1193bc3d5698SJohn Baldwin veor d30,d16,d17 1194bc3d5698SJohn Baldwin vsli.64 d26,d16,#25 1195bc3d5698SJohn Baldwin veor d23,d24,d25 1196bc3d5698SJohn Baldwin vadd.i64 d27,d28 1197bc3d5698SJohn Baldwin vbsl d30,d18,d17 @ Maj(a,b,c) 1198bc3d5698SJohn Baldwin veor d23,d26 @ Sigma0(a) 1199bc3d5698SJohn Baldwin vadd.i64 d19,d27 1200bc3d5698SJohn Baldwin vadd.i64 d30,d27 1201bc3d5698SJohn Baldwin @ vadd.i64 d23,d30 1202bc3d5698SJohn Baldwin vshr.u64 d24,d19,#14 @ 17 1203bc3d5698SJohn Baldwin#if 17<16 1204bc3d5698SJohn Baldwin vld1.64 {d1},[r1]! @ handles unaligned 1205bc3d5698SJohn Baldwin#endif 1206bc3d5698SJohn Baldwin vshr.u64 d25,d19,#18 1207bc3d5698SJohn Baldwin#if 17>0 1208bc3d5698SJohn Baldwin vadd.i64 d23,d30 @ h+=Maj from the past 1209bc3d5698SJohn Baldwin#endif 1210bc3d5698SJohn Baldwin vshr.u64 d26,d19,#41 1211bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! 
@ K[i++] 1212bc3d5698SJohn Baldwin vsli.64 d24,d19,#50 1213bc3d5698SJohn Baldwin vsli.64 d25,d19,#46 1214bc3d5698SJohn Baldwin vmov d29,d19 1215bc3d5698SJohn Baldwin vsli.64 d26,d19,#23 1216bc3d5698SJohn Baldwin#if 17<16 && defined(__ARMEL__) 1217bc3d5698SJohn Baldwin vrev64.8 , 1218bc3d5698SJohn Baldwin#endif 1219bc3d5698SJohn Baldwin veor d25,d24 1220bc3d5698SJohn Baldwin vbsl d29,d20,d21 @ Ch(e,f,g) 1221bc3d5698SJohn Baldwin vshr.u64 d24,d23,#28 1222bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 1223bc3d5698SJohn Baldwin vadd.i64 d27,d29,d22 1224bc3d5698SJohn Baldwin vshr.u64 d25,d23,#34 1225bc3d5698SJohn Baldwin vsli.64 d24,d23,#36 1226bc3d5698SJohn Baldwin vadd.i64 d27,d26 1227bc3d5698SJohn Baldwin vshr.u64 d26,d23,#39 1228bc3d5698SJohn Baldwin vadd.i64 d28,d1 1229bc3d5698SJohn Baldwin vsli.64 d25,d23,#30 1230bc3d5698SJohn Baldwin veor d30,d23,d16 1231bc3d5698SJohn Baldwin vsli.64 d26,d23,#25 1232bc3d5698SJohn Baldwin veor d22,d24,d25 1233bc3d5698SJohn Baldwin vadd.i64 d27,d28 1234bc3d5698SJohn Baldwin vbsl d30,d17,d16 @ Maj(a,b,c) 1235bc3d5698SJohn Baldwin veor d22,d26 @ Sigma0(a) 1236bc3d5698SJohn Baldwin vadd.i64 d18,d27 1237bc3d5698SJohn Baldwin vadd.i64 d30,d27 1238bc3d5698SJohn Baldwin @ vadd.i64 d22,d30 1239bc3d5698SJohn Baldwin vshr.u64 q12,q0,#19 1240bc3d5698SJohn Baldwin vshr.u64 q13,q0,#61 1241bc3d5698SJohn Baldwin vadd.i64 d22,d30 @ h+=Maj from the past 1242bc3d5698SJohn Baldwin vshr.u64 q15,q0,#6 1243bc3d5698SJohn Baldwin vsli.64 q12,q0,#45 1244bc3d5698SJohn Baldwin vext.8 q14,q1,q2,#8 @ X[i+1] 1245bc3d5698SJohn Baldwin vsli.64 q13,q0,#3 1246bc3d5698SJohn Baldwin veor q15,q12 1247bc3d5698SJohn Baldwin vshr.u64 q12,q14,#1 1248bc3d5698SJohn Baldwin veor q15,q13 @ sigma1(X[i+14]) 1249bc3d5698SJohn Baldwin vshr.u64 q13,q14,#8 1250bc3d5698SJohn Baldwin vadd.i64 q1,q15 1251bc3d5698SJohn Baldwin vshr.u64 q15,q14,#7 1252bc3d5698SJohn Baldwin vsli.64 q12,q14,#63 1253bc3d5698SJohn Baldwin vsli.64 q13,q14,#56 1254bc3d5698SJohn Baldwin vext.8 q14,q5,q6,#8 @ X[i+9] 1255bc3d5698SJohn Baldwin veor q15,q12 1256bc3d5698SJohn Baldwin vshr.u64 d24,d18,#14 @ from NEON_00_15 1257bc3d5698SJohn Baldwin vadd.i64 q1,q14 1258bc3d5698SJohn Baldwin vshr.u64 d25,d18,#18 @ from NEON_00_15 1259bc3d5698SJohn Baldwin veor q15,q13 @ sigma0(X[i+1]) 1260bc3d5698SJohn Baldwin vshr.u64 d26,d18,#41 @ from NEON_00_15 1261bc3d5698SJohn Baldwin vadd.i64 q1,q15 1262bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! 
@ K[i++] 1263bc3d5698SJohn Baldwin vsli.64 d24,d18,#50 1264bc3d5698SJohn Baldwin vsli.64 d25,d18,#46 1265bc3d5698SJohn Baldwin vmov d29,d18 1266bc3d5698SJohn Baldwin vsli.64 d26,d18,#23 1267bc3d5698SJohn Baldwin#if 18<16 && defined(__ARMEL__) 1268bc3d5698SJohn Baldwin vrev64.8 , 1269bc3d5698SJohn Baldwin#endif 1270bc3d5698SJohn Baldwin veor d25,d24 1271bc3d5698SJohn Baldwin vbsl d29,d19,d20 @ Ch(e,f,g) 1272bc3d5698SJohn Baldwin vshr.u64 d24,d22,#28 1273bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 1274bc3d5698SJohn Baldwin vadd.i64 d27,d29,d21 1275bc3d5698SJohn Baldwin vshr.u64 d25,d22,#34 1276bc3d5698SJohn Baldwin vsli.64 d24,d22,#36 1277bc3d5698SJohn Baldwin vadd.i64 d27,d26 1278bc3d5698SJohn Baldwin vshr.u64 d26,d22,#39 1279bc3d5698SJohn Baldwin vadd.i64 d28,d2 1280bc3d5698SJohn Baldwin vsli.64 d25,d22,#30 1281bc3d5698SJohn Baldwin veor d30,d22,d23 1282bc3d5698SJohn Baldwin vsli.64 d26,d22,#25 1283bc3d5698SJohn Baldwin veor d21,d24,d25 1284bc3d5698SJohn Baldwin vadd.i64 d27,d28 1285bc3d5698SJohn Baldwin vbsl d30,d16,d23 @ Maj(a,b,c) 1286bc3d5698SJohn Baldwin veor d21,d26 @ Sigma0(a) 1287bc3d5698SJohn Baldwin vadd.i64 d17,d27 1288bc3d5698SJohn Baldwin vadd.i64 d30,d27 1289bc3d5698SJohn Baldwin @ vadd.i64 d21,d30 1290bc3d5698SJohn Baldwin vshr.u64 d24,d17,#14 @ 19 1291bc3d5698SJohn Baldwin#if 19<16 1292bc3d5698SJohn Baldwin vld1.64 {d3},[r1]! @ handles unaligned 1293bc3d5698SJohn Baldwin#endif 1294bc3d5698SJohn Baldwin vshr.u64 d25,d17,#18 1295bc3d5698SJohn Baldwin#if 19>0 1296bc3d5698SJohn Baldwin vadd.i64 d21,d30 @ h+=Maj from the past 1297bc3d5698SJohn Baldwin#endif 1298bc3d5698SJohn Baldwin vshr.u64 d26,d17,#41 1299bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! @ K[i++] 1300bc3d5698SJohn Baldwin vsli.64 d24,d17,#50 1301bc3d5698SJohn Baldwin vsli.64 d25,d17,#46 1302bc3d5698SJohn Baldwin vmov d29,d17 1303bc3d5698SJohn Baldwin vsli.64 d26,d17,#23 1304bc3d5698SJohn Baldwin#if 19<16 && defined(__ARMEL__) 1305bc3d5698SJohn Baldwin vrev64.8 , 1306bc3d5698SJohn Baldwin#endif 1307bc3d5698SJohn Baldwin veor d25,d24 1308bc3d5698SJohn Baldwin vbsl d29,d18,d19 @ Ch(e,f,g) 1309bc3d5698SJohn Baldwin vshr.u64 d24,d21,#28 1310bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 1311bc3d5698SJohn Baldwin vadd.i64 d27,d29,d20 1312bc3d5698SJohn Baldwin vshr.u64 d25,d21,#34 1313bc3d5698SJohn Baldwin vsli.64 d24,d21,#36 1314bc3d5698SJohn Baldwin vadd.i64 d27,d26 1315bc3d5698SJohn Baldwin vshr.u64 d26,d21,#39 1316bc3d5698SJohn Baldwin vadd.i64 d28,d3 1317bc3d5698SJohn Baldwin vsli.64 d25,d21,#30 1318bc3d5698SJohn Baldwin veor d30,d21,d22 1319bc3d5698SJohn Baldwin vsli.64 d26,d21,#25 1320bc3d5698SJohn Baldwin veor d20,d24,d25 1321bc3d5698SJohn Baldwin vadd.i64 d27,d28 1322bc3d5698SJohn Baldwin vbsl d30,d23,d22 @ Maj(a,b,c) 1323bc3d5698SJohn Baldwin veor d20,d26 @ Sigma0(a) 1324bc3d5698SJohn Baldwin vadd.i64 d16,d27 1325bc3d5698SJohn Baldwin vadd.i64 d30,d27 1326bc3d5698SJohn Baldwin @ vadd.i64 d20,d30 1327bc3d5698SJohn Baldwin vshr.u64 q12,q1,#19 1328bc3d5698SJohn Baldwin vshr.u64 q13,q1,#61 1329bc3d5698SJohn Baldwin vadd.i64 d20,d30 @ h+=Maj from the past 1330bc3d5698SJohn Baldwin vshr.u64 q15,q1,#6 1331bc3d5698SJohn Baldwin vsli.64 q12,q1,#45 1332bc3d5698SJohn Baldwin vext.8 q14,q2,q3,#8 @ X[i+1] 1333bc3d5698SJohn Baldwin vsli.64 q13,q1,#3 1334bc3d5698SJohn Baldwin veor q15,q12 1335bc3d5698SJohn Baldwin vshr.u64 q12,q14,#1 1336bc3d5698SJohn Baldwin veor q15,q13 @ sigma1(X[i+14]) 1337bc3d5698SJohn Baldwin vshr.u64 q13,q14,#8 1338bc3d5698SJohn Baldwin vadd.i64 q2,q15 1339bc3d5698SJohn Baldwin vshr.u64 q15,q14,#7 
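@
@ Note on the "#if N<16" guards in this unrolled stretch: the round
@ number N is substituted before the C preprocessor runs, so for rounds
@ 16..79 the condition is false and the guarded input loads
@ (vld1.64 {dX},[r1]!) and little-endian byte swaps (the operand-less
@ "vrev64.8 ," lines) are dead text that never reaches the assembler.
@ Only rounds 0..15 fetch and byte-reverse message words from the input.
@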
1340bc3d5698SJohn Baldwin vsli.64 q12,q14,#63 1341bc3d5698SJohn Baldwin vsli.64 q13,q14,#56 1342bc3d5698SJohn Baldwin vext.8 q14,q6,q7,#8 @ X[i+9] 1343bc3d5698SJohn Baldwin veor q15,q12 1344bc3d5698SJohn Baldwin vshr.u64 d24,d16,#14 @ from NEON_00_15 1345bc3d5698SJohn Baldwin vadd.i64 q2,q14 1346bc3d5698SJohn Baldwin vshr.u64 d25,d16,#18 @ from NEON_00_15 1347bc3d5698SJohn Baldwin veor q15,q13 @ sigma0(X[i+1]) 1348bc3d5698SJohn Baldwin vshr.u64 d26,d16,#41 @ from NEON_00_15 1349bc3d5698SJohn Baldwin vadd.i64 q2,q15 1350bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! @ K[i++] 1351bc3d5698SJohn Baldwin vsli.64 d24,d16,#50 1352bc3d5698SJohn Baldwin vsli.64 d25,d16,#46 1353bc3d5698SJohn Baldwin vmov d29,d16 1354bc3d5698SJohn Baldwin vsli.64 d26,d16,#23 1355bc3d5698SJohn Baldwin#if 20<16 && defined(__ARMEL__) 1356bc3d5698SJohn Baldwin vrev64.8 , 1357bc3d5698SJohn Baldwin#endif 1358bc3d5698SJohn Baldwin veor d25,d24 1359bc3d5698SJohn Baldwin vbsl d29,d17,d18 @ Ch(e,f,g) 1360bc3d5698SJohn Baldwin vshr.u64 d24,d20,#28 1361bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 1362bc3d5698SJohn Baldwin vadd.i64 d27,d29,d19 1363bc3d5698SJohn Baldwin vshr.u64 d25,d20,#34 1364bc3d5698SJohn Baldwin vsli.64 d24,d20,#36 1365bc3d5698SJohn Baldwin vadd.i64 d27,d26 1366bc3d5698SJohn Baldwin vshr.u64 d26,d20,#39 1367bc3d5698SJohn Baldwin vadd.i64 d28,d4 1368bc3d5698SJohn Baldwin vsli.64 d25,d20,#30 1369bc3d5698SJohn Baldwin veor d30,d20,d21 1370bc3d5698SJohn Baldwin vsli.64 d26,d20,#25 1371bc3d5698SJohn Baldwin veor d19,d24,d25 1372bc3d5698SJohn Baldwin vadd.i64 d27,d28 1373bc3d5698SJohn Baldwin vbsl d30,d22,d21 @ Maj(a,b,c) 1374bc3d5698SJohn Baldwin veor d19,d26 @ Sigma0(a) 1375bc3d5698SJohn Baldwin vadd.i64 d23,d27 1376bc3d5698SJohn Baldwin vadd.i64 d30,d27 1377bc3d5698SJohn Baldwin @ vadd.i64 d19,d30 1378bc3d5698SJohn Baldwin vshr.u64 d24,d23,#14 @ 21 1379bc3d5698SJohn Baldwin#if 21<16 1380bc3d5698SJohn Baldwin vld1.64 {d5},[r1]! @ handles unaligned 1381bc3d5698SJohn Baldwin#endif 1382bc3d5698SJohn Baldwin vshr.u64 d25,d23,#18 1383bc3d5698SJohn Baldwin#if 21>0 1384bc3d5698SJohn Baldwin vadd.i64 d19,d30 @ h+=Maj from the past 1385bc3d5698SJohn Baldwin#endif 1386bc3d5698SJohn Baldwin vshr.u64 d26,d23,#41 1387bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! 
@ K[i++] 1388bc3d5698SJohn Baldwin vsli.64 d24,d23,#50 1389bc3d5698SJohn Baldwin vsli.64 d25,d23,#46 1390bc3d5698SJohn Baldwin vmov d29,d23 1391bc3d5698SJohn Baldwin vsli.64 d26,d23,#23 1392bc3d5698SJohn Baldwin#if 21<16 && defined(__ARMEL__) 1393bc3d5698SJohn Baldwin vrev64.8 , 1394bc3d5698SJohn Baldwin#endif 1395bc3d5698SJohn Baldwin veor d25,d24 1396bc3d5698SJohn Baldwin vbsl d29,d16,d17 @ Ch(e,f,g) 1397bc3d5698SJohn Baldwin vshr.u64 d24,d19,#28 1398bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 1399bc3d5698SJohn Baldwin vadd.i64 d27,d29,d18 1400bc3d5698SJohn Baldwin vshr.u64 d25,d19,#34 1401bc3d5698SJohn Baldwin vsli.64 d24,d19,#36 1402bc3d5698SJohn Baldwin vadd.i64 d27,d26 1403bc3d5698SJohn Baldwin vshr.u64 d26,d19,#39 1404bc3d5698SJohn Baldwin vadd.i64 d28,d5 1405bc3d5698SJohn Baldwin vsli.64 d25,d19,#30 1406bc3d5698SJohn Baldwin veor d30,d19,d20 1407bc3d5698SJohn Baldwin vsli.64 d26,d19,#25 1408bc3d5698SJohn Baldwin veor d18,d24,d25 1409bc3d5698SJohn Baldwin vadd.i64 d27,d28 1410bc3d5698SJohn Baldwin vbsl d30,d21,d20 @ Maj(a,b,c) 1411bc3d5698SJohn Baldwin veor d18,d26 @ Sigma0(a) 1412bc3d5698SJohn Baldwin vadd.i64 d22,d27 1413bc3d5698SJohn Baldwin vadd.i64 d30,d27 1414bc3d5698SJohn Baldwin @ vadd.i64 d18,d30 1415bc3d5698SJohn Baldwin vshr.u64 q12,q2,#19 1416bc3d5698SJohn Baldwin vshr.u64 q13,q2,#61 1417bc3d5698SJohn Baldwin vadd.i64 d18,d30 @ h+=Maj from the past 1418bc3d5698SJohn Baldwin vshr.u64 q15,q2,#6 1419bc3d5698SJohn Baldwin vsli.64 q12,q2,#45 1420bc3d5698SJohn Baldwin vext.8 q14,q3,q4,#8 @ X[i+1] 1421bc3d5698SJohn Baldwin vsli.64 q13,q2,#3 1422bc3d5698SJohn Baldwin veor q15,q12 1423bc3d5698SJohn Baldwin vshr.u64 q12,q14,#1 1424bc3d5698SJohn Baldwin veor q15,q13 @ sigma1(X[i+14]) 1425bc3d5698SJohn Baldwin vshr.u64 q13,q14,#8 1426bc3d5698SJohn Baldwin vadd.i64 q3,q15 1427bc3d5698SJohn Baldwin vshr.u64 q15,q14,#7 1428bc3d5698SJohn Baldwin vsli.64 q12,q14,#63 1429bc3d5698SJohn Baldwin vsli.64 q13,q14,#56 1430bc3d5698SJohn Baldwin vext.8 q14,q7,q0,#8 @ X[i+9] 1431bc3d5698SJohn Baldwin veor q15,q12 1432bc3d5698SJohn Baldwin vshr.u64 d24,d22,#14 @ from NEON_00_15 1433bc3d5698SJohn Baldwin vadd.i64 q3,q14 1434bc3d5698SJohn Baldwin vshr.u64 d25,d22,#18 @ from NEON_00_15 1435bc3d5698SJohn Baldwin veor q15,q13 @ sigma0(X[i+1]) 1436bc3d5698SJohn Baldwin vshr.u64 d26,d22,#41 @ from NEON_00_15 1437bc3d5698SJohn Baldwin vadd.i64 q3,q15 1438bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! 
@ K[i++] 1439bc3d5698SJohn Baldwin vsli.64 d24,d22,#50 1440bc3d5698SJohn Baldwin vsli.64 d25,d22,#46 1441bc3d5698SJohn Baldwin vmov d29,d22 1442bc3d5698SJohn Baldwin vsli.64 d26,d22,#23 1443bc3d5698SJohn Baldwin#if 22<16 && defined(__ARMEL__) 1444bc3d5698SJohn Baldwin vrev64.8 , 1445bc3d5698SJohn Baldwin#endif 1446bc3d5698SJohn Baldwin veor d25,d24 1447bc3d5698SJohn Baldwin vbsl d29,d23,d16 @ Ch(e,f,g) 1448bc3d5698SJohn Baldwin vshr.u64 d24,d18,#28 1449bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 1450bc3d5698SJohn Baldwin vadd.i64 d27,d29,d17 1451bc3d5698SJohn Baldwin vshr.u64 d25,d18,#34 1452bc3d5698SJohn Baldwin vsli.64 d24,d18,#36 1453bc3d5698SJohn Baldwin vadd.i64 d27,d26 1454bc3d5698SJohn Baldwin vshr.u64 d26,d18,#39 1455bc3d5698SJohn Baldwin vadd.i64 d28,d6 1456bc3d5698SJohn Baldwin vsli.64 d25,d18,#30 1457bc3d5698SJohn Baldwin veor d30,d18,d19 1458bc3d5698SJohn Baldwin vsli.64 d26,d18,#25 1459bc3d5698SJohn Baldwin veor d17,d24,d25 1460bc3d5698SJohn Baldwin vadd.i64 d27,d28 1461bc3d5698SJohn Baldwin vbsl d30,d20,d19 @ Maj(a,b,c) 1462bc3d5698SJohn Baldwin veor d17,d26 @ Sigma0(a) 1463bc3d5698SJohn Baldwin vadd.i64 d21,d27 1464bc3d5698SJohn Baldwin vadd.i64 d30,d27 1465bc3d5698SJohn Baldwin @ vadd.i64 d17,d30 1466bc3d5698SJohn Baldwin vshr.u64 d24,d21,#14 @ 23 1467bc3d5698SJohn Baldwin#if 23<16 1468bc3d5698SJohn Baldwin vld1.64 {d7},[r1]! @ handles unaligned 1469bc3d5698SJohn Baldwin#endif 1470bc3d5698SJohn Baldwin vshr.u64 d25,d21,#18 1471bc3d5698SJohn Baldwin#if 23>0 1472bc3d5698SJohn Baldwin vadd.i64 d17,d30 @ h+=Maj from the past 1473bc3d5698SJohn Baldwin#endif 1474bc3d5698SJohn Baldwin vshr.u64 d26,d21,#41 1475bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! @ K[i++] 1476bc3d5698SJohn Baldwin vsli.64 d24,d21,#50 1477bc3d5698SJohn Baldwin vsli.64 d25,d21,#46 1478bc3d5698SJohn Baldwin vmov d29,d21 1479bc3d5698SJohn Baldwin vsli.64 d26,d21,#23 1480bc3d5698SJohn Baldwin#if 23<16 && defined(__ARMEL__) 1481bc3d5698SJohn Baldwin vrev64.8 , 1482bc3d5698SJohn Baldwin#endif 1483bc3d5698SJohn Baldwin veor d25,d24 1484bc3d5698SJohn Baldwin vbsl d29,d22,d23 @ Ch(e,f,g) 1485bc3d5698SJohn Baldwin vshr.u64 d24,d17,#28 1486bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 1487bc3d5698SJohn Baldwin vadd.i64 d27,d29,d16 1488bc3d5698SJohn Baldwin vshr.u64 d25,d17,#34 1489bc3d5698SJohn Baldwin vsli.64 d24,d17,#36 1490bc3d5698SJohn Baldwin vadd.i64 d27,d26 1491bc3d5698SJohn Baldwin vshr.u64 d26,d17,#39 1492bc3d5698SJohn Baldwin vadd.i64 d28,d7 1493bc3d5698SJohn Baldwin vsli.64 d25,d17,#30 1494bc3d5698SJohn Baldwin veor d30,d17,d18 1495bc3d5698SJohn Baldwin vsli.64 d26,d17,#25 1496bc3d5698SJohn Baldwin veor d16,d24,d25 1497bc3d5698SJohn Baldwin vadd.i64 d27,d28 1498bc3d5698SJohn Baldwin vbsl d30,d19,d18 @ Maj(a,b,c) 1499bc3d5698SJohn Baldwin veor d16,d26 @ Sigma0(a) 1500bc3d5698SJohn Baldwin vadd.i64 d20,d27 1501bc3d5698SJohn Baldwin vadd.i64 d30,d27 1502bc3d5698SJohn Baldwin @ vadd.i64 d16,d30 1503bc3d5698SJohn Baldwin vshr.u64 q12,q3,#19 1504bc3d5698SJohn Baldwin vshr.u64 q13,q3,#61 1505bc3d5698SJohn Baldwin vadd.i64 d16,d30 @ h+=Maj from the past 1506bc3d5698SJohn Baldwin vshr.u64 q15,q3,#6 1507bc3d5698SJohn Baldwin vsli.64 q12,q3,#45 1508bc3d5698SJohn Baldwin vext.8 q14,q4,q5,#8 @ X[i+1] 1509bc3d5698SJohn Baldwin vsli.64 q13,q3,#3 1510bc3d5698SJohn Baldwin veor q15,q12 1511bc3d5698SJohn Baldwin vshr.u64 q12,q14,#1 1512bc3d5698SJohn Baldwin veor q15,q13 @ sigma1(X[i+14]) 1513bc3d5698SJohn Baldwin vshr.u64 q13,q14,#8 1514bc3d5698SJohn Baldwin vadd.i64 q4,q15 1515bc3d5698SJohn Baldwin vshr.u64 q15,q14,#7 
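@
@ Message-schedule sketch for these 16..79 rounds: the sixteen live
@ schedule words sit in q0-q7 (two 64-bit words per register) and are
@ updated in place, two at a time, as
@   X[i] += sigma1(X[i+14]) + X[i+9] + sigma0(X[i+1])
@   sigma0(x) = ROTR(x,1)  ^ ROTR(x,8)  ^ SHR(x,7)
@   sigma1(x) = ROTR(x,19) ^ ROTR(x,61) ^ SHR(x,6)
@ with indices taken modulo 16; vext.8 builds the X[i+1] and X[i+9]
@ pairs that straddle two q registers.
@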
1516bc3d5698SJohn Baldwin vsli.64 q12,q14,#63 1517bc3d5698SJohn Baldwin vsli.64 q13,q14,#56 1518bc3d5698SJohn Baldwin vext.8 q14,q0,q1,#8 @ X[i+9] 1519bc3d5698SJohn Baldwin veor q15,q12 1520bc3d5698SJohn Baldwin vshr.u64 d24,d20,#14 @ from NEON_00_15 1521bc3d5698SJohn Baldwin vadd.i64 q4,q14 1522bc3d5698SJohn Baldwin vshr.u64 d25,d20,#18 @ from NEON_00_15 1523bc3d5698SJohn Baldwin veor q15,q13 @ sigma0(X[i+1]) 1524bc3d5698SJohn Baldwin vshr.u64 d26,d20,#41 @ from NEON_00_15 1525bc3d5698SJohn Baldwin vadd.i64 q4,q15 1526bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! @ K[i++] 1527bc3d5698SJohn Baldwin vsli.64 d24,d20,#50 1528bc3d5698SJohn Baldwin vsli.64 d25,d20,#46 1529bc3d5698SJohn Baldwin vmov d29,d20 1530bc3d5698SJohn Baldwin vsli.64 d26,d20,#23 1531bc3d5698SJohn Baldwin#if 24<16 && defined(__ARMEL__) 1532bc3d5698SJohn Baldwin vrev64.8 , 1533bc3d5698SJohn Baldwin#endif 1534bc3d5698SJohn Baldwin veor d25,d24 1535bc3d5698SJohn Baldwin vbsl d29,d21,d22 @ Ch(e,f,g) 1536bc3d5698SJohn Baldwin vshr.u64 d24,d16,#28 1537bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 1538bc3d5698SJohn Baldwin vadd.i64 d27,d29,d23 1539bc3d5698SJohn Baldwin vshr.u64 d25,d16,#34 1540bc3d5698SJohn Baldwin vsli.64 d24,d16,#36 1541bc3d5698SJohn Baldwin vadd.i64 d27,d26 1542bc3d5698SJohn Baldwin vshr.u64 d26,d16,#39 1543bc3d5698SJohn Baldwin vadd.i64 d28,d8 1544bc3d5698SJohn Baldwin vsli.64 d25,d16,#30 1545bc3d5698SJohn Baldwin veor d30,d16,d17 1546bc3d5698SJohn Baldwin vsli.64 d26,d16,#25 1547bc3d5698SJohn Baldwin veor d23,d24,d25 1548bc3d5698SJohn Baldwin vadd.i64 d27,d28 1549bc3d5698SJohn Baldwin vbsl d30,d18,d17 @ Maj(a,b,c) 1550bc3d5698SJohn Baldwin veor d23,d26 @ Sigma0(a) 1551bc3d5698SJohn Baldwin vadd.i64 d19,d27 1552bc3d5698SJohn Baldwin vadd.i64 d30,d27 1553bc3d5698SJohn Baldwin @ vadd.i64 d23,d30 1554bc3d5698SJohn Baldwin vshr.u64 d24,d19,#14 @ 25 1555bc3d5698SJohn Baldwin#if 25<16 1556bc3d5698SJohn Baldwin vld1.64 {d9},[r1]! @ handles unaligned 1557bc3d5698SJohn Baldwin#endif 1558bc3d5698SJohn Baldwin vshr.u64 d25,d19,#18 1559bc3d5698SJohn Baldwin#if 25>0 1560bc3d5698SJohn Baldwin vadd.i64 d23,d30 @ h+=Maj from the past 1561bc3d5698SJohn Baldwin#endif 1562bc3d5698SJohn Baldwin vshr.u64 d26,d19,#41 1563bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! 
@ K[i++] 1564bc3d5698SJohn Baldwin vsli.64 d24,d19,#50 1565bc3d5698SJohn Baldwin vsli.64 d25,d19,#46 1566bc3d5698SJohn Baldwin vmov d29,d19 1567bc3d5698SJohn Baldwin vsli.64 d26,d19,#23 1568bc3d5698SJohn Baldwin#if 25<16 && defined(__ARMEL__) 1569bc3d5698SJohn Baldwin vrev64.8 , 1570bc3d5698SJohn Baldwin#endif 1571bc3d5698SJohn Baldwin veor d25,d24 1572bc3d5698SJohn Baldwin vbsl d29,d20,d21 @ Ch(e,f,g) 1573bc3d5698SJohn Baldwin vshr.u64 d24,d23,#28 1574bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 1575bc3d5698SJohn Baldwin vadd.i64 d27,d29,d22 1576bc3d5698SJohn Baldwin vshr.u64 d25,d23,#34 1577bc3d5698SJohn Baldwin vsli.64 d24,d23,#36 1578bc3d5698SJohn Baldwin vadd.i64 d27,d26 1579bc3d5698SJohn Baldwin vshr.u64 d26,d23,#39 1580bc3d5698SJohn Baldwin vadd.i64 d28,d9 1581bc3d5698SJohn Baldwin vsli.64 d25,d23,#30 1582bc3d5698SJohn Baldwin veor d30,d23,d16 1583bc3d5698SJohn Baldwin vsli.64 d26,d23,#25 1584bc3d5698SJohn Baldwin veor d22,d24,d25 1585bc3d5698SJohn Baldwin vadd.i64 d27,d28 1586bc3d5698SJohn Baldwin vbsl d30,d17,d16 @ Maj(a,b,c) 1587bc3d5698SJohn Baldwin veor d22,d26 @ Sigma0(a) 1588bc3d5698SJohn Baldwin vadd.i64 d18,d27 1589bc3d5698SJohn Baldwin vadd.i64 d30,d27 1590bc3d5698SJohn Baldwin @ vadd.i64 d22,d30 1591bc3d5698SJohn Baldwin vshr.u64 q12,q4,#19 1592bc3d5698SJohn Baldwin vshr.u64 q13,q4,#61 1593bc3d5698SJohn Baldwin vadd.i64 d22,d30 @ h+=Maj from the past 1594bc3d5698SJohn Baldwin vshr.u64 q15,q4,#6 1595bc3d5698SJohn Baldwin vsli.64 q12,q4,#45 1596bc3d5698SJohn Baldwin vext.8 q14,q5,q6,#8 @ X[i+1] 1597bc3d5698SJohn Baldwin vsli.64 q13,q4,#3 1598bc3d5698SJohn Baldwin veor q15,q12 1599bc3d5698SJohn Baldwin vshr.u64 q12,q14,#1 1600bc3d5698SJohn Baldwin veor q15,q13 @ sigma1(X[i+14]) 1601bc3d5698SJohn Baldwin vshr.u64 q13,q14,#8 1602bc3d5698SJohn Baldwin vadd.i64 q5,q15 1603bc3d5698SJohn Baldwin vshr.u64 q15,q14,#7 1604bc3d5698SJohn Baldwin vsli.64 q12,q14,#63 1605bc3d5698SJohn Baldwin vsli.64 q13,q14,#56 1606bc3d5698SJohn Baldwin vext.8 q14,q1,q2,#8 @ X[i+9] 1607bc3d5698SJohn Baldwin veor q15,q12 1608bc3d5698SJohn Baldwin vshr.u64 d24,d18,#14 @ from NEON_00_15 1609bc3d5698SJohn Baldwin vadd.i64 q5,q14 1610bc3d5698SJohn Baldwin vshr.u64 d25,d18,#18 @ from NEON_00_15 1611bc3d5698SJohn Baldwin veor q15,q13 @ sigma0(X[i+1]) 1612bc3d5698SJohn Baldwin vshr.u64 d26,d18,#41 @ from NEON_00_15 1613bc3d5698SJohn Baldwin vadd.i64 q5,q15 1614bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! 
@ K[i++] 1615bc3d5698SJohn Baldwin vsli.64 d24,d18,#50 1616bc3d5698SJohn Baldwin vsli.64 d25,d18,#46 1617bc3d5698SJohn Baldwin vmov d29,d18 1618bc3d5698SJohn Baldwin vsli.64 d26,d18,#23 1619bc3d5698SJohn Baldwin#if 26<16 && defined(__ARMEL__) 1620bc3d5698SJohn Baldwin vrev64.8 , 1621bc3d5698SJohn Baldwin#endif 1622bc3d5698SJohn Baldwin veor d25,d24 1623bc3d5698SJohn Baldwin vbsl d29,d19,d20 @ Ch(e,f,g) 1624bc3d5698SJohn Baldwin vshr.u64 d24,d22,#28 1625bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 1626bc3d5698SJohn Baldwin vadd.i64 d27,d29,d21 1627bc3d5698SJohn Baldwin vshr.u64 d25,d22,#34 1628bc3d5698SJohn Baldwin vsli.64 d24,d22,#36 1629bc3d5698SJohn Baldwin vadd.i64 d27,d26 1630bc3d5698SJohn Baldwin vshr.u64 d26,d22,#39 1631bc3d5698SJohn Baldwin vadd.i64 d28,d10 1632bc3d5698SJohn Baldwin vsli.64 d25,d22,#30 1633bc3d5698SJohn Baldwin veor d30,d22,d23 1634bc3d5698SJohn Baldwin vsli.64 d26,d22,#25 1635bc3d5698SJohn Baldwin veor d21,d24,d25 1636bc3d5698SJohn Baldwin vadd.i64 d27,d28 1637bc3d5698SJohn Baldwin vbsl d30,d16,d23 @ Maj(a,b,c) 1638bc3d5698SJohn Baldwin veor d21,d26 @ Sigma0(a) 1639bc3d5698SJohn Baldwin vadd.i64 d17,d27 1640bc3d5698SJohn Baldwin vadd.i64 d30,d27 1641bc3d5698SJohn Baldwin @ vadd.i64 d21,d30 1642bc3d5698SJohn Baldwin vshr.u64 d24,d17,#14 @ 27 1643bc3d5698SJohn Baldwin#if 27<16 1644bc3d5698SJohn Baldwin vld1.64 {d11},[r1]! @ handles unaligned 1645bc3d5698SJohn Baldwin#endif 1646bc3d5698SJohn Baldwin vshr.u64 d25,d17,#18 1647bc3d5698SJohn Baldwin#if 27>0 1648bc3d5698SJohn Baldwin vadd.i64 d21,d30 @ h+=Maj from the past 1649bc3d5698SJohn Baldwin#endif 1650bc3d5698SJohn Baldwin vshr.u64 d26,d17,#41 1651bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! @ K[i++] 1652bc3d5698SJohn Baldwin vsli.64 d24,d17,#50 1653bc3d5698SJohn Baldwin vsli.64 d25,d17,#46 1654bc3d5698SJohn Baldwin vmov d29,d17 1655bc3d5698SJohn Baldwin vsli.64 d26,d17,#23 1656bc3d5698SJohn Baldwin#if 27<16 && defined(__ARMEL__) 1657bc3d5698SJohn Baldwin vrev64.8 , 1658bc3d5698SJohn Baldwin#endif 1659bc3d5698SJohn Baldwin veor d25,d24 1660bc3d5698SJohn Baldwin vbsl d29,d18,d19 @ Ch(e,f,g) 1661bc3d5698SJohn Baldwin vshr.u64 d24,d21,#28 1662bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 1663bc3d5698SJohn Baldwin vadd.i64 d27,d29,d20 1664bc3d5698SJohn Baldwin vshr.u64 d25,d21,#34 1665bc3d5698SJohn Baldwin vsli.64 d24,d21,#36 1666bc3d5698SJohn Baldwin vadd.i64 d27,d26 1667bc3d5698SJohn Baldwin vshr.u64 d26,d21,#39 1668bc3d5698SJohn Baldwin vadd.i64 d28,d11 1669bc3d5698SJohn Baldwin vsli.64 d25,d21,#30 1670bc3d5698SJohn Baldwin veor d30,d21,d22 1671bc3d5698SJohn Baldwin vsli.64 d26,d21,#25 1672bc3d5698SJohn Baldwin veor d20,d24,d25 1673bc3d5698SJohn Baldwin vadd.i64 d27,d28 1674bc3d5698SJohn Baldwin vbsl d30,d23,d22 @ Maj(a,b,c) 1675bc3d5698SJohn Baldwin veor d20,d26 @ Sigma0(a) 1676bc3d5698SJohn Baldwin vadd.i64 d16,d27 1677bc3d5698SJohn Baldwin vadd.i64 d30,d27 1678bc3d5698SJohn Baldwin @ vadd.i64 d20,d30 1679bc3d5698SJohn Baldwin vshr.u64 q12,q5,#19 1680bc3d5698SJohn Baldwin vshr.u64 q13,q5,#61 1681bc3d5698SJohn Baldwin vadd.i64 d20,d30 @ h+=Maj from the past 1682bc3d5698SJohn Baldwin vshr.u64 q15,q5,#6 1683bc3d5698SJohn Baldwin vsli.64 q12,q5,#45 1684bc3d5698SJohn Baldwin vext.8 q14,q6,q7,#8 @ X[i+1] 1685bc3d5698SJohn Baldwin vsli.64 q13,q5,#3 1686bc3d5698SJohn Baldwin veor q15,q12 1687bc3d5698SJohn Baldwin vshr.u64 q12,q14,#1 1688bc3d5698SJohn Baldwin veor q15,q13 @ sigma1(X[i+14]) 1689bc3d5698SJohn Baldwin vshr.u64 q13,q14,#8 1690bc3d5698SJohn Baldwin vadd.i64 q6,q15 1691bc3d5698SJohn Baldwin vshr.u64 
q15,q14,#7 1692bc3d5698SJohn Baldwin vsli.64 q12,q14,#63 1693bc3d5698SJohn Baldwin vsli.64 q13,q14,#56 1694bc3d5698SJohn Baldwin vext.8 q14,q2,q3,#8 @ X[i+9] 1695bc3d5698SJohn Baldwin veor q15,q12 1696bc3d5698SJohn Baldwin vshr.u64 d24,d16,#14 @ from NEON_00_15 1697bc3d5698SJohn Baldwin vadd.i64 q6,q14 1698bc3d5698SJohn Baldwin vshr.u64 d25,d16,#18 @ from NEON_00_15 1699bc3d5698SJohn Baldwin veor q15,q13 @ sigma0(X[i+1]) 1700bc3d5698SJohn Baldwin vshr.u64 d26,d16,#41 @ from NEON_00_15 1701bc3d5698SJohn Baldwin vadd.i64 q6,q15 1702bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! @ K[i++] 1703bc3d5698SJohn Baldwin vsli.64 d24,d16,#50 1704bc3d5698SJohn Baldwin vsli.64 d25,d16,#46 1705bc3d5698SJohn Baldwin vmov d29,d16 1706bc3d5698SJohn Baldwin vsli.64 d26,d16,#23 1707bc3d5698SJohn Baldwin#if 28<16 && defined(__ARMEL__) 1708bc3d5698SJohn Baldwin vrev64.8 , 1709bc3d5698SJohn Baldwin#endif 1710bc3d5698SJohn Baldwin veor d25,d24 1711bc3d5698SJohn Baldwin vbsl d29,d17,d18 @ Ch(e,f,g) 1712bc3d5698SJohn Baldwin vshr.u64 d24,d20,#28 1713bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 1714bc3d5698SJohn Baldwin vadd.i64 d27,d29,d19 1715bc3d5698SJohn Baldwin vshr.u64 d25,d20,#34 1716bc3d5698SJohn Baldwin vsli.64 d24,d20,#36 1717bc3d5698SJohn Baldwin vadd.i64 d27,d26 1718bc3d5698SJohn Baldwin vshr.u64 d26,d20,#39 1719bc3d5698SJohn Baldwin vadd.i64 d28,d12 1720bc3d5698SJohn Baldwin vsli.64 d25,d20,#30 1721bc3d5698SJohn Baldwin veor d30,d20,d21 1722bc3d5698SJohn Baldwin vsli.64 d26,d20,#25 1723bc3d5698SJohn Baldwin veor d19,d24,d25 1724bc3d5698SJohn Baldwin vadd.i64 d27,d28 1725bc3d5698SJohn Baldwin vbsl d30,d22,d21 @ Maj(a,b,c) 1726bc3d5698SJohn Baldwin veor d19,d26 @ Sigma0(a) 1727bc3d5698SJohn Baldwin vadd.i64 d23,d27 1728bc3d5698SJohn Baldwin vadd.i64 d30,d27 1729bc3d5698SJohn Baldwin @ vadd.i64 d19,d30 1730bc3d5698SJohn Baldwin vshr.u64 d24,d23,#14 @ 29 1731bc3d5698SJohn Baldwin#if 29<16 1732bc3d5698SJohn Baldwin vld1.64 {d13},[r1]! @ handles unaligned 1733bc3d5698SJohn Baldwin#endif 1734bc3d5698SJohn Baldwin vshr.u64 d25,d23,#18 1735bc3d5698SJohn Baldwin#if 29>0 1736bc3d5698SJohn Baldwin vadd.i64 d19,d30 @ h+=Maj from the past 1737bc3d5698SJohn Baldwin#endif 1738bc3d5698SJohn Baldwin vshr.u64 d26,d23,#41 1739bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! 
@ K[i++] 1740bc3d5698SJohn Baldwin vsli.64 d24,d23,#50 1741bc3d5698SJohn Baldwin vsli.64 d25,d23,#46 1742bc3d5698SJohn Baldwin vmov d29,d23 1743bc3d5698SJohn Baldwin vsli.64 d26,d23,#23 1744bc3d5698SJohn Baldwin#if 29<16 && defined(__ARMEL__) 1745bc3d5698SJohn Baldwin vrev64.8 , 1746bc3d5698SJohn Baldwin#endif 1747bc3d5698SJohn Baldwin veor d25,d24 1748bc3d5698SJohn Baldwin vbsl d29,d16,d17 @ Ch(e,f,g) 1749bc3d5698SJohn Baldwin vshr.u64 d24,d19,#28 1750bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 1751bc3d5698SJohn Baldwin vadd.i64 d27,d29,d18 1752bc3d5698SJohn Baldwin vshr.u64 d25,d19,#34 1753bc3d5698SJohn Baldwin vsli.64 d24,d19,#36 1754bc3d5698SJohn Baldwin vadd.i64 d27,d26 1755bc3d5698SJohn Baldwin vshr.u64 d26,d19,#39 1756bc3d5698SJohn Baldwin vadd.i64 d28,d13 1757bc3d5698SJohn Baldwin vsli.64 d25,d19,#30 1758bc3d5698SJohn Baldwin veor d30,d19,d20 1759bc3d5698SJohn Baldwin vsli.64 d26,d19,#25 1760bc3d5698SJohn Baldwin veor d18,d24,d25 1761bc3d5698SJohn Baldwin vadd.i64 d27,d28 1762bc3d5698SJohn Baldwin vbsl d30,d21,d20 @ Maj(a,b,c) 1763bc3d5698SJohn Baldwin veor d18,d26 @ Sigma0(a) 1764bc3d5698SJohn Baldwin vadd.i64 d22,d27 1765bc3d5698SJohn Baldwin vadd.i64 d30,d27 1766bc3d5698SJohn Baldwin @ vadd.i64 d18,d30 1767bc3d5698SJohn Baldwin vshr.u64 q12,q6,#19 1768bc3d5698SJohn Baldwin vshr.u64 q13,q6,#61 1769bc3d5698SJohn Baldwin vadd.i64 d18,d30 @ h+=Maj from the past 1770bc3d5698SJohn Baldwin vshr.u64 q15,q6,#6 1771bc3d5698SJohn Baldwin vsli.64 q12,q6,#45 1772bc3d5698SJohn Baldwin vext.8 q14,q7,q0,#8 @ X[i+1] 1773bc3d5698SJohn Baldwin vsli.64 q13,q6,#3 1774bc3d5698SJohn Baldwin veor q15,q12 1775bc3d5698SJohn Baldwin vshr.u64 q12,q14,#1 1776bc3d5698SJohn Baldwin veor q15,q13 @ sigma1(X[i+14]) 1777bc3d5698SJohn Baldwin vshr.u64 q13,q14,#8 1778bc3d5698SJohn Baldwin vadd.i64 q7,q15 1779bc3d5698SJohn Baldwin vshr.u64 q15,q14,#7 1780bc3d5698SJohn Baldwin vsli.64 q12,q14,#63 1781bc3d5698SJohn Baldwin vsli.64 q13,q14,#56 1782bc3d5698SJohn Baldwin vext.8 q14,q3,q4,#8 @ X[i+9] 1783bc3d5698SJohn Baldwin veor q15,q12 1784bc3d5698SJohn Baldwin vshr.u64 d24,d22,#14 @ from NEON_00_15 1785bc3d5698SJohn Baldwin vadd.i64 q7,q14 1786bc3d5698SJohn Baldwin vshr.u64 d25,d22,#18 @ from NEON_00_15 1787bc3d5698SJohn Baldwin veor q15,q13 @ sigma0(X[i+1]) 1788bc3d5698SJohn Baldwin vshr.u64 d26,d22,#41 @ from NEON_00_15 1789bc3d5698SJohn Baldwin vadd.i64 q7,q15 1790bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! 
@ K[i++] 1791bc3d5698SJohn Baldwin vsli.64 d24,d22,#50 1792bc3d5698SJohn Baldwin vsli.64 d25,d22,#46 1793bc3d5698SJohn Baldwin vmov d29,d22 1794bc3d5698SJohn Baldwin vsli.64 d26,d22,#23 1795bc3d5698SJohn Baldwin#if 30<16 && defined(__ARMEL__) 1796bc3d5698SJohn Baldwin vrev64.8 , 1797bc3d5698SJohn Baldwin#endif 1798bc3d5698SJohn Baldwin veor d25,d24 1799bc3d5698SJohn Baldwin vbsl d29,d23,d16 @ Ch(e,f,g) 1800bc3d5698SJohn Baldwin vshr.u64 d24,d18,#28 1801bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 1802bc3d5698SJohn Baldwin vadd.i64 d27,d29,d17 1803bc3d5698SJohn Baldwin vshr.u64 d25,d18,#34 1804bc3d5698SJohn Baldwin vsli.64 d24,d18,#36 1805bc3d5698SJohn Baldwin vadd.i64 d27,d26 1806bc3d5698SJohn Baldwin vshr.u64 d26,d18,#39 1807bc3d5698SJohn Baldwin vadd.i64 d28,d14 1808bc3d5698SJohn Baldwin vsli.64 d25,d18,#30 1809bc3d5698SJohn Baldwin veor d30,d18,d19 1810bc3d5698SJohn Baldwin vsli.64 d26,d18,#25 1811bc3d5698SJohn Baldwin veor d17,d24,d25 1812bc3d5698SJohn Baldwin vadd.i64 d27,d28 1813bc3d5698SJohn Baldwin vbsl d30,d20,d19 @ Maj(a,b,c) 1814bc3d5698SJohn Baldwin veor d17,d26 @ Sigma0(a) 1815bc3d5698SJohn Baldwin vadd.i64 d21,d27 1816bc3d5698SJohn Baldwin vadd.i64 d30,d27 1817bc3d5698SJohn Baldwin @ vadd.i64 d17,d30 1818bc3d5698SJohn Baldwin vshr.u64 d24,d21,#14 @ 31 1819bc3d5698SJohn Baldwin#if 31<16 1820bc3d5698SJohn Baldwin vld1.64 {d15},[r1]! @ handles unaligned 1821bc3d5698SJohn Baldwin#endif 1822bc3d5698SJohn Baldwin vshr.u64 d25,d21,#18 1823bc3d5698SJohn Baldwin#if 31>0 1824bc3d5698SJohn Baldwin vadd.i64 d17,d30 @ h+=Maj from the past 1825bc3d5698SJohn Baldwin#endif 1826bc3d5698SJohn Baldwin vshr.u64 d26,d21,#41 1827bc3d5698SJohn Baldwin vld1.64 {d28},[r3,:64]! @ K[i++] 1828bc3d5698SJohn Baldwin vsli.64 d24,d21,#50 1829bc3d5698SJohn Baldwin vsli.64 d25,d21,#46 1830bc3d5698SJohn Baldwin vmov d29,d21 1831bc3d5698SJohn Baldwin vsli.64 d26,d21,#23 1832bc3d5698SJohn Baldwin#if 31<16 && defined(__ARMEL__) 1833bc3d5698SJohn Baldwin vrev64.8 , 1834bc3d5698SJohn Baldwin#endif 1835bc3d5698SJohn Baldwin veor d25,d24 1836bc3d5698SJohn Baldwin vbsl d29,d22,d23 @ Ch(e,f,g) 1837bc3d5698SJohn Baldwin vshr.u64 d24,d17,#28 1838bc3d5698SJohn Baldwin veor d26,d25 @ Sigma1(e) 1839bc3d5698SJohn Baldwin vadd.i64 d27,d29,d16 1840bc3d5698SJohn Baldwin vshr.u64 d25,d17,#34 1841bc3d5698SJohn Baldwin vsli.64 d24,d17,#36 1842bc3d5698SJohn Baldwin vadd.i64 d27,d26 1843bc3d5698SJohn Baldwin vshr.u64 d26,d17,#39 1844bc3d5698SJohn Baldwin vadd.i64 d28,d15 1845bc3d5698SJohn Baldwin vsli.64 d25,d17,#30 1846bc3d5698SJohn Baldwin veor d30,d17,d18 1847bc3d5698SJohn Baldwin vsli.64 d26,d17,#25 1848bc3d5698SJohn Baldwin veor d16,d24,d25 1849bc3d5698SJohn Baldwin vadd.i64 d27,d28 1850bc3d5698SJohn Baldwin vbsl d30,d19,d18 @ Maj(a,b,c) 1851bc3d5698SJohn Baldwin veor d16,d26 @ Sigma0(a) 1852bc3d5698SJohn Baldwin vadd.i64 d20,d27 1853bc3d5698SJohn Baldwin vadd.i64 d30,d27 1854bc3d5698SJohn Baldwin @ vadd.i64 d16,d30 1855bc3d5698SJohn Baldwin bne .L16_79_neon 1856bc3d5698SJohn Baldwin 1857bc3d5698SJohn Baldwin vadd.i64 d16,d30 @ h+=Maj from the past 1858bc3d5698SJohn Baldwin vldmia r0,{d24,d25,d26,d27,d28,d29,d30,d31} @ load context to temp 1859bc3d5698SJohn Baldwin vadd.i64 q8,q12 @ vectorized accumulate 1860bc3d5698SJohn Baldwin vadd.i64 q9,q13 1861bc3d5698SJohn Baldwin vadd.i64 q10,q14 1862bc3d5698SJohn Baldwin vadd.i64 q11,q15 1863bc3d5698SJohn Baldwin vstmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ save context 1864bc3d5698SJohn Baldwin teq r1,r2 1865bc3d5698SJohn Baldwin sub r3,#640 @ rewind K512 1866bc3d5698SJohn Baldwin 
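@
@ Per-block epilogue (sketch): after the 80 rounds the previous hash
@ value is reloaded from the context at r0 into q12-q15, the working
@ variables held in q8-q11 are accumulated into it with vadd.i64, and
@ the result is stored back. r3 is rewound by 640 bytes (80 K512
@ entries of 8 bytes each) and, while the input pointer r1 has not yet
@ reached the limit in r2, the branch below repeats .Loop_neon for the
@ next 128-byte block.
@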
bne .Loop_neon 1867bc3d5698SJohn Baldwin 1868bc3d5698SJohn Baldwin VFP_ABI_POP 1869bc3d5698SJohn Baldwin bx lr @ .word 0xe12fff1e 1870bc3d5698SJohn Baldwin.size sha512_block_data_order_neon,.-sha512_block_data_order_neon 1871bc3d5698SJohn Baldwin#endif 1872bc3d5698SJohn Baldwin.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 1873bc3d5698SJohn Baldwin.align 2 1874bc3d5698SJohn Baldwin.align 2 1875bc3d5698SJohn Baldwin#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) 1876bc3d5698SJohn Baldwin.comm OPENSSL_armcap_P,4,4 1877bc3d5698SJohn Baldwin#endif 1878