1*bc3d5698SJohn Baldwin/* $FreeBSD$ */ 2*bc3d5698SJohn Baldwin/* Do not modify. This file is auto-generated from keccak1600-armv4.pl. */ 3*bc3d5698SJohn Baldwin#include "arm_arch.h" 4*bc3d5698SJohn Baldwin 5*bc3d5698SJohn Baldwin.text 6*bc3d5698SJohn Baldwin 7*bc3d5698SJohn Baldwin#if defined(__thumb2__) 8*bc3d5698SJohn Baldwin.syntax unified 9*bc3d5698SJohn Baldwin.thumb 10*bc3d5698SJohn Baldwin#else 11*bc3d5698SJohn Baldwin.code 32 12*bc3d5698SJohn Baldwin#endif 13*bc3d5698SJohn Baldwin /* iotas32: constant table emitted into .text as 24 entries, each entry being two 32-bit .long words (8 bytes per entry, 192 bytes in total). KeccakF1600_int takes the table address with adr, adds the counter it keeps at [sp,#444] directly as a byte offset, loads one two-word entry with ldmia as iotas[i], and XORs it into R[0][0]. NOTE(review): the values are presumably the Keccak-f[1600] iota round constants pre-split into 32-bit halves by keccak1600-armv4.pl; confirm against the generator and regenerate rather than editing these words by hand. */ 14*bc3d5698SJohn Baldwin.type iotas32, %object 15*bc3d5698SJohn Baldwin.align 5 16*bc3d5698SJohn Baldwiniotas32: 17*bc3d5698SJohn Baldwin.long 0x00000001, 0x00000000 18*bc3d5698SJohn Baldwin.long 0x00000000, 0x00000089 19*bc3d5698SJohn Baldwin.long 0x00000000, 0x8000008b 20*bc3d5698SJohn Baldwin.long 0x00000000, 0x80008080 21*bc3d5698SJohn Baldwin.long 0x00000001, 0x0000008b 22*bc3d5698SJohn Baldwin.long 0x00000001, 0x00008000 23*bc3d5698SJohn Baldwin.long 0x00000001, 0x80008088 24*bc3d5698SJohn Baldwin.long 0x00000001, 0x80000082 25*bc3d5698SJohn Baldwin.long 0x00000000, 0x0000000b 26*bc3d5698SJohn Baldwin.long 0x00000000, 0x0000000a 27*bc3d5698SJohn Baldwin.long 0x00000001, 0x00008082 28*bc3d5698SJohn Baldwin.long 0x00000000, 0x00008003 29*bc3d5698SJohn Baldwin.long 0x00000001, 0x0000808b 30*bc3d5698SJohn Baldwin.long 0x00000001, 0x8000000b 31*bc3d5698SJohn Baldwin.long 0x00000001, 0x8000008a 32*bc3d5698SJohn Baldwin.long 0x00000001, 0x80000081 33*bc3d5698SJohn Baldwin.long 0x00000000, 0x80000081 34*bc3d5698SJohn Baldwin.long 0x00000000, 0x80000008 35*bc3d5698SJohn Baldwin.long 0x00000000, 0x00000083 36*bc3d5698SJohn Baldwin.long 0x00000000, 0x80008003 37*bc3d5698SJohn Baldwin.long 0x00000001, 0x80008088 38*bc3d5698SJohn Baldwin.long 0x00000000, 0x80000088 39*bc3d5698SJohn Baldwin.long 0x00000001, 0x00008000 40*bc3d5698SJohn Baldwin.long 0x00000000, 0x80008082 41*bc3d5698SJohn Baldwin.size iotas32,.-iotas32 42*bc3d5698SJohn Baldwin 43*bc3d5698SJohn Baldwin.type KeccakF1600_int, 
%function 44*bc3d5698SJohn Baldwin.align 5 45*bc3d5698SJohn BaldwinKeccakF1600_int: 46*bc3d5698SJohn Baldwin add r9,sp,#176 47*bc3d5698SJohn Baldwin add r12,sp,#0 48*bc3d5698SJohn Baldwin add r10,sp,#40 49*bc3d5698SJohn Baldwin ldmia r9,{r4,r5,r6,r7,r8,r9} @ A[4][2..4] 50*bc3d5698SJohn BaldwinKeccakF1600_enter: 51*bc3d5698SJohn Baldwin str lr,[sp,#440] 52*bc3d5698SJohn Baldwin eor r11,r11,r11 53*bc3d5698SJohn Baldwin str r11,[sp,#444] 54*bc3d5698SJohn Baldwin b .Lround2x 55*bc3d5698SJohn Baldwin 56*bc3d5698SJohn Baldwin.align 4 57*bc3d5698SJohn Baldwin.Lround2x: 58*bc3d5698SJohn Baldwin ldmia r12,{r0,r1,r2,r3} @ A[0][0..1] 59*bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[1][0..1] 60*bc3d5698SJohn Baldwin#ifdef __thumb2__ 61*bc3d5698SJohn Baldwin eor r0,r0,r10 62*bc3d5698SJohn Baldwin eor r1,r1,r11 63*bc3d5698SJohn Baldwin eor r2,r2,r12 64*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#56] 65*bc3d5698SJohn Baldwin eor r3,r3,r14 66*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#64] 67*bc3d5698SJohn Baldwin eor r4,r4,r10 68*bc3d5698SJohn Baldwin eor r5,r5,r11 69*bc3d5698SJohn Baldwin eor r6,r6,r12 70*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#72] 71*bc3d5698SJohn Baldwin eor r7,r7,r14 72*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#80] 73*bc3d5698SJohn Baldwin eor r8,r8,r10 74*bc3d5698SJohn Baldwin eor r9,r9,r11 75*bc3d5698SJohn Baldwin eor r0,r0,r12 76*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#88] 77*bc3d5698SJohn Baldwin eor r1,r1,r14 78*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#96] 79*bc3d5698SJohn Baldwin eor r2,r2,r10 80*bc3d5698SJohn Baldwin eor r3,r3,r11 81*bc3d5698SJohn Baldwin eor r4,r4,r12 82*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#104] 83*bc3d5698SJohn Baldwin eor r5,r5,r14 84*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#112] 85*bc3d5698SJohn Baldwin eor r6,r6,r10 86*bc3d5698SJohn Baldwin eor r7,r7,r11 87*bc3d5698SJohn Baldwin eor r8,r8,r12 88*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#120] 89*bc3d5698SJohn Baldwin eor r9,r9,r14 90*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#128] 
91*bc3d5698SJohn Baldwin eor r0,r0,r10 92*bc3d5698SJohn Baldwin eor r1,r1,r11 93*bc3d5698SJohn Baldwin eor r2,r2,r12 94*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#136] 95*bc3d5698SJohn Baldwin eor r3,r3,r14 96*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#144] 97*bc3d5698SJohn Baldwin eor r4,r4,r10 98*bc3d5698SJohn Baldwin eor r5,r5,r11 99*bc3d5698SJohn Baldwin eor r6,r6,r12 100*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#152] 101*bc3d5698SJohn Baldwin eor r7,r7,r14 102*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#160] 103*bc3d5698SJohn Baldwin eor r8,r8,r10 104*bc3d5698SJohn Baldwin eor r9,r9,r11 105*bc3d5698SJohn Baldwin eor r0,r0,r12 106*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#168] 107*bc3d5698SJohn Baldwin eor r1,r1,r14 108*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#16] 109*bc3d5698SJohn Baldwin eor r2,r2,r10 110*bc3d5698SJohn Baldwin eor r3,r3,r11 111*bc3d5698SJohn Baldwin eor r4,r4,r12 112*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#24] 113*bc3d5698SJohn Baldwin eor r5,r5,r14 114*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#32] 115*bc3d5698SJohn Baldwin#else 116*bc3d5698SJohn Baldwin eor r0,r0,r10 117*bc3d5698SJohn Baldwin add r10,sp,#56 118*bc3d5698SJohn Baldwin eor r1,r1,r11 119*bc3d5698SJohn Baldwin eor r2,r2,r12 120*bc3d5698SJohn Baldwin eor r3,r3,r14 121*bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[1][2..3] 122*bc3d5698SJohn Baldwin eor r4,r4,r10 123*bc3d5698SJohn Baldwin add r10,sp,#72 124*bc3d5698SJohn Baldwin eor r5,r5,r11 125*bc3d5698SJohn Baldwin eor r6,r6,r12 126*bc3d5698SJohn Baldwin eor r7,r7,r14 127*bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[1][4]..A[2][0] 128*bc3d5698SJohn Baldwin eor r8,r8,r10 129*bc3d5698SJohn Baldwin add r10,sp,#88 130*bc3d5698SJohn Baldwin eor r9,r9,r11 131*bc3d5698SJohn Baldwin eor r0,r0,r12 132*bc3d5698SJohn Baldwin eor r1,r1,r14 133*bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[2][1..2] 134*bc3d5698SJohn Baldwin eor r2,r2,r10 135*bc3d5698SJohn Baldwin add r10,sp,#104 136*bc3d5698SJohn Baldwin eor r3,r3,r11 
137*bc3d5698SJohn Baldwin eor r4,r4,r12 138*bc3d5698SJohn Baldwin eor r5,r5,r14 139*bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[2][3..4] 140*bc3d5698SJohn Baldwin eor r6,r6,r10 141*bc3d5698SJohn Baldwin add r10,sp,#120 142*bc3d5698SJohn Baldwin eor r7,r7,r11 143*bc3d5698SJohn Baldwin eor r8,r8,r12 144*bc3d5698SJohn Baldwin eor r9,r9,r14 145*bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[3][0..1] 146*bc3d5698SJohn Baldwin eor r0,r0,r10 147*bc3d5698SJohn Baldwin add r10,sp,#136 148*bc3d5698SJohn Baldwin eor r1,r1,r11 149*bc3d5698SJohn Baldwin eor r2,r2,r12 150*bc3d5698SJohn Baldwin eor r3,r3,r14 151*bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[3][2..3] 152*bc3d5698SJohn Baldwin eor r4,r4,r10 153*bc3d5698SJohn Baldwin add r10,sp,#152 154*bc3d5698SJohn Baldwin eor r5,r5,r11 155*bc3d5698SJohn Baldwin eor r6,r6,r12 156*bc3d5698SJohn Baldwin eor r7,r7,r14 157*bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[3][4]..A[4][0] 158*bc3d5698SJohn Baldwin eor r8,r8,r10 159*bc3d5698SJohn Baldwin ldr r10,[sp,#168] @ A[4][1] 160*bc3d5698SJohn Baldwin eor r9,r9,r11 161*bc3d5698SJohn Baldwin ldr r11,[sp,#168+4] 162*bc3d5698SJohn Baldwin eor r0,r0,r12 163*bc3d5698SJohn Baldwin ldr r12,[sp,#16] @ A[0][2] 164*bc3d5698SJohn Baldwin eor r1,r1,r14 165*bc3d5698SJohn Baldwin ldr r14,[sp,#16+4] 166*bc3d5698SJohn Baldwin eor r2,r2,r10 167*bc3d5698SJohn Baldwin add r10,sp,#24 168*bc3d5698SJohn Baldwin eor r3,r3,r11 169*bc3d5698SJohn Baldwin eor r4,r4,r12 170*bc3d5698SJohn Baldwin eor r5,r5,r14 171*bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[0][3..4] 172*bc3d5698SJohn Baldwin#endif 173*bc3d5698SJohn Baldwin eor r6,r6,r10 174*bc3d5698SJohn Baldwin eor r7,r7,r11 175*bc3d5698SJohn Baldwin eor r8,r8,r12 176*bc3d5698SJohn Baldwin eor r9,r9,r14 177*bc3d5698SJohn Baldwin 178*bc3d5698SJohn Baldwin eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0]; 179*bc3d5698SJohn Baldwin#ifndef __thumb2__ 180*bc3d5698SJohn Baldwin str r10,[sp,#208] @ D[1] = E[0] 
181*bc3d5698SJohn Baldwin#endif 182*bc3d5698SJohn Baldwin eor r11,r1,r4 183*bc3d5698SJohn Baldwin#ifndef __thumb2__ 184*bc3d5698SJohn Baldwin str r11,[sp,#208+4] 185*bc3d5698SJohn Baldwin#else 186*bc3d5698SJohn Baldwin strd r10,r11,[sp,#208] @ D[1] = E[0] 187*bc3d5698SJohn Baldwin#endif 188*bc3d5698SJohn Baldwin eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3]; 189*bc3d5698SJohn Baldwin eor r14,r7,r0 190*bc3d5698SJohn Baldwin#ifndef __thumb2__ 191*bc3d5698SJohn Baldwin str r12,[sp,#232] @ D[4] = E[1] 192*bc3d5698SJohn Baldwin#endif 193*bc3d5698SJohn Baldwin eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4]; 194*bc3d5698SJohn Baldwin#ifndef __thumb2__ 195*bc3d5698SJohn Baldwin str r14,[sp,#232+4] 196*bc3d5698SJohn Baldwin#else 197*bc3d5698SJohn Baldwin strd r12,r14,[sp,#232] @ D[4] = E[1] 198*bc3d5698SJohn Baldwin#endif 199*bc3d5698SJohn Baldwin eor r1,r9,r2 200*bc3d5698SJohn Baldwin#ifndef __thumb2__ 201*bc3d5698SJohn Baldwin str r0,[sp,#200] @ D[0] = C[0] 202*bc3d5698SJohn Baldwin#endif 203*bc3d5698SJohn Baldwin eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1]; 204*bc3d5698SJohn Baldwin#ifndef __thumb2__ 205*bc3d5698SJohn Baldwin ldr r7,[sp,#144] 206*bc3d5698SJohn Baldwin#endif 207*bc3d5698SJohn Baldwin eor r3,r3,r6 208*bc3d5698SJohn Baldwin#ifndef __thumb2__ 209*bc3d5698SJohn Baldwin str r1,[sp,#200+4] 210*bc3d5698SJohn Baldwin#else 211*bc3d5698SJohn Baldwin strd r0,r1,[sp,#200] @ D[0] = C[0] 212*bc3d5698SJohn Baldwin#endif 213*bc3d5698SJohn Baldwin#ifndef __thumb2__ 214*bc3d5698SJohn Baldwin ldr r6,[sp,#144+4] 215*bc3d5698SJohn Baldwin#else 216*bc3d5698SJohn Baldwin ldrd r7,r6,[sp,#144] 217*bc3d5698SJohn Baldwin#endif 218*bc3d5698SJohn Baldwin#ifndef __thumb2__ 219*bc3d5698SJohn Baldwin str r2,[sp,#216] @ D[2] = C[1] 220*bc3d5698SJohn Baldwin#endif 221*bc3d5698SJohn Baldwin eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2]; 222*bc3d5698SJohn Baldwin#ifndef __thumb2__ 223*bc3d5698SJohn Baldwin str r3,[sp,#216+4] 224*bc3d5698SJohn Baldwin#else 
225*bc3d5698SJohn Baldwin strd r2,r3,[sp,#216] @ D[2] = C[1] 226*bc3d5698SJohn Baldwin#endif 227*bc3d5698SJohn Baldwin eor r5,r5,r8 228*bc3d5698SJohn Baldwin 229*bc3d5698SJohn Baldwin#ifndef __thumb2__ 230*bc3d5698SJohn Baldwin ldr r8,[sp,#192] 231*bc3d5698SJohn Baldwin#endif 232*bc3d5698SJohn Baldwin#ifndef __thumb2__ 233*bc3d5698SJohn Baldwin ldr r9,[sp,#192+4] 234*bc3d5698SJohn Baldwin#else 235*bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#192] 236*bc3d5698SJohn Baldwin#endif 237*bc3d5698SJohn Baldwin#ifndef __thumb2__ 238*bc3d5698SJohn Baldwin str r4,[sp,#224] @ D[3] = C[2] 239*bc3d5698SJohn Baldwin#endif 240*bc3d5698SJohn Baldwin eor r7,r7,r4 241*bc3d5698SJohn Baldwin#ifndef __thumb2__ 242*bc3d5698SJohn Baldwin str r5,[sp,#224+4] 243*bc3d5698SJohn Baldwin#else 244*bc3d5698SJohn Baldwin strd r4,r5,[sp,#224] @ D[3] = C[2] 245*bc3d5698SJohn Baldwin#endif 246*bc3d5698SJohn Baldwin eor r6,r6,r5 247*bc3d5698SJohn Baldwin#ifndef __thumb2__ 248*bc3d5698SJohn Baldwin ldr r4,[sp,#0] 249*bc3d5698SJohn Baldwin#endif 250*bc3d5698SJohn Baldwin @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */ 251*bc3d5698SJohn Baldwin @ mov r6,r6,ror#32-11 252*bc3d5698SJohn Baldwin#ifndef __thumb2__ 253*bc3d5698SJohn Baldwin ldr r5,[sp,#0+4] 254*bc3d5698SJohn Baldwin#else 255*bc3d5698SJohn Baldwin ldrd r4,r5,[sp,#0] 256*bc3d5698SJohn Baldwin#endif 257*bc3d5698SJohn Baldwin eor r8,r8,r12 258*bc3d5698SJohn Baldwin eor r9,r9,r14 259*bc3d5698SJohn Baldwin#ifndef __thumb2__ 260*bc3d5698SJohn Baldwin ldr r12,[sp,#96] 261*bc3d5698SJohn Baldwin#endif 262*bc3d5698SJohn Baldwin eor r0,r0,r4 263*bc3d5698SJohn Baldwin#ifndef __thumb2__ 264*bc3d5698SJohn Baldwin ldr r14,[sp,#96+4] 265*bc3d5698SJohn Baldwin#else 266*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#96] 267*bc3d5698SJohn Baldwin#endif 268*bc3d5698SJohn Baldwin @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */ 269*bc3d5698SJohn Baldwin @ mov r9,r9,ror#32-7 270*bc3d5698SJohn Baldwin eor r1,r1,r5 @ 
C[0] = A[0][0] ^ C[0]; 271*bc3d5698SJohn Baldwin eor r12,r12,r2 272*bc3d5698SJohn Baldwin#ifndef __thumb2__ 273*bc3d5698SJohn Baldwin ldr r2,[sp,#48] 274*bc3d5698SJohn Baldwin#endif 275*bc3d5698SJohn Baldwin eor r14,r14,r3 276*bc3d5698SJohn Baldwin#ifndef __thumb2__ 277*bc3d5698SJohn Baldwin ldr r3,[sp,#48+4] 278*bc3d5698SJohn Baldwin#else 279*bc3d5698SJohn Baldwin ldrd r2,r3,[sp,#48] 280*bc3d5698SJohn Baldwin#endif 281*bc3d5698SJohn Baldwin mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); 282*bc3d5698SJohn Baldwin ldr r12,[sp,#444] @ load counter 283*bc3d5698SJohn Baldwin eor r2,r2,r10 284*bc3d5698SJohn Baldwin adr r10,iotas32 285*bc3d5698SJohn Baldwin mov r4,r14,ror#32-22 286*bc3d5698SJohn Baldwin add r14,r10,r12 287*bc3d5698SJohn Baldwin eor r3,r3,r11 288*bc3d5698SJohn Baldwin ldmia r14,{r10,r11} @ iotas[i] 289*bc3d5698SJohn Baldwin bic r12,r4,r2,ror#32-22 290*bc3d5698SJohn Baldwin bic r14,r5,r3,ror#32-22 291*bc3d5698SJohn Baldwin mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); 292*bc3d5698SJohn Baldwin mov r3,r3,ror#32-22 293*bc3d5698SJohn Baldwin eor r12,r12,r0 294*bc3d5698SJohn Baldwin eor r14,r14,r1 295*bc3d5698SJohn Baldwin eor r10,r10,r12 296*bc3d5698SJohn Baldwin eor r11,r11,r14 297*bc3d5698SJohn Baldwin#ifndef __thumb2__ 298*bc3d5698SJohn Baldwin str r10,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 299*bc3d5698SJohn Baldwin#endif 300*bc3d5698SJohn Baldwin bic r12,r6,r4,ror#11 301*bc3d5698SJohn Baldwin#ifndef __thumb2__ 302*bc3d5698SJohn Baldwin str r11,[sp,#240+4] 303*bc3d5698SJohn Baldwin#else 304*bc3d5698SJohn Baldwin strd r10,r11,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 305*bc3d5698SJohn Baldwin#endif 306*bc3d5698SJohn Baldwin bic r14,r7,r5,ror#10 307*bc3d5698SJohn Baldwin bic r10,r8,r6,ror#32-(11-7) 308*bc3d5698SJohn Baldwin bic r11,r9,r7,ror#32-(10-7) 309*bc3d5698SJohn Baldwin eor r12,r2,r12,ror#32-11 310*bc3d5698SJohn Baldwin#ifndef __thumb2__ 311*bc3d5698SJohn Baldwin str 
r12,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 312*bc3d5698SJohn Baldwin#endif 313*bc3d5698SJohn Baldwin eor r14,r3,r14,ror#32-10 314*bc3d5698SJohn Baldwin#ifndef __thumb2__ 315*bc3d5698SJohn Baldwin str r14,[sp,#248+4] 316*bc3d5698SJohn Baldwin#else 317*bc3d5698SJohn Baldwin strd r12,r14,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 318*bc3d5698SJohn Baldwin#endif 319*bc3d5698SJohn Baldwin eor r10,r4,r10,ror#32-7 320*bc3d5698SJohn Baldwin eor r11,r5,r11,ror#32-7 321*bc3d5698SJohn Baldwin#ifndef __thumb2__ 322*bc3d5698SJohn Baldwin str r10,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 323*bc3d5698SJohn Baldwin#endif 324*bc3d5698SJohn Baldwin bic r12,r0,r8,ror#32-7 325*bc3d5698SJohn Baldwin#ifndef __thumb2__ 326*bc3d5698SJohn Baldwin str r11,[sp,#256+4] 327*bc3d5698SJohn Baldwin#else 328*bc3d5698SJohn Baldwin strd r10,r11,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 329*bc3d5698SJohn Baldwin#endif 330*bc3d5698SJohn Baldwin bic r14,r1,r9,ror#32-7 331*bc3d5698SJohn Baldwin eor r12,r12,r6,ror#32-11 332*bc3d5698SJohn Baldwin#ifndef __thumb2__ 333*bc3d5698SJohn Baldwin str r12,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 334*bc3d5698SJohn Baldwin#endif 335*bc3d5698SJohn Baldwin eor r14,r14,r7,ror#32-10 336*bc3d5698SJohn Baldwin#ifndef __thumb2__ 337*bc3d5698SJohn Baldwin str r14,[sp,#264+4] 338*bc3d5698SJohn Baldwin#else 339*bc3d5698SJohn Baldwin strd r12,r14,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 340*bc3d5698SJohn Baldwin#endif 341*bc3d5698SJohn Baldwin bic r10,r2,r0 342*bc3d5698SJohn Baldwin add r14,sp,#224 343*bc3d5698SJohn Baldwin#ifndef __thumb2__ 344*bc3d5698SJohn Baldwin ldr r0,[sp,#24] @ A[0][3] 345*bc3d5698SJohn Baldwin#endif 346*bc3d5698SJohn Baldwin bic r11,r3,r1 347*bc3d5698SJohn Baldwin#ifndef __thumb2__ 348*bc3d5698SJohn Baldwin ldr r1,[sp,#24+4] 349*bc3d5698SJohn Baldwin#else 350*bc3d5698SJohn Baldwin ldrd r0,r1,[sp,#24] @ A[0][3] 351*bc3d5698SJohn Baldwin#endif 352*bc3d5698SJohn Baldwin eor r10,r10,r8,ror#32-7 353*bc3d5698SJohn Baldwin eor 
r11,r11,r9,ror#32-7 354*bc3d5698SJohn Baldwin#ifndef __thumb2__ 355*bc3d5698SJohn Baldwin str r10,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 356*bc3d5698SJohn Baldwin#endif 357*bc3d5698SJohn Baldwin add r9,sp,#200 358*bc3d5698SJohn Baldwin#ifndef __thumb2__ 359*bc3d5698SJohn Baldwin str r11,[sp,#272+4] 360*bc3d5698SJohn Baldwin#else 361*bc3d5698SJohn Baldwin strd r10,r11,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 362*bc3d5698SJohn Baldwin#endif 363*bc3d5698SJohn Baldwin 364*bc3d5698SJohn Baldwin ldmia r14,{r10,r11,r12,r14} @ D[3..4] 365*bc3d5698SJohn Baldwin ldmia r9,{r6,r7,r8,r9} @ D[0..1] 366*bc3d5698SJohn Baldwin 367*bc3d5698SJohn Baldwin#ifndef __thumb2__ 368*bc3d5698SJohn Baldwin ldr r2,[sp,#72] @ A[1][4] 369*bc3d5698SJohn Baldwin#endif 370*bc3d5698SJohn Baldwin eor r0,r0,r10 371*bc3d5698SJohn Baldwin#ifndef __thumb2__ 372*bc3d5698SJohn Baldwin ldr r3,[sp,#72+4] 373*bc3d5698SJohn Baldwin#else 374*bc3d5698SJohn Baldwin ldrd r2,r3,[sp,#72] @ A[1][4] 375*bc3d5698SJohn Baldwin#endif 376*bc3d5698SJohn Baldwin eor r1,r1,r11 377*bc3d5698SJohn Baldwin @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]); 378*bc3d5698SJohn Baldwin#ifndef __thumb2__ 379*bc3d5698SJohn Baldwin ldr r10,[sp,#128] @ A[3][1] 380*bc3d5698SJohn Baldwin#endif 381*bc3d5698SJohn Baldwin @ mov r1,r1,ror#32-14 382*bc3d5698SJohn Baldwin#ifndef __thumb2__ 383*bc3d5698SJohn Baldwin ldr r11,[sp,#128+4] 384*bc3d5698SJohn Baldwin#else 385*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#128] @ A[3][1] 386*bc3d5698SJohn Baldwin#endif 387*bc3d5698SJohn Baldwin 388*bc3d5698SJohn Baldwin eor r2,r2,r12 389*bc3d5698SJohn Baldwin#ifndef __thumb2__ 390*bc3d5698SJohn Baldwin ldr r4,[sp,#80] @ A[2][0] 391*bc3d5698SJohn Baldwin#endif 392*bc3d5698SJohn Baldwin eor r3,r3,r14 393*bc3d5698SJohn Baldwin#ifndef __thumb2__ 394*bc3d5698SJohn Baldwin ldr r5,[sp,#80+4] 395*bc3d5698SJohn Baldwin#else 396*bc3d5698SJohn Baldwin ldrd r4,r5,[sp,#80] @ A[2][0] 397*bc3d5698SJohn Baldwin#endif 398*bc3d5698SJohn Baldwin @ 
mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]); 399*bc3d5698SJohn Baldwin @ mov r3,r3,ror#32-10 400*bc3d5698SJohn Baldwin 401*bc3d5698SJohn Baldwin eor r6,r6,r4 402*bc3d5698SJohn Baldwin#ifndef __thumb2__ 403*bc3d5698SJohn Baldwin ldr r12,[sp,#216] @ D[2] 404*bc3d5698SJohn Baldwin#endif 405*bc3d5698SJohn Baldwin eor r7,r7,r5 406*bc3d5698SJohn Baldwin#ifndef __thumb2__ 407*bc3d5698SJohn Baldwin ldr r14,[sp,#216+4] 408*bc3d5698SJohn Baldwin#else 409*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#216] @ D[2] 410*bc3d5698SJohn Baldwin#endif 411*bc3d5698SJohn Baldwin mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]); 412*bc3d5698SJohn Baldwin mov r4,r7,ror#32-2 413*bc3d5698SJohn Baldwin 414*bc3d5698SJohn Baldwin eor r10,r10,r8 415*bc3d5698SJohn Baldwin#ifndef __thumb2__ 416*bc3d5698SJohn Baldwin ldr r8,[sp,#176] @ A[4][2] 417*bc3d5698SJohn Baldwin#endif 418*bc3d5698SJohn Baldwin eor r11,r11,r9 419*bc3d5698SJohn Baldwin#ifndef __thumb2__ 420*bc3d5698SJohn Baldwin ldr r9,[sp,#176+4] 421*bc3d5698SJohn Baldwin#else 422*bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#176] @ A[4][2] 423*bc3d5698SJohn Baldwin#endif 424*bc3d5698SJohn Baldwin mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]); 425*bc3d5698SJohn Baldwin mov r6,r11,ror#32-23 426*bc3d5698SJohn Baldwin 427*bc3d5698SJohn Baldwin bic r10,r4,r2,ror#32-10 428*bc3d5698SJohn Baldwin bic r11,r5,r3,ror#32-10 429*bc3d5698SJohn Baldwin eor r12,r12,r8 430*bc3d5698SJohn Baldwin eor r14,r14,r9 431*bc3d5698SJohn Baldwin mov r9,r12,ror#32-30 @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]); 432*bc3d5698SJohn Baldwin mov r8,r14,ror#32-31 433*bc3d5698SJohn Baldwin eor r10,r10,r0,ror#32-14 434*bc3d5698SJohn Baldwin eor r11,r11,r1,ror#32-14 435*bc3d5698SJohn Baldwin#ifndef __thumb2__ 436*bc3d5698SJohn Baldwin str r10,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 437*bc3d5698SJohn Baldwin#endif 438*bc3d5698SJohn Baldwin bic r12,r6,r4 439*bc3d5698SJohn Baldwin#ifndef __thumb2__ 440*bc3d5698SJohn Baldwin 
str r11,[sp,#280+4] 441*bc3d5698SJohn Baldwin#else 442*bc3d5698SJohn Baldwin strd r10,r11,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 443*bc3d5698SJohn Baldwin#endif 444*bc3d5698SJohn Baldwin bic r14,r7,r5 445*bc3d5698SJohn Baldwin eor r12,r12,r2,ror#32-10 446*bc3d5698SJohn Baldwin#ifndef __thumb2__ 447*bc3d5698SJohn Baldwin str r12,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 448*bc3d5698SJohn Baldwin#endif 449*bc3d5698SJohn Baldwin eor r14,r14,r3,ror#32-10 450*bc3d5698SJohn Baldwin#ifndef __thumb2__ 451*bc3d5698SJohn Baldwin str r14,[sp,#288+4] 452*bc3d5698SJohn Baldwin#else 453*bc3d5698SJohn Baldwin strd r12,r14,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 454*bc3d5698SJohn Baldwin#endif 455*bc3d5698SJohn Baldwin bic r10,r8,r6 456*bc3d5698SJohn Baldwin bic r11,r9,r7 457*bc3d5698SJohn Baldwin bic r12,r0,r8,ror#14 458*bc3d5698SJohn Baldwin bic r14,r1,r9,ror#14 459*bc3d5698SJohn Baldwin eor r10,r10,r4 460*bc3d5698SJohn Baldwin eor r11,r11,r5 461*bc3d5698SJohn Baldwin#ifndef __thumb2__ 462*bc3d5698SJohn Baldwin str r10,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 463*bc3d5698SJohn Baldwin#endif 464*bc3d5698SJohn Baldwin bic r2,r2,r0,ror#32-(14-10) 465*bc3d5698SJohn Baldwin#ifndef __thumb2__ 466*bc3d5698SJohn Baldwin str r11,[sp,#296+4] 467*bc3d5698SJohn Baldwin#else 468*bc3d5698SJohn Baldwin strd r10,r11,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 469*bc3d5698SJohn Baldwin#endif 470*bc3d5698SJohn Baldwin eor r12,r6,r12,ror#32-14 471*bc3d5698SJohn Baldwin bic r11,r3,r1,ror#32-(14-10) 472*bc3d5698SJohn Baldwin#ifndef __thumb2__ 473*bc3d5698SJohn Baldwin str r12,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 474*bc3d5698SJohn Baldwin#endif 475*bc3d5698SJohn Baldwin eor r14,r7,r14,ror#32-14 476*bc3d5698SJohn Baldwin#ifndef __thumb2__ 477*bc3d5698SJohn Baldwin str r14,[sp,#304+4] 478*bc3d5698SJohn Baldwin#else 479*bc3d5698SJohn Baldwin strd r12,r14,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 480*bc3d5698SJohn Baldwin#endif 481*bc3d5698SJohn Baldwin add r12,sp,#208 
482*bc3d5698SJohn Baldwin#ifndef __thumb2__ 483*bc3d5698SJohn Baldwin ldr r1,[sp,#8] @ A[0][1] 484*bc3d5698SJohn Baldwin#endif 485*bc3d5698SJohn Baldwin eor r10,r8,r2,ror#32-10 486*bc3d5698SJohn Baldwin#ifndef __thumb2__ 487*bc3d5698SJohn Baldwin ldr r0,[sp,#8+4] 488*bc3d5698SJohn Baldwin#else 489*bc3d5698SJohn Baldwin ldrd r1,r0,[sp,#8] @ A[0][1] 490*bc3d5698SJohn Baldwin#endif 491*bc3d5698SJohn Baldwin eor r11,r9,r11,ror#32-10 492*bc3d5698SJohn Baldwin#ifndef __thumb2__ 493*bc3d5698SJohn Baldwin str r10,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 494*bc3d5698SJohn Baldwin#endif 495*bc3d5698SJohn Baldwin#ifndef __thumb2__ 496*bc3d5698SJohn Baldwin str r11,[sp,#312+4] 497*bc3d5698SJohn Baldwin#else 498*bc3d5698SJohn Baldwin strd r10,r11,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 499*bc3d5698SJohn Baldwin#endif 500*bc3d5698SJohn Baldwin 501*bc3d5698SJohn Baldwin add r9,sp,#224 502*bc3d5698SJohn Baldwin ldmia r12,{r10,r11,r12,r14} @ D[1..2] 503*bc3d5698SJohn Baldwin#ifndef __thumb2__ 504*bc3d5698SJohn Baldwin ldr r2,[sp,#56] @ A[1][2] 505*bc3d5698SJohn Baldwin#endif 506*bc3d5698SJohn Baldwin#ifndef __thumb2__ 507*bc3d5698SJohn Baldwin ldr r3,[sp,#56+4] 508*bc3d5698SJohn Baldwin#else 509*bc3d5698SJohn Baldwin ldrd r2,r3,[sp,#56] @ A[1][2] 510*bc3d5698SJohn Baldwin#endif 511*bc3d5698SJohn Baldwin ldmia r9,{r6,r7,r8,r9} @ D[3..4] 512*bc3d5698SJohn Baldwin 513*bc3d5698SJohn Baldwin eor r1,r1,r10 514*bc3d5698SJohn Baldwin#ifndef __thumb2__ 515*bc3d5698SJohn Baldwin ldr r4,[sp,#104] @ A[2][3] 516*bc3d5698SJohn Baldwin#endif 517*bc3d5698SJohn Baldwin eor r0,r0,r11 518*bc3d5698SJohn Baldwin#ifndef __thumb2__ 519*bc3d5698SJohn Baldwin ldr r5,[sp,#104+4] 520*bc3d5698SJohn Baldwin#else 521*bc3d5698SJohn Baldwin ldrd r4,r5,[sp,#104] @ A[2][3] 522*bc3d5698SJohn Baldwin#endif 523*bc3d5698SJohn Baldwin mov r0,r0,ror#32-1 @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]); 524*bc3d5698SJohn Baldwin 525*bc3d5698SJohn Baldwin eor r2,r2,r12 526*bc3d5698SJohn Baldwin#ifndef __thumb2__ 
527*bc3d5698SJohn Baldwin ldr r10,[sp,#152] @ A[3][4] 528*bc3d5698SJohn Baldwin#endif 529*bc3d5698SJohn Baldwin eor r3,r3,r14 530*bc3d5698SJohn Baldwin#ifndef __thumb2__ 531*bc3d5698SJohn Baldwin ldr r11,[sp,#152+4] 532*bc3d5698SJohn Baldwin#else 533*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#152] @ A[3][4] 534*bc3d5698SJohn Baldwin#endif 535*bc3d5698SJohn Baldwin @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]); 536*bc3d5698SJohn Baldwin#ifndef __thumb2__ 537*bc3d5698SJohn Baldwin ldr r12,[sp,#200] @ D[0] 538*bc3d5698SJohn Baldwin#endif 539*bc3d5698SJohn Baldwin @ mov r3,r3,ror#32-3 540*bc3d5698SJohn Baldwin#ifndef __thumb2__ 541*bc3d5698SJohn Baldwin ldr r14,[sp,#200+4] 542*bc3d5698SJohn Baldwin#else 543*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#200] @ D[0] 544*bc3d5698SJohn Baldwin#endif 545*bc3d5698SJohn Baldwin 546*bc3d5698SJohn Baldwin eor r4,r4,r6 547*bc3d5698SJohn Baldwin eor r5,r5,r7 548*bc3d5698SJohn Baldwin @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]); 549*bc3d5698SJohn Baldwin @ mov r4,r7,ror#32-13 @ [track reverse order below] 550*bc3d5698SJohn Baldwin 551*bc3d5698SJohn Baldwin eor r10,r10,r8 552*bc3d5698SJohn Baldwin#ifndef __thumb2__ 553*bc3d5698SJohn Baldwin ldr r8,[sp,#160] @ A[4][0] 554*bc3d5698SJohn Baldwin#endif 555*bc3d5698SJohn Baldwin eor r11,r11,r9 556*bc3d5698SJohn Baldwin#ifndef __thumb2__ 557*bc3d5698SJohn Baldwin ldr r9,[sp,#160+4] 558*bc3d5698SJohn Baldwin#else 559*bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#160] @ A[4][0] 560*bc3d5698SJohn Baldwin#endif 561*bc3d5698SJohn Baldwin mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]); 562*bc3d5698SJohn Baldwin mov r7,r11,ror#32-4 563*bc3d5698SJohn Baldwin 564*bc3d5698SJohn Baldwin eor r12,r12,r8 565*bc3d5698SJohn Baldwin eor r14,r14,r9 566*bc3d5698SJohn Baldwin mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]); 567*bc3d5698SJohn Baldwin mov r9,r14,ror#32-9 568*bc3d5698SJohn Baldwin 569*bc3d5698SJohn Baldwin bic 
r10,r5,r2,ror#13-3 570*bc3d5698SJohn Baldwin bic r11,r4,r3,ror#12-3 571*bc3d5698SJohn Baldwin bic r12,r6,r5,ror#32-13 572*bc3d5698SJohn Baldwin bic r14,r7,r4,ror#32-12 573*bc3d5698SJohn Baldwin eor r10,r0,r10,ror#32-13 574*bc3d5698SJohn Baldwin eor r11,r1,r11,ror#32-12 575*bc3d5698SJohn Baldwin#ifndef __thumb2__ 576*bc3d5698SJohn Baldwin str r10,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 577*bc3d5698SJohn Baldwin#endif 578*bc3d5698SJohn Baldwin eor r12,r12,r2,ror#32-3 579*bc3d5698SJohn Baldwin#ifndef __thumb2__ 580*bc3d5698SJohn Baldwin str r11,[sp,#320+4] 581*bc3d5698SJohn Baldwin#else 582*bc3d5698SJohn Baldwin strd r10,r11,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 583*bc3d5698SJohn Baldwin#endif 584*bc3d5698SJohn Baldwin eor r14,r14,r3,ror#32-3 585*bc3d5698SJohn Baldwin#ifndef __thumb2__ 586*bc3d5698SJohn Baldwin str r12,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 587*bc3d5698SJohn Baldwin#endif 588*bc3d5698SJohn Baldwin bic r10,r8,r6 589*bc3d5698SJohn Baldwin bic r11,r9,r7 590*bc3d5698SJohn Baldwin#ifndef __thumb2__ 591*bc3d5698SJohn Baldwin str r14,[sp,#328+4] 592*bc3d5698SJohn Baldwin#else 593*bc3d5698SJohn Baldwin strd r12,r14,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 594*bc3d5698SJohn Baldwin#endif 595*bc3d5698SJohn Baldwin eor r10,r10,r5,ror#32-13 596*bc3d5698SJohn Baldwin eor r11,r11,r4,ror#32-12 597*bc3d5698SJohn Baldwin#ifndef __thumb2__ 598*bc3d5698SJohn Baldwin str r10,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 599*bc3d5698SJohn Baldwin#endif 600*bc3d5698SJohn Baldwin bic r12,r0,r8 601*bc3d5698SJohn Baldwin#ifndef __thumb2__ 602*bc3d5698SJohn Baldwin str r11,[sp,#336+4] 603*bc3d5698SJohn Baldwin#else 604*bc3d5698SJohn Baldwin strd r10,r11,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 605*bc3d5698SJohn Baldwin#endif 606*bc3d5698SJohn Baldwin bic r14,r1,r9 607*bc3d5698SJohn Baldwin eor r12,r12,r6 608*bc3d5698SJohn Baldwin eor r14,r14,r7 609*bc3d5698SJohn Baldwin#ifndef __thumb2__ 610*bc3d5698SJohn Baldwin str r12,[sp,#344] @ R[2][3] = C[3] ^ 
(~C[4] & C[0]); 611*bc3d5698SJohn Baldwin#endif 612*bc3d5698SJohn Baldwin bic r10,r2,r0,ror#3 613*bc3d5698SJohn Baldwin#ifndef __thumb2__ 614*bc3d5698SJohn Baldwin str r14,[sp,#344+4] 615*bc3d5698SJohn Baldwin#else 616*bc3d5698SJohn Baldwin strd r12,r14,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 617*bc3d5698SJohn Baldwin#endif 618*bc3d5698SJohn Baldwin bic r11,r3,r1,ror#3 619*bc3d5698SJohn Baldwin#ifndef __thumb2__ 620*bc3d5698SJohn Baldwin ldr r1,[sp,#32] @ A[0][4] [in reverse order] 621*bc3d5698SJohn Baldwin#endif 622*bc3d5698SJohn Baldwin eor r10,r8,r10,ror#32-3 623*bc3d5698SJohn Baldwin#ifndef __thumb2__ 624*bc3d5698SJohn Baldwin ldr r0,[sp,#32+4] 625*bc3d5698SJohn Baldwin#else 626*bc3d5698SJohn Baldwin ldrd r1,r0,[sp,#32] @ A[0][4] [in reverse order] 627*bc3d5698SJohn Baldwin#endif 628*bc3d5698SJohn Baldwin eor r11,r9,r11,ror#32-3 629*bc3d5698SJohn Baldwin#ifndef __thumb2__ 630*bc3d5698SJohn Baldwin str r10,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 631*bc3d5698SJohn Baldwin#endif 632*bc3d5698SJohn Baldwin add r9,sp,#208 633*bc3d5698SJohn Baldwin#ifndef __thumb2__ 634*bc3d5698SJohn Baldwin str r11,[sp,#352+4] 635*bc3d5698SJohn Baldwin#else 636*bc3d5698SJohn Baldwin strd r10,r11,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 637*bc3d5698SJohn Baldwin#endif 638*bc3d5698SJohn Baldwin 639*bc3d5698SJohn Baldwin#ifndef __thumb2__ 640*bc3d5698SJohn Baldwin ldr r10,[sp,#232] @ D[4] 641*bc3d5698SJohn Baldwin#endif 642*bc3d5698SJohn Baldwin#ifndef __thumb2__ 643*bc3d5698SJohn Baldwin ldr r11,[sp,#232+4] 644*bc3d5698SJohn Baldwin#else 645*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#232] @ D[4] 646*bc3d5698SJohn Baldwin#endif 647*bc3d5698SJohn Baldwin#ifndef __thumb2__ 648*bc3d5698SJohn Baldwin ldr r12,[sp,#200] @ D[0] 649*bc3d5698SJohn Baldwin#endif 650*bc3d5698SJohn Baldwin#ifndef __thumb2__ 651*bc3d5698SJohn Baldwin ldr r14,[sp,#200+4] 652*bc3d5698SJohn Baldwin#else 653*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#200] @ D[0] 654*bc3d5698SJohn Baldwin#endif 655*bc3d5698SJohn 
Baldwin 656*bc3d5698SJohn Baldwin ldmia r9,{r6,r7,r8,r9} @ D[1..2] 657*bc3d5698SJohn Baldwin 658*bc3d5698SJohn Baldwin eor r1,r1,r10 659*bc3d5698SJohn Baldwin#ifndef __thumb2__ 660*bc3d5698SJohn Baldwin ldr r2,[sp,#40] @ A[1][0] 661*bc3d5698SJohn Baldwin#endif 662*bc3d5698SJohn Baldwin eor r0,r0,r11 663*bc3d5698SJohn Baldwin#ifndef __thumb2__ 664*bc3d5698SJohn Baldwin ldr r3,[sp,#40+4] 665*bc3d5698SJohn Baldwin#else 666*bc3d5698SJohn Baldwin ldrd r2,r3,[sp,#40] @ A[1][0] 667*bc3d5698SJohn Baldwin#endif 668*bc3d5698SJohn Baldwin @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]); 669*bc3d5698SJohn Baldwin#ifndef __thumb2__ 670*bc3d5698SJohn Baldwin ldr r4,[sp,#88] @ A[2][1] 671*bc3d5698SJohn Baldwin#endif 672*bc3d5698SJohn Baldwin @ mov r0,r11,ror#32-14 @ [was loaded in reverse order] 673*bc3d5698SJohn Baldwin#ifndef __thumb2__ 674*bc3d5698SJohn Baldwin ldr r5,[sp,#88+4] 675*bc3d5698SJohn Baldwin#else 676*bc3d5698SJohn Baldwin ldrd r4,r5,[sp,#88] @ A[2][1] 677*bc3d5698SJohn Baldwin#endif 678*bc3d5698SJohn Baldwin 679*bc3d5698SJohn Baldwin eor r2,r2,r12 680*bc3d5698SJohn Baldwin#ifndef __thumb2__ 681*bc3d5698SJohn Baldwin ldr r10,[sp,#136] @ A[3][2] 682*bc3d5698SJohn Baldwin#endif 683*bc3d5698SJohn Baldwin eor r3,r3,r14 684*bc3d5698SJohn Baldwin#ifndef __thumb2__ 685*bc3d5698SJohn Baldwin ldr r11,[sp,#136+4] 686*bc3d5698SJohn Baldwin#else 687*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#136] @ A[3][2] 688*bc3d5698SJohn Baldwin#endif 689*bc3d5698SJohn Baldwin @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]); 690*bc3d5698SJohn Baldwin#ifndef __thumb2__ 691*bc3d5698SJohn Baldwin ldr r12,[sp,#224] @ D[3] 692*bc3d5698SJohn Baldwin#endif 693*bc3d5698SJohn Baldwin @ mov r3,r3,ror#32-18 694*bc3d5698SJohn Baldwin#ifndef __thumb2__ 695*bc3d5698SJohn Baldwin ldr r14,[sp,#224+4] 696*bc3d5698SJohn Baldwin#else 697*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#224] @ D[3] 698*bc3d5698SJohn Baldwin#endif 699*bc3d5698SJohn Baldwin 700*bc3d5698SJohn 
Baldwin eor r6,r6,r4 701*bc3d5698SJohn Baldwin eor r7,r7,r5 702*bc3d5698SJohn Baldwin mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]); 703*bc3d5698SJohn Baldwin mov r5,r7,ror#32-5 704*bc3d5698SJohn Baldwin 705*bc3d5698SJohn Baldwin eor r10,r10,r8 706*bc3d5698SJohn Baldwin#ifndef __thumb2__ 707*bc3d5698SJohn Baldwin ldr r8,[sp,#184] @ A[4][3] 708*bc3d5698SJohn Baldwin#endif 709*bc3d5698SJohn Baldwin eor r11,r11,r9 710*bc3d5698SJohn Baldwin#ifndef __thumb2__ 711*bc3d5698SJohn Baldwin ldr r9,[sp,#184+4] 712*bc3d5698SJohn Baldwin#else 713*bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#184] @ A[4][3] 714*bc3d5698SJohn Baldwin#endif 715*bc3d5698SJohn Baldwin mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]); 716*bc3d5698SJohn Baldwin mov r6,r11,ror#32-8 717*bc3d5698SJohn Baldwin 718*bc3d5698SJohn Baldwin eor r12,r12,r8 719*bc3d5698SJohn Baldwin eor r14,r14,r9 720*bc3d5698SJohn Baldwin mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]); 721*bc3d5698SJohn Baldwin mov r9,r14,ror#32-28 722*bc3d5698SJohn Baldwin 723*bc3d5698SJohn Baldwin bic r10,r4,r2,ror#32-18 724*bc3d5698SJohn Baldwin bic r11,r5,r3,ror#32-18 725*bc3d5698SJohn Baldwin eor r10,r10,r0,ror#32-14 726*bc3d5698SJohn Baldwin eor r11,r11,r1,ror#32-13 727*bc3d5698SJohn Baldwin#ifndef __thumb2__ 728*bc3d5698SJohn Baldwin str r10,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 729*bc3d5698SJohn Baldwin#endif 730*bc3d5698SJohn Baldwin bic r12,r6,r4 731*bc3d5698SJohn Baldwin#ifndef __thumb2__ 732*bc3d5698SJohn Baldwin str r11,[sp,#360+4] 733*bc3d5698SJohn Baldwin#else 734*bc3d5698SJohn Baldwin strd r10,r11,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 735*bc3d5698SJohn Baldwin#endif 736*bc3d5698SJohn Baldwin bic r14,r7,r5 737*bc3d5698SJohn Baldwin eor r12,r12,r2,ror#32-18 738*bc3d5698SJohn Baldwin#ifndef __thumb2__ 739*bc3d5698SJohn Baldwin str r12,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 740*bc3d5698SJohn Baldwin#endif 741*bc3d5698SJohn Baldwin eor r14,r14,r3,ror#32-18 
742*bc3d5698SJohn Baldwin#ifndef __thumb2__ 743*bc3d5698SJohn Baldwin str r14,[sp,#368+4] 744*bc3d5698SJohn Baldwin#else 745*bc3d5698SJohn Baldwin strd r12,r14,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 746*bc3d5698SJohn Baldwin#endif 747*bc3d5698SJohn Baldwin bic r10,r8,r6 748*bc3d5698SJohn Baldwin bic r11,r9,r7 749*bc3d5698SJohn Baldwin bic r12,r0,r8,ror#14 750*bc3d5698SJohn Baldwin bic r14,r1,r9,ror#13 751*bc3d5698SJohn Baldwin eor r10,r10,r4 752*bc3d5698SJohn Baldwin eor r11,r11,r5 753*bc3d5698SJohn Baldwin#ifndef __thumb2__ 754*bc3d5698SJohn Baldwin str r10,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 755*bc3d5698SJohn Baldwin#endif 756*bc3d5698SJohn Baldwin bic r2,r2,r0,ror#18-14 757*bc3d5698SJohn Baldwin#ifndef __thumb2__ 758*bc3d5698SJohn Baldwin str r11,[sp,#376+4] 759*bc3d5698SJohn Baldwin#else 760*bc3d5698SJohn Baldwin strd r10,r11,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 761*bc3d5698SJohn Baldwin#endif 762*bc3d5698SJohn Baldwin eor r12,r6,r12,ror#32-14 763*bc3d5698SJohn Baldwin bic r11,r3,r1,ror#18-13 764*bc3d5698SJohn Baldwin eor r14,r7,r14,ror#32-13 765*bc3d5698SJohn Baldwin#ifndef __thumb2__ 766*bc3d5698SJohn Baldwin str r12,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 767*bc3d5698SJohn Baldwin#endif 768*bc3d5698SJohn Baldwin#ifndef __thumb2__ 769*bc3d5698SJohn Baldwin str r14,[sp,#384+4] 770*bc3d5698SJohn Baldwin#else 771*bc3d5698SJohn Baldwin strd r12,r14,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 772*bc3d5698SJohn Baldwin#endif 773*bc3d5698SJohn Baldwin add r14,sp,#216 774*bc3d5698SJohn Baldwin#ifndef __thumb2__ 775*bc3d5698SJohn Baldwin ldr r0,[sp,#16] @ A[0][2] 776*bc3d5698SJohn Baldwin#endif 777*bc3d5698SJohn Baldwin eor r10,r8,r2,ror#32-18 778*bc3d5698SJohn Baldwin#ifndef __thumb2__ 779*bc3d5698SJohn Baldwin ldr r1,[sp,#16+4] 780*bc3d5698SJohn Baldwin#else 781*bc3d5698SJohn Baldwin ldrd r0,r1,[sp,#16] @ A[0][2] 782*bc3d5698SJohn Baldwin#endif 783*bc3d5698SJohn Baldwin eor r11,r9,r11,ror#32-18 784*bc3d5698SJohn Baldwin#ifndef __thumb2__ 
785*bc3d5698SJohn Baldwin str r10,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 786*bc3d5698SJohn Baldwin#endif 787*bc3d5698SJohn Baldwin#ifndef __thumb2__ 788*bc3d5698SJohn Baldwin str r11,[sp,#392+4] 789*bc3d5698SJohn Baldwin#else 790*bc3d5698SJohn Baldwin strd r10,r11,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 791*bc3d5698SJohn Baldwin#endif 792*bc3d5698SJohn Baldwin 793*bc3d5698SJohn Baldwin ldmia r14,{r10,r11,r12,r14} @ D[2..3] 794*bc3d5698SJohn Baldwin#ifndef __thumb2__ 795*bc3d5698SJohn Baldwin ldr r2,[sp,#64] @ A[1][3] 796*bc3d5698SJohn Baldwin#endif 797*bc3d5698SJohn Baldwin#ifndef __thumb2__ 798*bc3d5698SJohn Baldwin ldr r3,[sp,#64+4] 799*bc3d5698SJohn Baldwin#else 800*bc3d5698SJohn Baldwin ldrd r2,r3,[sp,#64] @ A[1][3] 801*bc3d5698SJohn Baldwin#endif 802*bc3d5698SJohn Baldwin#ifndef __thumb2__ 803*bc3d5698SJohn Baldwin ldr r6,[sp,#232] @ D[4] 804*bc3d5698SJohn Baldwin#endif 805*bc3d5698SJohn Baldwin#ifndef __thumb2__ 806*bc3d5698SJohn Baldwin ldr r7,[sp,#232+4] 807*bc3d5698SJohn Baldwin#else 808*bc3d5698SJohn Baldwin ldrd r6,r7,[sp,#232] @ D[4] 809*bc3d5698SJohn Baldwin#endif 810*bc3d5698SJohn Baldwin 811*bc3d5698SJohn Baldwin eor r0,r0,r10 812*bc3d5698SJohn Baldwin#ifndef __thumb2__ 813*bc3d5698SJohn Baldwin ldr r4,[sp,#112] @ A[2][4] 814*bc3d5698SJohn Baldwin#endif 815*bc3d5698SJohn Baldwin eor r1,r1,r11 816*bc3d5698SJohn Baldwin#ifndef __thumb2__ 817*bc3d5698SJohn Baldwin ldr r5,[sp,#112+4] 818*bc3d5698SJohn Baldwin#else 819*bc3d5698SJohn Baldwin ldrd r4,r5,[sp,#112] @ A[2][4] 820*bc3d5698SJohn Baldwin#endif 821*bc3d5698SJohn Baldwin @ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]); 822*bc3d5698SJohn Baldwin#ifndef __thumb2__ 823*bc3d5698SJohn Baldwin ldr r8,[sp,#200] @ D[0] 824*bc3d5698SJohn Baldwin#endif 825*bc3d5698SJohn Baldwin @ mov r1,r1,ror#32-31 826*bc3d5698SJohn Baldwin#ifndef __thumb2__ 827*bc3d5698SJohn Baldwin ldr r9,[sp,#200+4] 828*bc3d5698SJohn Baldwin#else 829*bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#200] @ D[0] 
830*bc3d5698SJohn Baldwin#endif 831*bc3d5698SJohn Baldwin 832*bc3d5698SJohn Baldwin eor r12,r12,r2 833*bc3d5698SJohn Baldwin#ifndef __thumb2__ 834*bc3d5698SJohn Baldwin ldr r10,[sp,#120] @ A[3][0] 835*bc3d5698SJohn Baldwin#endif 836*bc3d5698SJohn Baldwin eor r14,r14,r3 837*bc3d5698SJohn Baldwin#ifndef __thumb2__ 838*bc3d5698SJohn Baldwin ldr r11,[sp,#120+4] 839*bc3d5698SJohn Baldwin#else 840*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#120] @ A[3][0] 841*bc3d5698SJohn Baldwin#endif 842*bc3d5698SJohn Baldwin mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]); 843*bc3d5698SJohn Baldwin#ifndef __thumb2__ 844*bc3d5698SJohn Baldwin ldr r12,[sp,#208] @ D[1] 845*bc3d5698SJohn Baldwin#endif 846*bc3d5698SJohn Baldwin mov r2,r14,ror#32-28 847*bc3d5698SJohn Baldwin#ifndef __thumb2__ 848*bc3d5698SJohn Baldwin ldr r14,[sp,#208+4] 849*bc3d5698SJohn Baldwin#else 850*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#208] @ D[1] 851*bc3d5698SJohn Baldwin#endif 852*bc3d5698SJohn Baldwin 853*bc3d5698SJohn Baldwin eor r6,r6,r4 854*bc3d5698SJohn Baldwin eor r7,r7,r5 855*bc3d5698SJohn Baldwin mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]); 856*bc3d5698SJohn Baldwin mov r4,r7,ror#32-20 857*bc3d5698SJohn Baldwin 858*bc3d5698SJohn Baldwin eor r10,r10,r8 859*bc3d5698SJohn Baldwin#ifndef __thumb2__ 860*bc3d5698SJohn Baldwin ldr r8,[sp,#168] @ A[4][1] 861*bc3d5698SJohn Baldwin#endif 862*bc3d5698SJohn Baldwin eor r11,r11,r9 863*bc3d5698SJohn Baldwin#ifndef __thumb2__ 864*bc3d5698SJohn Baldwin ldr r9,[sp,#168+4] 865*bc3d5698SJohn Baldwin#else 866*bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#168] @ A[4][1] 867*bc3d5698SJohn Baldwin#endif 868*bc3d5698SJohn Baldwin mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]); 869*bc3d5698SJohn Baldwin mov r6,r11,ror#32-21 870*bc3d5698SJohn Baldwin 871*bc3d5698SJohn Baldwin eor r8,r8,r12 872*bc3d5698SJohn Baldwin eor r9,r9,r14 873*bc3d5698SJohn Baldwin @ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]); 
874*bc3d5698SJohn Baldwin @ mov r9,r3,ror#32-1 875*bc3d5698SJohn Baldwin 876*bc3d5698SJohn Baldwin bic r10,r4,r2 877*bc3d5698SJohn Baldwin bic r11,r5,r3 878*bc3d5698SJohn Baldwin eor r10,r10,r0,ror#32-31 879*bc3d5698SJohn Baldwin#ifndef __thumb2__ 880*bc3d5698SJohn Baldwin str r10,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 881*bc3d5698SJohn Baldwin#endif 882*bc3d5698SJohn Baldwin eor r11,r11,r1,ror#32-31 883*bc3d5698SJohn Baldwin#ifndef __thumb2__ 884*bc3d5698SJohn Baldwin str r11,[sp,#400+4] 885*bc3d5698SJohn Baldwin#else 886*bc3d5698SJohn Baldwin strd r10,r11,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 887*bc3d5698SJohn Baldwin#endif 888*bc3d5698SJohn Baldwin bic r12,r6,r4 889*bc3d5698SJohn Baldwin bic r14,r7,r5 890*bc3d5698SJohn Baldwin eor r12,r12,r2 891*bc3d5698SJohn Baldwin eor r14,r14,r3 892*bc3d5698SJohn Baldwin#ifndef __thumb2__ 893*bc3d5698SJohn Baldwin str r12,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 894*bc3d5698SJohn Baldwin#endif 895*bc3d5698SJohn Baldwin bic r10,r8,r6,ror#1 896*bc3d5698SJohn Baldwin#ifndef __thumb2__ 897*bc3d5698SJohn Baldwin str r14,[sp,#408+4] 898*bc3d5698SJohn Baldwin#else 899*bc3d5698SJohn Baldwin strd r12,r14,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 900*bc3d5698SJohn Baldwin#endif 901*bc3d5698SJohn Baldwin bic r11,r9,r7,ror#1 902*bc3d5698SJohn Baldwin bic r12,r0,r8,ror#31-1 903*bc3d5698SJohn Baldwin bic r14,r1,r9,ror#31-1 904*bc3d5698SJohn Baldwin eor r4,r4,r10,ror#32-1 905*bc3d5698SJohn Baldwin#ifndef __thumb2__ 906*bc3d5698SJohn Baldwin str r4,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 907*bc3d5698SJohn Baldwin#endif 908*bc3d5698SJohn Baldwin eor r5,r5,r11,ror#32-1 909*bc3d5698SJohn Baldwin#ifndef __thumb2__ 910*bc3d5698SJohn Baldwin str r5,[sp,#416+4] 911*bc3d5698SJohn Baldwin#else 912*bc3d5698SJohn Baldwin strd r4,r5,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 913*bc3d5698SJohn Baldwin#endif 914*bc3d5698SJohn Baldwin eor r6,r6,r12,ror#32-31 915*bc3d5698SJohn Baldwin eor r7,r7,r14,ror#32-31 916*bc3d5698SJohn 
Baldwin#ifndef __thumb2__ 917*bc3d5698SJohn Baldwin str r6,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 918*bc3d5698SJohn Baldwin#endif 919*bc3d5698SJohn Baldwin bic r10,r2,r0,ror#32-31 920*bc3d5698SJohn Baldwin#ifndef __thumb2__ 921*bc3d5698SJohn Baldwin str r7,[sp,#424+4] 922*bc3d5698SJohn Baldwin#else 923*bc3d5698SJohn Baldwin strd r6,r7,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 924*bc3d5698SJohn Baldwin#endif 925*bc3d5698SJohn Baldwin bic r11,r3,r1,ror#32-31 926*bc3d5698SJohn Baldwin add r12,sp,#240 927*bc3d5698SJohn Baldwin eor r8,r10,r8,ror#32-1 928*bc3d5698SJohn Baldwin add r10,sp,#280 929*bc3d5698SJohn Baldwin eor r9,r11,r9,ror#32-1 930*bc3d5698SJohn Baldwin#ifndef __thumb2__ 931*bc3d5698SJohn Baldwin str r8,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]); 932*bc3d5698SJohn Baldwin#endif 933*bc3d5698SJohn Baldwin#ifndef __thumb2__ 934*bc3d5698SJohn Baldwin str r9,[sp,#432+4] 935*bc3d5698SJohn Baldwin#else 936*bc3d5698SJohn Baldwin strd r8,r9,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]); 937*bc3d5698SJohn Baldwin#endif 938*bc3d5698SJohn Baldwin ldmia r12,{r0,r1,r2,r3} @ A[0][0..1] 939*bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[1][0..1] 940*bc3d5698SJohn Baldwin#ifdef __thumb2__ 941*bc3d5698SJohn Baldwin eor r0,r0,r10 942*bc3d5698SJohn Baldwin eor r1,r1,r11 943*bc3d5698SJohn Baldwin eor r2,r2,r12 944*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#296] 945*bc3d5698SJohn Baldwin eor r3,r3,r14 946*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#304] 947*bc3d5698SJohn Baldwin eor r4,r4,r10 948*bc3d5698SJohn Baldwin eor r5,r5,r11 949*bc3d5698SJohn Baldwin eor r6,r6,r12 950*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#312] 951*bc3d5698SJohn Baldwin eor r7,r7,r14 952*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#320] 953*bc3d5698SJohn Baldwin eor r8,r8,r10 954*bc3d5698SJohn Baldwin eor r9,r9,r11 955*bc3d5698SJohn Baldwin eor r0,r0,r12 956*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#328] 957*bc3d5698SJohn Baldwin eor r1,r1,r14 958*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#336] 
959*bc3d5698SJohn Baldwin eor r2,r2,r10 960*bc3d5698SJohn Baldwin eor r3,r3,r11 961*bc3d5698SJohn Baldwin eor r4,r4,r12 962*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#344] 963*bc3d5698SJohn Baldwin eor r5,r5,r14 964*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#352] 965*bc3d5698SJohn Baldwin eor r6,r6,r10 966*bc3d5698SJohn Baldwin eor r7,r7,r11 967*bc3d5698SJohn Baldwin eor r8,r8,r12 968*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#360] 969*bc3d5698SJohn Baldwin eor r9,r9,r14 970*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#368] 971*bc3d5698SJohn Baldwin eor r0,r0,r10 972*bc3d5698SJohn Baldwin eor r1,r1,r11 973*bc3d5698SJohn Baldwin eor r2,r2,r12 974*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#376] 975*bc3d5698SJohn Baldwin eor r3,r3,r14 976*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#384] 977*bc3d5698SJohn Baldwin eor r4,r4,r10 978*bc3d5698SJohn Baldwin eor r5,r5,r11 979*bc3d5698SJohn Baldwin eor r6,r6,r12 980*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#392] 981*bc3d5698SJohn Baldwin eor r7,r7,r14 982*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#400] 983*bc3d5698SJohn Baldwin eor r8,r8,r10 984*bc3d5698SJohn Baldwin eor r9,r9,r11 985*bc3d5698SJohn Baldwin eor r0,r0,r12 986*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#408] 987*bc3d5698SJohn Baldwin eor r1,r1,r14 988*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#256] 989*bc3d5698SJohn Baldwin eor r2,r2,r10 990*bc3d5698SJohn Baldwin eor r3,r3,r11 991*bc3d5698SJohn Baldwin eor r4,r4,r12 992*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#264] 993*bc3d5698SJohn Baldwin eor r5,r5,r14 994*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#272] 995*bc3d5698SJohn Baldwin#else 996*bc3d5698SJohn Baldwin eor r0,r0,r10 997*bc3d5698SJohn Baldwin add r10,sp,#296 998*bc3d5698SJohn Baldwin eor r1,r1,r11 999*bc3d5698SJohn Baldwin eor r2,r2,r12 1000*bc3d5698SJohn Baldwin eor r3,r3,r14 1001*bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[1][2..3] 1002*bc3d5698SJohn Baldwin eor r4,r4,r10 1003*bc3d5698SJohn Baldwin add r10,sp,#312 1004*bc3d5698SJohn Baldwin eor r5,r5,r11 1005*bc3d5698SJohn 
Baldwin eor r6,r6,r12 1006*bc3d5698SJohn Baldwin eor r7,r7,r14 1007*bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[1][4]..A[2][0] 1008*bc3d5698SJohn Baldwin eor r8,r8,r10 1009*bc3d5698SJohn Baldwin add r10,sp,#328 1010*bc3d5698SJohn Baldwin eor r9,r9,r11 1011*bc3d5698SJohn Baldwin eor r0,r0,r12 1012*bc3d5698SJohn Baldwin eor r1,r1,r14 1013*bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[2][1..2] 1014*bc3d5698SJohn Baldwin eor r2,r2,r10 1015*bc3d5698SJohn Baldwin add r10,sp,#344 1016*bc3d5698SJohn Baldwin eor r3,r3,r11 1017*bc3d5698SJohn Baldwin eor r4,r4,r12 1018*bc3d5698SJohn Baldwin eor r5,r5,r14 1019*bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[2][3..4] 1020*bc3d5698SJohn Baldwin eor r6,r6,r10 1021*bc3d5698SJohn Baldwin add r10,sp,#360 1022*bc3d5698SJohn Baldwin eor r7,r7,r11 1023*bc3d5698SJohn Baldwin eor r8,r8,r12 1024*bc3d5698SJohn Baldwin eor r9,r9,r14 1025*bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[3][0..1] 1026*bc3d5698SJohn Baldwin eor r0,r0,r10 1027*bc3d5698SJohn Baldwin add r10,sp,#376 1028*bc3d5698SJohn Baldwin eor r1,r1,r11 1029*bc3d5698SJohn Baldwin eor r2,r2,r12 1030*bc3d5698SJohn Baldwin eor r3,r3,r14 1031*bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[3][2..3] 1032*bc3d5698SJohn Baldwin eor r4,r4,r10 1033*bc3d5698SJohn Baldwin add r10,sp,#392 1034*bc3d5698SJohn Baldwin eor r5,r5,r11 1035*bc3d5698SJohn Baldwin eor r6,r6,r12 1036*bc3d5698SJohn Baldwin eor r7,r7,r14 1037*bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[3][4]..A[4][0] 1038*bc3d5698SJohn Baldwin eor r8,r8,r10 1039*bc3d5698SJohn Baldwin ldr r10,[sp,#408] @ A[4][1] 1040*bc3d5698SJohn Baldwin eor r9,r9,r11 1041*bc3d5698SJohn Baldwin ldr r11,[sp,#408+4] 1042*bc3d5698SJohn Baldwin eor r0,r0,r12 1043*bc3d5698SJohn Baldwin ldr r12,[sp,#256] @ A[0][2] 1044*bc3d5698SJohn Baldwin eor r1,r1,r14 1045*bc3d5698SJohn Baldwin ldr r14,[sp,#256+4] 1046*bc3d5698SJohn Baldwin eor r2,r2,r10 1047*bc3d5698SJohn Baldwin add r10,sp,#264 1048*bc3d5698SJohn Baldwin 
eor r3,r3,r11 1049*bc3d5698SJohn Baldwin eor r4,r4,r12 1050*bc3d5698SJohn Baldwin eor r5,r5,r14 1051*bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[0][3..4] 1052*bc3d5698SJohn Baldwin#endif 1053*bc3d5698SJohn Baldwin eor r6,r6,r10 1054*bc3d5698SJohn Baldwin eor r7,r7,r11 1055*bc3d5698SJohn Baldwin eor r8,r8,r12 1056*bc3d5698SJohn Baldwin eor r9,r9,r14 1057*bc3d5698SJohn Baldwin 1058*bc3d5698SJohn Baldwin eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0]; 1059*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1060*bc3d5698SJohn Baldwin str r10,[sp,#208] @ D[1] = E[0] 1061*bc3d5698SJohn Baldwin#endif 1062*bc3d5698SJohn Baldwin eor r11,r1,r4 1063*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1064*bc3d5698SJohn Baldwin str r11,[sp,#208+4] 1065*bc3d5698SJohn Baldwin#else 1066*bc3d5698SJohn Baldwin strd r10,r11,[sp,#208] @ D[1] = E[0] 1067*bc3d5698SJohn Baldwin#endif 1068*bc3d5698SJohn Baldwin eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3]; 1069*bc3d5698SJohn Baldwin eor r14,r7,r0 1070*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1071*bc3d5698SJohn Baldwin str r12,[sp,#232] @ D[4] = E[1] 1072*bc3d5698SJohn Baldwin#endif 1073*bc3d5698SJohn Baldwin eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4]; 1074*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1075*bc3d5698SJohn Baldwin str r14,[sp,#232+4] 1076*bc3d5698SJohn Baldwin#else 1077*bc3d5698SJohn Baldwin strd r12,r14,[sp,#232] @ D[4] = E[1] 1078*bc3d5698SJohn Baldwin#endif 1079*bc3d5698SJohn Baldwin eor r1,r9,r2 1080*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1081*bc3d5698SJohn Baldwin str r0,[sp,#200] @ D[0] = C[0] 1082*bc3d5698SJohn Baldwin#endif 1083*bc3d5698SJohn Baldwin eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1]; 1084*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1085*bc3d5698SJohn Baldwin ldr r7,[sp,#384] 1086*bc3d5698SJohn Baldwin#endif 1087*bc3d5698SJohn Baldwin eor r3,r3,r6 1088*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1089*bc3d5698SJohn Baldwin str r1,[sp,#200+4] 1090*bc3d5698SJohn Baldwin#else 1091*bc3d5698SJohn Baldwin 
strd r0,r1,[sp,#200] @ D[0] = C[0] 1092*bc3d5698SJohn Baldwin#endif 1093*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1094*bc3d5698SJohn Baldwin ldr r6,[sp,#384+4] 1095*bc3d5698SJohn Baldwin#else 1096*bc3d5698SJohn Baldwin ldrd r7,r6,[sp,#384] 1097*bc3d5698SJohn Baldwin#endif 1098*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1099*bc3d5698SJohn Baldwin str r2,[sp,#216] @ D[2] = C[1] 1100*bc3d5698SJohn Baldwin#endif 1101*bc3d5698SJohn Baldwin eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2]; 1102*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1103*bc3d5698SJohn Baldwin str r3,[sp,#216+4] 1104*bc3d5698SJohn Baldwin#else 1105*bc3d5698SJohn Baldwin strd r2,r3,[sp,#216] @ D[2] = C[1] 1106*bc3d5698SJohn Baldwin#endif 1107*bc3d5698SJohn Baldwin eor r5,r5,r8 1108*bc3d5698SJohn Baldwin 1109*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1110*bc3d5698SJohn Baldwin ldr r8,[sp,#432] 1111*bc3d5698SJohn Baldwin#endif 1112*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1113*bc3d5698SJohn Baldwin ldr r9,[sp,#432+4] 1114*bc3d5698SJohn Baldwin#else 1115*bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#432] 1116*bc3d5698SJohn Baldwin#endif 1117*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1118*bc3d5698SJohn Baldwin str r4,[sp,#224] @ D[3] = C[2] 1119*bc3d5698SJohn Baldwin#endif 1120*bc3d5698SJohn Baldwin eor r7,r7,r4 1121*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1122*bc3d5698SJohn Baldwin str r5,[sp,#224+4] 1123*bc3d5698SJohn Baldwin#else 1124*bc3d5698SJohn Baldwin strd r4,r5,[sp,#224] @ D[3] = C[2] 1125*bc3d5698SJohn Baldwin#endif 1126*bc3d5698SJohn Baldwin eor r6,r6,r5 1127*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1128*bc3d5698SJohn Baldwin ldr r4,[sp,#240] 1129*bc3d5698SJohn Baldwin#endif 1130*bc3d5698SJohn Baldwin @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */ 1131*bc3d5698SJohn Baldwin @ mov r6,r6,ror#32-11 1132*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1133*bc3d5698SJohn Baldwin ldr r5,[sp,#240+4] 1134*bc3d5698SJohn Baldwin#else 1135*bc3d5698SJohn Baldwin ldrd r4,r5,[sp,#240] 
1136*bc3d5698SJohn Baldwin#endif 1137*bc3d5698SJohn Baldwin eor r8,r8,r12 1138*bc3d5698SJohn Baldwin eor r9,r9,r14 1139*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1140*bc3d5698SJohn Baldwin ldr r12,[sp,#336] 1141*bc3d5698SJohn Baldwin#endif 1142*bc3d5698SJohn Baldwin eor r0,r0,r4 1143*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1144*bc3d5698SJohn Baldwin ldr r14,[sp,#336+4] 1145*bc3d5698SJohn Baldwin#else 1146*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#336] 1147*bc3d5698SJohn Baldwin#endif 1148*bc3d5698SJohn Baldwin @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */ 1149*bc3d5698SJohn Baldwin @ mov r9,r9,ror#32-7 1150*bc3d5698SJohn Baldwin eor r1,r1,r5 @ C[0] = A[0][0] ^ C[0]; 1151*bc3d5698SJohn Baldwin eor r12,r12,r2 1152*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1153*bc3d5698SJohn Baldwin ldr r2,[sp,#288] 1154*bc3d5698SJohn Baldwin#endif 1155*bc3d5698SJohn Baldwin eor r14,r14,r3 1156*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1157*bc3d5698SJohn Baldwin ldr r3,[sp,#288+4] 1158*bc3d5698SJohn Baldwin#else 1159*bc3d5698SJohn Baldwin ldrd r2,r3,[sp,#288] 1160*bc3d5698SJohn Baldwin#endif 1161*bc3d5698SJohn Baldwin mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); 1162*bc3d5698SJohn Baldwin ldr r12,[sp,#444] @ load counter 1163*bc3d5698SJohn Baldwin eor r2,r2,r10 1164*bc3d5698SJohn Baldwin adr r10,iotas32 1165*bc3d5698SJohn Baldwin mov r4,r14,ror#32-22 1166*bc3d5698SJohn Baldwin add r14,r10,r12 1167*bc3d5698SJohn Baldwin eor r3,r3,r11 1168*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1169*bc3d5698SJohn Baldwin ldr r10,[r14,#8] @ iotas[i].lo 1170*bc3d5698SJohn Baldwin#endif 1171*bc3d5698SJohn Baldwin add r12,r12,#16 1172*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1173*bc3d5698SJohn Baldwin ldr r11,[r14,#12] @ iotas[i].hi 1174*bc3d5698SJohn Baldwin#else 1175*bc3d5698SJohn Baldwin ldrd r10,r11,[r14,#8] @ iotas[i].lo 1176*bc3d5698SJohn Baldwin#endif 1177*bc3d5698SJohn Baldwin cmp r12,#192 1178*bc3d5698SJohn Baldwin str r12,[sp,#444] @ store counter 
1179*bc3d5698SJohn Baldwin bic r12,r4,r2,ror#32-22 1180*bc3d5698SJohn Baldwin bic r14,r5,r3,ror#32-22 1181*bc3d5698SJohn Baldwin mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); 1182*bc3d5698SJohn Baldwin mov r3,r3,ror#32-22 1183*bc3d5698SJohn Baldwin eor r12,r12,r0 1184*bc3d5698SJohn Baldwin eor r14,r14,r1 1185*bc3d5698SJohn Baldwin eor r10,r10,r12 1186*bc3d5698SJohn Baldwin eor r11,r11,r14 1187*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1188*bc3d5698SJohn Baldwin str r10,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 1189*bc3d5698SJohn Baldwin#endif 1190*bc3d5698SJohn Baldwin bic r12,r6,r4,ror#11 1191*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1192*bc3d5698SJohn Baldwin str r11,[sp,#0+4] 1193*bc3d5698SJohn Baldwin#else 1194*bc3d5698SJohn Baldwin strd r10,r11,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 1195*bc3d5698SJohn Baldwin#endif 1196*bc3d5698SJohn Baldwin bic r14,r7,r5,ror#10 1197*bc3d5698SJohn Baldwin bic r10,r8,r6,ror#32-(11-7) 1198*bc3d5698SJohn Baldwin bic r11,r9,r7,ror#32-(10-7) 1199*bc3d5698SJohn Baldwin eor r12,r2,r12,ror#32-11 1200*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1201*bc3d5698SJohn Baldwin str r12,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 1202*bc3d5698SJohn Baldwin#endif 1203*bc3d5698SJohn Baldwin eor r14,r3,r14,ror#32-10 1204*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1205*bc3d5698SJohn Baldwin str r14,[sp,#8+4] 1206*bc3d5698SJohn Baldwin#else 1207*bc3d5698SJohn Baldwin strd r12,r14,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 1208*bc3d5698SJohn Baldwin#endif 1209*bc3d5698SJohn Baldwin eor r10,r4,r10,ror#32-7 1210*bc3d5698SJohn Baldwin eor r11,r5,r11,ror#32-7 1211*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1212*bc3d5698SJohn Baldwin str r10,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 1213*bc3d5698SJohn Baldwin#endif 1214*bc3d5698SJohn Baldwin bic r12,r0,r8,ror#32-7 1215*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1216*bc3d5698SJohn Baldwin str r11,[sp,#16+4] 1217*bc3d5698SJohn Baldwin#else 1218*bc3d5698SJohn Baldwin strd 
r10,r11,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 1219*bc3d5698SJohn Baldwin#endif 1220*bc3d5698SJohn Baldwin bic r14,r1,r9,ror#32-7 1221*bc3d5698SJohn Baldwin eor r12,r12,r6,ror#32-11 1222*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1223*bc3d5698SJohn Baldwin str r12,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 1224*bc3d5698SJohn Baldwin#endif 1225*bc3d5698SJohn Baldwin eor r14,r14,r7,ror#32-10 1226*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1227*bc3d5698SJohn Baldwin str r14,[sp,#24+4] 1228*bc3d5698SJohn Baldwin#else 1229*bc3d5698SJohn Baldwin strd r12,r14,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 1230*bc3d5698SJohn Baldwin#endif 1231*bc3d5698SJohn Baldwin bic r10,r2,r0 1232*bc3d5698SJohn Baldwin add r14,sp,#224 1233*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1234*bc3d5698SJohn Baldwin ldr r0,[sp,#264] @ A[0][3] 1235*bc3d5698SJohn Baldwin#endif 1236*bc3d5698SJohn Baldwin bic r11,r3,r1 1237*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1238*bc3d5698SJohn Baldwin ldr r1,[sp,#264+4] 1239*bc3d5698SJohn Baldwin#else 1240*bc3d5698SJohn Baldwin ldrd r0,r1,[sp,#264] @ A[0][3] 1241*bc3d5698SJohn Baldwin#endif 1242*bc3d5698SJohn Baldwin eor r10,r10,r8,ror#32-7 1243*bc3d5698SJohn Baldwin eor r11,r11,r9,ror#32-7 1244*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1245*bc3d5698SJohn Baldwin str r10,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 1246*bc3d5698SJohn Baldwin#endif 1247*bc3d5698SJohn Baldwin add r9,sp,#200 1248*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1249*bc3d5698SJohn Baldwin str r11,[sp,#32+4] 1250*bc3d5698SJohn Baldwin#else 1251*bc3d5698SJohn Baldwin strd r10,r11,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 1252*bc3d5698SJohn Baldwin#endif 1253*bc3d5698SJohn Baldwin 1254*bc3d5698SJohn Baldwin ldmia r14,{r10,r11,r12,r14} @ D[3..4] 1255*bc3d5698SJohn Baldwin ldmia r9,{r6,r7,r8,r9} @ D[0..1] 1256*bc3d5698SJohn Baldwin 1257*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1258*bc3d5698SJohn Baldwin ldr r2,[sp,#312] @ A[1][4] 1259*bc3d5698SJohn Baldwin#endif 1260*bc3d5698SJohn Baldwin eor r0,r0,r10 
1261*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1262*bc3d5698SJohn Baldwin ldr r3,[sp,#312+4] 1263*bc3d5698SJohn Baldwin#else 1264*bc3d5698SJohn Baldwin ldrd r2,r3,[sp,#312] @ A[1][4] 1265*bc3d5698SJohn Baldwin#endif 1266*bc3d5698SJohn Baldwin eor r1,r1,r11 1267*bc3d5698SJohn Baldwin @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]); 1268*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1269*bc3d5698SJohn Baldwin ldr r10,[sp,#368] @ A[3][1] 1270*bc3d5698SJohn Baldwin#endif 1271*bc3d5698SJohn Baldwin @ mov r1,r1,ror#32-14 1272*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1273*bc3d5698SJohn Baldwin ldr r11,[sp,#368+4] 1274*bc3d5698SJohn Baldwin#else 1275*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#368] @ A[3][1] 1276*bc3d5698SJohn Baldwin#endif 1277*bc3d5698SJohn Baldwin 1278*bc3d5698SJohn Baldwin eor r2,r2,r12 1279*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1280*bc3d5698SJohn Baldwin ldr r4,[sp,#320] @ A[2][0] 1281*bc3d5698SJohn Baldwin#endif 1282*bc3d5698SJohn Baldwin eor r3,r3,r14 1283*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1284*bc3d5698SJohn Baldwin ldr r5,[sp,#320+4] 1285*bc3d5698SJohn Baldwin#else 1286*bc3d5698SJohn Baldwin ldrd r4,r5,[sp,#320] @ A[2][0] 1287*bc3d5698SJohn Baldwin#endif 1288*bc3d5698SJohn Baldwin @ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]); 1289*bc3d5698SJohn Baldwin @ mov r3,r3,ror#32-10 1290*bc3d5698SJohn Baldwin 1291*bc3d5698SJohn Baldwin eor r6,r6,r4 1292*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1293*bc3d5698SJohn Baldwin ldr r12,[sp,#216] @ D[2] 1294*bc3d5698SJohn Baldwin#endif 1295*bc3d5698SJohn Baldwin eor r7,r7,r5 1296*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1297*bc3d5698SJohn Baldwin ldr r14,[sp,#216+4] 1298*bc3d5698SJohn Baldwin#else 1299*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#216] @ D[2] 1300*bc3d5698SJohn Baldwin#endif 1301*bc3d5698SJohn Baldwin mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]); 1302*bc3d5698SJohn Baldwin mov r4,r7,ror#32-2 1303*bc3d5698SJohn Baldwin 1304*bc3d5698SJohn Baldwin 
eor r10,r10,r8 1305*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1306*bc3d5698SJohn Baldwin ldr r8,[sp,#416] @ A[4][2] 1307*bc3d5698SJohn Baldwin#endif 1308*bc3d5698SJohn Baldwin eor r11,r11,r9 1309*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1310*bc3d5698SJohn Baldwin ldr r9,[sp,#416+4] 1311*bc3d5698SJohn Baldwin#else 1312*bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#416] @ A[4][2] 1313*bc3d5698SJohn Baldwin#endif 1314*bc3d5698SJohn Baldwin mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]); 1315*bc3d5698SJohn Baldwin mov r6,r11,ror#32-23 1316*bc3d5698SJohn Baldwin 1317*bc3d5698SJohn Baldwin bic r10,r4,r2,ror#32-10 1318*bc3d5698SJohn Baldwin bic r11,r5,r3,ror#32-10 1319*bc3d5698SJohn Baldwin eor r12,r12,r8 1320*bc3d5698SJohn Baldwin eor r14,r14,r9 1321*bc3d5698SJohn Baldwin mov r9,r12,ror#32-30 @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]); 1322*bc3d5698SJohn Baldwin mov r8,r14,ror#32-31 1323*bc3d5698SJohn Baldwin eor r10,r10,r0,ror#32-14 1324*bc3d5698SJohn Baldwin eor r11,r11,r1,ror#32-14 1325*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1326*bc3d5698SJohn Baldwin str r10,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 1327*bc3d5698SJohn Baldwin#endif 1328*bc3d5698SJohn Baldwin bic r12,r6,r4 1329*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1330*bc3d5698SJohn Baldwin str r11,[sp,#40+4] 1331*bc3d5698SJohn Baldwin#else 1332*bc3d5698SJohn Baldwin strd r10,r11,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 1333*bc3d5698SJohn Baldwin#endif 1334*bc3d5698SJohn Baldwin bic r14,r7,r5 1335*bc3d5698SJohn Baldwin eor r12,r12,r2,ror#32-10 1336*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1337*bc3d5698SJohn Baldwin str r12,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 1338*bc3d5698SJohn Baldwin#endif 1339*bc3d5698SJohn Baldwin eor r14,r14,r3,ror#32-10 1340*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1341*bc3d5698SJohn Baldwin str r14,[sp,#48+4] 1342*bc3d5698SJohn Baldwin#else 1343*bc3d5698SJohn Baldwin strd r12,r14,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 1344*bc3d5698SJohn Baldwin#endif 
1345*bc3d5698SJohn Baldwin bic r10,r8,r6 1346*bc3d5698SJohn Baldwin bic r11,r9,r7 1347*bc3d5698SJohn Baldwin bic r12,r0,r8,ror#14 1348*bc3d5698SJohn Baldwin bic r14,r1,r9,ror#14 1349*bc3d5698SJohn Baldwin eor r10,r10,r4 1350*bc3d5698SJohn Baldwin eor r11,r11,r5 1351*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1352*bc3d5698SJohn Baldwin str r10,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 1353*bc3d5698SJohn Baldwin#endif 1354*bc3d5698SJohn Baldwin bic r2,r2,r0,ror#32-(14-10) 1355*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1356*bc3d5698SJohn Baldwin str r11,[sp,#56+4] 1357*bc3d5698SJohn Baldwin#else 1358*bc3d5698SJohn Baldwin strd r10,r11,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 1359*bc3d5698SJohn Baldwin#endif 1360*bc3d5698SJohn Baldwin eor r12,r6,r12,ror#32-14 1361*bc3d5698SJohn Baldwin bic r11,r3,r1,ror#32-(14-10) 1362*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1363*bc3d5698SJohn Baldwin str r12,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 1364*bc3d5698SJohn Baldwin#endif 1365*bc3d5698SJohn Baldwin eor r14,r7,r14,ror#32-14 1366*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1367*bc3d5698SJohn Baldwin str r14,[sp,#64+4] 1368*bc3d5698SJohn Baldwin#else 1369*bc3d5698SJohn Baldwin strd r12,r14,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 1370*bc3d5698SJohn Baldwin#endif 1371*bc3d5698SJohn Baldwin add r12,sp,#208 1372*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1373*bc3d5698SJohn Baldwin ldr r1,[sp,#248] @ A[0][1] 1374*bc3d5698SJohn Baldwin#endif 1375*bc3d5698SJohn Baldwin eor r10,r8,r2,ror#32-10 1376*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1377*bc3d5698SJohn Baldwin ldr r0,[sp,#248+4] 1378*bc3d5698SJohn Baldwin#else 1379*bc3d5698SJohn Baldwin ldrd r1,r0,[sp,#248] @ A[0][1] 1380*bc3d5698SJohn Baldwin#endif 1381*bc3d5698SJohn Baldwin eor r11,r9,r11,ror#32-10 1382*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1383*bc3d5698SJohn Baldwin str r10,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 1384*bc3d5698SJohn Baldwin#endif 1385*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1386*bc3d5698SJohn Baldwin str 
r11,[sp,#72+4] 1387*bc3d5698SJohn Baldwin#else 1388*bc3d5698SJohn Baldwin strd r10,r11,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 1389*bc3d5698SJohn Baldwin#endif 1390*bc3d5698SJohn Baldwin 1391*bc3d5698SJohn Baldwin add r9,sp,#224 1392*bc3d5698SJohn Baldwin ldmia r12,{r10,r11,r12,r14} @ D[1..2] 1393*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1394*bc3d5698SJohn Baldwin ldr r2,[sp,#296] @ A[1][2] 1395*bc3d5698SJohn Baldwin#endif 1396*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1397*bc3d5698SJohn Baldwin ldr r3,[sp,#296+4] 1398*bc3d5698SJohn Baldwin#else 1399*bc3d5698SJohn Baldwin ldrd r2,r3,[sp,#296] @ A[1][2] 1400*bc3d5698SJohn Baldwin#endif 1401*bc3d5698SJohn Baldwin ldmia r9,{r6,r7,r8,r9} @ D[3..4] 1402*bc3d5698SJohn Baldwin 1403*bc3d5698SJohn Baldwin eor r1,r1,r10 1404*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1405*bc3d5698SJohn Baldwin ldr r4,[sp,#344] @ A[2][3] 1406*bc3d5698SJohn Baldwin#endif 1407*bc3d5698SJohn Baldwin eor r0,r0,r11 1408*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1409*bc3d5698SJohn Baldwin ldr r5,[sp,#344+4] 1410*bc3d5698SJohn Baldwin#else 1411*bc3d5698SJohn Baldwin ldrd r4,r5,[sp,#344] @ A[2][3] 1412*bc3d5698SJohn Baldwin#endif 1413*bc3d5698SJohn Baldwin mov r0,r0,ror#32-1 @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]); 1414*bc3d5698SJohn Baldwin 1415*bc3d5698SJohn Baldwin eor r2,r2,r12 1416*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1417*bc3d5698SJohn Baldwin ldr r10,[sp,#392] @ A[3][4] 1418*bc3d5698SJohn Baldwin#endif 1419*bc3d5698SJohn Baldwin eor r3,r3,r14 1420*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1421*bc3d5698SJohn Baldwin ldr r11,[sp,#392+4] 1422*bc3d5698SJohn Baldwin#else 1423*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#392] @ A[3][4] 1424*bc3d5698SJohn Baldwin#endif 1425*bc3d5698SJohn Baldwin @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]); 1426*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1427*bc3d5698SJohn Baldwin ldr r12,[sp,#200] @ D[0] 1428*bc3d5698SJohn Baldwin#endif 1429*bc3d5698SJohn Baldwin @ mov r3,r3,ror#32-3 
1430*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1431*bc3d5698SJohn Baldwin ldr r14,[sp,#200+4] 1432*bc3d5698SJohn Baldwin#else 1433*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#200] @ D[0] 1434*bc3d5698SJohn Baldwin#endif 1435*bc3d5698SJohn Baldwin 1436*bc3d5698SJohn Baldwin eor r4,r4,r6 1437*bc3d5698SJohn Baldwin eor r5,r5,r7 1438*bc3d5698SJohn Baldwin @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]); 1439*bc3d5698SJohn Baldwin @ mov r4,r7,ror#32-13 @ [track reverse order below] 1440*bc3d5698SJohn Baldwin 1441*bc3d5698SJohn Baldwin eor r10,r10,r8 1442*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1443*bc3d5698SJohn Baldwin ldr r8,[sp,#400] @ A[4][0] 1444*bc3d5698SJohn Baldwin#endif 1445*bc3d5698SJohn Baldwin eor r11,r11,r9 1446*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1447*bc3d5698SJohn Baldwin ldr r9,[sp,#400+4] 1448*bc3d5698SJohn Baldwin#else 1449*bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#400] @ A[4][0] 1450*bc3d5698SJohn Baldwin#endif 1451*bc3d5698SJohn Baldwin mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]); 1452*bc3d5698SJohn Baldwin mov r7,r11,ror#32-4 1453*bc3d5698SJohn Baldwin 1454*bc3d5698SJohn Baldwin eor r12,r12,r8 1455*bc3d5698SJohn Baldwin eor r14,r14,r9 1456*bc3d5698SJohn Baldwin mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]); 1457*bc3d5698SJohn Baldwin mov r9,r14,ror#32-9 1458*bc3d5698SJohn Baldwin 1459*bc3d5698SJohn Baldwin bic r10,r5,r2,ror#13-3 1460*bc3d5698SJohn Baldwin bic r11,r4,r3,ror#12-3 1461*bc3d5698SJohn Baldwin bic r12,r6,r5,ror#32-13 1462*bc3d5698SJohn Baldwin bic r14,r7,r4,ror#32-12 1463*bc3d5698SJohn Baldwin eor r10,r0,r10,ror#32-13 1464*bc3d5698SJohn Baldwin eor r11,r1,r11,ror#32-12 1465*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1466*bc3d5698SJohn Baldwin str r10,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 1467*bc3d5698SJohn Baldwin#endif 1468*bc3d5698SJohn Baldwin eor r12,r12,r2,ror#32-3 1469*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1470*bc3d5698SJohn Baldwin str r11,[sp,#80+4] 1471*bc3d5698SJohn 
Baldwin#else 1472*bc3d5698SJohn Baldwin strd r10,r11,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 1473*bc3d5698SJohn Baldwin#endif 1474*bc3d5698SJohn Baldwin eor r14,r14,r3,ror#32-3 1475*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1476*bc3d5698SJohn Baldwin str r12,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 1477*bc3d5698SJohn Baldwin#endif 1478*bc3d5698SJohn Baldwin bic r10,r8,r6 1479*bc3d5698SJohn Baldwin bic r11,r9,r7 1480*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1481*bc3d5698SJohn Baldwin str r14,[sp,#88+4] 1482*bc3d5698SJohn Baldwin#else 1483*bc3d5698SJohn Baldwin strd r12,r14,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 1484*bc3d5698SJohn Baldwin#endif 1485*bc3d5698SJohn Baldwin eor r10,r10,r5,ror#32-13 1486*bc3d5698SJohn Baldwin eor r11,r11,r4,ror#32-12 1487*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1488*bc3d5698SJohn Baldwin str r10,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 1489*bc3d5698SJohn Baldwin#endif 1490*bc3d5698SJohn Baldwin bic r12,r0,r8 1491*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1492*bc3d5698SJohn Baldwin str r11,[sp,#96+4] 1493*bc3d5698SJohn Baldwin#else 1494*bc3d5698SJohn Baldwin strd r10,r11,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 1495*bc3d5698SJohn Baldwin#endif 1496*bc3d5698SJohn Baldwin bic r14,r1,r9 1497*bc3d5698SJohn Baldwin eor r12,r12,r6 1498*bc3d5698SJohn Baldwin eor r14,r14,r7 1499*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1500*bc3d5698SJohn Baldwin str r12,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 1501*bc3d5698SJohn Baldwin#endif 1502*bc3d5698SJohn Baldwin bic r10,r2,r0,ror#3 1503*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1504*bc3d5698SJohn Baldwin str r14,[sp,#104+4] 1505*bc3d5698SJohn Baldwin#else 1506*bc3d5698SJohn Baldwin strd r12,r14,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 1507*bc3d5698SJohn Baldwin#endif 1508*bc3d5698SJohn Baldwin bic r11,r3,r1,ror#3 1509*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1510*bc3d5698SJohn Baldwin ldr r1,[sp,#272] @ A[0][4] [in reverse order] 1511*bc3d5698SJohn Baldwin#endif 1512*bc3d5698SJohn Baldwin 
eor r10,r8,r10,ror#32-3 1513*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1514*bc3d5698SJohn Baldwin ldr r0,[sp,#272+4] 1515*bc3d5698SJohn Baldwin#else 1516*bc3d5698SJohn Baldwin ldrd r1,r0,[sp,#272] @ A[0][4] [in reverse order] 1517*bc3d5698SJohn Baldwin#endif 1518*bc3d5698SJohn Baldwin eor r11,r9,r11,ror#32-3 1519*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1520*bc3d5698SJohn Baldwin str r10,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 1521*bc3d5698SJohn Baldwin#endif 1522*bc3d5698SJohn Baldwin add r9,sp,#208 1523*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1524*bc3d5698SJohn Baldwin str r11,[sp,#112+4] 1525*bc3d5698SJohn Baldwin#else 1526*bc3d5698SJohn Baldwin strd r10,r11,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 1527*bc3d5698SJohn Baldwin#endif 1528*bc3d5698SJohn Baldwin 1529*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1530*bc3d5698SJohn Baldwin ldr r10,[sp,#232] @ D[4] 1531*bc3d5698SJohn Baldwin#endif 1532*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1533*bc3d5698SJohn Baldwin ldr r11,[sp,#232+4] 1534*bc3d5698SJohn Baldwin#else 1535*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#232] @ D[4] 1536*bc3d5698SJohn Baldwin#endif 1537*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1538*bc3d5698SJohn Baldwin ldr r12,[sp,#200] @ D[0] 1539*bc3d5698SJohn Baldwin#endif 1540*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1541*bc3d5698SJohn Baldwin ldr r14,[sp,#200+4] 1542*bc3d5698SJohn Baldwin#else 1543*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#200] @ D[0] 1544*bc3d5698SJohn Baldwin#endif 1545*bc3d5698SJohn Baldwin 1546*bc3d5698SJohn Baldwin ldmia r9,{r6,r7,r8,r9} @ D[1..2] 1547*bc3d5698SJohn Baldwin 1548*bc3d5698SJohn Baldwin eor r1,r1,r10 1549*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1550*bc3d5698SJohn Baldwin ldr r2,[sp,#280] @ A[1][0] 1551*bc3d5698SJohn Baldwin#endif 1552*bc3d5698SJohn Baldwin eor r0,r0,r11 1553*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1554*bc3d5698SJohn Baldwin ldr r3,[sp,#280+4] 1555*bc3d5698SJohn Baldwin#else 1556*bc3d5698SJohn Baldwin ldrd r2,r3,[sp,#280] @ A[1][0] 1557*bc3d5698SJohn 
Baldwin#endif 1558*bc3d5698SJohn Baldwin @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]); 1559*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1560*bc3d5698SJohn Baldwin ldr r4,[sp,#328] @ A[2][1] 1561*bc3d5698SJohn Baldwin#endif 1562*bc3d5698SJohn Baldwin @ mov r0,r11,ror#32-14 @ [was loaded in reverse order] 1563*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1564*bc3d5698SJohn Baldwin ldr r5,[sp,#328+4] 1565*bc3d5698SJohn Baldwin#else 1566*bc3d5698SJohn Baldwin ldrd r4,r5,[sp,#328] @ A[2][1] 1567*bc3d5698SJohn Baldwin#endif 1568*bc3d5698SJohn Baldwin 1569*bc3d5698SJohn Baldwin eor r2,r2,r12 1570*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1571*bc3d5698SJohn Baldwin ldr r10,[sp,#376] @ A[3][2] 1572*bc3d5698SJohn Baldwin#endif 1573*bc3d5698SJohn Baldwin eor r3,r3,r14 1574*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1575*bc3d5698SJohn Baldwin ldr r11,[sp,#376+4] 1576*bc3d5698SJohn Baldwin#else 1577*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#376] @ A[3][2] 1578*bc3d5698SJohn Baldwin#endif 1579*bc3d5698SJohn Baldwin @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]); 1580*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1581*bc3d5698SJohn Baldwin ldr r12,[sp,#224] @ D[3] 1582*bc3d5698SJohn Baldwin#endif 1583*bc3d5698SJohn Baldwin @ mov r3,r3,ror#32-18 1584*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1585*bc3d5698SJohn Baldwin ldr r14,[sp,#224+4] 1586*bc3d5698SJohn Baldwin#else 1587*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#224] @ D[3] 1588*bc3d5698SJohn Baldwin#endif 1589*bc3d5698SJohn Baldwin 1590*bc3d5698SJohn Baldwin eor r6,r6,r4 1591*bc3d5698SJohn Baldwin eor r7,r7,r5 1592*bc3d5698SJohn Baldwin mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]); 1593*bc3d5698SJohn Baldwin mov r5,r7,ror#32-5 1594*bc3d5698SJohn Baldwin 1595*bc3d5698SJohn Baldwin eor r10,r10,r8 1596*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1597*bc3d5698SJohn Baldwin ldr r8,[sp,#424] @ A[4][3] 1598*bc3d5698SJohn Baldwin#endif 1599*bc3d5698SJohn Baldwin eor r11,r11,r9 1600*bc3d5698SJohn 
Baldwin#ifndef __thumb2__ 1601*bc3d5698SJohn Baldwin ldr r9,[sp,#424+4] 1602*bc3d5698SJohn Baldwin#else 1603*bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#424] @ A[4][3] 1604*bc3d5698SJohn Baldwin#endif 1605*bc3d5698SJohn Baldwin mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]); 1606*bc3d5698SJohn Baldwin mov r6,r11,ror#32-8 1607*bc3d5698SJohn Baldwin 1608*bc3d5698SJohn Baldwin eor r12,r12,r8 1609*bc3d5698SJohn Baldwin eor r14,r14,r9 1610*bc3d5698SJohn Baldwin mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]); 1611*bc3d5698SJohn Baldwin mov r9,r14,ror#32-28 1612*bc3d5698SJohn Baldwin 1613*bc3d5698SJohn Baldwin bic r10,r4,r2,ror#32-18 1614*bc3d5698SJohn Baldwin bic r11,r5,r3,ror#32-18 1615*bc3d5698SJohn Baldwin eor r10,r10,r0,ror#32-14 1616*bc3d5698SJohn Baldwin eor r11,r11,r1,ror#32-13 1617*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1618*bc3d5698SJohn Baldwin str r10,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 1619*bc3d5698SJohn Baldwin#endif 1620*bc3d5698SJohn Baldwin bic r12,r6,r4 1621*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1622*bc3d5698SJohn Baldwin str r11,[sp,#120+4] 1623*bc3d5698SJohn Baldwin#else 1624*bc3d5698SJohn Baldwin strd r10,r11,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 1625*bc3d5698SJohn Baldwin#endif 1626*bc3d5698SJohn Baldwin bic r14,r7,r5 1627*bc3d5698SJohn Baldwin eor r12,r12,r2,ror#32-18 1628*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1629*bc3d5698SJohn Baldwin str r12,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 1630*bc3d5698SJohn Baldwin#endif 1631*bc3d5698SJohn Baldwin eor r14,r14,r3,ror#32-18 1632*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1633*bc3d5698SJohn Baldwin str r14,[sp,#128+4] 1634*bc3d5698SJohn Baldwin#else 1635*bc3d5698SJohn Baldwin strd r12,r14,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 1636*bc3d5698SJohn Baldwin#endif 1637*bc3d5698SJohn Baldwin bic r10,r8,r6 1638*bc3d5698SJohn Baldwin bic r11,r9,r7 1639*bc3d5698SJohn Baldwin bic r12,r0,r8,ror#14 1640*bc3d5698SJohn Baldwin bic r14,r1,r9,ror#13 
1641*bc3d5698SJohn Baldwin eor r10,r10,r4 1642*bc3d5698SJohn Baldwin eor r11,r11,r5 1643*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1644*bc3d5698SJohn Baldwin str r10,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 1645*bc3d5698SJohn Baldwin#endif 1646*bc3d5698SJohn Baldwin bic r2,r2,r0,ror#18-14 1647*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1648*bc3d5698SJohn Baldwin str r11,[sp,#136+4] 1649*bc3d5698SJohn Baldwin#else 1650*bc3d5698SJohn Baldwin strd r10,r11,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 1651*bc3d5698SJohn Baldwin#endif 1652*bc3d5698SJohn Baldwin eor r12,r6,r12,ror#32-14 1653*bc3d5698SJohn Baldwin bic r11,r3,r1,ror#18-13 1654*bc3d5698SJohn Baldwin eor r14,r7,r14,ror#32-13 1655*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1656*bc3d5698SJohn Baldwin str r12,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 1657*bc3d5698SJohn Baldwin#endif 1658*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1659*bc3d5698SJohn Baldwin str r14,[sp,#144+4] 1660*bc3d5698SJohn Baldwin#else 1661*bc3d5698SJohn Baldwin strd r12,r14,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 1662*bc3d5698SJohn Baldwin#endif 1663*bc3d5698SJohn Baldwin add r14,sp,#216 1664*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1665*bc3d5698SJohn Baldwin ldr r0,[sp,#256] @ A[0][2] 1666*bc3d5698SJohn Baldwin#endif 1667*bc3d5698SJohn Baldwin eor r10,r8,r2,ror#32-18 1668*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1669*bc3d5698SJohn Baldwin ldr r1,[sp,#256+4] 1670*bc3d5698SJohn Baldwin#else 1671*bc3d5698SJohn Baldwin ldrd r0,r1,[sp,#256] @ A[0][2] 1672*bc3d5698SJohn Baldwin#endif 1673*bc3d5698SJohn Baldwin eor r11,r9,r11,ror#32-18 1674*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1675*bc3d5698SJohn Baldwin str r10,[sp,#152] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 1676*bc3d5698SJohn Baldwin#endif 1677*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1678*bc3d5698SJohn Baldwin str r11,[sp,#152+4] 1679*bc3d5698SJohn Baldwin#else 1680*bc3d5698SJohn Baldwin strd r10,r11,[sp,#152] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 1681*bc3d5698SJohn Baldwin#endif 1682*bc3d5698SJohn 
Baldwin 1683*bc3d5698SJohn Baldwin ldmia r14,{r10,r11,r12,r14} @ D[2..3] 1684*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1685*bc3d5698SJohn Baldwin ldr r2,[sp,#304] @ A[1][3] 1686*bc3d5698SJohn Baldwin#endif 1687*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1688*bc3d5698SJohn Baldwin ldr r3,[sp,#304+4] 1689*bc3d5698SJohn Baldwin#else 1690*bc3d5698SJohn Baldwin ldrd r2,r3,[sp,#304] @ A[1][3] 1691*bc3d5698SJohn Baldwin#endif 1692*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1693*bc3d5698SJohn Baldwin ldr r6,[sp,#232] @ D[4] 1694*bc3d5698SJohn Baldwin#endif 1695*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1696*bc3d5698SJohn Baldwin ldr r7,[sp,#232+4] 1697*bc3d5698SJohn Baldwin#else 1698*bc3d5698SJohn Baldwin ldrd r6,r7,[sp,#232] @ D[4] 1699*bc3d5698SJohn Baldwin#endif 1700*bc3d5698SJohn Baldwin 1701*bc3d5698SJohn Baldwin eor r0,r0,r10 1702*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1703*bc3d5698SJohn Baldwin ldr r4,[sp,#352] @ A[2][4] 1704*bc3d5698SJohn Baldwin#endif 1705*bc3d5698SJohn Baldwin eor r1,r1,r11 1706*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1707*bc3d5698SJohn Baldwin ldr r5,[sp,#352+4] 1708*bc3d5698SJohn Baldwin#else 1709*bc3d5698SJohn Baldwin ldrd r4,r5,[sp,#352] @ A[2][4] 1710*bc3d5698SJohn Baldwin#endif 1711*bc3d5698SJohn Baldwin @ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]); 1712*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1713*bc3d5698SJohn Baldwin ldr r8,[sp,#200] @ D[0] 1714*bc3d5698SJohn Baldwin#endif 1715*bc3d5698SJohn Baldwin @ mov r1,r1,ror#32-31 1716*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1717*bc3d5698SJohn Baldwin ldr r9,[sp,#200+4] 1718*bc3d5698SJohn Baldwin#else 1719*bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#200] @ D[0] 1720*bc3d5698SJohn Baldwin#endif 1721*bc3d5698SJohn Baldwin 1722*bc3d5698SJohn Baldwin eor r12,r12,r2 1723*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1724*bc3d5698SJohn Baldwin ldr r10,[sp,#360] @ A[3][0] 1725*bc3d5698SJohn Baldwin#endif 1726*bc3d5698SJohn Baldwin eor r14,r14,r3 1727*bc3d5698SJohn Baldwin#ifndef __thumb2__ 
1728*bc3d5698SJohn Baldwin ldr r11,[sp,#360+4] 1729*bc3d5698SJohn Baldwin#else 1730*bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#360] @ A[3][0] 1731*bc3d5698SJohn Baldwin#endif 1732*bc3d5698SJohn Baldwin mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]); 1733*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1734*bc3d5698SJohn Baldwin ldr r12,[sp,#208] @ D[1] 1735*bc3d5698SJohn Baldwin#endif 1736*bc3d5698SJohn Baldwin mov r2,r14,ror#32-28 1737*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1738*bc3d5698SJohn Baldwin ldr r14,[sp,#208+4] 1739*bc3d5698SJohn Baldwin#else 1740*bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#208] @ D[1] 1741*bc3d5698SJohn Baldwin#endif 1742*bc3d5698SJohn Baldwin 1743*bc3d5698SJohn Baldwin eor r6,r6,r4 1744*bc3d5698SJohn Baldwin eor r7,r7,r5 1745*bc3d5698SJohn Baldwin mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]); 1746*bc3d5698SJohn Baldwin mov r4,r7,ror#32-20 1747*bc3d5698SJohn Baldwin 1748*bc3d5698SJohn Baldwin eor r10,r10,r8 1749*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1750*bc3d5698SJohn Baldwin ldr r8,[sp,#408] @ A[4][1] 1751*bc3d5698SJohn Baldwin#endif 1752*bc3d5698SJohn Baldwin eor r11,r11,r9 1753*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1754*bc3d5698SJohn Baldwin ldr r9,[sp,#408+4] 1755*bc3d5698SJohn Baldwin#else 1756*bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#408] @ A[4][1] 1757*bc3d5698SJohn Baldwin#endif 1758*bc3d5698SJohn Baldwin mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]); 1759*bc3d5698SJohn Baldwin mov r6,r11,ror#32-21 1760*bc3d5698SJohn Baldwin 1761*bc3d5698SJohn Baldwin eor r8,r8,r12 1762*bc3d5698SJohn Baldwin eor r9,r9,r14 1763*bc3d5698SJohn Baldwin @ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]); 1764*bc3d5698SJohn Baldwin @ mov r9,r3,ror#32-1 1765*bc3d5698SJohn Baldwin 1766*bc3d5698SJohn Baldwin bic r10,r4,r2 1767*bc3d5698SJohn Baldwin bic r11,r5,r3 1768*bc3d5698SJohn Baldwin eor r10,r10,r0,ror#32-31 1769*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1770*bc3d5698SJohn Baldwin str 
r10,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 1771*bc3d5698SJohn Baldwin#endif 1772*bc3d5698SJohn Baldwin eor r11,r11,r1,ror#32-31 1773*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1774*bc3d5698SJohn Baldwin str r11,[sp,#160+4] 1775*bc3d5698SJohn Baldwin#else 1776*bc3d5698SJohn Baldwin strd r10,r11,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 1777*bc3d5698SJohn Baldwin#endif 1778*bc3d5698SJohn Baldwin bic r12,r6,r4 1779*bc3d5698SJohn Baldwin bic r14,r7,r5 1780*bc3d5698SJohn Baldwin eor r12,r12,r2 1781*bc3d5698SJohn Baldwin eor r14,r14,r3 1782*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1783*bc3d5698SJohn Baldwin str r12,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 1784*bc3d5698SJohn Baldwin#endif 1785*bc3d5698SJohn Baldwin bic r10,r8,r6,ror#1 1786*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1787*bc3d5698SJohn Baldwin str r14,[sp,#168+4] 1788*bc3d5698SJohn Baldwin#else 1789*bc3d5698SJohn Baldwin strd r12,r14,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 1790*bc3d5698SJohn Baldwin#endif 1791*bc3d5698SJohn Baldwin bic r11,r9,r7,ror#1 1792*bc3d5698SJohn Baldwin bic r12,r0,r8,ror#31-1 1793*bc3d5698SJohn Baldwin bic r14,r1,r9,ror#31-1 1794*bc3d5698SJohn Baldwin eor r4,r4,r10,ror#32-1 1795*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1796*bc3d5698SJohn Baldwin str r4,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 1797*bc3d5698SJohn Baldwin#endif 1798*bc3d5698SJohn Baldwin eor r5,r5,r11,ror#32-1 1799*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1800*bc3d5698SJohn Baldwin str r5,[sp,#176+4] 1801*bc3d5698SJohn Baldwin#else 1802*bc3d5698SJohn Baldwin strd r4,r5,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 1803*bc3d5698SJohn Baldwin#endif 1804*bc3d5698SJohn Baldwin eor r6,r6,r12,ror#32-31 1805*bc3d5698SJohn Baldwin eor r7,r7,r14,ror#32-31 1806*bc3d5698SJohn Baldwin#ifndef __thumb2__ 1807*bc3d5698SJohn Baldwin str r6,[sp,#184] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 1808*bc3d5698SJohn Baldwin#endif 1809*bc3d5698SJohn Baldwin bic r10,r2,r0,ror#32-31 1810*bc3d5698SJohn Baldwin#ifndef __thumb2__ 
@ Closing instructions of KeccakF1600_int; the ifndef __thumb2__ conditional
@ that encloses the first store below is opened immediately above this point.
	str	r7,[sp,#184+4]
#else
	strd	r6,r7,[sp,#184]		@ R[4][3] = C[3] ^= (~C[4] & C[0]);
#endif
	bic	r11,r3,r1,ror#32-31
	add	r12,sp,#0		@ r12 = sp+0
	eor	r8,r10,r8,ror#32-1
	add	r10,sp,#40		@ r10 = sp+40; KeccakF1600 reads through r10 after the call
	eor	r9,r11,r9,ror#32-1
#ifndef __thumb2__
	str	r8,[sp,#192]		@ R[4][4] = C[4] ^= (~C[0] & C[1]);
	str	r9,[sp,#192+4]
#else
	strd	r8,r9,[sp,#192]		@ R[4][4] = C[4] ^= (~C[0] & C[1]);
#endif
	blo	.Lround2x		@ flags are set earlier in the routine (not shown here)

	ldr	pc,[sp,#440]		@ jump to the address stored at sp+440
.size	KeccakF1600_int,.-KeccakF1600_int

@-----------------------------------------------------------------------
@ KeccakF1600
@   In:    r0 = pointer to the 200-byte state (comments call it A[5][5])
@   Out:   state at the original r0 overwritten with the permuted copy
@   Saves: r4-r11; returns by popping pc
@   Frame: 440+16 bytes below the pushed registers; the state copy lives
@          at sp+0..sp+199 and the pushed r0 sits at sp+440+16.
@   Copies the state to the stack in five 40-byte chunks, calls
@   KeccakF1600_enter (defined elsewhere in this file), then copies the
@   stack image back.  The copy-back relies on r10 == sp+40 on return,
@   which is what the end of KeccakF1600_int establishes.
@-----------------------------------------------------------------------
.type	KeccakF1600, %function
.align	5
KeccakF1600:
	stmdb	sp!,{r0,r4-r11,lr}
	sub	sp,sp,#440+16		@ room for A[5][5],D[5],T[5][5],...

	add	r10,r0,#40		@ r10 = source cursor, second chunk
	add	r11,sp,#40		@ r11 = stack cursor, second chunk
	ldmia	r0,{r0-r9}		@ chunk 0: state -> stack
	stmia	sp,{r0-r9}
	ldmia	r10!,{r0-r9}		@ chunk 1
	stmia	r11!,{r0-r9}
	ldmia	r10!,{r0-r9}		@ chunk 2
	stmia	r11!,{r0-r9}
	ldmia	r10!,{r0-r9}		@ chunk 3
	stmia	r11!,{r0-r9}
	ldmia	r10,{r0-r9}		@ chunk 4
	add	r12,sp,#0		@ r12 = sp+0  (set up for the callee)
	add	r10,sp,#40		@ r10 = sp+40 (set up for the callee)
	stmia	r11,{r0-r9}

	bl	KeccakF1600_enter

	ldr	r11,[sp,#440+16]	@ reload the state pointer pushed as r0
	ldmia	sp,{r0-r9}		@ chunk 0: stack -> state
	stmia	r11!,{r0-r9}
	ldmia	r10!,{r0-r9}		@ chunk 1 (r10 starts at sp+40)
	stmia	r11!,{r0-r9}
	ldmia	r10!,{r0-r9}		@ chunk 2
	stmia	r11!,{r0-r9}
	ldmia	r10!,{r0-r9}		@ chunk 3
	stmia	r11!,{r0-r9}
	ldmia	r10,{r0-r9}		@ chunk 4
	stmia	r11,{r0-r9}

	add	sp,sp,#440+20		@ drop the frame plus the pushed r0
	ldmia	sp!,{r4-r11,pc}
.size	KeccakF1600,.-KeccakF1600

@-----------------------------------------------------------------------
@ SHA3_absorb
@   In:    r0 = state pointer, r1 = input pointer, r2 = len, r3 = bsz
@   Out:   r0 = len left over once no full bsz-sized block remains
@          (len itself when len < bsz on entry)
@   Saves: r4-r12; returns by popping pc
@   Roles: r10 = cursor into the stack copy of the state
@          r11 = input cursor, r12 = remaining len, r14 = bytes left in block
@          r6-r9 = masks 0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff
@   Frame: sp+0..199   working copy of the state
@          sp+456..468 the four masks (r6..r9)
@          sp+472      pushed r0 (state pointer)
@          sp+476      pushed r1, rewritten with the advancing input cursor
@          sp+480      pushed r2 slot, rewritten with len - bsz
@          sp+484      pushed r3 (bsz)
@   Each 8 input bytes are assembled little-endian into lo/hi words,
@   bit-interleaved, and XORed into the state copy; after bsz bytes
@   KeccakF1600_int (defined elsewhere in this file) is called.
@-----------------------------------------------------------------------
.globl	SHA3_absorb
.type	SHA3_absorb,%function
.align	5
SHA3_absorb:
	stmdb	sp!,{r0-r12,lr}
	sub	sp,sp,#456+16

	add	r10,r0,#40
	@ mov	r11,r1
	mov	r12,r2			@ r12 = len
	mov	r14,r3			@ r14 = bsz
	cmp	r2,r3
	blo	.Labsorb_abort		@ less than one block: nothing to do

	add	r11,sp,#0		@ r11 doubles as the stack cursor for the copy
	ldmia	r0,{r0-r9}		@ copy A[5][5] to the stack, 40 bytes at a time
	stmia	r11!,{r0-r9}
	ldmia	r10!,{r0-r9}
	stmia	r11!,{r0-r9}
	ldmia	r10!,{r0-r9}
	stmia	r11!,{r0-r9}
	ldmia	r10!,{r0-r9}
	stmia	r11!,{r0-r9}
	ldmia	r10!,{r0-r9}
	stmia	r11,{r0-r9}

	ldr	r11,[sp,#476]		@ r11 = input pointer again (pushed r1)
#ifdef	__thumb2__
	mov	r6,#0x55555555
	mov	r7,#0x33333333
	mov	r8,#0x0f0f0f0f
	mov	r9,#0x00ff00ff
#else
	mov	r6,#0x11		@ build the masks from byte seeds
	mov	r8,#0x0f
	mov	r9,#0xff
	orr	r6,r6,r6,lsl#8
	orr	r8,r8,r8,lsl#8
	orr	r6,r6,r6,lsl#16		@ 0x11111111
	orr	r9,r9,r9,lsl#16		@ 0x00ff00ff
	orr	r8,r8,r8,lsl#16		@ 0x0f0f0f0f
	orr	r7,r6,r6,lsl#1		@ 0x33333333
	orr	r6,r6,r6,lsl#2		@ 0x55555555
#endif
	str	r6,[sp,#456]		@ park the masks where the reload below finds them
	str	r7,[sp,#460]
	str	r8,[sp,#464]
	str	r9,[sp,#468]
	b	.Loop_absorb

.align	4
.Loop_absorb:
	subs	r0,r12,r14		@ r0 = len - bsz
	blo	.Labsorbed		@ no full block left
	add	r10,sp,#0		@ rewind the state cursor
	str	r0,[sp,#480]		@ save len - bsz

.align	4
.Loop_block:
	ldrb	r0,[r11],#1		@ gather 8 bytes, little-endian
	ldrb	r1,[r11],#1
	ldrb	r2,[r11],#1
	ldrb	r3,[r11],#1
	ldrb	r4,[r11],#1
	orr	r0,r0,r1,lsl#8
	ldrb	r1,[r11],#1
	orr	r0,r0,r2,lsl#16
	ldrb	r2,[r11],#1
	orr	r0,r0,r3,lsl#24		@ lo
	ldrb	r3,[r11],#1
	orr	r1,r4,r1,lsl#8
	orr	r1,r1,r2,lsl#16
	orr	r1,r1,r3,lsl#24		@ hi

	and	r2,r0,r6		@ lo & 0x55555555
	and	r0,r0,r6,lsl#1		@ lo & 0xaaaaaaaa
	and	r3,r1,r6		@ hi & 0x55555555
	and	r1,r1,r6,lsl#1		@ hi & 0xaaaaaaaa
	orr	r2,r2,r2,lsr#1
	orr	r0,r0,r0,lsl#1
	orr	r3,r3,r3,lsr#1
	orr	r1,r1,r1,lsl#1
	and	r2,r2,r7		@ &=0x33333333
	and	r0,r0,r7,lsl#2		@ &=0xcccccccc
	and	r3,r3,r7		@ &=0x33333333
	and	r1,r1,r7,lsl#2		@ &=0xcccccccc
	orr	r2,r2,r2,lsr#2
	orr	r0,r0,r0,lsl#2
	orr	r3,r3,r3,lsr#2
	orr	r1,r1,r1,lsl#2
	and	r2,r2,r8		@ &=0x0f0f0f0f
	and	r0,r0,r8,lsl#4		@ &=0xf0f0f0f0
	and	r3,r3,r8		@ &=0x0f0f0f0f
	and	r1,r1,r8,lsl#4		@ &=0xf0f0f0f0
	ldmia	r10,{r4,r5}		@ A_flat[i]
	orr	r2,r2,r2,lsr#4
	orr	r0,r0,r0,lsl#4
	orr	r3,r3,r3,lsr#4
	orr	r1,r1,r1,lsl#4
	and	r2,r2,r9		@ &=0x00ff00ff
	and	r0,r0,r9,lsl#8		@ &=0xff00ff00
	and	r3,r3,r9		@ &=0x00ff00ff
	and	r1,r1,r9,lsl#8		@ &=0xff00ff00
	orr	r2,r2,r2,lsr#8
	orr	r0,r0,r0,lsl#8
	orr	r3,r3,r3,lsr#8
	orr	r1,r1,r1,lsl#8

	mov	r2,r2,lsl#16
	mov	r1,r1,lsr#16
	eor	r4,r4,r3,lsl#16
	eor	r5,r5,r0,lsr#16
	eor	r4,r4,r2,lsr#16
	eor	r5,r5,r1,lsl#16
	stmia	r10!,{r4,r5}		@ A_flat[i++] ^= BitInterleave(inp[0..7])

	subs	r14,r14,#8		@ 8 more bytes of this block consumed
	bhi	.Loop_block

	str	r11,[sp,#476]		@ keep the input cursor across the call

	bl	KeccakF1600_int

	add	r14,sp,#456
	ldmia	r14,{r6-r12,r14}	@ masks, state ptr, inp, len - bsz, bsz
	b	.Loop_absorb

.align	4
.Labsorbed:
	add	r11,sp,#40		@ r10 holds the caller state pointer here
	ldmia	sp,{r0-r9}
	stmia	r10!,{r0-r9}		@ write A[5][5] back, 40 bytes at a time
	ldmia	r11!,{r0-r9}
	stmia	r10!,{r0-r9}
	ldmia	r11!,{r0-r9}
	stmia	r10!,{r0-r9}
	ldmia	r11!,{r0-r9}
	stmia	r10!,{r0-r9}
	ldmia	r11,{r0-r9}
	stmia	r10,{r0-r9}

.Labsorb_abort:
	add	sp,sp,#456+32		@ drop the frame plus the pushed r0-r3
	mov	r0,r12			@ return value: remaining len
	ldmia	sp!,{r4-r12,pc}
.size	SHA3_absorb,.-SHA3_absorb

@-----------------------------------------------------------------------
@ SHA3_squeeze
@   In:    r0 = state pointer, r1 = output pointer, r2 = len, r3 = bsz
@   Saves: r4-r10; returns by popping pc
@   Roles: r10 = state read cursor, r14 = state base (for KeccakF1600)
@          r4 = output cursor, r5 = bytes still to write
@          r12 = bytes left in the current block
@          r6-r9 = masks 0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff
@   Stack: sp+0..12 masks r6..r9, sp+16 pushed r0 (state pointer),
@          sp+20 pushed r3 (bsz), then r4-r10 and lr.
@   Each 64-bit lane is de-interleaved into two words and stored byte by
@   byte, low byte first; when a block is exhausted and output remains,
@   KeccakF1600 is called on the state.
@-----------------------------------------------------------------------
.globl	SHA3_squeeze
.type	SHA3_squeeze,%function
.align	5
SHA3_squeeze:
	stmdb	sp!,{r0,r3-r10,lr}

	mov	r12,r3			@ r12 = bsz
	mov	r5,r2			@ r5  = len
	mov	r4,r1			@ r4  = out
	mov	r10,r0			@ r10 = state cursor

#ifdef	__thumb2__
	mov	r6,#0x55555555
	mov	r7,#0x33333333
	mov	r8,#0x0f0f0f0f
	mov	r9,#0x00ff00ff
#else
	mov	r6,#0x11		@ build the masks from byte seeds
	mov	r8,#0x0f
	mov	r9,#0xff
	orr	r6,r6,r6,lsl#8
	orr	r8,r8,r8,lsl#8
	orr	r6,r6,r6,lsl#16		@ 0x11111111
	orr	r9,r9,r9,lsl#16		@ 0x00ff00ff
	orr	r8,r8,r8,lsl#16		@ 0x0f0f0f0f
	orr	r7,r6,r6,lsl#1		@ 0x33333333
	orr	r6,r6,r6,lsl#2		@ 0x55555555
#endif
	stmdb	sp!,{r6-r9}		@ masks go below the pushed r0/r3

	mov	r14,r10			@ remember the state base
	b	.Loop_squeeze

.align	4
.Loop_squeeze:
	ldmia	r10!,{r0,r1}		@ A_flat[i++]

	mov	r2,r0,lsl#16
	mov	r3,r1,lsl#16		@ r3 = r1 << 16
	mov	r2,r2,lsr#16		@ r2 = r0 & 0x0000ffff
	mov	r1,r1,lsr#16
	mov	r0,r0,lsr#16		@ r0 = r0 >> 16
	mov	r1,r1,lsl#16		@ r1 = r1 & 0xffff0000

	orr	r2,r2,r2,lsl#8
	orr	r3,r3,r3,lsr#8
	orr	r0,r0,r0,lsl#8
	orr	r1,r1,r1,lsr#8
	and	r2,r2,r9		@ &=0x00ff00ff
	and	r3,r3,r9,lsl#8		@ &=0xff00ff00
	and	r0,r0,r9		@ &=0x00ff00ff
	and	r1,r1,r9,lsl#8		@ &=0xff00ff00
	orr	r2,r2,r2,lsl#4
	orr	r3,r3,r3,lsr#4
	orr	r0,r0,r0,lsl#4
	orr	r1,r1,r1,lsr#4
	and	r2,r2,r8		@ &=0x0f0f0f0f
	and	r3,r3,r8,lsl#4		@ &=0xf0f0f0f0
	and	r0,r0,r8		@ &=0x0f0f0f0f
	and	r1,r1,r8,lsl#4		@ &=0xf0f0f0f0
	orr	r2,r2,r2,lsl#2
	orr	r3,r3,r3,lsr#2
	orr	r0,r0,r0,lsl#2
	orr	r1,r1,r1,lsr#2
	and	r2,r2,r7		@ &=0x33333333
	and	r3,r3,r7,lsl#2		@ &=0xcccccccc
	and	r0,r0,r7		@ &=0x33333333
	and	r1,r1,r7,lsl#2		@ &=0xcccccccc
	orr	r2,r2,r2,lsl#1
	orr	r3,r3,r3,lsr#1
	orr	r0,r0,r0,lsl#1
	orr	r1,r1,r1,lsr#1
	and	r2,r2,r6		@ &=0x55555555
	and	r3,r3,r6,lsl#1		@ &=0xaaaaaaaa
	and	r0,r0,r6		@ &=0x55555555
	and	r1,r1,r6,lsl#1		@ &=0xaaaaaaaa

	orr	r2,r2,r3		@ r2 = first four output bytes
	orr	r0,r0,r1		@ r0 = next four output bytes

	cmp	r5,#8
	blo	.Lsqueeze_tail		@ fewer than 8 bytes wanted
	mov	r1,r2,lsr#8
	strb	r2,[r4],#1
	mov	r3,r2,lsr#16
	strb	r1,[r4],#1
	mov	r2,r2,lsr#24
	strb	r3,[r4],#1
	strb	r2,[r4],#1

	mov	r1,r0,lsr#8
	strb	r0,[r4],#1
	mov	r3,r0,lsr#16
	strb	r1,[r4],#1
	mov	r0,r0,lsr#24
	strb	r3,[r4],#1
	strb	r0,[r4],#1
	subs	r5,r5,#8		@ len -= 8
	beq	.Lsqueeze_done

	subs	r12,r12,#8		@ bsz -= 8
	bhi	.Loop_squeeze

	mov	r0,r14			@ state base saved before the loop

	bl	KeccakF1600

	ldmia	sp,{r6-r10,r12}		@ masks, state pointer, bsz
	mov	r14,r10
	b	.Loop_squeeze

.align	4
.Lsqueeze_tail:
	strb	r2,[r4],#1		@ up to four bytes out of r2 ...
	mov	r2,r2,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r2,[r4],#1
	mov	r2,r2,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r2,[r4],#1
	mov	r2,r2,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r2,[r4],#1
	subs	r5,r5,#1
	beq	.Lsqueeze_done

	strb	r0,[r4],#1		@ ... then up to three out of r0
	mov	r0,r0,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r0,[r4],#1
	mov	r0,r0,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r0,[r4]
	b	.Lsqueeze_done

.align	4
.Lsqueeze_done:
	add	sp,sp,#24		@ drop the four masks plus pushed r0 and r3
	ldmia	sp!,{r4-r10,pc}
.size	SHA3_squeeze,.-SHA3_squeeze
#if __ARM_MAX_ARCH__>=7
.fpu	neon

@ ---------------------------------------------------------------------
@ NEON code path: iotas64, KeccakF1600_neon, SHA3_absorb_neon and
@ SHA3_squeeze_neon.
@
@ NOTE(review): the header of this file says it is generated from
@ keccak1600-armv4.pl. The two functional changes below (Thumb-2 safe
@ return in KeccakF1600_neon, len == 0 early exit in SHA3_squeeze_neon)
@ should also be applied to the generator, or they will be lost on the
@ next regeneration.
@
@ State-to-register mapping shared by all three functions (it is fixed
@ by the load/store sequences in SHA3_absorb_neon/SHA3_squeeze_neon):
@
@	A[0][0..4] = d0,  d2,  d4,  d6,  d8
@	A[1][0..4] = d1,  d3,  d5,  d7,  d9
@	A[2][0..4] = d10, d12, d14, d16, d18
@	A[3][0..4] = d11, d13, d15, d17, d19
@	A[4][0..4] = d20, d21, d22, d23, d24
@
@ so qN pairs the same column of rows 0/1 (q0..q4) or rows 2/3
@ (q5..q9), and d25..d31 (q13..q15 plus d25) are scratch.
@ ---------------------------------------------------------------------

@ Iota round constants as 64-bit words, one per round. KeccakF1600_neon
@ consumes one entry per loop iteration, 24 in total.
.type	iotas64, %object
.align	5
iotas64:
.quad	0x0000000000000001
.quad	0x0000000000008082
.quad	0x800000000000808a
.quad	0x8000000080008000
.quad	0x000000000000808b
.quad	0x0000000080000001
.quad	0x8000000080008081
.quad	0x8000000000008009
.quad	0x000000000000008a
.quad	0x0000000000000088
.quad	0x0000000080008009
.quad	0x000000008000000a
.quad	0x000000008000808b
.quad	0x800000000000008b
.quad	0x8000000000008089
.quad	0x8000000000008003
.quad	0x8000000000008002
.quad	0x8000000000000080
.quad	0x000000000000800a
.quad	0x800000008000000a
.quad	0x8000000080008081
.quad	0x8000000000008080
.quad	0x0000000080000001
.quad	0x8000000080008008
.size	iotas64,.-iotas64

@ ---------------------------------------------------------------------
@ KeccakF1600_neon (file-local, not exported)
@
@ Runs 24 rounds of Theta, Rho+Pi, Chi and Iota on the state held in
@ d0-d24 (see mapping above).
@
@ In:	d0-d24	state
@	r0	address of a 64-bit aligned spill area; the first 24
@		bytes at [r0] are overwritten (both callers pass the
@		in-memory state and rewrite it from registers later)
@ Out:	d0-d24	permuted state; r0 is preserved
@ Clobb:	r1, r2, r3, d25-d31, flags
@ ---------------------------------------------------------------------
.type	KeccakF1600_neon, %function
.align	5
KeccakF1600_neon:
	add	r1, r0, #16		@ second spill slot, used for A[2][4]
	adr	r2, iotas64		@ r2 walks the round constants
	mov	r3, #24			@ loop counter
	b	.Loop_neon

.align	4
.Loop_neon:
	@ Theta
	vst1.64		{q4}, [r0,:64]		@ offload A[0..1][4]
	veor		q13, q0, q5		@ A[0..1][0]^A[2..3][0]
	vst1.64		{d18}, [r1,:64]		@ offload A[2][4]
	veor		q14, q1, q6		@ A[0..1][1]^A[2..3][1]
	veor		q15, q2, q7		@ A[0..1][2]^A[2..3][2]
	veor		d26, d26, d27		@ C[0]=A[0][0]^A[1][0]^A[2][0]^A[3][0]
	veor		d27, d28, d29		@ C[1]=A[0][1]^A[1][1]^A[2][1]^A[3][1]
	veor		q14, q3, q8		@ A[0..1][3]^A[2..3][3]
	veor		q4, q4, q9		@ A[0..1][4]^A[2..3][4]
	veor		d30, d30, d31		@ C[2]=A[0][2]^A[1][2]^A[2][2]^A[3][2]
	veor		d31, d28, d29		@ C[3]=A[0][3]^A[1][3]^A[2][3]^A[3][3]
	veor		d25, d8, d9		@ C[4]=A[0][4]^A[1][4]^A[2][4]^A[3][4]
	veor		q13, q13, q10		@ C[0..1]^=A[4][0..1]
	veor		q14, q15, q11		@ C[2..3]^=A[4][2..3]
	veor		d25, d25, d24		@ C[4]^=A[4][4]

	@ rotate left by one: (x+x) supplies bits 63..1, vsri #63 inserts bit 0
	vadd.u64	q4, q13, q13		@ C[0..1]<<1
	vadd.u64	q15, q14, q14		@ C[2..3]<<1
	vadd.u64	d18, d25, d25		@ C[4]<<1
	vsri.u64	q4, q13, #63		@ ROL64(C[0..1],1)
	vsri.u64	q15, q14, #63		@ ROL64(C[2..3],1)
	vsri.u64	d18, d25, #63		@ ROL64(C[4],1)
	veor		d25, d25, d9		@ D[0] = C[4] ^= ROL64(C[1],1)
	veor		q13, q13, q15		@ D[1..2] = C[0..1] ^ ROL64(C[2..3],1)
	veor		d28, d28, d18		@ D[3] = C[2] ^= ROL64(C[4],1)
	veor		d29, d29, d8		@ D[4] = C[3] ^= ROL64(C[0],1)

	veor		d0, d0, d25		@ A[0][0] ^= C[4]
	veor		d1, d1, d25		@ A[1][0] ^= C[4]
	veor		d10, d10, d25		@ A[2][0] ^= C[4]
	veor		d11, d11, d25		@ A[3][0] ^= C[4]
	veor		d20, d20, d25		@ A[4][0] ^= C[4]

	veor		d2, d2, d26		@ A[0][1] ^= D[1]
	veor		d3, d3, d26		@ A[1][1] ^= D[1]
	veor		d12, d12, d26		@ A[2][1] ^= D[1]
	veor		d13, d13, d26		@ A[3][1] ^= D[1]
	veor		d21, d21, d26		@ A[4][1] ^= D[1]
	vmov		d26, d27		@ duplicate D[2] into both halves of q13

	veor		d6, d6, d28		@ A[0][3] ^= C[2]
	veor		d7, d7, d28		@ A[1][3] ^= C[2]
	veor		d16, d16, d28		@ A[2][3] ^= C[2]
	veor		d17, d17, d28		@ A[3][3] ^= C[2]
	veor		d23, d23, d28		@ A[4][3] ^= C[2]
	vld1.64		{q4}, [r0,:64]		@ restore A[0..1][4]
	vmov		d28, d29		@ duplicate D[4] into both halves of q14

	vld1.64		{d18}, [r1,:64]		@ restore A[2][4]
	veor		q2, q2, q13		@ A[0..1][2] ^= D[2]
	veor		q7, q7, q13		@ A[2..3][2] ^= D[2]
	veor		d22, d22, d27		@ A[4][2] ^= D[2]

	veor		q4, q4, q14		@ A[0..1][4] ^= C[3]
	veor		q9, q9, q14		@ A[2..3][4] ^= C[3]
	veor		d24, d24, d29		@ A[4][4] ^= C[3]

	@ Rho + Pi
	@ Each rotation is a vshl/vsri pair: vshl writes x<<n, vsri then
	@ inserts x>>(64-n) into the low bits of the same destination.
	vmov		d26, d2			@ C[1] = A[0][1]
	vshl.u64	d2, d3, #44
	vmov		d27, d4			@ C[2] = A[0][2]
	vshl.u64	d4, d14, #43
	vmov		d28, d6			@ C[3] = A[0][3]
	vshl.u64	d6, d17, #21
	vmov		d29, d8			@ C[4] = A[0][4]
	vshl.u64	d8, d24, #14
	vsri.u64	d2, d3, #64-44		@ A[0][1] = ROL64(A[1][1], rhotates[1][1])
	vsri.u64	d4, d14, #64-43		@ A[0][2] = ROL64(A[2][2], rhotates[2][2])
	vsri.u64	d6, d17, #64-21		@ A[0][3] = ROL64(A[3][3], rhotates[3][3])
	vsri.u64	d8, d24, #64-14		@ A[0][4] = ROL64(A[4][4], rhotates[4][4])

	vshl.u64	d3, d9, #20
	vshl.u64	d14, d16, #25
	vshl.u64	d17, d15, #15
	vshl.u64	d24, d21, #2
	vsri.u64	d3, d9, #64-20		@ A[1][1] = ROL64(A[1][4], rhotates[1][4])
	vsri.u64	d14, d16, #64-25	@ A[2][2] = ROL64(A[2][3], rhotates[2][3])
	vsri.u64	d17, d15, #64-15	@ A[3][3] = ROL64(A[3][2], rhotates[3][2])
	vsri.u64	d24, d21, #64-2		@ A[4][4] = ROL64(A[4][1], rhotates[4][1])

	vshl.u64	d9, d22, #61
	@ vshl.u64	d16, d19, #8		(byte-multiple rotate, done with vext.8 below)
	vshl.u64	d15, d12, #10
	vshl.u64	d21, d7, #55
	vsri.u64	d9, d22, #64-61		@ A[1][4] = ROL64(A[4][2], rhotates[4][2])
	vext.8		d16, d19, d19, #8-1	@ A[2][3] = ROL64(A[3][4], rhotates[3][4])
	vsri.u64	d15, d12, #64-10	@ A[3][2] = ROL64(A[2][1], rhotates[2][1])
	vsri.u64	d21, d7, #64-55		@ A[4][1] = ROL64(A[1][3], rhotates[1][3])

	vshl.u64	d22, d18, #39
	@ vshl.u64	d19, d23, #56		(byte-multiple rotate, done with vext.8 below)
	vshl.u64	d12, d5, #6
	vshl.u64	d7, d13, #45
	vsri.u64	d22, d18, #64-39	@ A[4][2] = ROL64(A[2][4], rhotates[2][4])
	vext.8		d19, d23, d23, #8-7	@ A[3][4] = ROL64(A[4][3], rhotates[4][3])
	vsri.u64	d12, d5, #64-6		@ A[2][1] = ROL64(A[1][2], rhotates[1][2])
	vsri.u64	d7, d13, #64-45		@ A[1][3] = ROL64(A[3][1], rhotates[3][1])

	vshl.u64	d18, d20, #18
	vshl.u64	d23, d11, #41
	vshl.u64	d5, d10, #3
	vshl.u64	d13, d1, #36
	vsri.u64	d18, d20, #64-18	@ A[2][4] = ROL64(A[4][0], rhotates[4][0])
	vsri.u64	d23, d11, #64-41	@ A[4][3] = ROL64(A[3][0], rhotates[3][0])
	vsri.u64	d5, d10, #64-3		@ A[1][2] = ROL64(A[2][0], rhotates[2][0])
	vsri.u64	d13, d1, #64-36		@ A[3][1] = ROL64(A[1][0], rhotates[1][0])

	vshl.u64	d1, d28, #28
	vshl.u64	d10, d26, #1
	vshl.u64	d11, d29, #27
	vshl.u64	d20, d27, #62
	vsri.u64	d1, d28, #64-28		@ A[1][0] = ROL64(C[3], rhotates[0][3])
	vsri.u64	d10, d26, #64-1		@ A[2][0] = ROL64(C[1], rhotates[0][1])
	vsri.u64	d11, d29, #64-27	@ A[3][0] = ROL64(C[4], rhotates[0][4])
	vsri.u64	d20, d27, #64-62	@ A[4][0] = ROL64(C[2], rhotates[0][2])

	@ Chi + Iota
	vbic		q13, q2, q1
	vbic		q14, q3, q2
	vbic		q15, q4, q3
	veor		q13, q13, q0		@ A[0..1][0] ^ (~A[0..1][1] & A[0..1][2])
	veor		q14, q14, q1		@ A[0..1][1] ^ (~A[0..1][2] & A[0..1][3])
	veor		q2, q2, q15		@ A[0..1][2] ^= (~A[0..1][3] & A[0..1][4])
	vst1.64		{q13}, [r0,:64]		@ offload A[0..1][0]
	vbic		q13, q0, q4
	vbic		q15, q1, q0
	vmov		q1, q14			@ A[0..1][1]
	veor		q3, q3, q13		@ A[0..1][3] ^= (~A[0..1][4] & A[0..1][0])
	veor		q4, q4, q15		@ A[0..1][4] ^= (~A[0..1][0] & A[0..1][1])

	vbic		q13, q7, q6
	vmov		q0, q5			@ A[2..3][0]
	vbic		q14, q8, q7
	vmov		q15, q6			@ A[2..3][1]
	veor		q5, q5, q13		@ A[2..3][0] ^= (~A[2..3][1] & A[2..3][2])
	vbic		q13, q9, q8
	veor		q6, q6, q14		@ A[2..3][1] ^= (~A[2..3][2] & A[2..3][3])
	vbic		q14, q0, q9
	veor		q7, q7, q13		@ A[2..3][2] ^= (~A[2..3][3] & A[2..3][4])
	vbic		q13, q15, q0
	veor		q8, q8, q14		@ A[2..3][3] ^= (~A[2..3][4] & A[2..3][0])
	vmov		q14, q10		@ A[4][0..1]
	veor		q9, q9, q13		@ A[2..3][4] ^= (~A[2..3][0] & A[2..3][1])

	vld1.64		d25, [r2,:64]!		@ Iota[i++]
	vbic		d26, d22, d21
	vbic		d27, d23, d22
	vld1.64		{q0}, [r0,:64]		@ restore A[0..1][0]
	veor		d20, d20, d26		@ A[4][0] ^= (~A[4][1] & A[4][2])
	vbic		d26, d24, d23
	veor		d21, d21, d27		@ A[4][1] ^= (~A[4][2] & A[4][3])
	vbic		d27, d28, d24
	veor		d22, d22, d26		@ A[4][2] ^= (~A[4][3] & A[4][4])
	vbic		d26, d29, d28
	veor		d23, d23, d27		@ A[4][3] ^= (~A[4][4] & A[4][0])
	veor		d0, d0, d25		@ A[0][0] ^= Iota[i]
	veor		d24, d24, d26		@ A[4][4] ^= (~A[4][0] & A[4][1])

	subs		r3, r3, #1
	bne		.Loop_neon

	@ Return. 0xe12fff1e is the A32 encoding of "bx lr"; emitted as data
	@ in a Thumb-2 build it would land in a Thumb instruction stream and
	@ not decode as a return, so the mnemonic is used there instead.
	@ NOTE(review): the raw word is presumably kept for ARM mode so the
	@ file still assembles for targets whose assembler rejects bx.
#if defined(__thumb2__)
	bx	lr
#else
.word	0xe12fff1e
#endif
.size	KeccakF1600_neon,.-KeccakF1600_neon

@ ---------------------------------------------------------------------
@ SHA3_absorb_neon
@
@ In:	r0 = A (state, 25 64-bit lanes, 64-bit aligned)
@	r1 = inp
@	r2 = len
@	r3 = bsz (block size in bytes; the compare chain below assumes a
@	     multiple of 8 between 8 and 200 -- TODO confirm with caller)
@ Out:	r0 = number of input bytes left unprocessed (len modulo bsz)
@
@ While at least bsz bytes remain, XORs one block into the state lanes
@ and runs KeccakF1600_neon. r4-r6, lr and d8-d15 are saved and
@ restored here.
@ ---------------------------------------------------------------------
.globl	SHA3_absorb_neon
.type	SHA3_absorb_neon, %function
.align	5
SHA3_absorb_neon:
	stmdb	sp!, {r4,r5,r6,lr}
	vstmdb	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}

	mov	r4, r1			@ inp
	mov	r5, r2			@ len
	mov	r6, r3			@ bsz

	vld1.32	{d0}, [r0,:64]!		@ A[0][0]
	vld1.32	{d2}, [r0,:64]!		@ A[0][1]
	vld1.32	{d4}, [r0,:64]!		@ A[0][2]
	vld1.32	{d6}, [r0,:64]!		@ A[0][3]
	vld1.32	{d8}, [r0,:64]!		@ A[0][4]

	vld1.32	{d1}, [r0,:64]!		@ A[1][0]
	vld1.32	{d3}, [r0,:64]!		@ A[1][1]
	vld1.32	{d5}, [r0,:64]!		@ A[1][2]
	vld1.32	{d7}, [r0,:64]!		@ A[1][3]
	vld1.32	{d9}, [r0,:64]!		@ A[1][4]

	vld1.32	{d10}, [r0,:64]!	@ A[2][0]
	vld1.32	{d12}, [r0,:64]!	@ A[2][1]
	vld1.32	{d14}, [r0,:64]!	@ A[2][2]
	vld1.32	{d16}, [r0,:64]!	@ A[2][3]
	vld1.32	{d18}, [r0,:64]!	@ A[2][4]

	vld1.32	{d11}, [r0,:64]!	@ A[3][0]
	vld1.32	{d13}, [r0,:64]!	@ A[3][1]
	vld1.32	{d15}, [r0,:64]!	@ A[3][2]
	vld1.32	{d17}, [r0,:64]!	@ A[3][3]
	vld1.32	{d19}, [r0,:64]!	@ A[3][4]

	vld1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..3]
	vld1.32	{d24}, [r0,:64]		@ A[4][4]
	sub	r0, r0, #24*8		@ rewind
	b	.Loop_absorb_neon

.align	4
.Loop_absorb_neon:
	subs	r12, r5, r6		@ len - bsz
	blo	.Labsorbed_neon		@ less than one full block left
	mov	r5, r12

	@ One lane (8 bytes) per step. Each cmp against an even lane count
	@ serves two branches: blo leaves after an odd number of lanes,
	@ beq after the following even number. The NEON loads and veor in
	@ between leave the flags untouched.
	vld1.8	{d31}, [r4]!		@ endian-neutral loads...
	cmp	r6, #8*2
	veor	d0, d0, d31		@ A[0][0] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d2, d2, d31		@ A[0][1] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*4
	veor	d4, d4, d31		@ A[0][2] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d6, d6, d31		@ A[0][3] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31},[r4]!
	cmp	r6, #8*6
	veor	d8, d8, d31		@ A[0][4] ^= *inp++
	blo	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	veor	d1, d1, d31		@ A[1][0] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*8
	veor	d3, d3, d31		@ A[1][1] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d5, d5, d31		@ A[1][2] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*10
	veor	d7, d7, d31		@ A[1][3] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d9, d9, d31		@ A[1][4] ^= *inp++
	beq	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	cmp	r6, #8*12
	veor	d10, d10, d31		@ A[2][0] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d12, d12, d31		@ A[2][1] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*14
	veor	d14, d14, d31		@ A[2][2] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d16, d16, d31		@ A[2][3] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*16
	veor	d18, d18, d31		@ A[2][4] ^= *inp++
	blo	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	veor	d11, d11, d31		@ A[3][0] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*18
	veor	d13, d13, d31		@ A[3][1] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d15, d15, d31		@ A[3][2] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*20
	veor	d17, d17, d31		@ A[3][3] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d19, d19, d31		@ A[3][4] ^= *inp++
	beq	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	cmp	r6, #8*22
	veor	d20, d20, d31		@ A[4][0] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d21, d21, d31		@ A[4][1] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*24
	veor	d22, d22, d31		@ A[4][2] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d23, d23, d31		@ A[4][3] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d24, d24, d31		@ A[4][4] ^= *inp++

.Lprocess_neon:
	bl	KeccakF1600_neon
	b	.Loop_absorb_neon

.align	4
.Labsorbed_neon:
	@ write the register-resident state back to memory
	vst1.32	{d0}, [r0,:64]!		@ A[0][0..4]
	vst1.32	{d2}, [r0,:64]!
	vst1.32	{d4}, [r0,:64]!
	vst1.32	{d6}, [r0,:64]!
	vst1.32	{d8}, [r0,:64]!

	vst1.32	{d1}, [r0,:64]!		@ A[1][0..4]
	vst1.32	{d3}, [r0,:64]!
	vst1.32	{d5}, [r0,:64]!
	vst1.32	{d7}, [r0,:64]!
	vst1.32	{d9}, [r0,:64]!

	vst1.32	{d10}, [r0,:64]!	@ A[2][0..4]
	vst1.32	{d12}, [r0,:64]!
	vst1.32	{d14}, [r0,:64]!
	vst1.32	{d16}, [r0,:64]!
	vst1.32	{d18}, [r0,:64]!

	vst1.32	{d11}, [r0,:64]!	@ A[3][0..4]
	vst1.32	{d13}, [r0,:64]!
	vst1.32	{d15}, [r0,:64]!
	vst1.32	{d17}, [r0,:64]!
	vst1.32	{d19}, [r0,:64]!

	vst1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
	vst1.32	{d24}, [r0,:64]

	mov	r0, r5			@ return value
	vldmia	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!, {r4,r5,r6,pc}
.size	SHA3_absorb_neon,.-SHA3_absorb_neon

@ ---------------------------------------------------------------------
@ SHA3_squeeze_neon
@
@ In:	r0 = A_flat (state in memory, 64-bit aligned)
@	r1 = out
@	r2 = len (bytes to produce)
@	r3 = bsz (bytes taken from the state between permutations)
@
@ Copies state bytes to out, 8 at a time, then a 1..7 byte tail. Each
@ time bsz bytes have been taken and more output is still required the
@ state is loaded into d0-d24, permuted with KeccakF1600_neon and
@ stored back. r4-r6 and lr are saved here; d8-d15 are saved only
@ around the permutation.
@
@ Working registers: r12 = read cursor into the state,
@ r14 = bytes still available in the current block.
@ ---------------------------------------------------------------------
.globl	SHA3_squeeze_neon
.type	SHA3_squeeze_neon, %function
.align	5
SHA3_squeeze_neon:
	stmdb	sp!, {r4,r5,r6,lr}

	mov	r4, r1			@ out
	mov	r5, r2			@ len
	mov	r6, r3			@ bsz
	mov	r12, r0			@ A_flat
	mov	r14, r3			@ bsz

	@ len == 0 must not write anything. Without this check the tail
	@ path would be entered and store one byte before testing len.
	cmp	r5, #0
	beq	.Lsqueeze_neon_done
	b	.Loop_squeeze_neon

.align	4
.Loop_squeeze_neon:
	cmp	r5, #8
	blo	.Lsqueeze_neon_tail
	vld1.32	{d0}, [r12]!
	vst1.8	{d0}, [r4]!		@ endian-neutral store

	subs	r5, r5, #8		@ len -= 8
	beq	.Lsqueeze_neon_done

	subs	r14, r14, #8		@ bsz -= 8
	bhi	.Loop_squeeze_neon

	@ block exhausted: permute the state before producing more output
	vstmdb	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}

	vld1.32	{d0}, [r0,:64]!		@ A[0][0..4]
	vld1.32	{d2}, [r0,:64]!
	vld1.32	{d4}, [r0,:64]!
	vld1.32	{d6}, [r0,:64]!
	vld1.32	{d8}, [r0,:64]!

	vld1.32	{d1}, [r0,:64]!		@ A[1][0..4]
	vld1.32	{d3}, [r0,:64]!
	vld1.32	{d5}, [r0,:64]!
	vld1.32	{d7}, [r0,:64]!
	vld1.32	{d9}, [r0,:64]!

	vld1.32	{d10}, [r0,:64]!	@ A[2][0..4]
	vld1.32	{d12}, [r0,:64]!
	vld1.32	{d14}, [r0,:64]!
	vld1.32	{d16}, [r0,:64]!
	vld1.32	{d18}, [r0,:64]!

	vld1.32	{d11}, [r0,:64]!	@ A[3][0..4]
	vld1.32	{d13}, [r0,:64]!
	vld1.32	{d15}, [r0,:64]!
	vld1.32	{d17}, [r0,:64]!
	vld1.32	{d19}, [r0,:64]!

	vld1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
	vld1.32	{d24}, [r0,:64]
	sub	r0, r0, #24*8		@ rewind

	bl	KeccakF1600_neon	@ overwrites lr (r14); reloaded from r6 below

	mov	r12, r0			@ A_flat
	vst1.32	{d0}, [r0,:64]!		@ A[0][0..4]
	vst1.32	{d2}, [r0,:64]!
	vst1.32	{d4}, [r0,:64]!
	vst1.32	{d6}, [r0,:64]!
	vst1.32	{d8}, [r0,:64]!

	vst1.32	{d1}, [r0,:64]!		@ A[1][0..4]
	vst1.32	{d3}, [r0,:64]!
	vst1.32	{d5}, [r0,:64]!
	vst1.32	{d7}, [r0,:64]!
	vst1.32	{d9}, [r0,:64]!

	vst1.32	{d10}, [r0,:64]!	@ A[2][0..4]
	vst1.32	{d12}, [r0,:64]!
	vst1.32	{d14}, [r0,:64]!
	vst1.32	{d16}, [r0,:64]!
	vst1.32	{d18}, [r0,:64]!

	vst1.32	{d11}, [r0,:64]!	@ A[3][0..4]
	vst1.32	{d13}, [r0,:64]!
	vst1.32	{d15}, [r0,:64]!
	vst1.32	{d17}, [r0,:64]!
	vst1.32	{d19}, [r0,:64]!

	vst1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
	mov	r14, r6			@ bsz
	vst1.32	{d24}, [r0,:64]
	mov	r0, r12			@ rewind

	vldmia	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
	b	.Loop_squeeze_neon

.align	4
.Lsqueeze_neon_tail:
	@ 1..7 bytes left: fetch the next 8 state bytes as two words and
	@ emit them one byte at a time, low byte first. Each cmp serves a
	@ blo/beq pair; strb and mov (without the s suffix) keep the flags.
	ldmia	r12, {r2,r3}
	cmp	r5, #2
	strb	r2, [r4],#1		@ endian-neutral store
	mov	r2, r2, lsr#8
	blo	.Lsqueeze_neon_done	@ len == 1
	strb	r2, [r4], #1
	mov	r2, r2, lsr#8
	beq	.Lsqueeze_neon_done	@ len == 2
	strb	r2, [r4], #1
	mov	r2, r2, lsr#8
	cmp	r5, #4
	blo	.Lsqueeze_neon_done	@ len == 3
	strb	r2, [r4], #1
	beq	.Lsqueeze_neon_done	@ len == 4

	strb	r3, [r4], #1
	mov	r3, r3, lsr#8
	cmp	r5, #6
	blo	.Lsqueeze_neon_done	@ len == 5
	strb	r3, [r4], #1
	mov	r3, r3, lsr#8
	beq	.Lsqueeze_neon_done	@ len == 6
	strb	r3, [r4], #1		@ len == 7

.Lsqueeze_neon_done:
	ldmia	sp!, {r4,r5,r6,pc}
.size	SHA3_squeeze_neon,.-SHA3_squeeze_neon
#endif
@ NUL-terminated ASCII identification string embedded in the object
.byte	75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2