/* Do not modify. This file is auto-generated from keccak1600-armv4.pl. */
#include "arm_arch.h"

/*
 * Select the instruction set: unified-syntax Thumb-2 when the compiler
 * defines __thumb2__, otherwise classic 32-bit ARM encoding.
 */
#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code 32
#endif

.text

/*
 * iotas32: table of 24 entries, each a pair of 32-bit words (192 bytes),
 * aligned with ".align 5".
 *
 * The round code takes the table address with "adr", adds the counter
 * kept at [sp,#444] to it, and loads one pair with a single ldmia
 * (commented "iotas[i]" at the point of use).  The table is kept in
 * .text, which is what allows the pc-relative "adr" to reach it.
 *
 * NOTE(review): these are presumably the Keccak-f[1600] iota round
 * constants stored in bit-interleaved form (one word per bit parity);
 * confirm against keccak1600-armv4.pl before relying on the layout.
 */
.type iotas32, %object
.align 5
iotas32:
.long 0x00000001, 0x00000000
.long 0x00000000, 0x00000089
.long 0x00000000, 0x8000008b
.long 0x00000000, 0x80008080
.long 0x00000001, 0x0000008b
.long 0x00000001, 0x00008000
.long 0x00000001, 0x80008088
.long 0x00000001, 0x80000082
.long 0x00000000, 0x0000000b
.long 0x00000000, 0x0000000a
.long 0x00000001, 0x00008082
.long 0x00000000, 0x00008003
.long 0x00000001, 0x0000808b
.long 0x00000001, 0x8000000b
.long 0x00000001, 0x8000008a
.long 0x00000001, 0x80000081
.long 0x00000000, 0x80000081
.long 0x00000000, 0x80000008
.long 0x00000000, 0x00000083
.long 0x00000000, 0x80008003
.long 0x00000001, 0x80008088
.long 0x00000000, 0x80000088
.long 0x00000001, 0x00008000
.long 0x00000000, 0x80008082
.size iotas32,.-iotas32

.type KeccakF1600_int, %function
.align 5
BaldwinKeccakF1600_int: 45bc3d5698SJohn Baldwin add r9,sp,#176 46bc3d5698SJohn Baldwin add r12,sp,#0 47bc3d5698SJohn Baldwin add r10,sp,#40 48bc3d5698SJohn Baldwin ldmia r9,{r4,r5,r6,r7,r8,r9} @ A[4][2..4] 49bc3d5698SJohn BaldwinKeccakF1600_enter: 50bc3d5698SJohn Baldwin str lr,[sp,#440] 51bc3d5698SJohn Baldwin eor r11,r11,r11 52bc3d5698SJohn Baldwin str r11,[sp,#444] 53bc3d5698SJohn Baldwin b .Lround2x 54bc3d5698SJohn Baldwin 55bc3d5698SJohn Baldwin.align 4 56bc3d5698SJohn Baldwin.Lround2x: 57bc3d5698SJohn Baldwin ldmia r12,{r0,r1,r2,r3} @ A[0][0..1] 58bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[1][0..1] 59bc3d5698SJohn Baldwin#ifdef __thumb2__ 60bc3d5698SJohn Baldwin eor r0,r0,r10 61bc3d5698SJohn Baldwin eor r1,r1,r11 62bc3d5698SJohn Baldwin eor r2,r2,r12 63bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#56] 64bc3d5698SJohn Baldwin eor r3,r3,r14 65bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#64] 66bc3d5698SJohn Baldwin eor r4,r4,r10 67bc3d5698SJohn Baldwin eor r5,r5,r11 68bc3d5698SJohn Baldwin eor r6,r6,r12 69bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#72] 70bc3d5698SJohn Baldwin eor r7,r7,r14 71bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#80] 72bc3d5698SJohn Baldwin eor r8,r8,r10 73bc3d5698SJohn Baldwin eor r9,r9,r11 74bc3d5698SJohn Baldwin eor r0,r0,r12 75bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#88] 76bc3d5698SJohn Baldwin eor r1,r1,r14 77bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#96] 78bc3d5698SJohn Baldwin eor r2,r2,r10 79bc3d5698SJohn Baldwin eor r3,r3,r11 80bc3d5698SJohn Baldwin eor r4,r4,r12 81bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#104] 82bc3d5698SJohn Baldwin eor r5,r5,r14 83bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#112] 84bc3d5698SJohn Baldwin eor r6,r6,r10 85bc3d5698SJohn Baldwin eor r7,r7,r11 86bc3d5698SJohn Baldwin eor r8,r8,r12 87bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#120] 88bc3d5698SJohn Baldwin eor r9,r9,r14 89bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#128] 90bc3d5698SJohn Baldwin eor r0,r0,r10 91bc3d5698SJohn Baldwin eor r1,r1,r11 92bc3d5698SJohn Baldwin eor 
r2,r2,r12 93bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#136] 94bc3d5698SJohn Baldwin eor r3,r3,r14 95bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#144] 96bc3d5698SJohn Baldwin eor r4,r4,r10 97bc3d5698SJohn Baldwin eor r5,r5,r11 98bc3d5698SJohn Baldwin eor r6,r6,r12 99bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#152] 100bc3d5698SJohn Baldwin eor r7,r7,r14 101bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#160] 102bc3d5698SJohn Baldwin eor r8,r8,r10 103bc3d5698SJohn Baldwin eor r9,r9,r11 104bc3d5698SJohn Baldwin eor r0,r0,r12 105bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#168] 106bc3d5698SJohn Baldwin eor r1,r1,r14 107bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#16] 108bc3d5698SJohn Baldwin eor r2,r2,r10 109bc3d5698SJohn Baldwin eor r3,r3,r11 110bc3d5698SJohn Baldwin eor r4,r4,r12 111bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#24] 112bc3d5698SJohn Baldwin eor r5,r5,r14 113bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#32] 114bc3d5698SJohn Baldwin#else 115bc3d5698SJohn Baldwin eor r0,r0,r10 116bc3d5698SJohn Baldwin add r10,sp,#56 117bc3d5698SJohn Baldwin eor r1,r1,r11 118bc3d5698SJohn Baldwin eor r2,r2,r12 119bc3d5698SJohn Baldwin eor r3,r3,r14 120bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[1][2..3] 121bc3d5698SJohn Baldwin eor r4,r4,r10 122bc3d5698SJohn Baldwin add r10,sp,#72 123bc3d5698SJohn Baldwin eor r5,r5,r11 124bc3d5698SJohn Baldwin eor r6,r6,r12 125bc3d5698SJohn Baldwin eor r7,r7,r14 126bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[1][4]..A[2][0] 127bc3d5698SJohn Baldwin eor r8,r8,r10 128bc3d5698SJohn Baldwin add r10,sp,#88 129bc3d5698SJohn Baldwin eor r9,r9,r11 130bc3d5698SJohn Baldwin eor r0,r0,r12 131bc3d5698SJohn Baldwin eor r1,r1,r14 132bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[2][1..2] 133bc3d5698SJohn Baldwin eor r2,r2,r10 134bc3d5698SJohn Baldwin add r10,sp,#104 135bc3d5698SJohn Baldwin eor r3,r3,r11 136bc3d5698SJohn Baldwin eor r4,r4,r12 137bc3d5698SJohn Baldwin eor r5,r5,r14 138bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[2][3..4] 139bc3d5698SJohn 
Baldwin eor r6,r6,r10 140bc3d5698SJohn Baldwin add r10,sp,#120 141bc3d5698SJohn Baldwin eor r7,r7,r11 142bc3d5698SJohn Baldwin eor r8,r8,r12 143bc3d5698SJohn Baldwin eor r9,r9,r14 144bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[3][0..1] 145bc3d5698SJohn Baldwin eor r0,r0,r10 146bc3d5698SJohn Baldwin add r10,sp,#136 147bc3d5698SJohn Baldwin eor r1,r1,r11 148bc3d5698SJohn Baldwin eor r2,r2,r12 149bc3d5698SJohn Baldwin eor r3,r3,r14 150bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[3][2..3] 151bc3d5698SJohn Baldwin eor r4,r4,r10 152bc3d5698SJohn Baldwin add r10,sp,#152 153bc3d5698SJohn Baldwin eor r5,r5,r11 154bc3d5698SJohn Baldwin eor r6,r6,r12 155bc3d5698SJohn Baldwin eor r7,r7,r14 156bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[3][4]..A[4][0] 157bc3d5698SJohn Baldwin eor r8,r8,r10 158bc3d5698SJohn Baldwin ldr r10,[sp,#168] @ A[4][1] 159bc3d5698SJohn Baldwin eor r9,r9,r11 160bc3d5698SJohn Baldwin ldr r11,[sp,#168+4] 161bc3d5698SJohn Baldwin eor r0,r0,r12 162bc3d5698SJohn Baldwin ldr r12,[sp,#16] @ A[0][2] 163bc3d5698SJohn Baldwin eor r1,r1,r14 164bc3d5698SJohn Baldwin ldr r14,[sp,#16+4] 165bc3d5698SJohn Baldwin eor r2,r2,r10 166bc3d5698SJohn Baldwin add r10,sp,#24 167bc3d5698SJohn Baldwin eor r3,r3,r11 168bc3d5698SJohn Baldwin eor r4,r4,r12 169bc3d5698SJohn Baldwin eor r5,r5,r14 170bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[0][3..4] 171bc3d5698SJohn Baldwin#endif 172bc3d5698SJohn Baldwin eor r6,r6,r10 173bc3d5698SJohn Baldwin eor r7,r7,r11 174bc3d5698SJohn Baldwin eor r8,r8,r12 175bc3d5698SJohn Baldwin eor r9,r9,r14 176bc3d5698SJohn Baldwin 177bc3d5698SJohn Baldwin eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0]; 178bc3d5698SJohn Baldwin#ifndef __thumb2__ 179bc3d5698SJohn Baldwin str r10,[sp,#208] @ D[1] = E[0] 180bc3d5698SJohn Baldwin#endif 181bc3d5698SJohn Baldwin eor r11,r1,r4 182bc3d5698SJohn Baldwin#ifndef __thumb2__ 183bc3d5698SJohn Baldwin str r11,[sp,#208+4] 184bc3d5698SJohn Baldwin#else 185bc3d5698SJohn Baldwin strd 
r10,r11,[sp,#208] @ D[1] = E[0] 186bc3d5698SJohn Baldwin#endif 187bc3d5698SJohn Baldwin eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3]; 188bc3d5698SJohn Baldwin eor r14,r7,r0 189bc3d5698SJohn Baldwin#ifndef __thumb2__ 190bc3d5698SJohn Baldwin str r12,[sp,#232] @ D[4] = E[1] 191bc3d5698SJohn Baldwin#endif 192bc3d5698SJohn Baldwin eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4]; 193bc3d5698SJohn Baldwin#ifndef __thumb2__ 194bc3d5698SJohn Baldwin str r14,[sp,#232+4] 195bc3d5698SJohn Baldwin#else 196bc3d5698SJohn Baldwin strd r12,r14,[sp,#232] @ D[4] = E[1] 197bc3d5698SJohn Baldwin#endif 198bc3d5698SJohn Baldwin eor r1,r9,r2 199bc3d5698SJohn Baldwin#ifndef __thumb2__ 200bc3d5698SJohn Baldwin str r0,[sp,#200] @ D[0] = C[0] 201bc3d5698SJohn Baldwin#endif 202bc3d5698SJohn Baldwin eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1]; 203bc3d5698SJohn Baldwin#ifndef __thumb2__ 204bc3d5698SJohn Baldwin ldr r7,[sp,#144] 205bc3d5698SJohn Baldwin#endif 206bc3d5698SJohn Baldwin eor r3,r3,r6 207bc3d5698SJohn Baldwin#ifndef __thumb2__ 208bc3d5698SJohn Baldwin str r1,[sp,#200+4] 209bc3d5698SJohn Baldwin#else 210bc3d5698SJohn Baldwin strd r0,r1,[sp,#200] @ D[0] = C[0] 211bc3d5698SJohn Baldwin#endif 212bc3d5698SJohn Baldwin#ifndef __thumb2__ 213bc3d5698SJohn Baldwin ldr r6,[sp,#144+4] 214bc3d5698SJohn Baldwin#else 215bc3d5698SJohn Baldwin ldrd r7,r6,[sp,#144] 216bc3d5698SJohn Baldwin#endif 217bc3d5698SJohn Baldwin#ifndef __thumb2__ 218bc3d5698SJohn Baldwin str r2,[sp,#216] @ D[2] = C[1] 219bc3d5698SJohn Baldwin#endif 220bc3d5698SJohn Baldwin eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2]; 221bc3d5698SJohn Baldwin#ifndef __thumb2__ 222bc3d5698SJohn Baldwin str r3,[sp,#216+4] 223bc3d5698SJohn Baldwin#else 224bc3d5698SJohn Baldwin strd r2,r3,[sp,#216] @ D[2] = C[1] 225bc3d5698SJohn Baldwin#endif 226bc3d5698SJohn Baldwin eor r5,r5,r8 227bc3d5698SJohn Baldwin 228bc3d5698SJohn Baldwin#ifndef __thumb2__ 229bc3d5698SJohn Baldwin ldr r8,[sp,#192] 230bc3d5698SJohn 
Baldwin#endif 231bc3d5698SJohn Baldwin#ifndef __thumb2__ 232bc3d5698SJohn Baldwin ldr r9,[sp,#192+4] 233bc3d5698SJohn Baldwin#else 234bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#192] 235bc3d5698SJohn Baldwin#endif 236bc3d5698SJohn Baldwin#ifndef __thumb2__ 237bc3d5698SJohn Baldwin str r4,[sp,#224] @ D[3] = C[2] 238bc3d5698SJohn Baldwin#endif 239bc3d5698SJohn Baldwin eor r7,r7,r4 240bc3d5698SJohn Baldwin#ifndef __thumb2__ 241bc3d5698SJohn Baldwin str r5,[sp,#224+4] 242bc3d5698SJohn Baldwin#else 243bc3d5698SJohn Baldwin strd r4,r5,[sp,#224] @ D[3] = C[2] 244bc3d5698SJohn Baldwin#endif 245bc3d5698SJohn Baldwin eor r6,r6,r5 246bc3d5698SJohn Baldwin#ifndef __thumb2__ 247bc3d5698SJohn Baldwin ldr r4,[sp,#0] 248bc3d5698SJohn Baldwin#endif 249bc3d5698SJohn Baldwin @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */ 250bc3d5698SJohn Baldwin @ mov r6,r6,ror#32-11 251bc3d5698SJohn Baldwin#ifndef __thumb2__ 252bc3d5698SJohn Baldwin ldr r5,[sp,#0+4] 253bc3d5698SJohn Baldwin#else 254bc3d5698SJohn Baldwin ldrd r4,r5,[sp,#0] 255bc3d5698SJohn Baldwin#endif 256bc3d5698SJohn Baldwin eor r8,r8,r12 257bc3d5698SJohn Baldwin eor r9,r9,r14 258bc3d5698SJohn Baldwin#ifndef __thumb2__ 259bc3d5698SJohn Baldwin ldr r12,[sp,#96] 260bc3d5698SJohn Baldwin#endif 261bc3d5698SJohn Baldwin eor r0,r0,r4 262bc3d5698SJohn Baldwin#ifndef __thumb2__ 263bc3d5698SJohn Baldwin ldr r14,[sp,#96+4] 264bc3d5698SJohn Baldwin#else 265bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#96] 266bc3d5698SJohn Baldwin#endif 267bc3d5698SJohn Baldwin @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */ 268bc3d5698SJohn Baldwin @ mov r9,r9,ror#32-7 269bc3d5698SJohn Baldwin eor r1,r1,r5 @ C[0] = A[0][0] ^ C[0]; 270bc3d5698SJohn Baldwin eor r12,r12,r2 271bc3d5698SJohn Baldwin#ifndef __thumb2__ 272bc3d5698SJohn Baldwin ldr r2,[sp,#48] 273bc3d5698SJohn Baldwin#endif 274bc3d5698SJohn Baldwin eor r14,r14,r3 275bc3d5698SJohn Baldwin#ifndef __thumb2__ 276bc3d5698SJohn Baldwin ldr 
r3,[sp,#48+4] 277bc3d5698SJohn Baldwin#else 278bc3d5698SJohn Baldwin ldrd r2,r3,[sp,#48] 279bc3d5698SJohn Baldwin#endif 280bc3d5698SJohn Baldwin mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); 281bc3d5698SJohn Baldwin ldr r12,[sp,#444] @ load counter 282bc3d5698SJohn Baldwin eor r2,r2,r10 283bc3d5698SJohn Baldwin adr r10,iotas32 284bc3d5698SJohn Baldwin mov r4,r14,ror#32-22 285bc3d5698SJohn Baldwin add r14,r10,r12 286bc3d5698SJohn Baldwin eor r3,r3,r11 287bc3d5698SJohn Baldwin ldmia r14,{r10,r11} @ iotas[i] 288bc3d5698SJohn Baldwin bic r12,r4,r2,ror#32-22 289bc3d5698SJohn Baldwin bic r14,r5,r3,ror#32-22 290bc3d5698SJohn Baldwin mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); 291bc3d5698SJohn Baldwin mov r3,r3,ror#32-22 292bc3d5698SJohn Baldwin eor r12,r12,r0 293bc3d5698SJohn Baldwin eor r14,r14,r1 294bc3d5698SJohn Baldwin eor r10,r10,r12 295bc3d5698SJohn Baldwin eor r11,r11,r14 296bc3d5698SJohn Baldwin#ifndef __thumb2__ 297bc3d5698SJohn Baldwin str r10,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 298bc3d5698SJohn Baldwin#endif 299bc3d5698SJohn Baldwin bic r12,r6,r4,ror#11 300bc3d5698SJohn Baldwin#ifndef __thumb2__ 301bc3d5698SJohn Baldwin str r11,[sp,#240+4] 302bc3d5698SJohn Baldwin#else 303bc3d5698SJohn Baldwin strd r10,r11,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 304bc3d5698SJohn Baldwin#endif 305bc3d5698SJohn Baldwin bic r14,r7,r5,ror#10 306bc3d5698SJohn Baldwin bic r10,r8,r6,ror#32-(11-7) 307bc3d5698SJohn Baldwin bic r11,r9,r7,ror#32-(10-7) 308bc3d5698SJohn Baldwin eor r12,r2,r12,ror#32-11 309bc3d5698SJohn Baldwin#ifndef __thumb2__ 310bc3d5698SJohn Baldwin str r12,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 311bc3d5698SJohn Baldwin#endif 312bc3d5698SJohn Baldwin eor r14,r3,r14,ror#32-10 313bc3d5698SJohn Baldwin#ifndef __thumb2__ 314bc3d5698SJohn Baldwin str r14,[sp,#248+4] 315bc3d5698SJohn Baldwin#else 316bc3d5698SJohn Baldwin strd r12,r14,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 
317bc3d5698SJohn Baldwin#endif 318bc3d5698SJohn Baldwin eor r10,r4,r10,ror#32-7 319bc3d5698SJohn Baldwin eor r11,r5,r11,ror#32-7 320bc3d5698SJohn Baldwin#ifndef __thumb2__ 321bc3d5698SJohn Baldwin str r10,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 322bc3d5698SJohn Baldwin#endif 323bc3d5698SJohn Baldwin bic r12,r0,r8,ror#32-7 324bc3d5698SJohn Baldwin#ifndef __thumb2__ 325bc3d5698SJohn Baldwin str r11,[sp,#256+4] 326bc3d5698SJohn Baldwin#else 327bc3d5698SJohn Baldwin strd r10,r11,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 328bc3d5698SJohn Baldwin#endif 329bc3d5698SJohn Baldwin bic r14,r1,r9,ror#32-7 330bc3d5698SJohn Baldwin eor r12,r12,r6,ror#32-11 331bc3d5698SJohn Baldwin#ifndef __thumb2__ 332bc3d5698SJohn Baldwin str r12,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 333bc3d5698SJohn Baldwin#endif 334bc3d5698SJohn Baldwin eor r14,r14,r7,ror#32-10 335bc3d5698SJohn Baldwin#ifndef __thumb2__ 336bc3d5698SJohn Baldwin str r14,[sp,#264+4] 337bc3d5698SJohn Baldwin#else 338bc3d5698SJohn Baldwin strd r12,r14,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 339bc3d5698SJohn Baldwin#endif 340bc3d5698SJohn Baldwin bic r10,r2,r0 341bc3d5698SJohn Baldwin add r14,sp,#224 342bc3d5698SJohn Baldwin#ifndef __thumb2__ 343bc3d5698SJohn Baldwin ldr r0,[sp,#24] @ A[0][3] 344bc3d5698SJohn Baldwin#endif 345bc3d5698SJohn Baldwin bic r11,r3,r1 346bc3d5698SJohn Baldwin#ifndef __thumb2__ 347bc3d5698SJohn Baldwin ldr r1,[sp,#24+4] 348bc3d5698SJohn Baldwin#else 349bc3d5698SJohn Baldwin ldrd r0,r1,[sp,#24] @ A[0][3] 350bc3d5698SJohn Baldwin#endif 351bc3d5698SJohn Baldwin eor r10,r10,r8,ror#32-7 352bc3d5698SJohn Baldwin eor r11,r11,r9,ror#32-7 353bc3d5698SJohn Baldwin#ifndef __thumb2__ 354bc3d5698SJohn Baldwin str r10,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 355bc3d5698SJohn Baldwin#endif 356bc3d5698SJohn Baldwin add r9,sp,#200 357bc3d5698SJohn Baldwin#ifndef __thumb2__ 358bc3d5698SJohn Baldwin str r11,[sp,#272+4] 359bc3d5698SJohn Baldwin#else 360bc3d5698SJohn Baldwin strd r10,r11,[sp,#272] @ 
R[0][4] = C[4] ^ (~C[0] & C[1]); 361bc3d5698SJohn Baldwin#endif 362bc3d5698SJohn Baldwin 363bc3d5698SJohn Baldwin ldmia r14,{r10,r11,r12,r14} @ D[3..4] 364bc3d5698SJohn Baldwin ldmia r9,{r6,r7,r8,r9} @ D[0..1] 365bc3d5698SJohn Baldwin 366bc3d5698SJohn Baldwin#ifndef __thumb2__ 367bc3d5698SJohn Baldwin ldr r2,[sp,#72] @ A[1][4] 368bc3d5698SJohn Baldwin#endif 369bc3d5698SJohn Baldwin eor r0,r0,r10 370bc3d5698SJohn Baldwin#ifndef __thumb2__ 371bc3d5698SJohn Baldwin ldr r3,[sp,#72+4] 372bc3d5698SJohn Baldwin#else 373bc3d5698SJohn Baldwin ldrd r2,r3,[sp,#72] @ A[1][4] 374bc3d5698SJohn Baldwin#endif 375bc3d5698SJohn Baldwin eor r1,r1,r11 376bc3d5698SJohn Baldwin @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]); 377bc3d5698SJohn Baldwin#ifndef __thumb2__ 378bc3d5698SJohn Baldwin ldr r10,[sp,#128] @ A[3][1] 379bc3d5698SJohn Baldwin#endif 380bc3d5698SJohn Baldwin @ mov r1,r1,ror#32-14 381bc3d5698SJohn Baldwin#ifndef __thumb2__ 382bc3d5698SJohn Baldwin ldr r11,[sp,#128+4] 383bc3d5698SJohn Baldwin#else 384bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#128] @ A[3][1] 385bc3d5698SJohn Baldwin#endif 386bc3d5698SJohn Baldwin 387bc3d5698SJohn Baldwin eor r2,r2,r12 388bc3d5698SJohn Baldwin#ifndef __thumb2__ 389bc3d5698SJohn Baldwin ldr r4,[sp,#80] @ A[2][0] 390bc3d5698SJohn Baldwin#endif 391bc3d5698SJohn Baldwin eor r3,r3,r14 392bc3d5698SJohn Baldwin#ifndef __thumb2__ 393bc3d5698SJohn Baldwin ldr r5,[sp,#80+4] 394bc3d5698SJohn Baldwin#else 395bc3d5698SJohn Baldwin ldrd r4,r5,[sp,#80] @ A[2][0] 396bc3d5698SJohn Baldwin#endif 397bc3d5698SJohn Baldwin @ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]); 398bc3d5698SJohn Baldwin @ mov r3,r3,ror#32-10 399bc3d5698SJohn Baldwin 400bc3d5698SJohn Baldwin eor r6,r6,r4 401bc3d5698SJohn Baldwin#ifndef __thumb2__ 402bc3d5698SJohn Baldwin ldr r12,[sp,#216] @ D[2] 403bc3d5698SJohn Baldwin#endif 404bc3d5698SJohn Baldwin eor r7,r7,r5 405bc3d5698SJohn Baldwin#ifndef __thumb2__ 406bc3d5698SJohn Baldwin ldr 
r14,[sp,#216+4] 407bc3d5698SJohn Baldwin#else 408bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#216] @ D[2] 409bc3d5698SJohn Baldwin#endif 410bc3d5698SJohn Baldwin mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]); 411bc3d5698SJohn Baldwin mov r4,r7,ror#32-2 412bc3d5698SJohn Baldwin 413bc3d5698SJohn Baldwin eor r10,r10,r8 414bc3d5698SJohn Baldwin#ifndef __thumb2__ 415bc3d5698SJohn Baldwin ldr r8,[sp,#176] @ A[4][2] 416bc3d5698SJohn Baldwin#endif 417bc3d5698SJohn Baldwin eor r11,r11,r9 418bc3d5698SJohn Baldwin#ifndef __thumb2__ 419bc3d5698SJohn Baldwin ldr r9,[sp,#176+4] 420bc3d5698SJohn Baldwin#else 421bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#176] @ A[4][2] 422bc3d5698SJohn Baldwin#endif 423bc3d5698SJohn Baldwin mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]); 424bc3d5698SJohn Baldwin mov r6,r11,ror#32-23 425bc3d5698SJohn Baldwin 426bc3d5698SJohn Baldwin bic r10,r4,r2,ror#32-10 427bc3d5698SJohn Baldwin bic r11,r5,r3,ror#32-10 428bc3d5698SJohn Baldwin eor r12,r12,r8 429bc3d5698SJohn Baldwin eor r14,r14,r9 430bc3d5698SJohn Baldwin mov r9,r12,ror#32-30 @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]); 431bc3d5698SJohn Baldwin mov r8,r14,ror#32-31 432bc3d5698SJohn Baldwin eor r10,r10,r0,ror#32-14 433bc3d5698SJohn Baldwin eor r11,r11,r1,ror#32-14 434bc3d5698SJohn Baldwin#ifndef __thumb2__ 435bc3d5698SJohn Baldwin str r10,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 436bc3d5698SJohn Baldwin#endif 437bc3d5698SJohn Baldwin bic r12,r6,r4 438bc3d5698SJohn Baldwin#ifndef __thumb2__ 439bc3d5698SJohn Baldwin str r11,[sp,#280+4] 440bc3d5698SJohn Baldwin#else 441bc3d5698SJohn Baldwin strd r10,r11,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 442bc3d5698SJohn Baldwin#endif 443bc3d5698SJohn Baldwin bic r14,r7,r5 444bc3d5698SJohn Baldwin eor r12,r12,r2,ror#32-10 445bc3d5698SJohn Baldwin#ifndef __thumb2__ 446bc3d5698SJohn Baldwin str r12,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 447bc3d5698SJohn Baldwin#endif 448bc3d5698SJohn Baldwin eor r14,r14,r3,ror#32-10 
449bc3d5698SJohn Baldwin#ifndef __thumb2__ 450bc3d5698SJohn Baldwin str r14,[sp,#288+4] 451bc3d5698SJohn Baldwin#else 452bc3d5698SJohn Baldwin strd r12,r14,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 453bc3d5698SJohn Baldwin#endif 454bc3d5698SJohn Baldwin bic r10,r8,r6 455bc3d5698SJohn Baldwin bic r11,r9,r7 456bc3d5698SJohn Baldwin bic r12,r0,r8,ror#14 457bc3d5698SJohn Baldwin bic r14,r1,r9,ror#14 458bc3d5698SJohn Baldwin eor r10,r10,r4 459bc3d5698SJohn Baldwin eor r11,r11,r5 460bc3d5698SJohn Baldwin#ifndef __thumb2__ 461bc3d5698SJohn Baldwin str r10,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 462bc3d5698SJohn Baldwin#endif 463bc3d5698SJohn Baldwin bic r2,r2,r0,ror#32-(14-10) 464bc3d5698SJohn Baldwin#ifndef __thumb2__ 465bc3d5698SJohn Baldwin str r11,[sp,#296+4] 466bc3d5698SJohn Baldwin#else 467bc3d5698SJohn Baldwin strd r10,r11,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 468bc3d5698SJohn Baldwin#endif 469bc3d5698SJohn Baldwin eor r12,r6,r12,ror#32-14 470bc3d5698SJohn Baldwin bic r11,r3,r1,ror#32-(14-10) 471bc3d5698SJohn Baldwin#ifndef __thumb2__ 472bc3d5698SJohn Baldwin str r12,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 473bc3d5698SJohn Baldwin#endif 474bc3d5698SJohn Baldwin eor r14,r7,r14,ror#32-14 475bc3d5698SJohn Baldwin#ifndef __thumb2__ 476bc3d5698SJohn Baldwin str r14,[sp,#304+4] 477bc3d5698SJohn Baldwin#else 478bc3d5698SJohn Baldwin strd r12,r14,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 479bc3d5698SJohn Baldwin#endif 480bc3d5698SJohn Baldwin add r12,sp,#208 481bc3d5698SJohn Baldwin#ifndef __thumb2__ 482bc3d5698SJohn Baldwin ldr r1,[sp,#8] @ A[0][1] 483bc3d5698SJohn Baldwin#endif 484bc3d5698SJohn Baldwin eor r10,r8,r2,ror#32-10 485bc3d5698SJohn Baldwin#ifndef __thumb2__ 486bc3d5698SJohn Baldwin ldr r0,[sp,#8+4] 487bc3d5698SJohn Baldwin#else 488bc3d5698SJohn Baldwin ldrd r1,r0,[sp,#8] @ A[0][1] 489bc3d5698SJohn Baldwin#endif 490bc3d5698SJohn Baldwin eor r11,r9,r11,ror#32-10 491bc3d5698SJohn Baldwin#ifndef __thumb2__ 492bc3d5698SJohn Baldwin str 
r10,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 493bc3d5698SJohn Baldwin#endif 494bc3d5698SJohn Baldwin#ifndef __thumb2__ 495bc3d5698SJohn Baldwin str r11,[sp,#312+4] 496bc3d5698SJohn Baldwin#else 497bc3d5698SJohn Baldwin strd r10,r11,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 498bc3d5698SJohn Baldwin#endif 499bc3d5698SJohn Baldwin 500bc3d5698SJohn Baldwin add r9,sp,#224 501bc3d5698SJohn Baldwin ldmia r12,{r10,r11,r12,r14} @ D[1..2] 502bc3d5698SJohn Baldwin#ifndef __thumb2__ 503bc3d5698SJohn Baldwin ldr r2,[sp,#56] @ A[1][2] 504bc3d5698SJohn Baldwin#endif 505bc3d5698SJohn Baldwin#ifndef __thumb2__ 506bc3d5698SJohn Baldwin ldr r3,[sp,#56+4] 507bc3d5698SJohn Baldwin#else 508bc3d5698SJohn Baldwin ldrd r2,r3,[sp,#56] @ A[1][2] 509bc3d5698SJohn Baldwin#endif 510bc3d5698SJohn Baldwin ldmia r9,{r6,r7,r8,r9} @ D[3..4] 511bc3d5698SJohn Baldwin 512bc3d5698SJohn Baldwin eor r1,r1,r10 513bc3d5698SJohn Baldwin#ifndef __thumb2__ 514bc3d5698SJohn Baldwin ldr r4,[sp,#104] @ A[2][3] 515bc3d5698SJohn Baldwin#endif 516bc3d5698SJohn Baldwin eor r0,r0,r11 517bc3d5698SJohn Baldwin#ifndef __thumb2__ 518bc3d5698SJohn Baldwin ldr r5,[sp,#104+4] 519bc3d5698SJohn Baldwin#else 520bc3d5698SJohn Baldwin ldrd r4,r5,[sp,#104] @ A[2][3] 521bc3d5698SJohn Baldwin#endif 522bc3d5698SJohn Baldwin mov r0,r0,ror#32-1 @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]); 523bc3d5698SJohn Baldwin 524bc3d5698SJohn Baldwin eor r2,r2,r12 525bc3d5698SJohn Baldwin#ifndef __thumb2__ 526bc3d5698SJohn Baldwin ldr r10,[sp,#152] @ A[3][4] 527bc3d5698SJohn Baldwin#endif 528bc3d5698SJohn Baldwin eor r3,r3,r14 529bc3d5698SJohn Baldwin#ifndef __thumb2__ 530bc3d5698SJohn Baldwin ldr r11,[sp,#152+4] 531bc3d5698SJohn Baldwin#else 532bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#152] @ A[3][4] 533bc3d5698SJohn Baldwin#endif 534bc3d5698SJohn Baldwin @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]); 535bc3d5698SJohn Baldwin#ifndef __thumb2__ 536bc3d5698SJohn Baldwin ldr r12,[sp,#200] @ D[0] 537bc3d5698SJohn 
Baldwin#endif 538bc3d5698SJohn Baldwin @ mov r3,r3,ror#32-3 539bc3d5698SJohn Baldwin#ifndef __thumb2__ 540bc3d5698SJohn Baldwin ldr r14,[sp,#200+4] 541bc3d5698SJohn Baldwin#else 542bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#200] @ D[0] 543bc3d5698SJohn Baldwin#endif 544bc3d5698SJohn Baldwin 545bc3d5698SJohn Baldwin eor r4,r4,r6 546bc3d5698SJohn Baldwin eor r5,r5,r7 547bc3d5698SJohn Baldwin @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]); 548bc3d5698SJohn Baldwin @ mov r4,r7,ror#32-13 @ [track reverse order below] 549bc3d5698SJohn Baldwin 550bc3d5698SJohn Baldwin eor r10,r10,r8 551bc3d5698SJohn Baldwin#ifndef __thumb2__ 552bc3d5698SJohn Baldwin ldr r8,[sp,#160] @ A[4][0] 553bc3d5698SJohn Baldwin#endif 554bc3d5698SJohn Baldwin eor r11,r11,r9 555bc3d5698SJohn Baldwin#ifndef __thumb2__ 556bc3d5698SJohn Baldwin ldr r9,[sp,#160+4] 557bc3d5698SJohn Baldwin#else 558bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#160] @ A[4][0] 559bc3d5698SJohn Baldwin#endif 560bc3d5698SJohn Baldwin mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]); 561bc3d5698SJohn Baldwin mov r7,r11,ror#32-4 562bc3d5698SJohn Baldwin 563bc3d5698SJohn Baldwin eor r12,r12,r8 564bc3d5698SJohn Baldwin eor r14,r14,r9 565bc3d5698SJohn Baldwin mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]); 566bc3d5698SJohn Baldwin mov r9,r14,ror#32-9 567bc3d5698SJohn Baldwin 568bc3d5698SJohn Baldwin bic r10,r5,r2,ror#13-3 569bc3d5698SJohn Baldwin bic r11,r4,r3,ror#12-3 570bc3d5698SJohn Baldwin bic r12,r6,r5,ror#32-13 571bc3d5698SJohn Baldwin bic r14,r7,r4,ror#32-12 572bc3d5698SJohn Baldwin eor r10,r0,r10,ror#32-13 573bc3d5698SJohn Baldwin eor r11,r1,r11,ror#32-12 574bc3d5698SJohn Baldwin#ifndef __thumb2__ 575bc3d5698SJohn Baldwin str r10,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 576bc3d5698SJohn Baldwin#endif 577bc3d5698SJohn Baldwin eor r12,r12,r2,ror#32-3 578bc3d5698SJohn Baldwin#ifndef __thumb2__ 579bc3d5698SJohn Baldwin str r11,[sp,#320+4] 580bc3d5698SJohn Baldwin#else 
581bc3d5698SJohn Baldwin strd r10,r11,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 582bc3d5698SJohn Baldwin#endif 583bc3d5698SJohn Baldwin eor r14,r14,r3,ror#32-3 584bc3d5698SJohn Baldwin#ifndef __thumb2__ 585bc3d5698SJohn Baldwin str r12,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 586bc3d5698SJohn Baldwin#endif 587bc3d5698SJohn Baldwin bic r10,r8,r6 588bc3d5698SJohn Baldwin bic r11,r9,r7 589bc3d5698SJohn Baldwin#ifndef __thumb2__ 590bc3d5698SJohn Baldwin str r14,[sp,#328+4] 591bc3d5698SJohn Baldwin#else 592bc3d5698SJohn Baldwin strd r12,r14,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 593bc3d5698SJohn Baldwin#endif 594bc3d5698SJohn Baldwin eor r10,r10,r5,ror#32-13 595bc3d5698SJohn Baldwin eor r11,r11,r4,ror#32-12 596bc3d5698SJohn Baldwin#ifndef __thumb2__ 597bc3d5698SJohn Baldwin str r10,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 598bc3d5698SJohn Baldwin#endif 599bc3d5698SJohn Baldwin bic r12,r0,r8 600bc3d5698SJohn Baldwin#ifndef __thumb2__ 601bc3d5698SJohn Baldwin str r11,[sp,#336+4] 602bc3d5698SJohn Baldwin#else 603bc3d5698SJohn Baldwin strd r10,r11,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 604bc3d5698SJohn Baldwin#endif 605bc3d5698SJohn Baldwin bic r14,r1,r9 606bc3d5698SJohn Baldwin eor r12,r12,r6 607bc3d5698SJohn Baldwin eor r14,r14,r7 608bc3d5698SJohn Baldwin#ifndef __thumb2__ 609bc3d5698SJohn Baldwin str r12,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 610bc3d5698SJohn Baldwin#endif 611bc3d5698SJohn Baldwin bic r10,r2,r0,ror#3 612bc3d5698SJohn Baldwin#ifndef __thumb2__ 613bc3d5698SJohn Baldwin str r14,[sp,#344+4] 614bc3d5698SJohn Baldwin#else 615bc3d5698SJohn Baldwin strd r12,r14,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 616bc3d5698SJohn Baldwin#endif 617bc3d5698SJohn Baldwin bic r11,r3,r1,ror#3 618bc3d5698SJohn Baldwin#ifndef __thumb2__ 619bc3d5698SJohn Baldwin ldr r1,[sp,#32] @ A[0][4] [in reverse order] 620bc3d5698SJohn Baldwin#endif 621bc3d5698SJohn Baldwin eor r10,r8,r10,ror#32-3 622bc3d5698SJohn Baldwin#ifndef __thumb2__ 623bc3d5698SJohn 
Baldwin ldr r0,[sp,#32+4] 624bc3d5698SJohn Baldwin#else 625bc3d5698SJohn Baldwin ldrd r1,r0,[sp,#32] @ A[0][4] [in reverse order] 626bc3d5698SJohn Baldwin#endif 627bc3d5698SJohn Baldwin eor r11,r9,r11,ror#32-3 628bc3d5698SJohn Baldwin#ifndef __thumb2__ 629bc3d5698SJohn Baldwin str r10,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 630bc3d5698SJohn Baldwin#endif 631bc3d5698SJohn Baldwin add r9,sp,#208 632bc3d5698SJohn Baldwin#ifndef __thumb2__ 633bc3d5698SJohn Baldwin str r11,[sp,#352+4] 634bc3d5698SJohn Baldwin#else 635bc3d5698SJohn Baldwin strd r10,r11,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 636bc3d5698SJohn Baldwin#endif 637bc3d5698SJohn Baldwin 638bc3d5698SJohn Baldwin#ifndef __thumb2__ 639bc3d5698SJohn Baldwin ldr r10,[sp,#232] @ D[4] 640bc3d5698SJohn Baldwin#endif 641bc3d5698SJohn Baldwin#ifndef __thumb2__ 642bc3d5698SJohn Baldwin ldr r11,[sp,#232+4] 643bc3d5698SJohn Baldwin#else 644bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#232] @ D[4] 645bc3d5698SJohn Baldwin#endif 646bc3d5698SJohn Baldwin#ifndef __thumb2__ 647bc3d5698SJohn Baldwin ldr r12,[sp,#200] @ D[0] 648bc3d5698SJohn Baldwin#endif 649bc3d5698SJohn Baldwin#ifndef __thumb2__ 650bc3d5698SJohn Baldwin ldr r14,[sp,#200+4] 651bc3d5698SJohn Baldwin#else 652bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#200] @ D[0] 653bc3d5698SJohn Baldwin#endif 654bc3d5698SJohn Baldwin 655bc3d5698SJohn Baldwin ldmia r9,{r6,r7,r8,r9} @ D[1..2] 656bc3d5698SJohn Baldwin 657bc3d5698SJohn Baldwin eor r1,r1,r10 658bc3d5698SJohn Baldwin#ifndef __thumb2__ 659bc3d5698SJohn Baldwin ldr r2,[sp,#40] @ A[1][0] 660bc3d5698SJohn Baldwin#endif 661bc3d5698SJohn Baldwin eor r0,r0,r11 662bc3d5698SJohn Baldwin#ifndef __thumb2__ 663bc3d5698SJohn Baldwin ldr r3,[sp,#40+4] 664bc3d5698SJohn Baldwin#else 665bc3d5698SJohn Baldwin ldrd r2,r3,[sp,#40] @ A[1][0] 666bc3d5698SJohn Baldwin#endif 667bc3d5698SJohn Baldwin @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]); 668bc3d5698SJohn Baldwin#ifndef __thumb2__ 669bc3d5698SJohn Baldwin ldr 
r4,[sp,#88] @ A[2][1] 670bc3d5698SJohn Baldwin#endif 671bc3d5698SJohn Baldwin @ mov r0,r11,ror#32-14 @ [was loaded in reverse order] 672bc3d5698SJohn Baldwin#ifndef __thumb2__ 673bc3d5698SJohn Baldwin ldr r5,[sp,#88+4] 674bc3d5698SJohn Baldwin#else 675bc3d5698SJohn Baldwin ldrd r4,r5,[sp,#88] @ A[2][1] 676bc3d5698SJohn Baldwin#endif 677bc3d5698SJohn Baldwin 678bc3d5698SJohn Baldwin eor r2,r2,r12 679bc3d5698SJohn Baldwin#ifndef __thumb2__ 680bc3d5698SJohn Baldwin ldr r10,[sp,#136] @ A[3][2] 681bc3d5698SJohn Baldwin#endif 682bc3d5698SJohn Baldwin eor r3,r3,r14 683bc3d5698SJohn Baldwin#ifndef __thumb2__ 684bc3d5698SJohn Baldwin ldr r11,[sp,#136+4] 685bc3d5698SJohn Baldwin#else 686bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#136] @ A[3][2] 687bc3d5698SJohn Baldwin#endif 688bc3d5698SJohn Baldwin @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]); 689bc3d5698SJohn Baldwin#ifndef __thumb2__ 690bc3d5698SJohn Baldwin ldr r12,[sp,#224] @ D[3] 691bc3d5698SJohn Baldwin#endif 692bc3d5698SJohn Baldwin @ mov r3,r3,ror#32-18 693bc3d5698SJohn Baldwin#ifndef __thumb2__ 694bc3d5698SJohn Baldwin ldr r14,[sp,#224+4] 695bc3d5698SJohn Baldwin#else 696bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#224] @ D[3] 697bc3d5698SJohn Baldwin#endif 698bc3d5698SJohn Baldwin 699bc3d5698SJohn Baldwin eor r6,r6,r4 700bc3d5698SJohn Baldwin eor r7,r7,r5 701bc3d5698SJohn Baldwin mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]); 702bc3d5698SJohn Baldwin mov r5,r7,ror#32-5 703bc3d5698SJohn Baldwin 704bc3d5698SJohn Baldwin eor r10,r10,r8 705bc3d5698SJohn Baldwin#ifndef __thumb2__ 706bc3d5698SJohn Baldwin ldr r8,[sp,#184] @ A[4][3] 707bc3d5698SJohn Baldwin#endif 708bc3d5698SJohn Baldwin eor r11,r11,r9 709bc3d5698SJohn Baldwin#ifndef __thumb2__ 710bc3d5698SJohn Baldwin ldr r9,[sp,#184+4] 711bc3d5698SJohn Baldwin#else 712bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#184] @ A[4][3] 713bc3d5698SJohn Baldwin#endif 714bc3d5698SJohn Baldwin mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], 
rhotates[3][2]); 715bc3d5698SJohn Baldwin mov r6,r11,ror#32-8 716bc3d5698SJohn Baldwin 717bc3d5698SJohn Baldwin eor r12,r12,r8 718bc3d5698SJohn Baldwin eor r14,r14,r9 719bc3d5698SJohn Baldwin mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]); 720bc3d5698SJohn Baldwin mov r9,r14,ror#32-28 721bc3d5698SJohn Baldwin 722bc3d5698SJohn Baldwin bic r10,r4,r2,ror#32-18 723bc3d5698SJohn Baldwin bic r11,r5,r3,ror#32-18 724bc3d5698SJohn Baldwin eor r10,r10,r0,ror#32-14 725bc3d5698SJohn Baldwin eor r11,r11,r1,ror#32-13 726bc3d5698SJohn Baldwin#ifndef __thumb2__ 727bc3d5698SJohn Baldwin str r10,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 728bc3d5698SJohn Baldwin#endif 729bc3d5698SJohn Baldwin bic r12,r6,r4 730bc3d5698SJohn Baldwin#ifndef __thumb2__ 731bc3d5698SJohn Baldwin str r11,[sp,#360+4] 732bc3d5698SJohn Baldwin#else 733bc3d5698SJohn Baldwin strd r10,r11,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 734bc3d5698SJohn Baldwin#endif 735bc3d5698SJohn Baldwin bic r14,r7,r5 736bc3d5698SJohn Baldwin eor r12,r12,r2,ror#32-18 737bc3d5698SJohn Baldwin#ifndef __thumb2__ 738bc3d5698SJohn Baldwin str r12,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 739bc3d5698SJohn Baldwin#endif 740bc3d5698SJohn Baldwin eor r14,r14,r3,ror#32-18 741bc3d5698SJohn Baldwin#ifndef __thumb2__ 742bc3d5698SJohn Baldwin str r14,[sp,#368+4] 743bc3d5698SJohn Baldwin#else 744bc3d5698SJohn Baldwin strd r12,r14,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 745bc3d5698SJohn Baldwin#endif 746bc3d5698SJohn Baldwin bic r10,r8,r6 747bc3d5698SJohn Baldwin bic r11,r9,r7 748bc3d5698SJohn Baldwin bic r12,r0,r8,ror#14 749bc3d5698SJohn Baldwin bic r14,r1,r9,ror#13 750bc3d5698SJohn Baldwin eor r10,r10,r4 751bc3d5698SJohn Baldwin eor r11,r11,r5 752bc3d5698SJohn Baldwin#ifndef __thumb2__ 753bc3d5698SJohn Baldwin str r10,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 754bc3d5698SJohn Baldwin#endif 755bc3d5698SJohn Baldwin bic r2,r2,r0,ror#18-14 756bc3d5698SJohn Baldwin#ifndef __thumb2__ 757bc3d5698SJohn Baldwin str 
r11,[sp,#376+4] 758bc3d5698SJohn Baldwin#else 759bc3d5698SJohn Baldwin strd r10,r11,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 760bc3d5698SJohn Baldwin#endif 761bc3d5698SJohn Baldwin eor r12,r6,r12,ror#32-14 762bc3d5698SJohn Baldwin bic r11,r3,r1,ror#18-13 763bc3d5698SJohn Baldwin eor r14,r7,r14,ror#32-13 764bc3d5698SJohn Baldwin#ifndef __thumb2__ 765bc3d5698SJohn Baldwin str r12,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 766bc3d5698SJohn Baldwin#endif 767bc3d5698SJohn Baldwin#ifndef __thumb2__ 768bc3d5698SJohn Baldwin str r14,[sp,#384+4] 769bc3d5698SJohn Baldwin#else 770bc3d5698SJohn Baldwin strd r12,r14,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 771bc3d5698SJohn Baldwin#endif 772bc3d5698SJohn Baldwin add r14,sp,#216 773bc3d5698SJohn Baldwin#ifndef __thumb2__ 774bc3d5698SJohn Baldwin ldr r0,[sp,#16] @ A[0][2] 775bc3d5698SJohn Baldwin#endif 776bc3d5698SJohn Baldwin eor r10,r8,r2,ror#32-18 777bc3d5698SJohn Baldwin#ifndef __thumb2__ 778bc3d5698SJohn Baldwin ldr r1,[sp,#16+4] 779bc3d5698SJohn Baldwin#else 780bc3d5698SJohn Baldwin ldrd r0,r1,[sp,#16] @ A[0][2] 781bc3d5698SJohn Baldwin#endif 782bc3d5698SJohn Baldwin eor r11,r9,r11,ror#32-18 783bc3d5698SJohn Baldwin#ifndef __thumb2__ 784bc3d5698SJohn Baldwin str r10,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 785bc3d5698SJohn Baldwin#endif 786bc3d5698SJohn Baldwin#ifndef __thumb2__ 787bc3d5698SJohn Baldwin str r11,[sp,#392+4] 788bc3d5698SJohn Baldwin#else 789bc3d5698SJohn Baldwin strd r10,r11,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 790bc3d5698SJohn Baldwin#endif 791bc3d5698SJohn Baldwin 792bc3d5698SJohn Baldwin ldmia r14,{r10,r11,r12,r14} @ D[2..3] 793bc3d5698SJohn Baldwin#ifndef __thumb2__ 794bc3d5698SJohn Baldwin ldr r2,[sp,#64] @ A[1][3] 795bc3d5698SJohn Baldwin#endif 796bc3d5698SJohn Baldwin#ifndef __thumb2__ 797bc3d5698SJohn Baldwin ldr r3,[sp,#64+4] 798bc3d5698SJohn Baldwin#else 799bc3d5698SJohn Baldwin ldrd r2,r3,[sp,#64] @ A[1][3] 800bc3d5698SJohn Baldwin#endif 801bc3d5698SJohn Baldwin#ifndef 
__thumb2__ 802bc3d5698SJohn Baldwin ldr r6,[sp,#232] @ D[4] 803bc3d5698SJohn Baldwin#endif 804bc3d5698SJohn Baldwin#ifndef __thumb2__ 805bc3d5698SJohn Baldwin ldr r7,[sp,#232+4] 806bc3d5698SJohn Baldwin#else 807bc3d5698SJohn Baldwin ldrd r6,r7,[sp,#232] @ D[4] 808bc3d5698SJohn Baldwin#endif 809bc3d5698SJohn Baldwin 810bc3d5698SJohn Baldwin eor r0,r0,r10 811bc3d5698SJohn Baldwin#ifndef __thumb2__ 812bc3d5698SJohn Baldwin ldr r4,[sp,#112] @ A[2][4] 813bc3d5698SJohn Baldwin#endif 814bc3d5698SJohn Baldwin eor r1,r1,r11 815bc3d5698SJohn Baldwin#ifndef __thumb2__ 816bc3d5698SJohn Baldwin ldr r5,[sp,#112+4] 817bc3d5698SJohn Baldwin#else 818bc3d5698SJohn Baldwin ldrd r4,r5,[sp,#112] @ A[2][4] 819bc3d5698SJohn Baldwin#endif 820bc3d5698SJohn Baldwin @ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]); 821bc3d5698SJohn Baldwin#ifndef __thumb2__ 822bc3d5698SJohn Baldwin ldr r8,[sp,#200] @ D[0] 823bc3d5698SJohn Baldwin#endif 824bc3d5698SJohn Baldwin @ mov r1,r1,ror#32-31 825bc3d5698SJohn Baldwin#ifndef __thumb2__ 826bc3d5698SJohn Baldwin ldr r9,[sp,#200+4] 827bc3d5698SJohn Baldwin#else 828bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#200] @ D[0] 829bc3d5698SJohn Baldwin#endif 830bc3d5698SJohn Baldwin 831bc3d5698SJohn Baldwin eor r12,r12,r2 832bc3d5698SJohn Baldwin#ifndef __thumb2__ 833bc3d5698SJohn Baldwin ldr r10,[sp,#120] @ A[3][0] 834bc3d5698SJohn Baldwin#endif 835bc3d5698SJohn Baldwin eor r14,r14,r3 836bc3d5698SJohn Baldwin#ifndef __thumb2__ 837bc3d5698SJohn Baldwin ldr r11,[sp,#120+4] 838bc3d5698SJohn Baldwin#else 839bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#120] @ A[3][0] 840bc3d5698SJohn Baldwin#endif 841bc3d5698SJohn Baldwin mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]); 842bc3d5698SJohn Baldwin#ifndef __thumb2__ 843bc3d5698SJohn Baldwin ldr r12,[sp,#208] @ D[1] 844bc3d5698SJohn Baldwin#endif 845bc3d5698SJohn Baldwin mov r2,r14,ror#32-28 846bc3d5698SJohn Baldwin#ifndef __thumb2__ 847bc3d5698SJohn Baldwin ldr r14,[sp,#208+4] 848bc3d5698SJohn 
Baldwin#else 849bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#208] @ D[1] 850bc3d5698SJohn Baldwin#endif 851bc3d5698SJohn Baldwin 852bc3d5698SJohn Baldwin eor r6,r6,r4 853bc3d5698SJohn Baldwin eor r7,r7,r5 854bc3d5698SJohn Baldwin mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]); 855bc3d5698SJohn Baldwin mov r4,r7,ror#32-20 856bc3d5698SJohn Baldwin 857bc3d5698SJohn Baldwin eor r10,r10,r8 858bc3d5698SJohn Baldwin#ifndef __thumb2__ 859bc3d5698SJohn Baldwin ldr r8,[sp,#168] @ A[4][1] 860bc3d5698SJohn Baldwin#endif 861bc3d5698SJohn Baldwin eor r11,r11,r9 862bc3d5698SJohn Baldwin#ifndef __thumb2__ 863bc3d5698SJohn Baldwin ldr r9,[sp,#168+4] 864bc3d5698SJohn Baldwin#else 865bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#168] @ A[4][1] 866bc3d5698SJohn Baldwin#endif 867bc3d5698SJohn Baldwin mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]); 868bc3d5698SJohn Baldwin mov r6,r11,ror#32-21 869bc3d5698SJohn Baldwin 870bc3d5698SJohn Baldwin eor r8,r8,r12 871bc3d5698SJohn Baldwin eor r9,r9,r14 872bc3d5698SJohn Baldwin @ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]); 873bc3d5698SJohn Baldwin @ mov r9,r3,ror#32-1 874bc3d5698SJohn Baldwin 875bc3d5698SJohn Baldwin bic r10,r4,r2 876bc3d5698SJohn Baldwin bic r11,r5,r3 877bc3d5698SJohn Baldwin eor r10,r10,r0,ror#32-31 878bc3d5698SJohn Baldwin#ifndef __thumb2__ 879bc3d5698SJohn Baldwin str r10,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 880bc3d5698SJohn Baldwin#endif 881bc3d5698SJohn Baldwin eor r11,r11,r1,ror#32-31 882bc3d5698SJohn Baldwin#ifndef __thumb2__ 883bc3d5698SJohn Baldwin str r11,[sp,#400+4] 884bc3d5698SJohn Baldwin#else 885bc3d5698SJohn Baldwin strd r10,r11,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 886bc3d5698SJohn Baldwin#endif 887bc3d5698SJohn Baldwin bic r12,r6,r4 888bc3d5698SJohn Baldwin bic r14,r7,r5 889bc3d5698SJohn Baldwin eor r12,r12,r2 890bc3d5698SJohn Baldwin eor r14,r14,r3 891bc3d5698SJohn Baldwin#ifndef __thumb2__ 892bc3d5698SJohn Baldwin str r12,[sp,#408] @ R[4][1] = 
C[1] ^ (~C[2] & C[3]); 893bc3d5698SJohn Baldwin#endif 894bc3d5698SJohn Baldwin bic r10,r8,r6,ror#1 895bc3d5698SJohn Baldwin#ifndef __thumb2__ 896bc3d5698SJohn Baldwin str r14,[sp,#408+4] 897bc3d5698SJohn Baldwin#else 898bc3d5698SJohn Baldwin strd r12,r14,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 899bc3d5698SJohn Baldwin#endif 900bc3d5698SJohn Baldwin bic r11,r9,r7,ror#1 901bc3d5698SJohn Baldwin bic r12,r0,r8,ror#31-1 902bc3d5698SJohn Baldwin bic r14,r1,r9,ror#31-1 903bc3d5698SJohn Baldwin eor r4,r4,r10,ror#32-1 904bc3d5698SJohn Baldwin#ifndef __thumb2__ 905bc3d5698SJohn Baldwin str r4,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 906bc3d5698SJohn Baldwin#endif 907bc3d5698SJohn Baldwin eor r5,r5,r11,ror#32-1 908bc3d5698SJohn Baldwin#ifndef __thumb2__ 909bc3d5698SJohn Baldwin str r5,[sp,#416+4] 910bc3d5698SJohn Baldwin#else 911bc3d5698SJohn Baldwin strd r4,r5,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 912bc3d5698SJohn Baldwin#endif 913bc3d5698SJohn Baldwin eor r6,r6,r12,ror#32-31 914bc3d5698SJohn Baldwin eor r7,r7,r14,ror#32-31 915bc3d5698SJohn Baldwin#ifndef __thumb2__ 916bc3d5698SJohn Baldwin str r6,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 917bc3d5698SJohn Baldwin#endif 918bc3d5698SJohn Baldwin bic r10,r2,r0,ror#32-31 919bc3d5698SJohn Baldwin#ifndef __thumb2__ 920bc3d5698SJohn Baldwin str r7,[sp,#424+4] 921bc3d5698SJohn Baldwin#else 922bc3d5698SJohn Baldwin strd r6,r7,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 923bc3d5698SJohn Baldwin#endif 924bc3d5698SJohn Baldwin bic r11,r3,r1,ror#32-31 925bc3d5698SJohn Baldwin add r12,sp,#240 926bc3d5698SJohn Baldwin eor r8,r10,r8,ror#32-1 927bc3d5698SJohn Baldwin add r10,sp,#280 928bc3d5698SJohn Baldwin eor r9,r11,r9,ror#32-1 929bc3d5698SJohn Baldwin#ifndef __thumb2__ 930bc3d5698SJohn Baldwin str r8,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]); 931bc3d5698SJohn Baldwin#endif 932bc3d5698SJohn Baldwin#ifndef __thumb2__ 933bc3d5698SJohn Baldwin str r9,[sp,#432+4] 934bc3d5698SJohn Baldwin#else 935bc3d5698SJohn Baldwin 
strd r8,r9,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]); 936bc3d5698SJohn Baldwin#endif 937bc3d5698SJohn Baldwin ldmia r12,{r0,r1,r2,r3} @ A[0][0..1] 938bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[1][0..1] 939bc3d5698SJohn Baldwin#ifdef __thumb2__ 940bc3d5698SJohn Baldwin eor r0,r0,r10 941bc3d5698SJohn Baldwin eor r1,r1,r11 942bc3d5698SJohn Baldwin eor r2,r2,r12 943bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#296] 944bc3d5698SJohn Baldwin eor r3,r3,r14 945bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#304] 946bc3d5698SJohn Baldwin eor r4,r4,r10 947bc3d5698SJohn Baldwin eor r5,r5,r11 948bc3d5698SJohn Baldwin eor r6,r6,r12 949bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#312] 950bc3d5698SJohn Baldwin eor r7,r7,r14 951bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#320] 952bc3d5698SJohn Baldwin eor r8,r8,r10 953bc3d5698SJohn Baldwin eor r9,r9,r11 954bc3d5698SJohn Baldwin eor r0,r0,r12 955bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#328] 956bc3d5698SJohn Baldwin eor r1,r1,r14 957bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#336] 958bc3d5698SJohn Baldwin eor r2,r2,r10 959bc3d5698SJohn Baldwin eor r3,r3,r11 960bc3d5698SJohn Baldwin eor r4,r4,r12 961bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#344] 962bc3d5698SJohn Baldwin eor r5,r5,r14 963bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#352] 964bc3d5698SJohn Baldwin eor r6,r6,r10 965bc3d5698SJohn Baldwin eor r7,r7,r11 966bc3d5698SJohn Baldwin eor r8,r8,r12 967bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#360] 968bc3d5698SJohn Baldwin eor r9,r9,r14 969bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#368] 970bc3d5698SJohn Baldwin eor r0,r0,r10 971bc3d5698SJohn Baldwin eor r1,r1,r11 972bc3d5698SJohn Baldwin eor r2,r2,r12 973bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#376] 974bc3d5698SJohn Baldwin eor r3,r3,r14 975bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#384] 976bc3d5698SJohn Baldwin eor r4,r4,r10 977bc3d5698SJohn Baldwin eor r5,r5,r11 978bc3d5698SJohn Baldwin eor r6,r6,r12 979bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#392] 980bc3d5698SJohn Baldwin eor r7,r7,r14 981bc3d5698SJohn Baldwin 
ldrd r12,r14,[sp,#400] 982bc3d5698SJohn Baldwin eor r8,r8,r10 983bc3d5698SJohn Baldwin eor r9,r9,r11 984bc3d5698SJohn Baldwin eor r0,r0,r12 985bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#408] 986bc3d5698SJohn Baldwin eor r1,r1,r14 987bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#256] 988bc3d5698SJohn Baldwin eor r2,r2,r10 989bc3d5698SJohn Baldwin eor r3,r3,r11 990bc3d5698SJohn Baldwin eor r4,r4,r12 991bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#264] 992bc3d5698SJohn Baldwin eor r5,r5,r14 993bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#272] 994bc3d5698SJohn Baldwin#else 995bc3d5698SJohn Baldwin eor r0,r0,r10 996bc3d5698SJohn Baldwin add r10,sp,#296 997bc3d5698SJohn Baldwin eor r1,r1,r11 998bc3d5698SJohn Baldwin eor r2,r2,r12 999bc3d5698SJohn Baldwin eor r3,r3,r14 1000bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[1][2..3] 1001bc3d5698SJohn Baldwin eor r4,r4,r10 1002bc3d5698SJohn Baldwin add r10,sp,#312 1003bc3d5698SJohn Baldwin eor r5,r5,r11 1004bc3d5698SJohn Baldwin eor r6,r6,r12 1005bc3d5698SJohn Baldwin eor r7,r7,r14 1006bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[1][4]..A[2][0] 1007bc3d5698SJohn Baldwin eor r8,r8,r10 1008bc3d5698SJohn Baldwin add r10,sp,#328 1009bc3d5698SJohn Baldwin eor r9,r9,r11 1010bc3d5698SJohn Baldwin eor r0,r0,r12 1011bc3d5698SJohn Baldwin eor r1,r1,r14 1012bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[2][1..2] 1013bc3d5698SJohn Baldwin eor r2,r2,r10 1014bc3d5698SJohn Baldwin add r10,sp,#344 1015bc3d5698SJohn Baldwin eor r3,r3,r11 1016bc3d5698SJohn Baldwin eor r4,r4,r12 1017bc3d5698SJohn Baldwin eor r5,r5,r14 1018bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[2][3..4] 1019bc3d5698SJohn Baldwin eor r6,r6,r10 1020bc3d5698SJohn Baldwin add r10,sp,#360 1021bc3d5698SJohn Baldwin eor r7,r7,r11 1022bc3d5698SJohn Baldwin eor r8,r8,r12 1023bc3d5698SJohn Baldwin eor r9,r9,r14 1024bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[3][0..1] 1025bc3d5698SJohn Baldwin eor r0,r0,r10 1026bc3d5698SJohn Baldwin add r10,sp,#376 
1027bc3d5698SJohn Baldwin eor r1,r1,r11 1028bc3d5698SJohn Baldwin eor r2,r2,r12 1029bc3d5698SJohn Baldwin eor r3,r3,r14 1030bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[3][2..3] 1031bc3d5698SJohn Baldwin eor r4,r4,r10 1032bc3d5698SJohn Baldwin add r10,sp,#392 1033bc3d5698SJohn Baldwin eor r5,r5,r11 1034bc3d5698SJohn Baldwin eor r6,r6,r12 1035bc3d5698SJohn Baldwin eor r7,r7,r14 1036bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[3][4]..A[4][0] 1037bc3d5698SJohn Baldwin eor r8,r8,r10 1038bc3d5698SJohn Baldwin ldr r10,[sp,#408] @ A[4][1] 1039bc3d5698SJohn Baldwin eor r9,r9,r11 1040bc3d5698SJohn Baldwin ldr r11,[sp,#408+4] 1041bc3d5698SJohn Baldwin eor r0,r0,r12 1042bc3d5698SJohn Baldwin ldr r12,[sp,#256] @ A[0][2] 1043bc3d5698SJohn Baldwin eor r1,r1,r14 1044bc3d5698SJohn Baldwin ldr r14,[sp,#256+4] 1045bc3d5698SJohn Baldwin eor r2,r2,r10 1046bc3d5698SJohn Baldwin add r10,sp,#264 1047bc3d5698SJohn Baldwin eor r3,r3,r11 1048bc3d5698SJohn Baldwin eor r4,r4,r12 1049bc3d5698SJohn Baldwin eor r5,r5,r14 1050bc3d5698SJohn Baldwin ldmia r10,{r10,r11,r12,r14} @ A[0][3..4] 1051bc3d5698SJohn Baldwin#endif 1052bc3d5698SJohn Baldwin eor r6,r6,r10 1053bc3d5698SJohn Baldwin eor r7,r7,r11 1054bc3d5698SJohn Baldwin eor r8,r8,r12 1055bc3d5698SJohn Baldwin eor r9,r9,r14 1056bc3d5698SJohn Baldwin 1057bc3d5698SJohn Baldwin eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0]; 1058bc3d5698SJohn Baldwin#ifndef __thumb2__ 1059bc3d5698SJohn Baldwin str r10,[sp,#208] @ D[1] = E[0] 1060bc3d5698SJohn Baldwin#endif 1061bc3d5698SJohn Baldwin eor r11,r1,r4 1062bc3d5698SJohn Baldwin#ifndef __thumb2__ 1063bc3d5698SJohn Baldwin str r11,[sp,#208+4] 1064bc3d5698SJohn Baldwin#else 1065bc3d5698SJohn Baldwin strd r10,r11,[sp,#208] @ D[1] = E[0] 1066bc3d5698SJohn Baldwin#endif 1067bc3d5698SJohn Baldwin eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3]; 1068bc3d5698SJohn Baldwin eor r14,r7,r0 1069bc3d5698SJohn Baldwin#ifndef __thumb2__ 1070bc3d5698SJohn Baldwin str r12,[sp,#232] @ D[4] = 
E[1] 1071bc3d5698SJohn Baldwin#endif 1072bc3d5698SJohn Baldwin eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4]; 1073bc3d5698SJohn Baldwin#ifndef __thumb2__ 1074bc3d5698SJohn Baldwin str r14,[sp,#232+4] 1075bc3d5698SJohn Baldwin#else 1076bc3d5698SJohn Baldwin strd r12,r14,[sp,#232] @ D[4] = E[1] 1077bc3d5698SJohn Baldwin#endif 1078bc3d5698SJohn Baldwin eor r1,r9,r2 1079bc3d5698SJohn Baldwin#ifndef __thumb2__ 1080bc3d5698SJohn Baldwin str r0,[sp,#200] @ D[0] = C[0] 1081bc3d5698SJohn Baldwin#endif 1082bc3d5698SJohn Baldwin eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1]; 1083bc3d5698SJohn Baldwin#ifndef __thumb2__ 1084bc3d5698SJohn Baldwin ldr r7,[sp,#384] 1085bc3d5698SJohn Baldwin#endif 1086bc3d5698SJohn Baldwin eor r3,r3,r6 1087bc3d5698SJohn Baldwin#ifndef __thumb2__ 1088bc3d5698SJohn Baldwin str r1,[sp,#200+4] 1089bc3d5698SJohn Baldwin#else 1090bc3d5698SJohn Baldwin strd r0,r1,[sp,#200] @ D[0] = C[0] 1091bc3d5698SJohn Baldwin#endif 1092bc3d5698SJohn Baldwin#ifndef __thumb2__ 1093bc3d5698SJohn Baldwin ldr r6,[sp,#384+4] 1094bc3d5698SJohn Baldwin#else 1095bc3d5698SJohn Baldwin ldrd r7,r6,[sp,#384] 1096bc3d5698SJohn Baldwin#endif 1097bc3d5698SJohn Baldwin#ifndef __thumb2__ 1098bc3d5698SJohn Baldwin str r2,[sp,#216] @ D[2] = C[1] 1099bc3d5698SJohn Baldwin#endif 1100bc3d5698SJohn Baldwin eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2]; 1101bc3d5698SJohn Baldwin#ifndef __thumb2__ 1102bc3d5698SJohn Baldwin str r3,[sp,#216+4] 1103bc3d5698SJohn Baldwin#else 1104bc3d5698SJohn Baldwin strd r2,r3,[sp,#216] @ D[2] = C[1] 1105bc3d5698SJohn Baldwin#endif 1106bc3d5698SJohn Baldwin eor r5,r5,r8 1107bc3d5698SJohn Baldwin 1108bc3d5698SJohn Baldwin#ifndef __thumb2__ 1109bc3d5698SJohn Baldwin ldr r8,[sp,#432] 1110bc3d5698SJohn Baldwin#endif 1111bc3d5698SJohn Baldwin#ifndef __thumb2__ 1112bc3d5698SJohn Baldwin ldr r9,[sp,#432+4] 1113bc3d5698SJohn Baldwin#else 1114bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#432] 1115bc3d5698SJohn Baldwin#endif 1116bc3d5698SJohn Baldwin#ifndef 
__thumb2__ 1117bc3d5698SJohn Baldwin str r4,[sp,#224] @ D[3] = C[2] 1118bc3d5698SJohn Baldwin#endif 1119bc3d5698SJohn Baldwin eor r7,r7,r4 1120bc3d5698SJohn Baldwin#ifndef __thumb2__ 1121bc3d5698SJohn Baldwin str r5,[sp,#224+4] 1122bc3d5698SJohn Baldwin#else 1123bc3d5698SJohn Baldwin strd r4,r5,[sp,#224] @ D[3] = C[2] 1124bc3d5698SJohn Baldwin#endif 1125bc3d5698SJohn Baldwin eor r6,r6,r5 1126bc3d5698SJohn Baldwin#ifndef __thumb2__ 1127bc3d5698SJohn Baldwin ldr r4,[sp,#240] 1128bc3d5698SJohn Baldwin#endif 1129bc3d5698SJohn Baldwin @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */ 1130bc3d5698SJohn Baldwin @ mov r6,r6,ror#32-11 1131bc3d5698SJohn Baldwin#ifndef __thumb2__ 1132bc3d5698SJohn Baldwin ldr r5,[sp,#240+4] 1133bc3d5698SJohn Baldwin#else 1134bc3d5698SJohn Baldwin ldrd r4,r5,[sp,#240] 1135bc3d5698SJohn Baldwin#endif 1136bc3d5698SJohn Baldwin eor r8,r8,r12 1137bc3d5698SJohn Baldwin eor r9,r9,r14 1138bc3d5698SJohn Baldwin#ifndef __thumb2__ 1139bc3d5698SJohn Baldwin ldr r12,[sp,#336] 1140bc3d5698SJohn Baldwin#endif 1141bc3d5698SJohn Baldwin eor r0,r0,r4 1142bc3d5698SJohn Baldwin#ifndef __thumb2__ 1143bc3d5698SJohn Baldwin ldr r14,[sp,#336+4] 1144bc3d5698SJohn Baldwin#else 1145bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#336] 1146bc3d5698SJohn Baldwin#endif 1147bc3d5698SJohn Baldwin @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */ 1148bc3d5698SJohn Baldwin @ mov r9,r9,ror#32-7 1149bc3d5698SJohn Baldwin eor r1,r1,r5 @ C[0] = A[0][0] ^ C[0]; 1150bc3d5698SJohn Baldwin eor r12,r12,r2 1151bc3d5698SJohn Baldwin#ifndef __thumb2__ 1152bc3d5698SJohn Baldwin ldr r2,[sp,#288] 1153bc3d5698SJohn Baldwin#endif 1154bc3d5698SJohn Baldwin eor r14,r14,r3 1155bc3d5698SJohn Baldwin#ifndef __thumb2__ 1156bc3d5698SJohn Baldwin ldr r3,[sp,#288+4] 1157bc3d5698SJohn Baldwin#else 1158bc3d5698SJohn Baldwin ldrd r2,r3,[sp,#288] 1159bc3d5698SJohn Baldwin#endif 1160bc3d5698SJohn Baldwin mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], 
rhotates[2][2]); 1161bc3d5698SJohn Baldwin ldr r12,[sp,#444] @ load counter 1162bc3d5698SJohn Baldwin eor r2,r2,r10 1163bc3d5698SJohn Baldwin adr r10,iotas32 1164bc3d5698SJohn Baldwin mov r4,r14,ror#32-22 1165bc3d5698SJohn Baldwin add r14,r10,r12 1166bc3d5698SJohn Baldwin eor r3,r3,r11 1167bc3d5698SJohn Baldwin#ifndef __thumb2__ 1168bc3d5698SJohn Baldwin ldr r10,[r14,#8] @ iotas[i].lo 1169bc3d5698SJohn Baldwin#endif 1170bc3d5698SJohn Baldwin add r12,r12,#16 1171bc3d5698SJohn Baldwin#ifndef __thumb2__ 1172bc3d5698SJohn Baldwin ldr r11,[r14,#12] @ iotas[i].hi 1173bc3d5698SJohn Baldwin#else 1174bc3d5698SJohn Baldwin ldrd r10,r11,[r14,#8] @ iotas[i].lo 1175bc3d5698SJohn Baldwin#endif 1176bc3d5698SJohn Baldwin cmp r12,#192 1177bc3d5698SJohn Baldwin str r12,[sp,#444] @ store counter 1178bc3d5698SJohn Baldwin bic r12,r4,r2,ror#32-22 1179bc3d5698SJohn Baldwin bic r14,r5,r3,ror#32-22 1180bc3d5698SJohn Baldwin mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); 1181bc3d5698SJohn Baldwin mov r3,r3,ror#32-22 1182bc3d5698SJohn Baldwin eor r12,r12,r0 1183bc3d5698SJohn Baldwin eor r14,r14,r1 1184bc3d5698SJohn Baldwin eor r10,r10,r12 1185bc3d5698SJohn Baldwin eor r11,r11,r14 1186bc3d5698SJohn Baldwin#ifndef __thumb2__ 1187bc3d5698SJohn Baldwin str r10,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 1188bc3d5698SJohn Baldwin#endif 1189bc3d5698SJohn Baldwin bic r12,r6,r4,ror#11 1190bc3d5698SJohn Baldwin#ifndef __thumb2__ 1191bc3d5698SJohn Baldwin str r11,[sp,#0+4] 1192bc3d5698SJohn Baldwin#else 1193bc3d5698SJohn Baldwin strd r10,r11,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 1194bc3d5698SJohn Baldwin#endif 1195bc3d5698SJohn Baldwin bic r14,r7,r5,ror#10 1196bc3d5698SJohn Baldwin bic r10,r8,r6,ror#32-(11-7) 1197bc3d5698SJohn Baldwin bic r11,r9,r7,ror#32-(10-7) 1198bc3d5698SJohn Baldwin eor r12,r2,r12,ror#32-11 1199bc3d5698SJohn Baldwin#ifndef __thumb2__ 1200bc3d5698SJohn Baldwin str r12,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 1201bc3d5698SJohn 
Baldwin#endif 1202bc3d5698SJohn Baldwin eor r14,r3,r14,ror#32-10 1203bc3d5698SJohn Baldwin#ifndef __thumb2__ 1204bc3d5698SJohn Baldwin str r14,[sp,#8+4] 1205bc3d5698SJohn Baldwin#else 1206bc3d5698SJohn Baldwin strd r12,r14,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 1207bc3d5698SJohn Baldwin#endif 1208bc3d5698SJohn Baldwin eor r10,r4,r10,ror#32-7 1209bc3d5698SJohn Baldwin eor r11,r5,r11,ror#32-7 1210bc3d5698SJohn Baldwin#ifndef __thumb2__ 1211bc3d5698SJohn Baldwin str r10,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 1212bc3d5698SJohn Baldwin#endif 1213bc3d5698SJohn Baldwin bic r12,r0,r8,ror#32-7 1214bc3d5698SJohn Baldwin#ifndef __thumb2__ 1215bc3d5698SJohn Baldwin str r11,[sp,#16+4] 1216bc3d5698SJohn Baldwin#else 1217bc3d5698SJohn Baldwin strd r10,r11,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 1218bc3d5698SJohn Baldwin#endif 1219bc3d5698SJohn Baldwin bic r14,r1,r9,ror#32-7 1220bc3d5698SJohn Baldwin eor r12,r12,r6,ror#32-11 1221bc3d5698SJohn Baldwin#ifndef __thumb2__ 1222bc3d5698SJohn Baldwin str r12,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 1223bc3d5698SJohn Baldwin#endif 1224bc3d5698SJohn Baldwin eor r14,r14,r7,ror#32-10 1225bc3d5698SJohn Baldwin#ifndef __thumb2__ 1226bc3d5698SJohn Baldwin str r14,[sp,#24+4] 1227bc3d5698SJohn Baldwin#else 1228bc3d5698SJohn Baldwin strd r12,r14,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 1229bc3d5698SJohn Baldwin#endif 1230bc3d5698SJohn Baldwin bic r10,r2,r0 1231bc3d5698SJohn Baldwin add r14,sp,#224 1232bc3d5698SJohn Baldwin#ifndef __thumb2__ 1233bc3d5698SJohn Baldwin ldr r0,[sp,#264] @ A[0][3] 1234bc3d5698SJohn Baldwin#endif 1235bc3d5698SJohn Baldwin bic r11,r3,r1 1236bc3d5698SJohn Baldwin#ifndef __thumb2__ 1237bc3d5698SJohn Baldwin ldr r1,[sp,#264+4] 1238bc3d5698SJohn Baldwin#else 1239bc3d5698SJohn Baldwin ldrd r0,r1,[sp,#264] @ A[0][3] 1240bc3d5698SJohn Baldwin#endif 1241bc3d5698SJohn Baldwin eor r10,r10,r8,ror#32-7 1242bc3d5698SJohn Baldwin eor r11,r11,r9,ror#32-7 1243bc3d5698SJohn Baldwin#ifndef __thumb2__ 1244bc3d5698SJohn 
Baldwin str r10,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 1245bc3d5698SJohn Baldwin#endif 1246bc3d5698SJohn Baldwin add r9,sp,#200 1247bc3d5698SJohn Baldwin#ifndef __thumb2__ 1248bc3d5698SJohn Baldwin str r11,[sp,#32+4] 1249bc3d5698SJohn Baldwin#else 1250bc3d5698SJohn Baldwin strd r10,r11,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 1251bc3d5698SJohn Baldwin#endif 1252bc3d5698SJohn Baldwin 1253bc3d5698SJohn Baldwin ldmia r14,{r10,r11,r12,r14} @ D[3..4] 1254bc3d5698SJohn Baldwin ldmia r9,{r6,r7,r8,r9} @ D[0..1] 1255bc3d5698SJohn Baldwin 1256bc3d5698SJohn Baldwin#ifndef __thumb2__ 1257bc3d5698SJohn Baldwin ldr r2,[sp,#312] @ A[1][4] 1258bc3d5698SJohn Baldwin#endif 1259bc3d5698SJohn Baldwin eor r0,r0,r10 1260bc3d5698SJohn Baldwin#ifndef __thumb2__ 1261bc3d5698SJohn Baldwin ldr r3,[sp,#312+4] 1262bc3d5698SJohn Baldwin#else 1263bc3d5698SJohn Baldwin ldrd r2,r3,[sp,#312] @ A[1][4] 1264bc3d5698SJohn Baldwin#endif 1265bc3d5698SJohn Baldwin eor r1,r1,r11 1266bc3d5698SJohn Baldwin @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]); 1267bc3d5698SJohn Baldwin#ifndef __thumb2__ 1268bc3d5698SJohn Baldwin ldr r10,[sp,#368] @ A[3][1] 1269bc3d5698SJohn Baldwin#endif 1270bc3d5698SJohn Baldwin @ mov r1,r1,ror#32-14 1271bc3d5698SJohn Baldwin#ifndef __thumb2__ 1272bc3d5698SJohn Baldwin ldr r11,[sp,#368+4] 1273bc3d5698SJohn Baldwin#else 1274bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#368] @ A[3][1] 1275bc3d5698SJohn Baldwin#endif 1276bc3d5698SJohn Baldwin 1277bc3d5698SJohn Baldwin eor r2,r2,r12 1278bc3d5698SJohn Baldwin#ifndef __thumb2__ 1279bc3d5698SJohn Baldwin ldr r4,[sp,#320] @ A[2][0] 1280bc3d5698SJohn Baldwin#endif 1281bc3d5698SJohn Baldwin eor r3,r3,r14 1282bc3d5698SJohn Baldwin#ifndef __thumb2__ 1283bc3d5698SJohn Baldwin ldr r5,[sp,#320+4] 1284bc3d5698SJohn Baldwin#else 1285bc3d5698SJohn Baldwin ldrd r4,r5,[sp,#320] @ A[2][0] 1286bc3d5698SJohn Baldwin#endif 1287bc3d5698SJohn Baldwin @ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]); 
1288bc3d5698SJohn Baldwin @ mov r3,r3,ror#32-10 1289bc3d5698SJohn Baldwin 1290bc3d5698SJohn Baldwin eor r6,r6,r4 1291bc3d5698SJohn Baldwin#ifndef __thumb2__ 1292bc3d5698SJohn Baldwin ldr r12,[sp,#216] @ D[2] 1293bc3d5698SJohn Baldwin#endif 1294bc3d5698SJohn Baldwin eor r7,r7,r5 1295bc3d5698SJohn Baldwin#ifndef __thumb2__ 1296bc3d5698SJohn Baldwin ldr r14,[sp,#216+4] 1297bc3d5698SJohn Baldwin#else 1298bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#216] @ D[2] 1299bc3d5698SJohn Baldwin#endif 1300bc3d5698SJohn Baldwin mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]); 1301bc3d5698SJohn Baldwin mov r4,r7,ror#32-2 1302bc3d5698SJohn Baldwin 1303bc3d5698SJohn Baldwin eor r10,r10,r8 1304bc3d5698SJohn Baldwin#ifndef __thumb2__ 1305bc3d5698SJohn Baldwin ldr r8,[sp,#416] @ A[4][2] 1306bc3d5698SJohn Baldwin#endif 1307bc3d5698SJohn Baldwin eor r11,r11,r9 1308bc3d5698SJohn Baldwin#ifndef __thumb2__ 1309bc3d5698SJohn Baldwin ldr r9,[sp,#416+4] 1310bc3d5698SJohn Baldwin#else 1311bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#416] @ A[4][2] 1312bc3d5698SJohn Baldwin#endif 1313bc3d5698SJohn Baldwin mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]); 1314bc3d5698SJohn Baldwin mov r6,r11,ror#32-23 1315bc3d5698SJohn Baldwin 1316bc3d5698SJohn Baldwin bic r10,r4,r2,ror#32-10 1317bc3d5698SJohn Baldwin bic r11,r5,r3,ror#32-10 1318bc3d5698SJohn Baldwin eor r12,r12,r8 1319bc3d5698SJohn Baldwin eor r14,r14,r9 1320bc3d5698SJohn Baldwin mov r9,r12,ror#32-30 @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]); 1321bc3d5698SJohn Baldwin mov r8,r14,ror#32-31 1322bc3d5698SJohn Baldwin eor r10,r10,r0,ror#32-14 1323bc3d5698SJohn Baldwin eor r11,r11,r1,ror#32-14 1324bc3d5698SJohn Baldwin#ifndef __thumb2__ 1325bc3d5698SJohn Baldwin str r10,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 1326bc3d5698SJohn Baldwin#endif 1327bc3d5698SJohn Baldwin bic r12,r6,r4 1328bc3d5698SJohn Baldwin#ifndef __thumb2__ 1329bc3d5698SJohn Baldwin str r11,[sp,#40+4] 1330bc3d5698SJohn Baldwin#else 1331bc3d5698SJohn 
Baldwin strd r10,r11,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 1332bc3d5698SJohn Baldwin#endif 1333bc3d5698SJohn Baldwin bic r14,r7,r5 1334bc3d5698SJohn Baldwin eor r12,r12,r2,ror#32-10 1335bc3d5698SJohn Baldwin#ifndef __thumb2__ 1336bc3d5698SJohn Baldwin str r12,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 1337bc3d5698SJohn Baldwin#endif 1338bc3d5698SJohn Baldwin eor r14,r14,r3,ror#32-10 1339bc3d5698SJohn Baldwin#ifndef __thumb2__ 1340bc3d5698SJohn Baldwin str r14,[sp,#48+4] 1341bc3d5698SJohn Baldwin#else 1342bc3d5698SJohn Baldwin strd r12,r14,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 1343bc3d5698SJohn Baldwin#endif 1344bc3d5698SJohn Baldwin bic r10,r8,r6 1345bc3d5698SJohn Baldwin bic r11,r9,r7 1346bc3d5698SJohn Baldwin bic r12,r0,r8,ror#14 1347bc3d5698SJohn Baldwin bic r14,r1,r9,ror#14 1348bc3d5698SJohn Baldwin eor r10,r10,r4 1349bc3d5698SJohn Baldwin eor r11,r11,r5 1350bc3d5698SJohn Baldwin#ifndef __thumb2__ 1351bc3d5698SJohn Baldwin str r10,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 1352bc3d5698SJohn Baldwin#endif 1353bc3d5698SJohn Baldwin bic r2,r2,r0,ror#32-(14-10) 1354bc3d5698SJohn Baldwin#ifndef __thumb2__ 1355bc3d5698SJohn Baldwin str r11,[sp,#56+4] 1356bc3d5698SJohn Baldwin#else 1357bc3d5698SJohn Baldwin strd r10,r11,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 1358bc3d5698SJohn Baldwin#endif 1359bc3d5698SJohn Baldwin eor r12,r6,r12,ror#32-14 1360bc3d5698SJohn Baldwin bic r11,r3,r1,ror#32-(14-10) 1361bc3d5698SJohn Baldwin#ifndef __thumb2__ 1362bc3d5698SJohn Baldwin str r12,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 1363bc3d5698SJohn Baldwin#endif 1364bc3d5698SJohn Baldwin eor r14,r7,r14,ror#32-14 1365bc3d5698SJohn Baldwin#ifndef __thumb2__ 1366bc3d5698SJohn Baldwin str r14,[sp,#64+4] 1367bc3d5698SJohn Baldwin#else 1368bc3d5698SJohn Baldwin strd r12,r14,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 1369bc3d5698SJohn Baldwin#endif 1370bc3d5698SJohn Baldwin add r12,sp,#208 1371bc3d5698SJohn Baldwin#ifndef __thumb2__ 1372bc3d5698SJohn Baldwin ldr 
r1,[sp,#248] @ A[0][1] 1373bc3d5698SJohn Baldwin#endif 1374bc3d5698SJohn Baldwin eor r10,r8,r2,ror#32-10 1375bc3d5698SJohn Baldwin#ifndef __thumb2__ 1376bc3d5698SJohn Baldwin ldr r0,[sp,#248+4] 1377bc3d5698SJohn Baldwin#else 1378bc3d5698SJohn Baldwin ldrd r1,r0,[sp,#248] @ A[0][1] 1379bc3d5698SJohn Baldwin#endif 1380bc3d5698SJohn Baldwin eor r11,r9,r11,ror#32-10 1381bc3d5698SJohn Baldwin#ifndef __thumb2__ 1382bc3d5698SJohn Baldwin str r10,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 1383bc3d5698SJohn Baldwin#endif 1384bc3d5698SJohn Baldwin#ifndef __thumb2__ 1385bc3d5698SJohn Baldwin str r11,[sp,#72+4] 1386bc3d5698SJohn Baldwin#else 1387bc3d5698SJohn Baldwin strd r10,r11,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 1388bc3d5698SJohn Baldwin#endif 1389bc3d5698SJohn Baldwin 1390bc3d5698SJohn Baldwin add r9,sp,#224 1391bc3d5698SJohn Baldwin ldmia r12,{r10,r11,r12,r14} @ D[1..2] 1392bc3d5698SJohn Baldwin#ifndef __thumb2__ 1393bc3d5698SJohn Baldwin ldr r2,[sp,#296] @ A[1][2] 1394bc3d5698SJohn Baldwin#endif 1395bc3d5698SJohn Baldwin#ifndef __thumb2__ 1396bc3d5698SJohn Baldwin ldr r3,[sp,#296+4] 1397bc3d5698SJohn Baldwin#else 1398bc3d5698SJohn Baldwin ldrd r2,r3,[sp,#296] @ A[1][2] 1399bc3d5698SJohn Baldwin#endif 1400bc3d5698SJohn Baldwin ldmia r9,{r6,r7,r8,r9} @ D[3..4] 1401bc3d5698SJohn Baldwin 1402bc3d5698SJohn Baldwin eor r1,r1,r10 1403bc3d5698SJohn Baldwin#ifndef __thumb2__ 1404bc3d5698SJohn Baldwin ldr r4,[sp,#344] @ A[2][3] 1405bc3d5698SJohn Baldwin#endif 1406bc3d5698SJohn Baldwin eor r0,r0,r11 1407bc3d5698SJohn Baldwin#ifndef __thumb2__ 1408bc3d5698SJohn Baldwin ldr r5,[sp,#344+4] 1409bc3d5698SJohn Baldwin#else 1410bc3d5698SJohn Baldwin ldrd r4,r5,[sp,#344] @ A[2][3] 1411bc3d5698SJohn Baldwin#endif 1412bc3d5698SJohn Baldwin mov r0,r0,ror#32-1 @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]); 1413bc3d5698SJohn Baldwin 1414bc3d5698SJohn Baldwin eor r2,r2,r12 1415bc3d5698SJohn Baldwin#ifndef __thumb2__ 1416bc3d5698SJohn Baldwin ldr r10,[sp,#392] @ A[3][4] 
1417bc3d5698SJohn Baldwin#endif 1418bc3d5698SJohn Baldwin eor r3,r3,r14 1419bc3d5698SJohn Baldwin#ifndef __thumb2__ 1420bc3d5698SJohn Baldwin ldr r11,[sp,#392+4] 1421bc3d5698SJohn Baldwin#else 1422bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#392] @ A[3][4] 1423bc3d5698SJohn Baldwin#endif 1424bc3d5698SJohn Baldwin @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]); 1425bc3d5698SJohn Baldwin#ifndef __thumb2__ 1426bc3d5698SJohn Baldwin ldr r12,[sp,#200] @ D[0] 1427bc3d5698SJohn Baldwin#endif 1428bc3d5698SJohn Baldwin @ mov r3,r3,ror#32-3 1429bc3d5698SJohn Baldwin#ifndef __thumb2__ 1430bc3d5698SJohn Baldwin ldr r14,[sp,#200+4] 1431bc3d5698SJohn Baldwin#else 1432bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#200] @ D[0] 1433bc3d5698SJohn Baldwin#endif 1434bc3d5698SJohn Baldwin 1435bc3d5698SJohn Baldwin eor r4,r4,r6 1436bc3d5698SJohn Baldwin eor r5,r5,r7 1437bc3d5698SJohn Baldwin @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]); 1438bc3d5698SJohn Baldwin @ mov r4,r7,ror#32-13 @ [track reverse order below] 1439bc3d5698SJohn Baldwin 1440bc3d5698SJohn Baldwin eor r10,r10,r8 1441bc3d5698SJohn Baldwin#ifndef __thumb2__ 1442bc3d5698SJohn Baldwin ldr r8,[sp,#400] @ A[4][0] 1443bc3d5698SJohn Baldwin#endif 1444bc3d5698SJohn Baldwin eor r11,r11,r9 1445bc3d5698SJohn Baldwin#ifndef __thumb2__ 1446bc3d5698SJohn Baldwin ldr r9,[sp,#400+4] 1447bc3d5698SJohn Baldwin#else 1448bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#400] @ A[4][0] 1449bc3d5698SJohn Baldwin#endif 1450bc3d5698SJohn Baldwin mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]); 1451bc3d5698SJohn Baldwin mov r7,r11,ror#32-4 1452bc3d5698SJohn Baldwin 1453bc3d5698SJohn Baldwin eor r12,r12,r8 1454bc3d5698SJohn Baldwin eor r14,r14,r9 1455bc3d5698SJohn Baldwin mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]); 1456bc3d5698SJohn Baldwin mov r9,r14,ror#32-9 1457bc3d5698SJohn Baldwin 1458bc3d5698SJohn Baldwin bic r10,r5,r2,ror#13-3 1459bc3d5698SJohn Baldwin bic r11,r4,r3,ror#12-3 
1460bc3d5698SJohn Baldwin bic r12,r6,r5,ror#32-13 1461bc3d5698SJohn Baldwin bic r14,r7,r4,ror#32-12 1462bc3d5698SJohn Baldwin eor r10,r0,r10,ror#32-13 1463bc3d5698SJohn Baldwin eor r11,r1,r11,ror#32-12 1464bc3d5698SJohn Baldwin#ifndef __thumb2__ 1465bc3d5698SJohn Baldwin str r10,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 1466bc3d5698SJohn Baldwin#endif 1467bc3d5698SJohn Baldwin eor r12,r12,r2,ror#32-3 1468bc3d5698SJohn Baldwin#ifndef __thumb2__ 1469bc3d5698SJohn Baldwin str r11,[sp,#80+4] 1470bc3d5698SJohn Baldwin#else 1471bc3d5698SJohn Baldwin strd r10,r11,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 1472bc3d5698SJohn Baldwin#endif 1473bc3d5698SJohn Baldwin eor r14,r14,r3,ror#32-3 1474bc3d5698SJohn Baldwin#ifndef __thumb2__ 1475bc3d5698SJohn Baldwin str r12,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 1476bc3d5698SJohn Baldwin#endif 1477bc3d5698SJohn Baldwin bic r10,r8,r6 1478bc3d5698SJohn Baldwin bic r11,r9,r7 1479bc3d5698SJohn Baldwin#ifndef __thumb2__ 1480bc3d5698SJohn Baldwin str r14,[sp,#88+4] 1481bc3d5698SJohn Baldwin#else 1482bc3d5698SJohn Baldwin strd r12,r14,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 1483bc3d5698SJohn Baldwin#endif 1484bc3d5698SJohn Baldwin eor r10,r10,r5,ror#32-13 1485bc3d5698SJohn Baldwin eor r11,r11,r4,ror#32-12 1486bc3d5698SJohn Baldwin#ifndef __thumb2__ 1487bc3d5698SJohn Baldwin str r10,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 1488bc3d5698SJohn Baldwin#endif 1489bc3d5698SJohn Baldwin bic r12,r0,r8 1490bc3d5698SJohn Baldwin#ifndef __thumb2__ 1491bc3d5698SJohn Baldwin str r11,[sp,#96+4] 1492bc3d5698SJohn Baldwin#else 1493bc3d5698SJohn Baldwin strd r10,r11,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 1494bc3d5698SJohn Baldwin#endif 1495bc3d5698SJohn Baldwin bic r14,r1,r9 1496bc3d5698SJohn Baldwin eor r12,r12,r6 1497bc3d5698SJohn Baldwin eor r14,r14,r7 1498bc3d5698SJohn Baldwin#ifndef __thumb2__ 1499bc3d5698SJohn Baldwin str r12,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 1500bc3d5698SJohn Baldwin#endif 1501bc3d5698SJohn Baldwin bic 
r10,r2,r0,ror#3 1502bc3d5698SJohn Baldwin#ifndef __thumb2__ 1503bc3d5698SJohn Baldwin str r14,[sp,#104+4] 1504bc3d5698SJohn Baldwin#else 1505bc3d5698SJohn Baldwin strd r12,r14,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 1506bc3d5698SJohn Baldwin#endif 1507bc3d5698SJohn Baldwin bic r11,r3,r1,ror#3 1508bc3d5698SJohn Baldwin#ifndef __thumb2__ 1509bc3d5698SJohn Baldwin ldr r1,[sp,#272] @ A[0][4] [in reverse order] 1510bc3d5698SJohn Baldwin#endif 1511bc3d5698SJohn Baldwin eor r10,r8,r10,ror#32-3 1512bc3d5698SJohn Baldwin#ifndef __thumb2__ 1513bc3d5698SJohn Baldwin ldr r0,[sp,#272+4] 1514bc3d5698SJohn Baldwin#else 1515bc3d5698SJohn Baldwin ldrd r1,r0,[sp,#272] @ A[0][4] [in reverse order] 1516bc3d5698SJohn Baldwin#endif 1517bc3d5698SJohn Baldwin eor r11,r9,r11,ror#32-3 1518bc3d5698SJohn Baldwin#ifndef __thumb2__ 1519bc3d5698SJohn Baldwin str r10,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 1520bc3d5698SJohn Baldwin#endif 1521bc3d5698SJohn Baldwin add r9,sp,#208 1522bc3d5698SJohn Baldwin#ifndef __thumb2__ 1523bc3d5698SJohn Baldwin str r11,[sp,#112+4] 1524bc3d5698SJohn Baldwin#else 1525bc3d5698SJohn Baldwin strd r10,r11,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 1526bc3d5698SJohn Baldwin#endif 1527bc3d5698SJohn Baldwin 1528bc3d5698SJohn Baldwin#ifndef __thumb2__ 1529bc3d5698SJohn Baldwin ldr r10,[sp,#232] @ D[4] 1530bc3d5698SJohn Baldwin#endif 1531bc3d5698SJohn Baldwin#ifndef __thumb2__ 1532bc3d5698SJohn Baldwin ldr r11,[sp,#232+4] 1533bc3d5698SJohn Baldwin#else 1534bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#232] @ D[4] 1535bc3d5698SJohn Baldwin#endif 1536bc3d5698SJohn Baldwin#ifndef __thumb2__ 1537bc3d5698SJohn Baldwin ldr r12,[sp,#200] @ D[0] 1538bc3d5698SJohn Baldwin#endif 1539bc3d5698SJohn Baldwin#ifndef __thumb2__ 1540bc3d5698SJohn Baldwin ldr r14,[sp,#200+4] 1541bc3d5698SJohn Baldwin#else 1542bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#200] @ D[0] 1543bc3d5698SJohn Baldwin#endif 1544bc3d5698SJohn Baldwin 1545bc3d5698SJohn Baldwin ldmia r9,{r6,r7,r8,r9} @ D[1..2] 
1546bc3d5698SJohn Baldwin 1547bc3d5698SJohn Baldwin eor r1,r1,r10 1548bc3d5698SJohn Baldwin#ifndef __thumb2__ 1549bc3d5698SJohn Baldwin ldr r2,[sp,#280] @ A[1][0] 1550bc3d5698SJohn Baldwin#endif 1551bc3d5698SJohn Baldwin eor r0,r0,r11 1552bc3d5698SJohn Baldwin#ifndef __thumb2__ 1553bc3d5698SJohn Baldwin ldr r3,[sp,#280+4] 1554bc3d5698SJohn Baldwin#else 1555bc3d5698SJohn Baldwin ldrd r2,r3,[sp,#280] @ A[1][0] 1556bc3d5698SJohn Baldwin#endif 1557bc3d5698SJohn Baldwin @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]); 1558bc3d5698SJohn Baldwin#ifndef __thumb2__ 1559bc3d5698SJohn Baldwin ldr r4,[sp,#328] @ A[2][1] 1560bc3d5698SJohn Baldwin#endif 1561bc3d5698SJohn Baldwin @ mov r0,r11,ror#32-14 @ [was loaded in reverse order] 1562bc3d5698SJohn Baldwin#ifndef __thumb2__ 1563bc3d5698SJohn Baldwin ldr r5,[sp,#328+4] 1564bc3d5698SJohn Baldwin#else 1565bc3d5698SJohn Baldwin ldrd r4,r5,[sp,#328] @ A[2][1] 1566bc3d5698SJohn Baldwin#endif 1567bc3d5698SJohn Baldwin 1568bc3d5698SJohn Baldwin eor r2,r2,r12 1569bc3d5698SJohn Baldwin#ifndef __thumb2__ 1570bc3d5698SJohn Baldwin ldr r10,[sp,#376] @ A[3][2] 1571bc3d5698SJohn Baldwin#endif 1572bc3d5698SJohn Baldwin eor r3,r3,r14 1573bc3d5698SJohn Baldwin#ifndef __thumb2__ 1574bc3d5698SJohn Baldwin ldr r11,[sp,#376+4] 1575bc3d5698SJohn Baldwin#else 1576bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#376] @ A[3][2] 1577bc3d5698SJohn Baldwin#endif 1578bc3d5698SJohn Baldwin @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]); 1579bc3d5698SJohn Baldwin#ifndef __thumb2__ 1580bc3d5698SJohn Baldwin ldr r12,[sp,#224] @ D[3] 1581bc3d5698SJohn Baldwin#endif 1582bc3d5698SJohn Baldwin @ mov r3,r3,ror#32-18 1583bc3d5698SJohn Baldwin#ifndef __thumb2__ 1584bc3d5698SJohn Baldwin ldr r14,[sp,#224+4] 1585bc3d5698SJohn Baldwin#else 1586bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#224] @ D[3] 1587bc3d5698SJohn Baldwin#endif 1588bc3d5698SJohn Baldwin 1589bc3d5698SJohn Baldwin eor r6,r6,r4 1590bc3d5698SJohn Baldwin eor r7,r7,r5 
1591bc3d5698SJohn Baldwin mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]); 1592bc3d5698SJohn Baldwin mov r5,r7,ror#32-5 1593bc3d5698SJohn Baldwin 1594bc3d5698SJohn Baldwin eor r10,r10,r8 1595bc3d5698SJohn Baldwin#ifndef __thumb2__ 1596bc3d5698SJohn Baldwin ldr r8,[sp,#424] @ A[4][3] 1597bc3d5698SJohn Baldwin#endif 1598bc3d5698SJohn Baldwin eor r11,r11,r9 1599bc3d5698SJohn Baldwin#ifndef __thumb2__ 1600bc3d5698SJohn Baldwin ldr r9,[sp,#424+4] 1601bc3d5698SJohn Baldwin#else 1602bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#424] @ A[4][3] 1603bc3d5698SJohn Baldwin#endif 1604bc3d5698SJohn Baldwin mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]); 1605bc3d5698SJohn Baldwin mov r6,r11,ror#32-8 1606bc3d5698SJohn Baldwin 1607bc3d5698SJohn Baldwin eor r12,r12,r8 1608bc3d5698SJohn Baldwin eor r14,r14,r9 1609bc3d5698SJohn Baldwin mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]); 1610bc3d5698SJohn Baldwin mov r9,r14,ror#32-28 1611bc3d5698SJohn Baldwin 1612bc3d5698SJohn Baldwin bic r10,r4,r2,ror#32-18 1613bc3d5698SJohn Baldwin bic r11,r5,r3,ror#32-18 1614bc3d5698SJohn Baldwin eor r10,r10,r0,ror#32-14 1615bc3d5698SJohn Baldwin eor r11,r11,r1,ror#32-13 1616bc3d5698SJohn Baldwin#ifndef __thumb2__ 1617bc3d5698SJohn Baldwin str r10,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 1618bc3d5698SJohn Baldwin#endif 1619bc3d5698SJohn Baldwin bic r12,r6,r4 1620bc3d5698SJohn Baldwin#ifndef __thumb2__ 1621bc3d5698SJohn Baldwin str r11,[sp,#120+4] 1622bc3d5698SJohn Baldwin#else 1623bc3d5698SJohn Baldwin strd r10,r11,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 1624bc3d5698SJohn Baldwin#endif 1625bc3d5698SJohn Baldwin bic r14,r7,r5 1626bc3d5698SJohn Baldwin eor r12,r12,r2,ror#32-18 1627bc3d5698SJohn Baldwin#ifndef __thumb2__ 1628bc3d5698SJohn Baldwin str r12,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 1629bc3d5698SJohn Baldwin#endif 1630bc3d5698SJohn Baldwin eor r14,r14,r3,ror#32-18 1631bc3d5698SJohn Baldwin#ifndef __thumb2__ 1632bc3d5698SJohn Baldwin 
str r14,[sp,#128+4] 1633bc3d5698SJohn Baldwin#else 1634bc3d5698SJohn Baldwin strd r12,r14,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 1635bc3d5698SJohn Baldwin#endif 1636bc3d5698SJohn Baldwin bic r10,r8,r6 1637bc3d5698SJohn Baldwin bic r11,r9,r7 1638bc3d5698SJohn Baldwin bic r12,r0,r8,ror#14 1639bc3d5698SJohn Baldwin bic r14,r1,r9,ror#13 1640bc3d5698SJohn Baldwin eor r10,r10,r4 1641bc3d5698SJohn Baldwin eor r11,r11,r5 1642bc3d5698SJohn Baldwin#ifndef __thumb2__ 1643bc3d5698SJohn Baldwin str r10,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 1644bc3d5698SJohn Baldwin#endif 1645bc3d5698SJohn Baldwin bic r2,r2,r0,ror#18-14 1646bc3d5698SJohn Baldwin#ifndef __thumb2__ 1647bc3d5698SJohn Baldwin str r11,[sp,#136+4] 1648bc3d5698SJohn Baldwin#else 1649bc3d5698SJohn Baldwin strd r10,r11,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 1650bc3d5698SJohn Baldwin#endif 1651bc3d5698SJohn Baldwin eor r12,r6,r12,ror#32-14 1652bc3d5698SJohn Baldwin bic r11,r3,r1,ror#18-13 1653bc3d5698SJohn Baldwin eor r14,r7,r14,ror#32-13 1654bc3d5698SJohn Baldwin#ifndef __thumb2__ 1655bc3d5698SJohn Baldwin str r12,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 1656bc3d5698SJohn Baldwin#endif 1657bc3d5698SJohn Baldwin#ifndef __thumb2__ 1658bc3d5698SJohn Baldwin str r14,[sp,#144+4] 1659bc3d5698SJohn Baldwin#else 1660bc3d5698SJohn Baldwin strd r12,r14,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 1661bc3d5698SJohn Baldwin#endif 1662bc3d5698SJohn Baldwin add r14,sp,#216 1663bc3d5698SJohn Baldwin#ifndef __thumb2__ 1664bc3d5698SJohn Baldwin ldr r0,[sp,#256] @ A[0][2] 1665bc3d5698SJohn Baldwin#endif 1666bc3d5698SJohn Baldwin eor r10,r8,r2,ror#32-18 1667bc3d5698SJohn Baldwin#ifndef __thumb2__ 1668bc3d5698SJohn Baldwin ldr r1,[sp,#256+4] 1669bc3d5698SJohn Baldwin#else 1670bc3d5698SJohn Baldwin ldrd r0,r1,[sp,#256] @ A[0][2] 1671bc3d5698SJohn Baldwin#endif 1672bc3d5698SJohn Baldwin eor r11,r9,r11,ror#32-18 1673bc3d5698SJohn Baldwin#ifndef __thumb2__ 1674bc3d5698SJohn Baldwin str r10,[sp,#152] @ R[3][4] = C[4] ^ 
(~C[0] & C[1]); 1675bc3d5698SJohn Baldwin#endif 1676bc3d5698SJohn Baldwin#ifndef __thumb2__ 1677bc3d5698SJohn Baldwin str r11,[sp,#152+4] 1678bc3d5698SJohn Baldwin#else 1679bc3d5698SJohn Baldwin strd r10,r11,[sp,#152] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 1680bc3d5698SJohn Baldwin#endif 1681bc3d5698SJohn Baldwin 1682bc3d5698SJohn Baldwin ldmia r14,{r10,r11,r12,r14} @ D[2..3] 1683bc3d5698SJohn Baldwin#ifndef __thumb2__ 1684bc3d5698SJohn Baldwin ldr r2,[sp,#304] @ A[1][3] 1685bc3d5698SJohn Baldwin#endif 1686bc3d5698SJohn Baldwin#ifndef __thumb2__ 1687bc3d5698SJohn Baldwin ldr r3,[sp,#304+4] 1688bc3d5698SJohn Baldwin#else 1689bc3d5698SJohn Baldwin ldrd r2,r3,[sp,#304] @ A[1][3] 1690bc3d5698SJohn Baldwin#endif 1691bc3d5698SJohn Baldwin#ifndef __thumb2__ 1692bc3d5698SJohn Baldwin ldr r6,[sp,#232] @ D[4] 1693bc3d5698SJohn Baldwin#endif 1694bc3d5698SJohn Baldwin#ifndef __thumb2__ 1695bc3d5698SJohn Baldwin ldr r7,[sp,#232+4] 1696bc3d5698SJohn Baldwin#else 1697bc3d5698SJohn Baldwin ldrd r6,r7,[sp,#232] @ D[4] 1698bc3d5698SJohn Baldwin#endif 1699bc3d5698SJohn Baldwin 1700bc3d5698SJohn Baldwin eor r0,r0,r10 1701bc3d5698SJohn Baldwin#ifndef __thumb2__ 1702bc3d5698SJohn Baldwin ldr r4,[sp,#352] @ A[2][4] 1703bc3d5698SJohn Baldwin#endif 1704bc3d5698SJohn Baldwin eor r1,r1,r11 1705bc3d5698SJohn Baldwin#ifndef __thumb2__ 1706bc3d5698SJohn Baldwin ldr r5,[sp,#352+4] 1707bc3d5698SJohn Baldwin#else 1708bc3d5698SJohn Baldwin ldrd r4,r5,[sp,#352] @ A[2][4] 1709bc3d5698SJohn Baldwin#endif 1710bc3d5698SJohn Baldwin @ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]); 1711bc3d5698SJohn Baldwin#ifndef __thumb2__ 1712bc3d5698SJohn Baldwin ldr r8,[sp,#200] @ D[0] 1713bc3d5698SJohn Baldwin#endif 1714bc3d5698SJohn Baldwin @ mov r1,r1,ror#32-31 1715bc3d5698SJohn Baldwin#ifndef __thumb2__ 1716bc3d5698SJohn Baldwin ldr r9,[sp,#200+4] 1717bc3d5698SJohn Baldwin#else 1718bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#200] @ D[0] 1719bc3d5698SJohn Baldwin#endif 1720bc3d5698SJohn Baldwin 
1721bc3d5698SJohn Baldwin eor r12,r12,r2 1722bc3d5698SJohn Baldwin#ifndef __thumb2__ 1723bc3d5698SJohn Baldwin ldr r10,[sp,#360] @ A[3][0] 1724bc3d5698SJohn Baldwin#endif 1725bc3d5698SJohn Baldwin eor r14,r14,r3 1726bc3d5698SJohn Baldwin#ifndef __thumb2__ 1727bc3d5698SJohn Baldwin ldr r11,[sp,#360+4] 1728bc3d5698SJohn Baldwin#else 1729bc3d5698SJohn Baldwin ldrd r10,r11,[sp,#360] @ A[3][0] 1730bc3d5698SJohn Baldwin#endif 1731bc3d5698SJohn Baldwin mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]); 1732bc3d5698SJohn Baldwin#ifndef __thumb2__ 1733bc3d5698SJohn Baldwin ldr r12,[sp,#208] @ D[1] 1734bc3d5698SJohn Baldwin#endif 1735bc3d5698SJohn Baldwin mov r2,r14,ror#32-28 1736bc3d5698SJohn Baldwin#ifndef __thumb2__ 1737bc3d5698SJohn Baldwin ldr r14,[sp,#208+4] 1738bc3d5698SJohn Baldwin#else 1739bc3d5698SJohn Baldwin ldrd r12,r14,[sp,#208] @ D[1] 1740bc3d5698SJohn Baldwin#endif 1741bc3d5698SJohn Baldwin 1742bc3d5698SJohn Baldwin eor r6,r6,r4 1743bc3d5698SJohn Baldwin eor r7,r7,r5 1744bc3d5698SJohn Baldwin mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]); 1745bc3d5698SJohn Baldwin mov r4,r7,ror#32-20 1746bc3d5698SJohn Baldwin 1747bc3d5698SJohn Baldwin eor r10,r10,r8 1748bc3d5698SJohn Baldwin#ifndef __thumb2__ 1749bc3d5698SJohn Baldwin ldr r8,[sp,#408] @ A[4][1] 1750bc3d5698SJohn Baldwin#endif 1751bc3d5698SJohn Baldwin eor r11,r11,r9 1752bc3d5698SJohn Baldwin#ifndef __thumb2__ 1753bc3d5698SJohn Baldwin ldr r9,[sp,#408+4] 1754bc3d5698SJohn Baldwin#else 1755bc3d5698SJohn Baldwin ldrd r8,r9,[sp,#408] @ A[4][1] 1756bc3d5698SJohn Baldwin#endif 1757bc3d5698SJohn Baldwin mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]); 1758bc3d5698SJohn Baldwin mov r6,r11,ror#32-21 1759bc3d5698SJohn Baldwin 1760bc3d5698SJohn Baldwin eor r8,r8,r12 1761bc3d5698SJohn Baldwin eor r9,r9,r14 1762bc3d5698SJohn Baldwin @ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]); 1763bc3d5698SJohn Baldwin @ mov r9,r3,ror#32-1 1764bc3d5698SJohn 
Baldwin 1765bc3d5698SJohn Baldwin bic r10,r4,r2 1766bc3d5698SJohn Baldwin bic r11,r5,r3 1767bc3d5698SJohn Baldwin eor r10,r10,r0,ror#32-31 1768bc3d5698SJohn Baldwin#ifndef __thumb2__ 1769bc3d5698SJohn Baldwin str r10,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 1770bc3d5698SJohn Baldwin#endif 1771bc3d5698SJohn Baldwin eor r11,r11,r1,ror#32-31 1772bc3d5698SJohn Baldwin#ifndef __thumb2__ 1773bc3d5698SJohn Baldwin str r11,[sp,#160+4] 1774bc3d5698SJohn Baldwin#else 1775bc3d5698SJohn Baldwin strd r10,r11,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 1776bc3d5698SJohn Baldwin#endif 1777bc3d5698SJohn Baldwin bic r12,r6,r4 1778bc3d5698SJohn Baldwin bic r14,r7,r5 1779bc3d5698SJohn Baldwin eor r12,r12,r2 1780bc3d5698SJohn Baldwin eor r14,r14,r3 1781bc3d5698SJohn Baldwin#ifndef __thumb2__ 1782bc3d5698SJohn Baldwin str r12,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 1783bc3d5698SJohn Baldwin#endif 1784bc3d5698SJohn Baldwin bic r10,r8,r6,ror#1 1785bc3d5698SJohn Baldwin#ifndef __thumb2__ 1786bc3d5698SJohn Baldwin str r14,[sp,#168+4] 1787bc3d5698SJohn Baldwin#else 1788bc3d5698SJohn Baldwin strd r12,r14,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 1789bc3d5698SJohn Baldwin#endif 1790bc3d5698SJohn Baldwin bic r11,r9,r7,ror#1 1791bc3d5698SJohn Baldwin bic r12,r0,r8,ror#31-1 1792bc3d5698SJohn Baldwin bic r14,r1,r9,ror#31-1 1793bc3d5698SJohn Baldwin eor r4,r4,r10,ror#32-1 1794bc3d5698SJohn Baldwin#ifndef __thumb2__ 1795bc3d5698SJohn Baldwin str r4,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 1796bc3d5698SJohn Baldwin#endif 1797bc3d5698SJohn Baldwin eor r5,r5,r11,ror#32-1 1798bc3d5698SJohn Baldwin#ifndef __thumb2__ 1799bc3d5698SJohn Baldwin str r5,[sp,#176+4] 1800bc3d5698SJohn Baldwin#else 1801bc3d5698SJohn Baldwin strd r4,r5,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 1802bc3d5698SJohn Baldwin#endif 1803bc3d5698SJohn Baldwin eor r6,r6,r12,ror#32-31 1804bc3d5698SJohn Baldwin eor r7,r7,r14,ror#32-31 1805bc3d5698SJohn Baldwin#ifndef __thumb2__ 1806bc3d5698SJohn Baldwin str r6,[sp,#184] 
@ R[4][3] = C[3] ^= (~C[4] & C[0]);
#endif
	bic	r10,r2,r0,ror#32-31
#ifndef __thumb2__
	str	r7,[sp,#184+4]
#else
	strd	r6,r7,[sp,#184]		@ R[4][3] = C[3] ^= (~C[4] & C[0]);
#endif
	bic	r11,r3,r1,ror#32-31
	add	r12,sp,#0		@ r12 -> [sp,#0], r10 -> [sp,#40]: the same
	eor	r8,r10,r8,ror#32-1
	add	r10,sp,#40		@ bases that .Lround2x loads from
	eor	r9,r11,r9,ror#32-1
#ifndef __thumb2__
	str	r8,[sp,#192]		@ R[4][4] = C[4] ^= (~C[0] & C[1]);
	str	r9,[sp,#192+4]
#else
	strd	r8,r9,[sp,#192]		@ R[4][4] = C[4] ^= (~C[0] & C[1]);
#endif
	blo	.Lround2x		@ condition flags come from code outside this excerpt

	@ Return through the lr that KeccakF1600_enter stored at [sp,#440].
#if __ARM_ARCH__>=5
	ldr	pc,[sp,#440]
#else
	ldr	lr,[sp,#440]
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	KeccakF1600_int,.-KeccakF1600_int

@ ----------------------------------------------------------------------
@ KeccakF1600
@   In:  r0 = pointer to the 200-byte state A[5][5].
@   Out: state updated in place.
@   Saves r0,r4-r11,lr, then reserves 440+16 bytes:
@     [sp,#0 .. #199]  working copy of A (five 40-byte rows)
@     [sp,#440]        lr slot written by KeccakF1600_enter
@     [sp,#456]        saved r0 (pointer to A)
@ ----------------------------------------------------------------------
.type	KeccakF1600, %function
.align	5
KeccakF1600:
	stmdb	sp!,{r0,r4-r11,lr}
	sub	sp,sp,#440+16		@ space for A[5][5],D[5],T[5][5],...

	@ Copy A row by row (10 words each) into the frame.
	add	r10,r0,#40		@ r10 = source cursor, rows 1..4
	add	r11,sp,#40		@ r11 = destination cursor, rows 1..4
	ldmia	r0,{r0-r9}		@ row 0
	stmia	sp,{r0-r9}
	ldmia	r10!,{r0-r9}		@ row 1
	stmia	r11!,{r0-r9}
	ldmia	r10!,{r0-r9}		@ row 2
	stmia	r11!,{r0-r9}
	ldmia	r10!,{r0-r9}		@ row 3
	stmia	r11!,{r0-r9}
	ldmia	r10,{r0-r9}		@ row 4; r4-r9 stay live across the call
	add	r12,sp,#0		@ r12/r10 = bases that .Lround2x loads from
	add	r10,sp,#40
	stmia	r11,{r0-r9}

	bl	KeccakF1600_enter

	@ Copy the permuted state back; r10 is sp+40 again at this point
	@ (re-established by the last round before it returned).
	ldr	r11,[sp,#440+16]	@ restore pointer to A
	ldmia	sp,{r0-r9}
	stmia	r11!,{r0-r9}		@ return A[5][5]
	ldmia	r10!,{r0-r9}
	stmia	r11!,{r0-r9}
	ldmia	r10!,{r0-r9}
	stmia	r11!,{r0-r9}
	ldmia	r10!,{r0-r9}
	stmia	r11!,{r0-r9}
	ldmia	r10,{r0-r9}
	stmia	r11,{r0-r9}

	add	sp,sp,#440+20		@ drop the frame and the saved r0
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r11,pc}
#else
	ldmia	sp!,{r4-r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	KeccakF1600,.-KeccakF1600

@ ----------------------------------------------------------------------
@ SHA3_absorb
@   In:  r0 = pointer to A[5][5], r1 = input pointer,
@        r2 = input length (len), r3 = block size (bsz).
@   Out: r0 = bytes left unabsorbed (len itself when len < bsz).
@   Saves r0-r12,lr, then reserves 456+16 bytes:
@     [sp,#0 .. #199]  working copy of A
@     [sp,#456]        0x55555555    [sp,#460]  0x33333333
@     [sp,#464]        0x0f0f0f0f    [sp,#468]  0x00ff00ff
@     [sp,#472]        saved r0 (A)
@     [sp,#476]        saved r1, rewritten with the running input pointer
@     [sp,#480]        saved r2, rewritten with len - bsz per block
@     [sp,#484]        saved r3 (bsz)
@ ----------------------------------------------------------------------
.globl	SHA3_absorb
.type	SHA3_absorb,%function
.align	5
SHA3_absorb:
	stmdb	sp!,{r0-r12,lr}
	sub	sp,sp,#456+16

	add	r10,r0,#40
	@ mov	r11,r1
	mov	r12,r2			@ r12 = len
	mov	r14,r3			@ r14 = bsz
	cmp	r2,r3
	blo	.Labsorb_abort		@ less than one block: absorb nothing

	add	r11,sp,#0
	ldmia	r0,{r0-r9}		@ copy A[5][5] to stack
	stmia	r11!,{r0-r9}
	ldmia	r10!,{r0-r9}
	stmia	r11!,{r0-r9}
	ldmia	r10!,{r0-r9}
	stmia	r11!,{r0-r9}
	ldmia	r10!,{r0-r9}
	stmia	r11!,{r0-r9}
	ldmia	r10!,{r0-r9}
	stmia	r11,{r0-r9}

	ldr	r11,[sp,#476]		@ restore r11 (input pointer, was r1)
#ifdef __thumb2__
	mov	r9,#0x00ff00ff
	mov	r8,#0x0f0f0f0f
	mov	r7,#0x33333333
	mov	r6,#0x55555555
#else
	mov	r6,#0x11		@ compose constants
	mov	r8,#0x0f
	mov	r9,#0xff
	orr	r6,r6,r6,lsl#8
	orr	r8,r8,r8,lsl#8
	orr	r6,r6,r6,lsl#16		@ 0x11111111
	orr	r9,r9,r9,lsl#16		@ 0x00ff00ff
	orr	r8,r8,r8,lsl#16		@ 0x0f0f0f0f
	orr	r7,r6,r6,lsl#1		@ 0x33333333
	orr	r6,r6,r6,lsl#2		@ 0x55555555
#endif
	@ Park the masks in the frame; they are reloaded after every permutation.
	str	r9,[sp,#468]
	str	r8,[sp,#464]
	str	r7,[sp,#460]
	str	r6,[sp,#456]
	b	.Loop_absorb

.align	4
.Loop_absorb:
	subs	r0,r12,r14		@ r0 = len - bsz
	blo	.Labsorbed		@ no full block left
	add	r10,sp,#0		@ r10 walks the stacked state
	str	r0,[sp,#480]		@ save len - bsz

.align	4
.Loop_block:
	@ Assemble 8 input bytes into two little-endian words.
	ldrb	r0,[r11],#1
	ldrb	r1,[r11],#1
	ldrb	r2,[r11],#1
	ldrb	r3,[r11],#1
	ldrb	r4,[r11],#1
	orr	r0,r0,r1,lsl#8
	ldrb	r1,[r11],#1
	orr	r0,r0,r2,lsl#16
	ldrb	r2,[r11],#1
	orr	r0,r0,r3,lsl#24		@ lo
	ldrb	r3,[r11],#1
	orr	r1,r4,r1,lsl#8
	orr	r1,r1,r2,lsl#16
	orr	r1,r1,r3,lsl#24		@ hi

	@ Mask-and-shift ladder: width 1, 2, 4, then 8.
	and	r2,r0,r6		@ &=0x55555555
	and	r0,r0,r6,lsl#1		@ &=0xaaaaaaaa
	and	r3,r1,r6		@ &=0x55555555
	and	r1,r1,r6,lsl#1		@ &=0xaaaaaaaa
	orr	r2,r2,r2,lsr#1
	orr	r0,r0,r0,lsl#1
	orr	r3,r3,r3,lsr#1
	orr	r1,r1,r1,lsl#1
	and	r2,r2,r7		@ &=0x33333333
	and	r0,r0,r7,lsl#2		@ &=0xcccccccc
	and	r3,r3,r7		@ &=0x33333333
	and	r1,r1,r7,lsl#2		@ &=0xcccccccc
	orr	r2,r2,r2,lsr#2
	orr	r0,r0,r0,lsl#2
	orr	r3,r3,r3,lsr#2
	orr	r1,r1,r1,lsl#2
	and	r2,r2,r8		@ &=0x0f0f0f0f
	and	r0,r0,r8,lsl#4		@ &=0xf0f0f0f0
	and	r3,r3,r8		@ &=0x0f0f0f0f
	and	r1,r1,r8,lsl#4		@ &=0xf0f0f0f0
	ldmia	r10,{r4,r5}		@ A_flat[i]
	orr	r2,r2,r2,lsr#4
	orr	r0,r0,r0,lsl#4
	orr	r3,r3,r3,lsr#4
	orr	r1,r1,r1,lsl#4
	and	r2,r2,r9		@ &=0x00ff00ff
	and	r0,r0,r9,lsl#8		@ &=0xff00ff00
	and	r3,r3,r9		@ &=0x00ff00ff
	and	r1,r1,r9,lsl#8		@ &=0xff00ff00
	orr	r2,r2,r2,lsr#8
	orr	r0,r0,r0,lsl#8
	orr	r3,r3,r3,lsr#8
	orr	r1,r1,r1,lsl#8

	@ Combine the halves and fold them into the state word pair.
	mov	r2,r2,lsl#16
	mov	r1,r1,lsr#16
	eor	r4,r4,r3,lsl#16
	eor	r5,r5,r0,lsr#16
	eor	r4,r4,r2,lsr#16
	eor	r5,r5,r1,lsl#16
	stmia	r10!,{r4,r5}		@ A_flat[i++] ^= BitInterleave(inp[0..7])

	subs	r14,r14,#8		@ 8 bytes of this block consumed
	bhi	.Loop_block

	str	r11,[sp,#476]		@ remember how far the input was read

	bl	KeccakF1600_int

	@ One multi-load brings back masks (r6-r9), A (r10), the input
	@ pointer (r11), the remaining length (r12) and bsz (r14).
	add	r14,sp,#456
	ldmia	r14,{r6-r12,r14}	@ restore constants and variables
	b	.Loop_absorb

.align	4
.Labsorbed:
	@ r10 = A here (reloaded after the last permutation).
	add	r11,sp,#40
	ldmia	sp,{r0-r9}
	stmia	r10!,{r0-r9}		@ return A[5][5]
	ldmia	r11!,{r0-r9}
	stmia	r10!,{r0-r9}
	ldmia	r11!,{r0-r9}
	stmia	r10!,{r0-r9}
	ldmia	r11!,{r0-r9}
	stmia	r10!,{r0-r9}
	ldmia	r11,{r0-r9}
	stmia	r10,{r0-r9}

.Labsorb_abort:
	add	sp,sp,#456+32		@ drop the frame and the saved r0-r3
	mov	r0,r12			@ return value
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r12,pc}
#else
	ldmia	sp!,{r4-r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	SHA3_absorb,.-SHA3_absorb
.globl	SHA3_squeeze
.type	SHA3_squeeze,%function
.align	5
@ ----------------------------------------------------------------------
@ SHA3_squeeze
@   In:  r0 = pointer to the state (read as A_flat word pairs),
@        r1 = output pointer, r2 = output length (len),
@        r3 = block size (bsz).
@   Each word pair is run through a mask-and-shift ladder (width 8, 4,
@   2, 1) and stored byte by byte, low byte first.  Once bsz bytes have
@   been produced and output is still owed, KeccakF1600 is called on
@   the state and reading restarts at its beginning.
@   Stack after both pushes:
@     [sp,#0 .. #12]  masks r6-r9 (0x55555555, 0x33333333,
@                     0x0f0f0f0f, 0x00ff00ff)
@     [sp,#16]        saved r0 (state pointer)
@     [sp,#20]        saved r3 (bsz)
@     [sp,#24 ..]     r4-r10, lr
@ ----------------------------------------------------------------------
SHA3_squeeze:
	stmdb	sp!,{r0,r3-r10,lr}

	mov	r10,r0			@ r10 = state cursor
	mov	r4,r1			@ r4  = output cursor
	mov	r5,r2			@ r5  = bytes still to emit
	mov	r12,r3			@ r12 = bytes left in this block

#ifdef __thumb2__
	mov	r9,#0x00ff00ff
	mov	r8,#0x0f0f0f0f
	mov	r7,#0x33333333
	mov	r6,#0x55555555
#else
	mov	r6,#0x11		@ compose constants
	mov	r8,#0x0f
	mov	r9,#0xff
	orr	r6,r6,r6,lsl#8
	orr	r8,r8,r8,lsl#8
	orr	r6,r6,r6,lsl#16		@ 0x11111111
	orr	r9,r9,r9,lsl#16		@ 0x00ff00ff
	orr	r8,r8,r8,lsl#16		@ 0x0f0f0f0f
	orr	r7,r6,r6,lsl#1		@ 0x33333333
	orr	r6,r6,r6,lsl#2		@ 0x55555555
#endif
	stmdb	sp!,{r6,r7,r8,r9}

	@ FIX: a zero-length request must not store anything.  Without this
	@ guard the first pass takes the cmp r5,#8 / blo route into
	@ .Lsqueeze_tail with r5 == 0; every subs r5,r5,#1 there wraps
	@ instead of reaching zero, so 7 bytes are written to the output.
	@ The frame is complete here, so the shared epilogue can be used.
	@ NOTE: this file is generated from keccak1600-armv4.pl; mirror the
	@ change there or it is lost on regeneration.
	cmp	r5,#0
	beq	.Lsqueeze_done

	mov	r14,r10			@ keep the state base for KeccakF1600
	b	.Loop_squeeze

.align	4
.Loop_squeeze:
	ldmia	r10!,{r0,r1}		@ A_flat[i++]

	@ Split each word into its 16-bit halves.
	mov	r2,r0,lsl#16
	mov	r3,r1,lsl#16		@ r3 = r1 << 16
	mov	r2,r2,lsr#16		@ r2 = r0 & 0x0000ffff
	mov	r1,r1,lsr#16
	mov	r0,r0,lsr#16		@ r0 = r0 >> 16
	mov	r1,r1,lsl#16		@ r1 = r1 & 0xffff0000

	@ Mask-and-shift ladder: width 8, 4, 2, then 1.
	orr	r2,r2,r2,lsl#8
	orr	r3,r3,r3,lsr#8
	orr	r0,r0,r0,lsl#8
	orr	r1,r1,r1,lsr#8
	and	r2,r2,r9		@ &=0x00ff00ff
	and	r3,r3,r9,lsl#8		@ &=0xff00ff00
	and	r0,r0,r9		@ &=0x00ff00ff
	and	r1,r1,r9,lsl#8		@ &=0xff00ff00
	orr	r2,r2,r2,lsl#4
	orr	r3,r3,r3,lsr#4
	orr	r0,r0,r0,lsl#4
	orr	r1,r1,r1,lsr#4
	and	r2,r2,r8		@ &=0x0f0f0f0f
	and	r3,r3,r8,lsl#4		@ &=0xf0f0f0f0
	and	r0,r0,r8		@ &=0x0f0f0f0f
	and	r1,r1,r8,lsl#4		@ &=0xf0f0f0f0
	orr	r2,r2,r2,lsl#2
	orr	r3,r3,r3,lsr#2
	orr	r0,r0,r0,lsl#2
	orr	r1,r1,r1,lsr#2
	and	r2,r2,r7		@ &=0x33333333
	and	r3,r3,r7,lsl#2		@ &=0xcccccccc
	and	r0,r0,r7		@ &=0x33333333
	and	r1,r1,r7,lsl#2		@ &=0xcccccccc
	orr	r2,r2,r2,lsl#1
	orr	r3,r3,r3,lsr#1
	orr	r0,r0,r0,lsl#1
	orr	r1,r1,r1,lsr#1
	and	r2,r2,r6		@ &=0x55555555
	and	r3,r3,r6,lsl#1		@ &=0xaaaaaaaa
	and	r0,r0,r6		@ &=0x55555555
	and	r1,r1,r6,lsl#1		@ &=0xaaaaaaaa

	orr	r2,r2,r3		@ r2 = first four output bytes
	orr	r0,r0,r1		@ r0 = next four output bytes

	cmp	r5,#8
	blo	.Lsqueeze_tail		@ fewer than 8 bytes wanted
	@ Full lane: store r2, then r0, low byte first.
	mov	r1,r2,lsr#8
	strb	r2,[r4],#1
	mov	r3,r2,lsr#16
	strb	r1,[r4],#1
	mov	r2,r2,lsr#24
	strb	r3,[r4],#1
	strb	r2,[r4],#1

	mov	r1,r0,lsr#8
	strb	r0,[r4],#1
	mov	r3,r0,lsr#16
	strb	r1,[r4],#1
	mov	r0,r0,lsr#24
	strb	r3,[r4],#1
	strb	r0,[r4],#1
	subs	r5,r5,#8
	beq	.Lsqueeze_done

	subs	r12,r12,#8		@ bsz -= 8
	bhi	.Loop_squeeze

	@ Block exhausted with output still owed: permute and start over.
	mov	r0,r14			@ original r10

	bl	KeccakF1600

	ldmia	sp,{r6,r7,r8,r9,r10,r12}	@ restore constants and variables
	mov	r14,r10
	b	.Loop_squeeze

.align	4
.Lsqueeze_tail:
	@ 1..7 bytes remain; emit them one at a time, leaving as soon as
	@ the count reaches zero.
	strb	r2,[r4],#1
	mov	r2,r2,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r2,[r4],#1
	mov	r2,r2,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r2,[r4],#1
	mov	r2,r2,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r2,[r4],#1
	subs	r5,r5,#1
	beq	.Lsqueeze_done

	strb	r0,[r4],#1
	mov	r0,r0,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r0,[r4],#1
	mov	r0,r0,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r0,[r4]
	b	.Lsqueeze_done

.align	4
.Lsqueeze_done:
	add	sp,sp,#24		@ drop the 4 masks plus saved r0 and r3
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r10,pc}
#else
	ldmia	sp!,{r4-r10,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	SHA3_squeeze,.-SHA3_squeeze
#if __ARM_MAX_ARCH__>=7
.fpu	neon

.type	iotas64, %object
.align	5
iotas64:
.quad	0x0000000000000001
.quad	0x0000000000008082
.quad	0x800000000000808a
.quad	0x8000000080008000
.quad	0x000000000000808b
.quad	0x0000000080000001
.quad	0x8000000080008081
.quad	0x8000000000008009
.quad	0x000000000000008a
.quad	0x0000000000000088
.quad	0x0000000080008009
.quad	0x000000008000000a
.quad	0x000000008000808b
.quad	0x800000000000008b
.quad	0x8000000000008089
.quad	0x8000000000008003
.quad	0x8000000000008002
.quad	0x8000000000000080
.quad	0x000000000000800a
.quad	0x800000008000000a
.quad	0x8000000080008081
.quad	0x8000000000008080
2217bc3d5698SJohn Baldwin.quad 0x0000000080000001 2218bc3d5698SJohn Baldwin.quad 0x8000000080008008 2219bc3d5698SJohn Baldwin.size iotas64,.-iotas64 2220bc3d5698SJohn Baldwin 2221bc3d5698SJohn Baldwin.type KeccakF1600_neon, %function 2222bc3d5698SJohn Baldwin.align 5 2223bc3d5698SJohn BaldwinKeccakF1600_neon: 2224bc3d5698SJohn Baldwin add r1, r0, #16 2225bc3d5698SJohn Baldwin adr r2, iotas64 2226bc3d5698SJohn Baldwin mov r3, #24 @ loop counter 2227bc3d5698SJohn Baldwin b .Loop_neon 2228bc3d5698SJohn Baldwin 2229bc3d5698SJohn Baldwin.align 4 2230bc3d5698SJohn Baldwin.Loop_neon: 2231bc3d5698SJohn Baldwin @ Theta 2232bc3d5698SJohn Baldwin vst1.64 {q4}, [r0,:64] @ offload A[0..1][4] 2233bc3d5698SJohn Baldwin veor q13, q0, q5 @ A[0..1][0]^A[2..3][0] 2234bc3d5698SJohn Baldwin vst1.64 {d18}, [r1,:64] @ offload A[2][4] 2235bc3d5698SJohn Baldwin veor q14, q1, q6 @ A[0..1][1]^A[2..3][1] 2236bc3d5698SJohn Baldwin veor q15, q2, q7 @ A[0..1][2]^A[2..3][2] 2237bc3d5698SJohn Baldwin veor d26, d26, d27 @ C[0]=A[0][0]^A[1][0]^A[2][0]^A[3][0] 2238bc3d5698SJohn Baldwin veor d27, d28, d29 @ C[1]=A[0][1]^A[1][1]^A[2][1]^A[3][1] 2239bc3d5698SJohn Baldwin veor q14, q3, q8 @ A[0..1][3]^A[2..3][3] 2240bc3d5698SJohn Baldwin veor q4, q4, q9 @ A[0..1][4]^A[2..3][4] 2241bc3d5698SJohn Baldwin veor d30, d30, d31 @ C[2]=A[0][2]^A[1][2]^A[2][2]^A[3][2] 2242bc3d5698SJohn Baldwin veor d31, d28, d29 @ C[3]=A[0][3]^A[1][3]^A[2][3]^A[3][3] 2243bc3d5698SJohn Baldwin veor d25, d8, d9 @ C[4]=A[0][4]^A[1][4]^A[2][4]^A[3][4] 2244bc3d5698SJohn Baldwin veor q13, q13, q10 @ C[0..1]^=A[4][0..1] 2245bc3d5698SJohn Baldwin veor q14, q15, q11 @ C[2..3]^=A[4][2..3] 2246bc3d5698SJohn Baldwin veor d25, d25, d24 @ C[4]^=A[4][4] 2247bc3d5698SJohn Baldwin 2248bc3d5698SJohn Baldwin vadd.u64 q4, q13, q13 @ C[0..1]<<1 2249bc3d5698SJohn Baldwin vadd.u64 q15, q14, q14 @ C[2..3]<<1 2250bc3d5698SJohn Baldwin vadd.u64 d18, d25, d25 @ C[4]<<1 2251bc3d5698SJohn Baldwin vsri.u64 q4, q13, #63 @ ROL64(C[0..1],1) 2252bc3d5698SJohn 
Baldwin vsri.u64 q15, q14, #63 @ ROL64(C[2..3],1) 2253bc3d5698SJohn Baldwin vsri.u64 d18, d25, #63 @ ROL64(C[4],1) 2254bc3d5698SJohn Baldwin veor d25, d25, d9 @ D[0] = C[4] ^= ROL64(C[1],1) 2255bc3d5698SJohn Baldwin veor q13, q13, q15 @ D[1..2] = C[0..1] ^ ROL64(C[2..3],1) 2256bc3d5698SJohn Baldwin veor d28, d28, d18 @ D[3] = C[2] ^= ROL64(C[4],1) 2257bc3d5698SJohn Baldwin veor d29, d29, d8 @ D[4] = C[3] ^= ROL64(C[0],1) 2258bc3d5698SJohn Baldwin 2259bc3d5698SJohn Baldwin veor d0, d0, d25 @ A[0][0] ^= C[4] 2260bc3d5698SJohn Baldwin veor d1, d1, d25 @ A[1][0] ^= C[4] 2261bc3d5698SJohn Baldwin veor d10, d10, d25 @ A[2][0] ^= C[4] 2262bc3d5698SJohn Baldwin veor d11, d11, d25 @ A[3][0] ^= C[4] 2263bc3d5698SJohn Baldwin veor d20, d20, d25 @ A[4][0] ^= C[4] 2264bc3d5698SJohn Baldwin 2265bc3d5698SJohn Baldwin veor d2, d2, d26 @ A[0][1] ^= D[1] 2266bc3d5698SJohn Baldwin veor d3, d3, d26 @ A[1][1] ^= D[1] 2267bc3d5698SJohn Baldwin veor d12, d12, d26 @ A[2][1] ^= D[1] 2268bc3d5698SJohn Baldwin veor d13, d13, d26 @ A[3][1] ^= D[1] 2269bc3d5698SJohn Baldwin veor d21, d21, d26 @ A[4][1] ^= D[1] 2270bc3d5698SJohn Baldwin vmov d26, d27 2271bc3d5698SJohn Baldwin 2272bc3d5698SJohn Baldwin veor d6, d6, d28 @ A[0][3] ^= C[2] 2273bc3d5698SJohn Baldwin veor d7, d7, d28 @ A[1][3] ^= C[2] 2274bc3d5698SJohn Baldwin veor d16, d16, d28 @ A[2][3] ^= C[2] 2275bc3d5698SJohn Baldwin veor d17, d17, d28 @ A[3][3] ^= C[2] 2276bc3d5698SJohn Baldwin veor d23, d23, d28 @ A[4][3] ^= C[2] 2277bc3d5698SJohn Baldwin vld1.64 {q4}, [r0,:64] @ restore A[0..1][4] 2278bc3d5698SJohn Baldwin vmov d28, d29 2279bc3d5698SJohn Baldwin 2280bc3d5698SJohn Baldwin vld1.64 {d18}, [r1,:64] @ restore A[2][4] 2281bc3d5698SJohn Baldwin veor q2, q2, q13 @ A[0..1][2] ^= D[2] 2282bc3d5698SJohn Baldwin veor q7, q7, q13 @ A[2..3][2] ^= D[2] 2283bc3d5698SJohn Baldwin veor d22, d22, d27 @ A[4][2] ^= D[2] 2284bc3d5698SJohn Baldwin 2285bc3d5698SJohn Baldwin veor q4, q4, q14 @ A[0..1][4] ^= C[3] 2286bc3d5698SJohn Baldwin veor q9, q9, 
q14 @ A[2..3][4] ^= C[3] 2287bc3d5698SJohn Baldwin veor d24, d24, d29 @ A[4][4] ^= C[3] 2288bc3d5698SJohn Baldwin 2289bc3d5698SJohn Baldwin @ Rho + Pi 2290bc3d5698SJohn Baldwin vmov d26, d2 @ C[1] = A[0][1] 2291bc3d5698SJohn Baldwin vshl.u64 d2, d3, #44 2292bc3d5698SJohn Baldwin vmov d27, d4 @ C[2] = A[0][2] 2293bc3d5698SJohn Baldwin vshl.u64 d4, d14, #43 2294bc3d5698SJohn Baldwin vmov d28, d6 @ C[3] = A[0][3] 2295bc3d5698SJohn Baldwin vshl.u64 d6, d17, #21 2296bc3d5698SJohn Baldwin vmov d29, d8 @ C[4] = A[0][4] 2297bc3d5698SJohn Baldwin vshl.u64 d8, d24, #14 2298bc3d5698SJohn Baldwin vsri.u64 d2, d3, #64-44 @ A[0][1] = ROL64(A[1][1], rhotates[1][1]) 2299bc3d5698SJohn Baldwin vsri.u64 d4, d14, #64-43 @ A[0][2] = ROL64(A[2][2], rhotates[2][2]) 2300bc3d5698SJohn Baldwin vsri.u64 d6, d17, #64-21 @ A[0][3] = ROL64(A[3][3], rhotates[3][3]) 2301bc3d5698SJohn Baldwin vsri.u64 d8, d24, #64-14 @ A[0][4] = ROL64(A[4][4], rhotates[4][4]) 2302bc3d5698SJohn Baldwin 2303bc3d5698SJohn Baldwin vshl.u64 d3, d9, #20 2304bc3d5698SJohn Baldwin vshl.u64 d14, d16, #25 2305bc3d5698SJohn Baldwin vshl.u64 d17, d15, #15 2306bc3d5698SJohn Baldwin vshl.u64 d24, d21, #2 2307bc3d5698SJohn Baldwin vsri.u64 d3, d9, #64-20 @ A[1][1] = ROL64(A[1][4], rhotates[1][4]) 2308bc3d5698SJohn Baldwin vsri.u64 d14, d16, #64-25 @ A[2][2] = ROL64(A[2][3], rhotates[2][3]) 2309bc3d5698SJohn Baldwin vsri.u64 d17, d15, #64-15 @ A[3][3] = ROL64(A[3][2], rhotates[3][2]) 2310bc3d5698SJohn Baldwin vsri.u64 d24, d21, #64-2 @ A[4][4] = ROL64(A[4][1], rhotates[4][1]) 2311bc3d5698SJohn Baldwin 2312bc3d5698SJohn Baldwin vshl.u64 d9, d22, #61 2313bc3d5698SJohn Baldwin @ vshl.u64 d16, d19, #8 2314bc3d5698SJohn Baldwin vshl.u64 d15, d12, #10 2315bc3d5698SJohn Baldwin vshl.u64 d21, d7, #55 2316bc3d5698SJohn Baldwin vsri.u64 d9, d22, #64-61 @ A[1][4] = ROL64(A[4][2], rhotates[4][2]) 2317bc3d5698SJohn Baldwin vext.8 d16, d19, d19, #8-1 @ A[2][3] = ROL64(A[3][4], rhotates[3][4]) 2318bc3d5698SJohn Baldwin vsri.u64 d15, d12, #64-10 
@ A[3][2] = ROL64(A[2][1], rhotates[2][1]) 2319bc3d5698SJohn Baldwin vsri.u64 d21, d7, #64-55 @ A[4][1] = ROL64(A[1][3], rhotates[1][3]) 2320bc3d5698SJohn Baldwin 2321bc3d5698SJohn Baldwin vshl.u64 d22, d18, #39 2322bc3d5698SJohn Baldwin @ vshl.u64 d19, d23, #56 2323bc3d5698SJohn Baldwin vshl.u64 d12, d5, #6 2324bc3d5698SJohn Baldwin vshl.u64 d7, d13, #45 2325bc3d5698SJohn Baldwin vsri.u64 d22, d18, #64-39 @ A[4][2] = ROL64(A[2][4], rhotates[2][4]) 2326bc3d5698SJohn Baldwin vext.8 d19, d23, d23, #8-7 @ A[3][4] = ROL64(A[4][3], rhotates[4][3]) 2327bc3d5698SJohn Baldwin vsri.u64 d12, d5, #64-6 @ A[2][1] = ROL64(A[1][2], rhotates[1][2]) 2328bc3d5698SJohn Baldwin vsri.u64 d7, d13, #64-45 @ A[1][3] = ROL64(A[3][1], rhotates[3][1]) 2329bc3d5698SJohn Baldwin 2330bc3d5698SJohn Baldwin vshl.u64 d18, d20, #18 2331bc3d5698SJohn Baldwin vshl.u64 d23, d11, #41 2332bc3d5698SJohn Baldwin vshl.u64 d5, d10, #3 2333bc3d5698SJohn Baldwin vshl.u64 d13, d1, #36 2334bc3d5698SJohn Baldwin vsri.u64 d18, d20, #64-18 @ A[2][4] = ROL64(A[4][0], rhotates[4][0]) 2335bc3d5698SJohn Baldwin vsri.u64 d23, d11, #64-41 @ A[4][3] = ROL64(A[3][0], rhotates[3][0]) 2336bc3d5698SJohn Baldwin vsri.u64 d5, d10, #64-3 @ A[1][2] = ROL64(A[2][0], rhotates[2][0]) 2337bc3d5698SJohn Baldwin vsri.u64 d13, d1, #64-36 @ A[3][1] = ROL64(A[1][0], rhotates[1][0]) 2338bc3d5698SJohn Baldwin 2339bc3d5698SJohn Baldwin vshl.u64 d1, d28, #28 2340bc3d5698SJohn Baldwin vshl.u64 d10, d26, #1 2341bc3d5698SJohn Baldwin vshl.u64 d11, d29, #27 2342bc3d5698SJohn Baldwin vshl.u64 d20, d27, #62 2343bc3d5698SJohn Baldwin vsri.u64 d1, d28, #64-28 @ A[1][0] = ROL64(C[3], rhotates[0][3]) 2344bc3d5698SJohn Baldwin vsri.u64 d10, d26, #64-1 @ A[2][0] = ROL64(C[1], rhotates[0][1]) 2345bc3d5698SJohn Baldwin vsri.u64 d11, d29, #64-27 @ A[3][0] = ROL64(C[4], rhotates[0][4]) 2346bc3d5698SJohn Baldwin vsri.u64 d20, d27, #64-62 @ A[4][0] = ROL64(C[2], rhotates[0][2]) 2347bc3d5698SJohn Baldwin 2348bc3d5698SJohn Baldwin @ Chi + Iota 
2349bc3d5698SJohn Baldwin vbic q13, q2, q1 2350bc3d5698SJohn Baldwin vbic q14, q3, q2 2351bc3d5698SJohn Baldwin vbic q15, q4, q3 2352bc3d5698SJohn Baldwin veor q13, q13, q0 @ A[0..1][0] ^ (~A[0..1][1] & A[0..1][2]) 2353bc3d5698SJohn Baldwin veor q14, q14, q1 @ A[0..1][1] ^ (~A[0..1][2] & A[0..1][3]) 2354bc3d5698SJohn Baldwin veor q2, q2, q15 @ A[0..1][2] ^= (~A[0..1][3] & A[0..1][4]) 2355bc3d5698SJohn Baldwin vst1.64 {q13}, [r0,:64] @ offload A[0..1][0] 2356bc3d5698SJohn Baldwin vbic q13, q0, q4 2357bc3d5698SJohn Baldwin vbic q15, q1, q0 2358bc3d5698SJohn Baldwin vmov q1, q14 @ A[0..1][1] 2359bc3d5698SJohn Baldwin veor q3, q3, q13 @ A[0..1][3] ^= (~A[0..1][4] & A[0..1][0]) 2360bc3d5698SJohn Baldwin veor q4, q4, q15 @ A[0..1][4] ^= (~A[0..1][0] & A[0..1][1]) 2361bc3d5698SJohn Baldwin 2362bc3d5698SJohn Baldwin vbic q13, q7, q6 2363bc3d5698SJohn Baldwin vmov q0, q5 @ A[2..3][0] 2364bc3d5698SJohn Baldwin vbic q14, q8, q7 2365bc3d5698SJohn Baldwin vmov q15, q6 @ A[2..3][1] 2366bc3d5698SJohn Baldwin veor q5, q5, q13 @ A[2..3][0] ^= (~A[2..3][1] & A[2..3][2]) 2367bc3d5698SJohn Baldwin vbic q13, q9, q8 2368bc3d5698SJohn Baldwin veor q6, q6, q14 @ A[2..3][1] ^= (~A[2..3][2] & A[2..3][3]) 2369bc3d5698SJohn Baldwin vbic q14, q0, q9 2370bc3d5698SJohn Baldwin veor q7, q7, q13 @ A[2..3][2] ^= (~A[2..3][3] & A[2..3][4]) 2371bc3d5698SJohn Baldwin vbic q13, q15, q0 2372bc3d5698SJohn Baldwin veor q8, q8, q14 @ A[2..3][3] ^= (~A[2..3][4] & A[2..3][0]) 2373bc3d5698SJohn Baldwin vmov q14, q10 @ A[4][0..1] 2374bc3d5698SJohn Baldwin veor q9, q9, q13 @ A[2..3][4] ^= (~A[2..3][0] & A[2..3][1]) 2375bc3d5698SJohn Baldwin 2376bc3d5698SJohn Baldwin vld1.64 d25, [r2,:64]! 
@ Iota[i++] 2377bc3d5698SJohn Baldwin vbic d26, d22, d21 2378bc3d5698SJohn Baldwin vbic d27, d23, d22 2379bc3d5698SJohn Baldwin vld1.64 {q0}, [r0,:64] @ restore A[0..1][0] 2380bc3d5698SJohn Baldwin veor d20, d20, d26 @ A[4][0] ^= (~A[4][1] & A[4][2]) 2381bc3d5698SJohn Baldwin vbic d26, d24, d23 2382bc3d5698SJohn Baldwin veor d21, d21, d27 @ A[4][1] ^= (~A[4][2] & A[4][3]) 2383bc3d5698SJohn Baldwin vbic d27, d28, d24 2384bc3d5698SJohn Baldwin veor d22, d22, d26 @ A[4][2] ^= (~A[4][3] & A[4][4]) 2385bc3d5698SJohn Baldwin vbic d26, d29, d28 2386bc3d5698SJohn Baldwin veor d23, d23, d27 @ A[4][3] ^= (~A[4][4] & A[4][0]) 2387bc3d5698SJohn Baldwin veor d0, d0, d25 @ A[0][0] ^= Iota[i] 2388bc3d5698SJohn Baldwin veor d24, d24, d26 @ A[4][4] ^= (~A[4][0] & A[4][1]) 2389bc3d5698SJohn Baldwin 2390bc3d5698SJohn Baldwin subs r3, r3, #1 2391bc3d5698SJohn Baldwin bne .Loop_neon 2392bc3d5698SJohn Baldwin 2393*c0855eaaSJohn Baldwin bx lr 2394bc3d5698SJohn Baldwin.size KeccakF1600_neon,.-KeccakF1600_neon 2395bc3d5698SJohn Baldwin 2396bc3d5698SJohn Baldwin.globl SHA3_absorb_neon 2397bc3d5698SJohn Baldwin.type SHA3_absorb_neon, %function 2398bc3d5698SJohn Baldwin.align 5 2399bc3d5698SJohn BaldwinSHA3_absorb_neon: 2400bc3d5698SJohn Baldwin stmdb sp!, {r4,r5,r6,lr} 2401bc3d5698SJohn Baldwin vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15} 2402bc3d5698SJohn Baldwin 2403bc3d5698SJohn Baldwin mov r4, r1 @ inp 2404bc3d5698SJohn Baldwin mov r5, r2 @ len 2405bc3d5698SJohn Baldwin mov r6, r3 @ bsz 2406bc3d5698SJohn Baldwin 2407bc3d5698SJohn Baldwin vld1.32 {d0}, [r0,:64]! @ A[0][0] 2408bc3d5698SJohn Baldwin vld1.32 {d2}, [r0,:64]! @ A[0][1] 2409bc3d5698SJohn Baldwin vld1.32 {d4}, [r0,:64]! @ A[0][2] 2410bc3d5698SJohn Baldwin vld1.32 {d6}, [r0,:64]! @ A[0][3] 2411bc3d5698SJohn Baldwin vld1.32 {d8}, [r0,:64]! @ A[0][4] 2412bc3d5698SJohn Baldwin 2413bc3d5698SJohn Baldwin vld1.32 {d1}, [r0,:64]! @ A[1][0] 2414bc3d5698SJohn Baldwin vld1.32 {d3}, [r0,:64]! 
@ A[1][1] 2415bc3d5698SJohn Baldwin vld1.32 {d5}, [r0,:64]! @ A[1][2] 2416bc3d5698SJohn Baldwin vld1.32 {d7}, [r0,:64]! @ A[1][3] 2417bc3d5698SJohn Baldwin vld1.32 {d9}, [r0,:64]! @ A[1][4] 2418bc3d5698SJohn Baldwin 2419bc3d5698SJohn Baldwin vld1.32 {d10}, [r0,:64]! @ A[2][0] 2420bc3d5698SJohn Baldwin vld1.32 {d12}, [r0,:64]! @ A[2][1] 2421bc3d5698SJohn Baldwin vld1.32 {d14}, [r0,:64]! @ A[2][2] 2422bc3d5698SJohn Baldwin vld1.32 {d16}, [r0,:64]! @ A[2][3] 2423bc3d5698SJohn Baldwin vld1.32 {d18}, [r0,:64]! @ A[2][4] 2424bc3d5698SJohn Baldwin 2425bc3d5698SJohn Baldwin vld1.32 {d11}, [r0,:64]! @ A[3][0] 2426bc3d5698SJohn Baldwin vld1.32 {d13}, [r0,:64]! @ A[3][1] 2427bc3d5698SJohn Baldwin vld1.32 {d15}, [r0,:64]! @ A[3][2] 2428bc3d5698SJohn Baldwin vld1.32 {d17}, [r0,:64]! @ A[3][3] 2429bc3d5698SJohn Baldwin vld1.32 {d19}, [r0,:64]! @ A[3][4] 2430bc3d5698SJohn Baldwin 2431bc3d5698SJohn Baldwin vld1.32 {d20,d21,d22,d23}, [r0,:64]! @ A[4][0..3] 2432bc3d5698SJohn Baldwin vld1.32 {d24}, [r0,:64] @ A[4][4] 2433bc3d5698SJohn Baldwin sub r0, r0, #24*8 @ rewind 2434bc3d5698SJohn Baldwin b .Loop_absorb_neon 2435bc3d5698SJohn Baldwin 2436bc3d5698SJohn Baldwin.align 4 2437bc3d5698SJohn Baldwin.Loop_absorb_neon: 2438bc3d5698SJohn Baldwin subs r12, r5, r6 @ len - bsz 2439bc3d5698SJohn Baldwin blo .Labsorbed_neon 2440bc3d5698SJohn Baldwin mov r5, r12 2441bc3d5698SJohn Baldwin 2442bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! @ endian-neutral loads... 2443bc3d5698SJohn Baldwin cmp r6, #8*2 2444bc3d5698SJohn Baldwin veor d0, d0, d31 @ A[0][0] ^= *inp++ 2445bc3d5698SJohn Baldwin blo .Lprocess_neon 2446bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 2447bc3d5698SJohn Baldwin veor d2, d2, d31 @ A[0][1] ^= *inp++ 2448bc3d5698SJohn Baldwin beq .Lprocess_neon 2449bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 2450bc3d5698SJohn Baldwin cmp r6, #8*4 2451bc3d5698SJohn Baldwin veor d4, d4, d31 @ A[0][2] ^= *inp++ 2452bc3d5698SJohn Baldwin blo .Lprocess_neon 2453bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 
2454bc3d5698SJohn Baldwin veor d6, d6, d31 @ A[0][3] ^= *inp++ 2455bc3d5698SJohn Baldwin beq .Lprocess_neon 2456bc3d5698SJohn Baldwin vld1.8 {d31},[r4]! 2457bc3d5698SJohn Baldwin cmp r6, #8*6 2458bc3d5698SJohn Baldwin veor d8, d8, d31 @ A[0][4] ^= *inp++ 2459bc3d5698SJohn Baldwin blo .Lprocess_neon 2460bc3d5698SJohn Baldwin 2461bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 2462bc3d5698SJohn Baldwin veor d1, d1, d31 @ A[1][0] ^= *inp++ 2463bc3d5698SJohn Baldwin beq .Lprocess_neon 2464bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 2465bc3d5698SJohn Baldwin cmp r6, #8*8 2466bc3d5698SJohn Baldwin veor d3, d3, d31 @ A[1][1] ^= *inp++ 2467bc3d5698SJohn Baldwin blo .Lprocess_neon 2468bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 2469bc3d5698SJohn Baldwin veor d5, d5, d31 @ A[1][2] ^= *inp++ 2470bc3d5698SJohn Baldwin beq .Lprocess_neon 2471bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 2472bc3d5698SJohn Baldwin cmp r6, #8*10 2473bc3d5698SJohn Baldwin veor d7, d7, d31 @ A[1][3] ^= *inp++ 2474bc3d5698SJohn Baldwin blo .Lprocess_neon 2475bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 2476bc3d5698SJohn Baldwin veor d9, d9, d31 @ A[1][4] ^= *inp++ 2477bc3d5698SJohn Baldwin beq .Lprocess_neon 2478bc3d5698SJohn Baldwin 2479bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 2480bc3d5698SJohn Baldwin cmp r6, #8*12 2481bc3d5698SJohn Baldwin veor d10, d10, d31 @ A[2][0] ^= *inp++ 2482bc3d5698SJohn Baldwin blo .Lprocess_neon 2483bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 2484bc3d5698SJohn Baldwin veor d12, d12, d31 @ A[2][1] ^= *inp++ 2485bc3d5698SJohn Baldwin beq .Lprocess_neon 2486bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 2487bc3d5698SJohn Baldwin cmp r6, #8*14 2488bc3d5698SJohn Baldwin veor d14, d14, d31 @ A[2][2] ^= *inp++ 2489bc3d5698SJohn Baldwin blo .Lprocess_neon 2490bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 2491bc3d5698SJohn Baldwin veor d16, d16, d31 @ A[2][3] ^= *inp++ 2492bc3d5698SJohn Baldwin beq .Lprocess_neon 2493bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 
2494bc3d5698SJohn Baldwin cmp r6, #8*16 2495bc3d5698SJohn Baldwin veor d18, d18, d31 @ A[2][4] ^= *inp++ 2496bc3d5698SJohn Baldwin blo .Lprocess_neon 2497bc3d5698SJohn Baldwin 2498bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 2499bc3d5698SJohn Baldwin veor d11, d11, d31 @ A[3][0] ^= *inp++ 2500bc3d5698SJohn Baldwin beq .Lprocess_neon 2501bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 2502bc3d5698SJohn Baldwin cmp r6, #8*18 2503bc3d5698SJohn Baldwin veor d13, d13, d31 @ A[3][1] ^= *inp++ 2504bc3d5698SJohn Baldwin blo .Lprocess_neon 2505bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 2506bc3d5698SJohn Baldwin veor d15, d15, d31 @ A[3][2] ^= *inp++ 2507bc3d5698SJohn Baldwin beq .Lprocess_neon 2508bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 2509bc3d5698SJohn Baldwin cmp r6, #8*20 2510bc3d5698SJohn Baldwin veor d17, d17, d31 @ A[3][3] ^= *inp++ 2511bc3d5698SJohn Baldwin blo .Lprocess_neon 2512bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 2513bc3d5698SJohn Baldwin veor d19, d19, d31 @ A[3][4] ^= *inp++ 2514bc3d5698SJohn Baldwin beq .Lprocess_neon 2515bc3d5698SJohn Baldwin 2516bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 2517bc3d5698SJohn Baldwin cmp r6, #8*22 2518bc3d5698SJohn Baldwin veor d20, d20, d31 @ A[4][0] ^= *inp++ 2519bc3d5698SJohn Baldwin blo .Lprocess_neon 2520bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 2521bc3d5698SJohn Baldwin veor d21, d21, d31 @ A[4][1] ^= *inp++ 2522bc3d5698SJohn Baldwin beq .Lprocess_neon 2523bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 2524bc3d5698SJohn Baldwin cmp r6, #8*24 2525bc3d5698SJohn Baldwin veor d22, d22, d31 @ A[4][2] ^= *inp++ 2526bc3d5698SJohn Baldwin blo .Lprocess_neon 2527bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 2528bc3d5698SJohn Baldwin veor d23, d23, d31 @ A[4][3] ^= *inp++ 2529bc3d5698SJohn Baldwin beq .Lprocess_neon 2530bc3d5698SJohn Baldwin vld1.8 {d31}, [r4]! 
2531bc3d5698SJohn Baldwin veor d24, d24, d31 @ A[4][4] ^= *inp++ 2532bc3d5698SJohn Baldwin 2533bc3d5698SJohn Baldwin.Lprocess_neon: 2534bc3d5698SJohn Baldwin bl KeccakF1600_neon 2535bc3d5698SJohn Baldwin b .Loop_absorb_neon 2536bc3d5698SJohn Baldwin 2537bc3d5698SJohn Baldwin.align 4 2538bc3d5698SJohn Baldwin.Labsorbed_neon: 2539bc3d5698SJohn Baldwin vst1.32 {d0}, [r0,:64]! @ A[0][0..4] 2540bc3d5698SJohn Baldwin vst1.32 {d2}, [r0,:64]! 2541bc3d5698SJohn Baldwin vst1.32 {d4}, [r0,:64]! 2542bc3d5698SJohn Baldwin vst1.32 {d6}, [r0,:64]! 2543bc3d5698SJohn Baldwin vst1.32 {d8}, [r0,:64]! 2544bc3d5698SJohn Baldwin 2545bc3d5698SJohn Baldwin vst1.32 {d1}, [r0,:64]! @ A[1][0..4] 2546bc3d5698SJohn Baldwin vst1.32 {d3}, [r0,:64]! 2547bc3d5698SJohn Baldwin vst1.32 {d5}, [r0,:64]! 2548bc3d5698SJohn Baldwin vst1.32 {d7}, [r0,:64]! 2549bc3d5698SJohn Baldwin vst1.32 {d9}, [r0,:64]! 2550bc3d5698SJohn Baldwin 2551bc3d5698SJohn Baldwin vst1.32 {d10}, [r0,:64]! @ A[2][0..4] 2552bc3d5698SJohn Baldwin vst1.32 {d12}, [r0,:64]! 2553bc3d5698SJohn Baldwin vst1.32 {d14}, [r0,:64]! 2554bc3d5698SJohn Baldwin vst1.32 {d16}, [r0,:64]! 2555bc3d5698SJohn Baldwin vst1.32 {d18}, [r0,:64]! 2556bc3d5698SJohn Baldwin 2557bc3d5698SJohn Baldwin vst1.32 {d11}, [r0,:64]! @ A[3][0..4] 2558bc3d5698SJohn Baldwin vst1.32 {d13}, [r0,:64]! 2559bc3d5698SJohn Baldwin vst1.32 {d15}, [r0,:64]! 2560bc3d5698SJohn Baldwin vst1.32 {d17}, [r0,:64]! 2561bc3d5698SJohn Baldwin vst1.32 {d19}, [r0,:64]! 2562bc3d5698SJohn Baldwin 2563bc3d5698SJohn Baldwin vst1.32 {d20,d21,d22,d23}, [r0,:64]! 
@ A[4][0..4] 2564bc3d5698SJohn Baldwin vst1.32 {d24}, [r0,:64] 2565bc3d5698SJohn Baldwin 2566bc3d5698SJohn Baldwin mov r0, r5 @ return value 2567bc3d5698SJohn Baldwin vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15} 2568bc3d5698SJohn Baldwin ldmia sp!, {r4,r5,r6,pc} 2569bc3d5698SJohn Baldwin.size SHA3_absorb_neon,.-SHA3_absorb_neon 2570bc3d5698SJohn Baldwin 2571bc3d5698SJohn Baldwin.globl SHA3_squeeze_neon 2572bc3d5698SJohn Baldwin.type SHA3_squeeze_neon, %function 2573bc3d5698SJohn Baldwin.align 5 2574bc3d5698SJohn BaldwinSHA3_squeeze_neon: 2575bc3d5698SJohn Baldwin stmdb sp!, {r4,r5,r6,lr} 2576bc3d5698SJohn Baldwin 2577bc3d5698SJohn Baldwin mov r4, r1 @ out 2578bc3d5698SJohn Baldwin mov r5, r2 @ len 2579bc3d5698SJohn Baldwin mov r6, r3 @ bsz 2580bc3d5698SJohn Baldwin mov r12, r0 @ A_flat 2581bc3d5698SJohn Baldwin mov r14, r3 @ bsz 2582bc3d5698SJohn Baldwin b .Loop_squeeze_neon 2583bc3d5698SJohn Baldwin 2584bc3d5698SJohn Baldwin.align 4 2585bc3d5698SJohn Baldwin.Loop_squeeze_neon: 2586bc3d5698SJohn Baldwin cmp r5, #8 2587bc3d5698SJohn Baldwin blo .Lsqueeze_neon_tail 2588bc3d5698SJohn Baldwin vld1.32 {d0}, [r12]! 2589bc3d5698SJohn Baldwin vst1.8 {d0}, [r4]! @ endian-neutral store 2590bc3d5698SJohn Baldwin 2591bc3d5698SJohn Baldwin subs r5, r5, #8 @ len -= 8 2592bc3d5698SJohn Baldwin beq .Lsqueeze_neon_done 2593bc3d5698SJohn Baldwin 2594bc3d5698SJohn Baldwin subs r14, r14, #8 @ bsz -= 8 2595bc3d5698SJohn Baldwin bhi .Loop_squeeze_neon 2596bc3d5698SJohn Baldwin 2597bc3d5698SJohn Baldwin vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15} 2598bc3d5698SJohn Baldwin 2599bc3d5698SJohn Baldwin vld1.32 {d0}, [r0,:64]! @ A[0][0..4] 2600bc3d5698SJohn Baldwin vld1.32 {d2}, [r0,:64]! 2601bc3d5698SJohn Baldwin vld1.32 {d4}, [r0,:64]! 2602bc3d5698SJohn Baldwin vld1.32 {d6}, [r0,:64]! 2603bc3d5698SJohn Baldwin vld1.32 {d8}, [r0,:64]! 2604bc3d5698SJohn Baldwin 2605bc3d5698SJohn Baldwin vld1.32 {d1}, [r0,:64]! @ A[1][0..4] 2606bc3d5698SJohn Baldwin vld1.32 {d3}, [r0,:64]! 
2607bc3d5698SJohn Baldwin vld1.32 {d5}, [r0,:64]! 2608bc3d5698SJohn Baldwin vld1.32 {d7}, [r0,:64]! 2609bc3d5698SJohn Baldwin vld1.32 {d9}, [r0,:64]! 2610bc3d5698SJohn Baldwin 2611bc3d5698SJohn Baldwin vld1.32 {d10}, [r0,:64]! @ A[2][0..4] 2612bc3d5698SJohn Baldwin vld1.32 {d12}, [r0,:64]! 2613bc3d5698SJohn Baldwin vld1.32 {d14}, [r0,:64]! 2614bc3d5698SJohn Baldwin vld1.32 {d16}, [r0,:64]! 2615bc3d5698SJohn Baldwin vld1.32 {d18}, [r0,:64]! 2616bc3d5698SJohn Baldwin 2617bc3d5698SJohn Baldwin vld1.32 {d11}, [r0,:64]! @ A[3][0..4] 2618bc3d5698SJohn Baldwin vld1.32 {d13}, [r0,:64]! 2619bc3d5698SJohn Baldwin vld1.32 {d15}, [r0,:64]! 2620bc3d5698SJohn Baldwin vld1.32 {d17}, [r0,:64]! 2621bc3d5698SJohn Baldwin vld1.32 {d19}, [r0,:64]! 2622bc3d5698SJohn Baldwin 2623bc3d5698SJohn Baldwin vld1.32 {d20,d21,d22,d23}, [r0,:64]! @ A[4][0..4] 2624bc3d5698SJohn Baldwin vld1.32 {d24}, [r0,:64] 2625bc3d5698SJohn Baldwin sub r0, r0, #24*8 @ rewind 2626bc3d5698SJohn Baldwin 2627bc3d5698SJohn Baldwin bl KeccakF1600_neon 2628bc3d5698SJohn Baldwin 2629bc3d5698SJohn Baldwin mov r12, r0 @ A_flat 2630bc3d5698SJohn Baldwin vst1.32 {d0}, [r0,:64]! @ A[0][0..4] 2631bc3d5698SJohn Baldwin vst1.32 {d2}, [r0,:64]! 2632bc3d5698SJohn Baldwin vst1.32 {d4}, [r0,:64]! 2633bc3d5698SJohn Baldwin vst1.32 {d6}, [r0,:64]! 2634bc3d5698SJohn Baldwin vst1.32 {d8}, [r0,:64]! 2635bc3d5698SJohn Baldwin 2636bc3d5698SJohn Baldwin vst1.32 {d1}, [r0,:64]! @ A[1][0..4] 2637bc3d5698SJohn Baldwin vst1.32 {d3}, [r0,:64]! 2638bc3d5698SJohn Baldwin vst1.32 {d5}, [r0,:64]! 2639bc3d5698SJohn Baldwin vst1.32 {d7}, [r0,:64]! 2640bc3d5698SJohn Baldwin vst1.32 {d9}, [r0,:64]! 2641bc3d5698SJohn Baldwin 2642bc3d5698SJohn Baldwin vst1.32 {d10}, [r0,:64]! @ A[2][0..4] 2643bc3d5698SJohn Baldwin vst1.32 {d12}, [r0,:64]! 2644bc3d5698SJohn Baldwin vst1.32 {d14}, [r0,:64]! 2645bc3d5698SJohn Baldwin vst1.32 {d16}, [r0,:64]! 2646bc3d5698SJohn Baldwin vst1.32 {d18}, [r0,:64]! 
2647bc3d5698SJohn Baldwin 2648bc3d5698SJohn Baldwin vst1.32 {d11}, [r0,:64]! @ A[3][0..4] 2649bc3d5698SJohn Baldwin vst1.32 {d13}, [r0,:64]! 2650bc3d5698SJohn Baldwin vst1.32 {d15}, [r0,:64]! 2651bc3d5698SJohn Baldwin vst1.32 {d17}, [r0,:64]! 2652bc3d5698SJohn Baldwin vst1.32 {d19}, [r0,:64]! 2653bc3d5698SJohn Baldwin 2654bc3d5698SJohn Baldwin vst1.32 {d20,d21,d22,d23}, [r0,:64]! @ A[4][0..4] 2655bc3d5698SJohn Baldwin mov r14, r6 @ bsz 2656bc3d5698SJohn Baldwin vst1.32 {d24}, [r0,:64] 2657bc3d5698SJohn Baldwin mov r0, r12 @ rewind 2658bc3d5698SJohn Baldwin 2659bc3d5698SJohn Baldwin vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15} 2660bc3d5698SJohn Baldwin b .Loop_squeeze_neon 2661bc3d5698SJohn Baldwin 2662bc3d5698SJohn Baldwin.align 4 2663bc3d5698SJohn Baldwin.Lsqueeze_neon_tail: 2664bc3d5698SJohn Baldwin ldmia r12, {r2,r3} 2665bc3d5698SJohn Baldwin cmp r5, #2 2666bc3d5698SJohn Baldwin strb r2, [r4],#1 @ endian-neutral store 2667bc3d5698SJohn Baldwin mov r2, r2, lsr#8 2668bc3d5698SJohn Baldwin blo .Lsqueeze_neon_done 2669bc3d5698SJohn Baldwin strb r2, [r4], #1 2670bc3d5698SJohn Baldwin mov r2, r2, lsr#8 2671bc3d5698SJohn Baldwin beq .Lsqueeze_neon_done 2672bc3d5698SJohn Baldwin strb r2, [r4], #1 2673bc3d5698SJohn Baldwin mov r2, r2, lsr#8 2674bc3d5698SJohn Baldwin cmp r5, #4 2675bc3d5698SJohn Baldwin blo .Lsqueeze_neon_done 2676bc3d5698SJohn Baldwin strb r2, [r4], #1 2677bc3d5698SJohn Baldwin beq .Lsqueeze_neon_done 2678bc3d5698SJohn Baldwin 2679bc3d5698SJohn Baldwin strb r3, [r4], #1 2680bc3d5698SJohn Baldwin mov r3, r3, lsr#8 2681bc3d5698SJohn Baldwin cmp r5, #6 2682bc3d5698SJohn Baldwin blo .Lsqueeze_neon_done 2683bc3d5698SJohn Baldwin strb r3, [r4], #1 2684bc3d5698SJohn Baldwin mov r3, r3, lsr#8 2685bc3d5698SJohn Baldwin beq .Lsqueeze_neon_done 2686bc3d5698SJohn Baldwin strb r3, [r4], #1 2687bc3d5698SJohn Baldwin 2688bc3d5698SJohn Baldwin.Lsqueeze_neon_done: 2689bc3d5698SJohn Baldwin ldmia sp!, {r4,r5,r6,pc} 2690bc3d5698SJohn Baldwin.size 
SHA3_squeeze_neon,.-SHA3_squeeze_neon 2691bc3d5698SJohn Baldwin#endif 2692bc3d5698SJohn Baldwin.byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 2693bc3d5698SJohn Baldwin.align 2 2694bc3d5698SJohn Baldwin.align 2 2695