/*
 * Reviewer notes for the definitions that start on this line (comments only;
 * the generated text below is untouched).  The header below states that this
 * file is generated from keccak1600-armv4.pl, so lasting documentation
 * belongs in that generator.
 *
 * iotas32
 *   Data object (.type %object) placed with ".align 5" (a 2^5 = 32-byte
 *   boundary under GNU as for ARM), holding 24 rows of two 32-bit words
 *   each: 24 x 8 = 192 bytes, which is what .size records.
 *   The visible round code reaches it with "adr r10,iotas32" plus the byte
 *   counter kept at [sp,#444].  The first half of .Lround2x loads the word
 *   pair at that offset with ldmia {r10,r11}; the second half loads the pair
 *   at offset +8 (labelled iotas[i].lo and iotas[i].hi there), then advances
 *   the counter by 16 and compares it with 192.  Each pair is XORed into the
 *   value that is stored as R[0][0].
 *   NOTE(review): the word pairs are presumably the Keccak round constants
 *   in a bit-interleaved 2x32-bit encoding rather than plain low and high
 *   halves (the round code rotates the two halves of a lane by different
 *   amounts) - confirm against the generator before editing any value.
 *
 * KeccakF1600_int and KeccakF1600_enter
 *   KeccakF1600_int points r12 at sp+0 and r10 at sp+40 and preloads r4-r9
 *   from sp+176 (annotated A[4][2..4]).  KeccakF1600_enter then saves lr at
 *   [sp,#440], stores zero at [sp,#444] (read back later as the counter)
 *   and branches to .Lround2x.  The routine continues beyond this line.
 */
1/* Do not modify. This file is auto-generated from keccak1600-armv4.pl. */ 2#include "arm_arch.h" 3 4.text 5 6#if defined(__thumb2__) 7.syntax unified 8.thumb 9#else 10.code 32 11#endif 12 13.type iotas32, %object 14.align 5 15iotas32: 16.long 0x00000001, 0x00000000 17.long 0x00000000, 0x00000089 18.long 0x00000000, 0x8000008b 19.long 0x00000000, 0x80008080 20.long 0x00000001, 0x0000008b 21.long 0x00000001, 0x00008000 22.long 0x00000001, 0x80008088 23.long 0x00000001, 0x80000082 24.long 0x00000000, 0x0000000b 25.long 0x00000000, 0x0000000a 26.long 0x00000001, 0x00008082 27.long 0x00000000, 0x00008003 28.long 0x00000001, 0x0000808b 29.long 0x00000001, 0x8000000b 30.long 0x00000001, 0x8000008a 31.long 0x00000001, 0x80000081 32.long 0x00000000, 0x80000081 33.long 0x00000000, 0x80000008 34.long 0x00000000, 0x00000083 35.long 0x00000000, 0x80008003 36.long 0x00000001, 0x80008088 37.long 0x00000000, 0x80000088 38.long 0x00000001, 0x00008000 39.long 0x00000000, 0x80008082 40.size iotas32,.-iotas32 41 42.type KeccakF1600_int, %function 43.align 5 44KeccakF1600_int: 45 add r9,sp,#176 46 add r12,sp,#0 47 add r10,sp,#40 48 ldmia r9,{r4,r5,r6,r7,r8,r9} @ A[4][2..4] 49KeccakF1600_enter: 50 str lr,[sp,#440] 51 eor r11,r11,r11 52 str r11,[sp,#444] 53 b .Lround2x 54 55.align 4 56.Lround2x: 57 ldmia r12,{r0,r1,r2,r3} @ A[0][0..1] 58 ldmia r10,{r10,r11,r12,r14} @ A[1][0..1] 59#ifdef __thumb2__ 60 eor r0,r0,r10 61 eor r1,r1,r11 62 eor r2,r2,r12 63 ldrd r10,r11,[sp,#56] 64 eor r3,r3,r14 65 ldrd r12,r14,[sp,#64] 66 eor r4,r4,r10 67 eor r5,r5,r11 68 eor r6,r6,r12 69 ldrd r10,r11,[sp,#72] 70 eor r7,r7,r14 71 ldrd r12,r14,[sp,#80] 72 eor r8,r8,r10 73 eor r9,r9,r11 74 eor r0,r0,r12 75 ldrd r10,r11,[sp,#88] 76 eor r1,r1,r14 77 ldrd r12,r14,[sp,#96] 78 eor r2,r2,r10 79 eor r3,r3,r11 80 eor r4,r4,r12 81 ldrd r10,r11,[sp,#104] 82 eor r5,r5,r14 83 ldrd r12,r14,[sp,#112] 84 eor r6,r6,r10 85 eor r7,r7,r11 86 eor r8,r8,r12 87 ldrd r10,r11,[sp,#120] 88 eor r9,r9,r14 89 ldrd r12,r14,[sp,#128] 90 
eor r0,r0,r10 91 eor r1,r1,r11 92 eor r2,r2,r12 93 ldrd r10,r11,[sp,#136] 94 eor r3,r3,r14 95 ldrd r12,r14,[sp,#144] 96 eor r4,r4,r10 97 eor r5,r5,r11 98 eor r6,r6,r12 99 ldrd r10,r11,[sp,#152] 100 eor r7,r7,r14 101 ldrd r12,r14,[sp,#160] 102 eor r8,r8,r10 103 eor r9,r9,r11 104 eor r0,r0,r12 105 ldrd r10,r11,[sp,#168] 106 eor r1,r1,r14 107 ldrd r12,r14,[sp,#16] 108 eor r2,r2,r10 109 eor r3,r3,r11 110 eor r4,r4,r12 111 ldrd r10,r11,[sp,#24] 112 eor r5,r5,r14 113 ldrd r12,r14,[sp,#32] 114#else 115 eor r0,r0,r10 116 add r10,sp,#56 117 eor r1,r1,r11 118 eor r2,r2,r12 119 eor r3,r3,r14 120 ldmia r10,{r10,r11,r12,r14} @ A[1][2..3] 121 eor r4,r4,r10 122 add r10,sp,#72 123 eor r5,r5,r11 124 eor r6,r6,r12 125 eor r7,r7,r14 126 ldmia r10,{r10,r11,r12,r14} @ A[1][4]..A[2][0] 127 eor r8,r8,r10 128 add r10,sp,#88 129 eor r9,r9,r11 130 eor r0,r0,r12 131 eor r1,r1,r14 132 ldmia r10,{r10,r11,r12,r14} @ A[2][1..2] 133 eor r2,r2,r10 134 add r10,sp,#104 135 eor r3,r3,r11 136 eor r4,r4,r12 137 eor r5,r5,r14 138 ldmia r10,{r10,r11,r12,r14} @ A[2][3..4] 139 eor r6,r6,r10 140 add r10,sp,#120 141 eor r7,r7,r11 142 eor r8,r8,r12 143 eor r9,r9,r14 144 ldmia r10,{r10,r11,r12,r14} @ A[3][0..1] 145 eor r0,r0,r10 146 add r10,sp,#136 147 eor r1,r1,r11 148 eor r2,r2,r12 149 eor r3,r3,r14 150 ldmia r10,{r10,r11,r12,r14} @ A[3][2..3] 151 eor r4,r4,r10 152 add r10,sp,#152 153 eor r5,r5,r11 154 eor r6,r6,r12 155 eor r7,r7,r14 156 ldmia r10,{r10,r11,r12,r14} @ A[3][4]..A[4][0] 157 eor r8,r8,r10 158 ldr r10,[sp,#168] @ A[4][1] 159 eor r9,r9,r11 160 ldr r11,[sp,#168+4] 161 eor r0,r0,r12 162 ldr r12,[sp,#16] @ A[0][2] 163 eor r1,r1,r14 164 ldr r14,[sp,#16+4] 165 eor r2,r2,r10 166 add r10,sp,#24 167 eor r3,r3,r11 168 eor r4,r4,r12 169 eor r5,r5,r14 170 ldmia r10,{r10,r11,r12,r14} @ A[0][3..4] 171#endif 172 eor r6,r6,r10 173 eor r7,r7,r11 174 eor r8,r8,r12 175 eor r9,r9,r14 176 177 eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0]; 178#ifndef __thumb2__ 179 str r10,[sp,#208] @ D[1] = E[0] 180#endif 181 
eor r11,r1,r4 182#ifndef __thumb2__ 183 str r11,[sp,#208+4] 184#else 185 strd r10,r11,[sp,#208] @ D[1] = E[0] 186#endif 187 eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3]; 188 eor r14,r7,r0 189#ifndef __thumb2__ 190 str r12,[sp,#232] @ D[4] = E[1] 191#endif 192 eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4]; 193#ifndef __thumb2__ 194 str r14,[sp,#232+4] 195#else 196 strd r12,r14,[sp,#232] @ D[4] = E[1] 197#endif 198 eor r1,r9,r2 199#ifndef __thumb2__ 200 str r0,[sp,#200] @ D[0] = C[0] 201#endif 202 eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1]; 203#ifndef __thumb2__ 204 ldr r7,[sp,#144] 205#endif 206 eor r3,r3,r6 207#ifndef __thumb2__ 208 str r1,[sp,#200+4] 209#else 210 strd r0,r1,[sp,#200] @ D[0] = C[0] 211#endif 212#ifndef __thumb2__ 213 ldr r6,[sp,#144+4] 214#else 215 ldrd r7,r6,[sp,#144] 216#endif 217#ifndef __thumb2__ 218 str r2,[sp,#216] @ D[2] = C[1] 219#endif 220 eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2]; 221#ifndef __thumb2__ 222 str r3,[sp,#216+4] 223#else 224 strd r2,r3,[sp,#216] @ D[2] = C[1] 225#endif 226 eor r5,r5,r8 227 228#ifndef __thumb2__ 229 ldr r8,[sp,#192] 230#endif 231#ifndef __thumb2__ 232 ldr r9,[sp,#192+4] 233#else 234 ldrd r8,r9,[sp,#192] 235#endif 236#ifndef __thumb2__ 237 str r4,[sp,#224] @ D[3] = C[2] 238#endif 239 eor r7,r7,r4 240#ifndef __thumb2__ 241 str r5,[sp,#224+4] 242#else 243 strd r4,r5,[sp,#224] @ D[3] = C[2] 244#endif 245 eor r6,r6,r5 246#ifndef __thumb2__ 247 ldr r4,[sp,#0] 248#endif 249 @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */ 250 @ mov r6,r6,ror#32-11 251#ifndef __thumb2__ 252 ldr r5,[sp,#0+4] 253#else 254 ldrd r4,r5,[sp,#0] 255#endif 256 eor r8,r8,r12 257 eor r9,r9,r14 258#ifndef __thumb2__ 259 ldr r12,[sp,#96] 260#endif 261 eor r0,r0,r4 262#ifndef __thumb2__ 263 ldr r14,[sp,#96+4] 264#else 265 ldrd r12,r14,[sp,#96] 266#endif 267 @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */ 268 @ mov r9,r9,ror#32-7 269 eor r1,r1,r5 @ 
C[0] = A[0][0] ^ C[0]; 270 eor r12,r12,r2 271#ifndef __thumb2__ 272 ldr r2,[sp,#48] 273#endif 274 eor r14,r14,r3 275#ifndef __thumb2__ 276 ldr r3,[sp,#48+4] 277#else 278 ldrd r2,r3,[sp,#48] 279#endif 280 mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); 281 ldr r12,[sp,#444] @ load counter 282 eor r2,r2,r10 283 adr r10,iotas32 284 mov r4,r14,ror#32-22 285 add r14,r10,r12 286 eor r3,r3,r11 287 ldmia r14,{r10,r11} @ iotas[i] 288 bic r12,r4,r2,ror#32-22 289 bic r14,r5,r3,ror#32-22 290 mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); 291 mov r3,r3,ror#32-22 292 eor r12,r12,r0 293 eor r14,r14,r1 294 eor r10,r10,r12 295 eor r11,r11,r14 296#ifndef __thumb2__ 297 str r10,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 298#endif 299 bic r12,r6,r4,ror#11 300#ifndef __thumb2__ 301 str r11,[sp,#240+4] 302#else 303 strd r10,r11,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 304#endif 305 bic r14,r7,r5,ror#10 306 bic r10,r8,r6,ror#32-(11-7) 307 bic r11,r9,r7,ror#32-(10-7) 308 eor r12,r2,r12,ror#32-11 309#ifndef __thumb2__ 310 str r12,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 311#endif 312 eor r14,r3,r14,ror#32-10 313#ifndef __thumb2__ 314 str r14,[sp,#248+4] 315#else 316 strd r12,r14,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 317#endif 318 eor r10,r4,r10,ror#32-7 319 eor r11,r5,r11,ror#32-7 320#ifndef __thumb2__ 321 str r10,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 322#endif 323 bic r12,r0,r8,ror#32-7 324#ifndef __thumb2__ 325 str r11,[sp,#256+4] 326#else 327 strd r10,r11,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 328#endif 329 bic r14,r1,r9,ror#32-7 330 eor r12,r12,r6,ror#32-11 331#ifndef __thumb2__ 332 str r12,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 333#endif 334 eor r14,r14,r7,ror#32-10 335#ifndef __thumb2__ 336 str r14,[sp,#264+4] 337#else 338 strd r12,r14,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 339#endif 340 bic r10,r2,r0 341 add r14,sp,#224 342#ifndef __thumb2__ 343 ldr r0,[sp,#24] @ A[0][3] 
344#endif 345 bic r11,r3,r1 346#ifndef __thumb2__ 347 ldr r1,[sp,#24+4] 348#else 349 ldrd r0,r1,[sp,#24] @ A[0][3] 350#endif 351 eor r10,r10,r8,ror#32-7 352 eor r11,r11,r9,ror#32-7 353#ifndef __thumb2__ 354 str r10,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 355#endif 356 add r9,sp,#200 357#ifndef __thumb2__ 358 str r11,[sp,#272+4] 359#else 360 strd r10,r11,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 361#endif 362 363 ldmia r14,{r10,r11,r12,r14} @ D[3..4] 364 ldmia r9,{r6,r7,r8,r9} @ D[0..1] 365 366#ifndef __thumb2__ 367 ldr r2,[sp,#72] @ A[1][4] 368#endif 369 eor r0,r0,r10 370#ifndef __thumb2__ 371 ldr r3,[sp,#72+4] 372#else 373 ldrd r2,r3,[sp,#72] @ A[1][4] 374#endif 375 eor r1,r1,r11 376 @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]); 377#ifndef __thumb2__ 378 ldr r10,[sp,#128] @ A[3][1] 379#endif 380 @ mov r1,r1,ror#32-14 381#ifndef __thumb2__ 382 ldr r11,[sp,#128+4] 383#else 384 ldrd r10,r11,[sp,#128] @ A[3][1] 385#endif 386 387 eor r2,r2,r12 388#ifndef __thumb2__ 389 ldr r4,[sp,#80] @ A[2][0] 390#endif 391 eor r3,r3,r14 392#ifndef __thumb2__ 393 ldr r5,[sp,#80+4] 394#else 395 ldrd r4,r5,[sp,#80] @ A[2][0] 396#endif 397 @ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]); 398 @ mov r3,r3,ror#32-10 399 400 eor r6,r6,r4 401#ifndef __thumb2__ 402 ldr r12,[sp,#216] @ D[2] 403#endif 404 eor r7,r7,r5 405#ifndef __thumb2__ 406 ldr r14,[sp,#216+4] 407#else 408 ldrd r12,r14,[sp,#216] @ D[2] 409#endif 410 mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]); 411 mov r4,r7,ror#32-2 412 413 eor r10,r10,r8 414#ifndef __thumb2__ 415 ldr r8,[sp,#176] @ A[4][2] 416#endif 417 eor r11,r11,r9 418#ifndef __thumb2__ 419 ldr r9,[sp,#176+4] 420#else 421 ldrd r8,r9,[sp,#176] @ A[4][2] 422#endif 423 mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]); 424 mov r6,r11,ror#32-23 425 426 bic r10,r4,r2,ror#32-10 427 bic r11,r5,r3,ror#32-10 428 eor r12,r12,r8 429 eor r14,r14,r9 430 mov r9,r12,ror#32-30 @ C[4] = 
ROL64(A[4][2] ^ D[2], rhotates[4][2]); 431 mov r8,r14,ror#32-31 432 eor r10,r10,r0,ror#32-14 433 eor r11,r11,r1,ror#32-14 434#ifndef __thumb2__ 435 str r10,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 436#endif 437 bic r12,r6,r4 438#ifndef __thumb2__ 439 str r11,[sp,#280+4] 440#else 441 strd r10,r11,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 442#endif 443 bic r14,r7,r5 444 eor r12,r12,r2,ror#32-10 445#ifndef __thumb2__ 446 str r12,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 447#endif 448 eor r14,r14,r3,ror#32-10 449#ifndef __thumb2__ 450 str r14,[sp,#288+4] 451#else 452 strd r12,r14,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 453#endif 454 bic r10,r8,r6 455 bic r11,r9,r7 456 bic r12,r0,r8,ror#14 457 bic r14,r1,r9,ror#14 458 eor r10,r10,r4 459 eor r11,r11,r5 460#ifndef __thumb2__ 461 str r10,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 462#endif 463 bic r2,r2,r0,ror#32-(14-10) 464#ifndef __thumb2__ 465 str r11,[sp,#296+4] 466#else 467 strd r10,r11,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 468#endif 469 eor r12,r6,r12,ror#32-14 470 bic r11,r3,r1,ror#32-(14-10) 471#ifndef __thumb2__ 472 str r12,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 473#endif 474 eor r14,r7,r14,ror#32-14 475#ifndef __thumb2__ 476 str r14,[sp,#304+4] 477#else 478 strd r12,r14,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 479#endif 480 add r12,sp,#208 481#ifndef __thumb2__ 482 ldr r1,[sp,#8] @ A[0][1] 483#endif 484 eor r10,r8,r2,ror#32-10 485#ifndef __thumb2__ 486 ldr r0,[sp,#8+4] 487#else 488 ldrd r1,r0,[sp,#8] @ A[0][1] 489#endif 490 eor r11,r9,r11,ror#32-10 491#ifndef __thumb2__ 492 str r10,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 493#endif 494#ifndef __thumb2__ 495 str r11,[sp,#312+4] 496#else 497 strd r10,r11,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 498#endif 499 500 add r9,sp,#224 501 ldmia r12,{r10,r11,r12,r14} @ D[1..2] 502#ifndef __thumb2__ 503 ldr r2,[sp,#56] @ A[1][2] 504#endif 505#ifndef __thumb2__ 506 ldr r3,[sp,#56+4] 507#else 508 ldrd r2,r3,[sp,#56] @ A[1][2] 509#endif 
510 ldmia r9,{r6,r7,r8,r9} @ D[3..4] 511 512 eor r1,r1,r10 513#ifndef __thumb2__ 514 ldr r4,[sp,#104] @ A[2][3] 515#endif 516 eor r0,r0,r11 517#ifndef __thumb2__ 518 ldr r5,[sp,#104+4] 519#else 520 ldrd r4,r5,[sp,#104] @ A[2][3] 521#endif 522 mov r0,r0,ror#32-1 @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]); 523 524 eor r2,r2,r12 525#ifndef __thumb2__ 526 ldr r10,[sp,#152] @ A[3][4] 527#endif 528 eor r3,r3,r14 529#ifndef __thumb2__ 530 ldr r11,[sp,#152+4] 531#else 532 ldrd r10,r11,[sp,#152] @ A[3][4] 533#endif 534 @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]); 535#ifndef __thumb2__ 536 ldr r12,[sp,#200] @ D[0] 537#endif 538 @ mov r3,r3,ror#32-3 539#ifndef __thumb2__ 540 ldr r14,[sp,#200+4] 541#else 542 ldrd r12,r14,[sp,#200] @ D[0] 543#endif 544 545 eor r4,r4,r6 546 eor r5,r5,r7 547 @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]); 548 @ mov r4,r7,ror#32-13 @ [track reverse order below] 549 550 eor r10,r10,r8 551#ifndef __thumb2__ 552 ldr r8,[sp,#160] @ A[4][0] 553#endif 554 eor r11,r11,r9 555#ifndef __thumb2__ 556 ldr r9,[sp,#160+4] 557#else 558 ldrd r8,r9,[sp,#160] @ A[4][0] 559#endif 560 mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]); 561 mov r7,r11,ror#32-4 562 563 eor r12,r12,r8 564 eor r14,r14,r9 565 mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]); 566 mov r9,r14,ror#32-9 567 568 bic r10,r5,r2,ror#13-3 569 bic r11,r4,r3,ror#12-3 570 bic r12,r6,r5,ror#32-13 571 bic r14,r7,r4,ror#32-12 572 eor r10,r0,r10,ror#32-13 573 eor r11,r1,r11,ror#32-12 574#ifndef __thumb2__ 575 str r10,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 576#endif 577 eor r12,r12,r2,ror#32-3 578#ifndef __thumb2__ 579 str r11,[sp,#320+4] 580#else 581 strd r10,r11,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 582#endif 583 eor r14,r14,r3,ror#32-3 584#ifndef __thumb2__ 585 str r12,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 586#endif 587 bic r10,r8,r6 588 bic r11,r9,r7 589#ifndef __thumb2__ 590 str r14,[sp,#328+4] 
591#else 592 strd r12,r14,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 593#endif 594 eor r10,r10,r5,ror#32-13 595 eor r11,r11,r4,ror#32-12 596#ifndef __thumb2__ 597 str r10,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 598#endif 599 bic r12,r0,r8 600#ifndef __thumb2__ 601 str r11,[sp,#336+4] 602#else 603 strd r10,r11,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 604#endif 605 bic r14,r1,r9 606 eor r12,r12,r6 607 eor r14,r14,r7 608#ifndef __thumb2__ 609 str r12,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 610#endif 611 bic r10,r2,r0,ror#3 612#ifndef __thumb2__ 613 str r14,[sp,#344+4] 614#else 615 strd r12,r14,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 616#endif 617 bic r11,r3,r1,ror#3 618#ifndef __thumb2__ 619 ldr r1,[sp,#32] @ A[0][4] [in reverse order] 620#endif 621 eor r10,r8,r10,ror#32-3 622#ifndef __thumb2__ 623 ldr r0,[sp,#32+4] 624#else 625 ldrd r1,r0,[sp,#32] @ A[0][4] [in reverse order] 626#endif 627 eor r11,r9,r11,ror#32-3 628#ifndef __thumb2__ 629 str r10,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 630#endif 631 add r9,sp,#208 632#ifndef __thumb2__ 633 str r11,[sp,#352+4] 634#else 635 strd r10,r11,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 636#endif 637 638#ifndef __thumb2__ 639 ldr r10,[sp,#232] @ D[4] 640#endif 641#ifndef __thumb2__ 642 ldr r11,[sp,#232+4] 643#else 644 ldrd r10,r11,[sp,#232] @ D[4] 645#endif 646#ifndef __thumb2__ 647 ldr r12,[sp,#200] @ D[0] 648#endif 649#ifndef __thumb2__ 650 ldr r14,[sp,#200+4] 651#else 652 ldrd r12,r14,[sp,#200] @ D[0] 653#endif 654 655 ldmia r9,{r6,r7,r8,r9} @ D[1..2] 656 657 eor r1,r1,r10 658#ifndef __thumb2__ 659 ldr r2,[sp,#40] @ A[1][0] 660#endif 661 eor r0,r0,r11 662#ifndef __thumb2__ 663 ldr r3,[sp,#40+4] 664#else 665 ldrd r2,r3,[sp,#40] @ A[1][0] 666#endif 667 @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]); 668#ifndef __thumb2__ 669 ldr r4,[sp,#88] @ A[2][1] 670#endif 671 @ mov r0,r11,ror#32-14 @ [was loaded in reverse order] 672#ifndef __thumb2__ 673 ldr r5,[sp,#88+4] 674#else 675 
ldrd r4,r5,[sp,#88] @ A[2][1] 676#endif 677 678 eor r2,r2,r12 679#ifndef __thumb2__ 680 ldr r10,[sp,#136] @ A[3][2] 681#endif 682 eor r3,r3,r14 683#ifndef __thumb2__ 684 ldr r11,[sp,#136+4] 685#else 686 ldrd r10,r11,[sp,#136] @ A[3][2] 687#endif 688 @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]); 689#ifndef __thumb2__ 690 ldr r12,[sp,#224] @ D[3] 691#endif 692 @ mov r3,r3,ror#32-18 693#ifndef __thumb2__ 694 ldr r14,[sp,#224+4] 695#else 696 ldrd r12,r14,[sp,#224] @ D[3] 697#endif 698 699 eor r6,r6,r4 700 eor r7,r7,r5 701 mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]); 702 mov r5,r7,ror#32-5 703 704 eor r10,r10,r8 705#ifndef __thumb2__ 706 ldr r8,[sp,#184] @ A[4][3] 707#endif 708 eor r11,r11,r9 709#ifndef __thumb2__ 710 ldr r9,[sp,#184+4] 711#else 712 ldrd r8,r9,[sp,#184] @ A[4][3] 713#endif 714 mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]); 715 mov r6,r11,ror#32-8 716 717 eor r12,r12,r8 718 eor r14,r14,r9 719 mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]); 720 mov r9,r14,ror#32-28 721 722 bic r10,r4,r2,ror#32-18 723 bic r11,r5,r3,ror#32-18 724 eor r10,r10,r0,ror#32-14 725 eor r11,r11,r1,ror#32-13 726#ifndef __thumb2__ 727 str r10,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 728#endif 729 bic r12,r6,r4 730#ifndef __thumb2__ 731 str r11,[sp,#360+4] 732#else 733 strd r10,r11,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 734#endif 735 bic r14,r7,r5 736 eor r12,r12,r2,ror#32-18 737#ifndef __thumb2__ 738 str r12,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 739#endif 740 eor r14,r14,r3,ror#32-18 741#ifndef __thumb2__ 742 str r14,[sp,#368+4] 743#else 744 strd r12,r14,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 745#endif 746 bic r10,r8,r6 747 bic r11,r9,r7 748 bic r12,r0,r8,ror#14 749 bic r14,r1,r9,ror#13 750 eor r10,r10,r4 751 eor r11,r11,r5 752#ifndef __thumb2__ 753 str r10,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 754#endif 755 bic r2,r2,r0,ror#18-14 756#ifndef __thumb2__ 757 str 
r11,[sp,#376+4] 758#else 759 strd r10,r11,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 760#endif 761 eor r12,r6,r12,ror#32-14 762 bic r11,r3,r1,ror#18-13 763 eor r14,r7,r14,ror#32-13 764#ifndef __thumb2__ 765 str r12,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 766#endif 767#ifndef __thumb2__ 768 str r14,[sp,#384+4] 769#else 770 strd r12,r14,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 771#endif 772 add r14,sp,#216 773#ifndef __thumb2__ 774 ldr r0,[sp,#16] @ A[0][2] 775#endif 776 eor r10,r8,r2,ror#32-18 777#ifndef __thumb2__ 778 ldr r1,[sp,#16+4] 779#else 780 ldrd r0,r1,[sp,#16] @ A[0][2] 781#endif 782 eor r11,r9,r11,ror#32-18 783#ifndef __thumb2__ 784 str r10,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 785#endif 786#ifndef __thumb2__ 787 str r11,[sp,#392+4] 788#else 789 strd r10,r11,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 790#endif 791 792 ldmia r14,{r10,r11,r12,r14} @ D[2..3] 793#ifndef __thumb2__ 794 ldr r2,[sp,#64] @ A[1][3] 795#endif 796#ifndef __thumb2__ 797 ldr r3,[sp,#64+4] 798#else 799 ldrd r2,r3,[sp,#64] @ A[1][3] 800#endif 801#ifndef __thumb2__ 802 ldr r6,[sp,#232] @ D[4] 803#endif 804#ifndef __thumb2__ 805 ldr r7,[sp,#232+4] 806#else 807 ldrd r6,r7,[sp,#232] @ D[4] 808#endif 809 810 eor r0,r0,r10 811#ifndef __thumb2__ 812 ldr r4,[sp,#112] @ A[2][4] 813#endif 814 eor r1,r1,r11 815#ifndef __thumb2__ 816 ldr r5,[sp,#112+4] 817#else 818 ldrd r4,r5,[sp,#112] @ A[2][4] 819#endif 820 @ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]); 821#ifndef __thumb2__ 822 ldr r8,[sp,#200] @ D[0] 823#endif 824 @ mov r1,r1,ror#32-31 825#ifndef __thumb2__ 826 ldr r9,[sp,#200+4] 827#else 828 ldrd r8,r9,[sp,#200] @ D[0] 829#endif 830 831 eor r12,r12,r2 832#ifndef __thumb2__ 833 ldr r10,[sp,#120] @ A[3][0] 834#endif 835 eor r14,r14,r3 836#ifndef __thumb2__ 837 ldr r11,[sp,#120+4] 838#else 839 ldrd r10,r11,[sp,#120] @ A[3][0] 840#endif 841 mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]); 842#ifndef __thumb2__ 843 ldr r12,[sp,#208] @ 
D[1] 844#endif 845 mov r2,r14,ror#32-28 846#ifndef __thumb2__ 847 ldr r14,[sp,#208+4] 848#else 849 ldrd r12,r14,[sp,#208] @ D[1] 850#endif 851 852 eor r6,r6,r4 853 eor r7,r7,r5 854 mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]); 855 mov r4,r7,ror#32-20 856 857 eor r10,r10,r8 858#ifndef __thumb2__ 859 ldr r8,[sp,#168] @ A[4][1] 860#endif 861 eor r11,r11,r9 862#ifndef __thumb2__ 863 ldr r9,[sp,#168+4] 864#else 865 ldrd r8,r9,[sp,#168] @ A[4][1] 866#endif 867 mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]); 868 mov r6,r11,ror#32-21 869 870 eor r8,r8,r12 871 eor r9,r9,r14 872 @ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]); 873 @ mov r9,r3,ror#32-1 874 875 bic r10,r4,r2 876 bic r11,r5,r3 877 eor r10,r10,r0,ror#32-31 878#ifndef __thumb2__ 879 str r10,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 880#endif 881 eor r11,r11,r1,ror#32-31 882#ifndef __thumb2__ 883 str r11,[sp,#400+4] 884#else 885 strd r10,r11,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 886#endif 887 bic r12,r6,r4 888 bic r14,r7,r5 889 eor r12,r12,r2 890 eor r14,r14,r3 891#ifndef __thumb2__ 892 str r12,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 893#endif 894 bic r10,r8,r6,ror#1 895#ifndef __thumb2__ 896 str r14,[sp,#408+4] 897#else 898 strd r12,r14,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 899#endif 900 bic r11,r9,r7,ror#1 901 bic r12,r0,r8,ror#31-1 902 bic r14,r1,r9,ror#31-1 903 eor r4,r4,r10,ror#32-1 904#ifndef __thumb2__ 905 str r4,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 906#endif 907 eor r5,r5,r11,ror#32-1 908#ifndef __thumb2__ 909 str r5,[sp,#416+4] 910#else 911 strd r4,r5,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 912#endif 913 eor r6,r6,r12,ror#32-31 914 eor r7,r7,r14,ror#32-31 915#ifndef __thumb2__ 916 str r6,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 917#endif 918 bic r10,r2,r0,ror#32-31 919#ifndef __thumb2__ 920 str r7,[sp,#424+4] 921#else 922 strd r6,r7,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 923#endif 924 bic 
r11,r3,r1,ror#32-31 925 add r12,sp,#240 926 eor r8,r10,r8,ror#32-1 927 add r10,sp,#280 928 eor r9,r11,r9,ror#32-1 929#ifndef __thumb2__ 930 str r8,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]); 931#endif 932#ifndef __thumb2__ 933 str r9,[sp,#432+4] 934#else 935 strd r8,r9,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]); 936#endif 937 ldmia r12,{r0,r1,r2,r3} @ A[0][0..1] 938 ldmia r10,{r10,r11,r12,r14} @ A[1][0..1] 939#ifdef __thumb2__ 940 eor r0,r0,r10 941 eor r1,r1,r11 942 eor r2,r2,r12 943 ldrd r10,r11,[sp,#296] 944 eor r3,r3,r14 945 ldrd r12,r14,[sp,#304] 946 eor r4,r4,r10 947 eor r5,r5,r11 948 eor r6,r6,r12 949 ldrd r10,r11,[sp,#312] 950 eor r7,r7,r14 951 ldrd r12,r14,[sp,#320] 952 eor r8,r8,r10 953 eor r9,r9,r11 954 eor r0,r0,r12 955 ldrd r10,r11,[sp,#328] 956 eor r1,r1,r14 957 ldrd r12,r14,[sp,#336] 958 eor r2,r2,r10 959 eor r3,r3,r11 960 eor r4,r4,r12 961 ldrd r10,r11,[sp,#344] 962 eor r5,r5,r14 963 ldrd r12,r14,[sp,#352] 964 eor r6,r6,r10 965 eor r7,r7,r11 966 eor r8,r8,r12 967 ldrd r10,r11,[sp,#360] 968 eor r9,r9,r14 969 ldrd r12,r14,[sp,#368] 970 eor r0,r0,r10 971 eor r1,r1,r11 972 eor r2,r2,r12 973 ldrd r10,r11,[sp,#376] 974 eor r3,r3,r14 975 ldrd r12,r14,[sp,#384] 976 eor r4,r4,r10 977 eor r5,r5,r11 978 eor r6,r6,r12 979 ldrd r10,r11,[sp,#392] 980 eor r7,r7,r14 981 ldrd r12,r14,[sp,#400] 982 eor r8,r8,r10 983 eor r9,r9,r11 984 eor r0,r0,r12 985 ldrd r10,r11,[sp,#408] 986 eor r1,r1,r14 987 ldrd r12,r14,[sp,#256] 988 eor r2,r2,r10 989 eor r3,r3,r11 990 eor r4,r4,r12 991 ldrd r10,r11,[sp,#264] 992 eor r5,r5,r14 993 ldrd r12,r14,[sp,#272] 994#else 995 eor r0,r0,r10 996 add r10,sp,#296 997 eor r1,r1,r11 998 eor r2,r2,r12 999 eor r3,r3,r14 1000 ldmia r10,{r10,r11,r12,r14} @ A[1][2..3] 1001 eor r4,r4,r10 1002 add r10,sp,#312 1003 eor r5,r5,r11 1004 eor r6,r6,r12 1005 eor r7,r7,r14 1006 ldmia r10,{r10,r11,r12,r14} @ A[1][4]..A[2][0] 1007 eor r8,r8,r10 1008 add r10,sp,#328 1009 eor r9,r9,r11 1010 eor r0,r0,r12 1011 eor r1,r1,r14 1012 ldmia r10,{r10,r11,r12,r14} @ 
A[2][1..2] 1013 eor r2,r2,r10 1014 add r10,sp,#344 1015 eor r3,r3,r11 1016 eor r4,r4,r12 1017 eor r5,r5,r14 1018 ldmia r10,{r10,r11,r12,r14} @ A[2][3..4] 1019 eor r6,r6,r10 1020 add r10,sp,#360 1021 eor r7,r7,r11 1022 eor r8,r8,r12 1023 eor r9,r9,r14 1024 ldmia r10,{r10,r11,r12,r14} @ A[3][0..1] 1025 eor r0,r0,r10 1026 add r10,sp,#376 1027 eor r1,r1,r11 1028 eor r2,r2,r12 1029 eor r3,r3,r14 1030 ldmia r10,{r10,r11,r12,r14} @ A[3][2..3] 1031 eor r4,r4,r10 1032 add r10,sp,#392 1033 eor r5,r5,r11 1034 eor r6,r6,r12 1035 eor r7,r7,r14 1036 ldmia r10,{r10,r11,r12,r14} @ A[3][4]..A[4][0] 1037 eor r8,r8,r10 1038 ldr r10,[sp,#408] @ A[4][1] 1039 eor r9,r9,r11 1040 ldr r11,[sp,#408+4] 1041 eor r0,r0,r12 1042 ldr r12,[sp,#256] @ A[0][2] 1043 eor r1,r1,r14 1044 ldr r14,[sp,#256+4] 1045 eor r2,r2,r10 1046 add r10,sp,#264 1047 eor r3,r3,r11 1048 eor r4,r4,r12 1049 eor r5,r5,r14 1050 ldmia r10,{r10,r11,r12,r14} @ A[0][3..4] 1051#endif 1052 eor r6,r6,r10 1053 eor r7,r7,r11 1054 eor r8,r8,r12 1055 eor r9,r9,r14 1056 1057 eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0]; 1058#ifndef __thumb2__ 1059 str r10,[sp,#208] @ D[1] = E[0] 1060#endif 1061 eor r11,r1,r4 1062#ifndef __thumb2__ 1063 str r11,[sp,#208+4] 1064#else 1065 strd r10,r11,[sp,#208] @ D[1] = E[0] 1066#endif 1067 eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3]; 1068 eor r14,r7,r0 1069#ifndef __thumb2__ 1070 str r12,[sp,#232] @ D[4] = E[1] 1071#endif 1072 eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4]; 1073#ifndef __thumb2__ 1074 str r14,[sp,#232+4] 1075#else 1076 strd r12,r14,[sp,#232] @ D[4] = E[1] 1077#endif 1078 eor r1,r9,r2 1079#ifndef __thumb2__ 1080 str r0,[sp,#200] @ D[0] = C[0] 1081#endif 1082 eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1]; 1083#ifndef __thumb2__ 1084 ldr r7,[sp,#384] 1085#endif 1086 eor r3,r3,r6 1087#ifndef __thumb2__ 1088 str r1,[sp,#200+4] 1089#else 1090 strd r0,r1,[sp,#200] @ D[0] = C[0] 1091#endif 1092#ifndef __thumb2__ 1093 ldr r6,[sp,#384+4] 1094#else 1095 ldrd 
r7,r6,[sp,#384] 1096#endif 1097#ifndef __thumb2__ 1098 str r2,[sp,#216] @ D[2] = C[1] 1099#endif 1100 eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2]; 1101#ifndef __thumb2__ 1102 str r3,[sp,#216+4] 1103#else 1104 strd r2,r3,[sp,#216] @ D[2] = C[1] 1105#endif 1106 eor r5,r5,r8 1107 1108#ifndef __thumb2__ 1109 ldr r8,[sp,#432] 1110#endif 1111#ifndef __thumb2__ 1112 ldr r9,[sp,#432+4] 1113#else 1114 ldrd r8,r9,[sp,#432] 1115#endif 1116#ifndef __thumb2__ 1117 str r4,[sp,#224] @ D[3] = C[2] 1118#endif 1119 eor r7,r7,r4 1120#ifndef __thumb2__ 1121 str r5,[sp,#224+4] 1122#else 1123 strd r4,r5,[sp,#224] @ D[3] = C[2] 1124#endif 1125 eor r6,r6,r5 1126#ifndef __thumb2__ 1127 ldr r4,[sp,#240] 1128#endif 1129 @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */ 1130 @ mov r6,r6,ror#32-11 1131#ifndef __thumb2__ 1132 ldr r5,[sp,#240+4] 1133#else 1134 ldrd r4,r5,[sp,#240] 1135#endif 1136 eor r8,r8,r12 1137 eor r9,r9,r14 1138#ifndef __thumb2__ 1139 ldr r12,[sp,#336] 1140#endif 1141 eor r0,r0,r4 1142#ifndef __thumb2__ 1143 ldr r14,[sp,#336+4] 1144#else 1145 ldrd r12,r14,[sp,#336] 1146#endif 1147 @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */ 1148 @ mov r9,r9,ror#32-7 1149 eor r1,r1,r5 @ C[0] = A[0][0] ^ C[0]; 1150 eor r12,r12,r2 1151#ifndef __thumb2__ 1152 ldr r2,[sp,#288] 1153#endif 1154 eor r14,r14,r3 1155#ifndef __thumb2__ 1156 ldr r3,[sp,#288+4] 1157#else 1158 ldrd r2,r3,[sp,#288] 1159#endif 1160 mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); 1161 ldr r12,[sp,#444] @ load counter 1162 eor r2,r2,r10 1163 adr r10,iotas32 1164 mov r4,r14,ror#32-22 1165 add r14,r10,r12 1166 eor r3,r3,r11 1167#ifndef __thumb2__ 1168 ldr r10,[r14,#8] @ iotas[i].lo 1169#endif 1170 add r12,r12,#16 1171#ifndef __thumb2__ 1172 ldr r11,[r14,#12] @ iotas[i].hi 1173#else 1174 ldrd r10,r11,[r14,#8] @ iotas[i].lo 1175#endif 1176 cmp r12,#192 1177 str r12,[sp,#444] @ store counter 1178 bic r12,r4,r2,ror#32-22 1179 bic 
r14,r5,r3,ror#32-22 1180 mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); 1181 mov r3,r3,ror#32-22 1182 eor r12,r12,r0 1183 eor r14,r14,r1 1184 eor r10,r10,r12 1185 eor r11,r11,r14 1186#ifndef __thumb2__ 1187 str r10,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 1188#endif 1189 bic r12,r6,r4,ror#11 1190#ifndef __thumb2__ 1191 str r11,[sp,#0+4] 1192#else 1193 strd r10,r11,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 1194#endif 1195 bic r14,r7,r5,ror#10 1196 bic r10,r8,r6,ror#32-(11-7) 1197 bic r11,r9,r7,ror#32-(10-7) 1198 eor r12,r2,r12,ror#32-11 1199#ifndef __thumb2__ 1200 str r12,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 1201#endif 1202 eor r14,r3,r14,ror#32-10 1203#ifndef __thumb2__ 1204 str r14,[sp,#8+4] 1205#else 1206 strd r12,r14,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 1207#endif 1208 eor r10,r4,r10,ror#32-7 1209 eor r11,r5,r11,ror#32-7 1210#ifndef __thumb2__ 1211 str r10,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 1212#endif 1213 bic r12,r0,r8,ror#32-7 1214#ifndef __thumb2__ 1215 str r11,[sp,#16+4] 1216#else 1217 strd r10,r11,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 1218#endif 1219 bic r14,r1,r9,ror#32-7 1220 eor r12,r12,r6,ror#32-11 1221#ifndef __thumb2__ 1222 str r12,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 1223#endif 1224 eor r14,r14,r7,ror#32-10 1225#ifndef __thumb2__ 1226 str r14,[sp,#24+4] 1227#else 1228 strd r12,r14,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 1229#endif 1230 bic r10,r2,r0 1231 add r14,sp,#224 1232#ifndef __thumb2__ 1233 ldr r0,[sp,#264] @ A[0][3] 1234#endif 1235 bic r11,r3,r1 1236#ifndef __thumb2__ 1237 ldr r1,[sp,#264+4] 1238#else 1239 ldrd r0,r1,[sp,#264] @ A[0][3] 1240#endif 1241 eor r10,r10,r8,ror#32-7 1242 eor r11,r11,r9,ror#32-7 1243#ifndef __thumb2__ 1244 str r10,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 1245#endif 1246 add r9,sp,#200 1247#ifndef __thumb2__ 1248 str r11,[sp,#32+4] 1249#else 1250 strd r10,r11,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 1251#endif 1252 1253 ldmia 
r14,{r10,r11,r12,r14} @ D[3..4] 1254 ldmia r9,{r6,r7,r8,r9} @ D[0..1] 1255 1256#ifndef __thumb2__ 1257 ldr r2,[sp,#312] @ A[1][4] 1258#endif 1259 eor r0,r0,r10 1260#ifndef __thumb2__ 1261 ldr r3,[sp,#312+4] 1262#else 1263 ldrd r2,r3,[sp,#312] @ A[1][4] 1264#endif 1265 eor r1,r1,r11 1266 @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]); 1267#ifndef __thumb2__ 1268 ldr r10,[sp,#368] @ A[3][1] 1269#endif 1270 @ mov r1,r1,ror#32-14 1271#ifndef __thumb2__ 1272 ldr r11,[sp,#368+4] 1273#else 1274 ldrd r10,r11,[sp,#368] @ A[3][1] 1275#endif 1276 1277 eor r2,r2,r12 1278#ifndef __thumb2__ 1279 ldr r4,[sp,#320] @ A[2][0] 1280#endif 1281 eor r3,r3,r14 1282#ifndef __thumb2__ 1283 ldr r5,[sp,#320+4] 1284#else 1285 ldrd r4,r5,[sp,#320] @ A[2][0] 1286#endif 1287 @ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]); 1288 @ mov r3,r3,ror#32-10 1289 1290 eor r6,r6,r4 1291#ifndef __thumb2__ 1292 ldr r12,[sp,#216] @ D[2] 1293#endif 1294 eor r7,r7,r5 1295#ifndef __thumb2__ 1296 ldr r14,[sp,#216+4] 1297#else 1298 ldrd r12,r14,[sp,#216] @ D[2] 1299#endif 1300 mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]); 1301 mov r4,r7,ror#32-2 1302 1303 eor r10,r10,r8 1304#ifndef __thumb2__ 1305 ldr r8,[sp,#416] @ A[4][2] 1306#endif 1307 eor r11,r11,r9 1308#ifndef __thumb2__ 1309 ldr r9,[sp,#416+4] 1310#else 1311 ldrd r8,r9,[sp,#416] @ A[4][2] 1312#endif 1313 mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]); 1314 mov r6,r11,ror#32-23 1315 1316 bic r10,r4,r2,ror#32-10 1317 bic r11,r5,r3,ror#32-10 1318 eor r12,r12,r8 1319 eor r14,r14,r9 1320 mov r9,r12,ror#32-30 @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]); 1321 mov r8,r14,ror#32-31 1322 eor r10,r10,r0,ror#32-14 1323 eor r11,r11,r1,ror#32-14 1324#ifndef __thumb2__ 1325 str r10,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 1326#endif 1327 bic r12,r6,r4 1328#ifndef __thumb2__ 1329 str r11,[sp,#40+4] 1330#else 1331 strd r10,r11,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 1332#endif 1333 
bic r14,r7,r5 1334 eor r12,r12,r2,ror#32-10 1335#ifndef __thumb2__ 1336 str r12,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 1337#endif 1338 eor r14,r14,r3,ror#32-10 1339#ifndef __thumb2__ 1340 str r14,[sp,#48+4] 1341#else 1342 strd r12,r14,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 1343#endif 1344 bic r10,r8,r6 1345 bic r11,r9,r7 1346 bic r12,r0,r8,ror#14 1347 bic r14,r1,r9,ror#14 1348 eor r10,r10,r4 1349 eor r11,r11,r5 1350#ifndef __thumb2__ 1351 str r10,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 1352#endif 1353 bic r2,r2,r0,ror#32-(14-10) 1354#ifndef __thumb2__ 1355 str r11,[sp,#56+4] 1356#else 1357 strd r10,r11,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 1358#endif 1359 eor r12,r6,r12,ror#32-14 1360 bic r11,r3,r1,ror#32-(14-10) 1361#ifndef __thumb2__ 1362 str r12,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 1363#endif 1364 eor r14,r7,r14,ror#32-14 1365#ifndef __thumb2__ 1366 str r14,[sp,#64+4] 1367#else 1368 strd r12,r14,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 1369#endif 1370 add r12,sp,#208 1371#ifndef __thumb2__ 1372 ldr r1,[sp,#248] @ A[0][1] 1373#endif 1374 eor r10,r8,r2,ror#32-10 1375#ifndef __thumb2__ 1376 ldr r0,[sp,#248+4] 1377#else 1378 ldrd r1,r0,[sp,#248] @ A[0][1] 1379#endif 1380 eor r11,r9,r11,ror#32-10 1381#ifndef __thumb2__ 1382 str r10,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 1383#endif 1384#ifndef __thumb2__ 1385 str r11,[sp,#72+4] 1386#else 1387 strd r10,r11,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 1388#endif 1389 1390 add r9,sp,#224 1391 ldmia r12,{r10,r11,r12,r14} @ D[1..2] 1392#ifndef __thumb2__ 1393 ldr r2,[sp,#296] @ A[1][2] 1394#endif 1395#ifndef __thumb2__ 1396 ldr r3,[sp,#296+4] 1397#else 1398 ldrd r2,r3,[sp,#296] @ A[1][2] 1399#endif 1400 ldmia r9,{r6,r7,r8,r9} @ D[3..4] 1401 1402 eor r1,r1,r10 1403#ifndef __thumb2__ 1404 ldr r4,[sp,#344] @ A[2][3] 1405#endif 1406 eor r0,r0,r11 1407#ifndef __thumb2__ 1408 ldr r5,[sp,#344+4] 1409#else 1410 ldrd r4,r5,[sp,#344] @ A[2][3] 1411#endif 1412 mov r0,r0,ror#32-1 @ C[0] = 
ROL64(A[0][1] ^ D[1], rhotates[0][1]); 1413 1414 eor r2,r2,r12 1415#ifndef __thumb2__ 1416 ldr r10,[sp,#392] @ A[3][4] 1417#endif 1418 eor r3,r3,r14 1419#ifndef __thumb2__ 1420 ldr r11,[sp,#392+4] 1421#else 1422 ldrd r10,r11,[sp,#392] @ A[3][4] 1423#endif 1424 @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]); 1425#ifndef __thumb2__ 1426 ldr r12,[sp,#200] @ D[0] 1427#endif 1428 @ mov r3,r3,ror#32-3 1429#ifndef __thumb2__ 1430 ldr r14,[sp,#200+4] 1431#else 1432 ldrd r12,r14,[sp,#200] @ D[0] 1433#endif 1434 1435 eor r4,r4,r6 1436 eor r5,r5,r7 1437 @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]); 1438 @ mov r4,r7,ror#32-13 @ [track reverse order below] 1439 1440 eor r10,r10,r8 1441#ifndef __thumb2__ 1442 ldr r8,[sp,#400] @ A[4][0] 1443#endif 1444 eor r11,r11,r9 1445#ifndef __thumb2__ 1446 ldr r9,[sp,#400+4] 1447#else 1448 ldrd r8,r9,[sp,#400] @ A[4][0] 1449#endif 1450 mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]); 1451 mov r7,r11,ror#32-4 1452 1453 eor r12,r12,r8 1454 eor r14,r14,r9 1455 mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]); 1456 mov r9,r14,ror#32-9 1457 1458 bic r10,r5,r2,ror#13-3 1459 bic r11,r4,r3,ror#12-3 1460 bic r12,r6,r5,ror#32-13 1461 bic r14,r7,r4,ror#32-12 1462 eor r10,r0,r10,ror#32-13 1463 eor r11,r1,r11,ror#32-12 1464#ifndef __thumb2__ 1465 str r10,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 1466#endif 1467 eor r12,r12,r2,ror#32-3 1468#ifndef __thumb2__ 1469 str r11,[sp,#80+4] 1470#else 1471 strd r10,r11,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 1472#endif 1473 eor r14,r14,r3,ror#32-3 1474#ifndef __thumb2__ 1475 str r12,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 1476#endif 1477 bic r10,r8,r6 1478 bic r11,r9,r7 1479#ifndef __thumb2__ 1480 str r14,[sp,#88+4] 1481#else 1482 strd r12,r14,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 1483#endif 1484 eor r10,r10,r5,ror#32-13 1485 eor r11,r11,r4,ror#32-12 1486#ifndef __thumb2__ 1487 str r10,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] 
& C[4]); 1488#endif 1489 bic r12,r0,r8 1490#ifndef __thumb2__ 1491 str r11,[sp,#96+4] 1492#else 1493 strd r10,r11,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 1494#endif 1495 bic r14,r1,r9 1496 eor r12,r12,r6 1497 eor r14,r14,r7 1498#ifndef __thumb2__ 1499 str r12,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 1500#endif 1501 bic r10,r2,r0,ror#3 1502#ifndef __thumb2__ 1503 str r14,[sp,#104+4] 1504#else 1505 strd r12,r14,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 1506#endif 1507 bic r11,r3,r1,ror#3 1508#ifndef __thumb2__ 1509 ldr r1,[sp,#272] @ A[0][4] [in reverse order] 1510#endif 1511 eor r10,r8,r10,ror#32-3 1512#ifndef __thumb2__ 1513 ldr r0,[sp,#272+4] 1514#else 1515 ldrd r1,r0,[sp,#272] @ A[0][4] [in reverse order] 1516#endif 1517 eor r11,r9,r11,ror#32-3 1518#ifndef __thumb2__ 1519 str r10,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 1520#endif 1521 add r9,sp,#208 1522#ifndef __thumb2__ 1523 str r11,[sp,#112+4] 1524#else 1525 strd r10,r11,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 1526#endif 1527 1528#ifndef __thumb2__ 1529 ldr r10,[sp,#232] @ D[4] 1530#endif 1531#ifndef __thumb2__ 1532 ldr r11,[sp,#232+4] 1533#else 1534 ldrd r10,r11,[sp,#232] @ D[4] 1535#endif 1536#ifndef __thumb2__ 1537 ldr r12,[sp,#200] @ D[0] 1538#endif 1539#ifndef __thumb2__ 1540 ldr r14,[sp,#200+4] 1541#else 1542 ldrd r12,r14,[sp,#200] @ D[0] 1543#endif 1544 1545 ldmia r9,{r6,r7,r8,r9} @ D[1..2] 1546 1547 eor r1,r1,r10 1548#ifndef __thumb2__ 1549 ldr r2,[sp,#280] @ A[1][0] 1550#endif 1551 eor r0,r0,r11 1552#ifndef __thumb2__ 1553 ldr r3,[sp,#280+4] 1554#else 1555 ldrd r2,r3,[sp,#280] @ A[1][0] 1556#endif 1557 @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]); 1558#ifndef __thumb2__ 1559 ldr r4,[sp,#328] @ A[2][1] 1560#endif 1561 @ mov r0,r11,ror#32-14 @ [was loaded in reverse order] 1562#ifndef __thumb2__ 1563 ldr r5,[sp,#328+4] 1564#else 1565 ldrd r4,r5,[sp,#328] @ A[2][1] 1566#endif 1567 1568 eor r2,r2,r12 1569#ifndef __thumb2__ 1570 ldr r10,[sp,#376] @ A[3][2] 
1571#endif 1572 eor r3,r3,r14 1573#ifndef __thumb2__ 1574 ldr r11,[sp,#376+4] 1575#else 1576 ldrd r10,r11,[sp,#376] @ A[3][2] 1577#endif 1578 @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]); 1579#ifndef __thumb2__ 1580 ldr r12,[sp,#224] @ D[3] 1581#endif 1582 @ mov r3,r3,ror#32-18 1583#ifndef __thumb2__ 1584 ldr r14,[sp,#224+4] 1585#else 1586 ldrd r12,r14,[sp,#224] @ D[3] 1587#endif 1588 1589 eor r6,r6,r4 1590 eor r7,r7,r5 1591 mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]); 1592 mov r5,r7,ror#32-5 1593 1594 eor r10,r10,r8 1595#ifndef __thumb2__ 1596 ldr r8,[sp,#424] @ A[4][3] 1597#endif 1598 eor r11,r11,r9 1599#ifndef __thumb2__ 1600 ldr r9,[sp,#424+4] 1601#else 1602 ldrd r8,r9,[sp,#424] @ A[4][3] 1603#endif 1604 mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]); 1605 mov r6,r11,ror#32-8 1606 1607 eor r12,r12,r8 1608 eor r14,r14,r9 1609 mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]); 1610 mov r9,r14,ror#32-28 1611 1612 bic r10,r4,r2,ror#32-18 1613 bic r11,r5,r3,ror#32-18 1614 eor r10,r10,r0,ror#32-14 1615 eor r11,r11,r1,ror#32-13 1616#ifndef __thumb2__ 1617 str r10,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 1618#endif 1619 bic r12,r6,r4 1620#ifndef __thumb2__ 1621 str r11,[sp,#120+4] 1622#else 1623 strd r10,r11,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 1624#endif 1625 bic r14,r7,r5 1626 eor r12,r12,r2,ror#32-18 1627#ifndef __thumb2__ 1628 str r12,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 1629#endif 1630 eor r14,r14,r3,ror#32-18 1631#ifndef __thumb2__ 1632 str r14,[sp,#128+4] 1633#else 1634 strd r12,r14,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 1635#endif 1636 bic r10,r8,r6 1637 bic r11,r9,r7 1638 bic r12,r0,r8,ror#14 1639 bic r14,r1,r9,ror#13 1640 eor r10,r10,r4 1641 eor r11,r11,r5 1642#ifndef __thumb2__ 1643 str r10,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 1644#endif 1645 bic r2,r2,r0,ror#18-14 1646#ifndef __thumb2__ 1647 str r11,[sp,#136+4] 1648#else 1649 strd 
r10,r11,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 1650#endif 1651 eor r12,r6,r12,ror#32-14 1652 bic r11,r3,r1,ror#18-13 1653 eor r14,r7,r14,ror#32-13 1654#ifndef __thumb2__ 1655 str r12,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 1656#endif 1657#ifndef __thumb2__ 1658 str r14,[sp,#144+4] 1659#else 1660 strd r12,r14,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 1661#endif 1662 add r14,sp,#216 1663#ifndef __thumb2__ 1664 ldr r0,[sp,#256] @ A[0][2] 1665#endif 1666 eor r10,r8,r2,ror#32-18 1667#ifndef __thumb2__ 1668 ldr r1,[sp,#256+4] 1669#else 1670 ldrd r0,r1,[sp,#256] @ A[0][2] 1671#endif 1672 eor r11,r9,r11,ror#32-18 1673#ifndef __thumb2__ 1674 str r10,[sp,#152] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 1675#endif 1676#ifndef __thumb2__ 1677 str r11,[sp,#152+4] 1678#else 1679 strd r10,r11,[sp,#152] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 1680#endif 1681 1682 ldmia r14,{r10,r11,r12,r14} @ D[2..3] 1683#ifndef __thumb2__ 1684 ldr r2,[sp,#304] @ A[1][3] 1685#endif 1686#ifndef __thumb2__ 1687 ldr r3,[sp,#304+4] 1688#else 1689 ldrd r2,r3,[sp,#304] @ A[1][3] 1690#endif 1691#ifndef __thumb2__ 1692 ldr r6,[sp,#232] @ D[4] 1693#endif 1694#ifndef __thumb2__ 1695 ldr r7,[sp,#232+4] 1696#else 1697 ldrd r6,r7,[sp,#232] @ D[4] 1698#endif 1699 1700 eor r0,r0,r10 1701#ifndef __thumb2__ 1702 ldr r4,[sp,#352] @ A[2][4] 1703#endif 1704 eor r1,r1,r11 1705#ifndef __thumb2__ 1706 ldr r5,[sp,#352+4] 1707#else 1708 ldrd r4,r5,[sp,#352] @ A[2][4] 1709#endif 1710 @ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]); 1711#ifndef __thumb2__ 1712 ldr r8,[sp,#200] @ D[0] 1713#endif 1714 @ mov r1,r1,ror#32-31 1715#ifndef __thumb2__ 1716 ldr r9,[sp,#200+4] 1717#else 1718 ldrd r8,r9,[sp,#200] @ D[0] 1719#endif 1720 1721 eor r12,r12,r2 1722#ifndef __thumb2__ 1723 ldr r10,[sp,#360] @ A[3][0] 1724#endif 1725 eor r14,r14,r3 1726#ifndef __thumb2__ 1727 ldr r11,[sp,#360+4] 1728#else 1729 ldrd r10,r11,[sp,#360] @ A[3][0] 1730#endif 1731 mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], 
rhotates[1][3]); 1732#ifndef __thumb2__ 1733 ldr r12,[sp,#208] @ D[1] 1734#endif 1735 mov r2,r14,ror#32-28 1736#ifndef __thumb2__ 1737 ldr r14,[sp,#208+4] 1738#else 1739 ldrd r12,r14,[sp,#208] @ D[1] 1740#endif 1741 1742 eor r6,r6,r4 1743 eor r7,r7,r5 1744 mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]); 1745 mov r4,r7,ror#32-20 1746 1747 eor r10,r10,r8 1748#ifndef __thumb2__ 1749 ldr r8,[sp,#408] @ A[4][1] 1750#endif 1751 eor r11,r11,r9 1752#ifndef __thumb2__ 1753 ldr r9,[sp,#408+4] 1754#else 1755 ldrd r8,r9,[sp,#408] @ A[4][1] 1756#endif 1757 mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]); 1758 mov r6,r11,ror#32-21 1759 1760 eor r8,r8,r12 1761 eor r9,r9,r14 1762 @ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]); 1763 @ mov r9,r3,ror#32-1 1764 1765 bic r10,r4,r2 1766 bic r11,r5,r3 1767 eor r10,r10,r0,ror#32-31 1768#ifndef __thumb2__ 1769 str r10,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 1770#endif 1771 eor r11,r11,r1,ror#32-31 1772#ifndef __thumb2__ 1773 str r11,[sp,#160+4] 1774#else 1775 strd r10,r11,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 1776#endif 1777 bic r12,r6,r4 1778 bic r14,r7,r5 1779 eor r12,r12,r2 1780 eor r14,r14,r3 1781#ifndef __thumb2__ 1782 str r12,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 1783#endif 1784 bic r10,r8,r6,ror#1 1785#ifndef __thumb2__ 1786 str r14,[sp,#168+4] 1787#else 1788 strd r12,r14,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 1789#endif 1790 bic r11,r9,r7,ror#1 1791 bic r12,r0,r8,ror#31-1 1792 bic r14,r1,r9,ror#31-1 1793 eor r4,r4,r10,ror#32-1 1794#ifndef __thumb2__ 1795 str r4,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 1796#endif 1797 eor r5,r5,r11,ror#32-1 1798#ifndef __thumb2__ 1799 str r5,[sp,#176+4] 1800#else 1801 strd r4,r5,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 1802#endif 1803 eor r6,r6,r12,ror#32-31 1804 eor r7,r7,r14,ror#32-31 1805#ifndef __thumb2__ 1806 str r6,[sp,#184] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 1807#endif 1808 bic r10,r2,r0,ror#32-31 
@ ---------------------------------------------------------------------
@ Tail of KeccakF1600_int (the routine begins before this point).
@ Stores the last words of row 4 of the round result, re-establishes
@ the pointers that .Lround2x reloads through, then loops or returns.
@ ---------------------------------------------------------------------
#ifndef	__thumb2__
	str	r7,[sp,#184+4]		@ second word of R[4][3]; first word was stored just before
#else
	strd	r6,r7,[sp,#184]		@ R[4][3] = C[3] ^= (~C[4] & C[0]);
#endif
	bic	r11,r3,r1,ror#32-31
	add	r12,sp,#0		@ r12 -> A[0][0], .Lround2x starts with ldmia r12
	eor	r8,r10,r8,ror#32-1
	add	r10,sp,#40		@ r10 -> A[1][0], .Lround2x continues with ldmia r10
	eor	r9,r11,r9,ror#32-1
#ifndef	__thumb2__
	str	r8,[sp,#192]		@ R[4][4] = C[4] ^= (~C[0] & C[1]);
#endif
#ifndef	__thumb2__
	str	r9,[sp,#192+4]
#else
	strd	r8,r9,[sp,#192]		@ R[4][4] = C[4] ^= (~C[0] & C[1]);
#endif
	blo	.Lround2x		@ flags come from earlier in the round (not set in this tail);
					@ presumably the round-counter compare - confirm upstream

	ldr	pc,[sp,#440]		@ return via the lr that KeccakF1600_enter stored at [sp,#440]
.size	KeccakF1600_int,.-KeccakF1600_int

@ ---------------------------------------------------------------------
@ KeccakF1600 -- copy the state to the stack, run KeccakF1600_int over
@ the copy (entered at KeccakF1600_enter), copy the result back.
@ File-local: no .globl directive accompanies it here.
@
@ In:    r0 = pointer to the state A; 50 32-bit words (5 x 40 bytes)
@             are read and later overwritten.
@ Out:   memory at the original r0 holds the new state.
@ Regs:  r4-r11 are saved and restored; r0-r3, r12 and lr are clobbered.
@ Frame after the prologue (byte offsets from sp):
@          0..199   working copy of A
@        200..455   scratch used by KeccakF1600_int (D[5], T[5][5],
@                   return address at 440, word at 444)
@        456        caller r0 (pointer to A), lowest slot of the push
@ ---------------------------------------------------------------------
.type	KeccakF1600, %function
.align	5
KeccakF1600:
	stmdb	sp!,{r0,r4-r11,lr}	@ 10 words; r0 lands at the lowest address
	sub	sp,sp,#440+16			@ space for A[5][5],D[5],T[5][5],...

	add	r10,r0,#40		@ r10 = source cursor, second 40-byte row
	add	r11,sp,#40		@ r11 = destination cursor on the stack
	ldmia	r0, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ copy A[5][5] to stack
	stmia	sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ last row: r4-r9 = words 176..196 = A[4][2..4]
	add	r12,sp,#0		@ r12 -> A[0][0], as KeccakF1600_int sets up itself
	add	r10,sp,#40		@ r10 -> A[1][0], likewise
	stmia	r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}

	@ Enter past the register preload at the top of KeccakF1600_int:
	@ r4-r9, r10 and r12 already hold what that preload would produce.
	bl	KeccakF1600_enter

	ldr	r11, [sp,#440+16]		@ restore pointer to A
	ldmia	sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ return A[5][5]
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ r10 = sp+40 here, set by the tail of KeccakF1600_int
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}

	add	sp,sp,#440+20		@ drop the frame plus the saved r0 slot
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
.size	KeccakF1600,.-KeccakF1600

@ ---------------------------------------------------------------------
@ SHA3_absorb -- XOR whole input blocks into the state and run
@ KeccakF1600_int after each block.
@
@ In:    r0 = pointer to the state A (50 32-bit words)
@        r1 = input byte pointer
@        r2 = len, number of input bytes
@        r3 = bsz, block size in bytes; consumed 8 bytes per step
@ Out:   r0 = input bytes left over (len reduced by bsz per block,
@             so the result is below bsz)
@ Each 8 input bytes are read byte-wise as two little-endian words and
@ bit-interleaved before the XOR: the first state word of a lane takes
@ the even-numbered bits, the second word the odd-numbered bits.
@ Frame after the prologue (byte offsets from sp):
@          0..199   working copy of A
@        456..468   masks 0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff
@        472        saved r0 (pointer to A)
@        476        saved r1, kept updated as the input cursor
@        480        saved r2, overwritten with the running len
@        484        saved r3 (bsz)
@ NOTE(review): nothing here checks that bsz is a non-zero multiple of
@ 8 - presumably guaranteed by the caller, confirm.
@ ---------------------------------------------------------------------
.globl	SHA3_absorb
.type	SHA3_absorb,%function
.align	5
SHA3_absorb:
	stmdb	sp!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	sub	sp,sp,#456+16

	add	r10,r0,#40
	@ mov	r11,r1
	mov	r12,r2			@ r12 = len
	mov	r14,r3			@ r14 = bsz
	cmp	r2,r3
	blo	.Labsorb_abort		@ less than one block: touch nothing, return len

	add	r11,sp,#0
	ldmia	r0, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ copy A[5][5] to stack
	stmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}

	ldr	r11,[sp,#476]		@ restore r11
					@ (the copy clobbered r1; r11 = input pointer from its save slot)
#ifdef	__thumb2__
	mov	r9,#0x00ff00ff
	mov	r8,#0x0f0f0f0f
	mov	r7,#0x33333333
	mov	r6,#0x55555555
#else
	mov	r6,#0x11		@ compose constants
	mov	r8,#0x0f
	mov	r9,#0xff
	orr	r6,r6,r6,lsl#8
	orr	r8,r8,r8,lsl#8
	orr	r6,r6,r6,lsl#16		@ 0x11111111
	orr	r9,r9,r9,lsl#16		@ 0x00ff00ff
	orr	r8,r8,r8,lsl#16		@ 0x0f0f0f0f
	orr	r7,r6,r6,lsl#1		@ 0x33333333
	orr	r6,r6,r6,lsl#2		@ 0x55555555
#endif
	str	r9,[sp,#468]		@ park the masks so they can be reloaded after
	str	r8,[sp,#464]		@ KeccakF1600_int has used r6-r9
	str	r7,[sp,#460]
	str	r6,[sp,#456]
	b	.Loop_absorb

.align	4
.Loop_absorb:
	subs	r0,r12,r14		@ r0 = len - bsz
	blo	.Labsorbed		@ not a whole block left
	add	r10,sp,#0		@ r10 = cursor over the stack copy of A
	str	r0,[sp,#480]		@ save len - bsz

.align	4
.Loop_block:
	ldrb	r0,[r11],#1		@ eight byte loads: no alignment or endianness assumption
	ldrb	r1,[r11],#1
	ldrb	r2,[r11],#1
	ldrb	r3,[r11],#1
	ldrb	r4,[r11],#1
	orr	r0,r0,r1,lsl#8
	ldrb	r1,[r11],#1
	orr	r0,r0,r2,lsl#16
	ldrb	r2,[r11],#1
	orr	r0,r0,r3,lsl#24		@ lo
	ldrb	r3,[r11],#1
	orr	r1,r4,r1,lsl#8
	orr	r1,r1,r2,lsl#16
	orr	r1,r1,r3,lsl#24		@ hi

	@ Bit interleave: r2/r3 gather the even bits of lo/hi into their
	@ low halves, r0/r1 gather the odd bits into their high halves.
	and	r2,r0,r6		@ &=0x55555555
	and	r0,r0,r6,lsl#1		@ &=0xaaaaaaaa
	and	r3,r1,r6		@ &=0x55555555
	and	r1,r1,r6,lsl#1		@ &=0xaaaaaaaa
	orr	r2,r2,r2,lsr#1
	orr	r0,r0,r0,lsl#1
	orr	r3,r3,r3,lsr#1
	orr	r1,r1,r1,lsl#1
	and	r2,r2,r7		@ &=0x33333333
	and	r0,r0,r7,lsl#2		@ &=0xcccccccc
	and	r3,r3,r7		@ &=0x33333333
	and	r1,r1,r7,lsl#2		@ &=0xcccccccc
	orr	r2,r2,r2,lsr#2
	orr	r0,r0,r0,lsl#2
	orr	r3,r3,r3,lsr#2
	orr	r1,r1,r1,lsl#2
	and	r2,r2,r8		@ &=0x0f0f0f0f
	and	r0,r0,r8,lsl#4		@ &=0xf0f0f0f0
	and	r3,r3,r8		@ &=0x0f0f0f0f
	and	r1,r1,r8,lsl#4		@ &=0xf0f0f0f0
	ldmia	r10,{r4,r5}		@ A_flat[i]
	orr	r2,r2,r2,lsr#4
	orr	r0,r0,r0,lsl#4
	orr	r3,r3,r3,lsr#4
	orr	r1,r1,r1,lsl#4
	and	r2,r2,r9		@ &=0x00ff00ff
	and	r0,r0,r9,lsl#8		@ &=0xff00ff00
	and	r3,r3,r9		@ &=0x00ff00ff
	and	r1,r1,r9,lsl#8		@ &=0xff00ff00
	orr	r2,r2,r2,lsr#8
	orr	r0,r0,r0,lsl#8
	orr	r3,r3,r3,lsr#8
	orr	r1,r1,r1,lsl#8

	mov	r2,r2,lsl#16		@ the shift pairs below also discard the unwanted halves
	mov	r1,r1,lsr#16
	eor	r4,r4,r3,lsl#16		@ first word, high half: even bits of hi
	eor	r5,r5,r0,lsr#16		@ second word, low half: odd bits of lo
	eor	r4,r4,r2,lsr#16		@ first word, low half: even bits of lo
	eor	r5,r5,r1,lsl#16		@ second word, high half: odd bits of hi
	stmia	r10!,{r4,r5}		@ A_flat[i++] ^= BitInterleave(inp[0..7])

	subs	r14,r14,#8		@ r14 counts down the bytes left in this block
	bhi	.Loop_block

	str	r11,[sp,#476]		@ input cursor must survive the call

	bl	KeccakF1600_int

	add	r14,sp,#456
	ldmia	r14,{r6,r7,r8,r9,r10,r11,r12,r14}	@ restore constants and variables
					@ r6-r9 masks, r10 = A, r11 = inp, r12 = len, r14 = bsz
	b	.Loop_absorb

.align	4
.Labsorbed:
	@ Reached only after at least one block (the entry check rules out
	@ len < bsz), so r10 holds the pointer to A reloaded above.
	add	r11,sp,#40
	ldmia	sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ return A[5][5]
	ldmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}

.Labsorb_abort:
	add	sp,sp,#456+32		@ drop the 472-byte frame and the saved r0-r3
@ SHA3_absorb epilogue, continued: the preceding instruction already moved
@ sp past the frame and the saved r0-r3.
	mov	r0,r12			@ return value
					@ (r12 = len still unprocessed)
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
.size	SHA3_absorb,.-SHA3_absorb

@ ---------------------------------------------------------------------
@ SHA3_squeeze -- write len output bytes taken from the state, running
@ KeccakF1600 on the state each time bsz bytes have been consumed and
@ more output is still wanted.
@
@ In:    r0 = pointer to the state A (kept in bit-interleaved form:
@             first word of a lane = even bits, second word = odd bits)
@        r1 = output byte pointer
@        r2 = len, number of bytes to produce
@        r3 = bsz, block size in bytes; consumed 8 bytes per step
@ Out:   len bytes stored at r1; no value is returned in r0.
@        len == 0 stores nothing (see the guard below).
@ Regs:  r4-r10 are saved and restored.
@ Stack after both pushes (byte offsets from sp):
@          0..12   masks 0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff
@         16       saved r0 (pointer to A)
@         20       saved r3 (bsz)
@         24..     saved r4-r10, lr
@ ---------------------------------------------------------------------
.globl	SHA3_squeeze
.type	SHA3_squeeze,%function
.align	5
SHA3_squeeze:
	stmdb	sp!,{r0,r3-r10,lr}

	mov	r10,r0			@ r10 = read cursor over A_flat
	mov	r4,r1			@ r4  = output cursor
	mov	r5,r2			@ r5  = bytes still to produce
	mov	r12,r3			@ r12 = bytes left in the current block

#ifdef	__thumb2__
	mov	r9,#0x00ff00ff
	mov	r8,#0x0f0f0f0f
	mov	r7,#0x33333333
	mov	r6,#0x55555555
#else
	mov	r6,#0x11		@ compose constants
	mov	r8,#0x0f
	mov	r9,#0xff
	orr	r6,r6,r6,lsl#8
	orr	r8,r8,r8,lsl#8
	orr	r6,r6,r6,lsl#16		@ 0x11111111
	orr	r9,r9,r9,lsl#16		@ 0x00ff00ff
	orr	r8,r8,r8,lsl#16		@ 0x0f0f0f0f
	orr	r7,r6,r6,lsl#1		@ 0x33333333
	orr	r6,r6,r6,lsl#2		@ 0x55555555
#endif
	stmdb	sp!,{r6,r7,r8,r9}	@ masks are reloaded from here after KeccakF1600

	@ Guard for len == 0.  The tail path stores a byte before it tests
	@ the count, so without this check a zero length would write seven
	@ bytes to the output.  Placed after the second push so that
	@ .Lsqueeze_done unwinds exactly what has been pushed.
	cmp	r5,#0
	beq	.Lsqueeze_done

	mov	r14,r10			@ r14 keeps the base of A for the KeccakF1600 call
	b	.Loop_squeeze

.align	4
.Loop_squeeze:
	ldmia	r10!,{r0,r1}		@ A_flat[i++]

	@ Undo the bit interleave: r2/r3 rebuild the low output word from
	@ the low halves of both state words, r0/r1 the high output word
	@ from the high halves.
	mov	r2,r0,lsl#16
	mov	r3,r1,lsl#16		@ r3 = r1 << 16
	mov	r2,r2,lsr#16		@ r2 = r0 & 0x0000ffff
	mov	r1,r1,lsr#16
	mov	r0,r0,lsr#16		@ r0 = r0 >> 16
	mov	r1,r1,lsl#16		@ r1 = r1 & 0xffff0000

	orr	r2,r2,r2,lsl#8
	orr	r3,r3,r3,lsr#8
	orr	r0,r0,r0,lsl#8
	orr	r1,r1,r1,lsr#8
	and	r2,r2,r9		@ &=0x00ff00ff
	and	r3,r3,r9,lsl#8		@ &=0xff00ff00
	and	r0,r0,r9		@ &=0x00ff00ff
	and	r1,r1,r9,lsl#8		@ &=0xff00ff00
	orr	r2,r2,r2,lsl#4
	orr	r3,r3,r3,lsr#4
	orr	r0,r0,r0,lsl#4
	orr	r1,r1,r1,lsr#4
	and	r2,r2,r8		@ &=0x0f0f0f0f
	and	r3,r3,r8,lsl#4		@ &=0xf0f0f0f0
	and	r0,r0,r8		@ &=0x0f0f0f0f
	and	r1,r1,r8,lsl#4		@ &=0xf0f0f0f0
	orr	r2,r2,r2,lsl#2
	orr	r3,r3,r3,lsr#2
	orr	r0,r0,r0,lsl#2
	orr	r1,r1,r1,lsr#2
	and	r2,r2,r7		@ &=0x33333333
	and	r3,r3,r7,lsl#2		@ &=0xcccccccc
	and	r0,r0,r7		@ &=0x33333333
	and	r1,r1,r7,lsl#2		@ &=0xcccccccc
	orr	r2,r2,r2,lsl#1
	orr	r3,r3,r3,lsr#1
	orr	r0,r0,r0,lsl#1
	orr	r1,r1,r1,lsr#1
	and	r2,r2,r6		@ &=0x55555555
	and	r3,r3,r6,lsl#1		@ &=0xaaaaaaaa
	and	r0,r0,r6		@ &=0x55555555
	and	r1,r1,r6,lsl#1		@ &=0xaaaaaaaa

	orr	r2,r2,r3		@ r2 = low 32 bits of the lane
	orr	r0,r0,r1		@ r0 = high 32 bits of the lane

	cmp	r5,#8
	blo	.Lsqueeze_tail		@ fewer than 8 bytes wanted: partial store
	mov	r1,r2,lsr#8		@ byte stores: little-endian output, any alignment
	strb	r2,[r4],#1
	mov	r3,r2,lsr#16
	strb	r1,[r4],#1
	mov	r2,r2,lsr#24
	strb	r3,[r4],#1
	strb	r2,[r4],#1

	mov	r1,r0,lsr#8
	strb	r0,[r4],#1
	mov	r3,r0,lsr#16
	strb	r1,[r4],#1
	mov	r0,r0,lsr#24
	strb	r3,[r4],#1
	strb	r0,[r4],#1
	subs	r5,r5,#8
	beq	.Lsqueeze_done

	subs	r12,r12,#8		@ bsz -= 8
	bhi	.Loop_squeeze

	mov	r0,r14			@ original r10

	bl	KeccakF1600		@ preserves r4-r11, so r4/r5 survive; r12/r14 do not

	ldmia	sp,{r6,r7,r8,r9,r10,r12}	@ restore constants and variables
					@ r10 = saved r0 (A), r12 = saved r3 (bsz)
	mov	r14,r10
	b	.Loop_squeeze

.align	4
.Lsqueeze_tail:
	@ 1..7 bytes remain.  Each step stores first and tests afterwards,
	@ which is why a zero count must never reach this label.
	strb	r2,[r4],#1
	mov	r2,r2,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r2,[r4],#1
	mov	r2,r2,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r2,[r4],#1
	mov	r2,r2,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r2,[r4],#1
	subs	r5,r5,#1
	beq	.Lsqueeze_done

	strb	r0,[r4],#1
	mov	r0,r0,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r0,[r4],#1
	mov	r0,r0,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r0,[r4]			@ seventh byte; the count cannot exceed 7 here
	b	.Lsqueeze_done

.align	4
.Lsqueeze_done:
	add	sp,sp,#24		@ drop the four masks plus the saved r0 and r3
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size	SHA3_squeeze,.-SHA3_squeeze
@ NEON code path starts here; the matching #endif lies further down the file.
#if __ARM_MAX_ARCH__>=7
.fpu	neon

@ iotas64: 64-bit round constants, read one per round through r2 by
@ KeccakF1600_neon.  Entries 0..14 follow; the table continues below.
.type	iotas64, %object
.align	5
iotas64:
.quad	0x0000000000000001
.quad	0x0000000000008082
.quad	0x800000000000808a
.quad	0x8000000080008000
.quad	0x000000000000808b
.quad	0x0000000080000001
.quad	0x8000000080008081
.quad	0x8000000000008009
.quad	0x000000000000008a
.quad	0x0000000000000088
.quad	0x0000000080008009
.quad	0x000000008000000a
.quad	0x000000008000808b
.quad	0x800000000000008b
.quad	0x8000000000008089
@ iotas64, continued: entries 15..23 of the 24 round constants.
.quad	0x8000000000008003
.quad	0x8000000000008002
.quad	0x8000000000000080
.quad	0x000000000000800a
.quad	0x800000008000000a
.quad	0x8000000080008081
.quad	0x8000000000008080
.quad	0x0000000080000001
.quad	0x8000000080008008
.size	iotas64,.-iotas64

@ ---------------------------------------------------------------------
@ KeccakF1600_neon -- 24 rounds over a state held in NEON registers.
@ File-local helper (no .globl directive accompanies it here).
@
@ In:    r0 = 64-bit aligned memory used as spill space, 24 bytes
@             (q4 and later q13 at [r0], d18 at [r0+16])
@        state lanes, one 64-bit lane per d register:
@             d0, d2, d4, d6, d8       = A[0][0..4]
@             d1, d3, d5, d7, d9       = A[1][0..4]
@             d10,d12,d14,d16,d18      = A[2][0..4]
@             d11,d13,d15,d17,d19      = A[3][0..4]
@             d20,d21,d22,d23,d24      = A[4][0..4]
@        Rows 0/1 and rows 2/3 share q registers, so one q-wide
@        operation handles the same column of two rows at once.
@ Out:   the same registers hold the updated state; r0 is unchanged.
@ Clobb: r1, r2, r3, d25-d31, condition flags, the 24 bytes at r0.
@        d8-d15 are modified as part of the state; saving them is left
@        to the caller.
@ ---------------------------------------------------------------------
.type	KeccakF1600_neon, %function
.align	5
KeccakF1600_neon:
	add	r1, r0, #16		@ r1 = second spill slot (for d18)
	adr	r2, iotas64		@ r2 = round-constant cursor, pc-relative
	mov	r3, #24			@ loop counter
	b	.Loop_neon

.align	4
.Loop_neon:
	@ Theta
	vst1.64	{q4}, [r0,:64]		@ offload A[0..1][4]
	veor	q13, q0, q5		@ A[0..1][0]^A[2..3][0]
	vst1.64	{d18}, [r1,:64]		@ offload A[2][4]
	veor	q14, q1, q6		@ A[0..1][1]^A[2..3][1]
	veor	q15, q2, q7		@ A[0..1][2]^A[2..3][2]
	veor	d26, d26, d27		@ C[0]=A[0][0]^A[1][0]^A[2][0]^A[3][0]
	veor	d27, d28, d29		@ C[1]=A[0][1]^A[1][1]^A[2][1]^A[3][1]
	veor	q14, q3, q8		@ A[0..1][3]^A[2..3][3]
	veor	q4, q4, q9		@ A[0..1][4]^A[2..3][4]
	veor	d30, d30, d31		@ C[2]=A[0][2]^A[1][2]^A[2][2]^A[3][2]
	veor	d31, d28, d29		@ C[3]=A[0][3]^A[1][3]^A[2][3]^A[3][3]
	veor	d25, d8, d9		@ C[4]=A[0][4]^A[1][4]^A[2][4]^A[3][4]
	veor	q13, q13, q10		@ C[0..1]^=A[4][0..1]
	veor	q14, q15, q11		@ C[2..3]^=A[4][2..3]
	veor	d25, d25, d24		@ C[4]^=A[4][4]

	@ Rotate left by one: double the value (vadd), then vsri inserts
	@ the old top bit at the bottom.  q4 and d18 are free as
	@ temporaries because their lanes were offloaded above.
	vadd.u64	q4, q13, q13		@ C[0..1]<<1
	vadd.u64	q15, q14, q14		@ C[2..3]<<1
	vadd.u64	d18, d25, d25		@ C[4]<<1
	vsri.u64	q4, q13, #63		@ ROL64(C[0..1],1)
	vsri.u64	q15, q14, #63		@ ROL64(C[2..3],1)
	vsri.u64	d18, d25, #63		@ ROL64(C[4],1)
	veor	d25, d25, d9		@ D[0] = C[4] ^= ROL64(C[1],1)
	veor	q13, q13, q15		@ D[1..2] = C[0..1] ^ ROL64(C[2..3],1)
	veor	d28, d28, d18		@ D[3] = C[2] ^= ROL64(C[4],1)
	veor	d29, d29, d8		@ D[4] = C[3] ^= ROL64(C[0],1)

	veor	d0, d0, d25		@ A[0][0] ^= C[4]
	veor	d1, d1, d25		@ A[1][0] ^= C[4]
	veor	d10, d10, d25		@ A[2][0] ^= C[4]
	veor	d11, d11, d25		@ A[3][0] ^= C[4]
	veor	d20, d20, d25		@ A[4][0] ^= C[4]

	veor	d2, d2, d26		@ A[0][1] ^= D[1]
	veor	d3, d3, d26		@ A[1][1] ^= D[1]
	veor	d12, d12, d26		@ A[2][1] ^= D[1]
	veor	d13, d13, d26		@ A[3][1] ^= D[1]
	veor	d21, d21, d26		@ A[4][1] ^= D[1]
	vmov	d26, d27		@ duplicate D[2] into both halves of q13 for the q-wide XORs below

	veor	d6, d6, d28		@ A[0][3] ^= C[2]
	veor	d7, d7, d28		@ A[1][3] ^= C[2]
	veor	d16, d16, d28		@ A[2][3] ^= C[2]
	veor	d17, d17, d28		@ A[3][3] ^= C[2]
	veor	d23, d23, d28		@ A[4][3] ^= C[2]
	vld1.64	{q4}, [r0,:64]		@ restore A[0..1][4]
	vmov	d28, d29		@ duplicate D[4] into both halves of q14

	vld1.64	{d18}, [r1,:64]		@ restore A[2][4]
	veor	q2, q2, q13		@ A[0..1][2] ^= D[2]
	veor	q7, q7, q13		@ A[2..3][2] ^= D[2]
	veor	d22, d22, d27		@ A[4][2] ^= D[2]

	veor	q4, q4, q14		@ A[0..1][4] ^= C[3]
	veor	q9, q9, q14		@ A[2..3][4] ^= C[3]
	veor	d24, d24, d29		@ A[4][4] ^= C[3]

	@ Rho + Pi
	@ Lanes are moved along chains, each destination rotated from its
	@ source with a vshl/vsri pair.  Row 0 lanes 1..4 are parked in
	@ d26-d29 first because the chains overwrite them.
	vmov	d26, d2			@ C[1] = A[0][1]
	vshl.u64	d2, d3, #44
	vmov	d27, d4			@ C[2] = A[0][2]
	vshl.u64	d4, d14, #43
	vmov	d28, d6			@ C[3] = A[0][3]
	vshl.u64	d6, d17, #21
	vmov	d29, d8			@ C[4] = A[0][4]
	vshl.u64	d8, d24, #14
	vsri.u64	d2, d3, #64-44	@ A[0][1] = ROL64(A[1][1], rhotates[1][1])
	vsri.u64	d4, d14, #64-43	@ A[0][2] = ROL64(A[2][2], rhotates[2][2])
	vsri.u64	d6, d17, #64-21	@ A[0][3] = ROL64(A[3][3], rhotates[3][3])
	vsri.u64	d8, d24, #64-14	@ A[0][4] = ROL64(A[4][4], rhotates[4][4])

	vshl.u64	d3, d9, #20
	vshl.u64	d14, d16, #25
	vshl.u64	d17, d15, #15
	vshl.u64	d24, d21, #2
	vsri.u64	d3, d9, #64-20	@ A[1][1] = ROL64(A[1][4], rhotates[1][4])
	vsri.u64	d14, d16, #64-25	@ A[2][2] = ROL64(A[2][3], rhotates[2][3])
	vsri.u64	d17, d15, #64-15	@ A[3][3] = ROL64(A[3][2], rhotates[3][2])
	vsri.u64	d24, d21, #64-2	@ A[4][4] = ROL64(A[4][1], rhotates[4][1])

	vshl.u64	d9, d22, #61
	@ vshl.u64	d16, d19, #8
	vshl.u64	d15, d12, #10
	vshl.u64	d21, d7, #55
	vsri.u64	d9, d22, #64-61	@ A[1][4] = ROL64(A[4][2], rhotates[4][2])
	vext.8	d16, d19, d19, #8-1	@ A[2][3] = ROL64(A[3][4], rhotates[3][4])
					@ (rotation by 8 bits is a whole-byte rotate, one vext.8 replaces the pair)
	vsri.u64	d15, d12, #64-10	@ A[3][2] = ROL64(A[2][1], rhotates[2][1])
	vsri.u64	d21, d7, #64-55	@ A[4][1] = ROL64(A[1][3], rhotates[1][3])

	vshl.u64	d22, d18, #39
	@ vshl.u64	d19, d23, #56
	vshl.u64	d12, d5, #6
	vshl.u64	d7, d13, #45
	vsri.u64	d22, d18, #64-39	@ A[4][2] = ROL64(A[2][4], rhotates[2][4])
	vext.8	d19, d23, d23, #8-7	@ A[3][4] = ROL64(A[4][3], rhotates[4][3])
					@ (rotation by 56 bits, again a whole-byte rotate)
	vsri.u64	d12, d5, #64-6	@ A[2][1] = ROL64(A[1][2], rhotates[1][2])
	vsri.u64	d7, d13, #64-45	@ A[1][3] = ROL64(A[3][1], rhotates[3][1])

	vshl.u64	d18, d20, #18
	vshl.u64	d23, d11, #41
	vshl.u64	d5, d10, #3
	vshl.u64	d13, d1, #36
	vsri.u64	d18, d20, #64-18	@ A[2][4] = ROL64(A[4][0], rhotates[4][0])
	vsri.u64	d23, d11, #64-41	@ A[4][3] = ROL64(A[3][0], rhotates[3][0])
	vsri.u64	d5, d10, #64-3	@ A[1][2] = ROL64(A[2][0], rhotates[2][0])
	vsri.u64	d13, d1, #64-36	@ A[3][1] = ROL64(A[1][0], rhotates[1][0])

	vshl.u64	d1, d28, #28
	vshl.u64	d10, d26, #1
	vshl.u64	d11, d29, #27
	vshl.u64	d20, d27, #62
	vsri.u64	d1, d28, #64-28	@ A[1][0] = ROL64(C[3], rhotates[0][3])
	vsri.u64	d10, d26, #64-1	@ A[2][0] = ROL64(C[1], rhotates[0][1])
	vsri.u64	d11, d29, #64-27	@ A[3][0] = ROL64(C[4], rhotates[0][4])
	vsri.u64	d20, d27, #64-62	@ A[4][0] = ROL64(C[2], rhotates[0][2])

	@ Chi + Iota
	@ vbic d, n, m computes n & ~m.  Originals that a later term still
	@ needs are kept in a spare register or in the spill slot at r0.
	vbic	q13, q2, q1
	vbic	q14, q3, q2
	vbic	q15, q4, q3
	veor	q13, q13, q0		@ A[0..1][0] ^ (~A[0..1][1] & A[0..1][2])
	veor	q14, q14, q1		@ A[0..1][1] ^ (~A[0..1][2] & A[0..1][3])
	veor	q2, q2, q15		@ A[0..1][2] ^= (~A[0..1][3] & A[0..1][4])
	vst1.64	{q13}, [r0,:64]		@ offload A[0..1][0]
	vbic	q13, q0, q4
	vbic	q15, q1, q0
	vmov	q1, q14			@ A[0..1][1]
	veor	q3, q3, q13		@ A[0..1][3] ^= (~A[0..1][4] & A[0..1][0])
	veor	q4, q4, q15		@ A[0..1][4] ^= (~A[0..1][0] & A[0..1][1])

	vbic	q13, q7, q6
	vmov	q0, q5			@ A[2..3][0]
	vbic	q14, q8, q7
	vmov	q15, q6			@ A[2..3][1]
	veor	q5, q5, q13		@ A[2..3][0] ^= (~A[2..3][1] & A[2..3][2])
	vbic	q13, q9, q8
	veor	q6, q6, q14		@ A[2..3][1] ^= (~A[2..3][2] & A[2..3][3])
	vbic	q14, q0, q9
	veor	q7, q7, q13		@ A[2..3][2] ^= (~A[2..3][3] & A[2..3][4])
	vbic	q13, q15, q0
	veor	q8, q8, q14		@ A[2..3][3] ^= (~A[2..3][4] & A[2..3][0])
	vmov	q14, q10		@ A[4][0..1]
					@ (kept in d28/d29 because d20/d21 are overwritten below)
	veor	q9, q9, q13		@ A[2..3][4] ^= (~A[2..3][0] & A[2..3][1])

	vld1.64	d25, [r2,:64]!		@ Iota[i++]
	vbic	d26, d22, d21
	vbic	d27, d23, d22
	vld1.64	{q0}, [r0,:64]		@ restore A[0..1][0]
	veor	d20, d20, d26		@ A[4][0] ^= (~A[4][1] & A[4][2])
	vbic	d26, d24, d23
	veor	d21, d21, d27		@ A[4][1] ^= (~A[4][2] & A[4][3])
	vbic	d27, d28, d24
	veor	d22, d22, d26		@ A[4][2] ^= (~A[4][3] & A[4][4])
	vbic	d26, d29, d28
	veor	d23, d23, d27		@ A[4][3] ^= (~A[4][4] & A[4][0])
	veor	d0, d0, d25		@ A[0][0] ^= Iota[i]
	veor	d24, d24, d26		@ A[4][4] ^= (~A[4][0] & A[4][1])

	subs	r3, r3, #1
	bne	.Loop_neon

.word	0xe12fff1e			@ bx lr, emitted as a raw ARM opcode word
.size	KeccakF1600_neon,.-KeccakF1600_neon

@ ---------------------------------------------------------------------
@ SHA3_absorb_neon -- NEON variant of block absorption.
@
@ In:    r0 = pointer to the state A (accessed with a 64-bit alignment
@             hint), r1 = inp, r2 = len, r3 = bsz
@ Saves r4-r6, lr and d8-d15 on entry.
@ The prologue below loads the 25 lanes into the register layout that
@ KeccakF1600_neon expects, advancing r0 as it goes.
@ ---------------------------------------------------------------------
.globl	SHA3_absorb_neon
.type	SHA3_absorb_neon, %function
.align	5
SHA3_absorb_neon:
	stmdb	sp!, {r4,r5,r6,lr}
	vstmdb	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}

	mov	r4, r1			@ inp
	mov	r5, r2			@ len
	mov	r6, r3			@ bsz
					@ (copied out of r1-r3 because KeccakF1600_neon overwrites those)

	vld1.32	{d0}, [r0,:64]!		@ A[0][0]
	vld1.32	{d2}, [r0,:64]!		@ A[0][1]
	vld1.32	{d4}, [r0,:64]!		@ A[0][2]
	vld1.32	{d6}, [r0,:64]!		@ A[0][3]
	vld1.32	{d8}, [r0,:64]!		@ A[0][4]

	vld1.32	{d1}, [r0,:64]!		@ A[1][0]
	vld1.32	{d3}, [r0,:64]!		@ A[1][1]
	vld1.32	{d5}, [r0,:64]!		@ A[1][2]
	vld1.32	{d7}, [r0,:64]!		@ A[1][3]
	vld1.32	{d9}, [r0,:64]!		@ A[1][4]

	vld1.32	{d10}, [r0,:64]!		@ A[2][0]
	vld1.32	{d12}, [r0,:64]!		@ A[2][1]
	vld1.32	{d14}, [r0,:64]!		@ A[2][2]
	vld1.32	{d16}, [r0,:64]!		@ A[2][3]
	vld1.32	{d18}, [r0,:64]!		@ A[2][4]
@ A[2][4] 2396 2397 vld1.32 {d11}, [r0,:64]! @ A[3][0] 2398 vld1.32 {d13}, [r0,:64]! @ A[3][1] 2399 vld1.32 {d15}, [r0,:64]! @ A[3][2] 2400 vld1.32 {d17}, [r0,:64]! @ A[3][3] 2401 vld1.32 {d19}, [r0,:64]! @ A[3][4] 2402 2403 vld1.32 {d20,d21,d22,d23}, [r0,:64]! @ A[4][0..3] 2404 vld1.32 {d24}, [r0,:64] @ A[4][4] 2405 sub r0, r0, #24*8 @ rewind 2406 b .Loop_absorb_neon 2407 2408.align 4 2409.Loop_absorb_neon: 2410 subs r12, r5, r6 @ len - bsz 2411 blo .Labsorbed_neon 2412 mov r5, r12 2413 2414 vld1.8 {d31}, [r4]! @ endian-neutral loads... 2415 cmp r6, #8*2 2416 veor d0, d0, d31 @ A[0][0] ^= *inp++ 2417 blo .Lprocess_neon 2418 vld1.8 {d31}, [r4]! 2419 veor d2, d2, d31 @ A[0][1] ^= *inp++ 2420 beq .Lprocess_neon 2421 vld1.8 {d31}, [r4]! 2422 cmp r6, #8*4 2423 veor d4, d4, d31 @ A[0][2] ^= *inp++ 2424 blo .Lprocess_neon 2425 vld1.8 {d31}, [r4]! 2426 veor d6, d6, d31 @ A[0][3] ^= *inp++ 2427 beq .Lprocess_neon 2428 vld1.8 {d31},[r4]! 2429 cmp r6, #8*6 2430 veor d8, d8, d31 @ A[0][4] ^= *inp++ 2431 blo .Lprocess_neon 2432 2433 vld1.8 {d31}, [r4]! 2434 veor d1, d1, d31 @ A[1][0] ^= *inp++ 2435 beq .Lprocess_neon 2436 vld1.8 {d31}, [r4]! 2437 cmp r6, #8*8 2438 veor d3, d3, d31 @ A[1][1] ^= *inp++ 2439 blo .Lprocess_neon 2440 vld1.8 {d31}, [r4]! 2441 veor d5, d5, d31 @ A[1][2] ^= *inp++ 2442 beq .Lprocess_neon 2443 vld1.8 {d31}, [r4]! 2444 cmp r6, #8*10 2445 veor d7, d7, d31 @ A[1][3] ^= *inp++ 2446 blo .Lprocess_neon 2447 vld1.8 {d31}, [r4]! 2448 veor d9, d9, d31 @ A[1][4] ^= *inp++ 2449 beq .Lprocess_neon 2450 2451 vld1.8 {d31}, [r4]! 2452 cmp r6, #8*12 2453 veor d10, d10, d31 @ A[2][0] ^= *inp++ 2454 blo .Lprocess_neon 2455 vld1.8 {d31}, [r4]! 2456 veor d12, d12, d31 @ A[2][1] ^= *inp++ 2457 beq .Lprocess_neon 2458 vld1.8 {d31}, [r4]! 2459 cmp r6, #8*14 2460 veor d14, d14, d31 @ A[2][2] ^= *inp++ 2461 blo .Lprocess_neon 2462 vld1.8 {d31}, [r4]! 2463 veor d16, d16, d31 @ A[2][3] ^= *inp++ 2464 beq .Lprocess_neon 2465 vld1.8 {d31}, [r4]! 
2466 cmp r6, #8*16 2467 veor d18, d18, d31 @ A[2][4] ^= *inp++ 2468 blo .Lprocess_neon 2469 2470 vld1.8 {d31}, [r4]! 2471 veor d11, d11, d31 @ A[3][0] ^= *inp++ 2472 beq .Lprocess_neon 2473 vld1.8 {d31}, [r4]! 2474 cmp r6, #8*18 2475 veor d13, d13, d31 @ A[3][1] ^= *inp++ 2476 blo .Lprocess_neon 2477 vld1.8 {d31}, [r4]! 2478 veor d15, d15, d31 @ A[3][2] ^= *inp++ 2479 beq .Lprocess_neon 2480 vld1.8 {d31}, [r4]! 2481 cmp r6, #8*20 2482 veor d17, d17, d31 @ A[3][3] ^= *inp++ 2483 blo .Lprocess_neon 2484 vld1.8 {d31}, [r4]! 2485 veor d19, d19, d31 @ A[3][4] ^= *inp++ 2486 beq .Lprocess_neon 2487 2488 vld1.8 {d31}, [r4]! 2489 cmp r6, #8*22 2490 veor d20, d20, d31 @ A[4][0] ^= *inp++ 2491 blo .Lprocess_neon 2492 vld1.8 {d31}, [r4]! 2493 veor d21, d21, d31 @ A[4][1] ^= *inp++ 2494 beq .Lprocess_neon 2495 vld1.8 {d31}, [r4]! 2496 cmp r6, #8*24 2497 veor d22, d22, d31 @ A[4][2] ^= *inp++ 2498 blo .Lprocess_neon 2499 vld1.8 {d31}, [r4]! 2500 veor d23, d23, d31 @ A[4][3] ^= *inp++ 2501 beq .Lprocess_neon 2502 vld1.8 {d31}, [r4]! 2503 veor d24, d24, d31 @ A[4][4] ^= *inp++ 2504 2505.Lprocess_neon: 2506 bl KeccakF1600_neon 2507 b .Loop_absorb_neon 2508 2509.align 4 2510.Labsorbed_neon: 2511 vst1.32 {d0}, [r0,:64]! @ A[0][0..4] 2512 vst1.32 {d2}, [r0,:64]! 2513 vst1.32 {d4}, [r0,:64]! 2514 vst1.32 {d6}, [r0,:64]! 2515 vst1.32 {d8}, [r0,:64]! 2516 2517 vst1.32 {d1}, [r0,:64]! @ A[1][0..4] 2518 vst1.32 {d3}, [r0,:64]! 2519 vst1.32 {d5}, [r0,:64]! 2520 vst1.32 {d7}, [r0,:64]! 2521 vst1.32 {d9}, [r0,:64]! 2522 2523 vst1.32 {d10}, [r0,:64]! @ A[2][0..4] 2524 vst1.32 {d12}, [r0,:64]! 2525 vst1.32 {d14}, [r0,:64]! 2526 vst1.32 {d16}, [r0,:64]! 2527 vst1.32 {d18}, [r0,:64]! 2528 2529 vst1.32 {d11}, [r0,:64]! @ A[3][0..4] 2530 vst1.32 {d13}, [r0,:64]! 2531 vst1.32 {d15}, [r0,:64]! 2532 vst1.32 {d17}, [r0,:64]! 2533 vst1.32 {d19}, [r0,:64]! 2534 2535 vst1.32 {d20,d21,d22,d23}, [r0,:64]! 
@ A[4][0..4]
	vst1.32	{d24}, [r0,:64]

	mov	r0, r5			@ return value
	vldmia	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!, {r4,r5,r6,pc}
.size	SHA3_absorb_neon,.-SHA3_absorb_neon

@-----------------------------------------------------------------------
@ SHA3_squeeze_neon
@
@ Copies len bytes of sponge output to out, 8 bytes (one lane) at a
@ time, re-running KeccakF1600_neon on the state every time bsz bytes
@ have been emitted and more output is still wanted.
@
@ In:	r0 = A_flat	state, read/written as 25 consecutive 64-bit lanes
@	r1 = out	destination buffer
@	r2 = len	number of bytes to produce
@	r3 = bsz	bytes to emit between permutations
@			(code steps it down by 8; presumably always a
@			multiple of 8 - confirm against callers)
@ Out:	nothing is placed in r0 for the caller
@
@ Register roles inside the function:
@	r4  = out cursor		r5  = bytes still to emit
@	r6  = bsz (kept for reload)	r12 = read cursor into the state
@	r14 = bytes left in the current block
@
@ Relies on KeccakF1600_neon leaving r0 and r4-r6 intact: they are
@ used right after the call without being reloaded.
@
@ NOTE(review): the file header marks this file as generated from
@ keccak1600-armv4.pl; the len == 0 guard in the tail presumably has to
@ be mirrored in that generator to survive regeneration.
@-----------------------------------------------------------------------
.globl	SHA3_squeeze_neon
.type	SHA3_squeeze_neon, %function
.align	5
SHA3_squeeze_neon:
	stmdb	sp!, {r4,r5,r6,lr}

	mov	r4, r1			@ out
	mov	r5, r2			@ len
	mov	r6, r3			@ bsz
	mov	r12, r0			@ A_flat
	mov	r14, r3			@ bsz
	b	.Loop_squeeze_neon

.align	4
.Loop_squeeze_neon:
	cmp	r5, #8
	blo	.Lsqueeze_neon_tail	@ fewer than 8 bytes wanted
	vld1.32	{d0}, [r12]!		@ next lane of the state
	vst1.8	{d0}, [r4]!		@ endian-neutral store

	subs	r5, r5, #8		@ len -= 8
	beq	.Lsqueeze_neon_done

	subs	r14, r14, #8		@ bsz -= 8
	bhi	.Loop_squeeze_neon	@ block not exhausted yet

	@ Block exhausted and more output wanted: load the whole state
	@ into d0-d24, permute, store it back.  d8-d15 are overwritten
	@ by the state load, hence the save/restore around this section.
	vstmdb	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}

	vld1.32	{d0}, [r0,:64]!		@ A[0][0..4]
	vld1.32	{d2}, [r0,:64]!
	vld1.32	{d4}, [r0,:64]!
	vld1.32	{d6}, [r0,:64]!
	vld1.32	{d8}, [r0,:64]!

	vld1.32	{d1}, [r0,:64]!		@ A[1][0..4]
	vld1.32	{d3}, [r0,:64]!
	vld1.32	{d5}, [r0,:64]!
	vld1.32	{d7}, [r0,:64]!
	vld1.32	{d9}, [r0,:64]!

	vld1.32	{d10}, [r0,:64]!	@ A[2][0..4]
	vld1.32	{d12}, [r0,:64]!
	vld1.32	{d14}, [r0,:64]!
	vld1.32	{d16}, [r0,:64]!
	vld1.32	{d18}, [r0,:64]!

	vld1.32	{d11}, [r0,:64]!	@ A[3][0..4]
	vld1.32	{d13}, [r0,:64]!
	vld1.32	{d15}, [r0,:64]!
	vld1.32	{d17}, [r0,:64]!
	vld1.32	{d19}, [r0,:64]!

	vld1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
	vld1.32	{d24}, [r0,:64]
	sub	r0, r0, #24*8		@ rewind (24 lanes were post-incremented)

	bl	KeccakF1600_neon

	mov	r12, r0			@ A_flat
	vst1.32	{d0}, [r0,:64]!		@ A[0][0..4]
	vst1.32	{d2}, [r0,:64]!
	vst1.32	{d4}, [r0,:64]!
	vst1.32	{d6}, [r0,:64]!
	vst1.32	{d8}, [r0,:64]!

	vst1.32	{d1}, [r0,:64]!		@ A[1][0..4]
	vst1.32	{d3}, [r0,:64]!
	vst1.32	{d5}, [r0,:64]!
	vst1.32	{d7}, [r0,:64]!
	vst1.32	{d9}, [r0,:64]!

	vst1.32	{d10}, [r0,:64]!	@ A[2][0..4]
	vst1.32	{d12}, [r0,:64]!
	vst1.32	{d14}, [r0,:64]!
	vst1.32	{d16}, [r0,:64]!
	vst1.32	{d18}, [r0,:64]!

	vst1.32	{d11}, [r0,:64]!	@ A[3][0..4]
	vst1.32	{d13}, [r0,:64]!
	vst1.32	{d15}, [r0,:64]!
	vst1.32	{d17}, [r0,:64]!
	vst1.32	{d19}, [r0,:64]!

	vst1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
	mov	r14, r6			@ bsz
	vst1.32	{d24}, [r0,:64]
	mov	r0, r12			@ rewind

	vldmia	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
	b	.Loop_squeeze_neon

.align	4
.Lsqueeze_neon_tail:
	@ The loop only branches here with 1..7 bytes left, except when
	@ len was 0 on entry.  The first strb below is unconditional, so
	@ without this guard a zero-length request would write one byte.
	cmp	r5, #0
	beq	.Lsqueeze_neon_done

	ldmia	r12, {r2,r3}		@ r2 = first word of the lane, r3 = second
	cmp	r5, #2
	strb	r2, [r4],#1		@ endian-neutral store
	mov	r2, r2, lsr#8
	blo	.Lsqueeze_neon_done	@ len == 1
	strb	r2, [r4], #1
	mov	r2, r2, lsr#8
	beq	.Lsqueeze_neon_done	@ len == 2
	strb	r2, [r4], #1
	mov	r2, r2, lsr#8
	cmp	r5, #4
	blo	.Lsqueeze_neon_done	@ len == 3
	strb	r2, [r4], #1
	beq	.Lsqueeze_neon_done	@ len == 4

	strb	r3, [r4], #1
	mov	r3, r3, lsr#8
	cmp	r5, #6
	blo	.Lsqueeze_neon_done	@ len == 5
	strb	r3, [r4], #1
	mov	r3, r3, lsr#8
	beq	.Lsqueeze_neon_done	@ len == 6
	strb	r3, [r4], #1		@ len == 7

.Lsqueeze_neon_done:
	ldmia	sp!, {r4,r5,r6,pc}
.size	SHA3_squeeze_neon,.-SHA3_squeeze_neon
#endif
.byte	75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2