1/* Do not modify. This file is auto-generated from keccak1600-armv4.pl. */ 2#include "arm_arch.h" 3 4#if defined(__thumb2__) 5.syntax unified 6.thumb 7#else 8.code 32 9#endif 10 11.text 12 /* iotas32: constant table of 24 entries, each made of two 32-bit words (192 bytes in all). The round code below adds the byte counter kept at sp+444 to the address of this table; the first half of each double round loads the word pair at that offset, the second half loads the pair at offset+8 (the loads are labelled iotas[i].lo and iotas[i].hi), then the counter is advanced by 16 and compared with 192. Each loaded pair is XORed into the two registers that are stored as R[0][0], the step the round code labels iotas[i]. NOTE(review): the pairs look like the Keccak-f[1600] round constants in bit-interleaved form, even-numbered bits in the first word and odd-numbered bits in the second; confirm against the generator script before relying on this. */ 13.type iotas32, %object 14.align 5 15iotas32: 16.long 0x00000001, 0x00000000 17.long 0x00000000, 0x00000089 18.long 0x00000000, 0x8000008b 19.long 0x00000000, 0x80008080 20.long 0x00000001, 0x0000008b 21.long 0x00000001, 0x00008000 22.long 0x00000001, 0x80008088 23.long 0x00000001, 0x80000082 24.long 0x00000000, 0x0000000b 25.long 0x00000000, 0x0000000a 26.long 0x00000001, 0x00008082 27.long 0x00000000, 0x00008003 28.long 0x00000001, 0x0000808b 29.long 0x00000001, 0x8000000b 30.long 0x00000001, 0x8000008a 31.long 0x00000001, 0x80000081 32.long 0x00000000, 0x80000081 33.long 0x00000000, 0x80000008 34.long 0x00000000, 0x00000083 35.long 0x00000000, 0x80008003 36.long 0x00000001, 0x80008088 37.long 0x00000000, 0x80000088 38.long 0x00000001, 0x00008000 39.long 0x00000000, 0x80008082 40.size iotas32,.-iotas32 41 42.type KeccakF1600_int, %function 43.align 5 44KeccakF1600_int: 45 add r9,sp,#176 46 add r12,sp,#0 47 add r10,sp,#40 48 ldmia r9,{r4,r5,r6,r7,r8,r9} @ A[4][2..4] 49KeccakF1600_enter: 50 str lr,[sp,#440] 51 eor r11,r11,r11 52 str r11,[sp,#444] 53 b .Lround2x 54 55.align 4 56.Lround2x: 57 ldmia r12,{r0,r1,r2,r3} @ A[0][0..1] 58 ldmia r10,{r10,r11,r12,r14} @ A[1][0..1] 59#ifdef __thumb2__ 60 eor r0,r0,r10 61 eor r1,r1,r11 62 eor r2,r2,r12 63 ldrd r10,r11,[sp,#56] 64 eor r3,r3,r14 65 ldrd r12,r14,[sp,#64] 66 eor r4,r4,r10 67 eor r5,r5,r11 68 eor r6,r6,r12 69 ldrd r10,r11,[sp,#72] 70 eor r7,r7,r14 71 ldrd r12,r14,[sp,#80] 72 eor r8,r8,r10 73 eor r9,r9,r11 74 eor r0,r0,r12 75 ldrd r10,r11,[sp,#88] 76 eor r1,r1,r14 77 ldrd r12,r14,[sp,#96] 78 eor r2,r2,r10 79 eor r3,r3,r11 80 eor r4,r4,r12 81 ldrd r10,r11,[sp,#104] 82 eor r5,r5,r14 83 ldrd r12,r14,[sp,#112] 84 eor r6,r6,r10 85 eor r7,r7,r11 86 eor r8,r8,r12 87 ldrd r10,r11,[sp,#120] 88 eor r9,r9,r14 89 ldrd r12,r14,[sp,#128] 90 
eor r0,r0,r10 91 eor r1,r1,r11 92 eor r2,r2,r12 93 ldrd r10,r11,[sp,#136] 94 eor r3,r3,r14 95 ldrd r12,r14,[sp,#144] 96 eor r4,r4,r10 97 eor r5,r5,r11 98 eor r6,r6,r12 99 ldrd r10,r11,[sp,#152] 100 eor r7,r7,r14 101 ldrd r12,r14,[sp,#160] 102 eor r8,r8,r10 103 eor r9,r9,r11 104 eor r0,r0,r12 105 ldrd r10,r11,[sp,#168] 106 eor r1,r1,r14 107 ldrd r12,r14,[sp,#16] 108 eor r2,r2,r10 109 eor r3,r3,r11 110 eor r4,r4,r12 111 ldrd r10,r11,[sp,#24] 112 eor r5,r5,r14 113 ldrd r12,r14,[sp,#32] 114#else 115 eor r0,r0,r10 116 add r10,sp,#56 117 eor r1,r1,r11 118 eor r2,r2,r12 119 eor r3,r3,r14 120 ldmia r10,{r10,r11,r12,r14} @ A[1][2..3] 121 eor r4,r4,r10 122 add r10,sp,#72 123 eor r5,r5,r11 124 eor r6,r6,r12 125 eor r7,r7,r14 126 ldmia r10,{r10,r11,r12,r14} @ A[1][4]..A[2][0] 127 eor r8,r8,r10 128 add r10,sp,#88 129 eor r9,r9,r11 130 eor r0,r0,r12 131 eor r1,r1,r14 132 ldmia r10,{r10,r11,r12,r14} @ A[2][1..2] 133 eor r2,r2,r10 134 add r10,sp,#104 135 eor r3,r3,r11 136 eor r4,r4,r12 137 eor r5,r5,r14 138 ldmia r10,{r10,r11,r12,r14} @ A[2][3..4] 139 eor r6,r6,r10 140 add r10,sp,#120 141 eor r7,r7,r11 142 eor r8,r8,r12 143 eor r9,r9,r14 144 ldmia r10,{r10,r11,r12,r14} @ A[3][0..1] 145 eor r0,r0,r10 146 add r10,sp,#136 147 eor r1,r1,r11 148 eor r2,r2,r12 149 eor r3,r3,r14 150 ldmia r10,{r10,r11,r12,r14} @ A[3][2..3] 151 eor r4,r4,r10 152 add r10,sp,#152 153 eor r5,r5,r11 154 eor r6,r6,r12 155 eor r7,r7,r14 156 ldmia r10,{r10,r11,r12,r14} @ A[3][4]..A[4][0] 157 eor r8,r8,r10 158 ldr r10,[sp,#168] @ A[4][1] 159 eor r9,r9,r11 160 ldr r11,[sp,#168+4] 161 eor r0,r0,r12 162 ldr r12,[sp,#16] @ A[0][2] 163 eor r1,r1,r14 164 ldr r14,[sp,#16+4] 165 eor r2,r2,r10 166 add r10,sp,#24 167 eor r3,r3,r11 168 eor r4,r4,r12 169 eor r5,r5,r14 170 ldmia r10,{r10,r11,r12,r14} @ A[0][3..4] 171#endif 172 eor r6,r6,r10 173 eor r7,r7,r11 174 eor r8,r8,r12 175 eor r9,r9,r14 176 177 eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0]; 178#ifndef __thumb2__ 179 str r10,[sp,#208] @ D[1] = E[0] 180#endif 181 
eor r11,r1,r4 182#ifndef __thumb2__ 183 str r11,[sp,#208+4] 184#else 185 strd r10,r11,[sp,#208] @ D[1] = E[0] 186#endif 187 eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3]; 188 eor r14,r7,r0 189#ifndef __thumb2__ 190 str r12,[sp,#232] @ D[4] = E[1] 191#endif 192 eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4]; 193#ifndef __thumb2__ 194 str r14,[sp,#232+4] 195#else 196 strd r12,r14,[sp,#232] @ D[4] = E[1] 197#endif 198 eor r1,r9,r2 199#ifndef __thumb2__ 200 str r0,[sp,#200] @ D[0] = C[0] 201#endif 202 eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1]; 203#ifndef __thumb2__ 204 ldr r7,[sp,#144] 205#endif 206 eor r3,r3,r6 207#ifndef __thumb2__ 208 str r1,[sp,#200+4] 209#else 210 strd r0,r1,[sp,#200] @ D[0] = C[0] 211#endif 212#ifndef __thumb2__ 213 ldr r6,[sp,#144+4] 214#else 215 ldrd r7,r6,[sp,#144] 216#endif 217#ifndef __thumb2__ 218 str r2,[sp,#216] @ D[2] = C[1] 219#endif 220 eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2]; 221#ifndef __thumb2__ 222 str r3,[sp,#216+4] 223#else 224 strd r2,r3,[sp,#216] @ D[2] = C[1] 225#endif 226 eor r5,r5,r8 227 228#ifndef __thumb2__ 229 ldr r8,[sp,#192] 230#endif 231#ifndef __thumb2__ 232 ldr r9,[sp,#192+4] 233#else 234 ldrd r8,r9,[sp,#192] 235#endif 236#ifndef __thumb2__ 237 str r4,[sp,#224] @ D[3] = C[2] 238#endif 239 eor r7,r7,r4 240#ifndef __thumb2__ 241 str r5,[sp,#224+4] 242#else 243 strd r4,r5,[sp,#224] @ D[3] = C[2] 244#endif 245 eor r6,r6,r5 246#ifndef __thumb2__ 247 ldr r4,[sp,#0] 248#endif 249 @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */ 250 @ mov r6,r6,ror#32-11 251#ifndef __thumb2__ 252 ldr r5,[sp,#0+4] 253#else 254 ldrd r4,r5,[sp,#0] 255#endif 256 eor r8,r8,r12 257 eor r9,r9,r14 258#ifndef __thumb2__ 259 ldr r12,[sp,#96] 260#endif 261 eor r0,r0,r4 262#ifndef __thumb2__ 263 ldr r14,[sp,#96+4] 264#else 265 ldrd r12,r14,[sp,#96] 266#endif 267 @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */ 268 @ mov r9,r9,ror#32-7 269 eor r1,r1,r5 @ 
C[0] = A[0][0] ^ C[0]; 270 eor r12,r12,r2 271#ifndef __thumb2__ 272 ldr r2,[sp,#48] 273#endif 274 eor r14,r14,r3 275#ifndef __thumb2__ 276 ldr r3,[sp,#48+4] 277#else 278 ldrd r2,r3,[sp,#48] 279#endif 280 mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); 281 ldr r12,[sp,#444] @ load counter 282 eor r2,r2,r10 283 adr r10,iotas32 284 mov r4,r14,ror#32-22 285 add r14,r10,r12 286 eor r3,r3,r11 287 ldmia r14,{r10,r11} @ iotas[i] 288 bic r12,r4,r2,ror#32-22 289 bic r14,r5,r3,ror#32-22 290 mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); 291 mov r3,r3,ror#32-22 292 eor r12,r12,r0 293 eor r14,r14,r1 294 eor r10,r10,r12 295 eor r11,r11,r14 296#ifndef __thumb2__ 297 str r10,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 298#endif 299 bic r12,r6,r4,ror#11 300#ifndef __thumb2__ 301 str r11,[sp,#240+4] 302#else 303 strd r10,r11,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 304#endif 305 bic r14,r7,r5,ror#10 306 bic r10,r8,r6,ror#32-(11-7) 307 bic r11,r9,r7,ror#32-(10-7) 308 eor r12,r2,r12,ror#32-11 309#ifndef __thumb2__ 310 str r12,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 311#endif 312 eor r14,r3,r14,ror#32-10 313#ifndef __thumb2__ 314 str r14,[sp,#248+4] 315#else 316 strd r12,r14,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 317#endif 318 eor r10,r4,r10,ror#32-7 319 eor r11,r5,r11,ror#32-7 320#ifndef __thumb2__ 321 str r10,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 322#endif 323 bic r12,r0,r8,ror#32-7 324#ifndef __thumb2__ 325 str r11,[sp,#256+4] 326#else 327 strd r10,r11,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 328#endif 329 bic r14,r1,r9,ror#32-7 330 eor r12,r12,r6,ror#32-11 331#ifndef __thumb2__ 332 str r12,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 333#endif 334 eor r14,r14,r7,ror#32-10 335#ifndef __thumb2__ 336 str r14,[sp,#264+4] 337#else 338 strd r12,r14,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 339#endif 340 bic r10,r2,r0 341 add r14,sp,#224 342#ifndef __thumb2__ 343 ldr r0,[sp,#24] @ A[0][3] 
344#endif 345 bic r11,r3,r1 346#ifndef __thumb2__ 347 ldr r1,[sp,#24+4] 348#else 349 ldrd r0,r1,[sp,#24] @ A[0][3] 350#endif 351 eor r10,r10,r8,ror#32-7 352 eor r11,r11,r9,ror#32-7 353#ifndef __thumb2__ 354 str r10,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 355#endif 356 add r9,sp,#200 357#ifndef __thumb2__ 358 str r11,[sp,#272+4] 359#else 360 strd r10,r11,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 361#endif 362 363 ldmia r14,{r10,r11,r12,r14} @ D[3..4] 364 ldmia r9,{r6,r7,r8,r9} @ D[0..1] 365 366#ifndef __thumb2__ 367 ldr r2,[sp,#72] @ A[1][4] 368#endif 369 eor r0,r0,r10 370#ifndef __thumb2__ 371 ldr r3,[sp,#72+4] 372#else 373 ldrd r2,r3,[sp,#72] @ A[1][4] 374#endif 375 eor r1,r1,r11 376 @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]); 377#ifndef __thumb2__ 378 ldr r10,[sp,#128] @ A[3][1] 379#endif 380 @ mov r1,r1,ror#32-14 381#ifndef __thumb2__ 382 ldr r11,[sp,#128+4] 383#else 384 ldrd r10,r11,[sp,#128] @ A[3][1] 385#endif 386 387 eor r2,r2,r12 388#ifndef __thumb2__ 389 ldr r4,[sp,#80] @ A[2][0] 390#endif 391 eor r3,r3,r14 392#ifndef __thumb2__ 393 ldr r5,[sp,#80+4] 394#else 395 ldrd r4,r5,[sp,#80] @ A[2][0] 396#endif 397 @ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]); 398 @ mov r3,r3,ror#32-10 399 400 eor r6,r6,r4 401#ifndef __thumb2__ 402 ldr r12,[sp,#216] @ D[2] 403#endif 404 eor r7,r7,r5 405#ifndef __thumb2__ 406 ldr r14,[sp,#216+4] 407#else 408 ldrd r12,r14,[sp,#216] @ D[2] 409#endif 410 mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]); 411 mov r4,r7,ror#32-2 412 413 eor r10,r10,r8 414#ifndef __thumb2__ 415 ldr r8,[sp,#176] @ A[4][2] 416#endif 417 eor r11,r11,r9 418#ifndef __thumb2__ 419 ldr r9,[sp,#176+4] 420#else 421 ldrd r8,r9,[sp,#176] @ A[4][2] 422#endif 423 mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]); 424 mov r6,r11,ror#32-23 425 426 bic r10,r4,r2,ror#32-10 427 bic r11,r5,r3,ror#32-10 428 eor r12,r12,r8 429 eor r14,r14,r9 430 mov r9,r12,ror#32-30 @ C[4] = 
ROL64(A[4][2] ^ D[2], rhotates[4][2]); 431 mov r8,r14,ror#32-31 432 eor r10,r10,r0,ror#32-14 433 eor r11,r11,r1,ror#32-14 434#ifndef __thumb2__ 435 str r10,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 436#endif 437 bic r12,r6,r4 438#ifndef __thumb2__ 439 str r11,[sp,#280+4] 440#else 441 strd r10,r11,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 442#endif 443 bic r14,r7,r5 444 eor r12,r12,r2,ror#32-10 445#ifndef __thumb2__ 446 str r12,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 447#endif 448 eor r14,r14,r3,ror#32-10 449#ifndef __thumb2__ 450 str r14,[sp,#288+4] 451#else 452 strd r12,r14,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 453#endif 454 bic r10,r8,r6 455 bic r11,r9,r7 456 bic r12,r0,r8,ror#14 457 bic r14,r1,r9,ror#14 458 eor r10,r10,r4 459 eor r11,r11,r5 460#ifndef __thumb2__ 461 str r10,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 462#endif 463 bic r2,r2,r0,ror#32-(14-10) 464#ifndef __thumb2__ 465 str r11,[sp,#296+4] 466#else 467 strd r10,r11,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 468#endif 469 eor r12,r6,r12,ror#32-14 470 bic r11,r3,r1,ror#32-(14-10) 471#ifndef __thumb2__ 472 str r12,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 473#endif 474 eor r14,r7,r14,ror#32-14 475#ifndef __thumb2__ 476 str r14,[sp,#304+4] 477#else 478 strd r12,r14,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 479#endif 480 add r12,sp,#208 481#ifndef __thumb2__ 482 ldr r1,[sp,#8] @ A[0][1] 483#endif 484 eor r10,r8,r2,ror#32-10 485#ifndef __thumb2__ 486 ldr r0,[sp,#8+4] 487#else 488 ldrd r1,r0,[sp,#8] @ A[0][1] 489#endif 490 eor r11,r9,r11,ror#32-10 491#ifndef __thumb2__ 492 str r10,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 493#endif 494#ifndef __thumb2__ 495 str r11,[sp,#312+4] 496#else 497 strd r10,r11,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 498#endif 499 500 add r9,sp,#224 501 ldmia r12,{r10,r11,r12,r14} @ D[1..2] 502#ifndef __thumb2__ 503 ldr r2,[sp,#56] @ A[1][2] 504#endif 505#ifndef __thumb2__ 506 ldr r3,[sp,#56+4] 507#else 508 ldrd r2,r3,[sp,#56] @ A[1][2] 509#endif 
510 ldmia r9,{r6,r7,r8,r9} @ D[3..4] 511 512 eor r1,r1,r10 513#ifndef __thumb2__ 514 ldr r4,[sp,#104] @ A[2][3] 515#endif 516 eor r0,r0,r11 517#ifndef __thumb2__ 518 ldr r5,[sp,#104+4] 519#else 520 ldrd r4,r5,[sp,#104] @ A[2][3] 521#endif 522 mov r0,r0,ror#32-1 @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]); 523 524 eor r2,r2,r12 525#ifndef __thumb2__ 526 ldr r10,[sp,#152] @ A[3][4] 527#endif 528 eor r3,r3,r14 529#ifndef __thumb2__ 530 ldr r11,[sp,#152+4] 531#else 532 ldrd r10,r11,[sp,#152] @ A[3][4] 533#endif 534 @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]); 535#ifndef __thumb2__ 536 ldr r12,[sp,#200] @ D[0] 537#endif 538 @ mov r3,r3,ror#32-3 539#ifndef __thumb2__ 540 ldr r14,[sp,#200+4] 541#else 542 ldrd r12,r14,[sp,#200] @ D[0] 543#endif 544 545 eor r4,r4,r6 546 eor r5,r5,r7 547 @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]); 548 @ mov r4,r7,ror#32-13 @ [track reverse order below] 549 550 eor r10,r10,r8 551#ifndef __thumb2__ 552 ldr r8,[sp,#160] @ A[4][0] 553#endif 554 eor r11,r11,r9 555#ifndef __thumb2__ 556 ldr r9,[sp,#160+4] 557#else 558 ldrd r8,r9,[sp,#160] @ A[4][0] 559#endif 560 mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]); 561 mov r7,r11,ror#32-4 562 563 eor r12,r12,r8 564 eor r14,r14,r9 565 mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]); 566 mov r9,r14,ror#32-9 567 568 bic r10,r5,r2,ror#13-3 569 bic r11,r4,r3,ror#12-3 570 bic r12,r6,r5,ror#32-13 571 bic r14,r7,r4,ror#32-12 572 eor r10,r0,r10,ror#32-13 573 eor r11,r1,r11,ror#32-12 574#ifndef __thumb2__ 575 str r10,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 576#endif 577 eor r12,r12,r2,ror#32-3 578#ifndef __thumb2__ 579 str r11,[sp,#320+4] 580#else 581 strd r10,r11,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 582#endif 583 eor r14,r14,r3,ror#32-3 584#ifndef __thumb2__ 585 str r12,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 586#endif 587 bic r10,r8,r6 588 bic r11,r9,r7 589#ifndef __thumb2__ 590 str r14,[sp,#328+4] 
591#else 592 strd r12,r14,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 593#endif 594 eor r10,r10,r5,ror#32-13 595 eor r11,r11,r4,ror#32-12 596#ifndef __thumb2__ 597 str r10,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 598#endif 599 bic r12,r0,r8 600#ifndef __thumb2__ 601 str r11,[sp,#336+4] 602#else 603 strd r10,r11,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 604#endif 605 bic r14,r1,r9 606 eor r12,r12,r6 607 eor r14,r14,r7 608#ifndef __thumb2__ 609 str r12,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 610#endif 611 bic r10,r2,r0,ror#3 612#ifndef __thumb2__ 613 str r14,[sp,#344+4] 614#else 615 strd r12,r14,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 616#endif 617 bic r11,r3,r1,ror#3 618#ifndef __thumb2__ 619 ldr r1,[sp,#32] @ A[0][4] [in reverse order] 620#endif 621 eor r10,r8,r10,ror#32-3 622#ifndef __thumb2__ 623 ldr r0,[sp,#32+4] 624#else 625 ldrd r1,r0,[sp,#32] @ A[0][4] [in reverse order] 626#endif 627 eor r11,r9,r11,ror#32-3 628#ifndef __thumb2__ 629 str r10,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 630#endif 631 add r9,sp,#208 632#ifndef __thumb2__ 633 str r11,[sp,#352+4] 634#else 635 strd r10,r11,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 636#endif 637 638#ifndef __thumb2__ 639 ldr r10,[sp,#232] @ D[4] 640#endif 641#ifndef __thumb2__ 642 ldr r11,[sp,#232+4] 643#else 644 ldrd r10,r11,[sp,#232] @ D[4] 645#endif 646#ifndef __thumb2__ 647 ldr r12,[sp,#200] @ D[0] 648#endif 649#ifndef __thumb2__ 650 ldr r14,[sp,#200+4] 651#else 652 ldrd r12,r14,[sp,#200] @ D[0] 653#endif 654 655 ldmia r9,{r6,r7,r8,r9} @ D[1..2] 656 657 eor r1,r1,r10 658#ifndef __thumb2__ 659 ldr r2,[sp,#40] @ A[1][0] 660#endif 661 eor r0,r0,r11 662#ifndef __thumb2__ 663 ldr r3,[sp,#40+4] 664#else 665 ldrd r2,r3,[sp,#40] @ A[1][0] 666#endif 667 @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]); 668#ifndef __thumb2__ 669 ldr r4,[sp,#88] @ A[2][1] 670#endif 671 @ mov r0,r11,ror#32-14 @ [was loaded in reverse order] 672#ifndef __thumb2__ 673 ldr r5,[sp,#88+4] 674#else 675 
ldrd r4,r5,[sp,#88] @ A[2][1] 676#endif 677 678 eor r2,r2,r12 679#ifndef __thumb2__ 680 ldr r10,[sp,#136] @ A[3][2] 681#endif 682 eor r3,r3,r14 683#ifndef __thumb2__ 684 ldr r11,[sp,#136+4] 685#else 686 ldrd r10,r11,[sp,#136] @ A[3][2] 687#endif 688 @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]); 689#ifndef __thumb2__ 690 ldr r12,[sp,#224] @ D[3] 691#endif 692 @ mov r3,r3,ror#32-18 693#ifndef __thumb2__ 694 ldr r14,[sp,#224+4] 695#else 696 ldrd r12,r14,[sp,#224] @ D[3] 697#endif 698 699 eor r6,r6,r4 700 eor r7,r7,r5 701 mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]); 702 mov r5,r7,ror#32-5 703 704 eor r10,r10,r8 705#ifndef __thumb2__ 706 ldr r8,[sp,#184] @ A[4][3] 707#endif 708 eor r11,r11,r9 709#ifndef __thumb2__ 710 ldr r9,[sp,#184+4] 711#else 712 ldrd r8,r9,[sp,#184] @ A[4][3] 713#endif 714 mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]); 715 mov r6,r11,ror#32-8 716 717 eor r12,r12,r8 718 eor r14,r14,r9 719 mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]); 720 mov r9,r14,ror#32-28 721 722 bic r10,r4,r2,ror#32-18 723 bic r11,r5,r3,ror#32-18 724 eor r10,r10,r0,ror#32-14 725 eor r11,r11,r1,ror#32-13 726#ifndef __thumb2__ 727 str r10,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 728#endif 729 bic r12,r6,r4 730#ifndef __thumb2__ 731 str r11,[sp,#360+4] 732#else 733 strd r10,r11,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 734#endif 735 bic r14,r7,r5 736 eor r12,r12,r2,ror#32-18 737#ifndef __thumb2__ 738 str r12,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 739#endif 740 eor r14,r14,r3,ror#32-18 741#ifndef __thumb2__ 742 str r14,[sp,#368+4] 743#else 744 strd r12,r14,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 745#endif 746 bic r10,r8,r6 747 bic r11,r9,r7 748 bic r12,r0,r8,ror#14 749 bic r14,r1,r9,ror#13 750 eor r10,r10,r4 751 eor r11,r11,r5 752#ifndef __thumb2__ 753 str r10,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 754#endif 755 bic r2,r2,r0,ror#18-14 756#ifndef __thumb2__ 757 str 
r11,[sp,#376+4] 758#else 759 strd r10,r11,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 760#endif 761 eor r12,r6,r12,ror#32-14 762 bic r11,r3,r1,ror#18-13 763 eor r14,r7,r14,ror#32-13 764#ifndef __thumb2__ 765 str r12,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 766#endif 767#ifndef __thumb2__ 768 str r14,[sp,#384+4] 769#else 770 strd r12,r14,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 771#endif 772 add r14,sp,#216 773#ifndef __thumb2__ 774 ldr r0,[sp,#16] @ A[0][2] 775#endif 776 eor r10,r8,r2,ror#32-18 777#ifndef __thumb2__ 778 ldr r1,[sp,#16+4] 779#else 780 ldrd r0,r1,[sp,#16] @ A[0][2] 781#endif 782 eor r11,r9,r11,ror#32-18 783#ifndef __thumb2__ 784 str r10,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 785#endif 786#ifndef __thumb2__ 787 str r11,[sp,#392+4] 788#else 789 strd r10,r11,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 790#endif 791 792 ldmia r14,{r10,r11,r12,r14} @ D[2..3] 793#ifndef __thumb2__ 794 ldr r2,[sp,#64] @ A[1][3] 795#endif 796#ifndef __thumb2__ 797 ldr r3,[sp,#64+4] 798#else 799 ldrd r2,r3,[sp,#64] @ A[1][3] 800#endif 801#ifndef __thumb2__ 802 ldr r6,[sp,#232] @ D[4] 803#endif 804#ifndef __thumb2__ 805 ldr r7,[sp,#232+4] 806#else 807 ldrd r6,r7,[sp,#232] @ D[4] 808#endif 809 810 eor r0,r0,r10 811#ifndef __thumb2__ 812 ldr r4,[sp,#112] @ A[2][4] 813#endif 814 eor r1,r1,r11 815#ifndef __thumb2__ 816 ldr r5,[sp,#112+4] 817#else 818 ldrd r4,r5,[sp,#112] @ A[2][4] 819#endif 820 @ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]); 821#ifndef __thumb2__ 822 ldr r8,[sp,#200] @ D[0] 823#endif 824 @ mov r1,r1,ror#32-31 825#ifndef __thumb2__ 826 ldr r9,[sp,#200+4] 827#else 828 ldrd r8,r9,[sp,#200] @ D[0] 829#endif 830 831 eor r12,r12,r2 832#ifndef __thumb2__ 833 ldr r10,[sp,#120] @ A[3][0] 834#endif 835 eor r14,r14,r3 836#ifndef __thumb2__ 837 ldr r11,[sp,#120+4] 838#else 839 ldrd r10,r11,[sp,#120] @ A[3][0] 840#endif 841 mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]); 842#ifndef __thumb2__ 843 ldr r12,[sp,#208] @ 
D[1] 844#endif 845 mov r2,r14,ror#32-28 846#ifndef __thumb2__ 847 ldr r14,[sp,#208+4] 848#else 849 ldrd r12,r14,[sp,#208] @ D[1] 850#endif 851 852 eor r6,r6,r4 853 eor r7,r7,r5 854 mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]); 855 mov r4,r7,ror#32-20 856 857 eor r10,r10,r8 858#ifndef __thumb2__ 859 ldr r8,[sp,#168] @ A[4][1] 860#endif 861 eor r11,r11,r9 862#ifndef __thumb2__ 863 ldr r9,[sp,#168+4] 864#else 865 ldrd r8,r9,[sp,#168] @ A[4][1] 866#endif 867 mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]); 868 mov r6,r11,ror#32-21 869 870 eor r8,r8,r12 871 eor r9,r9,r14 872 @ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]); 873 @ mov r9,r3,ror#32-1 874 875 bic r10,r4,r2 876 bic r11,r5,r3 877 eor r10,r10,r0,ror#32-31 878#ifndef __thumb2__ 879 str r10,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 880#endif 881 eor r11,r11,r1,ror#32-31 882#ifndef __thumb2__ 883 str r11,[sp,#400+4] 884#else 885 strd r10,r11,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 886#endif 887 bic r12,r6,r4 888 bic r14,r7,r5 889 eor r12,r12,r2 890 eor r14,r14,r3 891#ifndef __thumb2__ 892 str r12,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 893#endif 894 bic r10,r8,r6,ror#1 895#ifndef __thumb2__ 896 str r14,[sp,#408+4] 897#else 898 strd r12,r14,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 899#endif 900 bic r11,r9,r7,ror#1 901 bic r12,r0,r8,ror#31-1 902 bic r14,r1,r9,ror#31-1 903 eor r4,r4,r10,ror#32-1 904#ifndef __thumb2__ 905 str r4,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 906#endif 907 eor r5,r5,r11,ror#32-1 908#ifndef __thumb2__ 909 str r5,[sp,#416+4] 910#else 911 strd r4,r5,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 912#endif 913 eor r6,r6,r12,ror#32-31 914 eor r7,r7,r14,ror#32-31 915#ifndef __thumb2__ 916 str r6,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 917#endif 918 bic r10,r2,r0,ror#32-31 919#ifndef __thumb2__ 920 str r7,[sp,#424+4] 921#else 922 strd r6,r7,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 923#endif 924 bic 
r11,r3,r1,ror#32-31 925 add r12,sp,#240 926 eor r8,r10,r8,ror#32-1 927 add r10,sp,#280 928 eor r9,r11,r9,ror#32-1 929#ifndef __thumb2__ 930 str r8,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]); 931#endif 932#ifndef __thumb2__ 933 str r9,[sp,#432+4] 934#else 935 strd r8,r9,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]); 936#endif 937 ldmia r12,{r0,r1,r2,r3} @ A[0][0..1] 938 ldmia r10,{r10,r11,r12,r14} @ A[1][0..1] 939#ifdef __thumb2__ 940 eor r0,r0,r10 941 eor r1,r1,r11 942 eor r2,r2,r12 943 ldrd r10,r11,[sp,#296] 944 eor r3,r3,r14 945 ldrd r12,r14,[sp,#304] 946 eor r4,r4,r10 947 eor r5,r5,r11 948 eor r6,r6,r12 949 ldrd r10,r11,[sp,#312] 950 eor r7,r7,r14 951 ldrd r12,r14,[sp,#320] 952 eor r8,r8,r10 953 eor r9,r9,r11 954 eor r0,r0,r12 955 ldrd r10,r11,[sp,#328] 956 eor r1,r1,r14 957 ldrd r12,r14,[sp,#336] 958 eor r2,r2,r10 959 eor r3,r3,r11 960 eor r4,r4,r12 961 ldrd r10,r11,[sp,#344] 962 eor r5,r5,r14 963 ldrd r12,r14,[sp,#352] 964 eor r6,r6,r10 965 eor r7,r7,r11 966 eor r8,r8,r12 967 ldrd r10,r11,[sp,#360] 968 eor r9,r9,r14 969 ldrd r12,r14,[sp,#368] 970 eor r0,r0,r10 971 eor r1,r1,r11 972 eor r2,r2,r12 973 ldrd r10,r11,[sp,#376] 974 eor r3,r3,r14 975 ldrd r12,r14,[sp,#384] 976 eor r4,r4,r10 977 eor r5,r5,r11 978 eor r6,r6,r12 979 ldrd r10,r11,[sp,#392] 980 eor r7,r7,r14 981 ldrd r12,r14,[sp,#400] 982 eor r8,r8,r10 983 eor r9,r9,r11 984 eor r0,r0,r12 985 ldrd r10,r11,[sp,#408] 986 eor r1,r1,r14 987 ldrd r12,r14,[sp,#256] 988 eor r2,r2,r10 989 eor r3,r3,r11 990 eor r4,r4,r12 991 ldrd r10,r11,[sp,#264] 992 eor r5,r5,r14 993 ldrd r12,r14,[sp,#272] 994#else 995 eor r0,r0,r10 996 add r10,sp,#296 997 eor r1,r1,r11 998 eor r2,r2,r12 999 eor r3,r3,r14 1000 ldmia r10,{r10,r11,r12,r14} @ A[1][2..3] 1001 eor r4,r4,r10 1002 add r10,sp,#312 1003 eor r5,r5,r11 1004 eor r6,r6,r12 1005 eor r7,r7,r14 1006 ldmia r10,{r10,r11,r12,r14} @ A[1][4]..A[2][0] 1007 eor r8,r8,r10 1008 add r10,sp,#328 1009 eor r9,r9,r11 1010 eor r0,r0,r12 1011 eor r1,r1,r14 1012 ldmia r10,{r10,r11,r12,r14} @ 
A[2][1..2] 1013 eor r2,r2,r10 1014 add r10,sp,#344 1015 eor r3,r3,r11 1016 eor r4,r4,r12 1017 eor r5,r5,r14 1018 ldmia r10,{r10,r11,r12,r14} @ A[2][3..4] 1019 eor r6,r6,r10 1020 add r10,sp,#360 1021 eor r7,r7,r11 1022 eor r8,r8,r12 1023 eor r9,r9,r14 1024 ldmia r10,{r10,r11,r12,r14} @ A[3][0..1] 1025 eor r0,r0,r10 1026 add r10,sp,#376 1027 eor r1,r1,r11 1028 eor r2,r2,r12 1029 eor r3,r3,r14 1030 ldmia r10,{r10,r11,r12,r14} @ A[3][2..3] 1031 eor r4,r4,r10 1032 add r10,sp,#392 1033 eor r5,r5,r11 1034 eor r6,r6,r12 1035 eor r7,r7,r14 1036 ldmia r10,{r10,r11,r12,r14} @ A[3][4]..A[4][0] 1037 eor r8,r8,r10 1038 ldr r10,[sp,#408] @ A[4][1] 1039 eor r9,r9,r11 1040 ldr r11,[sp,#408+4] 1041 eor r0,r0,r12 1042 ldr r12,[sp,#256] @ A[0][2] 1043 eor r1,r1,r14 1044 ldr r14,[sp,#256+4] 1045 eor r2,r2,r10 1046 add r10,sp,#264 1047 eor r3,r3,r11 1048 eor r4,r4,r12 1049 eor r5,r5,r14 1050 ldmia r10,{r10,r11,r12,r14} @ A[0][3..4] 1051#endif 1052 eor r6,r6,r10 1053 eor r7,r7,r11 1054 eor r8,r8,r12 1055 eor r9,r9,r14 1056 1057 eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0]; 1058#ifndef __thumb2__ 1059 str r10,[sp,#208] @ D[1] = E[0] 1060#endif 1061 eor r11,r1,r4 1062#ifndef __thumb2__ 1063 str r11,[sp,#208+4] 1064#else 1065 strd r10,r11,[sp,#208] @ D[1] = E[0] 1066#endif 1067 eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3]; 1068 eor r14,r7,r0 1069#ifndef __thumb2__ 1070 str r12,[sp,#232] @ D[4] = E[1] 1071#endif 1072 eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4]; 1073#ifndef __thumb2__ 1074 str r14,[sp,#232+4] 1075#else 1076 strd r12,r14,[sp,#232] @ D[4] = E[1] 1077#endif 1078 eor r1,r9,r2 1079#ifndef __thumb2__ 1080 str r0,[sp,#200] @ D[0] = C[0] 1081#endif 1082 eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1]; 1083#ifndef __thumb2__ 1084 ldr r7,[sp,#384] 1085#endif 1086 eor r3,r3,r6 1087#ifndef __thumb2__ 1088 str r1,[sp,#200+4] 1089#else 1090 strd r0,r1,[sp,#200] @ D[0] = C[0] 1091#endif 1092#ifndef __thumb2__ 1093 ldr r6,[sp,#384+4] 1094#else 1095 ldrd 
r7,r6,[sp,#384] 1096#endif 1097#ifndef __thumb2__ 1098 str r2,[sp,#216] @ D[2] = C[1] 1099#endif 1100 eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2]; 1101#ifndef __thumb2__ 1102 str r3,[sp,#216+4] 1103#else 1104 strd r2,r3,[sp,#216] @ D[2] = C[1] 1105#endif 1106 eor r5,r5,r8 1107 1108#ifndef __thumb2__ 1109 ldr r8,[sp,#432] 1110#endif 1111#ifndef __thumb2__ 1112 ldr r9,[sp,#432+4] 1113#else 1114 ldrd r8,r9,[sp,#432] 1115#endif 1116#ifndef __thumb2__ 1117 str r4,[sp,#224] @ D[3] = C[2] 1118#endif 1119 eor r7,r7,r4 1120#ifndef __thumb2__ 1121 str r5,[sp,#224+4] 1122#else 1123 strd r4,r5,[sp,#224] @ D[3] = C[2] 1124#endif 1125 eor r6,r6,r5 1126#ifndef __thumb2__ 1127 ldr r4,[sp,#240] 1128#endif 1129 @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */ 1130 @ mov r6,r6,ror#32-11 1131#ifndef __thumb2__ 1132 ldr r5,[sp,#240+4] 1133#else 1134 ldrd r4,r5,[sp,#240] 1135#endif 1136 eor r8,r8,r12 1137 eor r9,r9,r14 1138#ifndef __thumb2__ 1139 ldr r12,[sp,#336] 1140#endif 1141 eor r0,r0,r4 1142#ifndef __thumb2__ 1143 ldr r14,[sp,#336+4] 1144#else 1145 ldrd r12,r14,[sp,#336] 1146#endif 1147 @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */ 1148 @ mov r9,r9,ror#32-7 1149 eor r1,r1,r5 @ C[0] = A[0][0] ^ C[0]; 1150 eor r12,r12,r2 1151#ifndef __thumb2__ 1152 ldr r2,[sp,#288] 1153#endif 1154 eor r14,r14,r3 1155#ifndef __thumb2__ 1156 ldr r3,[sp,#288+4] 1157#else 1158 ldrd r2,r3,[sp,#288] 1159#endif 1160 mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); 1161 ldr r12,[sp,#444] @ load counter 1162 eor r2,r2,r10 1163 adr r10,iotas32 1164 mov r4,r14,ror#32-22 1165 add r14,r10,r12 1166 eor r3,r3,r11 1167#ifndef __thumb2__ 1168 ldr r10,[r14,#8] @ iotas[i].lo 1169#endif 1170 add r12,r12,#16 1171#ifndef __thumb2__ 1172 ldr r11,[r14,#12] @ iotas[i].hi 1173#else 1174 ldrd r10,r11,[r14,#8] @ iotas[i].lo 1175#endif 1176 cmp r12,#192 1177 str r12,[sp,#444] @ store counter 1178 bic r12,r4,r2,ror#32-22 1179 bic 
r14,r5,r3,ror#32-22 1180 mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); 1181 mov r3,r3,ror#32-22 1182 eor r12,r12,r0 1183 eor r14,r14,r1 1184 eor r10,r10,r12 1185 eor r11,r11,r14 1186#ifndef __thumb2__ 1187 str r10,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 1188#endif 1189 bic r12,r6,r4,ror#11 1190#ifndef __thumb2__ 1191 str r11,[sp,#0+4] 1192#else 1193 strd r10,r11,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 1194#endif 1195 bic r14,r7,r5,ror#10 1196 bic r10,r8,r6,ror#32-(11-7) 1197 bic r11,r9,r7,ror#32-(10-7) 1198 eor r12,r2,r12,ror#32-11 1199#ifndef __thumb2__ 1200 str r12,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 1201#endif 1202 eor r14,r3,r14,ror#32-10 1203#ifndef __thumb2__ 1204 str r14,[sp,#8+4] 1205#else 1206 strd r12,r14,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 1207#endif 1208 eor r10,r4,r10,ror#32-7 1209 eor r11,r5,r11,ror#32-7 1210#ifndef __thumb2__ 1211 str r10,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 1212#endif 1213 bic r12,r0,r8,ror#32-7 1214#ifndef __thumb2__ 1215 str r11,[sp,#16+4] 1216#else 1217 strd r10,r11,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 1218#endif 1219 bic r14,r1,r9,ror#32-7 1220 eor r12,r12,r6,ror#32-11 1221#ifndef __thumb2__ 1222 str r12,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 1223#endif 1224 eor r14,r14,r7,ror#32-10 1225#ifndef __thumb2__ 1226 str r14,[sp,#24+4] 1227#else 1228 strd r12,r14,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 1229#endif 1230 bic r10,r2,r0 1231 add r14,sp,#224 1232#ifndef __thumb2__ 1233 ldr r0,[sp,#264] @ A[0][3] 1234#endif 1235 bic r11,r3,r1 1236#ifndef __thumb2__ 1237 ldr r1,[sp,#264+4] 1238#else 1239 ldrd r0,r1,[sp,#264] @ A[0][3] 1240#endif 1241 eor r10,r10,r8,ror#32-7 1242 eor r11,r11,r9,ror#32-7 1243#ifndef __thumb2__ 1244 str r10,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 1245#endif 1246 add r9,sp,#200 1247#ifndef __thumb2__ 1248 str r11,[sp,#32+4] 1249#else 1250 strd r10,r11,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 1251#endif 1252 1253 ldmia 
r14,{r10,r11,r12,r14} @ D[3..4] 1254 ldmia r9,{r6,r7,r8,r9} @ D[0..1] 1255 1256#ifndef __thumb2__ 1257 ldr r2,[sp,#312] @ A[1][4] 1258#endif 1259 eor r0,r0,r10 1260#ifndef __thumb2__ 1261 ldr r3,[sp,#312+4] 1262#else 1263 ldrd r2,r3,[sp,#312] @ A[1][4] 1264#endif 1265 eor r1,r1,r11 1266 @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]); 1267#ifndef __thumb2__ 1268 ldr r10,[sp,#368] @ A[3][1] 1269#endif 1270 @ mov r1,r1,ror#32-14 1271#ifndef __thumb2__ 1272 ldr r11,[sp,#368+4] 1273#else 1274 ldrd r10,r11,[sp,#368] @ A[3][1] 1275#endif 1276 1277 eor r2,r2,r12 1278#ifndef __thumb2__ 1279 ldr r4,[sp,#320] @ A[2][0] 1280#endif 1281 eor r3,r3,r14 1282#ifndef __thumb2__ 1283 ldr r5,[sp,#320+4] 1284#else 1285 ldrd r4,r5,[sp,#320] @ A[2][0] 1286#endif 1287 @ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]); 1288 @ mov r3,r3,ror#32-10 1289 1290 eor r6,r6,r4 1291#ifndef __thumb2__ 1292 ldr r12,[sp,#216] @ D[2] 1293#endif 1294 eor r7,r7,r5 1295#ifndef __thumb2__ 1296 ldr r14,[sp,#216+4] 1297#else 1298 ldrd r12,r14,[sp,#216] @ D[2] 1299#endif 1300 mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]); 1301 mov r4,r7,ror#32-2 1302 1303 eor r10,r10,r8 1304#ifndef __thumb2__ 1305 ldr r8,[sp,#416] @ A[4][2] 1306#endif 1307 eor r11,r11,r9 1308#ifndef __thumb2__ 1309 ldr r9,[sp,#416+4] 1310#else 1311 ldrd r8,r9,[sp,#416] @ A[4][2] 1312#endif 1313 mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]); 1314 mov r6,r11,ror#32-23 1315 1316 bic r10,r4,r2,ror#32-10 1317 bic r11,r5,r3,ror#32-10 1318 eor r12,r12,r8 1319 eor r14,r14,r9 1320 mov r9,r12,ror#32-30 @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]); 1321 mov r8,r14,ror#32-31 1322 eor r10,r10,r0,ror#32-14 1323 eor r11,r11,r1,ror#32-14 1324#ifndef __thumb2__ 1325 str r10,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 1326#endif 1327 bic r12,r6,r4 1328#ifndef __thumb2__ 1329 str r11,[sp,#40+4] 1330#else 1331 strd r10,r11,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 1332#endif 1333 
bic r14,r7,r5 1334 eor r12,r12,r2,ror#32-10 1335#ifndef __thumb2__ 1336 str r12,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 1337#endif 1338 eor r14,r14,r3,ror#32-10 1339#ifndef __thumb2__ 1340 str r14,[sp,#48+4] 1341#else 1342 strd r12,r14,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 1343#endif 1344 bic r10,r8,r6 1345 bic r11,r9,r7 1346 bic r12,r0,r8,ror#14 1347 bic r14,r1,r9,ror#14 1348 eor r10,r10,r4 1349 eor r11,r11,r5 1350#ifndef __thumb2__ 1351 str r10,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 1352#endif 1353 bic r2,r2,r0,ror#32-(14-10) 1354#ifndef __thumb2__ 1355 str r11,[sp,#56+4] 1356#else 1357 strd r10,r11,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 1358#endif 1359 eor r12,r6,r12,ror#32-14 1360 bic r11,r3,r1,ror#32-(14-10) 1361#ifndef __thumb2__ 1362 str r12,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 1363#endif 1364 eor r14,r7,r14,ror#32-14 1365#ifndef __thumb2__ 1366 str r14,[sp,#64+4] 1367#else 1368 strd r12,r14,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 1369#endif 1370 add r12,sp,#208 1371#ifndef __thumb2__ 1372 ldr r1,[sp,#248] @ A[0][1] 1373#endif 1374 eor r10,r8,r2,ror#32-10 1375#ifndef __thumb2__ 1376 ldr r0,[sp,#248+4] 1377#else 1378 ldrd r1,r0,[sp,#248] @ A[0][1] 1379#endif 1380 eor r11,r9,r11,ror#32-10 1381#ifndef __thumb2__ 1382 str r10,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 1383#endif 1384#ifndef __thumb2__ 1385 str r11,[sp,#72+4] 1386#else 1387 strd r10,r11,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 1388#endif 1389 1390 add r9,sp,#224 1391 ldmia r12,{r10,r11,r12,r14} @ D[1..2] 1392#ifndef __thumb2__ 1393 ldr r2,[sp,#296] @ A[1][2] 1394#endif 1395#ifndef __thumb2__ 1396 ldr r3,[sp,#296+4] 1397#else 1398 ldrd r2,r3,[sp,#296] @ A[1][2] 1399#endif 1400 ldmia r9,{r6,r7,r8,r9} @ D[3..4] 1401 1402 eor r1,r1,r10 1403#ifndef __thumb2__ 1404 ldr r4,[sp,#344] @ A[2][3] 1405#endif 1406 eor r0,r0,r11 1407#ifndef __thumb2__ 1408 ldr r5,[sp,#344+4] 1409#else 1410 ldrd r4,r5,[sp,#344] @ A[2][3] 1411#endif 1412 mov r0,r0,ror#32-1 @ C[0] = 
ROL64(A[0][1] ^ D[1], rhotates[0][1]); 1413 1414 eor r2,r2,r12 1415#ifndef __thumb2__ 1416 ldr r10,[sp,#392] @ A[3][4] 1417#endif 1418 eor r3,r3,r14 1419#ifndef __thumb2__ 1420 ldr r11,[sp,#392+4] 1421#else 1422 ldrd r10,r11,[sp,#392] @ A[3][4] 1423#endif 1424 @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]); 1425#ifndef __thumb2__ 1426 ldr r12,[sp,#200] @ D[0] 1427#endif 1428 @ mov r3,r3,ror#32-3 1429#ifndef __thumb2__ 1430 ldr r14,[sp,#200+4] 1431#else 1432 ldrd r12,r14,[sp,#200] @ D[0] 1433#endif 1434 1435 eor r4,r4,r6 1436 eor r5,r5,r7 1437 @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]); 1438 @ mov r4,r7,ror#32-13 @ [track reverse order below] 1439 1440 eor r10,r10,r8 1441#ifndef __thumb2__ 1442 ldr r8,[sp,#400] @ A[4][0] 1443#endif 1444 eor r11,r11,r9 1445#ifndef __thumb2__ 1446 ldr r9,[sp,#400+4] 1447#else 1448 ldrd r8,r9,[sp,#400] @ A[4][0] 1449#endif 1450 mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]); 1451 mov r7,r11,ror#32-4 1452 1453 eor r12,r12,r8 1454 eor r14,r14,r9 1455 mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]); 1456 mov r9,r14,ror#32-9 1457 1458 bic r10,r5,r2,ror#13-3 1459 bic r11,r4,r3,ror#12-3 1460 bic r12,r6,r5,ror#32-13 1461 bic r14,r7,r4,ror#32-12 1462 eor r10,r0,r10,ror#32-13 1463 eor r11,r1,r11,ror#32-12 1464#ifndef __thumb2__ 1465 str r10,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 1466#endif 1467 eor r12,r12,r2,ror#32-3 1468#ifndef __thumb2__ 1469 str r11,[sp,#80+4] 1470#else 1471 strd r10,r11,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 1472#endif 1473 eor r14,r14,r3,ror#32-3 1474#ifndef __thumb2__ 1475 str r12,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 1476#endif 1477 bic r10,r8,r6 1478 bic r11,r9,r7 1479#ifndef __thumb2__ 1480 str r14,[sp,#88+4] 1481#else 1482 strd r12,r14,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 1483#endif 1484 eor r10,r10,r5,ror#32-13 1485 eor r11,r11,r4,ror#32-12 1486#ifndef __thumb2__ 1487 str r10,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] 
& C[4]); 1488#endif 1489 bic r12,r0,r8 1490#ifndef __thumb2__ 1491 str r11,[sp,#96+4] 1492#else 1493 strd r10,r11,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 1494#endif 1495 bic r14,r1,r9 1496 eor r12,r12,r6 1497 eor r14,r14,r7 1498#ifndef __thumb2__ 1499 str r12,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 1500#endif 1501 bic r10,r2,r0,ror#3 1502#ifndef __thumb2__ 1503 str r14,[sp,#104+4] 1504#else 1505 strd r12,r14,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 1506#endif 1507 bic r11,r3,r1,ror#3 1508#ifndef __thumb2__ 1509 ldr r1,[sp,#272] @ A[0][4] [in reverse order] 1510#endif 1511 eor r10,r8,r10,ror#32-3 1512#ifndef __thumb2__ 1513 ldr r0,[sp,#272+4] 1514#else 1515 ldrd r1,r0,[sp,#272] @ A[0][4] [in reverse order] 1516#endif 1517 eor r11,r9,r11,ror#32-3 1518#ifndef __thumb2__ 1519 str r10,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 1520#endif 1521 add r9,sp,#208 1522#ifndef __thumb2__ 1523 str r11,[sp,#112+4] 1524#else 1525 strd r10,r11,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 1526#endif 1527 1528#ifndef __thumb2__ 1529 ldr r10,[sp,#232] @ D[4] 1530#endif 1531#ifndef __thumb2__ 1532 ldr r11,[sp,#232+4] 1533#else 1534 ldrd r10,r11,[sp,#232] @ D[4] 1535#endif 1536#ifndef __thumb2__ 1537 ldr r12,[sp,#200] @ D[0] 1538#endif 1539#ifndef __thumb2__ 1540 ldr r14,[sp,#200+4] 1541#else 1542 ldrd r12,r14,[sp,#200] @ D[0] 1543#endif 1544 1545 ldmia r9,{r6,r7,r8,r9} @ D[1..2] 1546 1547 eor r1,r1,r10 1548#ifndef __thumb2__ 1549 ldr r2,[sp,#280] @ A[1][0] 1550#endif 1551 eor r0,r0,r11 1552#ifndef __thumb2__ 1553 ldr r3,[sp,#280+4] 1554#else 1555 ldrd r2,r3,[sp,#280] @ A[1][0] 1556#endif 1557 @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]); 1558#ifndef __thumb2__ 1559 ldr r4,[sp,#328] @ A[2][1] 1560#endif 1561 @ mov r0,r11,ror#32-14 @ [was loaded in reverse order] 1562#ifndef __thumb2__ 1563 ldr r5,[sp,#328+4] 1564#else 1565 ldrd r4,r5,[sp,#328] @ A[2][1] 1566#endif 1567 1568 eor r2,r2,r12 1569#ifndef __thumb2__ 1570 ldr r10,[sp,#376] @ A[3][2] 
1571#endif 1572 eor r3,r3,r14 1573#ifndef __thumb2__ 1574 ldr r11,[sp,#376+4] 1575#else 1576 ldrd r10,r11,[sp,#376] @ A[3][2] 1577#endif 1578 @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]); 1579#ifndef __thumb2__ 1580 ldr r12,[sp,#224] @ D[3] 1581#endif 1582 @ mov r3,r3,ror#32-18 1583#ifndef __thumb2__ 1584 ldr r14,[sp,#224+4] 1585#else 1586 ldrd r12,r14,[sp,#224] @ D[3] 1587#endif 1588 1589 eor r6,r6,r4 1590 eor r7,r7,r5 1591 mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]); 1592 mov r5,r7,ror#32-5 1593 1594 eor r10,r10,r8 1595#ifndef __thumb2__ 1596 ldr r8,[sp,#424] @ A[4][3] 1597#endif 1598 eor r11,r11,r9 1599#ifndef __thumb2__ 1600 ldr r9,[sp,#424+4] 1601#else 1602 ldrd r8,r9,[sp,#424] @ A[4][3] 1603#endif 1604 mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]); 1605 mov r6,r11,ror#32-8 1606 1607 eor r12,r12,r8 1608 eor r14,r14,r9 1609 mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]); 1610 mov r9,r14,ror#32-28 1611 1612 bic r10,r4,r2,ror#32-18 1613 bic r11,r5,r3,ror#32-18 1614 eor r10,r10,r0,ror#32-14 1615 eor r11,r11,r1,ror#32-13 1616#ifndef __thumb2__ 1617 str r10,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 1618#endif 1619 bic r12,r6,r4 1620#ifndef __thumb2__ 1621 str r11,[sp,#120+4] 1622#else 1623 strd r10,r11,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 1624#endif 1625 bic r14,r7,r5 1626 eor r12,r12,r2,ror#32-18 1627#ifndef __thumb2__ 1628 str r12,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 1629#endif 1630 eor r14,r14,r3,ror#32-18 1631#ifndef __thumb2__ 1632 str r14,[sp,#128+4] 1633#else 1634 strd r12,r14,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 1635#endif 1636 bic r10,r8,r6 1637 bic r11,r9,r7 1638 bic r12,r0,r8,ror#14 1639 bic r14,r1,r9,ror#13 1640 eor r10,r10,r4 1641 eor r11,r11,r5 1642#ifndef __thumb2__ 1643 str r10,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 1644#endif 1645 bic r2,r2,r0,ror#18-14 1646#ifndef __thumb2__ 1647 str r11,[sp,#136+4] 1648#else 1649 strd 
r10,r11,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 1650#endif 1651 eor r12,r6,r12,ror#32-14 1652 bic r11,r3,r1,ror#18-13 1653 eor r14,r7,r14,ror#32-13 1654#ifndef __thumb2__ 1655 str r12,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 1656#endif 1657#ifndef __thumb2__ 1658 str r14,[sp,#144+4] 1659#else 1660 strd r12,r14,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 1661#endif 1662 add r14,sp,#216 1663#ifndef __thumb2__ 1664 ldr r0,[sp,#256] @ A[0][2] 1665#endif 1666 eor r10,r8,r2,ror#32-18 1667#ifndef __thumb2__ 1668 ldr r1,[sp,#256+4] 1669#else 1670 ldrd r0,r1,[sp,#256] @ A[0][2] 1671#endif 1672 eor r11,r9,r11,ror#32-18 1673#ifndef __thumb2__ 1674 str r10,[sp,#152] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 1675#endif 1676#ifndef __thumb2__ 1677 str r11,[sp,#152+4] 1678#else 1679 strd r10,r11,[sp,#152] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 1680#endif 1681 1682 ldmia r14,{r10,r11,r12,r14} @ D[2..3] 1683#ifndef __thumb2__ 1684 ldr r2,[sp,#304] @ A[1][3] 1685#endif 1686#ifndef __thumb2__ 1687 ldr r3,[sp,#304+4] 1688#else 1689 ldrd r2,r3,[sp,#304] @ A[1][3] 1690#endif 1691#ifndef __thumb2__ 1692 ldr r6,[sp,#232] @ D[4] 1693#endif 1694#ifndef __thumb2__ 1695 ldr r7,[sp,#232+4] 1696#else 1697 ldrd r6,r7,[sp,#232] @ D[4] 1698#endif 1699 1700 eor r0,r0,r10 1701#ifndef __thumb2__ 1702 ldr r4,[sp,#352] @ A[2][4] 1703#endif 1704 eor r1,r1,r11 1705#ifndef __thumb2__ 1706 ldr r5,[sp,#352+4] 1707#else 1708 ldrd r4,r5,[sp,#352] @ A[2][4] 1709#endif 1710 @ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]); 1711#ifndef __thumb2__ 1712 ldr r8,[sp,#200] @ D[0] 1713#endif 1714 @ mov r1,r1,ror#32-31 1715#ifndef __thumb2__ 1716 ldr r9,[sp,#200+4] 1717#else 1718 ldrd r8,r9,[sp,#200] @ D[0] 1719#endif 1720 1721 eor r12,r12,r2 1722#ifndef __thumb2__ 1723 ldr r10,[sp,#360] @ A[3][0] 1724#endif 1725 eor r14,r14,r3 1726#ifndef __thumb2__ 1727 ldr r11,[sp,#360+4] 1728#else 1729 ldrd r10,r11,[sp,#360] @ A[3][0] 1730#endif 1731 mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], 
rhotates[1][3]); 1732#ifndef __thumb2__ 1733 ldr r12,[sp,#208] @ D[1] 1734#endif 1735 mov r2,r14,ror#32-28 1736#ifndef __thumb2__ 1737 ldr r14,[sp,#208+4] 1738#else 1739 ldrd r12,r14,[sp,#208] @ D[1] 1740#endif 1741 1742 eor r6,r6,r4 1743 eor r7,r7,r5 1744 mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]); 1745 mov r4,r7,ror#32-20 1746 1747 eor r10,r10,r8 1748#ifndef __thumb2__ 1749 ldr r8,[sp,#408] @ A[4][1] 1750#endif 1751 eor r11,r11,r9 1752#ifndef __thumb2__ 1753 ldr r9,[sp,#408+4] 1754#else 1755 ldrd r8,r9,[sp,#408] @ A[4][1] 1756#endif 1757 mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]); 1758 mov r6,r11,ror#32-21 1759 1760 eor r8,r8,r12 1761 eor r9,r9,r14 1762 @ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]); 1763 @ mov r9,r3,ror#32-1 1764 1765 bic r10,r4,r2 1766 bic r11,r5,r3 1767 eor r10,r10,r0,ror#32-31 1768#ifndef __thumb2__ 1769 str r10,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 1770#endif 1771 eor r11,r11,r1,ror#32-31 1772#ifndef __thumb2__ 1773 str r11,[sp,#160+4] 1774#else 1775 strd r10,r11,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 1776#endif 1777 bic r12,r6,r4 1778 bic r14,r7,r5 1779 eor r12,r12,r2 1780 eor r14,r14,r3 1781#ifndef __thumb2__ 1782 str r12,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 1783#endif 1784 bic r10,r8,r6,ror#1 1785#ifndef __thumb2__ 1786 str r14,[sp,#168+4] 1787#else 1788 strd r12,r14,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 1789#endif 1790 bic r11,r9,r7,ror#1 1791 bic r12,r0,r8,ror#31-1 1792 bic r14,r1,r9,ror#31-1 1793 eor r4,r4,r10,ror#32-1 1794#ifndef __thumb2__ 1795 str r4,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 1796#endif 1797 eor r5,r5,r11,ror#32-1 1798#ifndef __thumb2__ 1799 str r5,[sp,#176+4] 1800#else 1801 strd r4,r5,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 1802#endif 1803 eor r6,r6,r12,ror#32-31 1804 eor r7,r7,r14,ror#32-31 1805#ifndef __thumb2__ 1806 str r6,[sp,#184] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 1807#endif 1808 bic r10,r2,r0,ror#32-31 
#ifndef __thumb2__
	str	r7,[sp,#184+4]
#else
	strd	r6,r7,[sp,#184]		@ R[4][3] = C[3] ^= (~C[4] & C[0]);
#endif
	bic	r11,r3,r1,ror#32-31
	add	r12,sp,#0
	eor	r8,r10,r8,ror#32-1
	add	r10,sp,#40
	eor	r9,r11,r9,ror#32-1
#ifndef __thumb2__
	str	r8,[sp,#192]		@ R[4][4] = C[4] ^= (~C[0] & C[1]);
#endif
#ifndef __thumb2__
	str	r9,[sp,#192+4]
#else
	strd	r8,r9,[sp,#192]		@ R[4][4] = C[4] ^= (~C[0] & C[1]);
#endif
	blo	.Lround2x

#if __ARM_ARCH__>=5
	ldr	pc,[sp,#440]
#else
	ldr	lr,[sp,#440]
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	KeccakF1600_int,.-KeccakF1600_int

@-----------------------------------------------------------------------
@ KeccakF1600: run the permutation on a 200-byte state, in place
@ (integer-only code path).
@ In:    r0 = pointer to A[5][5] (50 32-bit words)
@ Out:   state at the original r0 overwritten with the permuted state
@ Frame: 10 registers pushed (r0, r4-r11, lr), then 440+16 bytes of
@        locals. The working copy of A lives at sp+0..199 and the
@        saved r0 at sp+440+16.
@ The round loop is entered at KeccakF1600_enter with r12 = sp,
@ r10 = sp+40 and A[4][2..4] in r4-r9, i.e. the same register state
@ KeccakF1600_int sets up before falling through to that label. The
@ last ldmia of the copy-in sequence leaves those six words in r4-r9.
@-----------------------------------------------------------------------
.type	KeccakF1600, %function
.align	5
KeccakF1600:
	stmdb	sp!,{r0,r4-r11,lr}
	sub	sp,sp,#440+16			@ space for A[5][5],D[5],T[5][5],...

	add	r10,r0,#40
	add	r11,sp,#40
	ldmia	r0, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ copy A[5][5] to stack
	stmia	sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ r4-r9 = A[4][2..4]
	add	r12,sp,#0
	add	r10,sp,#40
	stmia	r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}

	bl	KeccakF1600_enter

	@ r10 is sp+40 again here: the round code above re-derives it
	@ just before its loop branch.
	ldr	r11, [sp,#440+16]		@ restore pointer to A
	ldmia	sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ return A[5][5]
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}

	add	sp,sp,#440+20			@ drop locals and the saved r0 slot
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	KeccakF1600,.-KeccakF1600

@-----------------------------------------------------------------------
@ SHA3_absorb: XOR whole input blocks into the state and permute after
@ each block (integer-only code path).
@ In:    r0 = A, r1 = inp, r2 = len, r3 = bsz
@ Out:   r0 = number of input bytes left unprocessed. If len < bsz on
@        entry nothing is touched and len is returned as is.
@ Frame: r0-r12,lr pushed, then 456+16 bytes of locals:
@          sp+0..199  working copy of A
@          sp+456..468 masks 0x55555555, 0x33333333, 0x0f0f0f0f,
@                      0x00ff00ff (r6, r7, r8, r9)
@          sp+472     saved r0 (A)
@          sp+476     saved r1, reused as the running inp pointer
@          sp+480     saved r2, reused for the remaining length
@          sp+484     saved r3 (bsz)
@        The eight words at sp+456 are reloaded in one ldmia after
@        every call to KeccakF1600_int.
@ Each 8 input bytes are assembled little-endian into lo/hi words and
@ bit-interleaved before being XORed into the next state lane.
@-----------------------------------------------------------------------
.globl	SHA3_absorb
.type	SHA3_absorb,%function
.align	5
SHA3_absorb:
	stmdb	sp!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	sub	sp,sp,#456+16

	add	r10,r0,#40
	@ mov	r11,r1
	mov	r12,r2
	mov	r14,r3
	cmp	r2,r3
	blo	.Labsorb_abort			@ len < bsz: return len untouched

	add	r11,sp,#0
	ldmia	r0, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ copy A[5][5] to stack
	stmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}

	ldr	r11,[sp,#476]		@ restore r11
#ifdef __thumb2__
	mov	r9,#0x00ff00ff
	mov	r8,#0x0f0f0f0f
	mov	r7,#0x33333333
	mov	r6,#0x55555555
#else
	mov	r6,#0x11		@ compose constants
	mov	r8,#0x0f
	mov	r9,#0xff
	orr	r6,r6,r6,lsl#8
	orr	r8,r8,r8,lsl#8
	orr	r6,r6,r6,lsl#16		@ 0x11111111
	orr	r9,r9,r9,lsl#16		@ 0x00ff00ff
	orr	r8,r8,r8,lsl#16		@ 0x0f0f0f0f
	orr	r7,r6,r6,lsl#1		@ 0x33333333
	orr	r6,r6,r6,lsl#2		@ 0x55555555
#endif
	str	r9,[sp,#468]
	str	r8,[sp,#464]
	str	r7,[sp,#460]
	str	r6,[sp,#456]
	b	.Loop_absorb

.align	4
.Loop_absorb:
	subs	r0,r12,r14		@ r12 = len, r14 = bsz
	blo	.Labsorbed
	add	r10,sp,#0
	str	r0,[sp,#480]		@ save len - bsz

.align	4
.Loop_block:
	ldrb	r0,[r11],#1
	ldrb	r1,[r11],#1
	ldrb	r2,[r11],#1
	ldrb	r3,[r11],#1
	ldrb	r4,[r11],#1
	orr	r0,r0,r1,lsl#8
	ldrb	r1,[r11],#1
	orr	r0,r0,r2,lsl#16
	ldrb	r2,[r11],#1
	orr	r0,r0,r3,lsl#24		@ lo
	ldrb	r3,[r11],#1
	orr	r1,r4,r1,lsl#8
	orr	r1,r1,r2,lsl#16
	orr	r1,r1,r3,lsl#24		@ hi

	and	r2,r0,r6		@ &=0x55555555
	and	r0,r0,r6,lsl#1		@ &=0xaaaaaaaa
	and	r3,r1,r6		@ &=0x55555555
	and	r1,r1,r6,lsl#1		@ &=0xaaaaaaaa
	orr	r2,r2,r2,lsr#1
	orr	r0,r0,r0,lsl#1
	orr	r3,r3,r3,lsr#1
	orr	r1,r1,r1,lsl#1
	and	r2,r2,r7		@ &=0x33333333
	and	r0,r0,r7,lsl#2		@ &=0xcccccccc
	and	r3,r3,r7		@ &=0x33333333
	and	r1,r1,r7,lsl#2		@ &=0xcccccccc
	orr	r2,r2,r2,lsr#2
	orr	r0,r0,r0,lsl#2
	orr	r3,r3,r3,lsr#2
	orr	r1,r1,r1,lsl#2
	and	r2,r2,r8		@ &=0x0f0f0f0f
	and	r0,r0,r8,lsl#4		@ &=0xf0f0f0f0
	and	r3,r3,r8		@ &=0x0f0f0f0f
	and	r1,r1,r8,lsl#4		@ &=0xf0f0f0f0
	ldmia	r10,{r4,r5}		@ A_flat[i]
	orr	r2,r2,r2,lsr#4
	orr	r0,r0,r0,lsl#4
	orr	r3,r3,r3,lsr#4
	orr	r1,r1,r1,lsl#4
	and	r2,r2,r9		@ &=0x00ff00ff
	and	r0,r0,r9,lsl#8		@ &=0xff00ff00
	and	r3,r3,r9		@ &=0x00ff00ff
	and	r1,r1,r9,lsl#8		@ &=0xff00ff00
	orr	r2,r2,r2,lsr#8
	orr	r0,r0,r0,lsl#8
	orr	r3,r3,r3,lsr#8
	orr	r1,r1,r1,lsl#8

	mov	r2,r2,lsl#16
	mov	r1,r1,lsr#16
	eor	r4,r4,r3,lsl#16
	eor	r5,r5,r0,lsr#16
	eor	r4,r4,r2,lsr#16
	eor	r5,r5,r1,lsl#16
	stmia	r10!,{r4,r5}		@ A_flat[i++] ^= BitInterleave(inp[0..7])

	subs	r14,r14,#8		@ count the block down 8 bytes at a time
	bhi	.Loop_block

	str	r11,[sp,#476]		@ save advanced inp

	bl	KeccakF1600_int

	add	r14,sp,#456
	ldmia	r14,{r6,r7,r8,r9,r10,r11,r12,r14}	@ restore constants and variables
	b	.Loop_absorb

.align	4
.Labsorbed:
	@ r10 = A (reloaded from sp+472 above), r12 = remaining length
	add	r11,sp,#40
	ldmia	sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ return A[5][5]
	ldmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}

.Labsorb_abort:
	add	sp,sp,#456+32		@ drop locals and the saved r0-r3 slots
	mov	r0,r12			@ return value
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	SHA3_absorb,.-SHA3_absorb

@-----------------------------------------------------------------------
@ SHA3_squeeze: de-interleave state lanes and write them to the output
@ as bytes, calling KeccakF1600 whenever a full block of bsz bytes has
@ been emitted (integer-only code path).
@ In:    r0 = A, r1 = out, r2 = len, r3 = bsz,
@        first stack argument = next; when it equals 1 the state is
@        permuted before the first byte is produced
@ Out:   nothing is returned in a register by this code
@ Frame: r0, r3-r10, lr pushed, then the four masks r6-r9, giving
@          sp+0..12  masks 0x55555555, 0x33333333, 0x0f0f0f0f,
@                    0x00ff00ff
@          sp+16     saved r0 (A)
@          sp+20     saved r3 (bsz)
@        which is what the six-register ldmia after KeccakF1600
@        reloads.
@ Live:  r4 = out cursor, r5 = bytes still to write, r10 = lane cursor,
@        r12 = bytes left in the current block, r14 = A
@ len == 0 returns at once. Without that check control reaches
@ .Lsqueeze_tail with a zero count; the tail stores before it tests the
@ count, the decrement wraps, and bytes are written that the caller
@ did not ask for.
@-----------------------------------------------------------------------
.globl	SHA3_squeeze
.type	SHA3_squeeze,%function
.align	5
SHA3_squeeze:
	stmdb	sp!,{r0,r3-r10,lr}

	mov	r10,r0
	mov	r4,r1
	mov	r5,r2
	mov	r12,r3
	ldr	r0, [sp, #40]		@ next is after the 10 pushed registers (10*4)

#ifdef __thumb2__
	mov	r9,#0x00ff00ff
	mov	r8,#0x0f0f0f0f
	mov	r7,#0x33333333
	mov	r6,#0x55555555
#else
	mov	r6,#0x11		@ compose constants
	mov	r8,#0x0f
	mov	r9,#0xff
	orr	r6,r6,r6,lsl#8
	orr	r8,r8,r8,lsl#8
	orr	r6,r6,r6,lsl#16		@ 0x11111111
	orr	r9,r9,r9,lsl#16		@ 0x00ff00ff
	orr	r8,r8,r8,lsl#16		@ 0x0f0f0f0f
	orr	r7,r6,r6,lsl#1		@ 0x33333333
	orr	r6,r6,r6,lsl#2		@ 0x55555555
#endif
	stmdb	sp!,{r6,r7,r8,r9}

	mov	r14,r10
	cmp	r5,#0			@ len == 0: nothing to write, and the
	beq	.Lsqueeze_done		@ frame is already in its final shape
	cmp	r0, #1
	beq	.Lnext_block
	b	.Loop_squeeze

.align	4
.Loop_squeeze:
	ldmia	r10!,{r0,r1}		@ A_flat[i++]

	mov	r2,r0,lsl#16
	mov	r3,r1,lsl#16		@ r3 = r1 << 16
	mov	r2,r2,lsr#16		@ r2 = r0 & 0x0000ffff
	mov	r1,r1,lsr#16
	mov	r0,r0,lsr#16		@ r0 = r0 >> 16
	mov	r1,r1,lsl#16		@ r1 = r1 & 0xffff0000

	orr	r2,r2,r2,lsl#8
	orr	r3,r3,r3,lsr#8
	orr	r0,r0,r0,lsl#8
	orr	r1,r1,r1,lsr#8
	and	r2,r2,r9		@ &=0x00ff00ff
	and	r3,r3,r9,lsl#8		@ &=0xff00ff00
	and	r0,r0,r9		@ &=0x00ff00ff
	and	r1,r1,r9,lsl#8		@ &=0xff00ff00
	orr	r2,r2,r2,lsl#4
	orr	r3,r3,r3,lsr#4
	orr	r0,r0,r0,lsl#4
	orr	r1,r1,r1,lsr#4
	and	r2,r2,r8		@ &=0x0f0f0f0f
	and	r3,r3,r8,lsl#4		@ &=0xf0f0f0f0
	and	r0,r0,r8		@ &=0x0f0f0f0f
	and	r1,r1,r8,lsl#4		@ &=0xf0f0f0f0
	orr	r2,r2,r2,lsl#2
	orr	r3,r3,r3,lsr#2
	orr	r0,r0,r0,lsl#2
	orr	r1,r1,r1,lsr#2
	and	r2,r2,r7		@ &=0x33333333
	and	r3,r3,r7,lsl#2		@ &=0xcccccccc
	and	r0,r0,r7		@ &=0x33333333
	and	r1,r1,r7,lsl#2		@ &=0xcccccccc
	orr	r2,r2,r2,lsl#1
	orr	r3,r3,r3,lsr#1
	orr	r0,r0,r0,lsl#1
	orr	r1,r1,r1,lsr#1
	and	r2,r2,r6		@ &=0x55555555
	and	r3,r3,r6,lsl#1		@ &=0xaaaaaaaa
	and	r0,r0,r6		@ &=0x55555555
	and	r1,r1,r6,lsl#1		@ &=0xaaaaaaaa

	orr	r2,r2,r3		@ r2 is stored first (4 bytes),
	orr	r0,r0,r1		@ r0 second

	cmp	r5,#8
	blo	.Lsqueeze_tail
	mov	r1,r2,lsr#8
	strb	r2,[r4],#1
	mov	r3,r2,lsr#16
	strb	r1,[r4],#1
	mov	r2,r2,lsr#24
	strb	r3,[r4],#1
	strb	r2,[r4],#1

	mov	r1,r0,lsr#8
	strb	r0,[r4],#1
	mov	r3,r0,lsr#16
	strb	r1,[r4],#1
	mov	r0,r0,lsr#24
	strb	r3,[r4],#1
	strb	r0,[r4],#1
	subs	r5,r5,#8
	beq	.Lsqueeze_done

	subs	r12,r12,#8		@ bsz -= 8
	bhi	.Loop_squeeze
.Lnext_block:
	mov	r0,r14			@ original r10

	bl	KeccakF1600

	ldmia	sp,{r6,r7,r8,r9,r10,r12}	@ restore constants and variables
	mov	r14,r10			@ lr was overwritten by the bl above
	b	.Loop_squeeze

.align	4
.Lsqueeze_tail:
	@ 1..7 bytes remain (r5 != 0 here thanks to the entry check)
	strb	r2,[r4],#1
	mov	r2,r2,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r2,[r4],#1
	mov	r2,r2,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r2,[r4],#1
	mov	r2,r2,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r2,[r4],#1
	subs	r5,r5,#1
	beq	.Lsqueeze_done

	strb	r0,[r4],#1
	mov	r0,r0,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r0,[r4],#1
	mov	r0,r0,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r0,[r4]
	b	.Lsqueeze_done

.align	4
.Lsqueeze_done:
	add	sp,sp,#24		@ 4 masks + saved r0 + saved r3
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	SHA3_squeeze,.-SHA3_squeeze
#if __ARM_MAX_ARCH__>=7
.fpu	neon

@ iotas64: 24 round constants, one 64-bit word per round. They are read
@ in order through r2 by KeccakF1600_neon.
.type	iotas64, %object
.align	5
iotas64:
.quad	0x0000000000000001
.quad	0x0000000000008082
.quad	0x800000000000808a
.quad	0x8000000080008000
.quad	0x000000000000808b
.quad	0x0000000080000001
.quad	0x8000000080008081
.quad	0x8000000000008009
.quad	0x000000000000008a
.quad	0x0000000000000088
.quad	0x0000000080008009
.quad	0x000000008000000a
.quad	0x000000008000808b
.quad	0x800000000000008b
.quad	0x8000000000008089
.quad	0x8000000000008003
.quad	0x8000000000008002
.quad	0x8000000000000080
.quad	0x000000000000800a
.quad	0x800000008000000a
.quad	0x8000000080008081
.quad	0x8000000000008080
.quad	0x0000000080000001
.quad	0x8000000080008008
.size	iotas64,.-iotas64

@-----------------------------------------------------------------------
@ KeccakF1600_neon: 24 rounds over a state that is held entirely in
@ NEON registers. The lane-to-register assignment is the one set up by
@ the vld1 sequences in SHA3_absorb_neon and SHA3_squeeze_neon:
@   A[0][0..4] = d0,d2,d4,d6,d8      A[1][0..4] = d1,d3,d5,d7,d9
@   A[2][0..4] = d10,d12,d14,d16,d18 A[3][0..4] = d11,d13,d15,d17,d19
@   A[4][0..4] = d20..d24
@ In:    r0 = 64-bit aligned buffer. Its first 24 bytes are used as
@             spill space during every round (both callers pass A and
@             write the state back afterwards).
@ Out:   permuted state in the same registers
@ Uses:  r1 (= r0+16), r2 (cursor into iotas64), r3 (round counter),
@        d25-d31 as temporaries, flags
@ The instruction order below is hand-scheduled; several temporaries
@ are reused within a few instructions of their last read.
@-----------------------------------------------------------------------
.type	KeccakF1600_neon, %function
.align	5
KeccakF1600_neon:
	add	r1, r0, #16
	adr	r2, iotas64
	mov	r3, #24			@ loop counter
	b	.Loop_neon

.align	4
.Loop_neon:
	@ Theta
	vst1.64	{q4}, [r0,:64]		@ offload A[0..1][4]
	veor	q13, q0, q5		@ A[0..1][0]^A[2..3][0]
	vst1.64	{d18}, [r1,:64]		@ offload A[2][4]
	veor	q14, q1, q6		@ A[0..1][1]^A[2..3][1]
	veor	q15, q2, q7		@ A[0..1][2]^A[2..3][2]
	veor	d26, d26, d27		@ C[0]=A[0][0]^A[1][0]^A[2][0]^A[3][0]
	veor	d27, d28, d29		@ C[1]=A[0][1]^A[1][1]^A[2][1]^A[3][1]
	veor	q14, q3, q8		@ A[0..1][3]^A[2..3][3]
	veor	q4, q4, q9		@ A[0..1][4]^A[2..3][4]
	veor	d30, d30, d31		@ C[2]=A[0][2]^A[1][2]^A[2][2]^A[3][2]
	veor	d31, d28, d29		@ C[3]=A[0][3]^A[1][3]^A[2][3]^A[3][3]
	veor	d25, d8, d9		@ C[4]=A[0][4]^A[1][4]^A[2][4]^A[3][4]
	veor	q13, q13, q10		@ C[0..1]^=A[4][0..1]
	veor	q14, q15, q11		@ C[2..3]^=A[4][2..3]
	veor	d25, d25, d24		@ C[4]^=A[4][4]

	vadd.u64	q4, q13, q13	@ C[0..1]<<1
	vadd.u64	q15, q14, q14	@ C[2..3]<<1
	vadd.u64	d18, d25, d25	@ C[4]<<1
	vsri.u64	q4, q13, #63	@ ROL64(C[0..1],1)
	vsri.u64	q15, q14, #63	@ ROL64(C[2..3],1)
	vsri.u64	d18, d25, #63	@ ROL64(C[4],1)
	veor	d25, d25, d9		@ D[0] = C[4] ^= ROL64(C[1],1)
	veor	q13, q13, q15		@ D[1..2] = C[0..1] ^ ROL64(C[2..3],1)
	veor	d28, d28, d18		@ D[3] = C[2] ^= ROL64(C[4],1)
	veor	d29, d29, d8		@ D[4] = C[3] ^= ROL64(C[0],1)

	veor	d0, d0, d25		@ A[0][0] ^= C[4]
	veor	d1, d1, d25		@ A[1][0] ^= C[4]
	veor	d10, d10, d25		@ A[2][0] ^= C[4]
	veor	d11, d11, d25		@ A[3][0] ^= C[4]
	veor	d20, d20, d25		@ A[4][0] ^= C[4]

	veor	d2, d2, d26		@ A[0][1] ^= D[1]
	veor	d3, d3, d26		@ A[1][1] ^= D[1]
	veor	d12, d12, d26		@ A[2][1] ^= D[1]
	veor	d13, d13, d26		@ A[3][1] ^= D[1]
	veor	d21, d21, d26		@ A[4][1] ^= D[1]
	vmov	d26, d27

	veor	d6, d6, d28		@ A[0][3] ^= C[2]
	veor	d7, d7, d28		@ A[1][3] ^= C[2]
	veor	d16, d16, d28		@ A[2][3] ^= C[2]
	veor	d17, d17, d28		@ A[3][3] ^= C[2]
	veor	d23, d23, d28		@ A[4][3] ^= C[2]
	vld1.64	{q4}, [r0,:64]		@ restore A[0..1][4]
	vmov	d28, d29

	vld1.64	{d18}, [r1,:64]		@ restore A[2][4]
	veor	q2, q2, q13		@ A[0..1][2] ^= D[2]
	veor	q7, q7, q13		@ A[2..3][2] ^= D[2]
	veor	d22, d22, d27		@ A[4][2] ^= D[2]

	veor	q4, q4, q14		@ A[0..1][4] ^= C[3]
	veor	q9, q9, q14		@ A[2..3][4] ^= C[3]
	veor	d24, d24, d29		@ A[4][4] ^= C[3]

	@ Rho + Pi
	vmov	d26, d2			@ C[1] = A[0][1]
	vshl.u64	d2, d3, #44
	vmov	d27, d4			@ C[2] = A[0][2]
	vshl.u64	d4, d14, #43
	vmov	d28, d6			@ C[3] = A[0][3]
	vshl.u64	d6, d17, #21
	vmov	d29, d8			@ C[4] = A[0][4]
	vshl.u64	d8, d24, #14
	vsri.u64	d2, d3, #64-44	@ A[0][1] = ROL64(A[1][1], rhotates[1][1])
	vsri.u64	d4, d14, #64-43	@ A[0][2] = ROL64(A[2][2], rhotates[2][2])
	vsri.u64	d6, d17, #64-21	@ A[0][3] = ROL64(A[3][3], rhotates[3][3])
	vsri.u64	d8, d24, #64-14	@ A[0][4] = ROL64(A[4][4], rhotates[4][4])

	vshl.u64	d3, d9, #20
	vshl.u64	d14, d16, #25
	vshl.u64	d17, d15, #15
	vshl.u64	d24, d21, #2
	vsri.u64	d3, d9, #64-20	@ A[1][1] = ROL64(A[1][4], rhotates[1][4])
	vsri.u64	d14, d16, #64-25	@ A[2][2] = ROL64(A[2][3], rhotates[2][3])
	vsri.u64	d17, d15, #64-15	@ A[3][3] = ROL64(A[3][2], rhotates[3][2])
	vsri.u64	d24, d21, #64-2	@ A[4][4] = ROL64(A[4][1], rhotates[4][1])

	vshl.u64	d9, d22, #61
	@ vshl.u64	d16, d19, #8
	vshl.u64	d15, d12, #10
	vshl.u64	d21, d7, #55
	vsri.u64	d9, d22, #64-61	@ A[1][4] = ROL64(A[4][2], rhotates[4][2])
	vext.8	d16, d19, d19, #8-1	@ A[2][3] = ROL64(A[3][4], rhotates[3][4])
	vsri.u64	d15, d12, #64-10	@ A[3][2] = ROL64(A[2][1], rhotates[2][1])
	vsri.u64	d21, d7, #64-55	@ A[4][1] = ROL64(A[1][3], rhotates[1][3])

	vshl.u64	d22, d18, #39
	@ vshl.u64	d19, d23, #56
	vshl.u64	d12, d5, #6
	vshl.u64	d7, d13, #45
	vsri.u64	d22, d18, #64-39	@ A[4][2] = ROL64(A[2][4], rhotates[2][4])
	vext.8	d19, d23, d23, #8-7	@ A[3][4] = ROL64(A[4][3], rhotates[4][3])
	vsri.u64	d12, d5, #64-6	@ A[2][1] = ROL64(A[1][2], rhotates[1][2])
	vsri.u64	d7, d13, #64-45	@ A[1][3] = ROL64(A[3][1], rhotates[3][1])

	vshl.u64	d18, d20, #18
	vshl.u64	d23, d11, #41
	vshl.u64	d5, d10, #3
	vshl.u64	d13, d1, #36
	vsri.u64	d18, d20, #64-18	@ A[2][4] = ROL64(A[4][0], rhotates[4][0])
	vsri.u64	d23, d11, #64-41	@ A[4][3] = ROL64(A[3][0], rhotates[3][0])
	vsri.u64	d5, d10, #64-3	@ A[1][2] = ROL64(A[2][0], rhotates[2][0])
	vsri.u64	d13, d1, #64-36	@ A[3][1] = ROL64(A[1][0], rhotates[1][0])

	vshl.u64	d1, d28, #28
	vshl.u64	d10, d26, #1
	vshl.u64	d11, d29, #27
	vshl.u64	d20, d27, #62
	vsri.u64	d1, d28, #64-28	@ A[1][0] = ROL64(C[3], rhotates[0][3])
	vsri.u64	d10, d26, #64-1	@ A[2][0] = ROL64(C[1], rhotates[0][1])
	vsri.u64	d11, d29, #64-27	@ A[3][0] = ROL64(C[4], rhotates[0][4])
	vsri.u64	d20, d27, #64-62	@ A[4][0] = ROL64(C[2], rhotates[0][2])

	@ Chi + Iota
	vbic	q13, q2, q1
	vbic	q14, q3, q2
	vbic	q15, q4, q3
	veor	q13, q13, q0		@ A[0..1][0] ^ (~A[0..1][1] & A[0..1][2])
	veor	q14, q14, q1		@ A[0..1][1] ^ (~A[0..1][2] & A[0..1][3])
	veor	q2, q2, q15		@ A[0..1][2] ^= (~A[0..1][3] & A[0..1][4])
	vst1.64	{q13}, [r0,:64]		@ offload A[0..1][0]
	vbic	q13, q0, q4
	vbic	q15, q1, q0
	vmov	q1, q14			@ A[0..1][1]
	veor	q3, q3, q13		@ A[0..1][3] ^= (~A[0..1][4] & A[0..1][0])
	veor	q4, q4, q15		@ A[0..1][4] ^= (~A[0..1][0] & A[0..1][1])

	vbic	q13, q7, q6
	vmov	q0, q5			@ A[2..3][0]
	vbic	q14, q8, q7
	vmov	q15, q6			@ A[2..3][1]
	veor	q5, q5, q13		@ A[2..3][0] ^= (~A[2..3][1] & A[2..3][2])
	vbic	q13, q9, q8
	veor	q6, q6, q14		@ A[2..3][1] ^= (~A[2..3][2] & A[2..3][3])
	vbic	q14, q0, q9
	veor	q7, q7, q13		@ A[2..3][2] ^= (~A[2..3][3] & A[2..3][4])
	vbic	q13, q15, q0
	veor	q8, q8, q14		@ A[2..3][3] ^= (~A[2..3][4] & A[2..3][0])
	vmov	q14, q10		@ A[4][0..1]
	veor	q9, q9, q13		@ A[2..3][4] ^= (~A[2..3][0] & A[2..3][1])

	vld1.64	d25, [r2,:64]!		@ Iota[i++]
	vbic	d26, d22, d21
	vbic	d27, d23, d22
	vld1.64	{q0}, [r0,:64]		@ restore A[0..1][0]
	veor	d20, d20, d26		@ A[4][0] ^= (~A[4][1] & A[4][2])
	vbic	d26, d24, d23
	veor	d21, d21, d27		@ A[4][1] ^= (~A[4][2] & A[4][3])
	vbic	d27, d28, d24
	veor	d22, d22, d26		@ A[4][2] ^= (~A[4][3] & A[4][4])
	vbic	d26, d29, d28
	veor	d23, d23, d27		@ A[4][3] ^= (~A[4][4] & A[4][0])
	veor	d0, d0, d25		@ A[0][0] ^= Iota[i]
	veor	d24, d24, d26		@ A[4][4] ^= (~A[4][0] & A[4][1])

	subs	r3, r3, #1
	bne	.Loop_neon

	bx	lr
.size	KeccakF1600_neon,.-KeccakF1600_neon

@-----------------------------------------------------------------------
@ SHA3_absorb_neon: XOR whole input blocks into the state and permute
@ after each block (NEON code path).
@ In:    r0 = A, r1 = inp, r2 = len, r3 = bsz
@ Out:   r0 = number of input bytes left unprocessed
@ Saves: r4-r6, lr and d8-d15
@ Live:  r4 = inp cursor, r5 = remaining length, r6 = bsz
@ The state stays in NEON registers across all blocks and is written
@ back to A once, at .Labsorbed_neon. The absorb step is a straight
@ run of 8-byte loads; after each pair of lanes bsz is compared with a
@ multiple of 16, blo leaves after the odd lane and beq after the even
@ one, so exactly bsz/8 lanes are XORed.
@-----------------------------------------------------------------------
.globl	SHA3_absorb_neon
.type	SHA3_absorb_neon, %function
.align	5
SHA3_absorb_neon:
	stmdb	sp!, {r4,r5,r6,lr}
	vstmdb	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}

	mov	r4, r1			@ inp
	mov	r5, r2			@ len
	mov	r6, r3			@ bsz

	vld1.32	{d0}, [r0,:64]!		@ A[0][0]
	vld1.32	{d2}, [r0,:64]!		@ A[0][1]
	vld1.32	{d4}, [r0,:64]!		@ A[0][2]
	vld1.32	{d6}, [r0,:64]!		@ A[0][3]
	vld1.32	{d8}, [r0,:64]!		@ A[0][4]

	vld1.32	{d1}, [r0,:64]!		@ A[1][0]
	vld1.32	{d3}, [r0,:64]!		@ A[1][1]
	vld1.32	{d5}, [r0,:64]!		@ A[1][2]
	vld1.32	{d7}, [r0,:64]!		@ A[1][3]
	vld1.32	{d9}, [r0,:64]!		@ A[1][4]

	vld1.32	{d10}, [r0,:64]!	@ A[2][0]
	vld1.32	{d12}, [r0,:64]!	@ A[2][1]
	vld1.32	{d14}, [r0,:64]!	@ A[2][2]
	vld1.32	{d16}, [r0,:64]!	@ A[2][3]
	vld1.32	{d18}, [r0,:64]!	@ A[2][4]

	vld1.32	{d11}, [r0,:64]!	@ A[3][0]
	vld1.32	{d13}, [r0,:64]!	@ A[3][1]
	vld1.32	{d15}, [r0,:64]!	@ A[3][2]
	vld1.32	{d17}, [r0,:64]!	@ A[3][3]
	vld1.32	{d19}, [r0,:64]!	@ A[3][4]

	vld1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..3]
	vld1.32	{d24}, [r0,:64]		@ A[4][4]
	sub	r0, r0, #24*8		@ rewind
	b	.Loop_absorb_neon

.align	4
.Loop_absorb_neon:
	subs	r12, r5, r6		@ len - bsz
	blo	.Labsorbed_neon
	mov	r5, r12

	vld1.8	{d31}, [r4]!		@ endian-neutral loads...
	cmp	r6, #8*2
	veor	d0, d0, d31		@ A[0][0] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d2, d2, d31		@ A[0][1] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*4
	veor	d4, d4, d31		@ A[0][2] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d6, d6, d31		@ A[0][3] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31},[r4]!
	cmp	r6, #8*6
	veor	d8, d8, d31		@ A[0][4] ^= *inp++
	blo	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	veor	d1, d1, d31		@ A[1][0] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*8
	veor	d3, d3, d31		@ A[1][1] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d5, d5, d31		@ A[1][2] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*10
	veor	d7, d7, d31		@ A[1][3] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d9, d9, d31		@ A[1][4] ^= *inp++
	beq	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	cmp	r6, #8*12
	veor	d10, d10, d31		@ A[2][0] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d12, d12, d31		@ A[2][1] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*14
	veor	d14, d14, d31		@ A[2][2] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d16, d16, d31		@ A[2][3] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*16
	veor	d18, d18, d31		@ A[2][4] ^= *inp++
	blo	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	veor	d11, d11, d31		@ A[3][0] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*18
	veor	d13, d13, d31		@ A[3][1] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d15, d15, d31		@ A[3][2] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*20
	veor	d17, d17, d31		@ A[3][3] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d19, d19, d31		@ A[3][4] ^= *inp++
	beq	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	cmp	r6, #8*22
	veor	d20, d20, d31		@ A[4][0] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d21, d21, d31		@ A[4][1] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*24
	veor	d22, d22, d31		@ A[4][2] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d23, d23, d31		@ A[4][3] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d24, d24, d31		@ A[4][4] ^= *inp++

.Lprocess_neon:
	bl	KeccakF1600_neon
	b	.Loop_absorb_neon

.align	4
.Labsorbed_neon:
	vst1.32	{d0}, [r0,:64]!		@ A[0][0..4]
	vst1.32	{d2}, [r0,:64]!
	vst1.32	{d4}, [r0,:64]!
	vst1.32	{d6}, [r0,:64]!
	vst1.32	{d8}, [r0,:64]!

	vst1.32	{d1}, [r0,:64]!		@ A[1][0..4]
	vst1.32	{d3}, [r0,:64]!
	vst1.32	{d5}, [r0,:64]!
	vst1.32	{d7}, [r0,:64]!
	vst1.32	{d9}, [r0,:64]!

	vst1.32	{d10}, [r0,:64]!	@ A[2][0..4]
	vst1.32	{d12}, [r0,:64]!
	vst1.32	{d14}, [r0,:64]!
	vst1.32	{d16}, [r0,:64]!
	vst1.32	{d18}, [r0,:64]!

	vst1.32	{d11}, [r0,:64]!	@ A[3][0..4]
	vst1.32	{d13}, [r0,:64]!
	vst1.32	{d15}, [r0,:64]!
	vst1.32	{d17}, [r0,:64]!
	vst1.32	{d19}, [r0,:64]!

	vst1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
	vst1.32	{d24}, [r0,:64]

	mov	r0, r5			@ return value
	vldmia	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!, {r4,r5,r6,pc}
.size	SHA3_absorb_neon,.-SHA3_absorb_neon

@-----------------------------------------------------------------------
@ SHA3_squeeze_neon: copy state lanes to the output, calling
@ KeccakF1600_neon whenever a full block of bsz bytes has been emitted
@ (NEON code path).
@ In:    r0 = A, r1 = out, r2 = len, r3 = bsz
@        Only these four register arguments are read; this routine
@        does not look at the stack for further arguments.
@ Out:   nothing is returned in a register by this code
@ Saves: r4-r6, lr; d8-d15 are saved only around the permutation
@ Live:  r4 = out cursor, r5 = bytes still to write, r6 = bsz,
@        r12 = lane cursor into A, r14 = bytes left in current block
@ len == 0 returns at once. Without that check control reaches
@ .Lsqueeze_neon_tail with a zero count, and the tail stores its first
@ byte before testing the count.
@-----------------------------------------------------------------------
.globl	SHA3_squeeze_neon
.type	SHA3_squeeze_neon, %function
.align	5
SHA3_squeeze_neon:
	stmdb	sp!, {r4,r5,r6,lr}

	mov	r4, r1			@ out
	mov	r5, r2			@ len
	mov	r6, r3			@ bsz
	mov	r12, r0			@ A_flat
	mov	r14, r3			@ bsz
	cmp	r5, #0			@ len == 0: nothing to write
	beq	.Lsqueeze_neon_done
	b	.Loop_squeeze_neon

.align	4
.Loop_squeeze_neon:
	cmp	r5, #8
	blo	.Lsqueeze_neon_tail
	vld1.32	{d0}, [r12]!
	vst1.8	{d0}, [r4]!		@ endian-neutral store

	subs	r5, r5, #8		@ len -= 8
	beq	.Lsqueeze_neon_done

	subs	r14, r14, #8		@ bsz -= 8
	bhi	.Loop_squeeze_neon

	@ block exhausted: load the state, permute, store it back
	vstmdb	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}

	vld1.32	{d0}, [r0,:64]!		@ A[0][0..4]
	vld1.32	{d2}, [r0,:64]!
	vld1.32	{d4}, [r0,:64]!
	vld1.32	{d6}, [r0,:64]!
	vld1.32	{d8}, [r0,:64]!

	vld1.32	{d1}, [r0,:64]!		@ A[1][0..4]
	vld1.32	{d3}, [r0,:64]!
	vld1.32	{d5}, [r0,:64]!
	vld1.32	{d7}, [r0,:64]!
	vld1.32	{d9}, [r0,:64]!

	vld1.32	{d10}, [r0,:64]!	@ A[2][0..4]
	vld1.32	{d12}, [r0,:64]!
	vld1.32	{d14}, [r0,:64]!
	vld1.32	{d16}, [r0,:64]!
	vld1.32	{d18}, [r0,:64]!

	vld1.32	{d11}, [r0,:64]!	@ A[3][0..4]
	vld1.32	{d13}, [r0,:64]!
	vld1.32	{d15}, [r0,:64]!
	vld1.32	{d17}, [r0,:64]!
	vld1.32	{d19}, [r0,:64]!

	vld1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
	vld1.32	{d24}, [r0,:64]
	sub	r0, r0, #24*8		@ rewind

	bl	KeccakF1600_neon

	mov	r12, r0			@ A_flat
	vst1.32	{d0}, [r0,:64]!		@ A[0][0..4]
	vst1.32	{d2}, [r0,:64]!
	vst1.32	{d4}, [r0,:64]!
	vst1.32	{d6}, [r0,:64]!
	vst1.32	{d8}, [r0,:64]!

	vst1.32	{d1}, [r0,:64]!		@ A[1][0..4]
	vst1.32	{d3}, [r0,:64]!
	vst1.32	{d5}, [r0,:64]!
	vst1.32	{d7}, [r0,:64]!
	vst1.32	{d9}, [r0,:64]!

	vst1.32	{d10}, [r0,:64]!	@ A[2][0..4]
	vst1.32	{d12}, [r0,:64]!
	vst1.32	{d14}, [r0,:64]!
	vst1.32	{d16}, [r0,:64]!
	vst1.32	{d18}, [r0,:64]!

	vst1.32	{d11}, [r0,:64]!	@ A[3][0..4]
	vst1.32	{d13}, [r0,:64]!
	vst1.32	{d15}, [r0,:64]!
	vst1.32	{d17}, [r0,:64]!
	vst1.32	{d19}, [r0,:64]!

	vst1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
	mov	r14, r6			@ bsz
	vst1.32	{d24}, [r0,:64]
	mov	r0, r12			@ rewind

	vldmia	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
	b	.Loop_squeeze_neon

.align	4
.Lsqueeze_neon_tail:
	@ 1..7 bytes remain (r5 != 0 here thanks to the entry check)
	ldmia	r12, {r2,r3}
	cmp	r5, #2
	strb	r2, [r4],#1		@ endian-neutral store
	mov	r2, r2, lsr#8
	blo	.Lsqueeze_neon_done
	strb	r2, [r4], #1
	mov	r2, r2, lsr#8
	beq	.Lsqueeze_neon_done
	strb	r2, [r4], #1
	mov	r2, r2, lsr#8
	cmp	r5, #4
	blo	.Lsqueeze_neon_done
	strb	r2, [r4], #1
	beq	.Lsqueeze_neon_done

	strb	r3, [r4], #1
	mov	r3, r3, lsr#8
	cmp	r5, #6
	blo	.Lsqueeze_neon_done
	strb	r3, [r4], #1
	mov	r3, r3, lsr#8
	beq	.Lsqueeze_neon_done
	strb	r3, [r4], #1

.Lsqueeze_neon_done:
	ldmia	sp!, {r4,r5,r6,pc}
.size	SHA3_squeeze_neon,.-SHA3_squeeze_neon
#endif
@ ASCII: Keccak-1600 absorb and squeeze for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>
.byte	75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2