1/* Do not modify. This file is auto-generated from keccak1600-armv4.pl. */ 2#include "arm_arch.h" 3 4#if defined(__thumb2__) 5.syntax unified 6.thumb 7#else 8.code 32 9#endif 10 11.text 12 13.type iotas32, %object 14.align 5 15iotas32: 16.long 0x00000001, 0x00000000 17.long 0x00000000, 0x00000089 18.long 0x00000000, 0x8000008b 19.long 0x00000000, 0x80008080 20.long 0x00000001, 0x0000008b 21.long 0x00000001, 0x00008000 22.long 0x00000001, 0x80008088 23.long 0x00000001, 0x80000082 24.long 0x00000000, 0x0000000b 25.long 0x00000000, 0x0000000a 26.long 0x00000001, 0x00008082 27.long 0x00000000, 0x00008003 28.long 0x00000001, 0x0000808b 29.long 0x00000001, 0x8000000b 30.long 0x00000001, 0x8000008a 31.long 0x00000001, 0x80000081 32.long 0x00000000, 0x80000081 33.long 0x00000000, 0x80000008 34.long 0x00000000, 0x00000083 35.long 0x00000000, 0x80008003 36.long 0x00000001, 0x80008088 37.long 0x00000000, 0x80000088 38.long 0x00000001, 0x00008000 39.long 0x00000000, 0x80008082 40.size iotas32,.-iotas32 41 42.type KeccakF1600_int, %function 43.align 5 44KeccakF1600_int: 45 add r9,sp,#176 46 add r12,sp,#0 47 add r10,sp,#40 48 ldmia r9,{r4,r5,r6,r7,r8,r9} @ A[4][2..4] 49KeccakF1600_enter: 50 str lr,[sp,#440] 51 eor r11,r11,r11 52 str r11,[sp,#444] 53 b .Lround2x 54 55.align 4 56.Lround2x: 57 ldmia r12,{r0,r1,r2,r3} @ A[0][0..1] 58 ldmia r10,{r10,r11,r12,r14} @ A[1][0..1] 59#ifdef __thumb2__ 60 eor r0,r0,r10 61 eor r1,r1,r11 62 eor r2,r2,r12 63 ldrd r10,r11,[sp,#56] 64 eor r3,r3,r14 65 ldrd r12,r14,[sp,#64] 66 eor r4,r4,r10 67 eor r5,r5,r11 68 eor r6,r6,r12 69 ldrd r10,r11,[sp,#72] 70 eor r7,r7,r14 71 ldrd r12,r14,[sp,#80] 72 eor r8,r8,r10 73 eor r9,r9,r11 74 eor r0,r0,r12 75 ldrd r10,r11,[sp,#88] 76 eor r1,r1,r14 77 ldrd r12,r14,[sp,#96] 78 eor r2,r2,r10 79 eor r3,r3,r11 80 eor r4,r4,r12 81 ldrd r10,r11,[sp,#104] 82 eor r5,r5,r14 83 ldrd r12,r14,[sp,#112] 84 eor r6,r6,r10 85 eor r7,r7,r11 86 eor r8,r8,r12 87 ldrd r10,r11,[sp,#120] 88 eor r9,r9,r14 89 ldrd r12,r14,[sp,#128] 90 
eor r0,r0,r10 91 eor r1,r1,r11 92 eor r2,r2,r12 93 ldrd r10,r11,[sp,#136] 94 eor r3,r3,r14 95 ldrd r12,r14,[sp,#144] 96 eor r4,r4,r10 97 eor r5,r5,r11 98 eor r6,r6,r12 99 ldrd r10,r11,[sp,#152] 100 eor r7,r7,r14 101 ldrd r12,r14,[sp,#160] 102 eor r8,r8,r10 103 eor r9,r9,r11 104 eor r0,r0,r12 105 ldrd r10,r11,[sp,#168] 106 eor r1,r1,r14 107 ldrd r12,r14,[sp,#16] 108 eor r2,r2,r10 109 eor r3,r3,r11 110 eor r4,r4,r12 111 ldrd r10,r11,[sp,#24] 112 eor r5,r5,r14 113 ldrd r12,r14,[sp,#32] 114#else 115 eor r0,r0,r10 116 add r10,sp,#56 117 eor r1,r1,r11 118 eor r2,r2,r12 119 eor r3,r3,r14 120 ldmia r10,{r10,r11,r12,r14} @ A[1][2..3] 121 eor r4,r4,r10 122 add r10,sp,#72 123 eor r5,r5,r11 124 eor r6,r6,r12 125 eor r7,r7,r14 126 ldmia r10,{r10,r11,r12,r14} @ A[1][4]..A[2][0] 127 eor r8,r8,r10 128 add r10,sp,#88 129 eor r9,r9,r11 130 eor r0,r0,r12 131 eor r1,r1,r14 132 ldmia r10,{r10,r11,r12,r14} @ A[2][1..2] 133 eor r2,r2,r10 134 add r10,sp,#104 135 eor r3,r3,r11 136 eor r4,r4,r12 137 eor r5,r5,r14 138 ldmia r10,{r10,r11,r12,r14} @ A[2][3..4] 139 eor r6,r6,r10 140 add r10,sp,#120 141 eor r7,r7,r11 142 eor r8,r8,r12 143 eor r9,r9,r14 144 ldmia r10,{r10,r11,r12,r14} @ A[3][0..1] 145 eor r0,r0,r10 146 add r10,sp,#136 147 eor r1,r1,r11 148 eor r2,r2,r12 149 eor r3,r3,r14 150 ldmia r10,{r10,r11,r12,r14} @ A[3][2..3] 151 eor r4,r4,r10 152 add r10,sp,#152 153 eor r5,r5,r11 154 eor r6,r6,r12 155 eor r7,r7,r14 156 ldmia r10,{r10,r11,r12,r14} @ A[3][4]..A[4][0] 157 eor r8,r8,r10 158 ldr r10,[sp,#168] @ A[4][1] 159 eor r9,r9,r11 160 ldr r11,[sp,#168+4] 161 eor r0,r0,r12 162 ldr r12,[sp,#16] @ A[0][2] 163 eor r1,r1,r14 164 ldr r14,[sp,#16+4] 165 eor r2,r2,r10 166 add r10,sp,#24 167 eor r3,r3,r11 168 eor r4,r4,r12 169 eor r5,r5,r14 170 ldmia r10,{r10,r11,r12,r14} @ A[0][3..4] 171#endif 172 eor r6,r6,r10 173 eor r7,r7,r11 174 eor r8,r8,r12 175 eor r9,r9,r14 176 177 eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0]; 178#ifndef __thumb2__ 179 str r10,[sp,#208] @ D[1] = E[0] 180#endif 181 
eor r11,r1,r4 182#ifndef __thumb2__ 183 str r11,[sp,#208+4] 184#else 185 strd r10,r11,[sp,#208] @ D[1] = E[0] 186#endif 187 eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3]; 188 eor r14,r7,r0 189#ifndef __thumb2__ 190 str r12,[sp,#232] @ D[4] = E[1] 191#endif 192 eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4]; 193#ifndef __thumb2__ 194 str r14,[sp,#232+4] 195#else 196 strd r12,r14,[sp,#232] @ D[4] = E[1] 197#endif 198 eor r1,r9,r2 199#ifndef __thumb2__ 200 str r0,[sp,#200] @ D[0] = C[0] 201#endif 202 eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1]; 203#ifndef __thumb2__ 204 ldr r7,[sp,#144] 205#endif 206 eor r3,r3,r6 207#ifndef __thumb2__ 208 str r1,[sp,#200+4] 209#else 210 strd r0,r1,[sp,#200] @ D[0] = C[0] 211#endif 212#ifndef __thumb2__ 213 ldr r6,[sp,#144+4] 214#else 215 ldrd r7,r6,[sp,#144] 216#endif 217#ifndef __thumb2__ 218 str r2,[sp,#216] @ D[2] = C[1] 219#endif 220 eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2]; 221#ifndef __thumb2__ 222 str r3,[sp,#216+4] 223#else 224 strd r2,r3,[sp,#216] @ D[2] = C[1] 225#endif 226 eor r5,r5,r8 227 228#ifndef __thumb2__ 229 ldr r8,[sp,#192] 230#endif 231#ifndef __thumb2__ 232 ldr r9,[sp,#192+4] 233#else 234 ldrd r8,r9,[sp,#192] 235#endif 236#ifndef __thumb2__ 237 str r4,[sp,#224] @ D[3] = C[2] 238#endif 239 eor r7,r7,r4 240#ifndef __thumb2__ 241 str r5,[sp,#224+4] 242#else 243 strd r4,r5,[sp,#224] @ D[3] = C[2] 244#endif 245 eor r6,r6,r5 246#ifndef __thumb2__ 247 ldr r4,[sp,#0] 248#endif 249 @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */ 250 @ mov r6,r6,ror#32-11 251#ifndef __thumb2__ 252 ldr r5,[sp,#0+4] 253#else 254 ldrd r4,r5,[sp,#0] 255#endif 256 eor r8,r8,r12 257 eor r9,r9,r14 258#ifndef __thumb2__ 259 ldr r12,[sp,#96] 260#endif 261 eor r0,r0,r4 262#ifndef __thumb2__ 263 ldr r14,[sp,#96+4] 264#else 265 ldrd r12,r14,[sp,#96] 266#endif 267 @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */ 268 @ mov r9,r9,ror#32-7 269 eor r1,r1,r5 @ 
C[0] = A[0][0] ^ C[0]; 270 eor r12,r12,r2 271#ifndef __thumb2__ 272 ldr r2,[sp,#48] 273#endif 274 eor r14,r14,r3 275#ifndef __thumb2__ 276 ldr r3,[sp,#48+4] 277#else 278 ldrd r2,r3,[sp,#48] 279#endif 280 mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); 281 ldr r12,[sp,#444] @ load counter 282 eor r2,r2,r10 283 adr r10,iotas32 284 mov r4,r14,ror#32-22 285 add r14,r10,r12 286 eor r3,r3,r11 287 ldmia r14,{r10,r11} @ iotas[i] 288 bic r12,r4,r2,ror#32-22 289 bic r14,r5,r3,ror#32-22 290 mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); 291 mov r3,r3,ror#32-22 292 eor r12,r12,r0 293 eor r14,r14,r1 294 eor r10,r10,r12 295 eor r11,r11,r14 296#ifndef __thumb2__ 297 str r10,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 298#endif 299 bic r12,r6,r4,ror#11 300#ifndef __thumb2__ 301 str r11,[sp,#240+4] 302#else 303 strd r10,r11,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 304#endif 305 bic r14,r7,r5,ror#10 306 bic r10,r8,r6,ror#32-(11-7) 307 bic r11,r9,r7,ror#32-(10-7) 308 eor r12,r2,r12,ror#32-11 309#ifndef __thumb2__ 310 str r12,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 311#endif 312 eor r14,r3,r14,ror#32-10 313#ifndef __thumb2__ 314 str r14,[sp,#248+4] 315#else 316 strd r12,r14,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 317#endif 318 eor r10,r4,r10,ror#32-7 319 eor r11,r5,r11,ror#32-7 320#ifndef __thumb2__ 321 str r10,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 322#endif 323 bic r12,r0,r8,ror#32-7 324#ifndef __thumb2__ 325 str r11,[sp,#256+4] 326#else 327 strd r10,r11,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 328#endif 329 bic r14,r1,r9,ror#32-7 330 eor r12,r12,r6,ror#32-11 331#ifndef __thumb2__ 332 str r12,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 333#endif 334 eor r14,r14,r7,ror#32-10 335#ifndef __thumb2__ 336 str r14,[sp,#264+4] 337#else 338 strd r12,r14,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 339#endif 340 bic r10,r2,r0 341 add r14,sp,#224 342#ifndef __thumb2__ 343 ldr r0,[sp,#24] @ A[0][3] 
344#endif 345 bic r11,r3,r1 346#ifndef __thumb2__ 347 ldr r1,[sp,#24+4] 348#else 349 ldrd r0,r1,[sp,#24] @ A[0][3] 350#endif 351 eor r10,r10,r8,ror#32-7 352 eor r11,r11,r9,ror#32-7 353#ifndef __thumb2__ 354 str r10,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 355#endif 356 add r9,sp,#200 357#ifndef __thumb2__ 358 str r11,[sp,#272+4] 359#else 360 strd r10,r11,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 361#endif 362 363 ldmia r14,{r10,r11,r12,r14} @ D[3..4] 364 ldmia r9,{r6,r7,r8,r9} @ D[0..1] 365 366#ifndef __thumb2__ 367 ldr r2,[sp,#72] @ A[1][4] 368#endif 369 eor r0,r0,r10 370#ifndef __thumb2__ 371 ldr r3,[sp,#72+4] 372#else 373 ldrd r2,r3,[sp,#72] @ A[1][4] 374#endif 375 eor r1,r1,r11 376 @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]); 377#ifndef __thumb2__ 378 ldr r10,[sp,#128] @ A[3][1] 379#endif 380 @ mov r1,r1,ror#32-14 381#ifndef __thumb2__ 382 ldr r11,[sp,#128+4] 383#else 384 ldrd r10,r11,[sp,#128] @ A[3][1] 385#endif 386 387 eor r2,r2,r12 388#ifndef __thumb2__ 389 ldr r4,[sp,#80] @ A[2][0] 390#endif 391 eor r3,r3,r14 392#ifndef __thumb2__ 393 ldr r5,[sp,#80+4] 394#else 395 ldrd r4,r5,[sp,#80] @ A[2][0] 396#endif 397 @ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]); 398 @ mov r3,r3,ror#32-10 399 400 eor r6,r6,r4 401#ifndef __thumb2__ 402 ldr r12,[sp,#216] @ D[2] 403#endif 404 eor r7,r7,r5 405#ifndef __thumb2__ 406 ldr r14,[sp,#216+4] 407#else 408 ldrd r12,r14,[sp,#216] @ D[2] 409#endif 410 mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]); 411 mov r4,r7,ror#32-2 412 413 eor r10,r10,r8 414#ifndef __thumb2__ 415 ldr r8,[sp,#176] @ A[4][2] 416#endif 417 eor r11,r11,r9 418#ifndef __thumb2__ 419 ldr r9,[sp,#176+4] 420#else 421 ldrd r8,r9,[sp,#176] @ A[4][2] 422#endif 423 mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]); 424 mov r6,r11,ror#32-23 425 426 bic r10,r4,r2,ror#32-10 427 bic r11,r5,r3,ror#32-10 428 eor r12,r12,r8 429 eor r14,r14,r9 430 mov r9,r12,ror#32-30 @ C[4] = 
ROL64(A[4][2] ^ D[2], rhotates[4][2]); 431 mov r8,r14,ror#32-31 432 eor r10,r10,r0,ror#32-14 433 eor r11,r11,r1,ror#32-14 434#ifndef __thumb2__ 435 str r10,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 436#endif 437 bic r12,r6,r4 438#ifndef __thumb2__ 439 str r11,[sp,#280+4] 440#else 441 strd r10,r11,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 442#endif 443 bic r14,r7,r5 444 eor r12,r12,r2,ror#32-10 445#ifndef __thumb2__ 446 str r12,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 447#endif 448 eor r14,r14,r3,ror#32-10 449#ifndef __thumb2__ 450 str r14,[sp,#288+4] 451#else 452 strd r12,r14,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 453#endif 454 bic r10,r8,r6 455 bic r11,r9,r7 456 bic r12,r0,r8,ror#14 457 bic r14,r1,r9,ror#14 458 eor r10,r10,r4 459 eor r11,r11,r5 460#ifndef __thumb2__ 461 str r10,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 462#endif 463 bic r2,r2,r0,ror#32-(14-10) 464#ifndef __thumb2__ 465 str r11,[sp,#296+4] 466#else 467 strd r10,r11,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 468#endif 469 eor r12,r6,r12,ror#32-14 470 bic r11,r3,r1,ror#32-(14-10) 471#ifndef __thumb2__ 472 str r12,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 473#endif 474 eor r14,r7,r14,ror#32-14 475#ifndef __thumb2__ 476 str r14,[sp,#304+4] 477#else 478 strd r12,r14,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 479#endif 480 add r12,sp,#208 481#ifndef __thumb2__ 482 ldr r1,[sp,#8] @ A[0][1] 483#endif 484 eor r10,r8,r2,ror#32-10 485#ifndef __thumb2__ 486 ldr r0,[sp,#8+4] 487#else 488 ldrd r1,r0,[sp,#8] @ A[0][1] 489#endif 490 eor r11,r9,r11,ror#32-10 491#ifndef __thumb2__ 492 str r10,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 493#endif 494#ifndef __thumb2__ 495 str r11,[sp,#312+4] 496#else 497 strd r10,r11,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 498#endif 499 500 add r9,sp,#224 501 ldmia r12,{r10,r11,r12,r14} @ D[1..2] 502#ifndef __thumb2__ 503 ldr r2,[sp,#56] @ A[1][2] 504#endif 505#ifndef __thumb2__ 506 ldr r3,[sp,#56+4] 507#else 508 ldrd r2,r3,[sp,#56] @ A[1][2] 509#endif 
510 ldmia r9,{r6,r7,r8,r9} @ D[3..4] 511 512 eor r1,r1,r10 513#ifndef __thumb2__ 514 ldr r4,[sp,#104] @ A[2][3] 515#endif 516 eor r0,r0,r11 517#ifndef __thumb2__ 518 ldr r5,[sp,#104+4] 519#else 520 ldrd r4,r5,[sp,#104] @ A[2][3] 521#endif 522 mov r0,r0,ror#32-1 @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]); 523 524 eor r2,r2,r12 525#ifndef __thumb2__ 526 ldr r10,[sp,#152] @ A[3][4] 527#endif 528 eor r3,r3,r14 529#ifndef __thumb2__ 530 ldr r11,[sp,#152+4] 531#else 532 ldrd r10,r11,[sp,#152] @ A[3][4] 533#endif 534 @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]); 535#ifndef __thumb2__ 536 ldr r12,[sp,#200] @ D[0] 537#endif 538 @ mov r3,r3,ror#32-3 539#ifndef __thumb2__ 540 ldr r14,[sp,#200+4] 541#else 542 ldrd r12,r14,[sp,#200] @ D[0] 543#endif 544 545 eor r4,r4,r6 546 eor r5,r5,r7 547 @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]); 548 @ mov r4,r7,ror#32-13 @ [track reverse order below] 549 550 eor r10,r10,r8 551#ifndef __thumb2__ 552 ldr r8,[sp,#160] @ A[4][0] 553#endif 554 eor r11,r11,r9 555#ifndef __thumb2__ 556 ldr r9,[sp,#160+4] 557#else 558 ldrd r8,r9,[sp,#160] @ A[4][0] 559#endif 560 mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]); 561 mov r7,r11,ror#32-4 562 563 eor r12,r12,r8 564 eor r14,r14,r9 565 mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]); 566 mov r9,r14,ror#32-9 567 568 bic r10,r5,r2,ror#13-3 569 bic r11,r4,r3,ror#12-3 570 bic r12,r6,r5,ror#32-13 571 bic r14,r7,r4,ror#32-12 572 eor r10,r0,r10,ror#32-13 573 eor r11,r1,r11,ror#32-12 574#ifndef __thumb2__ 575 str r10,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 576#endif 577 eor r12,r12,r2,ror#32-3 578#ifndef __thumb2__ 579 str r11,[sp,#320+4] 580#else 581 strd r10,r11,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 582#endif 583 eor r14,r14,r3,ror#32-3 584#ifndef __thumb2__ 585 str r12,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 586#endif 587 bic r10,r8,r6 588 bic r11,r9,r7 589#ifndef __thumb2__ 590 str r14,[sp,#328+4] 
591#else 592 strd r12,r14,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 593#endif 594 eor r10,r10,r5,ror#32-13 595 eor r11,r11,r4,ror#32-12 596#ifndef __thumb2__ 597 str r10,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 598#endif 599 bic r12,r0,r8 600#ifndef __thumb2__ 601 str r11,[sp,#336+4] 602#else 603 strd r10,r11,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 604#endif 605 bic r14,r1,r9 606 eor r12,r12,r6 607 eor r14,r14,r7 608#ifndef __thumb2__ 609 str r12,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 610#endif 611 bic r10,r2,r0,ror#3 612#ifndef __thumb2__ 613 str r14,[sp,#344+4] 614#else 615 strd r12,r14,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 616#endif 617 bic r11,r3,r1,ror#3 618#ifndef __thumb2__ 619 ldr r1,[sp,#32] @ A[0][4] [in reverse order] 620#endif 621 eor r10,r8,r10,ror#32-3 622#ifndef __thumb2__ 623 ldr r0,[sp,#32+4] 624#else 625 ldrd r1,r0,[sp,#32] @ A[0][4] [in reverse order] 626#endif 627 eor r11,r9,r11,ror#32-3 628#ifndef __thumb2__ 629 str r10,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 630#endif 631 add r9,sp,#208 632#ifndef __thumb2__ 633 str r11,[sp,#352+4] 634#else 635 strd r10,r11,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 636#endif 637 638#ifndef __thumb2__ 639 ldr r10,[sp,#232] @ D[4] 640#endif 641#ifndef __thumb2__ 642 ldr r11,[sp,#232+4] 643#else 644 ldrd r10,r11,[sp,#232] @ D[4] 645#endif 646#ifndef __thumb2__ 647 ldr r12,[sp,#200] @ D[0] 648#endif 649#ifndef __thumb2__ 650 ldr r14,[sp,#200+4] 651#else 652 ldrd r12,r14,[sp,#200] @ D[0] 653#endif 654 655 ldmia r9,{r6,r7,r8,r9} @ D[1..2] 656 657 eor r1,r1,r10 658#ifndef __thumb2__ 659 ldr r2,[sp,#40] @ A[1][0] 660#endif 661 eor r0,r0,r11 662#ifndef __thumb2__ 663 ldr r3,[sp,#40+4] 664#else 665 ldrd r2,r3,[sp,#40] @ A[1][0] 666#endif 667 @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]); 668#ifndef __thumb2__ 669 ldr r4,[sp,#88] @ A[2][1] 670#endif 671 @ mov r0,r11,ror#32-14 @ [was loaded in reverse order] 672#ifndef __thumb2__ 673 ldr r5,[sp,#88+4] 674#else 675 
ldrd r4,r5,[sp,#88] @ A[2][1] 676#endif 677 678 eor r2,r2,r12 679#ifndef __thumb2__ 680 ldr r10,[sp,#136] @ A[3][2] 681#endif 682 eor r3,r3,r14 683#ifndef __thumb2__ 684 ldr r11,[sp,#136+4] 685#else 686 ldrd r10,r11,[sp,#136] @ A[3][2] 687#endif 688 @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]); 689#ifndef __thumb2__ 690 ldr r12,[sp,#224] @ D[3] 691#endif 692 @ mov r3,r3,ror#32-18 693#ifndef __thumb2__ 694 ldr r14,[sp,#224+4] 695#else 696 ldrd r12,r14,[sp,#224] @ D[3] 697#endif 698 699 eor r6,r6,r4 700 eor r7,r7,r5 701 mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]); 702 mov r5,r7,ror#32-5 703 704 eor r10,r10,r8 705#ifndef __thumb2__ 706 ldr r8,[sp,#184] @ A[4][3] 707#endif 708 eor r11,r11,r9 709#ifndef __thumb2__ 710 ldr r9,[sp,#184+4] 711#else 712 ldrd r8,r9,[sp,#184] @ A[4][3] 713#endif 714 mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]); 715 mov r6,r11,ror#32-8 716 717 eor r12,r12,r8 718 eor r14,r14,r9 719 mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]); 720 mov r9,r14,ror#32-28 721 722 bic r10,r4,r2,ror#32-18 723 bic r11,r5,r3,ror#32-18 724 eor r10,r10,r0,ror#32-14 725 eor r11,r11,r1,ror#32-13 726#ifndef __thumb2__ 727 str r10,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 728#endif 729 bic r12,r6,r4 730#ifndef __thumb2__ 731 str r11,[sp,#360+4] 732#else 733 strd r10,r11,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 734#endif 735 bic r14,r7,r5 736 eor r12,r12,r2,ror#32-18 737#ifndef __thumb2__ 738 str r12,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 739#endif 740 eor r14,r14,r3,ror#32-18 741#ifndef __thumb2__ 742 str r14,[sp,#368+4] 743#else 744 strd r12,r14,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 745#endif 746 bic r10,r8,r6 747 bic r11,r9,r7 748 bic r12,r0,r8,ror#14 749 bic r14,r1,r9,ror#13 750 eor r10,r10,r4 751 eor r11,r11,r5 752#ifndef __thumb2__ 753 str r10,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 754#endif 755 bic r2,r2,r0,ror#18-14 756#ifndef __thumb2__ 757 str 
r11,[sp,#376+4] 758#else 759 strd r10,r11,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 760#endif 761 eor r12,r6,r12,ror#32-14 762 bic r11,r3,r1,ror#18-13 763 eor r14,r7,r14,ror#32-13 764#ifndef __thumb2__ 765 str r12,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 766#endif 767#ifndef __thumb2__ 768 str r14,[sp,#384+4] 769#else 770 strd r12,r14,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 771#endif 772 add r14,sp,#216 773#ifndef __thumb2__ 774 ldr r0,[sp,#16] @ A[0][2] 775#endif 776 eor r10,r8,r2,ror#32-18 777#ifndef __thumb2__ 778 ldr r1,[sp,#16+4] 779#else 780 ldrd r0,r1,[sp,#16] @ A[0][2] 781#endif 782 eor r11,r9,r11,ror#32-18 783#ifndef __thumb2__ 784 str r10,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 785#endif 786#ifndef __thumb2__ 787 str r11,[sp,#392+4] 788#else 789 strd r10,r11,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 790#endif 791 792 ldmia r14,{r10,r11,r12,r14} @ D[2..3] 793#ifndef __thumb2__ 794 ldr r2,[sp,#64] @ A[1][3] 795#endif 796#ifndef __thumb2__ 797 ldr r3,[sp,#64+4] 798#else 799 ldrd r2,r3,[sp,#64] @ A[1][3] 800#endif 801#ifndef __thumb2__ 802 ldr r6,[sp,#232] @ D[4] 803#endif 804#ifndef __thumb2__ 805 ldr r7,[sp,#232+4] 806#else 807 ldrd r6,r7,[sp,#232] @ D[4] 808#endif 809 810 eor r0,r0,r10 811#ifndef __thumb2__ 812 ldr r4,[sp,#112] @ A[2][4] 813#endif 814 eor r1,r1,r11 815#ifndef __thumb2__ 816 ldr r5,[sp,#112+4] 817#else 818 ldrd r4,r5,[sp,#112] @ A[2][4] 819#endif 820 @ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]); 821#ifndef __thumb2__ 822 ldr r8,[sp,#200] @ D[0] 823#endif 824 @ mov r1,r1,ror#32-31 825#ifndef __thumb2__ 826 ldr r9,[sp,#200+4] 827#else 828 ldrd r8,r9,[sp,#200] @ D[0] 829#endif 830 831 eor r12,r12,r2 832#ifndef __thumb2__ 833 ldr r10,[sp,#120] @ A[3][0] 834#endif 835 eor r14,r14,r3 836#ifndef __thumb2__ 837 ldr r11,[sp,#120+4] 838#else 839 ldrd r10,r11,[sp,#120] @ A[3][0] 840#endif 841 mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]); 842#ifndef __thumb2__ 843 ldr r12,[sp,#208] @ 
D[1] 844#endif 845 mov r2,r14,ror#32-28 846#ifndef __thumb2__ 847 ldr r14,[sp,#208+4] 848#else 849 ldrd r12,r14,[sp,#208] @ D[1] 850#endif 851 852 eor r6,r6,r4 853 eor r7,r7,r5 854 mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]); 855 mov r4,r7,ror#32-20 856 857 eor r10,r10,r8 858#ifndef __thumb2__ 859 ldr r8,[sp,#168] @ A[4][1] 860#endif 861 eor r11,r11,r9 862#ifndef __thumb2__ 863 ldr r9,[sp,#168+4] 864#else 865 ldrd r8,r9,[sp,#168] @ A[4][1] 866#endif 867 mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]); 868 mov r6,r11,ror#32-21 869 870 eor r8,r8,r12 871 eor r9,r9,r14 872 @ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]); 873 @ mov r9,r3,ror#32-1 874 875 bic r10,r4,r2 876 bic r11,r5,r3 877 eor r10,r10,r0,ror#32-31 878#ifndef __thumb2__ 879 str r10,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 880#endif 881 eor r11,r11,r1,ror#32-31 882#ifndef __thumb2__ 883 str r11,[sp,#400+4] 884#else 885 strd r10,r11,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 886#endif 887 bic r12,r6,r4 888 bic r14,r7,r5 889 eor r12,r12,r2 890 eor r14,r14,r3 891#ifndef __thumb2__ 892 str r12,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 893#endif 894 bic r10,r8,r6,ror#1 895#ifndef __thumb2__ 896 str r14,[sp,#408+4] 897#else 898 strd r12,r14,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 899#endif 900 bic r11,r9,r7,ror#1 901 bic r12,r0,r8,ror#31-1 902 bic r14,r1,r9,ror#31-1 903 eor r4,r4,r10,ror#32-1 904#ifndef __thumb2__ 905 str r4,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 906#endif 907 eor r5,r5,r11,ror#32-1 908#ifndef __thumb2__ 909 str r5,[sp,#416+4] 910#else 911 strd r4,r5,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 912#endif 913 eor r6,r6,r12,ror#32-31 914 eor r7,r7,r14,ror#32-31 915#ifndef __thumb2__ 916 str r6,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 917#endif 918 bic r10,r2,r0,ror#32-31 919#ifndef __thumb2__ 920 str r7,[sp,#424+4] 921#else 922 strd r6,r7,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 923#endif 924 bic 
r11,r3,r1,ror#32-31 925 add r12,sp,#240 926 eor r8,r10,r8,ror#32-1 927 add r10,sp,#280 928 eor r9,r11,r9,ror#32-1 929#ifndef __thumb2__ 930 str r8,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]); 931#endif 932#ifndef __thumb2__ 933 str r9,[sp,#432+4] 934#else 935 strd r8,r9,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]); 936#endif 937 ldmia r12,{r0,r1,r2,r3} @ A[0][0..1] 938 ldmia r10,{r10,r11,r12,r14} @ A[1][0..1] 939#ifdef __thumb2__ 940 eor r0,r0,r10 941 eor r1,r1,r11 942 eor r2,r2,r12 943 ldrd r10,r11,[sp,#296] 944 eor r3,r3,r14 945 ldrd r12,r14,[sp,#304] 946 eor r4,r4,r10 947 eor r5,r5,r11 948 eor r6,r6,r12 949 ldrd r10,r11,[sp,#312] 950 eor r7,r7,r14 951 ldrd r12,r14,[sp,#320] 952 eor r8,r8,r10 953 eor r9,r9,r11 954 eor r0,r0,r12 955 ldrd r10,r11,[sp,#328] 956 eor r1,r1,r14 957 ldrd r12,r14,[sp,#336] 958 eor r2,r2,r10 959 eor r3,r3,r11 960 eor r4,r4,r12 961 ldrd r10,r11,[sp,#344] 962 eor r5,r5,r14 963 ldrd r12,r14,[sp,#352] 964 eor r6,r6,r10 965 eor r7,r7,r11 966 eor r8,r8,r12 967 ldrd r10,r11,[sp,#360] 968 eor r9,r9,r14 969 ldrd r12,r14,[sp,#368] 970 eor r0,r0,r10 971 eor r1,r1,r11 972 eor r2,r2,r12 973 ldrd r10,r11,[sp,#376] 974 eor r3,r3,r14 975 ldrd r12,r14,[sp,#384] 976 eor r4,r4,r10 977 eor r5,r5,r11 978 eor r6,r6,r12 979 ldrd r10,r11,[sp,#392] 980 eor r7,r7,r14 981 ldrd r12,r14,[sp,#400] 982 eor r8,r8,r10 983 eor r9,r9,r11 984 eor r0,r0,r12 985 ldrd r10,r11,[sp,#408] 986 eor r1,r1,r14 987 ldrd r12,r14,[sp,#256] 988 eor r2,r2,r10 989 eor r3,r3,r11 990 eor r4,r4,r12 991 ldrd r10,r11,[sp,#264] 992 eor r5,r5,r14 993 ldrd r12,r14,[sp,#272] 994#else 995 eor r0,r0,r10 996 add r10,sp,#296 997 eor r1,r1,r11 998 eor r2,r2,r12 999 eor r3,r3,r14 1000 ldmia r10,{r10,r11,r12,r14} @ A[1][2..3] 1001 eor r4,r4,r10 1002 add r10,sp,#312 1003 eor r5,r5,r11 1004 eor r6,r6,r12 1005 eor r7,r7,r14 1006 ldmia r10,{r10,r11,r12,r14} @ A[1][4]..A[2][0] 1007 eor r8,r8,r10 1008 add r10,sp,#328 1009 eor r9,r9,r11 1010 eor r0,r0,r12 1011 eor r1,r1,r14 1012 ldmia r10,{r10,r11,r12,r14} @ 
A[2][1..2] 1013 eor r2,r2,r10 1014 add r10,sp,#344 1015 eor r3,r3,r11 1016 eor r4,r4,r12 1017 eor r5,r5,r14 1018 ldmia r10,{r10,r11,r12,r14} @ A[2][3..4] 1019 eor r6,r6,r10 1020 add r10,sp,#360 1021 eor r7,r7,r11 1022 eor r8,r8,r12 1023 eor r9,r9,r14 1024 ldmia r10,{r10,r11,r12,r14} @ A[3][0..1] 1025 eor r0,r0,r10 1026 add r10,sp,#376 1027 eor r1,r1,r11 1028 eor r2,r2,r12 1029 eor r3,r3,r14 1030 ldmia r10,{r10,r11,r12,r14} @ A[3][2..3] 1031 eor r4,r4,r10 1032 add r10,sp,#392 1033 eor r5,r5,r11 1034 eor r6,r6,r12 1035 eor r7,r7,r14 1036 ldmia r10,{r10,r11,r12,r14} @ A[3][4]..A[4][0] 1037 eor r8,r8,r10 1038 ldr r10,[sp,#408] @ A[4][1] 1039 eor r9,r9,r11 1040 ldr r11,[sp,#408+4] 1041 eor r0,r0,r12 1042 ldr r12,[sp,#256] @ A[0][2] 1043 eor r1,r1,r14 1044 ldr r14,[sp,#256+4] 1045 eor r2,r2,r10 1046 add r10,sp,#264 1047 eor r3,r3,r11 1048 eor r4,r4,r12 1049 eor r5,r5,r14 1050 ldmia r10,{r10,r11,r12,r14} @ A[0][3..4] 1051#endif 1052 eor r6,r6,r10 1053 eor r7,r7,r11 1054 eor r8,r8,r12 1055 eor r9,r9,r14 1056 1057 eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0]; 1058#ifndef __thumb2__ 1059 str r10,[sp,#208] @ D[1] = E[0] 1060#endif 1061 eor r11,r1,r4 1062#ifndef __thumb2__ 1063 str r11,[sp,#208+4] 1064#else 1065 strd r10,r11,[sp,#208] @ D[1] = E[0] 1066#endif 1067 eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3]; 1068 eor r14,r7,r0 1069#ifndef __thumb2__ 1070 str r12,[sp,#232] @ D[4] = E[1] 1071#endif 1072 eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4]; 1073#ifndef __thumb2__ 1074 str r14,[sp,#232+4] 1075#else 1076 strd r12,r14,[sp,#232] @ D[4] = E[1] 1077#endif 1078 eor r1,r9,r2 1079#ifndef __thumb2__ 1080 str r0,[sp,#200] @ D[0] = C[0] 1081#endif 1082 eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1]; 1083#ifndef __thumb2__ 1084 ldr r7,[sp,#384] 1085#endif 1086 eor r3,r3,r6 1087#ifndef __thumb2__ 1088 str r1,[sp,#200+4] 1089#else 1090 strd r0,r1,[sp,#200] @ D[0] = C[0] 1091#endif 1092#ifndef __thumb2__ 1093 ldr r6,[sp,#384+4] 1094#else 1095 ldrd 
r7,r6,[sp,#384] 1096#endif 1097#ifndef __thumb2__ 1098 str r2,[sp,#216] @ D[2] = C[1] 1099#endif 1100 eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2]; 1101#ifndef __thumb2__ 1102 str r3,[sp,#216+4] 1103#else 1104 strd r2,r3,[sp,#216] @ D[2] = C[1] 1105#endif 1106 eor r5,r5,r8 1107 1108#ifndef __thumb2__ 1109 ldr r8,[sp,#432] 1110#endif 1111#ifndef __thumb2__ 1112 ldr r9,[sp,#432+4] 1113#else 1114 ldrd r8,r9,[sp,#432] 1115#endif 1116#ifndef __thumb2__ 1117 str r4,[sp,#224] @ D[3] = C[2] 1118#endif 1119 eor r7,r7,r4 1120#ifndef __thumb2__ 1121 str r5,[sp,#224+4] 1122#else 1123 strd r4,r5,[sp,#224] @ D[3] = C[2] 1124#endif 1125 eor r6,r6,r5 1126#ifndef __thumb2__ 1127 ldr r4,[sp,#240] 1128#endif 1129 @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */ 1130 @ mov r6,r6,ror#32-11 1131#ifndef __thumb2__ 1132 ldr r5,[sp,#240+4] 1133#else 1134 ldrd r4,r5,[sp,#240] 1135#endif 1136 eor r8,r8,r12 1137 eor r9,r9,r14 1138#ifndef __thumb2__ 1139 ldr r12,[sp,#336] 1140#endif 1141 eor r0,r0,r4 1142#ifndef __thumb2__ 1143 ldr r14,[sp,#336+4] 1144#else 1145 ldrd r12,r14,[sp,#336] 1146#endif 1147 @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */ 1148 @ mov r9,r9,ror#32-7 1149 eor r1,r1,r5 @ C[0] = A[0][0] ^ C[0]; 1150 eor r12,r12,r2 1151#ifndef __thumb2__ 1152 ldr r2,[sp,#288] 1153#endif 1154 eor r14,r14,r3 1155#ifndef __thumb2__ 1156 ldr r3,[sp,#288+4] 1157#else 1158 ldrd r2,r3,[sp,#288] 1159#endif 1160 mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); 1161 ldr r12,[sp,#444] @ load counter 1162 eor r2,r2,r10 1163 adr r10,iotas32 1164 mov r4,r14,ror#32-22 1165 add r14,r10,r12 1166 eor r3,r3,r11 1167#ifndef __thumb2__ 1168 ldr r10,[r14,#8] @ iotas[i].lo 1169#endif 1170 add r12,r12,#16 1171#ifndef __thumb2__ 1172 ldr r11,[r14,#12] @ iotas[i].hi 1173#else 1174 ldrd r10,r11,[r14,#8] @ iotas[i].lo 1175#endif 1176 cmp r12,#192 1177 str r12,[sp,#444] @ store counter 1178 bic r12,r4,r2,ror#32-22 1179 bic 
r14,r5,r3,ror#32-22 1180 mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); 1181 mov r3,r3,ror#32-22 1182 eor r12,r12,r0 1183 eor r14,r14,r1 1184 eor r10,r10,r12 1185 eor r11,r11,r14 1186#ifndef __thumb2__ 1187 str r10,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 1188#endif 1189 bic r12,r6,r4,ror#11 1190#ifndef __thumb2__ 1191 str r11,[sp,#0+4] 1192#else 1193 strd r10,r11,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 1194#endif 1195 bic r14,r7,r5,ror#10 1196 bic r10,r8,r6,ror#32-(11-7) 1197 bic r11,r9,r7,ror#32-(10-7) 1198 eor r12,r2,r12,ror#32-11 1199#ifndef __thumb2__ 1200 str r12,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 1201#endif 1202 eor r14,r3,r14,ror#32-10 1203#ifndef __thumb2__ 1204 str r14,[sp,#8+4] 1205#else 1206 strd r12,r14,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 1207#endif 1208 eor r10,r4,r10,ror#32-7 1209 eor r11,r5,r11,ror#32-7 1210#ifndef __thumb2__ 1211 str r10,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 1212#endif 1213 bic r12,r0,r8,ror#32-7 1214#ifndef __thumb2__ 1215 str r11,[sp,#16+4] 1216#else 1217 strd r10,r11,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 1218#endif 1219 bic r14,r1,r9,ror#32-7 1220 eor r12,r12,r6,ror#32-11 1221#ifndef __thumb2__ 1222 str r12,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 1223#endif 1224 eor r14,r14,r7,ror#32-10 1225#ifndef __thumb2__ 1226 str r14,[sp,#24+4] 1227#else 1228 strd r12,r14,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 1229#endif 1230 bic r10,r2,r0 1231 add r14,sp,#224 1232#ifndef __thumb2__ 1233 ldr r0,[sp,#264] @ A[0][3] 1234#endif 1235 bic r11,r3,r1 1236#ifndef __thumb2__ 1237 ldr r1,[sp,#264+4] 1238#else 1239 ldrd r0,r1,[sp,#264] @ A[0][3] 1240#endif 1241 eor r10,r10,r8,ror#32-7 1242 eor r11,r11,r9,ror#32-7 1243#ifndef __thumb2__ 1244 str r10,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 1245#endif 1246 add r9,sp,#200 1247#ifndef __thumb2__ 1248 str r11,[sp,#32+4] 1249#else 1250 strd r10,r11,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 1251#endif 1252 1253 ldmia 
r14,{r10,r11,r12,r14} @ D[3..4] 1254 ldmia r9,{r6,r7,r8,r9} @ D[0..1] 1255 1256#ifndef __thumb2__ 1257 ldr r2,[sp,#312] @ A[1][4] 1258#endif 1259 eor r0,r0,r10 1260#ifndef __thumb2__ 1261 ldr r3,[sp,#312+4] 1262#else 1263 ldrd r2,r3,[sp,#312] @ A[1][4] 1264#endif 1265 eor r1,r1,r11 1266 @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]); 1267#ifndef __thumb2__ 1268 ldr r10,[sp,#368] @ A[3][1] 1269#endif 1270 @ mov r1,r1,ror#32-14 1271#ifndef __thumb2__ 1272 ldr r11,[sp,#368+4] 1273#else 1274 ldrd r10,r11,[sp,#368] @ A[3][1] 1275#endif 1276 1277 eor r2,r2,r12 1278#ifndef __thumb2__ 1279 ldr r4,[sp,#320] @ A[2][0] 1280#endif 1281 eor r3,r3,r14 1282#ifndef __thumb2__ 1283 ldr r5,[sp,#320+4] 1284#else 1285 ldrd r4,r5,[sp,#320] @ A[2][0] 1286#endif 1287 @ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]); 1288 @ mov r3,r3,ror#32-10 1289 1290 eor r6,r6,r4 1291#ifndef __thumb2__ 1292 ldr r12,[sp,#216] @ D[2] 1293#endif 1294 eor r7,r7,r5 1295#ifndef __thumb2__ 1296 ldr r14,[sp,#216+4] 1297#else 1298 ldrd r12,r14,[sp,#216] @ D[2] 1299#endif 1300 mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]); 1301 mov r4,r7,ror#32-2 1302 1303 eor r10,r10,r8 1304#ifndef __thumb2__ 1305 ldr r8,[sp,#416] @ A[4][2] 1306#endif 1307 eor r11,r11,r9 1308#ifndef __thumb2__ 1309 ldr r9,[sp,#416+4] 1310#else 1311 ldrd r8,r9,[sp,#416] @ A[4][2] 1312#endif 1313 mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]); 1314 mov r6,r11,ror#32-23 1315 1316 bic r10,r4,r2,ror#32-10 1317 bic r11,r5,r3,ror#32-10 1318 eor r12,r12,r8 1319 eor r14,r14,r9 1320 mov r9,r12,ror#32-30 @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]); 1321 mov r8,r14,ror#32-31 1322 eor r10,r10,r0,ror#32-14 1323 eor r11,r11,r1,ror#32-14 1324#ifndef __thumb2__ 1325 str r10,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 1326#endif 1327 bic r12,r6,r4 1328#ifndef __thumb2__ 1329 str r11,[sp,#40+4] 1330#else 1331 strd r10,r11,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 1332#endif 1333 
bic r14,r7,r5 1334 eor r12,r12,r2,ror#32-10 1335#ifndef __thumb2__ 1336 str r12,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 1337#endif 1338 eor r14,r14,r3,ror#32-10 1339#ifndef __thumb2__ 1340 str r14,[sp,#48+4] 1341#else 1342 strd r12,r14,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 1343#endif 1344 bic r10,r8,r6 1345 bic r11,r9,r7 1346 bic r12,r0,r8,ror#14 1347 bic r14,r1,r9,ror#14 1348 eor r10,r10,r4 1349 eor r11,r11,r5 1350#ifndef __thumb2__ 1351 str r10,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 1352#endif 1353 bic r2,r2,r0,ror#32-(14-10) 1354#ifndef __thumb2__ 1355 str r11,[sp,#56+4] 1356#else 1357 strd r10,r11,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 1358#endif 1359 eor r12,r6,r12,ror#32-14 1360 bic r11,r3,r1,ror#32-(14-10) 1361#ifndef __thumb2__ 1362 str r12,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 1363#endif 1364 eor r14,r7,r14,ror#32-14 1365#ifndef __thumb2__ 1366 str r14,[sp,#64+4] 1367#else 1368 strd r12,r14,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 1369#endif 1370 add r12,sp,#208 1371#ifndef __thumb2__ 1372 ldr r1,[sp,#248] @ A[0][1] 1373#endif 1374 eor r10,r8,r2,ror#32-10 1375#ifndef __thumb2__ 1376 ldr r0,[sp,#248+4] 1377#else 1378 ldrd r1,r0,[sp,#248] @ A[0][1] 1379#endif 1380 eor r11,r9,r11,ror#32-10 1381#ifndef __thumb2__ 1382 str r10,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 1383#endif 1384#ifndef __thumb2__ 1385 str r11,[sp,#72+4] 1386#else 1387 strd r10,r11,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 1388#endif 1389 1390 add r9,sp,#224 1391 ldmia r12,{r10,r11,r12,r14} @ D[1..2] 1392#ifndef __thumb2__ 1393 ldr r2,[sp,#296] @ A[1][2] 1394#endif 1395#ifndef __thumb2__ 1396 ldr r3,[sp,#296+4] 1397#else 1398 ldrd r2,r3,[sp,#296] @ A[1][2] 1399#endif 1400 ldmia r9,{r6,r7,r8,r9} @ D[3..4] 1401 1402 eor r1,r1,r10 1403#ifndef __thumb2__ 1404 ldr r4,[sp,#344] @ A[2][3] 1405#endif 1406 eor r0,r0,r11 1407#ifndef __thumb2__ 1408 ldr r5,[sp,#344+4] 1409#else 1410 ldrd r4,r5,[sp,#344] @ A[2][3] 1411#endif 1412 mov r0,r0,ror#32-1 @ C[0] = 
ROL64(A[0][1] ^ D[1], rhotates[0][1]); 1413 1414 eor r2,r2,r12 1415#ifndef __thumb2__ 1416 ldr r10,[sp,#392] @ A[3][4] 1417#endif 1418 eor r3,r3,r14 1419#ifndef __thumb2__ 1420 ldr r11,[sp,#392+4] 1421#else 1422 ldrd r10,r11,[sp,#392] @ A[3][4] 1423#endif 1424 @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]); 1425#ifndef __thumb2__ 1426 ldr r12,[sp,#200] @ D[0] 1427#endif 1428 @ mov r3,r3,ror#32-3 1429#ifndef __thumb2__ 1430 ldr r14,[sp,#200+4] 1431#else 1432 ldrd r12,r14,[sp,#200] @ D[0] 1433#endif 1434 1435 eor r4,r4,r6 1436 eor r5,r5,r7 1437 @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]); 1438 @ mov r4,r7,ror#32-13 @ [track reverse order below] 1439 1440 eor r10,r10,r8 1441#ifndef __thumb2__ 1442 ldr r8,[sp,#400] @ A[4][0] 1443#endif 1444 eor r11,r11,r9 1445#ifndef __thumb2__ 1446 ldr r9,[sp,#400+4] 1447#else 1448 ldrd r8,r9,[sp,#400] @ A[4][0] 1449#endif 1450 mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]); 1451 mov r7,r11,ror#32-4 1452 1453 eor r12,r12,r8 1454 eor r14,r14,r9 1455 mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]); 1456 mov r9,r14,ror#32-9 1457 1458 bic r10,r5,r2,ror#13-3 1459 bic r11,r4,r3,ror#12-3 1460 bic r12,r6,r5,ror#32-13 1461 bic r14,r7,r4,ror#32-12 1462 eor r10,r0,r10,ror#32-13 1463 eor r11,r1,r11,ror#32-12 1464#ifndef __thumb2__ 1465 str r10,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 1466#endif 1467 eor r12,r12,r2,ror#32-3 1468#ifndef __thumb2__ 1469 str r11,[sp,#80+4] 1470#else 1471 strd r10,r11,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 1472#endif 1473 eor r14,r14,r3,ror#32-3 1474#ifndef __thumb2__ 1475 str r12,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 1476#endif 1477 bic r10,r8,r6 1478 bic r11,r9,r7 1479#ifndef __thumb2__ 1480 str r14,[sp,#88+4] 1481#else 1482 strd r12,r14,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 1483#endif 1484 eor r10,r10,r5,ror#32-13 1485 eor r11,r11,r4,ror#32-12 1486#ifndef __thumb2__ 1487 str r10,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] 
& C[4]); 1488#endif 1489 bic r12,r0,r8 1490#ifndef __thumb2__ 1491 str r11,[sp,#96+4] 1492#else 1493 strd r10,r11,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 1494#endif 1495 bic r14,r1,r9 1496 eor r12,r12,r6 1497 eor r14,r14,r7 1498#ifndef __thumb2__ 1499 str r12,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 1500#endif 1501 bic r10,r2,r0,ror#3 1502#ifndef __thumb2__ 1503 str r14,[sp,#104+4] 1504#else 1505 strd r12,r14,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 1506#endif 1507 bic r11,r3,r1,ror#3 1508#ifndef __thumb2__ 1509 ldr r1,[sp,#272] @ A[0][4] [in reverse order] 1510#endif 1511 eor r10,r8,r10,ror#32-3 1512#ifndef __thumb2__ 1513 ldr r0,[sp,#272+4] 1514#else 1515 ldrd r1,r0,[sp,#272] @ A[0][4] [in reverse order] 1516#endif 1517 eor r11,r9,r11,ror#32-3 1518#ifndef __thumb2__ 1519 str r10,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 1520#endif 1521 add r9,sp,#208 1522#ifndef __thumb2__ 1523 str r11,[sp,#112+4] 1524#else 1525 strd r10,r11,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 1526#endif 1527 1528#ifndef __thumb2__ 1529 ldr r10,[sp,#232] @ D[4] 1530#endif 1531#ifndef __thumb2__ 1532 ldr r11,[sp,#232+4] 1533#else 1534 ldrd r10,r11,[sp,#232] @ D[4] 1535#endif 1536#ifndef __thumb2__ 1537 ldr r12,[sp,#200] @ D[0] 1538#endif 1539#ifndef __thumb2__ 1540 ldr r14,[sp,#200+4] 1541#else 1542 ldrd r12,r14,[sp,#200] @ D[0] 1543#endif 1544 1545 ldmia r9,{r6,r7,r8,r9} @ D[1..2] 1546 1547 eor r1,r1,r10 1548#ifndef __thumb2__ 1549 ldr r2,[sp,#280] @ A[1][0] 1550#endif 1551 eor r0,r0,r11 1552#ifndef __thumb2__ 1553 ldr r3,[sp,#280+4] 1554#else 1555 ldrd r2,r3,[sp,#280] @ A[1][0] 1556#endif 1557 @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]); 1558#ifndef __thumb2__ 1559 ldr r4,[sp,#328] @ A[2][1] 1560#endif 1561 @ mov r0,r11,ror#32-14 @ [was loaded in reverse order] 1562#ifndef __thumb2__ 1563 ldr r5,[sp,#328+4] 1564#else 1565 ldrd r4,r5,[sp,#328] @ A[2][1] 1566#endif 1567 1568 eor r2,r2,r12 1569#ifndef __thumb2__ 1570 ldr r10,[sp,#376] @ A[3][2] 
1571#endif 1572 eor r3,r3,r14 1573#ifndef __thumb2__ 1574 ldr r11,[sp,#376+4] 1575#else 1576 ldrd r10,r11,[sp,#376] @ A[3][2] 1577#endif 1578 @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]); 1579#ifndef __thumb2__ 1580 ldr r12,[sp,#224] @ D[3] 1581#endif 1582 @ mov r3,r3,ror#32-18 1583#ifndef __thumb2__ 1584 ldr r14,[sp,#224+4] 1585#else 1586 ldrd r12,r14,[sp,#224] @ D[3] 1587#endif 1588 1589 eor r6,r6,r4 1590 eor r7,r7,r5 1591 mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]); 1592 mov r5,r7,ror#32-5 1593 1594 eor r10,r10,r8 1595#ifndef __thumb2__ 1596 ldr r8,[sp,#424] @ A[4][3] 1597#endif 1598 eor r11,r11,r9 1599#ifndef __thumb2__ 1600 ldr r9,[sp,#424+4] 1601#else 1602 ldrd r8,r9,[sp,#424] @ A[4][3] 1603#endif 1604 mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]); 1605 mov r6,r11,ror#32-8 1606 1607 eor r12,r12,r8 1608 eor r14,r14,r9 1609 mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]); 1610 mov r9,r14,ror#32-28 1611 1612 bic r10,r4,r2,ror#32-18 1613 bic r11,r5,r3,ror#32-18 1614 eor r10,r10,r0,ror#32-14 1615 eor r11,r11,r1,ror#32-13 1616#ifndef __thumb2__ 1617 str r10,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 1618#endif 1619 bic r12,r6,r4 1620#ifndef __thumb2__ 1621 str r11,[sp,#120+4] 1622#else 1623 strd r10,r11,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 1624#endif 1625 bic r14,r7,r5 1626 eor r12,r12,r2,ror#32-18 1627#ifndef __thumb2__ 1628 str r12,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 1629#endif 1630 eor r14,r14,r3,ror#32-18 1631#ifndef __thumb2__ 1632 str r14,[sp,#128+4] 1633#else 1634 strd r12,r14,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 1635#endif 1636 bic r10,r8,r6 1637 bic r11,r9,r7 1638 bic r12,r0,r8,ror#14 1639 bic r14,r1,r9,ror#13 1640 eor r10,r10,r4 1641 eor r11,r11,r5 1642#ifndef __thumb2__ 1643 str r10,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 1644#endif 1645 bic r2,r2,r0,ror#18-14 1646#ifndef __thumb2__ 1647 str r11,[sp,#136+4] 1648#else 1649 strd 
r10,r11,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 1650#endif 1651 eor r12,r6,r12,ror#32-14 1652 bic r11,r3,r1,ror#18-13 1653 eor r14,r7,r14,ror#32-13 1654#ifndef __thumb2__ 1655 str r12,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 1656#endif 1657#ifndef __thumb2__ 1658 str r14,[sp,#144+4] 1659#else 1660 strd r12,r14,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 1661#endif 1662 add r14,sp,#216 1663#ifndef __thumb2__ 1664 ldr r0,[sp,#256] @ A[0][2] 1665#endif 1666 eor r10,r8,r2,ror#32-18 1667#ifndef __thumb2__ 1668 ldr r1,[sp,#256+4] 1669#else 1670 ldrd r0,r1,[sp,#256] @ A[0][2] 1671#endif 1672 eor r11,r9,r11,ror#32-18 1673#ifndef __thumb2__ 1674 str r10,[sp,#152] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 1675#endif 1676#ifndef __thumb2__ 1677 str r11,[sp,#152+4] 1678#else 1679 strd r10,r11,[sp,#152] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 1680#endif 1681 1682 ldmia r14,{r10,r11,r12,r14} @ D[2..3] 1683#ifndef __thumb2__ 1684 ldr r2,[sp,#304] @ A[1][3] 1685#endif 1686#ifndef __thumb2__ 1687 ldr r3,[sp,#304+4] 1688#else 1689 ldrd r2,r3,[sp,#304] @ A[1][3] 1690#endif 1691#ifndef __thumb2__ 1692 ldr r6,[sp,#232] @ D[4] 1693#endif 1694#ifndef __thumb2__ 1695 ldr r7,[sp,#232+4] 1696#else 1697 ldrd r6,r7,[sp,#232] @ D[4] 1698#endif 1699 1700 eor r0,r0,r10 1701#ifndef __thumb2__ 1702 ldr r4,[sp,#352] @ A[2][4] 1703#endif 1704 eor r1,r1,r11 1705#ifndef __thumb2__ 1706 ldr r5,[sp,#352+4] 1707#else 1708 ldrd r4,r5,[sp,#352] @ A[2][4] 1709#endif 1710 @ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]); 1711#ifndef __thumb2__ 1712 ldr r8,[sp,#200] @ D[0] 1713#endif 1714 @ mov r1,r1,ror#32-31 1715#ifndef __thumb2__ 1716 ldr r9,[sp,#200+4] 1717#else 1718 ldrd r8,r9,[sp,#200] @ D[0] 1719#endif 1720 1721 eor r12,r12,r2 1722#ifndef __thumb2__ 1723 ldr r10,[sp,#360] @ A[3][0] 1724#endif 1725 eor r14,r14,r3 1726#ifndef __thumb2__ 1727 ldr r11,[sp,#360+4] 1728#else 1729 ldrd r10,r11,[sp,#360] @ A[3][0] 1730#endif 1731 mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], 
rhotates[1][3]); 1732#ifndef __thumb2__ 1733 ldr r12,[sp,#208] @ D[1] 1734#endif 1735 mov r2,r14,ror#32-28 1736#ifndef __thumb2__ 1737 ldr r14,[sp,#208+4] 1738#else 1739 ldrd r12,r14,[sp,#208] @ D[1] 1740#endif 1741 1742 eor r6,r6,r4 1743 eor r7,r7,r5 1744 mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]); 1745 mov r4,r7,ror#32-20 1746 1747 eor r10,r10,r8 1748#ifndef __thumb2__ 1749 ldr r8,[sp,#408] @ A[4][1] 1750#endif 1751 eor r11,r11,r9 1752#ifndef __thumb2__ 1753 ldr r9,[sp,#408+4] 1754#else 1755 ldrd r8,r9,[sp,#408] @ A[4][1] 1756#endif 1757 mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]); 1758 mov r6,r11,ror#32-21 1759 1760 eor r8,r8,r12 1761 eor r9,r9,r14 1762 @ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]); 1763 @ mov r9,r3,ror#32-1 1764 1765 bic r10,r4,r2 1766 bic r11,r5,r3 1767 eor r10,r10,r0,ror#32-31 1768#ifndef __thumb2__ 1769 str r10,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 1770#endif 1771 eor r11,r11,r1,ror#32-31 1772#ifndef __thumb2__ 1773 str r11,[sp,#160+4] 1774#else 1775 strd r10,r11,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 1776#endif 1777 bic r12,r6,r4 1778 bic r14,r7,r5 1779 eor r12,r12,r2 1780 eor r14,r14,r3 1781#ifndef __thumb2__ 1782 str r12,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 1783#endif 1784 bic r10,r8,r6,ror#1 1785#ifndef __thumb2__ 1786 str r14,[sp,#168+4] 1787#else 1788 strd r12,r14,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 1789#endif 1790 bic r11,r9,r7,ror#1 1791 bic r12,r0,r8,ror#31-1 1792 bic r14,r1,r9,ror#31-1 1793 eor r4,r4,r10,ror#32-1 1794#ifndef __thumb2__ 1795 str r4,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 1796#endif 1797 eor r5,r5,r11,ror#32-1 1798#ifndef __thumb2__ 1799 str r5,[sp,#176+4] 1800#else 1801 strd r4,r5,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 1802#endif 1803 eor r6,r6,r12,ror#32-31 1804 eor r7,r7,r14,ror#32-31 1805#ifndef __thumb2__ 1806 str r6,[sp,#184] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 1807#endif 1808 bic r10,r2,r0,ror#32-31 
#ifndef __thumb2__
	str	r7,[sp,#184+4]
#else
	strd	r6,r7,[sp,#184]	@ R[4][3] = C[3] ^= (~C[4] & C[0]);
#endif
	bic	r11,r3,r1,ror#32-31
	add	r12,sp,#0	@ r12 = &A[0][0], read by .Lround2x
	eor	r8,r10,r8,ror#32-1
	add	r10,sp,#40	@ r10 = &A[1][0]; KeccakF1600 also relies on this after return
	eor	r9,r11,r9,ror#32-1
#ifndef __thumb2__
	str	r8,[sp,#192]	@ R[4][4] = C[4] ^= (~C[0] & C[1]);
#endif
#ifndef __thumb2__
	str	r9,[sp,#192+4]
#else
	strd	r8,r9,[sp,#192]	@ R[4][4] = C[4] ^= (~C[0] & C[1]);
#endif
	blo	.Lround2x	@ flags come from a compare earlier in the round (not in this span)

	@ return through the lr that KeccakF1600_enter stored at [sp,#440]
#if __ARM_ARCH__>=5
	ldr	pc,[sp,#440]
#else
	ldr	lr,[sp,#440]
	tst	lr,#1
	moveq	pc,lr	@ be binary compatible with V4, yet
.word	0xe12fff1e	@ interoperable with Thumb ISA:-)
#endif
.size	KeccakF1600_int,.-KeccakF1600_int

@ ----------------------------------------------------------------------
@ KeccakF1600 (file-local, no .globl): permute a state held in memory.
@   In:    r0 = pointer to the 200-byte state A[5][5] (25 lanes, each
@               two 32-bit words).
@   Out:   the state at r0 is overwritten with the permuted state.
@   Regs:  r4-r11 are saved and restored; r0-r3, r12 and the flags are
@          overwritten.
@   Frame: 440+16 bytes below the pushed registers.
@          [sp,#0..199]  working copy of A[5][5]
@          [sp,#440]     lr slot written by KeccakF1600_enter
@          [sp,#440+16]  caller r0 (first register pushed by the stmdb)
@   The rounds themselves run in KeccakF1600_enter, which expects
@   r12 = &A[0][0], r10 = &A[1][0] and r4-r9 = A[4][2..4].
@ ----------------------------------------------------------------------
.type	KeccakF1600, %function
.align	5
KeccakF1600:
	stmdb	sp!,{r0,r4-r11,lr}
	sub	sp,sp,#440+16	@ space for A[5][5],D[5],T[5][5],...

	add	r10,r0,#40	@ r10 = source row 1
	add	r11,sp,#40	@ r11 = destination row 1
	ldmia	r0, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ copy A[5][5] to stack
	stmia	sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ last row: leaves A[4][2..4] in r4-r9
	add	r12,sp,#0
	add	r10,sp,#40
	stmia	r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}

	bl	KeccakF1600_enter

	ldr	r11, [sp,#440+16]	@ restore pointer to A
	ldmia	sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ return A[5][5]
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ r10 = sp+40 on return from the round code
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}

	add	sp,sp,#440+20	@ drop the frame plus the saved r0
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
	tst	lr,#1
	moveq	pc,lr	@ be binary compatible with V4, yet
.word	0xe12fff1e	@ interoperable with Thumb ISA:-)
#endif
.size	KeccakF1600,.-KeccakF1600

@ ----------------------------------------------------------------------
@ SHA3_absorb: XOR whole input blocks into the state and permute.
@   In:    r0 = pointer to the 200-byte state A[5][5]
@          r1 = inp, r2 = len (bytes), r3 = bsz (block size in bytes;
@          consumed 8 bytes at a time by .Loop_block)
@   Out:   r0 = number of trailing input bytes that were NOT absorbed
@               (len if len < bsz on entry, otherwise len reduced by
@               bsz for every block processed).
@   Regs:  r4-r12 are saved and restored.
@   Frame: 456+16 bytes below the pushed r0-r12,lr.
@          [sp,#0..199]  working copy of A[5][5]
@          [sp,#456]     0x55555555   [sp,#460] 0x33333333
@          [sp,#464]     0x0f0f0f0f   [sp,#468] 0x00ff00ff
@          [sp,#472]     pushed r0 (pointer to A)
@          [sp,#476]     pushed r1, reused as the running inp pointer
@          [sp,#480]     pushed r2, reused as the remaining length
@          [sp,#484]     pushed r3 (bsz)
@   Each 64-bit input word is read byte-wise (LSB first) and stored
@   bit-interleaved: the first 32-bit word of a lane collects the
@   even-numbered bits, the second word the odd-numbered bits.
@ ----------------------------------------------------------------------
.globl	SHA3_absorb
.type	SHA3_absorb,%function
.align	5
SHA3_absorb:
	stmdb	sp!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	sub	sp,sp,#456+16

	add	r10,r0,#40
	@ mov r11,r1
	mov	r12,r2	@ r12 = len
	mov	r14,r3	@ r14 = bsz
	cmp	r2,r3
	blo	.Labsorb_abort	@ less than one block: return len unchanged

	add	r11,sp,#0
	ldmia	r0, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ copy A[5][5] to stack
	stmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}

	ldr	r11,[sp,#476]	@ restore r11
#ifdef __thumb2__
	mov	r9,#0x00ff00ff
	mov	r8,#0x0f0f0f0f
	mov	r7,#0x33333333
	mov	r6,#0x55555555
#else
	mov	r6,#0x11	@ compose constants
	mov	r8,#0x0f
	mov	r9,#0xff
	orr	r6,r6,r6,lsl#8
	orr	r8,r8,r8,lsl#8
	orr	r6,r6,r6,lsl#16	@ 0x11111111
	orr	r9,r9,r9,lsl#16	@ 0x00ff00ff
	orr	r8,r8,r8,lsl#16	@ 0x0f0f0f0f
	orr	r7,r6,r6,lsl#1	@ 0x33333333
	orr	r6,r6,r6,lsl#2	@ 0x55555555
#endif
	str	r9,[sp,#468]	@ park the masks where the reload after
	str	r8,[sp,#464]	@ KeccakF1600_int picks them up
	str	r7,[sp,#460]
	str	r6,[sp,#456]
	b	.Loop_absorb

.align	4
.Loop_absorb:
	subs	r0,r12,r14
	blo	.Labsorbed
	add	r10,sp,#0	@ r10 walks the stack copy of the state
	str	r0,[sp,#480]	@ save len - bsz

.align	4
.Loop_block:
	ldrb	r0,[r11],#1
	ldrb	r1,[r11],#1
	ldrb	r2,[r11],#1
	ldrb	r3,[r11],#1
	ldrb	r4,[r11],#1
	orr	r0,r0,r1,lsl#8
	ldrb	r1,[r11],#1
	orr	r0,r0,r2,lsl#16
	ldrb	r2,[r11],#1
	orr	r0,r0,r3,lsl#24	@ lo
	ldrb	r3,[r11],#1
	orr	r1,r4,r1,lsl#8
	orr	r1,r1,r2,lsl#16
	orr	r1,r1,r3,lsl#24	@ hi

	and	r2,r0,r6	@ &=0x55555555
	and	r0,r0,r6,lsl#1	@ &=0xaaaaaaaa
	and	r3,r1,r6	@ &=0x55555555
	and	r1,r1,r6,lsl#1	@ &=0xaaaaaaaa
	orr	r2,r2,r2,lsr#1
	orr	r0,r0,r0,lsl#1
	orr	r3,r3,r3,lsr#1
	orr	r1,r1,r1,lsl#1
	and	r2,r2,r7	@ &=0x33333333
	and	r0,r0,r7,lsl#2	@ &=0xcccccccc
	and	r3,r3,r7	@ &=0x33333333
	and	r1,r1,r7,lsl#2	@ &=0xcccccccc
	orr	r2,r2,r2,lsr#2
	orr	r0,r0,r0,lsl#2
	orr	r3,r3,r3,lsr#2
	orr	r1,r1,r1,lsl#2
	and	r2,r2,r8	@ &=0x0f0f0f0f
	and	r0,r0,r8,lsl#4	@ &=0xf0f0f0f0
	and	r3,r3,r8	@ &=0x0f0f0f0f
	and	r1,r1,r8,lsl#4	@ &=0xf0f0f0f0
	ldmia	r10,{r4,r5}	@ A_flat[i]
	orr	r2,r2,r2,lsr#4
	orr	r0,r0,r0,lsl#4
	orr	r3,r3,r3,lsr#4
	orr	r1,r1,r1,lsl#4
	and	r2,r2,r9	@ &=0x00ff00ff
	and	r0,r0,r9,lsl#8	@ &=0xff00ff00
	and	r3,r3,r9	@ &=0x00ff00ff
	and	r1,r1,r9,lsl#8	@ &=0xff00ff00
	orr	r2,r2,r2,lsr#8
	orr	r0,r0,r0,lsl#8
	orr	r3,r3,r3,lsr#8
	orr	r1,r1,r1,lsl#8

	mov	r2,r2,lsl#16
	mov	r1,r1,lsr#16
	eor	r4,r4,r3,lsl#16
	eor	r5,r5,r0,lsr#16
	eor	r4,r4,r2,lsr#16
	eor	r5,r5,r1,lsl#16
	stmia	r10!,{r4,r5}	@ A_flat[i++] ^= BitInterleave(inp[0..7])

	subs	r14,r14,#8
	bhi	.Loop_block

	str	r11,[sp,#476]	@ save the advanced inp pointer

	bl	KeccakF1600_int

	add	r14,sp,#456
	ldmia	r14,{r6,r7,r8,r9,r10,r11,r12,r14}	@ restore constants and variables
	b	.Loop_absorb

.align	4
.Labsorbed:
	@ r10 = pointer to A here (reloaded from [sp,#472] after the last block)
	add	r11,sp,#40
	ldmia	sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ return A[5][5]
	ldmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}

.Labsorb_abort:
	add	sp,sp,#456+32	@ drop the frame plus the pushed r0-r3
	mov	r0,r12	@ return value
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	tst	lr,#1
	moveq	pc,lr	@ be binary compatible with V4, yet
.word	0xe12fff1e	@ interoperable with Thumb ISA:-)
#endif
.size	SHA3_absorb,.-SHA3_absorb

@ ----------------------------------------------------------------------
@ SHA3_squeeze: copy len output bytes out of the state, permuting the
@ state with KeccakF1600 each time bsz bytes have been emitted.
@   In:    r0 = pointer to the 200-byte bit-interleaved state
@          r1 = out, r2 = len (bytes), r3 = bsz (bytes)
@   Out:   len bytes stored at out; r0 holds no defined result.
@   Regs:  r4-r10 are saved and restored.
@   Roles: r10 = running lane pointer, r14 = state base (rebuilt after
@          every call because bl overwrites lr), r4 = out,
@          r5 = bytes still to emit, r12 = bytes left in this block,
@          r6-r9 = de-interleave masks.
@   Stack after the prologue:
@          [sp,#0..12]   0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff
@          [sp,#16]      pushed r0 (state pointer)
@          [sp,#20]      pushed r3 (bsz)
@          then r4-r10 and lr
@   A zero len returns immediately without touching out.  The original
@   code had no such check: with len == 0 it branched to .Lsqueeze_tail
@   and stored bytes to out.  This file is generated, so the same guard
@   belongs in the generator script as well.
@ ----------------------------------------------------------------------
.globl	SHA3_squeeze
.type	SHA3_squeeze,%function
.align	5
SHA3_squeeze:
	stmdb	sp!,{r0,r3-r10,lr}

	mov	r10,r0
	mov	r4,r1
	mov	r5,r2
	mov	r12,r3

#ifdef __thumb2__
	mov	r9,#0x00ff00ff
	mov	r8,#0x0f0f0f0f
	mov	r7,#0x33333333
	mov	r6,#0x55555555
#else
	mov	r6,#0x11	@ compose constants
	mov	r8,#0x0f
	mov	r9,#0xff
	orr	r6,r6,r6,lsl#8
	orr	r8,r8,r8,lsl#8
	orr	r6,r6,r6,lsl#16	@ 0x11111111
	orr	r9,r9,r9,lsl#16	@ 0x00ff00ff
	orr	r8,r8,r8,lsl#16	@ 0x0f0f0f0f
	orr	r7,r6,r6,lsl#1	@ 0x33333333
	orr	r6,r6,r6,lsl#2	@ 0x55555555
#endif
	stmdb	sp!,{r6,r7,r8,r9}

	mov	r14,r10
	cmp	r5,#0	@ len == 0: nothing to emit, and the tail
	beq	.Lsqueeze_done	@ path stores before it tests the length
	b	.Loop_squeeze

.align	4
.Loop_squeeze:
	ldmia	r10!,{r0,r1}	@ A_flat[i++]

	mov	r2,r0,lsl#16
	mov	r3,r1,lsl#16	@ r3 = r1 << 16
	mov	r2,r2,lsr#16	@ r2 = r0 & 0x0000ffff
	mov	r1,r1,lsr#16
	mov	r0,r0,lsr#16	@ r0 = r0 >> 16
	mov	r1,r1,lsl#16	@ r1 = r1 & 0xffff0000

	orr	r2,r2,r2,lsl#8
	orr	r3,r3,r3,lsr#8
	orr	r0,r0,r0,lsl#8
	orr	r1,r1,r1,lsr#8
	and	r2,r2,r9	@ &=0x00ff00ff
	and	r3,r3,r9,lsl#8	@ &=0xff00ff00
	and	r0,r0,r9	@ &=0x00ff00ff
	and	r1,r1,r9,lsl#8	@ &=0xff00ff00
	orr	r2,r2,r2,lsl#4
	orr	r3,r3,r3,lsr#4
	orr	r0,r0,r0,lsl#4
	orr	r1,r1,r1,lsr#4
	and	r2,r2,r8	@ &=0x0f0f0f0f
	and	r3,r3,r8,lsl#4	@ &=0xf0f0f0f0
	and	r0,r0,r8	@ &=0x0f0f0f0f
	and	r1,r1,r8,lsl#4	@ &=0xf0f0f0f0
	orr	r2,r2,r2,lsl#2
	orr	r3,r3,r3,lsr#2
	orr	r0,r0,r0,lsl#2
	orr	r1,r1,r1,lsr#2
	and	r2,r2,r7	@ &=0x33333333
	and	r3,r3,r7,lsl#2	@ &=0xcccccccc
	and	r0,r0,r7	@ &=0x33333333
	and	r1,r1,r7,lsl#2	@ &=0xcccccccc
	orr	r2,r2,r2,lsl#1
	orr	r3,r3,r3,lsr#1
	orr	r0,r0,r0,lsl#1
	orr	r1,r1,r1,lsr#1
	and	r2,r2,r6	@ &=0x55555555
	and	r3,r3,r6,lsl#1	@ &=0xaaaaaaaa
	and	r0,r0,r6	@ &=0x55555555
	and	r1,r1,r6,lsl#1	@ &=0xaaaaaaaa

	orr	r2,r2,r3	@ r2 = low 32 bits of the lane
	orr	r0,r0,r1	@ r0 = high 32 bits of the lane

	cmp	r5,#8
	blo	.Lsqueeze_tail	@ fewer than 8 bytes wanted: partial store
	mov	r1,r2,lsr#8
	strb	r2,[r4],#1
	mov	r3,r2,lsr#16
	strb	r1,[r4],#1
	mov	r2,r2,lsr#24
	strb	r3,[r4],#1
	strb	r2,[r4],#1

	mov	r1,r0,lsr#8
	strb	r0,[r4],#1
	mov	r3,r0,lsr#16
	strb	r1,[r4],#1
	mov	r0,r0,lsr#24
	strb	r3,[r4],#1
	strb	r0,[r4],#1
	subs	r5,r5,#8
	beq	.Lsqueeze_done

	subs	r12,r12,#8	@ bsz -= 8
	bhi	.Loop_squeeze

	mov	r0,r14	@ original r10

	bl	KeccakF1600

	ldmia	sp,{r6,r7,r8,r9,r10,r12}	@ restore constants and variables
	mov	r14,r10
	b	.Loop_squeeze

.align	4
.Lsqueeze_tail:
	@ 1..7 bytes left (len is known to be non-zero here)
	strb	r2,[r4],#1
	mov	r2,r2,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r2,[r4],#1
	mov	r2,r2,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r2,[r4],#1
	mov	r2,r2,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r2,[r4],#1
	subs	r5,r5,#1
	beq	.Lsqueeze_done

	strb	r0,[r4],#1
	mov	r0,r0,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r0,[r4],#1
	mov	r0,r0,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r0,[r4]
	b	.Lsqueeze_done

.align	4
.Lsqueeze_done:
	add	sp,sp,#24	@ drop the four masks plus the pushed r0 and r3
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	tst	lr,#1
	moveq	pc,lr	@ be binary compatible with V4, yet
.word	0xe12fff1e	@ interoperable with Thumb ISA:-)
#endif
.size	SHA3_squeeze,.-SHA3_squeeze
#if __ARM_MAX_ARCH__>=7
.fpu	neon

@ 24 64-bit constants; KeccakF1600_neon reads one per round through r2
@ (post-incremented) and XORs it into A[0][0].
.type	iotas64, %object
.align	5
iotas64:
.quad	0x0000000000000001
.quad	0x0000000000008082
.quad	0x800000000000808a
.quad	0x8000000080008000
.quad	0x000000000000808b
.quad	0x0000000080000001
.quad	0x8000000080008081
.quad	0x8000000000008009
.quad	0x000000000000008a
.quad	0x0000000000000088
.quad	0x0000000080008009
.quad	0x000000008000000a
.quad	0x000000008000808b
.quad	0x800000000000008b
.quad	0x8000000000008089
.quad	0x8000000000008003
.quad	0x8000000000008002
.quad	0x8000000000000080
.quad	0x000000000000800a
.quad	0x800000008000000a
.quad	0x8000000080008081
.quad	0x8000000000008080
.quad	0x0000000080000001
.quad	0x8000000080008008
.size	iotas64,.-iotas64

@ ----------------------------------------------------------------------
@ KeccakF1600_neon (file-local): 24 rounds on a state held in NEON
@ registers.
@   In:    d0,d2,d4,d6,d8      = A[0][0..4]
@          d1,d3,d5,d7,d9      = A[1][0..4]
@          d10,d12,d14,d16,d18 = A[2][0..4]
@          d11,d13,d15,d17,d19 = A[3][0..4]
@          d20-d24             = A[4][0..4]
@          r0 = 8-byte aligned scratch of at least 24 bytes; [r0,#0..15]
@               and [r0,#16..23] are used as spill slots in every round.
@               Both callers in this file pass the state buffer itself.
@   Out:   permuted state in the same registers.
@   Regs:  overwrites r1-r3, d25-d31 and the flags; r0 is preserved.
@   Returns with bx lr, so the caller must have saved its own lr.
@ ----------------------------------------------------------------------
.type	KeccakF1600_neon, %function
.align	5
KeccakF1600_neon:
	add	r1, r0, #16
	adr	r2, iotas64
	mov	r3, #24	@ loop counter
	b	.Loop_neon

.align	4
.Loop_neon:
	@ Theta
	vst1.64	{q4}, [r0,:64]	@ offload A[0..1][4]
	veor	q13, q0, q5	@ A[0..1][0]^A[2..3][0]
	vst1.64	{d18}, [r1,:64]	@ offload A[2][4]
	veor	q14, q1, q6	@ A[0..1][1]^A[2..3][1]
	veor	q15, q2, q7	@ A[0..1][2]^A[2..3][2]
	veor	d26, d26, d27	@ C[0]=A[0][0]^A[1][0]^A[2][0]^A[3][0]
	veor	d27, d28, d29	@ C[1]=A[0][1]^A[1][1]^A[2][1]^A[3][1]
	veor	q14, q3, q8	@ A[0..1][3]^A[2..3][3]
	veor	q4, q4, q9	@ A[0..1][4]^A[2..3][4]
	veor	d30, d30, d31	@ C[2]=A[0][2]^A[1][2]^A[2][2]^A[3][2]
	veor	d31, d28, d29	@ C[3]=A[0][3]^A[1][3]^A[2][3]^A[3][3]
	veor	d25, d8, d9	@ C[4]=A[0][4]^A[1][4]^A[2][4]^A[3][4]
	veor	q13, q13, q10	@ C[0..1]^=A[4][0..1]
	veor	q14, q15, q11	@ C[2..3]^=A[4][2..3]
	veor	d25, d25, d24	@ C[4]^=A[4][4]

	vadd.u64	q4, q13, q13	@ C[0..1]<<1
	vadd.u64	q15, q14, q14	@ C[2..3]<<1
	vadd.u64	d18, d25, d25	@ C[4]<<1
	vsri.u64	q4, q13, #63	@ ROL64(C[0..1],1)
	vsri.u64	q15, q14, #63	@ ROL64(C[2..3],1)
	vsri.u64	d18, d25, #63	@ ROL64(C[4],1)
	veor	d25, d25, d9	@ D[0] = C[4] ^= ROL64(C[1],1)
	veor	q13, q13, q15	@ D[1..2] = C[0..1] ^ ROL64(C[2..3],1)
	veor	d28, d28, d18	@ D[3] = C[2] ^= ROL64(C[4],1)
	veor	d29, d29, d8	@ D[4] = C[3] ^= ROL64(C[0],1)

	veor	d0, d0, d25	@ A[0][0] ^= C[4]
	veor	d1, d1, d25	@ A[1][0] ^= C[4]
	veor	d10, d10, d25	@ A[2][0] ^= C[4]
	veor	d11, d11, d25	@ A[3][0] ^= C[4]
	veor	d20, d20, d25	@ A[4][0] ^= C[4]

	veor	d2, d2, d26	@ A[0][1] ^= D[1]
	veor	d3, d3, d26	@ A[1][1] ^= D[1]
	veor	d12, d12, d26	@ A[2][1] ^= D[1]
	veor	d13, d13, d26	@ A[3][1] ^= D[1]
	veor	d21, d21, d26	@ A[4][1] ^= D[1]
	vmov	d26, d27	@ both halves of q13 now hold D[2]

	veor	d6, d6, d28	@ A[0][3] ^= C[2]
	veor	d7, d7, d28	@ A[1][3] ^= C[2]
	veor	d16, d16, d28	@ A[2][3] ^= C[2]
	veor	d17, d17, d28	@ A[3][3] ^= C[2]
	veor	d23, d23, d28	@ A[4][3] ^= C[2]
	vld1.64	{q4}, [r0,:64]	@ restore A[0..1][4]
	vmov	d28, d29	@ both halves of q14 now hold D[4]

	vld1.64	{d18}, [r1,:64]	@ restore A[2][4]
	veor	q2, q2, q13	@ A[0..1][2] ^= D[2]
	veor	q7, q7, q13	@ A[2..3][2] ^= D[2]
	veor	d22, d22, d27	@ A[4][2] ^= D[2]

	veor	q4, q4, q14	@ A[0..1][4] ^= C[3]
	veor	q9, q9, q14	@ A[2..3][4] ^= C[3]
	veor	d24, d24, d29	@ A[4][4] ^= C[3]

	@ Rho + Pi
	vmov	d26, d2	@ C[1] = A[0][1]
	vshl.u64	d2, d3, #44
	vmov	d27, d4	@ C[2] = A[0][2]
	vshl.u64	d4, d14, #43
	vmov	d28, d6	@ C[3] = A[0][3]
	vshl.u64	d6, d17, #21
	vmov	d29, d8	@ C[4] = A[0][4]
	vshl.u64	d8, d24, #14
	vsri.u64	d2, d3, #64-44	@ A[0][1] = ROL64(A[1][1], rhotates[1][1])
	vsri.u64	d4, d14, #64-43	@ A[0][2] = ROL64(A[2][2], rhotates[2][2])
	vsri.u64	d6, d17, #64-21	@ A[0][3] = ROL64(A[3][3], rhotates[3][3])
	vsri.u64	d8, d24, #64-14	@ A[0][4] = ROL64(A[4][4], rhotates[4][4])

	vshl.u64	d3, d9, #20
	vshl.u64	d14, d16, #25
	vshl.u64	d17, d15, #15
	vshl.u64	d24, d21, #2
	vsri.u64	d3, d9, #64-20	@ A[1][1] = ROL64(A[1][4], rhotates[1][4])
	vsri.u64	d14, d16, #64-25	@ A[2][2] = ROL64(A[2][3], rhotates[2][3])
	vsri.u64	d17, d15, #64-15	@ A[3][3] = ROL64(A[3][2], rhotates[3][2])
	vsri.u64	d24, d21, #64-2	@ A[4][4] = ROL64(A[4][1], rhotates[4][1])

	vshl.u64	d9, d22, #61
	@ vshl.u64 d16, d19, #8
	vshl.u64	d15, d12, #10
	vshl.u64	d21, d7, #55
	vsri.u64	d9, d22, #64-61	@ A[1][4] = ROL64(A[4][2], rhotates[4][2])
	vext.8	d16, d19, d19, #8-1	@ A[2][3] = ROL64(A[3][4], rhotates[3][4])
	vsri.u64	d15, d12, #64-10	@ A[3][2] = ROL64(A[2][1], rhotates[2][1])
	vsri.u64	d21, d7, #64-55	@ A[4][1] = ROL64(A[1][3], rhotates[1][3])

	vshl.u64	d22, d18, #39
	@ vshl.u64 d19, d23, #56
	vshl.u64	d12, d5, #6
	vshl.u64	d7, d13, #45
	vsri.u64	d22, d18, #64-39	@ A[4][2] = ROL64(A[2][4], rhotates[2][4])
	vext.8	d19, d23, d23, #8-7	@ A[3][4] = ROL64(A[4][3], rhotates[4][3])
	vsri.u64	d12, d5, #64-6	@ A[2][1] = ROL64(A[1][2], rhotates[1][2])
	vsri.u64	d7, d13, #64-45	@ A[1][3] = ROL64(A[3][1], rhotates[3][1])

	vshl.u64	d18, d20, #18
	vshl.u64	d23, d11, #41
	vshl.u64	d5, d10, #3
	vshl.u64	d13, d1, #36
	vsri.u64	d18, d20, #64-18	@ A[2][4] = ROL64(A[4][0], rhotates[4][0])
	vsri.u64	d23, d11, #64-41	@ A[4][3] = ROL64(A[3][0], rhotates[3][0])
	vsri.u64	d5, d10, #64-3	@ A[1][2] = ROL64(A[2][0], rhotates[2][0])
	vsri.u64	d13, d1, #64-36	@ A[3][1] = ROL64(A[1][0], rhotates[1][0])

	vshl.u64	d1, d28, #28
	vshl.u64	d10, d26, #1
	vshl.u64	d11, d29, #27
	vshl.u64	d20, d27, #62
	vsri.u64	d1, d28, #64-28	@ A[1][0] = ROL64(C[3], rhotates[0][3])
	vsri.u64	d10, d26, #64-1	@ A[2][0] = ROL64(C[1], rhotates[0][1])
	vsri.u64	d11, d29, #64-27	@ A[3][0] = ROL64(C[4], rhotates[0][4])
	vsri.u64	d20, d27, #64-62	@ A[4][0] = ROL64(C[2], rhotates[0][2])

	@ Chi + Iota
	vbic	q13, q2, q1
	vbic	q14, q3, q2
	vbic	q15, q4, q3
	veor	q13, q13, q0	@ A[0..1][0] ^ (~A[0..1][1] & A[0..1][2])
	veor	q14, q14, q1	@ A[0..1][1] ^ (~A[0..1][2] & A[0..1][3])
	veor	q2, q2, q15	@ A[0..1][2] ^= (~A[0..1][3] & A[0..1][4])
	vst1.64	{q13}, [r0,:64]	@ offload A[0..1][0]
	vbic	q13, q0, q4
	vbic	q15, q1, q0
	vmov	q1, q14	@ A[0..1][1]
	veor	q3, q3, q13	@ A[0..1][3] ^= (~A[0..1][4] & A[0..1][0])
	veor	q4, q4, q15	@ A[0..1][4] ^= (~A[0..1][0] & A[0..1][1])

	vbic	q13, q7, q6
	vmov	q0, q5	@ A[2..3][0]
	vbic	q14, q8, q7
	vmov	q15, q6	@ A[2..3][1]
	veor	q5, q5, q13	@ A[2..3][0] ^= (~A[2..3][1] & A[2..3][2])
	vbic	q13, q9, q8
	veor	q6, q6, q14	@ A[2..3][1] ^= (~A[2..3][2] & A[2..3][3])
	vbic	q14, q0, q9
	veor	q7, q7, q13	@ A[2..3][2] ^= (~A[2..3][3] & A[2..3][4])
	vbic	q13, q15, q0
	veor	q8, q8, q14	@ A[2..3][3] ^= (~A[2..3][4] & A[2..3][0])
	vmov	q14, q10	@ A[4][0..1]
	veor	q9, q9, q13	@ A[2..3][4] ^= (~A[2..3][0] & A[2..3][1])

	vld1.64	d25, [r2,:64]!	@ Iota[i++]
	vbic	d26, d22, d21
	vbic	d27, d23, d22
	vld1.64	{q0}, [r0,:64]	@ restore A[0..1][0]
	veor	d20, d20, d26	@ A[4][0] ^= (~A[4][1] & A[4][2])
	vbic	d26, d24, d23
	veor	d21, d21, d27	@ A[4][1] ^= (~A[4][2] & A[4][3])
	vbic	d27, d28, d24
	veor	d22, d22, d26	@ A[4][2] ^= (~A[4][3] & A[4][4])
	vbic	d26, d29, d28
	veor	d23, d23, d27	@ A[4][3] ^= (~A[4][4] & A[4][0])
	veor	d0, d0, d25	@ A[0][0] ^= Iota[i]
	veor	d24, d24, d26	@ A[4][4] ^= (~A[4][0] & A[4][1])

	subs	r3, r3, #1
	bne	.Loop_neon

	bx	lr
.size	KeccakF1600_neon,.-KeccakF1600_neon

@ ----------------------------------------------------------------------
@ SHA3_absorb_neon: NEON counterpart of SHA3_absorb.
@   In:    r0 = pointer to the 200-byte state (loaded with :64
@               alignment hints, so it has to be 8-byte aligned)
@          r1 = inp, r2 = len (bytes), r3 = bsz (bytes)
@   Out:   r0 = number of trailing input bytes that were NOT absorbed.
@   Regs:  r4-r6 and d8-d15 are saved and restored; r12 is scratch.
@   Lanes are XORed in as plain 64-bit values (byte loads through
@   vld1.8, no bit interleaving).
@   Inside .Loop_absorb_neon one cmp against bsz serves two branches:
@   the blo right after it and the beq that follows the next lane
@   (neither the vld1 nor the veor in between changes the flags).
@ ----------------------------------------------------------------------
.globl	SHA3_absorb_neon
.type	SHA3_absorb_neon, %function
.align	5
SHA3_absorb_neon:
	stmdb	sp!, {r4,r5,r6,lr}
	vstmdb	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}

	mov	r4, r1	@ inp
	mov	r5, r2	@ len
	mov	r6, r3	@ bsz

	vld1.32	{d0}, [r0,:64]!	@ A[0][0]
	vld1.32	{d2}, [r0,:64]!	@ A[0][1]
	vld1.32	{d4}, [r0,:64]!	@ A[0][2]
	vld1.32	{d6}, [r0,:64]!	@ A[0][3]
	vld1.32	{d8}, [r0,:64]!	@ A[0][4]

	vld1.32	{d1}, [r0,:64]!	@ A[1][0]
	vld1.32	{d3}, [r0,:64]!	@ A[1][1]
	vld1.32	{d5}, [r0,:64]!	@ A[1][2]
	vld1.32	{d7}, [r0,:64]!	@ A[1][3]
	vld1.32	{d9}, [r0,:64]!	@ A[1][4]

	vld1.32	{d10}, [r0,:64]!	@ A[2][0]
	vld1.32	{d12}, [r0,:64]!	@ A[2][1]
	vld1.32	{d14}, [r0,:64]!	@ A[2][2]
	vld1.32	{d16}, [r0,:64]!	@ A[2][3]
	vld1.32	{d18}, [r0,:64]!	@ A[2][4]

	vld1.32	{d11}, [r0,:64]!	@ A[3][0]
	vld1.32	{d13}, [r0,:64]!	@ A[3][1]
	vld1.32	{d15}, [r0,:64]!	@ A[3][2]
	vld1.32	{d17}, [r0,:64]!	@ A[3][3]
	vld1.32	{d19}, [r0,:64]!	@ A[3][4]

	vld1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..3]
	vld1.32	{d24}, [r0,:64]	@ A[4][4]
	sub	r0, r0, #24*8	@ rewind
	b	.Loop_absorb_neon

.align	4
.Loop_absorb_neon:
	subs	r12, r5, r6	@ len - bsz
	blo	.Labsorbed_neon
	mov	r5, r12

	vld1.8	{d31}, [r4]!	@ endian-neutral loads...
	cmp	r6, #8*2
	veor	d0, d0, d31	@ A[0][0] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d2, d2, d31	@ A[0][1] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*4
	veor	d4, d4, d31	@ A[0][2] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d6, d6, d31	@ A[0][3] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31},[r4]!
	cmp	r6, #8*6
	veor	d8, d8, d31	@ A[0][4] ^= *inp++
	blo	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	veor	d1, d1, d31	@ A[1][0] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*8
	veor	d3, d3, d31	@ A[1][1] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d5, d5, d31	@ A[1][2] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*10
	veor	d7, d7, d31	@ A[1][3] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d9, d9, d31	@ A[1][4] ^= *inp++
	beq	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	cmp	r6, #8*12
	veor	d10, d10, d31	@ A[2][0] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d12, d12, d31	@ A[2][1] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*14
	veor	d14, d14, d31	@ A[2][2] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d16, d16, d31	@ A[2][3] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*16
	veor	d18, d18, d31	@ A[2][4] ^= *inp++
	blo	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	veor	d11, d11, d31	@ A[3][0] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*18
	veor	d13, d13, d31	@ A[3][1] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d15, d15, d31	@ A[3][2] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*20
	veor	d17, d17, d31	@ A[3][3] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d19, d19, d31	@ A[3][4] ^= *inp++
	beq	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	cmp	r6, #8*22
	veor	d20, d20, d31	@ A[4][0] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d21, d21, d31	@ A[4][1] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*24
	veor	d22, d22, d31	@ A[4][2] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d23, d23, d31	@ A[4][3] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d24, d24, d31	@ A[4][4] ^= *inp++

.Lprocess_neon:
	bl	KeccakF1600_neon
	b	.Loop_absorb_neon

.align	4
.Labsorbed_neon:
	vst1.32	{d0}, [r0,:64]!	@ A[0][0..4]
	vst1.32	{d2}, [r0,:64]!
	vst1.32	{d4}, [r0,:64]!
	vst1.32	{d6}, [r0,:64]!
	vst1.32	{d8}, [r0,:64]!

	vst1.32	{d1}, [r0,:64]!	@ A[1][0..4]
	vst1.32	{d3}, [r0,:64]!
	vst1.32	{d5}, [r0,:64]!
	vst1.32	{d7}, [r0,:64]!
	vst1.32	{d9}, [r0,:64]!

	vst1.32	{d10}, [r0,:64]!	@ A[2][0..4]
	vst1.32	{d12}, [r0,:64]!
	vst1.32	{d14}, [r0,:64]!
	vst1.32	{d16}, [r0,:64]!
	vst1.32	{d18}, [r0,:64]!

	vst1.32	{d11}, [r0,:64]!	@ A[3][0..4]
	vst1.32	{d13}, [r0,:64]!
	vst1.32	{d15}, [r0,:64]!
	vst1.32	{d17}, [r0,:64]!
	vst1.32	{d19}, [r0,:64]!

	vst1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
	vst1.32	{d24}, [r0,:64]

	mov	r0, r5	@ return value
	vldmia	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!, {r4,r5,r6,pc}
.size	SHA3_absorb_neon,.-SHA3_absorb_neon

@ ----------------------------------------------------------------------
@ SHA3_squeeze_neon: NEON counterpart of SHA3_squeeze.
@   In:    r0 = pointer to the 200-byte state (8-byte aligned, see the
@               :64 hints below)
@          r1 = out, r2 = len (bytes), r3 = bsz (bytes)
@   Out:   len bytes stored at out; r0 holds no defined result.
@   Regs:  r4-r6 are saved and restored; d8-d15 are saved only around
@          the permutation; r2, r3, r12 and r14 are scratch.
@   Roles: r12 = running lane pointer, r4 = out, r5 = bytes still to
@          emit, r14 = bytes left in this block (reloaded from r6 after
@          each permutation, since bl overwrites lr).
@   A zero len returns immediately without touching out.  The original
@   code had no such check: with len == 0 it branched to
@   .Lsqueeze_neon_tail, which stores one byte before its first length
@   test.  This file is generated, so the same guard belongs in the
@   generator script as well.
@ ----------------------------------------------------------------------
.globl	SHA3_squeeze_neon
.type	SHA3_squeeze_neon, %function
.align	5
SHA3_squeeze_neon:
	stmdb	sp!, {r4,r5,r6,lr}

	mov	r4, r1	@ out
	mov	r5, r2	@ len
	mov	r6, r3	@ bsz
	mov	r12, r0	@ A_flat
	mov	r14, r3	@ bsz
	cmp	r5, #0	@ len == 0: nothing to emit, and the tail
	beq	.Lsqueeze_neon_done	@ path stores before it tests the length
	b	.Loop_squeeze_neon

.align	4
.Loop_squeeze_neon:
	cmp	r5, #8
	blo	.Lsqueeze_neon_tail
	vld1.32	{d0}, [r12]!
	vst1.8	{d0}, [r4]!	@ endian-neutral store

	subs	r5, r5, #8	@ len -= 8
	beq	.Lsqueeze_neon_done

	subs	r14, r14, #8	@ bsz -= 8
	bhi	.Loop_squeeze_neon

	@ block exhausted: load the state, permute it, store it back
	vstmdb	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}

	vld1.32	{d0}, [r0,:64]!	@ A[0][0..4]
	vld1.32	{d2}, [r0,:64]!
	vld1.32	{d4}, [r0,:64]!
	vld1.32	{d6}, [r0,:64]!
	vld1.32	{d8}, [r0,:64]!

	vld1.32	{d1}, [r0,:64]!	@ A[1][0..4]
	vld1.32	{d3}, [r0,:64]!
	vld1.32	{d5}, [r0,:64]!
	vld1.32	{d7}, [r0,:64]!
	vld1.32	{d9}, [r0,:64]!

	vld1.32	{d10}, [r0,:64]!	@ A[2][0..4]
	vld1.32	{d12}, [r0,:64]!
	vld1.32	{d14}, [r0,:64]!
	vld1.32	{d16}, [r0,:64]!
	vld1.32	{d18}, [r0,:64]!

	vld1.32	{d11}, [r0,:64]!	@ A[3][0..4]
	vld1.32	{d13}, [r0,:64]!
	vld1.32	{d15}, [r0,:64]!
	vld1.32	{d17}, [r0,:64]!
	vld1.32	{d19}, [r0,:64]!

	vld1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
	vld1.32	{d24}, [r0,:64]
	sub	r0, r0, #24*8	@ rewind

	bl	KeccakF1600_neon

	mov	r12, r0	@ A_flat
	vst1.32	{d0}, [r0,:64]!	@ A[0][0..4]
	vst1.32	{d2}, [r0,:64]!
	vst1.32	{d4}, [r0,:64]!
	vst1.32	{d6}, [r0,:64]!
	vst1.32	{d8}, [r0,:64]!

	vst1.32	{d1}, [r0,:64]!	@ A[1][0..4]
	vst1.32	{d3}, [r0,:64]!
	vst1.32	{d5}, [r0,:64]!
	vst1.32	{d7}, [r0,:64]!
	vst1.32	{d9}, [r0,:64]!

	vst1.32	{d10}, [r0,:64]!	@ A[2][0..4]
	vst1.32	{d12}, [r0,:64]!
	vst1.32	{d14}, [r0,:64]!
	vst1.32	{d16}, [r0,:64]!
	vst1.32	{d18}, [r0,:64]!

	vst1.32	{d11}, [r0,:64]!	@ A[3][0..4]
	vst1.32	{d13}, [r0,:64]!
	vst1.32	{d15}, [r0,:64]!
	vst1.32	{d17}, [r0,:64]!
	vst1.32	{d19}, [r0,:64]!

	vst1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
	mov	r14, r6	@ bsz
	vst1.32	{d24}, [r0,:64]
	mov	r0, r12	@ rewind

	vldmia	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
	b	.Loop_squeeze_neon

.align	4
.Lsqueeze_neon_tail:
	@ 1..7 bytes left (len is known to be non-zero here); each cmp
	@ feeds the blo after it and the beq one store later
	ldmia	r12, {r2,r3}
	cmp	r5, #2
	strb	r2, [r4],#1	@ endian-neutral store
	mov	r2, r2, lsr#8
	blo	.Lsqueeze_neon_done
	strb	r2, [r4], #1
	mov	r2, r2, lsr#8
	beq	.Lsqueeze_neon_done
	strb	r2, [r4], #1
	mov	r2, r2, lsr#8
	cmp	r5, #4
	blo	.Lsqueeze_neon_done
	strb	r2, [r4], #1
	beq	.Lsqueeze_neon_done

	strb	r3, [r4], #1
	mov	r3, r3, lsr#8
	cmp	r5, #6
	blo	.Lsqueeze_neon_done
	strb	r3, [r4], #1
	mov	r3, r3, lsr#8
	beq	.Lsqueeze_neon_done
	strb	r3, [r4], #1

.Lsqueeze_neon_done:
	ldmia	sp!, {r4,r5,r6,pc}
.size	SHA3_squeeze_neon,.-SHA3_squeeze_neon
#endif
@ NUL-terminated ASCII ident string:
@ "Keccak-1600 absorb and squeeze for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
.byte	75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2