1/* $FreeBSD$ */ 2/* Do not modify. This file is auto-generated from keccak1600-armv4.pl. */ 3#include "arm_arch.h" 4 5.text 6 7#if defined(__thumb2__) 8.syntax unified 9.thumb 10#else 11.code 32 12#endif 13 /* iotas32: table in the .text section holding 24 entries, each a pair of 32-bit words (192 bytes in total). The round code visible later in this file takes the table base with adr, adds the byte counter kept at sp+444, loads one pair at offset 0 and the following pair at offset 8, XORs each pair into the two words of R[0][0], then advances the counter by 16 and compares it with 192, so each pass through that code consumes two entries. NOTE(review): presumably these are the Keccak-f[1600] round constants with every 64-bit value split into two bit-interleaved 32-bit halves; confirm against keccak1600-armv4.pl. */ 14.type iotas32, %object 15.align 5 16iotas32: 17.long 0x00000001, 0x00000000 18.long 0x00000000, 0x00000089 19.long 0x00000000, 0x8000008b 20.long 0x00000000, 0x80008080 21.long 0x00000001, 0x0000008b 22.long 0x00000001, 0x00008000 23.long 0x00000001, 0x80008088 24.long 0x00000001, 0x80000082 25.long 0x00000000, 0x0000000b 26.long 0x00000000, 0x0000000a 27.long 0x00000001, 0x00008082 28.long 0x00000000, 0x00008003 29.long 0x00000001, 0x0000808b 30.long 0x00000001, 0x8000000b 31.long 0x00000001, 0x8000008a 32.long 0x00000001, 0x80000081 33.long 0x00000000, 0x80000081 34.long 0x00000000, 0x80000008 35.long 0x00000000, 0x00000083 36.long 0x00000000, 0x80008003 37.long 0x00000001, 0x80008088 38.long 0x00000000, 0x80000088 39.long 0x00000001, 0x00008000 40.long 0x00000000, 0x80008082 41.size iotas32,.-iotas32 42 43.type KeccakF1600_int, %function 44.align 5 45KeccakF1600_int: 46 add r9,sp,#176 47 add r12,sp,#0 48 add r10,sp,#40 49 ldmia r9,{r4,r5,r6,r7,r8,r9} @ A[4][2..4] 50KeccakF1600_enter: 51 str lr,[sp,#440] 52 eor r11,r11,r11 53 str r11,[sp,#444] 54 b .Lround2x 55 56.align 4 57.Lround2x: 58 ldmia r12,{r0,r1,r2,r3} @ A[0][0..1] 59 ldmia r10,{r10,r11,r12,r14} @ A[1][0..1] 60#ifdef __thumb2__ 61 eor r0,r0,r10 62 eor r1,r1,r11 63 eor r2,r2,r12 64 ldrd r10,r11,[sp,#56] 65 eor r3,r3,r14 66 ldrd r12,r14,[sp,#64] 67 eor r4,r4,r10 68 eor r5,r5,r11 69 eor r6,r6,r12 70 ldrd r10,r11,[sp,#72] 71 eor r7,r7,r14 72 ldrd r12,r14,[sp,#80] 73 eor r8,r8,r10 74 eor r9,r9,r11 75 eor r0,r0,r12 76 ldrd r10,r11,[sp,#88] 77 eor r1,r1,r14 78 ldrd r12,r14,[sp,#96] 79 eor r2,r2,r10 80 eor r3,r3,r11 81 eor r4,r4,r12 82 ldrd r10,r11,[sp,#104] 83 eor r5,r5,r14 84 ldrd r12,r14,[sp,#112] 85 eor r6,r6,r10 86 eor r7,r7,r11 87 eor r8,r8,r12 88 ldrd r10,r11,[sp,#120] 89 eor r9,r9,r14 90 ldrd 
r12,r14,[sp,#128] 91 eor r0,r0,r10 92 eor r1,r1,r11 93 eor r2,r2,r12 94 ldrd r10,r11,[sp,#136] 95 eor r3,r3,r14 96 ldrd r12,r14,[sp,#144] 97 eor r4,r4,r10 98 eor r5,r5,r11 99 eor r6,r6,r12 100 ldrd r10,r11,[sp,#152] 101 eor r7,r7,r14 102 ldrd r12,r14,[sp,#160] 103 eor r8,r8,r10 104 eor r9,r9,r11 105 eor r0,r0,r12 106 ldrd r10,r11,[sp,#168] 107 eor r1,r1,r14 108 ldrd r12,r14,[sp,#16] 109 eor r2,r2,r10 110 eor r3,r3,r11 111 eor r4,r4,r12 112 ldrd r10,r11,[sp,#24] 113 eor r5,r5,r14 114 ldrd r12,r14,[sp,#32] 115#else 116 eor r0,r0,r10 117 add r10,sp,#56 118 eor r1,r1,r11 119 eor r2,r2,r12 120 eor r3,r3,r14 121 ldmia r10,{r10,r11,r12,r14} @ A[1][2..3] 122 eor r4,r4,r10 123 add r10,sp,#72 124 eor r5,r5,r11 125 eor r6,r6,r12 126 eor r7,r7,r14 127 ldmia r10,{r10,r11,r12,r14} @ A[1][4]..A[2][0] 128 eor r8,r8,r10 129 add r10,sp,#88 130 eor r9,r9,r11 131 eor r0,r0,r12 132 eor r1,r1,r14 133 ldmia r10,{r10,r11,r12,r14} @ A[2][1..2] 134 eor r2,r2,r10 135 add r10,sp,#104 136 eor r3,r3,r11 137 eor r4,r4,r12 138 eor r5,r5,r14 139 ldmia r10,{r10,r11,r12,r14} @ A[2][3..4] 140 eor r6,r6,r10 141 add r10,sp,#120 142 eor r7,r7,r11 143 eor r8,r8,r12 144 eor r9,r9,r14 145 ldmia r10,{r10,r11,r12,r14} @ A[3][0..1] 146 eor r0,r0,r10 147 add r10,sp,#136 148 eor r1,r1,r11 149 eor r2,r2,r12 150 eor r3,r3,r14 151 ldmia r10,{r10,r11,r12,r14} @ A[3][2..3] 152 eor r4,r4,r10 153 add r10,sp,#152 154 eor r5,r5,r11 155 eor r6,r6,r12 156 eor r7,r7,r14 157 ldmia r10,{r10,r11,r12,r14} @ A[3][4]..A[4][0] 158 eor r8,r8,r10 159 ldr r10,[sp,#168] @ A[4][1] 160 eor r9,r9,r11 161 ldr r11,[sp,#168+4] 162 eor r0,r0,r12 163 ldr r12,[sp,#16] @ A[0][2] 164 eor r1,r1,r14 165 ldr r14,[sp,#16+4] 166 eor r2,r2,r10 167 add r10,sp,#24 168 eor r3,r3,r11 169 eor r4,r4,r12 170 eor r5,r5,r14 171 ldmia r10,{r10,r11,r12,r14} @ A[0][3..4] 172#endif 173 eor r6,r6,r10 174 eor r7,r7,r11 175 eor r8,r8,r12 176 eor r9,r9,r14 177 178 eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0]; 179#ifndef __thumb2__ 180 str r10,[sp,#208] @ 
D[1] = E[0] 181#endif 182 eor r11,r1,r4 183#ifndef __thumb2__ 184 str r11,[sp,#208+4] 185#else 186 strd r10,r11,[sp,#208] @ D[1] = E[0] 187#endif 188 eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3]; 189 eor r14,r7,r0 190#ifndef __thumb2__ 191 str r12,[sp,#232] @ D[4] = E[1] 192#endif 193 eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4]; 194#ifndef __thumb2__ 195 str r14,[sp,#232+4] 196#else 197 strd r12,r14,[sp,#232] @ D[4] = E[1] 198#endif 199 eor r1,r9,r2 200#ifndef __thumb2__ 201 str r0,[sp,#200] @ D[0] = C[0] 202#endif 203 eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1]; 204#ifndef __thumb2__ 205 ldr r7,[sp,#144] 206#endif 207 eor r3,r3,r6 208#ifndef __thumb2__ 209 str r1,[sp,#200+4] 210#else 211 strd r0,r1,[sp,#200] @ D[0] = C[0] 212#endif 213#ifndef __thumb2__ 214 ldr r6,[sp,#144+4] 215#else 216 ldrd r7,r6,[sp,#144] 217#endif 218#ifndef __thumb2__ 219 str r2,[sp,#216] @ D[2] = C[1] 220#endif 221 eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2]; 222#ifndef __thumb2__ 223 str r3,[sp,#216+4] 224#else 225 strd r2,r3,[sp,#216] @ D[2] = C[1] 226#endif 227 eor r5,r5,r8 228 229#ifndef __thumb2__ 230 ldr r8,[sp,#192] 231#endif 232#ifndef __thumb2__ 233 ldr r9,[sp,#192+4] 234#else 235 ldrd r8,r9,[sp,#192] 236#endif 237#ifndef __thumb2__ 238 str r4,[sp,#224] @ D[3] = C[2] 239#endif 240 eor r7,r7,r4 241#ifndef __thumb2__ 242 str r5,[sp,#224+4] 243#else 244 strd r4,r5,[sp,#224] @ D[3] = C[2] 245#endif 246 eor r6,r6,r5 247#ifndef __thumb2__ 248 ldr r4,[sp,#0] 249#endif 250 @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */ 251 @ mov r6,r6,ror#32-11 252#ifndef __thumb2__ 253 ldr r5,[sp,#0+4] 254#else 255 ldrd r4,r5,[sp,#0] 256#endif 257 eor r8,r8,r12 258 eor r9,r9,r14 259#ifndef __thumb2__ 260 ldr r12,[sp,#96] 261#endif 262 eor r0,r0,r4 263#ifndef __thumb2__ 264 ldr r14,[sp,#96+4] 265#else 266 ldrd r12,r14,[sp,#96] 267#endif 268 @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */ 269 @ mov 
r9,r9,ror#32-7 270 eor r1,r1,r5 @ C[0] = A[0][0] ^ C[0]; 271 eor r12,r12,r2 272#ifndef __thumb2__ 273 ldr r2,[sp,#48] 274#endif 275 eor r14,r14,r3 276#ifndef __thumb2__ 277 ldr r3,[sp,#48+4] 278#else 279 ldrd r2,r3,[sp,#48] 280#endif 281 mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); 282 ldr r12,[sp,#444] @ load counter 283 eor r2,r2,r10 284 adr r10,iotas32 285 mov r4,r14,ror#32-22 286 add r14,r10,r12 287 eor r3,r3,r11 288 ldmia r14,{r10,r11} @ iotas[i] 289 bic r12,r4,r2,ror#32-22 290 bic r14,r5,r3,ror#32-22 291 mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); 292 mov r3,r3,ror#32-22 293 eor r12,r12,r0 294 eor r14,r14,r1 295 eor r10,r10,r12 296 eor r11,r11,r14 297#ifndef __thumb2__ 298 str r10,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 299#endif 300 bic r12,r6,r4,ror#11 301#ifndef __thumb2__ 302 str r11,[sp,#240+4] 303#else 304 strd r10,r11,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 305#endif 306 bic r14,r7,r5,ror#10 307 bic r10,r8,r6,ror#32-(11-7) 308 bic r11,r9,r7,ror#32-(10-7) 309 eor r12,r2,r12,ror#32-11 310#ifndef __thumb2__ 311 str r12,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 312#endif 313 eor r14,r3,r14,ror#32-10 314#ifndef __thumb2__ 315 str r14,[sp,#248+4] 316#else 317 strd r12,r14,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 318#endif 319 eor r10,r4,r10,ror#32-7 320 eor r11,r5,r11,ror#32-7 321#ifndef __thumb2__ 322 str r10,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 323#endif 324 bic r12,r0,r8,ror#32-7 325#ifndef __thumb2__ 326 str r11,[sp,#256+4] 327#else 328 strd r10,r11,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 329#endif 330 bic r14,r1,r9,ror#32-7 331 eor r12,r12,r6,ror#32-11 332#ifndef __thumb2__ 333 str r12,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 334#endif 335 eor r14,r14,r7,ror#32-10 336#ifndef __thumb2__ 337 str r14,[sp,#264+4] 338#else 339 strd r12,r14,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 340#endif 341 bic r10,r2,r0 342 add r14,sp,#224 343#ifndef __thumb2__ 344 
ldr r0,[sp,#24] @ A[0][3] 345#endif 346 bic r11,r3,r1 347#ifndef __thumb2__ 348 ldr r1,[sp,#24+4] 349#else 350 ldrd r0,r1,[sp,#24] @ A[0][3] 351#endif 352 eor r10,r10,r8,ror#32-7 353 eor r11,r11,r9,ror#32-7 354#ifndef __thumb2__ 355 str r10,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 356#endif 357 add r9,sp,#200 358#ifndef __thumb2__ 359 str r11,[sp,#272+4] 360#else 361 strd r10,r11,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 362#endif 363 364 ldmia r14,{r10,r11,r12,r14} @ D[3..4] 365 ldmia r9,{r6,r7,r8,r9} @ D[0..1] 366 367#ifndef __thumb2__ 368 ldr r2,[sp,#72] @ A[1][4] 369#endif 370 eor r0,r0,r10 371#ifndef __thumb2__ 372 ldr r3,[sp,#72+4] 373#else 374 ldrd r2,r3,[sp,#72] @ A[1][4] 375#endif 376 eor r1,r1,r11 377 @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]); 378#ifndef __thumb2__ 379 ldr r10,[sp,#128] @ A[3][1] 380#endif 381 @ mov r1,r1,ror#32-14 382#ifndef __thumb2__ 383 ldr r11,[sp,#128+4] 384#else 385 ldrd r10,r11,[sp,#128] @ A[3][1] 386#endif 387 388 eor r2,r2,r12 389#ifndef __thumb2__ 390 ldr r4,[sp,#80] @ A[2][0] 391#endif 392 eor r3,r3,r14 393#ifndef __thumb2__ 394 ldr r5,[sp,#80+4] 395#else 396 ldrd r4,r5,[sp,#80] @ A[2][0] 397#endif 398 @ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]); 399 @ mov r3,r3,ror#32-10 400 401 eor r6,r6,r4 402#ifndef __thumb2__ 403 ldr r12,[sp,#216] @ D[2] 404#endif 405 eor r7,r7,r5 406#ifndef __thumb2__ 407 ldr r14,[sp,#216+4] 408#else 409 ldrd r12,r14,[sp,#216] @ D[2] 410#endif 411 mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]); 412 mov r4,r7,ror#32-2 413 414 eor r10,r10,r8 415#ifndef __thumb2__ 416 ldr r8,[sp,#176] @ A[4][2] 417#endif 418 eor r11,r11,r9 419#ifndef __thumb2__ 420 ldr r9,[sp,#176+4] 421#else 422 ldrd r8,r9,[sp,#176] @ A[4][2] 423#endif 424 mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]); 425 mov r6,r11,ror#32-23 426 427 bic r10,r4,r2,ror#32-10 428 bic r11,r5,r3,ror#32-10 429 eor r12,r12,r8 430 eor r14,r14,r9 431 mov 
r9,r12,ror#32-30 @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]); 432 mov r8,r14,ror#32-31 433 eor r10,r10,r0,ror#32-14 434 eor r11,r11,r1,ror#32-14 435#ifndef __thumb2__ 436 str r10,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 437#endif 438 bic r12,r6,r4 439#ifndef __thumb2__ 440 str r11,[sp,#280+4] 441#else 442 strd r10,r11,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 443#endif 444 bic r14,r7,r5 445 eor r12,r12,r2,ror#32-10 446#ifndef __thumb2__ 447 str r12,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 448#endif 449 eor r14,r14,r3,ror#32-10 450#ifndef __thumb2__ 451 str r14,[sp,#288+4] 452#else 453 strd r12,r14,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 454#endif 455 bic r10,r8,r6 456 bic r11,r9,r7 457 bic r12,r0,r8,ror#14 458 bic r14,r1,r9,ror#14 459 eor r10,r10,r4 460 eor r11,r11,r5 461#ifndef __thumb2__ 462 str r10,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 463#endif 464 bic r2,r2,r0,ror#32-(14-10) 465#ifndef __thumb2__ 466 str r11,[sp,#296+4] 467#else 468 strd r10,r11,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 469#endif 470 eor r12,r6,r12,ror#32-14 471 bic r11,r3,r1,ror#32-(14-10) 472#ifndef __thumb2__ 473 str r12,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 474#endif 475 eor r14,r7,r14,ror#32-14 476#ifndef __thumb2__ 477 str r14,[sp,#304+4] 478#else 479 strd r12,r14,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 480#endif 481 add r12,sp,#208 482#ifndef __thumb2__ 483 ldr r1,[sp,#8] @ A[0][1] 484#endif 485 eor r10,r8,r2,ror#32-10 486#ifndef __thumb2__ 487 ldr r0,[sp,#8+4] 488#else 489 ldrd r1,r0,[sp,#8] @ A[0][1] 490#endif 491 eor r11,r9,r11,ror#32-10 492#ifndef __thumb2__ 493 str r10,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 494#endif 495#ifndef __thumb2__ 496 str r11,[sp,#312+4] 497#else 498 strd r10,r11,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 499#endif 500 501 add r9,sp,#224 502 ldmia r12,{r10,r11,r12,r14} @ D[1..2] 503#ifndef __thumb2__ 504 ldr r2,[sp,#56] @ A[1][2] 505#endif 506#ifndef __thumb2__ 507 ldr r3,[sp,#56+4] 508#else 509 ldrd 
r2,r3,[sp,#56] @ A[1][2] 510#endif 511 ldmia r9,{r6,r7,r8,r9} @ D[3..4] 512 513 eor r1,r1,r10 514#ifndef __thumb2__ 515 ldr r4,[sp,#104] @ A[2][3] 516#endif 517 eor r0,r0,r11 518#ifndef __thumb2__ 519 ldr r5,[sp,#104+4] 520#else 521 ldrd r4,r5,[sp,#104] @ A[2][3] 522#endif 523 mov r0,r0,ror#32-1 @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]); 524 525 eor r2,r2,r12 526#ifndef __thumb2__ 527 ldr r10,[sp,#152] @ A[3][4] 528#endif 529 eor r3,r3,r14 530#ifndef __thumb2__ 531 ldr r11,[sp,#152+4] 532#else 533 ldrd r10,r11,[sp,#152] @ A[3][4] 534#endif 535 @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]); 536#ifndef __thumb2__ 537 ldr r12,[sp,#200] @ D[0] 538#endif 539 @ mov r3,r3,ror#32-3 540#ifndef __thumb2__ 541 ldr r14,[sp,#200+4] 542#else 543 ldrd r12,r14,[sp,#200] @ D[0] 544#endif 545 546 eor r4,r4,r6 547 eor r5,r5,r7 548 @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]); 549 @ mov r4,r7,ror#32-13 @ [track reverse order below] 550 551 eor r10,r10,r8 552#ifndef __thumb2__ 553 ldr r8,[sp,#160] @ A[4][0] 554#endif 555 eor r11,r11,r9 556#ifndef __thumb2__ 557 ldr r9,[sp,#160+4] 558#else 559 ldrd r8,r9,[sp,#160] @ A[4][0] 560#endif 561 mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]); 562 mov r7,r11,ror#32-4 563 564 eor r12,r12,r8 565 eor r14,r14,r9 566 mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]); 567 mov r9,r14,ror#32-9 568 569 bic r10,r5,r2,ror#13-3 570 bic r11,r4,r3,ror#12-3 571 bic r12,r6,r5,ror#32-13 572 bic r14,r7,r4,ror#32-12 573 eor r10,r0,r10,ror#32-13 574 eor r11,r1,r11,ror#32-12 575#ifndef __thumb2__ 576 str r10,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 577#endif 578 eor r12,r12,r2,ror#32-3 579#ifndef __thumb2__ 580 str r11,[sp,#320+4] 581#else 582 strd r10,r11,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 583#endif 584 eor r14,r14,r3,ror#32-3 585#ifndef __thumb2__ 586 str r12,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 587#endif 588 bic r10,r8,r6 589 bic r11,r9,r7 590#ifndef 
__thumb2__ 591 str r14,[sp,#328+4] 592#else 593 strd r12,r14,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 594#endif 595 eor r10,r10,r5,ror#32-13 596 eor r11,r11,r4,ror#32-12 597#ifndef __thumb2__ 598 str r10,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 599#endif 600 bic r12,r0,r8 601#ifndef __thumb2__ 602 str r11,[sp,#336+4] 603#else 604 strd r10,r11,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 605#endif 606 bic r14,r1,r9 607 eor r12,r12,r6 608 eor r14,r14,r7 609#ifndef __thumb2__ 610 str r12,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 611#endif 612 bic r10,r2,r0,ror#3 613#ifndef __thumb2__ 614 str r14,[sp,#344+4] 615#else 616 strd r12,r14,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 617#endif 618 bic r11,r3,r1,ror#3 619#ifndef __thumb2__ 620 ldr r1,[sp,#32] @ A[0][4] [in reverse order] 621#endif 622 eor r10,r8,r10,ror#32-3 623#ifndef __thumb2__ 624 ldr r0,[sp,#32+4] 625#else 626 ldrd r1,r0,[sp,#32] @ A[0][4] [in reverse order] 627#endif 628 eor r11,r9,r11,ror#32-3 629#ifndef __thumb2__ 630 str r10,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 631#endif 632 add r9,sp,#208 633#ifndef __thumb2__ 634 str r11,[sp,#352+4] 635#else 636 strd r10,r11,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 637#endif 638 639#ifndef __thumb2__ 640 ldr r10,[sp,#232] @ D[4] 641#endif 642#ifndef __thumb2__ 643 ldr r11,[sp,#232+4] 644#else 645 ldrd r10,r11,[sp,#232] @ D[4] 646#endif 647#ifndef __thumb2__ 648 ldr r12,[sp,#200] @ D[0] 649#endif 650#ifndef __thumb2__ 651 ldr r14,[sp,#200+4] 652#else 653 ldrd r12,r14,[sp,#200] @ D[0] 654#endif 655 656 ldmia r9,{r6,r7,r8,r9} @ D[1..2] 657 658 eor r1,r1,r10 659#ifndef __thumb2__ 660 ldr r2,[sp,#40] @ A[1][0] 661#endif 662 eor r0,r0,r11 663#ifndef __thumb2__ 664 ldr r3,[sp,#40+4] 665#else 666 ldrd r2,r3,[sp,#40] @ A[1][0] 667#endif 668 @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]); 669#ifndef __thumb2__ 670 ldr r4,[sp,#88] @ A[2][1] 671#endif 672 @ mov r0,r11,ror#32-14 @ [was loaded in reverse order] 673#ifndef __thumb2__ 
674 ldr r5,[sp,#88+4] 675#else 676 ldrd r4,r5,[sp,#88] @ A[2][1] 677#endif 678 679 eor r2,r2,r12 680#ifndef __thumb2__ 681 ldr r10,[sp,#136] @ A[3][2] 682#endif 683 eor r3,r3,r14 684#ifndef __thumb2__ 685 ldr r11,[sp,#136+4] 686#else 687 ldrd r10,r11,[sp,#136] @ A[3][2] 688#endif 689 @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]); 690#ifndef __thumb2__ 691 ldr r12,[sp,#224] @ D[3] 692#endif 693 @ mov r3,r3,ror#32-18 694#ifndef __thumb2__ 695 ldr r14,[sp,#224+4] 696#else 697 ldrd r12,r14,[sp,#224] @ D[3] 698#endif 699 700 eor r6,r6,r4 701 eor r7,r7,r5 702 mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]); 703 mov r5,r7,ror#32-5 704 705 eor r10,r10,r8 706#ifndef __thumb2__ 707 ldr r8,[sp,#184] @ A[4][3] 708#endif 709 eor r11,r11,r9 710#ifndef __thumb2__ 711 ldr r9,[sp,#184+4] 712#else 713 ldrd r8,r9,[sp,#184] @ A[4][3] 714#endif 715 mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]); 716 mov r6,r11,ror#32-8 717 718 eor r12,r12,r8 719 eor r14,r14,r9 720 mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]); 721 mov r9,r14,ror#32-28 722 723 bic r10,r4,r2,ror#32-18 724 bic r11,r5,r3,ror#32-18 725 eor r10,r10,r0,ror#32-14 726 eor r11,r11,r1,ror#32-13 727#ifndef __thumb2__ 728 str r10,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 729#endif 730 bic r12,r6,r4 731#ifndef __thumb2__ 732 str r11,[sp,#360+4] 733#else 734 strd r10,r11,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 735#endif 736 bic r14,r7,r5 737 eor r12,r12,r2,ror#32-18 738#ifndef __thumb2__ 739 str r12,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 740#endif 741 eor r14,r14,r3,ror#32-18 742#ifndef __thumb2__ 743 str r14,[sp,#368+4] 744#else 745 strd r12,r14,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 746#endif 747 bic r10,r8,r6 748 bic r11,r9,r7 749 bic r12,r0,r8,ror#14 750 bic r14,r1,r9,ror#13 751 eor r10,r10,r4 752 eor r11,r11,r5 753#ifndef __thumb2__ 754 str r10,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 755#endif 756 bic r2,r2,r0,ror#18-14 
757#ifndef __thumb2__ 758 str r11,[sp,#376+4] 759#else 760 strd r10,r11,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 761#endif 762 eor r12,r6,r12,ror#32-14 763 bic r11,r3,r1,ror#18-13 764 eor r14,r7,r14,ror#32-13 765#ifndef __thumb2__ 766 str r12,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 767#endif 768#ifndef __thumb2__ 769 str r14,[sp,#384+4] 770#else 771 strd r12,r14,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 772#endif 773 add r14,sp,#216 774#ifndef __thumb2__ 775 ldr r0,[sp,#16] @ A[0][2] 776#endif 777 eor r10,r8,r2,ror#32-18 778#ifndef __thumb2__ 779 ldr r1,[sp,#16+4] 780#else 781 ldrd r0,r1,[sp,#16] @ A[0][2] 782#endif 783 eor r11,r9,r11,ror#32-18 784#ifndef __thumb2__ 785 str r10,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 786#endif 787#ifndef __thumb2__ 788 str r11,[sp,#392+4] 789#else 790 strd r10,r11,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 791#endif 792 793 ldmia r14,{r10,r11,r12,r14} @ D[2..3] 794#ifndef __thumb2__ 795 ldr r2,[sp,#64] @ A[1][3] 796#endif 797#ifndef __thumb2__ 798 ldr r3,[sp,#64+4] 799#else 800 ldrd r2,r3,[sp,#64] @ A[1][3] 801#endif 802#ifndef __thumb2__ 803 ldr r6,[sp,#232] @ D[4] 804#endif 805#ifndef __thumb2__ 806 ldr r7,[sp,#232+4] 807#else 808 ldrd r6,r7,[sp,#232] @ D[4] 809#endif 810 811 eor r0,r0,r10 812#ifndef __thumb2__ 813 ldr r4,[sp,#112] @ A[2][4] 814#endif 815 eor r1,r1,r11 816#ifndef __thumb2__ 817 ldr r5,[sp,#112+4] 818#else 819 ldrd r4,r5,[sp,#112] @ A[2][4] 820#endif 821 @ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]); 822#ifndef __thumb2__ 823 ldr r8,[sp,#200] @ D[0] 824#endif 825 @ mov r1,r1,ror#32-31 826#ifndef __thumb2__ 827 ldr r9,[sp,#200+4] 828#else 829 ldrd r8,r9,[sp,#200] @ D[0] 830#endif 831 832 eor r12,r12,r2 833#ifndef __thumb2__ 834 ldr r10,[sp,#120] @ A[3][0] 835#endif 836 eor r14,r14,r3 837#ifndef __thumb2__ 838 ldr r11,[sp,#120+4] 839#else 840 ldrd r10,r11,[sp,#120] @ A[3][0] 841#endif 842 mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]); 843#ifndef 
__thumb2__ 844 ldr r12,[sp,#208] @ D[1] 845#endif 846 mov r2,r14,ror#32-28 847#ifndef __thumb2__ 848 ldr r14,[sp,#208+4] 849#else 850 ldrd r12,r14,[sp,#208] @ D[1] 851#endif 852 853 eor r6,r6,r4 854 eor r7,r7,r5 855 mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]); 856 mov r4,r7,ror#32-20 857 858 eor r10,r10,r8 859#ifndef __thumb2__ 860 ldr r8,[sp,#168] @ A[4][1] 861#endif 862 eor r11,r11,r9 863#ifndef __thumb2__ 864 ldr r9,[sp,#168+4] 865#else 866 ldrd r8,r9,[sp,#168] @ A[4][1] 867#endif 868 mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]); 869 mov r6,r11,ror#32-21 870 871 eor r8,r8,r12 872 eor r9,r9,r14 873 @ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]); 874 @ mov r9,r3,ror#32-1 875 876 bic r10,r4,r2 877 bic r11,r5,r3 878 eor r10,r10,r0,ror#32-31 879#ifndef __thumb2__ 880 str r10,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 881#endif 882 eor r11,r11,r1,ror#32-31 883#ifndef __thumb2__ 884 str r11,[sp,#400+4] 885#else 886 strd r10,r11,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 887#endif 888 bic r12,r6,r4 889 bic r14,r7,r5 890 eor r12,r12,r2 891 eor r14,r14,r3 892#ifndef __thumb2__ 893 str r12,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 894#endif 895 bic r10,r8,r6,ror#1 896#ifndef __thumb2__ 897 str r14,[sp,#408+4] 898#else 899 strd r12,r14,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 900#endif 901 bic r11,r9,r7,ror#1 902 bic r12,r0,r8,ror#31-1 903 bic r14,r1,r9,ror#31-1 904 eor r4,r4,r10,ror#32-1 905#ifndef __thumb2__ 906 str r4,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 907#endif 908 eor r5,r5,r11,ror#32-1 909#ifndef __thumb2__ 910 str r5,[sp,#416+4] 911#else 912 strd r4,r5,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 913#endif 914 eor r6,r6,r12,ror#32-31 915 eor r7,r7,r14,ror#32-31 916#ifndef __thumb2__ 917 str r6,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 918#endif 919 bic r10,r2,r0,ror#32-31 920#ifndef __thumb2__ 921 str r7,[sp,#424+4] 922#else 923 strd r6,r7,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] 
& C[0]); 924#endif 925 bic r11,r3,r1,ror#32-31 926 add r12,sp,#240 927 eor r8,r10,r8,ror#32-1 928 add r10,sp,#280 929 eor r9,r11,r9,ror#32-1 930#ifndef __thumb2__ 931 str r8,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]); 932#endif 933#ifndef __thumb2__ 934 str r9,[sp,#432+4] 935#else 936 strd r8,r9,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]); 937#endif 938 ldmia r12,{r0,r1,r2,r3} @ A[0][0..1] 939 ldmia r10,{r10,r11,r12,r14} @ A[1][0..1] 940#ifdef __thumb2__ 941 eor r0,r0,r10 942 eor r1,r1,r11 943 eor r2,r2,r12 944 ldrd r10,r11,[sp,#296] 945 eor r3,r3,r14 946 ldrd r12,r14,[sp,#304] 947 eor r4,r4,r10 948 eor r5,r5,r11 949 eor r6,r6,r12 950 ldrd r10,r11,[sp,#312] 951 eor r7,r7,r14 952 ldrd r12,r14,[sp,#320] 953 eor r8,r8,r10 954 eor r9,r9,r11 955 eor r0,r0,r12 956 ldrd r10,r11,[sp,#328] 957 eor r1,r1,r14 958 ldrd r12,r14,[sp,#336] 959 eor r2,r2,r10 960 eor r3,r3,r11 961 eor r4,r4,r12 962 ldrd r10,r11,[sp,#344] 963 eor r5,r5,r14 964 ldrd r12,r14,[sp,#352] 965 eor r6,r6,r10 966 eor r7,r7,r11 967 eor r8,r8,r12 968 ldrd r10,r11,[sp,#360] 969 eor r9,r9,r14 970 ldrd r12,r14,[sp,#368] 971 eor r0,r0,r10 972 eor r1,r1,r11 973 eor r2,r2,r12 974 ldrd r10,r11,[sp,#376] 975 eor r3,r3,r14 976 ldrd r12,r14,[sp,#384] 977 eor r4,r4,r10 978 eor r5,r5,r11 979 eor r6,r6,r12 980 ldrd r10,r11,[sp,#392] 981 eor r7,r7,r14 982 ldrd r12,r14,[sp,#400] 983 eor r8,r8,r10 984 eor r9,r9,r11 985 eor r0,r0,r12 986 ldrd r10,r11,[sp,#408] 987 eor r1,r1,r14 988 ldrd r12,r14,[sp,#256] 989 eor r2,r2,r10 990 eor r3,r3,r11 991 eor r4,r4,r12 992 ldrd r10,r11,[sp,#264] 993 eor r5,r5,r14 994 ldrd r12,r14,[sp,#272] 995#else 996 eor r0,r0,r10 997 add r10,sp,#296 998 eor r1,r1,r11 999 eor r2,r2,r12 1000 eor r3,r3,r14 1001 ldmia r10,{r10,r11,r12,r14} @ A[1][2..3] 1002 eor r4,r4,r10 1003 add r10,sp,#312 1004 eor r5,r5,r11 1005 eor r6,r6,r12 1006 eor r7,r7,r14 1007 ldmia r10,{r10,r11,r12,r14} @ A[1][4]..A[2][0] 1008 eor r8,r8,r10 1009 add r10,sp,#328 1010 eor r9,r9,r11 1011 eor r0,r0,r12 1012 eor r1,r1,r14 1013 
ldmia r10,{r10,r11,r12,r14} @ A[2][1..2] 1014 eor r2,r2,r10 1015 add r10,sp,#344 1016 eor r3,r3,r11 1017 eor r4,r4,r12 1018 eor r5,r5,r14 1019 ldmia r10,{r10,r11,r12,r14} @ A[2][3..4] 1020 eor r6,r6,r10 1021 add r10,sp,#360 1022 eor r7,r7,r11 1023 eor r8,r8,r12 1024 eor r9,r9,r14 1025 ldmia r10,{r10,r11,r12,r14} @ A[3][0..1] 1026 eor r0,r0,r10 1027 add r10,sp,#376 1028 eor r1,r1,r11 1029 eor r2,r2,r12 1030 eor r3,r3,r14 1031 ldmia r10,{r10,r11,r12,r14} @ A[3][2..3] 1032 eor r4,r4,r10 1033 add r10,sp,#392 1034 eor r5,r5,r11 1035 eor r6,r6,r12 1036 eor r7,r7,r14 1037 ldmia r10,{r10,r11,r12,r14} @ A[3][4]..A[4][0] 1038 eor r8,r8,r10 1039 ldr r10,[sp,#408] @ A[4][1] 1040 eor r9,r9,r11 1041 ldr r11,[sp,#408+4] 1042 eor r0,r0,r12 1043 ldr r12,[sp,#256] @ A[0][2] 1044 eor r1,r1,r14 1045 ldr r14,[sp,#256+4] 1046 eor r2,r2,r10 1047 add r10,sp,#264 1048 eor r3,r3,r11 1049 eor r4,r4,r12 1050 eor r5,r5,r14 1051 ldmia r10,{r10,r11,r12,r14} @ A[0][3..4] 1052#endif 1053 eor r6,r6,r10 1054 eor r7,r7,r11 1055 eor r8,r8,r12 1056 eor r9,r9,r14 1057 1058 eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0]; 1059#ifndef __thumb2__ 1060 str r10,[sp,#208] @ D[1] = E[0] 1061#endif 1062 eor r11,r1,r4 1063#ifndef __thumb2__ 1064 str r11,[sp,#208+4] 1065#else 1066 strd r10,r11,[sp,#208] @ D[1] = E[0] 1067#endif 1068 eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3]; 1069 eor r14,r7,r0 1070#ifndef __thumb2__ 1071 str r12,[sp,#232] @ D[4] = E[1] 1072#endif 1073 eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4]; 1074#ifndef __thumb2__ 1075 str r14,[sp,#232+4] 1076#else 1077 strd r12,r14,[sp,#232] @ D[4] = E[1] 1078#endif 1079 eor r1,r9,r2 1080#ifndef __thumb2__ 1081 str r0,[sp,#200] @ D[0] = C[0] 1082#endif 1083 eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1]; 1084#ifndef __thumb2__ 1085 ldr r7,[sp,#384] 1086#endif 1087 eor r3,r3,r6 1088#ifndef __thumb2__ 1089 str r1,[sp,#200+4] 1090#else 1091 strd r0,r1,[sp,#200] @ D[0] = C[0] 1092#endif 1093#ifndef __thumb2__ 1094 ldr 
r6,[sp,#384+4] 1095#else 1096 ldrd r7,r6,[sp,#384] 1097#endif 1098#ifndef __thumb2__ 1099 str r2,[sp,#216] @ D[2] = C[1] 1100#endif 1101 eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2]; 1102#ifndef __thumb2__ 1103 str r3,[sp,#216+4] 1104#else 1105 strd r2,r3,[sp,#216] @ D[2] = C[1] 1106#endif 1107 eor r5,r5,r8 1108 1109#ifndef __thumb2__ 1110 ldr r8,[sp,#432] 1111#endif 1112#ifndef __thumb2__ 1113 ldr r9,[sp,#432+4] 1114#else 1115 ldrd r8,r9,[sp,#432] 1116#endif 1117#ifndef __thumb2__ 1118 str r4,[sp,#224] @ D[3] = C[2] 1119#endif 1120 eor r7,r7,r4 1121#ifndef __thumb2__ 1122 str r5,[sp,#224+4] 1123#else 1124 strd r4,r5,[sp,#224] @ D[3] = C[2] 1125#endif 1126 eor r6,r6,r5 1127#ifndef __thumb2__ 1128 ldr r4,[sp,#240] 1129#endif 1130 @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */ 1131 @ mov r6,r6,ror#32-11 1132#ifndef __thumb2__ 1133 ldr r5,[sp,#240+4] 1134#else 1135 ldrd r4,r5,[sp,#240] 1136#endif 1137 eor r8,r8,r12 1138 eor r9,r9,r14 1139#ifndef __thumb2__ 1140 ldr r12,[sp,#336] 1141#endif 1142 eor r0,r0,r4 1143#ifndef __thumb2__ 1144 ldr r14,[sp,#336+4] 1145#else 1146 ldrd r12,r14,[sp,#336] 1147#endif 1148 @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */ 1149 @ mov r9,r9,ror#32-7 1150 eor r1,r1,r5 @ C[0] = A[0][0] ^ C[0]; 1151 eor r12,r12,r2 1152#ifndef __thumb2__ 1153 ldr r2,[sp,#288] 1154#endif 1155 eor r14,r14,r3 1156#ifndef __thumb2__ 1157 ldr r3,[sp,#288+4] 1158#else 1159 ldrd r2,r3,[sp,#288] 1160#endif 1161 mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); 1162 ldr r12,[sp,#444] @ load counter 1163 eor r2,r2,r10 1164 adr r10,iotas32 1165 mov r4,r14,ror#32-22 1166 add r14,r10,r12 1167 eor r3,r3,r11 1168#ifndef __thumb2__ 1169 ldr r10,[r14,#8] @ iotas[i].lo 1170#endif 1171 add r12,r12,#16 1172#ifndef __thumb2__ 1173 ldr r11,[r14,#12] @ iotas[i].hi 1174#else 1175 ldrd r10,r11,[r14,#8] @ iotas[i].lo 1176#endif 1177 cmp r12,#192 1178 str r12,[sp,#444] @ store counter 1179 bic 
r12,r4,r2,ror#32-22 1180 bic r14,r5,r3,ror#32-22 1181 mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); 1182 mov r3,r3,ror#32-22 1183 eor r12,r12,r0 1184 eor r14,r14,r1 1185 eor r10,r10,r12 1186 eor r11,r11,r14 1187#ifndef __thumb2__ 1188 str r10,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 1189#endif 1190 bic r12,r6,r4,ror#11 1191#ifndef __thumb2__ 1192 str r11,[sp,#0+4] 1193#else 1194 strd r10,r11,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 1195#endif 1196 bic r14,r7,r5,ror#10 1197 bic r10,r8,r6,ror#32-(11-7) 1198 bic r11,r9,r7,ror#32-(10-7) 1199 eor r12,r2,r12,ror#32-11 1200#ifndef __thumb2__ 1201 str r12,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 1202#endif 1203 eor r14,r3,r14,ror#32-10 1204#ifndef __thumb2__ 1205 str r14,[sp,#8+4] 1206#else 1207 strd r12,r14,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 1208#endif 1209 eor r10,r4,r10,ror#32-7 1210 eor r11,r5,r11,ror#32-7 1211#ifndef __thumb2__ 1212 str r10,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 1213#endif 1214 bic r12,r0,r8,ror#32-7 1215#ifndef __thumb2__ 1216 str r11,[sp,#16+4] 1217#else 1218 strd r10,r11,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 1219#endif 1220 bic r14,r1,r9,ror#32-7 1221 eor r12,r12,r6,ror#32-11 1222#ifndef __thumb2__ 1223 str r12,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 1224#endif 1225 eor r14,r14,r7,ror#32-10 1226#ifndef __thumb2__ 1227 str r14,[sp,#24+4] 1228#else 1229 strd r12,r14,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 1230#endif 1231 bic r10,r2,r0 1232 add r14,sp,#224 1233#ifndef __thumb2__ 1234 ldr r0,[sp,#264] @ A[0][3] 1235#endif 1236 bic r11,r3,r1 1237#ifndef __thumb2__ 1238 ldr r1,[sp,#264+4] 1239#else 1240 ldrd r0,r1,[sp,#264] @ A[0][3] 1241#endif 1242 eor r10,r10,r8,ror#32-7 1243 eor r11,r11,r9,ror#32-7 1244#ifndef __thumb2__ 1245 str r10,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 1246#endif 1247 add r9,sp,#200 1248#ifndef __thumb2__ 1249 str r11,[sp,#32+4] 1250#else 1251 strd r10,r11,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 
1252#endif 1253 1254 ldmia r14,{r10,r11,r12,r14} @ D[3..4] 1255 ldmia r9,{r6,r7,r8,r9} @ D[0..1] 1256 1257#ifndef __thumb2__ 1258 ldr r2,[sp,#312] @ A[1][4] 1259#endif 1260 eor r0,r0,r10 1261#ifndef __thumb2__ 1262 ldr r3,[sp,#312+4] 1263#else 1264 ldrd r2,r3,[sp,#312] @ A[1][4] 1265#endif 1266 eor r1,r1,r11 1267 @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]); 1268#ifndef __thumb2__ 1269 ldr r10,[sp,#368] @ A[3][1] 1270#endif 1271 @ mov r1,r1,ror#32-14 1272#ifndef __thumb2__ 1273 ldr r11,[sp,#368+4] 1274#else 1275 ldrd r10,r11,[sp,#368] @ A[3][1] 1276#endif 1277 1278 eor r2,r2,r12 1279#ifndef __thumb2__ 1280 ldr r4,[sp,#320] @ A[2][0] 1281#endif 1282 eor r3,r3,r14 1283#ifndef __thumb2__ 1284 ldr r5,[sp,#320+4] 1285#else 1286 ldrd r4,r5,[sp,#320] @ A[2][0] 1287#endif 1288 @ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]); 1289 @ mov r3,r3,ror#32-10 1290 1291 eor r6,r6,r4 1292#ifndef __thumb2__ 1293 ldr r12,[sp,#216] @ D[2] 1294#endif 1295 eor r7,r7,r5 1296#ifndef __thumb2__ 1297 ldr r14,[sp,#216+4] 1298#else 1299 ldrd r12,r14,[sp,#216] @ D[2] 1300#endif 1301 mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]); 1302 mov r4,r7,ror#32-2 1303 1304 eor r10,r10,r8 1305#ifndef __thumb2__ 1306 ldr r8,[sp,#416] @ A[4][2] 1307#endif 1308 eor r11,r11,r9 1309#ifndef __thumb2__ 1310 ldr r9,[sp,#416+4] 1311#else 1312 ldrd r8,r9,[sp,#416] @ A[4][2] 1313#endif 1314 mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]); 1315 mov r6,r11,ror#32-23 1316 1317 bic r10,r4,r2,ror#32-10 1318 bic r11,r5,r3,ror#32-10 1319 eor r12,r12,r8 1320 eor r14,r14,r9 1321 mov r9,r12,ror#32-30 @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]); 1322 mov r8,r14,ror#32-31 1323 eor r10,r10,r0,ror#32-14 1324 eor r11,r11,r1,ror#32-14 1325#ifndef __thumb2__ 1326 str r10,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 1327#endif 1328 bic r12,r6,r4 1329#ifndef __thumb2__ 1330 str r11,[sp,#40+4] 1331#else 1332 strd r10,r11,[sp,#40] @ R[1][0] = C[0] ^ 
(~C[1] & C[2]) 1333#endif 1334 bic r14,r7,r5 1335 eor r12,r12,r2,ror#32-10 1336#ifndef __thumb2__ 1337 str r12,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 1338#endif 1339 eor r14,r14,r3,ror#32-10 1340#ifndef __thumb2__ 1341 str r14,[sp,#48+4] 1342#else 1343 strd r12,r14,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 1344#endif 1345 bic r10,r8,r6 1346 bic r11,r9,r7 1347 bic r12,r0,r8,ror#14 1348 bic r14,r1,r9,ror#14 1349 eor r10,r10,r4 1350 eor r11,r11,r5 1351#ifndef __thumb2__ 1352 str r10,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 1353#endif 1354 bic r2,r2,r0,ror#32-(14-10) 1355#ifndef __thumb2__ 1356 str r11,[sp,#56+4] 1357#else 1358 strd r10,r11,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 1359#endif 1360 eor r12,r6,r12,ror#32-14 1361 bic r11,r3,r1,ror#32-(14-10) 1362#ifndef __thumb2__ 1363 str r12,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 1364#endif 1365 eor r14,r7,r14,ror#32-14 1366#ifndef __thumb2__ 1367 str r14,[sp,#64+4] 1368#else 1369 strd r12,r14,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 1370#endif 1371 add r12,sp,#208 1372#ifndef __thumb2__ 1373 ldr r1,[sp,#248] @ A[0][1] 1374#endif 1375 eor r10,r8,r2,ror#32-10 1376#ifndef __thumb2__ 1377 ldr r0,[sp,#248+4] 1378#else 1379 ldrd r1,r0,[sp,#248] @ A[0][1] 1380#endif 1381 eor r11,r9,r11,ror#32-10 1382#ifndef __thumb2__ 1383 str r10,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 1384#endif 1385#ifndef __thumb2__ 1386 str r11,[sp,#72+4] 1387#else 1388 strd r10,r11,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 1389#endif 1390 1391 add r9,sp,#224 1392 ldmia r12,{r10,r11,r12,r14} @ D[1..2] 1393#ifndef __thumb2__ 1394 ldr r2,[sp,#296] @ A[1][2] 1395#endif 1396#ifndef __thumb2__ 1397 ldr r3,[sp,#296+4] 1398#else 1399 ldrd r2,r3,[sp,#296] @ A[1][2] 1400#endif 1401 ldmia r9,{r6,r7,r8,r9} @ D[3..4] 1402 1403 eor r1,r1,r10 1404#ifndef __thumb2__ 1405 ldr r4,[sp,#344] @ A[2][3] 1406#endif 1407 eor r0,r0,r11 1408#ifndef __thumb2__ 1409 ldr r5,[sp,#344+4] 1410#else 1411 ldrd r4,r5,[sp,#344] @ A[2][3] 1412#endif 1413 mov 
r0,r0,ror#32-1 @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]); 1414 1415 eor r2,r2,r12 1416#ifndef __thumb2__ 1417 ldr r10,[sp,#392] @ A[3][4] 1418#endif 1419 eor r3,r3,r14 1420#ifndef __thumb2__ 1421 ldr r11,[sp,#392+4] 1422#else 1423 ldrd r10,r11,[sp,#392] @ A[3][4] 1424#endif 1425 @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]); 1426#ifndef __thumb2__ 1427 ldr r12,[sp,#200] @ D[0] 1428#endif 1429 @ mov r3,r3,ror#32-3 1430#ifndef __thumb2__ 1431 ldr r14,[sp,#200+4] 1432#else 1433 ldrd r12,r14,[sp,#200] @ D[0] 1434#endif 1435 1436 eor r4,r4,r6 1437 eor r5,r5,r7 1438 @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]); 1439 @ mov r4,r7,ror#32-13 @ [track reverse order below] 1440 1441 eor r10,r10,r8 1442#ifndef __thumb2__ 1443 ldr r8,[sp,#400] @ A[4][0] 1444#endif 1445 eor r11,r11,r9 1446#ifndef __thumb2__ 1447 ldr r9,[sp,#400+4] 1448#else 1449 ldrd r8,r9,[sp,#400] @ A[4][0] 1450#endif 1451 mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]); 1452 mov r7,r11,ror#32-4 1453 1454 eor r12,r12,r8 1455 eor r14,r14,r9 1456 mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]); 1457 mov r9,r14,ror#32-9 1458 1459 bic r10,r5,r2,ror#13-3 1460 bic r11,r4,r3,ror#12-3 1461 bic r12,r6,r5,ror#32-13 1462 bic r14,r7,r4,ror#32-12 1463 eor r10,r0,r10,ror#32-13 1464 eor r11,r1,r11,ror#32-12 1465#ifndef __thumb2__ 1466 str r10,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 1467#endif 1468 eor r12,r12,r2,ror#32-3 1469#ifndef __thumb2__ 1470 str r11,[sp,#80+4] 1471#else 1472 strd r10,r11,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 1473#endif 1474 eor r14,r14,r3,ror#32-3 1475#ifndef __thumb2__ 1476 str r12,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 1477#endif 1478 bic r10,r8,r6 1479 bic r11,r9,r7 1480#ifndef __thumb2__ 1481 str r14,[sp,#88+4] 1482#else 1483 strd r12,r14,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 1484#endif 1485 eor r10,r10,r5,ror#32-13 1486 eor r11,r11,r4,ror#32-12 1487#ifndef __thumb2__ 1488 str r10,[sp,#96] @ 
R[2][2] = C[2] ^ (~C[3] & C[4]); 1489#endif 1490 bic r12,r0,r8 1491#ifndef __thumb2__ 1492 str r11,[sp,#96+4] 1493#else 1494 strd r10,r11,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 1495#endif 1496 bic r14,r1,r9 1497 eor r12,r12,r6 1498 eor r14,r14,r7 1499#ifndef __thumb2__ 1500 str r12,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 1501#endif 1502 bic r10,r2,r0,ror#3 1503#ifndef __thumb2__ 1504 str r14,[sp,#104+4] 1505#else 1506 strd r12,r14,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 1507#endif 1508 bic r11,r3,r1,ror#3 1509#ifndef __thumb2__ 1510 ldr r1,[sp,#272] @ A[0][4] [in reverse order] 1511#endif 1512 eor r10,r8,r10,ror#32-3 1513#ifndef __thumb2__ 1514 ldr r0,[sp,#272+4] 1515#else 1516 ldrd r1,r0,[sp,#272] @ A[0][4] [in reverse order] 1517#endif 1518 eor r11,r9,r11,ror#32-3 1519#ifndef __thumb2__ 1520 str r10,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 1521#endif 1522 add r9,sp,#208 1523#ifndef __thumb2__ 1524 str r11,[sp,#112+4] 1525#else 1526 strd r10,r11,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 1527#endif 1528 1529#ifndef __thumb2__ 1530 ldr r10,[sp,#232] @ D[4] 1531#endif 1532#ifndef __thumb2__ 1533 ldr r11,[sp,#232+4] 1534#else 1535 ldrd r10,r11,[sp,#232] @ D[4] 1536#endif 1537#ifndef __thumb2__ 1538 ldr r12,[sp,#200] @ D[0] 1539#endif 1540#ifndef __thumb2__ 1541 ldr r14,[sp,#200+4] 1542#else 1543 ldrd r12,r14,[sp,#200] @ D[0] 1544#endif 1545 1546 ldmia r9,{r6,r7,r8,r9} @ D[1..2] 1547 1548 eor r1,r1,r10 1549#ifndef __thumb2__ 1550 ldr r2,[sp,#280] @ A[1][0] 1551#endif 1552 eor r0,r0,r11 1553#ifndef __thumb2__ 1554 ldr r3,[sp,#280+4] 1555#else 1556 ldrd r2,r3,[sp,#280] @ A[1][0] 1557#endif 1558 @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]); 1559#ifndef __thumb2__ 1560 ldr r4,[sp,#328] @ A[2][1] 1561#endif 1562 @ mov r0,r11,ror#32-14 @ [was loaded in reverse order] 1563#ifndef __thumb2__ 1564 ldr r5,[sp,#328+4] 1565#else 1566 ldrd r4,r5,[sp,#328] @ A[2][1] 1567#endif 1568 1569 eor r2,r2,r12 1570#ifndef __thumb2__ 1571 ldr 
r10,[sp,#376] @ A[3][2] 1572#endif 1573 eor r3,r3,r14 1574#ifndef __thumb2__ 1575 ldr r11,[sp,#376+4] 1576#else 1577 ldrd r10,r11,[sp,#376] @ A[3][2] 1578#endif 1579 @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]); 1580#ifndef __thumb2__ 1581 ldr r12,[sp,#224] @ D[3] 1582#endif 1583 @ mov r3,r3,ror#32-18 1584#ifndef __thumb2__ 1585 ldr r14,[sp,#224+4] 1586#else 1587 ldrd r12,r14,[sp,#224] @ D[3] 1588#endif 1589 1590 eor r6,r6,r4 1591 eor r7,r7,r5 1592 mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]); 1593 mov r5,r7,ror#32-5 1594 1595 eor r10,r10,r8 1596#ifndef __thumb2__ 1597 ldr r8,[sp,#424] @ A[4][3] 1598#endif 1599 eor r11,r11,r9 1600#ifndef __thumb2__ 1601 ldr r9,[sp,#424+4] 1602#else 1603 ldrd r8,r9,[sp,#424] @ A[4][3] 1604#endif 1605 mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]); 1606 mov r6,r11,ror#32-8 1607 1608 eor r12,r12,r8 1609 eor r14,r14,r9 1610 mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]); 1611 mov r9,r14,ror#32-28 1612 1613 bic r10,r4,r2,ror#32-18 1614 bic r11,r5,r3,ror#32-18 1615 eor r10,r10,r0,ror#32-14 1616 eor r11,r11,r1,ror#32-13 1617#ifndef __thumb2__ 1618 str r10,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 1619#endif 1620 bic r12,r6,r4 1621#ifndef __thumb2__ 1622 str r11,[sp,#120+4] 1623#else 1624 strd r10,r11,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 1625#endif 1626 bic r14,r7,r5 1627 eor r12,r12,r2,ror#32-18 1628#ifndef __thumb2__ 1629 str r12,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 1630#endif 1631 eor r14,r14,r3,ror#32-18 1632#ifndef __thumb2__ 1633 str r14,[sp,#128+4] 1634#else 1635 strd r12,r14,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 1636#endif 1637 bic r10,r8,r6 1638 bic r11,r9,r7 1639 bic r12,r0,r8,ror#14 1640 bic r14,r1,r9,ror#13 1641 eor r10,r10,r4 1642 eor r11,r11,r5 1643#ifndef __thumb2__ 1644 str r10,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 1645#endif 1646 bic r2,r2,r0,ror#18-14 1647#ifndef __thumb2__ 1648 str r11,[sp,#136+4] 
#else
	strd	r10,r11,[sp,#136]	@ R[3][2] = C[2] ^ (~C[3] & C[4]);
#endif
	eor	r12,r6,r12,ror#32-14
	bic	r11,r3,r1,ror#18-13
	eor	r14,r7,r14,ror#32-13
#ifndef __thumb2__
	str	r12,[sp,#144]		@ R[3][3] = C[3] ^ (~C[4] & C[0]);
#endif
#ifndef __thumb2__
	str	r14,[sp,#144+4]
#else
	strd	r12,r14,[sp,#144]	@ R[3][3] = C[3] ^ (~C[4] & C[0]);
#endif
	add	r14,sp,#216
#ifndef __thumb2__
	ldr	r0,[sp,#256]		@ A[0][2]
#endif
	eor	r10,r8,r2,ror#32-18
#ifndef __thumb2__
	ldr	r1,[sp,#256+4]
#else
	ldrd	r0,r1,[sp,#256]		@ A[0][2]
#endif
	eor	r11,r9,r11,ror#32-18
#ifndef __thumb2__
	str	r10,[sp,#152]		@ R[3][4] = C[4] ^ (~C[0] & C[1]);
#endif
#ifndef __thumb2__
	str	r11,[sp,#152+4]
#else
	strd	r10,r11,[sp,#152]	@ R[3][4] = C[4] ^ (~C[0] & C[1]);
#endif

	ldmia	r14,{r10,r11,r12,r14}	@ D[2..3]
#ifndef __thumb2__
	ldr	r2,[sp,#304]		@ A[1][3]
#endif
#ifndef __thumb2__
	ldr	r3,[sp,#304+4]
#else
	ldrd	r2,r3,[sp,#304]		@ A[1][3]
#endif
#ifndef __thumb2__
	ldr	r6,[sp,#232]		@ D[4]
#endif
#ifndef __thumb2__
	ldr	r7,[sp,#232+4]
#else
	ldrd	r6,r7,[sp,#232]		@ D[4]
#endif

	eor	r0,r0,r10
#ifndef __thumb2__
	ldr	r4,[sp,#352]		@ A[2][4]
#endif
	eor	r1,r1,r11
#ifndef __thumb2__
	ldr	r5,[sp,#352+4]
#else
	ldrd	r4,r5,[sp,#352]		@ A[2][4]
#endif
	@ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
#ifndef __thumb2__
	ldr	r8,[sp,#200]		@ D[0]
#endif
	@ mov r1,r1,ror#32-31
#ifndef __thumb2__
	ldr	r9,[sp,#200+4]
#else
	ldrd	r8,r9,[sp,#200]		@ D[0]
#endif

	eor	r12,r12,r2
#ifndef __thumb2__
	ldr	r10,[sp,#360]		@ A[3][0]
#endif
	eor	r14,r14,r3
#ifndef __thumb2__
	ldr	r11,[sp,#360+4]
#else
	ldrd	r10,r11,[sp,#360]	@ A[3][0]
#endif
	mov	r3,r12,ror#32-27	@ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
#ifndef __thumb2__
	ldr	r12,[sp,#208]		@ D[1]
#endif
	mov	r2,r14,ror#32-28
#ifndef __thumb2__
	ldr	r14,[sp,#208+4]
#else
	ldrd	r12,r14,[sp,#208]	@ D[1]
#endif

	eor	r6,r6,r4
	eor	r7,r7,r5
	mov	r5,r6,ror#32-19		@ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
	mov	r4,r7,ror#32-20

	eor	r10,r10,r8
#ifndef __thumb2__
	ldr	r8,[sp,#408]		@ A[4][1]
#endif
	eor	r11,r11,r9
#ifndef __thumb2__
	ldr	r9,[sp,#408+4]
#else
	ldrd	r8,r9,[sp,#408]		@ A[4][1]
#endif
	mov	r7,r10,ror#32-20	@ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
	mov	r6,r11,ror#32-21

	eor	r8,r8,r12
	eor	r9,r9,r14
	@ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
	@ mov r9,r3,ror#32-1

	bic	r10,r4,r2
	bic	r11,r5,r3
	eor	r10,r10,r0,ror#32-31
#ifndef __thumb2__
	str	r10,[sp,#160]		@ R[4][0] = C[0] ^ (~C[1] & C[2])
#endif
	eor	r11,r11,r1,ror#32-31
#ifndef __thumb2__
	str	r11,[sp,#160+4]
#else
	strd	r10,r11,[sp,#160]	@ R[4][0] = C[0] ^ (~C[1] & C[2])
#endif
	bic	r12,r6,r4
	bic	r14,r7,r5
	eor	r12,r12,r2
	eor	r14,r14,r3
#ifndef __thumb2__
	str	r12,[sp,#168]		@ R[4][1] = C[1] ^ (~C[2] & C[3]);
#endif
	bic	r10,r8,r6,ror#1
#ifndef __thumb2__
	str	r14,[sp,#168+4]
#else
	strd	r12,r14,[sp,#168]	@ R[4][1] = C[1] ^ (~C[2] & C[3]);
#endif
	bic	r11,r9,r7,ror#1
	bic	r12,r0,r8,ror#31-1
	bic	r14,r1,r9,ror#31-1
	eor	r4,r4,r10,ror#32-1
#ifndef __thumb2__
	str	r4,[sp,#176]		@ R[4][2] = C[2] ^= (~C[3] & C[4]);
#endif
	eor	r5,r5,r11,ror#32-1
#ifndef __thumb2__
	str	r5,[sp,#176+4]
#else
	strd	r4,r5,[sp,#176]		@ R[4][2] = C[2] ^= (~C[3] & C[4]);
#endif
	eor	r6,r6,r12,ror#32-31
	eor	r7,r7,r14,ror#32-31
#ifndef __thumb2__
	str	r6,[sp,#184]		@ R[4][3] = C[3] ^= (~C[4] & C[0]);
#endif
	bic	r10,r2,r0,ror#32-31
#ifndef __thumb2__
	str	r7,[sp,#184+4]
#else
	strd	r6,r7,[sp,#184]		@ R[4][3] = C[3] ^= (~C[4] & C[0]);
#endif
	bic	r11,r3,r1,ror#32-31
	add	r12,sp,#0		@ r12 = &A[0][0], as .Lround2x expects
	eor	r8,r10,r8,ror#32-1
	add	r10,sp,#40		@ r10 = &A[1][0]; callers also rely on this after return
	eor	r9,r11,r9,ror#32-1
#ifndef __thumb2__
	str	r8,[sp,#192]		@ R[4][4] = C[4] ^= (~C[0] & C[1]);
#endif
#ifndef __thumb2__
	str	r9,[sp,#192+4]
#else
	strd	r8,r9,[sp,#192]		@ R[4][4] = C[4] ^= (~C[0] & C[1]);
#endif
	@ NOTE(review): nothing since the previous store block sets the flags, so
	@ this condition presumably comes from a compare earlier in the round
	@ (outside this excerpt) -- confirm.
	blo	.Lround2x

	ldr	pc,[sp,#440]		@ return via the lr that KeccakF1600_enter stored at sp+440
.size	KeccakF1600_int,.-KeccakF1600_int

@-----------------------------------------------------------------------
@ KeccakF1600 -- run the permutation on a state held in caller memory.
@ No .globl directive accompanies this symbol; it is reached with bl
@ from SHA3_squeeze.
@
@ In:    r0 = pointer to the state A[5][5] (5 x 10 words = 200 bytes)
@ Out:   the 200 bytes at the original r0 are overwritten with the result
@ Saves: r4-r11 (restored on exit); r0-r3 and r12 are not preserved
@ Stack: {r0,r4-r11,lr} pushed, then 440+16 bytes reserved.  The working
@        copy of the state is at sp+0..199; KeccakF1600_enter keeps its
@        return address at sp+440 and a zeroed word at sp+444.
@-----------------------------------------------------------------------
.type	KeccakF1600, %function
.align	5
KeccakF1600:
	stmdb	sp!,{r0,r4-r11,lr}
	sub	sp,sp,#440+16		@ space for A[5][5],D[5],T[5][5],...

	add	r10,r0,#40		@ source cursor for rows 1..4
	add	r11,sp,#40		@ destination cursor for rows 1..4
	ldmia	r0, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ copy A[5][5] to stack
	stmia	sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	@ Entry state for KeccakF1600_enter, mirroring what KeccakF1600_int
	@ sets up itself: r12 = sp, r10 = sp+40, and r4-r9 = the last six
	@ words just loaded (state bytes 176..199, i.e. A[4][2..4]).
	add	r12,sp,#0
	add	r10,sp,#40
	stmia	r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}

	bl	KeccakF1600_enter

	ldr	r11, [sp,#440+16]	@ restore pointer to A
	ldmia	sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ return A[5][5]
	@ r10 is sp+40 here: the round code ends with "add r10,sp,#40".
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}

	add	sp,sp,#440+20		@ drop the frame plus the saved r0
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
.size	KeccakF1600,.-KeccakF1600
@-----------------------------------------------------------------------
@ SHA3_absorb -- XOR whole input blocks into the state, permuting after
@ each block.
@
@ In:    r0 = pointer to the state A[5][5] (200 bytes)
@        r1 = input pointer
@        r2 = len, number of input bytes
@        r3 = bsz, block size in bytes
@ Out:   r0 = number of input bytes left over (less than bsz); equals len
@             unchanged when len < bsz on entry (abort path, state untouched)
@ Saves: r4-r12 (restored on exit)
@
@ Stack frame after the prologue (14 registers pushed, 456+16 reserved):
@   sp+0..199    working copy of the state, kept bit-interleaved
@   sp+200..455  scratch used by KeccakF1600_int (lr at 440, word at 444)
@   sp+456..468  the masks 0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff
@   sp+472       saved r0 (state pointer)
@   sp+476       saved r1 slot, kept up to date as the input cursor
@   sp+480       saved r2 slot, reused for the remaining length
@   sp+484       saved r3 (bsz)
@ The eight words at sp+456..487 are reloaded as r6-r12,r14 after each
@ permutation, because KeccakF1600_int uses every register.
@ NOTE(review): .Loop_block consumes 8 bytes per pass and counts bsz down
@ with subs/bhi, so bsz is presumably a non-zero multiple of 8 -- confirm
@ against the callers.
@-----------------------------------------------------------------------
.globl	SHA3_absorb
.type	SHA3_absorb,%function
.align	5
SHA3_absorb:
	stmdb	sp!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	sub	sp,sp,#456+16

	add	r10,r0,#40
	@ mov r11,r1
	mov	r12,r2			@ r12 = len
	mov	r14,r3			@ r14 = bsz
	cmp	r2,r3
	blo	.Labsorb_abort		@ less than one block: return len untouched

	add	r11,sp,#0
	ldmia	r0, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ copy A[5][5] to stack
	stmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}

	ldr	r11,[sp,#476]		@ restore r11
#ifdef __thumb2__
	mov	r9,#0x00ff00ff
	mov	r8,#0x0f0f0f0f
	mov	r7,#0x33333333
	mov	r6,#0x55555555
#else
	mov	r6,#0x11		@ compose constants
	mov	r8,#0x0f
	mov	r9,#0xff
	orr	r6,r6,r6,lsl#8
	orr	r8,r8,r8,lsl#8
	orr	r6,r6,r6,lsl#16		@ 0x11111111
	orr	r9,r9,r9,lsl#16		@ 0x00ff00ff
	orr	r8,r8,r8,lsl#16		@ 0x0f0f0f0f
	orr	r7,r6,r6,lsl#1		@ 0x33333333
	orr	r6,r6,r6,lsl#2		@ 0x55555555
#endif
	str	r9,[sp,#468]
	str	r8,[sp,#464]
	str	r7,[sp,#460]
	str	r6,[sp,#456]
	b	.Loop_absorb

.align	4
.Loop_absorb:
	subs	r0,r12,r14		@ r0 = len - bsz
	blo	.Labsorbed		@ no complete block left
	add	r10,sp,#0		@ r10 walks the stack copy of the state
	str	r0,[sp,#480]		@ save len - bsz

.align	4
.Loop_block:
	@ Assemble one 8-byte lane, little-endian, into r0 (lo) and r1 (hi).
	ldrb	r0,[r11],#1
	ldrb	r1,[r11],#1
	ldrb	r2,[r11],#1
	ldrb	r3,[r11],#1
	ldrb	r4,[r11],#1
	orr	r0,r0,r1,lsl#8
	ldrb	r1,[r11],#1
	orr	r0,r0,r2,lsl#16
	ldrb	r2,[r11],#1
	orr	r0,r0,r3,lsl#24		@ lo
	ldrb	r3,[r11],#1
	orr	r1,r4,r1,lsl#8
	orr	r1,r1,r2,lsl#16
	orr	r1,r1,r3,lsl#24		@ hi

	@ Bit-interleave: the even-numbered bits of each word are squeezed
	@ into its low half (r2, r3) and the odd-numbered bits into its high
	@ half (r0, r1), halving the spacing at every mask step.
	and	r2,r0,r6		@ &=0x55555555
	and	r0,r0,r6,lsl#1		@ &=0xaaaaaaaa
	and	r3,r1,r6		@ &=0x55555555
	and	r1,r1,r6,lsl#1		@ &=0xaaaaaaaa
	orr	r2,r2,r2,lsr#1
	orr	r0,r0,r0,lsl#1
	orr	r3,r3,r3,lsr#1
	orr	r1,r1,r1,lsl#1
	and	r2,r2,r7		@ &=0x33333333
	and	r0,r0,r7,lsl#2		@ &=0xcccccccc
	and	r3,r3,r7		@ &=0x33333333
	and	r1,r1,r7,lsl#2		@ &=0xcccccccc
	orr	r2,r2,r2,lsr#2
	orr	r0,r0,r0,lsl#2
	orr	r3,r3,r3,lsr#2
	orr	r1,r1,r1,lsl#2
	and	r2,r2,r8		@ &=0x0f0f0f0f
	and	r0,r0,r8,lsl#4		@ &=0xf0f0f0f0
	and	r3,r3,r8		@ &=0x0f0f0f0f
	and	r1,r1,r8,lsl#4		@ &=0xf0f0f0f0
	ldmia	r10,{r4,r5}		@ A_flat[i]
	orr	r2,r2,r2,lsr#4
	orr	r0,r0,r0,lsl#4
	orr	r3,r3,r3,lsr#4
	orr	r1,r1,r1,lsl#4
	and	r2,r2,r9		@ &=0x00ff00ff
	and	r0,r0,r9,lsl#8		@ &=0xff00ff00
	and	r3,r3,r9		@ &=0x00ff00ff
	and	r1,r1,r9,lsl#8		@ &=0xff00ff00
	orr	r2,r2,r2,lsr#8
	orr	r0,r0,r0,lsl#8
	orr	r3,r3,r3,lsr#8
	orr	r1,r1,r1,lsl#8

	@ First state word gets the even bits (hi half-word from hi, lo from
	@ lo); second state word gets the odd bits.
	mov	r2,r2,lsl#16
	mov	r1,r1,lsr#16
	eor	r4,r4,r3,lsl#16
	eor	r5,r5,r0,lsr#16
	eor	r4,r4,r2,lsr#16
	eor	r5,r5,r1,lsl#16
	stmia	r10!,{r4,r5}		@ A_flat[i++] ^= BitInterleave(inp[0..7])

	subs	r14,r14,#8		@ one lane of the block done
	bhi	.Loop_block

	str	r11,[sp,#476]		@ publish the advanced input cursor

	bl	KeccakF1600_int

	add	r14,sp,#456
	ldmia	r14,{r6,r7,r8,r9,r10,r11,r12,r14}	@ restore constants and variables
	b	.Loop_absorb

.align	4
.Labsorbed:
	@ r10 is the caller state pointer here: the entry check guarantees the
	@ first pass through .Loop_absorb does not branch, and every later
	@ pass follows the ldmia above, which reloads r10 from sp+472.
	add	r11,sp,#40
	ldmia	sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ return A[5][5]
	ldmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	ldmia	r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	stmia	r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}

.Labsorb_abort:
	add	sp,sp,#456+32		@ drop the frame plus the saved r0-r3
@ Shared exit of SHA3_absorb: r12 holds the unprocessed byte count.
	mov	r0,r12			@ return value
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
.size	SHA3_absorb,.-SHA3_absorb
@-----------------------------------------------------------------------
@ SHA3_squeeze -- copy len bytes of output from the bit-interleaved
@ state, running KeccakF1600 whenever a block of bsz bytes is used up.
@
@ In:    r0 = pointer to the state (A_flat, two 32-bit words per lane)
@        r1 = out, destination buffer
@        r2 = len, number of bytes to produce
@        r3 = bsz, block size in bytes
@ Out:   nothing is returned in a register; the state is permuted in
@        place once for every exhausted block that is followed by more
@        output
@ Saves: r4-r10 (restored on exit)
@
@ Stack after the prologue:
@   sp+0..15   masks r6-r9 (0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff)
@   sp+16      saved r0 (state pointer)      sp+20  saved r3 (bsz)
@   sp+24..    saved r4-r10, lr
@ The six words at sp+0..23 are reloaded as r6-r10,r12 after each call to
@ KeccakF1600.
@
@ Behaviour change versus the generated original: len == 0 now returns
@ immediately.  Previously a zero length fell into .Lsqueeze_tail, which
@ stores a byte before testing the counter, so 7 bytes were written past
@ out.  This file is generated from keccak1600-armv4.pl; the same guard
@ belongs in the generator.
@-----------------------------------------------------------------------
.globl	SHA3_squeeze
.type	SHA3_squeeze,%function
.align	5
SHA3_squeeze:
	stmdb	sp!,{r0,r3-r10,lr}

	mov	r10,r0			@ r10 = cursor into A_flat
	mov	r4,r1			@ r4  = out
	mov	r5,r2			@ r5  = len
	mov	r12,r3			@ r12 = bytes left in the current block

#ifdef __thumb2__
	mov	r9,#0x00ff00ff
	mov	r8,#0x0f0f0f0f
	mov	r7,#0x33333333
	mov	r6,#0x55555555
#else
	mov	r6,#0x11		@ compose constants
	mov	r8,#0x0f
	mov	r9,#0xff
	orr	r6,r6,r6,lsl#8
	orr	r8,r8,r8,lsl#8
	orr	r6,r6,r6,lsl#16		@ 0x11111111
	orr	r9,r9,r9,lsl#16		@ 0x00ff00ff
	orr	r8,r8,r8,lsl#16		@ 0x0f0f0f0f
	orr	r7,r6,r6,lsl#1		@ 0x33333333
	orr	r6,r6,r6,lsl#2		@ 0x55555555
#endif
	stmdb	sp!,{r6,r7,r8,r9}

	mov	r14,r10			@ remember the start of the state
	cmp	r5,#0			@ nothing requested: leave without
	beq	.Lsqueeze_done		@ touching out (stack is balanced here)
	b	.Loop_squeeze

.align	4
.Loop_squeeze:
	ldmia	r10!,{r0,r1}		@ A_flat[i++]

	@ Split the two interleaved words into half-words, then spread the
	@ bits back out step by step (inverse of the absorb-side interleave).
	mov	r2,r0,lsl#16
	mov	r3,r1,lsl#16		@ r3 = r1 << 16
	mov	r2,r2,lsr#16		@ r2 = r0 & 0x0000ffff
	mov	r1,r1,lsr#16
	mov	r0,r0,lsr#16		@ r0 = r0 >> 16
	mov	r1,r1,lsl#16		@ r1 = r1 & 0xffff0000

	orr	r2,r2,r2,lsl#8
	orr	r3,r3,r3,lsr#8
	orr	r0,r0,r0,lsl#8
	orr	r1,r1,r1,lsr#8
	and	r2,r2,r9		@ &=0x00ff00ff
	and	r3,r3,r9,lsl#8		@ &=0xff00ff00
	and	r0,r0,r9		@ &=0x00ff00ff
	and	r1,r1,r9,lsl#8		@ &=0xff00ff00
	orr	r2,r2,r2,lsl#4
	orr	r3,r3,r3,lsr#4
	orr	r0,r0,r0,lsl#4
	orr	r1,r1,r1,lsr#4
	and	r2,r2,r8		@ &=0x0f0f0f0f
	and	r3,r3,r8,lsl#4		@ &=0xf0f0f0f0
	and	r0,r0,r8		@ &=0x0f0f0f0f
	and	r1,r1,r8,lsl#4		@ &=0xf0f0f0f0
	orr	r2,r2,r2,lsl#2
	orr	r3,r3,r3,lsr#2
	orr	r0,r0,r0,lsl#2
	orr	r1,r1,r1,lsr#2
	and	r2,r2,r7		@ &=0x33333333
	and	r3,r3,r7,lsl#2		@ &=0xcccccccc
	and	r0,r0,r7		@ &=0x33333333
	and	r1,r1,r7,lsl#2		@ &=0xcccccccc
	orr	r2,r2,r2,lsl#1
	orr	r3,r3,r3,lsr#1
	orr	r0,r0,r0,lsl#1
	orr	r1,r1,r1,lsr#1
	and	r2,r2,r6		@ &=0x55555555
	and	r3,r3,r6,lsl#1		@ &=0xaaaaaaaa
	and	r0,r0,r6		@ &=0x55555555
	and	r1,r1,r6,lsl#1		@ &=0xaaaaaaaa

	orr	r2,r2,r3		@ r2 = low 32 bits of the lane
	orr	r0,r0,r1		@ r0 = high 32 bits of the lane

	cmp	r5,#8
	blo	.Lsqueeze_tail		@ 1..7 bytes left: store them one by one
	mov	r1,r2,lsr#8
	strb	r2,[r4],#1
	mov	r3,r2,lsr#16
	strb	r1,[r4],#1
	mov	r2,r2,lsr#24
	strb	r3,[r4],#1
	strb	r2,[r4],#1

	mov	r1,r0,lsr#8
	strb	r0,[r4],#1
	mov	r3,r0,lsr#16
	strb	r1,[r4],#1
	mov	r0,r0,lsr#24
	strb	r3,[r4],#1
	strb	r0,[r4],#1
	subs	r5,r5,#8
	beq	.Lsqueeze_done

	subs	r12,r12,#8		@ bsz -= 8
	bhi	.Loop_squeeze

	mov	r0,r14			@ original r10

	bl	KeccakF1600

	ldmia	sp,{r6,r7,r8,r9,r10,r12}	@ restore constants and variables
	mov	r14,r10
	b	.Loop_squeeze

.align	4
.Lsqueeze_tail:
	@ Reached with 1..7 bytes outstanding (the zero case is filtered at
	@ entry and full lanes leave through the beq above).  Each step stores
	@ first and tests afterwards.
	strb	r2,[r4],#1
	mov	r2,r2,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r2,[r4],#1
	mov	r2,r2,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r2,[r4],#1
	mov	r2,r2,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r2,[r4],#1
	subs	r5,r5,#1
	beq	.Lsqueeze_done

	strb	r0,[r4],#1
	mov	r0,r0,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r0,[r4],#1
	mov	r0,r0,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r0,[r4]			@ seventh and last possible tail byte
	b	.Lsqueeze_done

.align	4
.Lsqueeze_done:
	add	sp,sp,#24		@ drop the four masks plus saved r0 and r3
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size	SHA3_squeeze,.-SHA3_squeeze
#if __ARM_MAX_ARCH__>=7
.fpu	neon

@ iotas64: 64-bit round constants.  KeccakF1600_neon points r2 at this
@ table and reads one entry per round with a post-incrementing load.
.type	iotas64, %object
.align	5
iotas64:
.quad	0x0000000000000001
.quad	0x0000000000008082
.quad	0x800000000000808a
.quad	0x8000000080008000
.quad	0x000000000000808b
.quad	0x0000000080000001
.quad	0x8000000080008081
.quad	0x8000000000008009
.quad	0x000000000000008a
.quad	0x0000000000000088
.quad	0x0000000080008009
.quad	0x000000008000000a
.quad	0x000000008000808b
.quad	0x800000000000008b
.quad	0x8000000000008089
.quad	0x8000000000008003
.quad	0x8000000000008002
.quad	0x8000000000000080
.quad	0x000000000000800a
.quad	0x800000008000000a
.quad	0x8000000080008081
.quad	0x8000000000008080
.quad	0x0000000080000001
.quad	0x8000000080008008
.size	iotas64,.-iotas64

@-----------------------------------------------------------------------
@ KeccakF1600_neon -- 24 rounds of the permutation on a state held
@ entirely in NEON registers.
@
@ In:    r0     = pointer to a buffer used as spill space; the :64
@                 qualifiers require it to be 64-bit aligned.  Bytes
@                 0..15 (via r0) and 16..23 (via r1 = r0+16) are
@                 overwritten.
@        d0-d24 = state, in the layout SHA3_absorb_neon loads:
@                 A[0][0..4] = d0,d2,d4,d6,d8
@                 A[1][0..4] = d1,d3,d5,d7,d9
@                 A[2][0..4] = d10,d12,d14,d16,d18
@                 A[3][0..4] = d11,d13,d15,d17,d19
@                 A[4][0..4] = d20,d21,d22,d23,d24
@ Out:   d0-d24 = permuted state
@ Clobb: r1, r2, r3, d25-d31, flags.  r0 is left unchanged.
@
@ Behaviour change versus the generated original: the return used to be
@ the bare word 0xe12fff1e in every build.  That word is the ARM-state
@ encoding of bx lr and is not a return when the file is assembled in
@ Thumb state (the thumb2 configuration at the top of the file), so that
@ configuration now gets a real bx lr.  ARM-state output is unchanged.
@ This file is generated from keccak1600-armv4.pl; the same change
@ belongs in the generator.
@-----------------------------------------------------------------------
.type	KeccakF1600_neon, %function
.align	5
KeccakF1600_neon:
	add	r1, r0, #16
	adr	r2, iotas64
	mov	r3, #24			@ loop counter
	b	.Loop_neon

.align	4
.Loop_neon:
	@ Theta
	vst1.64	{q4}, [r0,:64]		@ offload A[0..1][4]
	veor	q13, q0, q5		@ A[0..1][0]^A[2..3][0]
	vst1.64	{d18}, [r1,:64]		@ offload A[2][4]
	veor	q14, q1, q6		@ A[0..1][1]^A[2..3][1]
	veor	q15, q2, q7		@ A[0..1][2]^A[2..3][2]
	veor	d26, d26, d27		@ C[0]=A[0][0]^A[1][0]^A[2][0]^A[3][0]
	veor	d27, d28, d29		@ C[1]=A[0][1]^A[1][1]^A[2][1]^A[3][1]
	veor	q14, q3, q8		@ A[0..1][3]^A[2..3][3]
	veor	q4, q4, q9		@ A[0..1][4]^A[2..3][4]
	veor	d30, d30, d31		@ C[2]=A[0][2]^A[1][2]^A[2][2]^A[3][2]
	veor	d31, d28, d29		@ C[3]=A[0][3]^A[1][3]^A[2][3]^A[3][3]
	veor	d25, d8, d9		@ C[4]=A[0][4]^A[1][4]^A[2][4]^A[3][4]
	veor	q13, q13, q10		@ C[0..1]^=A[4][0..1]
	veor	q14, q15, q11		@ C[2..3]^=A[4][2..3]
	veor	d25, d25, d24		@ C[4]^=A[4][4]

	vadd.u64	q4, q13, q13	@ C[0..1]<<1
	vadd.u64	q15, q14, q14	@ C[2..3]<<1
	vadd.u64	d18, d25, d25	@ C[4]<<1
	vsri.u64	q4, q13, #63	@ ROL64(C[0..1],1)
	vsri.u64	q15, q14, #63	@ ROL64(C[2..3],1)
	vsri.u64	d18, d25, #63	@ ROL64(C[4],1)
	veor	d25, d25, d9		@ D[0] = C[4] ^= ROL64(C[1],1)
	veor	q13, q13, q15		@ D[1..2] = C[0..1] ^ ROL64(C[2..3],1)
	veor	d28, d28, d18		@ D[3] = C[2] ^= ROL64(C[4],1)
	veor	d29, d29, d8		@ D[4] = C[3] ^= ROL64(C[0],1)

	veor	d0, d0, d25		@ A[0][0] ^= C[4]
	veor	d1, d1, d25		@ A[1][0] ^= C[4]
	veor	d10, d10, d25		@ A[2][0] ^= C[4]
	veor	d11, d11, d25		@ A[3][0] ^= C[4]
	veor	d20, d20, d25		@ A[4][0] ^= C[4]

	veor	d2, d2, d26		@ A[0][1] ^= D[1]
	veor	d3, d3, d26		@ A[1][1] ^= D[1]
	veor	d12, d12, d26		@ A[2][1] ^= D[1]
	veor	d13, d13, d26		@ A[3][1] ^= D[1]
	veor	d21, d21, d26		@ A[4][1] ^= D[1]
	vmov	d26, d27

	veor	d6, d6, d28		@ A[0][3] ^= C[2]
	veor	d7, d7, d28		@ A[1][3] ^= C[2]
	veor	d16, d16, d28		@ A[2][3] ^= C[2]
	veor	d17, d17, d28		@ A[3][3] ^= C[2]
	veor	d23, d23, d28		@ A[4][3] ^= C[2]
	vld1.64	{q4}, [r0,:64]		@ restore A[0..1][4]
	vmov	d28, d29

	vld1.64	{d18}, [r1,:64]		@ restore A[2][4]
	veor	q2, q2, q13		@ A[0..1][2] ^= D[2]
	veor	q7, q7, q13		@ A[2..3][2] ^= D[2]
	veor	d22, d22, d27		@ A[4][2] ^= D[2]

	veor	q4, q4, q14		@ A[0..1][4] ^= C[3]
	veor	q9, q9, q14		@ A[2..3][4] ^= C[3]
	veor	d24, d24, d29		@ A[4][4] ^= C[3]

	@ Rho + Pi
	vmov	d26, d2			@ C[1] = A[0][1]
	vshl.u64	d2, d3, #44
	vmov	d27, d4			@ C[2] = A[0][2]
	vshl.u64	d4, d14, #43
	vmov	d28, d6			@ C[3] = A[0][3]
	vshl.u64	d6, d17, #21
	vmov	d29, d8			@ C[4] = A[0][4]
	vshl.u64	d8, d24, #14
	vsri.u64	d2, d3, #64-44	@ A[0][1] = ROL64(A[1][1], rhotates[1][1])
	vsri.u64	d4, d14, #64-43	@ A[0][2] = ROL64(A[2][2], rhotates[2][2])
	vsri.u64	d6, d17, #64-21	@ A[0][3] = ROL64(A[3][3], rhotates[3][3])
	vsri.u64	d8, d24, #64-14	@ A[0][4] = ROL64(A[4][4], rhotates[4][4])

	vshl.u64	d3, d9, #20
	vshl.u64	d14, d16, #25
	vshl.u64	d17, d15, #15
	vshl.u64	d24, d21, #2
	vsri.u64	d3, d9, #64-20	@ A[1][1] = ROL64(A[1][4], rhotates[1][4])
	vsri.u64	d14, d16, #64-25	@ A[2][2] = ROL64(A[2][3], rhotates[2][3])
	vsri.u64	d17, d15, #64-15	@ A[3][3] = ROL64(A[3][2], rhotates[3][2])
	vsri.u64	d24, d21, #64-2	@ A[4][4] = ROL64(A[4][1], rhotates[4][1])

	vshl.u64	d9, d22, #61
	@ vshl.u64 d16, d19, #8
	vshl.u64	d15, d12, #10
	vshl.u64	d21, d7, #55
	vsri.u64	d9, d22, #64-61	@ A[1][4] = ROL64(A[4][2], rhotates[4][2])
	vext.8	d16, d19, d19, #8-1	@ A[2][3] = ROL64(A[3][4], rhotates[3][4])
	vsri.u64	d15, d12, #64-10	@ A[3][2] = ROL64(A[2][1], rhotates[2][1])
	vsri.u64	d21, d7, #64-55	@ A[4][1] = ROL64(A[1][3], rhotates[1][3])

	vshl.u64	d22, d18, #39
	@ vshl.u64 d19, d23, #56
	vshl.u64	d12, d5, #6
	vshl.u64	d7, d13, #45
	vsri.u64	d22, d18, #64-39	@ A[4][2] = ROL64(A[2][4], rhotates[2][4])
	vext.8	d19, d23, d23, #8-7	@ A[3][4] = ROL64(A[4][3], rhotates[4][3])
	vsri.u64	d12, d5, #64-6	@ A[2][1] = ROL64(A[1][2], rhotates[1][2])
	vsri.u64	d7, d13, #64-45	@ A[1][3] = ROL64(A[3][1], rhotates[3][1])

	vshl.u64	d18, d20, #18
	vshl.u64	d23, d11, #41
	vshl.u64	d5, d10, #3
	vshl.u64	d13, d1, #36
	vsri.u64	d18, d20, #64-18	@ A[2][4] = ROL64(A[4][0], rhotates[4][0])
	vsri.u64	d23, d11, #64-41	@ A[4][3] = ROL64(A[3][0], rhotates[3][0])
	vsri.u64	d5, d10, #64-3	@ A[1][2] = ROL64(A[2][0], rhotates[2][0])
	vsri.u64	d13, d1, #64-36	@ A[3][1] = ROL64(A[1][0], rhotates[1][0])

	vshl.u64	d1, d28, #28
	vshl.u64	d10, d26, #1
	vshl.u64	d11, d29, #27
	vshl.u64	d20, d27, #62
	vsri.u64	d1, d28, #64-28	@ A[1][0] = ROL64(C[3], rhotates[0][3])
	vsri.u64	d10, d26, #64-1	@ A[2][0] = ROL64(C[1], rhotates[0][1])
	vsri.u64	d11, d29, #64-27	@ A[3][0] = ROL64(C[4], rhotates[0][4])
	vsri.u64	d20, d27, #64-62	@ A[4][0] = ROL64(C[2], rhotates[0][2])

	@ Chi + Iota
	vbic	q13, q2, q1
	vbic	q14, q3, q2
	vbic	q15, q4, q3
	veor	q13, q13, q0		@ A[0..1][0] ^ (~A[0..1][1] & A[0..1][2])
	veor	q14, q14, q1		@ A[0..1][1] ^ (~A[0..1][2] & A[0..1][3])
	veor	q2, q2, q15		@ A[0..1][2] ^= (~A[0..1][3] & A[0..1][4])
	vst1.64	{q13}, [r0,:64]		@ offload A[0..1][0]
	vbic	q13, q0, q4
	vbic	q15, q1, q0
	vmov	q1, q14			@ A[0..1][1]
	veor	q3, q3, q13		@ A[0..1][3] ^= (~A[0..1][4] & A[0..1][0])
	veor	q4, q4, q15		@ A[0..1][4] ^= (~A[0..1][0] & A[0..1][1])

	vbic	q13, q7, q6
	vmov	q0, q5			@ A[2..3][0]
	vbic	q14, q8, q7
	vmov	q15, q6			@ A[2..3][1]
	veor	q5, q5, q13		@ A[2..3][0] ^= (~A[2..3][1] & A[2..3][2])
	vbic	q13, q9, q8
	veor	q6, q6, q14		@ A[2..3][1] ^= (~A[2..3][2] & A[2..3][3])
	vbic	q14, q0, q9
	veor	q7, q7, q13		@ A[2..3][2] ^= (~A[2..3][3] & A[2..3][4])
	vbic	q13, q15, q0
	veor	q8, q8, q14		@ A[2..3][3] ^= (~A[2..3][4] & A[2..3][0])
	vmov	q14, q10		@ A[4][0..1]
	veor	q9, q9, q13		@ A[2..3][4] ^= (~A[2..3][0] & A[2..3][1])

	vld1.64	d25, [r2,:64]!		@ Iota[i++]
	vbic	d26, d22, d21
	vbic	d27, d23, d22
	vld1.64	{q0}, [r0,:64]		@ restore A[0..1][0]
	veor	d20, d20, d26		@ A[4][0] ^= (~A[4][1] & A[4][2])
	vbic	d26, d24, d23
	veor	d21, d21, d27		@ A[4][1] ^= (~A[4][2] & A[4][3])
	vbic	d27, d28, d24
	veor	d22, d22, d26		@ A[4][2] ^= (~A[4][3] & A[4][4])
	vbic	d26, d29, d28
	veor	d23, d23, d27		@ A[4][3] ^= (~A[4][4] & A[4][0])
	veor	d0, d0, d25		@ A[0][0] ^= Iota[i]
	veor	d24, d24, d26		@ A[4][4] ^= (~A[4][0] & A[4][1])

	subs	r3, r3, #1
	bne	.Loop_neon

#if defined(__thumb2__)
	bx	lr			@ Thumb state needs a real instruction
#else
.word	0xe12fff1e			@ bx lr, hand-encoded for ARM state
#endif
.size	KeccakF1600_neon,.-KeccakF1600_neon

@-----------------------------------------------------------------------
@ SHA3_absorb_neon -- NEON counterpart of SHA3_absorb: XOR whole input
@ blocks into the state and permute after each one.
@
@ In:    r0 = pointer to the state (25 lanes of 8 bytes; the :64
@             qualifiers require 64-bit alignment)
@        r1 = inp, r2 = len, r3 = bsz
@ Out:   r0 = number of input bytes left over (set from r5 at the exit)
@ Saves: r4-r6 and d8-d15 (pushed here, popped at the exit)
@
@ The state stays in d0-d24 for the whole call.  While the loop runs,
@ the first 24 bytes at r0 serve as spill space for KeccakF1600_neon;
@ the complete state is written back at .Labsorbed_neon.
@-----------------------------------------------------------------------
.globl	SHA3_absorb_neon
.type	SHA3_absorb_neon, %function
.align	5
SHA3_absorb_neon:
	stmdb	sp!, {r4,r5,r6,lr}
	vstmdb	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}

	mov	r4, r1			@ inp
	mov	r5, r2			@ len
	mov	r6, r3			@ bsz

	vld1.32	{d0}, [r0,:64]!		@ A[0][0]
	vld1.32	{d2}, [r0,:64]!		@ A[0][1]
	vld1.32	{d4}, [r0,:64]!		@ A[0][2]
	vld1.32	{d6}, [r0,:64]!		@ A[0][3]
	vld1.32	{d8}, [r0,:64]!		@ A[0][4]

	vld1.32	{d1}, [r0,:64]!		@ A[1][0]
	vld1.32	{d3}, [r0,:64]!		@ A[1][1]
	vld1.32	{d5}, [r0,:64]!		@ A[1][2]
	vld1.32	{d7}, [r0,:64]!		@ A[1][3]
	vld1.32	{d9}, [r0,:64]!		@ A[1][4]

	vld1.32	{d10}, [r0,:64]!	@ A[2][0]
	vld1.32	{d12}, [r0,:64]!	@ A[2][1]
	vld1.32	{d14}, [r0,:64]!	@ A[2][2]
	vld1.32	{d16}, [r0,:64]!	@ A[2][3]
	vld1.32	{d18}, [r0,:64]!	@ A[2][4]

	vld1.32	{d11}, [r0,:64]!	@ A[3][0]
	vld1.32	{d13}, [r0,:64]!	@ A[3][1]
	vld1.32	{d15}, [r0,:64]!	@ A[3][2]
	vld1.32	{d17}, [r0,:64]!	@ A[3][3]
	vld1.32	{d19}, [r0,:64]!	@ A[3][4]

	vld1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..3]
	vld1.32	{d24}, [r0,:64]		@ A[4][4]
	sub	r0, r0, #24*8		@ rewind
	b	.Loop_absorb_neon

.align	4
.Loop_absorb_neon:
	subs	r12, r5, r6		@ len - bsz
	blo	.Labsorbed_neon
	mov	r5, r12

	@ One lane is absorbed per vld1.8/veor pair.  Each cmp against an
	@ even lane count feeds two exits: blo stops after the odd lane just
	@ absorbed, and the following beq stops after the next (even) lane.
	@ The NEON instructions in between leave the flags alone.
	vld1.8	{d31}, [r4]!		@ endian-neutral loads...
	cmp	r6, #8*2
	veor	d0, d0, d31		@ A[0][0] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d2, d2, d31		@ A[0][1] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*4
	veor	d4, d4, d31		@ A[0][2] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d6, d6, d31		@ A[0][3] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31},[r4]!
	cmp	r6, #8*6
	veor	d8, d8, d31		@ A[0][4] ^= *inp++
	blo	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	veor	d1, d1, d31		@ A[1][0] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*8
	veor	d3, d3, d31		@ A[1][1] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d5, d5, d31		@ A[1][2] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*10
	veor	d7, d7, d31		@ A[1][3] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d9, d9, d31		@ A[1][4] ^= *inp++
	beq	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	cmp	r6, #8*12
	veor	d10, d10, d31		@ A[2][0] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d12, d12, d31		@ A[2][1] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*14
	veor	d14, d14, d31		@ A[2][2] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d16, d16, d31		@ A[2][3] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
2467 cmp r6, #8*16 2468 veor d18, d18, d31 @ A[2][4] ^= *inp++ 2469 blo .Lprocess_neon 2470 2471 vld1.8 {d31}, [r4]! 2472 veor d11, d11, d31 @ A[3][0] ^= *inp++ 2473 beq .Lprocess_neon 2474 vld1.8 {d31}, [r4]! 2475 cmp r6, #8*18 2476 veor d13, d13, d31 @ A[3][1] ^= *inp++ 2477 blo .Lprocess_neon 2478 vld1.8 {d31}, [r4]! 2479 veor d15, d15, d31 @ A[3][2] ^= *inp++ 2480 beq .Lprocess_neon 2481 vld1.8 {d31}, [r4]! 2482 cmp r6, #8*20 2483 veor d17, d17, d31 @ A[3][3] ^= *inp++ 2484 blo .Lprocess_neon 2485 vld1.8 {d31}, [r4]! 2486 veor d19, d19, d31 @ A[3][4] ^= *inp++ 2487 beq .Lprocess_neon 2488 2489 vld1.8 {d31}, [r4]! 2490 cmp r6, #8*22 2491 veor d20, d20, d31 @ A[4][0] ^= *inp++ 2492 blo .Lprocess_neon 2493 vld1.8 {d31}, [r4]! 2494 veor d21, d21, d31 @ A[4][1] ^= *inp++ 2495 beq .Lprocess_neon 2496 vld1.8 {d31}, [r4]! 2497 cmp r6, #8*24 2498 veor d22, d22, d31 @ A[4][2] ^= *inp++ 2499 blo .Lprocess_neon 2500 vld1.8 {d31}, [r4]! 2501 veor d23, d23, d31 @ A[4][3] ^= *inp++ 2502 beq .Lprocess_neon 2503 vld1.8 {d31}, [r4]! 2504 veor d24, d24, d31 @ A[4][4] ^= *inp++ 2505 2506.Lprocess_neon: 2507 bl KeccakF1600_neon 2508 b .Loop_absorb_neon 2509 2510.align 4 2511.Labsorbed_neon: 2512 vst1.32 {d0}, [r0,:64]! @ A[0][0..4] 2513 vst1.32 {d2}, [r0,:64]! 2514 vst1.32 {d4}, [r0,:64]! 2515 vst1.32 {d6}, [r0,:64]! 2516 vst1.32 {d8}, [r0,:64]! 2517 2518 vst1.32 {d1}, [r0,:64]! @ A[1][0..4] 2519 vst1.32 {d3}, [r0,:64]! 2520 vst1.32 {d5}, [r0,:64]! 2521 vst1.32 {d7}, [r0,:64]! 2522 vst1.32 {d9}, [r0,:64]! 2523 2524 vst1.32 {d10}, [r0,:64]! @ A[2][0..4] 2525 vst1.32 {d12}, [r0,:64]! 2526 vst1.32 {d14}, [r0,:64]! 2527 vst1.32 {d16}, [r0,:64]! 2528 vst1.32 {d18}, [r0,:64]! 2529 2530 vst1.32 {d11}, [r0,:64]! @ A[3][0..4] 2531 vst1.32 {d13}, [r0,:64]! 2532 vst1.32 {d15}, [r0,:64]! 2533 vst1.32 {d17}, [r0,:64]! 2534 vst1.32 {d19}, [r0,:64]! 2535 2536 vst1.32 {d20,d21,d22,d23}, [r0,:64]! 
@ A[4][0..4]
	vst1.32	{d24}, [r0,:64]

	mov	r0, r5			@ return value
	vldmia	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!, {r4,r5,r6,pc}
.size	SHA3_absorb_neon,.-SHA3_absorb_neon

@ ----------------------------------------------------------------------
@ SHA3_squeeze_neon
@
@ Copies len bytes out of the Keccak state, 8 bytes (one lane) at a time,
@ and calls KeccakF1600_neon each time bsz bytes of the state have been
@ emitted while more output is still wanted.
@
@ In:	r0 = A_flat	state; accessed with :64 alignment hints, so it
@			must be 8-byte aligned
@	r1 = out	destination buffer (byte stores, any alignment)
@	r2 = len	number of bytes to produce
@	r3 = bsz	bytes emitted per permutation; counted down in steps
@			of 8 (presumably always a multiple of 8 - TODO confirm
@			against callers)
@
@ Register roles inside the function:
@	r4  = output cursor		r5  = bytes still to produce
@	r6  = bsz (kept for reload)	r12 = read cursor into the state
@	r14 = bytes left in the current block
@
@ r4-r6 and d8-d15 are saved and restored here; r0-r3, r12 and the flags
@ are clobbered.
@
@ NOTE(review): the file header says this file is generated from
@ keccak1600-armv4.pl; the len == 0 guard below should be mirrored there.
@ ----------------------------------------------------------------------
.globl	SHA3_squeeze_neon
.type	SHA3_squeeze_neon, %function
.align	5
SHA3_squeeze_neon:
	stmdb	sp!, {r4,r5,r6,lr}

	@ Zero-length request: leave before anything is stored.  Without
	@ this check the first "blo" below falls into the tail code, which
	@ stores one byte before it looks at len again.
	cmp	r2, #0
	beq	.Lsqueeze_neon_done

	mov	r4, r1			@ out
	mov	r5, r2			@ len
	mov	r6, r3			@ bsz
	mov	r12, r0			@ A_flat
	mov	r14, r3			@ bsz
	b	.Loop_squeeze_neon

.align	4
.Loop_squeeze_neon:
	cmp	r5, #8
	blo	.Lsqueeze_neon_tail	@ fewer than 8 bytes left: byte stores
	vld1.32	{d0}, [r12]!		@ next 64-bit lane of the state
	vst1.8	{d0}, [r4]!		@ endian-neutral store

	subs	r5, r5, #8		@ len -= 8
	beq	.Lsqueeze_neon_done

	subs	r14, r14, #8		@ bsz -= 8
	bhi	.Loop_squeeze_neon	@ block not exhausted yet

	@ Block exhausted and more output is wanted: load the whole state
	@ into d0-d24, permute it, and write it back.  d8-d15 are
	@ overwritten by the load, so they are preserved around it.
	vstmdb	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}

	vld1.32	{d0}, [r0,:64]!		@ A[0][0..4]
	vld1.32	{d2}, [r0,:64]!
	vld1.32	{d4}, [r0,:64]!
	vld1.32	{d6}, [r0,:64]!
	vld1.32	{d8}, [r0,:64]!

	vld1.32	{d1}, [r0,:64]!		@ A[1][0..4]
	vld1.32	{d3}, [r0,:64]!
	vld1.32	{d5}, [r0,:64]!
	vld1.32	{d7}, [r0,:64]!
	vld1.32	{d9}, [r0,:64]!

	vld1.32	{d10}, [r0,:64]!	@ A[2][0..4]
	vld1.32	{d12}, [r0,:64]!
	vld1.32	{d14}, [r0,:64]!
	vld1.32	{d16}, [r0,:64]!
	vld1.32	{d18}, [r0,:64]!

	vld1.32	{d11}, [r0,:64]!	@ A[3][0..4]
	vld1.32	{d13}, [r0,:64]!
	vld1.32	{d15}, [r0,:64]!
	vld1.32	{d17}, [r0,:64]!
	vld1.32	{d19}, [r0,:64]!

	vld1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
	vld1.32	{d24}, [r0,:64]
	sub	r0, r0, #24*8		@ rewind

	bl	KeccakF1600_neon

	mov	r12, r0			@ A_flat
	vst1.32	{d0}, [r0,:64]!		@ A[0][0..4]
	vst1.32	{d2}, [r0,:64]!
	vst1.32	{d4}, [r0,:64]!
	vst1.32	{d6}, [r0,:64]!
	vst1.32	{d8}, [r0,:64]!

	vst1.32	{d1}, [r0,:64]!		@ A[1][0..4]
	vst1.32	{d3}, [r0,:64]!
	vst1.32	{d5}, [r0,:64]!
	vst1.32	{d7}, [r0,:64]!
	vst1.32	{d9}, [r0,:64]!

	vst1.32	{d10}, [r0,:64]!	@ A[2][0..4]
	vst1.32	{d12}, [r0,:64]!
	vst1.32	{d14}, [r0,:64]!
	vst1.32	{d16}, [r0,:64]!
	vst1.32	{d18}, [r0,:64]!

	vst1.32	{d11}, [r0,:64]!	@ A[3][0..4]
	vst1.32	{d13}, [r0,:64]!
	vst1.32	{d15}, [r0,:64]!
	vst1.32	{d17}, [r0,:64]!
	vst1.32	{d19}, [r0,:64]!

	vst1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
	mov	r14, r6			@ bsz (bl replaced r14 with the return address)
	vst1.32	{d24}, [r0,:64]
	mov	r0, r12			@ rewind

	vldmia	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
	b	.Loop_squeeze_neon

.align	4
.Lsqueeze_neon_tail:
	@ 1..7 bytes remain.  Load the next lane as two words and emit it
	@ byte by byte, low byte first.  Each cmp feeds two exits: blo for
	@ the odd length below the constant, beq for the constant itself.
	@ The strb and mov in between do not alter the flags.
	ldmia	r12, {r2,r3}
	cmp	r5, #2
	strb	r2, [r4],#1		@ endian-neutral store
	mov	r2, r2, lsr#8
	blo	.Lsqueeze_neon_done	@ len == 1
	strb	r2, [r4], #1
	mov	r2, r2, lsr#8
	beq	.Lsqueeze_neon_done	@ len == 2
	strb	r2, [r4], #1
	mov	r2, r2, lsr#8
	cmp	r5, #4
	blo	.Lsqueeze_neon_done	@ len == 3
	strb	r2, [r4], #1
	beq	.Lsqueeze_neon_done	@ len == 4

	strb	r3, [r4], #1
	mov	r3, r3, lsr#8
	cmp	r5, #6
	blo	.Lsqueeze_neon_done	@ len == 5
	strb	r3, [r4], #1
	mov	r3, r3, lsr#8
	beq	.Lsqueeze_neon_done	@ len == 6
	strb	r3, [r4], #1		@ len == 7

.Lsqueeze_neon_done:
	ldmia	sp!, {r4,r5,r6,pc}
.size	SHA3_squeeze_neon,.-SHA3_squeeze_neon
#endif
@ NUL-terminated ASCII identification string:
@ "Keccak-1600 absorb and squeeze for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
.byte	75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2