/* Do not modify. This file is auto-generated from keccak1600-armv8.pl. */
#include "arm_arch.h"

// Keccak-f[1600] permutation with absorb and squeeze loops for AArch64.
//
// Two code paths live in this file:
//   - scalar: the 25 64-bit lanes are kept in general-purpose registers
//     (KeccakF1600_int, KeccakF1600, SHA3_absorb, SHA3_squeeze);
//   - vector: the lanes are kept in v0-v24 and the round is built from
//     eor3/rax1/xar/bcax encodings
//     (KeccakF1600_ce, KeccakF1600_cext, SHA3_absorb_cext, SHA3_squeeze_cext).
//
// Scalar lane-to-register map, as established by the load/store sequences
// below: lanes 0-17 -> x0-x17, lane 18 -> x25, lanes 19-24 -> x19-x24.
// x18 is never touched.
//
// NOTE(review): per the banner this file is generated; lasting changes
// presumably have to be mirrored in keccak1600-armv8.pl.

.text

.align	8	// strategic alignment and padding that allows to use
		// address value as loop termination condition...
// 8 padding quads (64 bytes) + 24 round constants (192 bytes) = 256 bytes,
// so the address just past the table has its low 8 bits clear; the scalar
// round loop tests exactly that with "tst ...,#255".
.quad	0,0,0,0,0,0,0,0
.type	iotas,%object
iotas:
.quad	0x0000000000000001
.quad	0x0000000000008082
.quad	0x800000000000808a
.quad	0x8000000080008000
.quad	0x000000000000808b
.quad	0x0000000080000001
.quad	0x8000000080008081
.quad	0x8000000000008009
.quad	0x000000000000008a
.quad	0x0000000000000088
.quad	0x0000000080008009
.quad	0x000000008000000a
.quad	0x000000008000808b
.quad	0x800000000000008b
.quad	0x8000000000008089
.quad	0x8000000000008003
.quad	0x8000000000008002
.quad	0x8000000000000080
.quad	0x000000000000800a
.quad	0x800000008000000a
.quad	0x8000000080008081
.quad	0x8000000000008080
.quad	0x0000000080000001
.quad	0x8000000080008008
.size	iotas,.-iotas

//-----------------------------------------------------------------------
// KeccakF1600_int - scalar permutation core (file-local, custom contract)
// In:    25 lanes in x0-x17, x25, x19-x24 (see lane map at top of file)
// Out:   permuted lanes in the same registers
// Stack: uses the 32 bytes at [sp,#0..#31] that the CALLER must reserve:
//          [sp,#0]   temporary home for x4/x9 during Theta
//          [sp,#16]  running pointer into iotas
//          [sp,#24]  saved x30
// Clobb: x26, x27, x28, flags; x30 is used as scratch inside the loop
//        and reloaded from [sp,#24] before returning
//-----------------------------------------------------------------------
.type	KeccakF1600_int,%function
.align	5
KeccakF1600_int:
	AARCH64_SIGN_LINK_REGISTER
	adr	x28,iotas
	stp	x28,x30,[sp,#16]		// 32 bytes on top are mine
	b	.Loop
.align	4
.Loop:
	////////////////////////////////////////// Theta
	eor	x26,x0,x5
	stp	x4,x9,[sp,#0]	// offload pair...
	eor	x27,x1,x6
	eor	x28,x2,x7
	eor	x30,x3,x8
	eor	x4,x4,x9
	eor	x26,x26,x10
	eor	x27,x27,x11
	eor	x28,x28,x12
	eor	x30,x30,x13
	eor	x4,x4,x14
	eor	x26,x26,x15
	eor	x27,x27,x16
	eor	x28,x28,x17
	eor	x30,x30,x25
	eor	x4,x4,x19
	eor	x26,x26,x20
	eor	x28,x28,x22
	eor	x27,x27,x21
	eor	x30,x30,x23
	eor	x4,x4,x24

	eor	x9,x26,x28,ror#63

	eor	x1,x1,x9
	eor	x6,x6,x9
	eor	x11,x11,x9
	eor	x16,x16,x9
	eor	x21,x21,x9

	eor	x9,x27,x30,ror#63
	eor	x28,x28,x4,ror#63
	eor	x30,x30,x26,ror#63
	eor	x4,x4,x27,ror#63

	eor	x27,   x2,x9		// mov	x27,x2
	eor	x7,x7,x9
	eor	x12,x12,x9
	eor	x17,x17,x9
	eor	x22,x22,x9

	eor	x0,x0,x4
	eor	x5,x5,x4
	eor	x10,x10,x4
	eor	x15,x15,x4
	eor	x20,x20,x4
	ldp	x4,x9,[sp,#0]		// re-load offloaded data
	eor	x26,   x3,x28		// mov	x26,x3
	eor	x8,x8,x28
	eor	x13,x13,x28
	eor	x25,x25,x28
	eor	x23,x23,x28

	eor	x28,   x4,x30		// mov	x28,x4
	eor	x9,x9,x30
	eor	x14,x14,x30
	eor	x19,x19,x30
	eor	x24,x24,x30

	////////////////////////////////////////// Rho+Pi
	mov	x30,x1
	ror	x1,x6,#64-44
	//mov	x27,x2
	ror	x2,x12,#64-43
	//mov	x26,x3
	ror	x3,x25,#64-21
	//mov	x28,x4
	ror	x4,x24,#64-14

	ror	x6,x9,#64-20
	ror	x12,x13,#64-25
	ror	x25,x17,#64-15
	ror	x24,x21,#64-2

	ror	x9,x22,#64-61
	ror	x13,x19,#64-8
	ror	x17,x11,#64-10
	ror	x21,x8,#64-55

	ror	x22,x14,#64-39
	ror	x19,x23,#64-56
	ror	x11,x7,#64-6
	ror	x8,x16,#64-45

	ror	x14,x20,#64-18
	ror	x23,x15,#64-41
	ror	x7,x10,#64-3
	ror	x16,x5,#64-36

	ror	x5,x26,#64-28
	ror	x10,x30,#64-1
	ror	x15,x28,#64-27
	ror	x20,x27,#64-62

	////////////////////////////////////////// Chi+Iota
	bic	x26,x2,x1
	bic	x27,x3,x2
	bic	x28,x0,x4
	bic	x30,x1,x0
	eor	x0,x0,x26
	bic	x26,x4,x3
	eor	x1,x1,x27
	ldr	x27,[sp,#16]
	eor	x3,x3,x28
	eor	x4,x4,x30
	eor	x2,x2,x26
	ldr	x30,[x27],#8		// Iota[i++]

	bic	x26,x7,x6
	// The flags set here are consumed by the bne at the bottom of the
	// loop; nothing in between (bic/eor/str) writes the flags.
	tst	x27,#255			// are we done?
	str	x27,[sp,#16]
	bic	x27,x8,x7
	bic	x28,x5,x9
	eor	x0,x0,x30		// A[0][0] ^= Iota
	bic	x30,x6,x5
	eor	x5,x5,x26
	bic	x26,x9,x8
	eor	x6,x6,x27
	eor	x8,x8,x28
	eor	x9,x9,x30
	eor	x7,x7,x26

	bic	x26,x12,x11
	bic	x27,x13,x12
	bic	x28,x10,x14
	bic	x30,x11,x10
	eor	x10,x10,x26
	bic	x26,x14,x13
	eor	x11,x11,x27
	eor	x13,x13,x28
	eor	x14,x14,x30
	eor	x12,x12,x26

	bic	x26,x17,x16
	bic	x27,x25,x17
	bic	x28,x15,x19
	bic	x30,x16,x15
	eor	x15,x15,x26
	bic	x26,x19,x25
	eor	x16,x16,x27
	eor	x25,x25,x28
	eor	x19,x19,x30
	eor	x17,x17,x26

	bic	x26,x22,x21
	bic	x27,x23,x22
	bic	x28,x20,x24
	bic	x30,x21,x20
	eor	x20,x20,x26
	bic	x26,x24,x23
	eor	x21,x21,x27
	eor	x23,x23,x28
	eor	x24,x24,x30
	eor	x22,x22,x26

	bne	.Loop

	ldr	x30,[sp,#24]		// x30 was scratch above; restore return address
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	KeccakF1600_int,.-KeccakF1600_int

//-----------------------------------------------------------------------
// KeccakF1600 - apply the scalar permutation to a state held in memory
// In:    x0 = pointer to 25 consecutive 64-bit lanes (200 bytes)
// Out:   state at that address is replaced by the permuted state
// Stack: 128-byte frame (x29, x30, x19-x28) plus 48 bytes of locals:
//          [sp,#0..#31]  scratch handed to KeccakF1600_int
//          [sp,#32]      saved state pointer
// Clobb: x0-x17, flags; x19-x30 are restored before returning
//-----------------------------------------------------------------------
.type	KeccakF1600,%function
.align	5
KeccakF1600:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0
	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#48

	str	x0,[sp,#32]			// offload argument
	mov	x26,x0				// x0 is about to become lane 0
	ldp	x0,x1,[x0,#16*0]
	ldp	x2,x3,[x26,#16*1]
	ldp	x4,x5,[x26,#16*2]
	ldp	x6,x7,[x26,#16*3]
	ldp	x8,x9,[x26,#16*4]
	ldp	x10,x11,[x26,#16*5]
	ldp	x12,x13,[x26,#16*6]
	ldp	x14,x15,[x26,#16*7]
	ldp	x16,x17,[x26,#16*8]
	ldp	x25,x19,[x26,#16*9]		// lane 18 lives in x25
	ldp	x20,x21,[x26,#16*10]
	ldp	x22,x23,[x26,#16*11]
	ldr	x24,[x26,#16*12]

	bl	KeccakF1600_int

	ldr	x26,[sp,#32]			// state pointer back
	stp	x0,x1,[x26,#16*0]
	stp	x2,x3,[x26,#16*1]
	stp	x4,x5,[x26,#16*2]
	stp	x6,x7,[x26,#16*3]
	stp	x8,x9,[x26,#16*4]
	stp	x10,x11,[x26,#16*5]
	stp	x12,x13,[x26,#16*6]
	stp	x14,x15,[x26,#16*7]
	stp	x16,x17,[x26,#16*8]
	stp	x25,x19,[x26,#16*9]
	stp	x20,x21,[x26,#16*10]
	stp	x22,x23,[x26,#16*11]
	str	x24,[x26,#16*12]

	ldp	x19,x20,[x29,#16]		// x29 still addresses the register frame
	add	sp,sp,#48
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	KeccakF1600,.-KeccakF1600

//-----------------------------------------------------------------------
// SHA3_absorb - XOR whole input blocks into the state, permuting after each
// In:    x0 = uint64_t A[5][5]   state
//        x1 = const void *inp    input bytes
//        x2 = size_t len         bytes available at inp
//        x3 = size_t bsz         block size in bytes
// Out:   x0 = number of input bytes left unprocessed (always < bsz)
// Stack: 128-byte frame (x29, x30, x19-x28) plus 64 bytes of locals:
//          [sp,#0..#31]  scratch handed to KeccakF1600_int
//          [sp,#32]      A        [sp,#40]  inp (updated per block)
//          [sp,#48]      len (updated per block)   [sp,#56]  bsz
// Clobb: x0-x17, flags
// NOTE(review): the lane-count ladder below compares bsz against multiples
// of 8 only; it assumes bsz is a multiple of 8 in 8..200 - confirm callers.
//-----------------------------------------------------------------------
.globl	SHA3_absorb
.type	SHA3_absorb,%function
.align	5
SHA3_absorb:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0
	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#64

	stp	x0,x1,[sp,#32]			// offload arguments
	stp	x2,x3,[sp,#48]

	mov	x26,x0			// uint64_t A[5][5]
	mov	x27,x1			// const void *inp
	mov	x28,x2			// size_t len
	mov	x30,x3			// size_t bsz
	ldp	x0,x1,[x26,#16*0]
	ldp	x2,x3,[x26,#16*1]
	ldp	x4,x5,[x26,#16*2]
	ldp	x6,x7,[x26,#16*3]
	ldp	x8,x9,[x26,#16*4]
	ldp	x10,x11,[x26,#16*5]
	ldp	x12,x13,[x26,#16*6]
	ldp	x14,x15,[x26,#16*7]
	ldp	x16,x17,[x26,#16*8]
	ldp	x25,x19,[x26,#16*9]
	ldp	x20,x21,[x26,#16*10]
	ldp	x22,x23,[x26,#16*11]
	ldr	x24,[x26,#16*12]
	b	.Loop_absorb

.align	4
.Loop_absorb:
	subs	x26,x28,x30		// len - bsz
	blo	.Labsorbed		// less than one full block left

	str	x26,[sp,#48]			// save len - bsz
	// Lane ladder: one cmp serves two lanes. "blo" stops after an even
	// lane when bsz is below the next pair; "beq" stops after the odd
	// lane when bsz matches exactly. ldr/rev/eor leave the flags alone.
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x0,x0,x26
	cmp	x30,#8*(0+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x1,x1,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x2,x2,x26
	cmp	x30,#8*(2+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x3,x3,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x4,x4,x26
	cmp	x30,#8*(4+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x5,x5,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x6,x6,x26
	cmp	x30,#8*(6+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x7,x7,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x8,x8,x26
	cmp	x30,#8*(8+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x9,x9,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x10,x10,x26
	cmp	x30,#8*(10+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x11,x11,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x12,x12,x26
	cmp	x30,#8*(12+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x13,x13,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x14,x14,x26
	cmp	x30,#8*(14+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x15,x15,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x16,x16,x26
	cmp	x30,#8*(16+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x17,x17,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x25,x25,x26		// lane 18 lives in x25
	cmp	x30,#8*(18+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x19,x19,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x20,x20,x26
	cmp	x30,#8*(20+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x21,x21,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x22,x22,x26
	cmp	x30,#8*(22+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x23,x23,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x24,x24,x26

.Lprocess_block:
	str	x27,[sp,#40]			// save inp

	bl	KeccakF1600_int			// clobbers x26-x28 and x30

	ldr	x27,[sp,#40]			// restore arguments
	ldp	x28,x30,[sp,#48]		// x28 = len - bsz, x30 = bsz
	b	.Loop_absorb

.align	4
.Labsorbed:
	ldr	x27,[sp,#32]			// A
	stp	x0,x1,[x27,#16*0]
	stp	x2,x3,[x27,#16*1]
	stp	x4,x5,[x27,#16*2]
	stp	x6,x7,[x27,#16*3]
	stp	x8,x9,[x27,#16*4]
	stp	x10,x11,[x27,#16*5]
	stp	x12,x13,[x27,#16*6]
	stp	x14,x15,[x27,#16*7]
	stp	x16,x17,[x27,#16*8]
	stp	x25,x19,[x27,#16*9]
	stp	x20,x21,[x27,#16*10]
	stp	x22,x23,[x27,#16*11]
	str	x24,[x27,#16*12]

	mov	x0,x28			// return value
	ldp	x19,x20,[x29,#16]
	add	sp,sp,#64
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	SHA3_absorb,.-SHA3_absorb

//-----------------------------------------------------------------------
// SHA3_squeeze - copy state bytes to an output buffer, permuting the state
//                with KeccakF1600 each time a full block has been emitted
// In:    x0 = state (25 lanes), read 8 bytes at a time
//        x1 = output buffer
//        x2 = number of bytes to produce
//        x3 = block size in bytes (bytes emitted between permutations)
// Out:   none in registers; output buffer filled, state possibly permuted
// Regs:  x19 = state, x20 = out cursor, x21 = bytes left, x22 = block size,
//        x0 = read cursor into state, x3 = bytes left in current block
// Clobb: x0-x17, flags
//-----------------------------------------------------------------------
.globl	SHA3_squeeze
.type	SHA3_squeeze,%function
.align	5
SHA3_squeeze:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-48]!
	add	x29,sp,#0
	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]

	mov	x19,x0			// put aside arguments
	mov	x20,x1
	mov	x21,x2
	mov	x22,x3

	// A zero-length request must not reach the byte tail below: its
	// countdown would step from 0 to -1 and store 7 bytes past the
	// output buffer.
	cbz	x21,.Lsqueeze_done

.Loop_squeeze:
	ldr	x4,[x0],#8
	cmp	x21,#8
	blo	.Lsqueeze_tail		// 1..7 bytes left: emit them one by one
#ifdef	__AARCH64EB__
	rev	x4,x4
#endif
	str	x4,[x20],#8
	subs	x21,x21,#8
	beq	.Lsqueeze_done

	subs	x3,x3,#8
	bhi	.Loop_squeeze		// more lanes left in this block

	mov	x0,x19
	bl	KeccakF1600		// block exhausted: permute
	mov	x0,x19			// KeccakF1600 clobbers x0-x17
	mov	x3,x22
	b	.Loop_squeeze

.align	4
.Lsqueeze_tail:
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1

.Lsqueeze_done:
	ldp	x19,x20,[sp,#16]
	ldp	x21,x22,[sp,#32]
	ldp	x29,x30,[sp],#48
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	SHA3_squeeze,.-SHA3_squeeze

//-----------------------------------------------------------------------
// KeccakF1600_ce - vector permutation core (file-local, custom contract)
// In:    25 lanes in v0-v24
// Out:   permuted lanes in v0-v24
// Regs:  x9 = round counter (24 down to 0), x10 = pointer into iotas,
//        v25-v31 = temporaries
// Stack: none (leaf)
// Clobb: x9, x10, v25-v31, flags
// The eor3/rax1/xar/bcax operations are emitted as raw .inst words; the
// mnemonic each word stands for is given in its trailing comment.
//-----------------------------------------------------------------------
.type	KeccakF1600_ce,%function
.align	5
KeccakF1600_ce:
	mov	x9,#24
	adr	x10,iotas
	b	.Loop_ce
.align	4
.Loop_ce:
	////////////////////////////////////////////////// Theta
.inst	0xce0f2a99	//eor3 v25.16b,v20.16b,v15.16b,v10.16b
.inst	0xce102eba	//eor3 v26.16b,v21.16b,v16.16b,v11.16b
.inst	0xce1132db	//eor3 v27.16b,v22.16b,v17.16b,v12.16b
.inst	0xce1236fc	//eor3 v28.16b,v23.16b,v18.16b,v13.16b
.inst	0xce133b1d	//eor3 v29.16b,v24.16b,v19.16b,v14.16b
.inst	0xce050339	//eor3 v25.16b,v25.16b,   v5.16b,v0.16b
.inst	0xce06075a	//eor3 v26.16b,v26.16b,   v6.16b,v1.16b
.inst	0xce070b7b	//eor3 v27.16b,v27.16b,   v7.16b,v2.16b
.inst	0xce080f9c	//eor3 v28.16b,v28.16b,   v8.16b,v3.16b
.inst	0xce0913bd	//eor3 v29.16b,v29.16b,   v9.16b,v4.16b

.inst	0xce7b8f3e	//rax1 v30.16b,v25.16b,v27.16b			// D[1]
.inst	0xce7c8f5f	//rax1 v31.16b,v26.16b,v28.16b			// D[2]
.inst	0xce7d8f7b	//rax1 v27.16b,v27.16b,v29.16b			// D[3]
.inst	0xce798f9c	//rax1 v28.16b,v28.16b,v25.16b			// D[4]
.inst	0xce7a8fbd	//rax1 v29.16b,v29.16b,v26.16b			// D[0]

	////////////////////////////////////////////////// Theta+Rho+Pi
.inst	0xce9efc39	//xar v25.16b,   v1.16b,v30.16b,#64-1 // C[0]=A[2][0]

.inst	0xce9e50c1	//xar v1.16b,v6.16b,v30.16b,#64-44
.inst	0xce9cb126	//xar v6.16b,v9.16b,v28.16b,#64-20
.inst	0xce9f0ec9	//xar v9.16b,v22.16b,v31.16b,#64-61
.inst	0xce9c65d6	//xar v22.16b,v14.16b,v28.16b,#64-39
.inst	0xce9dba8e	//xar v14.16b,v20.16b,v29.16b,#64-18

.inst	0xce9f085a	//xar v26.16b,   v2.16b,v31.16b,#64-62 // C[1]=A[4][0]

.inst	0xce9f5582	//xar v2.16b,v12.16b,v31.16b,#64-43
.inst	0xce9b9dac	//xar v12.16b,v13.16b,v27.16b,#64-25
.inst	0xce9ce26d	//xar v13.16b,v19.16b,v28.16b,#64-8
.inst	0xce9b22f3	//xar v19.16b,v23.16b,v27.16b,#64-56
.inst	0xce9d5df7	//xar v23.16b,v15.16b,v29.16b,#64-41

.inst	0xce9c948f	//xar v15.16b,v4.16b,v28.16b,#64-27

.inst	0xce9ccb1c	//xar v28.16b,   v24.16b,v28.16b,#64-14 // D[4]=A[0][4]
.inst	0xce9efab8	//xar v24.16b,v21.16b,v30.16b,#64-2
.inst	0xce9b2508	//xar v8.16b,v8.16b,v27.16b,#64-55 // A[1][3]=A[4][1]
.inst	0xce9e4e04	//xar v4.16b,v16.16b,v30.16b,#64-45 // A[0][4]=A[1][3]
.inst	0xce9d70b0	//xar v16.16b,v5.16b,v29.16b,#64-36

.inst	0xce9b9065	//xar v5.16b,v3.16b,v27.16b,#64-28

	eor	v0.16b,v0.16b,v29.16b

.inst	0xce9bae5b	//xar v27.16b,   v18.16b,v27.16b,#64-21 // D[3]=A[0][3]
.inst	0xce9fc623	//xar v3.16b,v17.16b,v31.16b,#64-15 // A[0][3]=A[3][3]
.inst	0xce9ed97e	//xar v30.16b,   v11.16b,v30.16b,#64-10 // D[1]=A[3][2]
.inst	0xce9fe8ff	//xar v31.16b,   v7.16b,v31.16b,#64-6 // D[2]=A[2][1]
.inst	0xce9df55d	//xar v29.16b,   v10.16b,v29.16b,#64-3 // D[0]=A[1][2]

	////////////////////////////////////////////////// Chi+Iota
.inst	0xce362354	//bcax v20.16b,v26.16b,   v22.16b,v8.16b // A[1][3]=A[4][1]
.inst	0xce375915	//bcax v21.16b,v8.16b,v23.16b,v22.16b // A[1][3]=A[4][1]
.inst	0xce385ed6	//bcax v22.16b,v22.16b,v24.16b,v23.16b
.inst	0xce3a62f7	//bcax v23.16b,v23.16b,v26.16b,   v24.16b
.inst	0xce286b18	//bcax v24.16b,v24.16b,v8.16b,v26.16b // A[1][3]=A[4][1]

	ld1r	{v26.2d},[x10],#8		// next round constant, replicated to both lanes

.inst	0xce330fd1	//bcax v17.16b,v30.16b,   v19.16b,v3.16b // A[0][3]=A[3][3]
.inst	0xce2f4c72	//bcax v18.16b,v3.16b,v15.16b,v19.16b // A[0][3]=A[3][3]
.inst	0xce303e73	//bcax v19.16b,v19.16b,v16.16b,v15.16b
.inst	0xce3e41ef	//bcax v15.16b,v15.16b,v30.16b,   v16.16b
.inst	0xce237a10	//bcax v16.16b,v16.16b,v3.16b,v30.16b // A[0][3]=A[3][3]

.inst	0xce2c7f2a	//bcax v10.16b,v25.16b,   v12.16b,v31.16b
.inst	0xce2d33eb	//bcax v11.16b,v31.16b,   v13.16b,v12.16b
.inst	0xce2e358c	//bcax v12.16b,v12.16b,v14.16b,v13.16b
.inst	0xce3939ad	//bcax v13.16b,v13.16b,v25.16b,   v14.16b
.inst	0xce3f65ce	//bcax v14.16b,v14.16b,v31.16b,   v25.16b

.inst	0xce2913a7	//bcax v7.16b,v29.16b,   v9.16b,v4.16b // A[0][4]=A[1][3]
.inst	0xce252488	//bcax v8.16b,v4.16b,v5.16b,v9.16b // A[0][4]=A[1][3]
.inst	0xce261529	//bcax v9.16b,v9.16b,v6.16b,v5.16b
.inst	0xce3d18a5	//bcax v5.16b,v5.16b,v29.16b,   v6.16b
.inst	0xce2474c6	//bcax v6.16b,v6.16b,v4.16b,v29.16b // A[0][4]=A[1][3]

.inst	0xce207363	//bcax v3.16b,v27.16b,   v0.16b,v28.16b
.inst	0xce210384	//bcax v4.16b,v28.16b,   v1.16b,v0.16b
.inst	0xce220400	//bcax v0.16b,v0.16b,v2.16b,v1.16b
.inst	0xce3b0821	//bcax v1.16b,v1.16b,v27.16b,   v2.16b
.inst	0xce3c6c42	//bcax v2.16b,v2.16b,v28.16b,   v27.16b

	eor	v0.16b,v0.16b,v26.16b		// lane 0 ^= round constant

	subs	x9,x9,#1
	bne	.Loop_ce

	ret
.size	KeccakF1600_ce,.-KeccakF1600_ce

//-----------------------------------------------------------------------
// KeccakF1600_cext - apply the vector permutation to a state held in memory
// In:    x0 = pointer to 25 consecutive 64-bit lanes (200 bytes)
// Out:   state at that address is replaced by the permuted state;
//        x0 is left unchanged
// Stack: 80-byte frame holding x29, x30 and d8-d15
// Clobb: x9, x10, v0-v7, v16-v31, flags; x1-x8 are not touched
//        (SHA3_squeeze_cext depends on x0-x3 surviving this call)
//-----------------------------------------------------------------------
.type	KeccakF1600_cext,%function
.align	5
KeccakF1600_cext:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-80]!
	add	x29,sp,#0
	stp	d8,d9,[sp,#16]		// per ABI requirement
	stp	d10,d11,[sp,#32]
	stp	d12,d13,[sp,#48]
	stp	d14,d15,[sp,#64]
	ldp	d0,d1,[x0,#8*0]
	ldp	d2,d3,[x0,#8*2]
	ldp	d4,d5,[x0,#8*4]
	ldp	d6,d7,[x0,#8*6]
	ldp	d8,d9,[x0,#8*8]
	ldp	d10,d11,[x0,#8*10]
	ldp	d12,d13,[x0,#8*12]
	ldp	d14,d15,[x0,#8*14]
	ldp	d16,d17,[x0,#8*16]
	ldp	d18,d19,[x0,#8*18]
	ldp	d20,d21,[x0,#8*20]
	ldp	d22,d23,[x0,#8*22]
	ldr	d24,[x0,#8*24]
	bl	KeccakF1600_ce
	ldr	x30,[sp,#8]		// bl overwrote x30; only x29 remains for the epilogue
	stp	d0,d1,[x0,#8*0]
	stp	d2,d3,[x0,#8*2]
	stp	d4,d5,[x0,#8*4]
	stp	d6,d7,[x0,#8*6]
	stp	d8,d9,[x0,#8*8]
	stp	d10,d11,[x0,#8*10]
	stp	d12,d13,[x0,#8*12]
	stp	d14,d15,[x0,#8*14]
	stp	d16,d17,[x0,#8*16]
	stp	d18,d19,[x0,#8*18]
	stp	d20,d21,[x0,#8*20]
	stp	d22,d23,[x0,#8*22]
	str	d24,[x0,#8*24]

	ldp	d8,d9,[sp,#16]
	ldp	d10,d11,[sp,#32]
	ldp	d12,d13,[sp,#48]
	ldp	d14,d15,[sp,#64]
	ldr	x29,[sp],#80
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	KeccakF1600_cext,.-KeccakF1600_cext

//-----------------------------------------------------------------------
// SHA3_absorb_cext - vector-path counterpart of SHA3_absorb
// In:    x0 = state (25 lanes), x1 = input bytes,
//        x2 = bytes available at x1, x3 = block size in bytes
// Out:   x0 = number of input bytes left unprocessed (always < block size)
// Regs:  arguments stay in x0-x3 throughout (KeccakF1600_ce only touches
//        x9 and x10); v31 = input lane being absorbed
// Stack: 80-byte frame holding x29, x30 and d8-d15
// Clobb: x1, x2, x9, x10, v0-v7, v16-v31, flags
// NOTE(review): as in the scalar path, the ladder assumes the block size
// is a multiple of 8 in 8..200 - confirm callers.
//-----------------------------------------------------------------------
.globl	SHA3_absorb_cext
.type	SHA3_absorb_cext,%function
.align	5
SHA3_absorb_cext:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-80]!
	add	x29,sp,#0
	stp	d8,d9,[sp,#16]		// per ABI requirement
	stp	d10,d11,[sp,#32]
	stp	d12,d13,[sp,#48]
	stp	d14,d15,[sp,#64]
	ldp	d0,d1,[x0,#8*0]
	ldp	d2,d3,[x0,#8*2]
	ldp	d4,d5,[x0,#8*4]
	ldp	d6,d7,[x0,#8*6]
	ldp	d8,d9,[x0,#8*8]
	ldp	d10,d11,[x0,#8*10]
	ldp	d12,d13,[x0,#8*12]
	ldp	d14,d15,[x0,#8*14]
	ldp	d16,d17,[x0,#8*16]
	ldp	d18,d19,[x0,#8*18]
	ldp	d20,d21,[x0,#8*20]
	ldp	d22,d23,[x0,#8*22]
	ldr	d24,[x0,#8*24]
	b	.Loop_absorb_ce

.align	4
.Loop_absorb_ce:
	subs	x2,x2,x3		// len - bsz
	blo	.Labsorbed_ce		// went below zero: undone at .Labsorbed_ce
	// Lane ladder: same cmp/blo/beq pairing as in SHA3_absorb; the
	// ldr/rev64/eor in between do not write the flags.
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v0.16b,v0.16b,v31.16b
	cmp	x3,#8*(0+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v1.16b,v1.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v2.16b,v2.16b,v31.16b
	cmp	x3,#8*(2+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v3.16b,v3.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v4.16b,v4.16b,v31.16b
	cmp	x3,#8*(4+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v5.16b,v5.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v6.16b,v6.16b,v31.16b
	cmp	x3,#8*(6+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v7.16b,v7.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v8.16b,v8.16b,v31.16b
	cmp	x3,#8*(8+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v9.16b,v9.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v10.16b,v10.16b,v31.16b
	cmp	x3,#8*(10+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v11.16b,v11.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v12.16b,v12.16b,v31.16b
	cmp	x3,#8*(12+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v13.16b,v13.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v14.16b,v14.16b,v31.16b
	cmp	x3,#8*(14+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v15.16b,v15.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v16.16b,v16.16b,v31.16b
	cmp	x3,#8*(16+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v17.16b,v17.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v18.16b,v18.16b,v31.16b
	cmp	x3,#8*(18+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v19.16b,v19.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v20.16b,v20.16b,v31.16b
	cmp	x3,#8*(20+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v21.16b,v21.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v22.16b,v22.16b,v31.16b
	cmp	x3,#8*(22+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v23.16b,v23.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v24.16b,v24.16b,v31.16b

.Lprocess_block_ce:

	bl	KeccakF1600_ce		// x30 is reloaded from the frame in the epilogue

	b	.Loop_absorb_ce

.align	4
.Labsorbed_ce:
	stp	d0,d1,[x0,#8*0]
	stp	d2,d3,[x0,#8*2]
	stp	d4,d5,[x0,#8*4]
	stp	d6,d7,[x0,#8*6]
	stp	d8,d9,[x0,#8*8]
	stp	d10,d11,[x0,#8*10]
	stp	d12,d13,[x0,#8*12]
	stp	d14,d15,[x0,#8*14]
	stp	d16,d17,[x0,#8*16]
	stp	d18,d19,[x0,#8*18]
	stp	d20,d21,[x0,#8*20]
	stp	d22,d23,[x0,#8*22]
	str	d24,[x0,#8*24]
	add	x0,x2,x3		// return value

	ldp	d8,d9,[sp,#16]
	ldp	d10,d11,[sp,#32]
	ldp	d12,d13,[sp,#48]
	ldp	d14,d15,[sp,#64]
	ldp	x29,x30,[sp],#80
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	SHA3_absorb_cext,.-SHA3_absorb_cext

//-----------------------------------------------------------------------
// SHA3_squeeze_cext - vector-path counterpart of SHA3_squeeze
// In:    x0 = state (25 lanes), x1 = output buffer,
//        x2 = number of bytes to produce, x3 = block size in bytes
// Out:   none in registers; output buffer filled, state possibly permuted
// Regs:  x9 = read cursor into state, x10 = bytes left in current block;
//        x0-x3 are relied upon to survive the call to KeccakF1600_cext
// Stack: 16-byte frame holding x29 and x30
// Clobb: x1, x2, x4, x9, x10, v0-v7, v16-v31, flags
//-----------------------------------------------------------------------
.globl	SHA3_squeeze_cext
.type	SHA3_squeeze_cext,%function
.align	5
SHA3_squeeze_cext:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x9,x0
	mov	x10,x3

	// A zero-length request must not reach the byte tail below: its
	// countdown would step from 0 to -1 and store 7 bytes past the
	// output buffer.
	cbz	x2,.Lsqueeze_done_ce

.Loop_squeeze_ce:
	ldr	x4,[x9],#8
	cmp	x2,#8
	blo	.Lsqueeze_tail_ce	// 1..7 bytes left: emit them one by one
#ifdef	__AARCH64EB__
	rev	x4,x4
#endif
	str	x4,[x1],#8
	beq	.Lsqueeze_done_ce	// flags still from the cmp: exactly 8 were left

	sub	x2,x2,#8
	subs	x10,x10,#8
	bhi	.Loop_squeeze_ce	// more lanes left in this block

	bl	KeccakF1600_cext	// block exhausted: permute
	ldr	x30,[sp,#8]		// bl overwrote x30; only x29 remains for the epilogue
	mov	x9,x0
	mov	x10,x3
	b	.Loop_squeeze_ce

.align	4
.Lsqueeze_tail_ce:
	strb	w4,[x1],#1
	lsr	x4,x4,#8
	subs	x2,x2,#1
	beq	.Lsqueeze_done_ce
	strb	w4,[x1],#1
	lsr	x4,x4,#8
	subs	x2,x2,#1
	beq	.Lsqueeze_done_ce
	strb	w4,[x1],#1
	lsr	x4,x4,#8
	subs	x2,x2,#1
	beq	.Lsqueeze_done_ce
	strb	w4,[x1],#1
	lsr	x4,x4,#8
	subs	x2,x2,#1
	beq	.Lsqueeze_done_ce
	strb	w4,[x1],#1
	lsr	x4,x4,#8
	subs	x2,x2,#1
	beq	.Lsqueeze_done_ce
	strb	w4,[x1],#1
	lsr	x4,x4,#8
	subs	x2,x2,#1
	beq	.Lsqueeze_done_ce
	strb	w4,[x1],#1

.Lsqueeze_done_ce:
	ldr	x29,[sp],#16
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	SHA3_squeeze_cext,.-SHA3_squeeze_cext
// NUL-terminated ASCII identification string:
// "Keccak-1600 absorb and squeeze for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.byte	75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2