/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from keccak1600-armv8.pl. */
/*
 * NOTE(review): SHA3_squeeze and SHA3_squeeze_cext carry a zero-length
 * guard (cbz on len) that is not in the stream this file was generated
 * from; it has to be mirrored in keccak1600-armv8.pl or it will be lost
 * on regeneration.
 *
 * Lane-to-register map used by the scalar routines (lane i = 64-bit word
 * i of the 25-word state): lanes 0..17 live in x0..x17, lane 18 lives in
 * x25 (x18 is never touched), lanes 19..24 live in x19..x24.
 * The *_ce/*_cext routines keep lane i in the low 64 bits of v<i>.
 */
.text

.align	8	// strategic alignment and padding that allows to use
		// address value as loop termination condition...
.quad	0,0,0,0,0,0,0,0
// Round constants, one 64-bit word per round (24 entries = 192 bytes).
// With the 256-byte alignment and the 64 bytes of zero padding above,
// the address right after the last entry has its low 8 bits clear;
// KeccakF1600_int relies on that (tst ...,#255) to leave its loop.
.type	iotas,%object
iotas:
.quad	0x0000000000000001
.quad	0x0000000000008082
.quad	0x800000000000808a
.quad	0x8000000080008000
.quad	0x000000000000808b
.quad	0x0000000080000001
.quad	0x8000000080008081
.quad	0x8000000000008009
.quad	0x000000000000008a
.quad	0x0000000000000088
.quad	0x0000000080008009
.quad	0x000000008000000a
.quad	0x000000008000808b
.quad	0x800000000000008b
.quad	0x8000000000008089
.quad	0x8000000000008003
.quad	0x8000000000008002
.quad	0x8000000000000080
.quad	0x000000000000800a
.quad	0x800000008000000a
.quad	0x8000000080008081
.quad	0x8000000000008080
.quad	0x0000000080000001
.quad	0x8000000080008008
.size	iotas,.-iotas

//-----------------------------------------------------------------------
// KeccakF1600_int - scalar permutation core (file-local, non-standard
// calling convention).
// In/out: the 25 state lanes in x0-x17, x25, x19-x24 (see map above).
// Clobb:  x26, x27, x28, x30 (x30 is reloaded from the stack before
//         returning), flags.
// Stack:  uses 32 bytes at [sp,#0] that the CALLER must have reserved:
//         [sp,#0]  x4/x9 parked during Theta
//         [sp,#16] running pointer into iotas
//         [sp,#24] return address
// The loop runs once per iotas entry and ends when the pointer reaches
// the 256-byte boundary that follows the table.
//-----------------------------------------------------------------------
.type	KeccakF1600_int,%function
.align	5
KeccakF1600_int:
	adr	x28,iotas
.inst	0xd503233f			// paciasp
	stp	x28,x30,[sp,#16]		// 32 bytes on top are mine
	b	.Loop
.align	4
.Loop:
	////////////////////////////////////////// Theta
	// Column parities C[0..4] accumulate in x26,x27,x28,x30,x4.
	eor	x26,x0,x5
	stp	x4,x9,[sp,#0]	// offload pair...
	eor	x27,x1,x6
	eor	x28,x2,x7
	eor	x30,x3,x8
	eor	x4,x4,x9
	eor	x26,x26,x10
	eor	x27,x27,x11
	eor	x28,x28,x12
	eor	x30,x30,x13
	eor	x4,x4,x14
	eor	x26,x26,x15
	eor	x27,x27,x16
	eor	x28,x28,x17
	eor	x30,x30,x25
	eor	x4,x4,x19
	eor	x26,x26,x20
	eor	x28,x28,x22
	eor	x27,x27,x21
	eor	x30,x30,x23
	eor	x4,x4,x24

	eor	x9,x26,x28,ror#63	// D[1] = C[0] ^ ROL64(C[2],1)

	eor	x1,x1,x9
	eor	x6,x6,x9
	eor	x11,x11,x9
	eor	x16,x16,x9
	eor	x21,x21,x9

	eor	x9,x27,x30,ror#63	// D[2] = C[1] ^ ROL64(C[3],1)
	eor	x28,x28,x4,ror#63	// D[3] = C[2] ^ ROL64(C[4],1)
	eor	x30,x30,x26,ror#63	// D[4] = C[3] ^ ROL64(C[0],1)
	eor	x4,x4,x27,ror#63	// D[0] = C[4] ^ ROL64(C[1],1)

	eor	x27,   x2,x9		// mov	x27,x2
	eor	x7,x7,x9
	eor	x12,x12,x9
	eor	x17,x17,x9
	eor	x22,x22,x9

	eor	x0,x0,x4
	eor	x5,x5,x4
	eor	x10,x10,x4
	eor	x15,x15,x4
	eor	x20,x20,x4
	ldp	x4,x9,[sp,#0]	// re-load offloaded data
	eor	x26,   x3,x28		// mov	x26,x3
	eor	x8,x8,x28
	eor	x13,x13,x28
	eor	x25,x25,x28
	eor	x23,x23,x28

	eor	x28,   x4,x30		// mov	x28,x4
	eor	x9,x9,x30
	eor	x14,x14,x30
	eor	x19,x19,x30
	eor	x24,x24,x30

	////////////////////////////////////////// Rho+Pi
	// Each ror by 64-n is a rotate-left by n; the destination register
	// selects the lane position after the move.
	mov	x30,x1
	ror	x1,x6,#64-44
	//mov	x27,x2
	ror	x2,x12,#64-43
	//mov	x26,x3
	ror	x3,x25,#64-21
	//mov	x28,x4
	ror	x4,x24,#64-14

	ror	x6,x9,#64-20
	ror	x12,x13,#64-25
	ror	x25,x17,#64-15
	ror	x24,x21,#64-2

	ror	x9,x22,#64-61
	ror	x13,x19,#64-8
	ror	x17,x11,#64-10
	ror	x21,x8,#64-55

	ror	x22,x14,#64-39
	ror	x19,x23,#64-56
	ror	x11,x7,#64-6
	ror	x8,x16,#64-45

	ror	x14,x20,#64-18
	ror	x23,x15,#64-41
	ror	x7,x10,#64-3
	ror	x16,x5,#64-36

	ror	x5,x26,#64-28
	ror	x10,x30,#64-1
	ror	x15,x28,#64-27
	ror	x20,x27,#64-62

	////////////////////////////////////////// Chi+Iota
	// Per row: lane ^= (~next) & next-next, done with bic + eor.
	bic	x26,x2,x1
	bic	x27,x3,x2
	bic	x28,x0,x4
	bic	x30,x1,x0
	eor	x0,x0,x26
	bic	x26,x4,x3
	eor	x1,x1,x27
	ldr	x27,[sp,#16]
	eor	x3,x3,x28
	eor	x4,x4,x30
	eor	x2,x2,x26
	ldr	x30,[x27],#8		// Iota[i++]

	bic	x26,x7,x6
	tst	x27,#255			// are we done?
	str	x27,[sp,#16]
	bic	x27,x8,x7
	bic	x28,x5,x9
	eor	x0,x0,x30		// A[0][0] ^= Iota
	bic	x30,x6,x5
	eor	x5,x5,x26
	bic	x26,x9,x8
	eor	x6,x6,x27
	eor	x8,x8,x28
	eor	x9,x9,x30
	eor	x7,x7,x26

	bic	x26,x12,x11
	bic	x27,x13,x12
	bic	x28,x10,x14
	bic	x30,x11,x10
	eor	x10,x10,x26
	bic	x26,x14,x13
	eor	x11,x11,x27
	eor	x13,x13,x28
	eor	x14,x14,x30
	eor	x12,x12,x26

	bic	x26,x17,x16
	bic	x27,x25,x17
	bic	x28,x15,x19
	bic	x30,x16,x15
	eor	x15,x15,x26
	bic	x26,x19,x25
	eor	x16,x16,x27
	eor	x25,x25,x28
	eor	x19,x19,x30
	eor	x17,x17,x26

	bic	x26,x22,x21
	bic	x27,x23,x22
	bic	x28,x20,x24
	bic	x30,x21,x20
	eor	x20,x20,x26
	bic	x26,x24,x23
	eor	x21,x21,x27
	eor	x23,x23,x28
	eor	x24,x24,x30
	eor	x22,x22,x26

	bne	.Loop			// flags still from the tst above

	ldr	x30,[sp,#24]
.inst	0xd50323bf			// autiasp
	ret
.size	KeccakF1600_int,.-KeccakF1600_int

//-----------------------------------------------------------------------
// KeccakF1600 - apply the permutation to a state held in memory
// (file-local).
// In:     x0 = pointer to the 25-lane state (25 x 8 bytes)
// Out:    state updated in place
// Saves:  x19-x28, x29, x30 in a 128-byte frame addressed through x29.
// Stack:  a further 48 bytes below the frame: [sp,#0..31] is the
//         scratch area KeccakF1600_int expects, [sp,#32] keeps x0.
//-----------------------------------------------------------------------
.type	KeccakF1600,%function
.align	5
KeccakF1600:
.inst	0xd503233f			// paciasp
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0
	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#48

	str	x0,[sp,#32]			// offload argument
	mov	x26,x0
	ldp	x0,x1,[x0,#16*0]
	ldp	x2,x3,[x26,#16*1]
	ldp	x4,x5,[x26,#16*2]
	ldp	x6,x7,[x26,#16*3]
	ldp	x8,x9,[x26,#16*4]
	ldp	x10,x11,[x26,#16*5]
	ldp	x12,x13,[x26,#16*6]
	ldp	x14,x15,[x26,#16*7]
	ldp	x16,x17,[x26,#16*8]
	ldp	x25,x19,[x26,#16*9]
	ldp	x20,x21,[x26,#16*10]
	ldp	x22,x23,[x26,#16*11]
	ldr	x24,[x26,#16*12]

	bl	KeccakF1600_int

	ldr	x26,[sp,#32]
	stp	x0,x1,[x26,#16*0]
	stp	x2,x3,[x26,#16*1]
	stp	x4,x5,[x26,#16*2]
	stp	x6,x7,[x26,#16*3]
	stp	x8,x9,[x26,#16*4]
	stp	x10,x11,[x26,#16*5]
	stp	x12,x13,[x26,#16*6]
	stp	x14,x15,[x26,#16*7]
	stp	x16,x17,[x26,#16*8]
	stp	x25,x19,[x26,#16*9]
	stp	x20,x21,[x26,#16*10]
	stp	x22,x23,[x26,#16*11]
	str	x24,[x26,#16*12]

	ldp	x19,x20,[x29,#16]
	add	sp,sp,#48
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
.inst	0xd50323bf			// autiasp
	ret
.size	KeccakF1600,.-KeccakF1600

//-----------------------------------------------------------------------
// SHA3_absorb - XOR whole input blocks into the state and permute.
// In:     x0 = uint64_t A[5][5]   state
//         x1 = const void *inp    input
//         x2 = size_t len         bytes available
//         x3 = size_t bsz         block size in bytes
// Out:    x0 = bytes left over (the value of len once len < bsz)
// Saves:  x19-x28, x29, x30 in a 128-byte frame addressed through x29.
// Stack:  64 bytes below the frame:
//         [sp,#0..31] scratch for KeccakF1600_int
//         [sp,#32] A   [sp,#40] inp   [sp,#48] len   [sp,#56] bsz
// Input words are read 8 bytes at a time and byte-swapped on big-endian
// builds.  After every second lane bsz is compared against the number
// of bytes consumed so far: blo leaves after the even lane, beq after
// the odd one.
// NOTE(review): those exits assume bsz is a non-zero multiple of 8 no
// larger than 200 - confirm against callers.
//-----------------------------------------------------------------------
.globl	SHA3_absorb
.type	SHA3_absorb,%function
.align	5
SHA3_absorb:
.inst	0xd503233f			// paciasp
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0
	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#64

	stp	x0,x1,[sp,#32]			// offload arguments
	stp	x2,x3,[sp,#48]

	mov	x26,x0			// uint64_t A[5][5]
	mov	x27,x1			// const void *inp
	mov	x28,x2			// size_t len
	mov	x30,x3			// size_t bsz
	ldp	x0,x1,[x26,#16*0]
	ldp	x2,x3,[x26,#16*1]
	ldp	x4,x5,[x26,#16*2]
	ldp	x6,x7,[x26,#16*3]
	ldp	x8,x9,[x26,#16*4]
	ldp	x10,x11,[x26,#16*5]
	ldp	x12,x13,[x26,#16*6]
	ldp	x14,x15,[x26,#16*7]
	ldp	x16,x17,[x26,#16*8]
	ldp	x25,x19,[x26,#16*9]
	ldp	x20,x21,[x26,#16*10]
	ldp	x22,x23,[x26,#16*11]
	ldr	x24,[x26,#16*12]
	b	.Loop_absorb

.align	4
.Loop_absorb:
	subs	x26,x28,x30		// len - bsz
	blo	.Labsorbed

	str	x26,[sp,#48]			// save len - bsz
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x0,x0,x26
	cmp	x30,#8*(0+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x1,x1,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x2,x2,x26
	cmp	x30,#8*(2+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x3,x3,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x4,x4,x26
	cmp	x30,#8*(4+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x5,x5,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x6,x6,x26
	cmp	x30,#8*(6+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x7,x7,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x8,x8,x26
	cmp	x30,#8*(8+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x9,x9,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x10,x10,x26
	cmp	x30,#8*(10+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x11,x11,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x12,x12,x26
	cmp	x30,#8*(12+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x13,x13,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x14,x14,x26
	cmp	x30,#8*(14+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x15,x15,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x16,x16,x26
	cmp	x30,#8*(16+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x17,x17,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x25,x25,x26
	cmp	x30,#8*(18+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x19,x19,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x20,x20,x26
	cmp	x30,#8*(20+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x21,x21,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x22,x22,x26
	cmp	x30,#8*(22+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x23,x23,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8		// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x24,x24,x26

.Lprocess_block:
	str	x27,[sp,#40]			// save inp

	bl	KeccakF1600_int

	ldr	x27,[sp,#40]			// restore arguments
	ldp	x28,x30,[sp,#48]
	b	.Loop_absorb

.align	4
.Labsorbed:
	ldr	x27,[sp,#32]
	stp	x0,x1,[x27,#16*0]
	stp	x2,x3,[x27,#16*1]
	stp	x4,x5,[x27,#16*2]
	stp	x6,x7,[x27,#16*3]
	stp	x8,x9,[x27,#16*4]
	stp	x10,x11,[x27,#16*5]
	stp	x12,x13,[x27,#16*6]
	stp	x14,x15,[x27,#16*7]
	stp	x16,x17,[x27,#16*8]
	stp	x25,x19,[x27,#16*9]
	stp	x20,x21,[x27,#16*10]
	stp	x22,x23,[x27,#16*11]
	str	x24,[x27,#16*12]

	mov	x0,x28			// return value
	ldp	x19,x20,[x29,#16]
	add	sp,sp,#64
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
.inst	0xd50323bf			// autiasp
	ret
.size	SHA3_absorb,.-SHA3_absorb

//-----------------------------------------------------------------------
// SHA3_squeeze - copy output bytes out of the state, permuting the
// state each time a full block has been emitted and more is wanted.
// In:     x0 = state, x1 = output buffer, x2 = len (bytes wanted),
//         x3 = bsz (block size in bytes)
// Out:    nothing in registers; len bytes written at x1
// Saves:  x19-x22, x29, x30 in a 48-byte frame.
// Live:   x19 = state, x20 = output cursor, x21 = bytes still wanted,
//         x22 = bsz; x0 = read cursor in the state, x3 = bytes left in
//         the current block.
// A request of zero bytes returns without touching the output buffer.
//-----------------------------------------------------------------------
.globl	SHA3_squeeze
.type	SHA3_squeeze,%function
.align	5
SHA3_squeeze:
.inst	0xd503233f			// paciasp
	stp	x29,x30,[sp,#-48]!
	add	x29,sp,#0
	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]

	mov	x19,x0			// put aside arguments
	mov	x20,x1
	mov	x21,x2
	mov	x22,x3
	// len == 0 must not reach the tail below: it stores before it
	// decrements, so a zero count would wrap and emit 7 bytes.
	cbz	x21,.Lsqueeze_done

.Loop_squeeze:
	ldr	x4,[x0],#8
	cmp	x21,#8
	blo	.Lsqueeze_tail
#ifdef	__AARCH64EB__
	rev	x4,x4
#endif
	str	x4,[x20],#8
	subs	x21,x21,#8
	beq	.Lsqueeze_done

	subs	x3,x3,#8
	bhi	.Loop_squeeze

	mov	x0,x19			// block exhausted: permute, rewind
	bl	KeccakF1600
	mov	x0,x19
	mov	x3,x22
	b	.Loop_squeeze

.align	4
.Lsqueeze_tail:
	// 1..7 bytes left: emit x4 one byte at a time, low byte first.
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1

.Lsqueeze_done:
	ldp	x19,x20,[sp,#16]
	ldp	x21,x22,[sp,#32]
	ldp	x29,x30,[sp],#48
.inst	0xd50323bf			// autiasp
	ret
.size	SHA3_squeeze,.-SHA3_squeeze

//-----------------------------------------------------------------------
// KeccakF1600_ce - permutation core built on EOR3/RAX1/XAR/BCAX
// (file-local, non-standard calling convention).
// In/out: lane i in v<i>, i = 0..24
// Clobb:  x9 (loop counter), x10 (iotas cursor), x11 (round constant),
//         v25-v31, flags.  Does not touch the stack or x30.
// Each loop iteration performs two rounds (two iotas entries are
// consumed); x9 counts 12 iterations.  The second round reads and
// writes a permuted register assignment that leaves the lanes back in
// v0-v24 at the bottom of the loop.
// The SIMD instructions are emitted as raw .inst words with the
// mnemonic form in the trailing comment - presumably so the file still
// assembles with toolchains that lack these mnemonics.
//-----------------------------------------------------------------------
.type	KeccakF1600_ce,%function
.align	5
KeccakF1600_ce:
	mov	x9,#12
	adr	x10,iotas
	b	.Loop_ce
.align	4
.Loop_ce:
	////////////////////////////////////////////////// Theta
.inst	0xce052819	//eor3 v25.16b,v0.16b,v5.16b,v10.16b
.inst	0xce062c3a	//eor3 v26.16b,v1.16b,v6.16b,v11.16b
.inst	0xce07305b	//eor3 v27.16b,v2.16b,v7.16b,v12.16b
.inst	0xce08347c	//eor3 v28.16b,v3.16b,v8.16b,v13.16b
.inst	0xce09389d	//eor3 v29.16b,v4.16b,v9.16b,v14.16b
.inst	0xce0f5339	//eor3 v25.16b,v25.16b,   v15.16b,v20.16b
.inst	0xce10575a	//eor3 v26.16b,v26.16b,   v16.16b,v21.16b
.inst	0xce115b7b	//eor3 v27.16b,v27.16b,   v17.16b,v22.16b
.inst	0xce125f9c	//eor3 v28.16b,v28.16b,   v18.16b,v23.16b
.inst	0xce1363bd	//eor3 v29.16b,v29.16b,   v19.16b,v24.16b

.inst	0xce7b8f3e	//rax1 v30.16b,v25.16b,v27.16b			// D[1]
.inst	0xce7c8f5f	//rax1 v31.16b,v26.16b,v28.16b			// D[2]
.inst	0xce7d8f7b	//rax1 v27.16b,v27.16b,v29.16b			// D[3]
.inst	0xce798f9c	//rax1 v28.16b,v28.16b,v25.16b			// D[4]
.inst	0xce7a8fbd	//rax1 v29.16b,v29.16b,v26.16b			// D[0]

	////////////////////////////////////////////////// Theta+Rho+Pi
.inst	0xce9e50d9	//xar v25.16b,   v6.16b,v30.16b,#64-44	// C[0]=A[0][1]
.inst	0xce9cb126	//xar v6.16b,v9.16b,v28.16b,#64-20
.inst	0xce9f0ec9	//xar v9.16b,v22.16b,v31.16b,#64-61
.inst	0xce9c65d6	//xar v22.16b,v14.16b,v28.16b,#64-39
.inst	0xce9dba8e	//xar v14.16b,v20.16b,v29.16b,#64-18

.inst	0xce9f0854	//xar v20.16b,v2.16b,v31.16b,#64-62

.inst	0xce9f5582	//xar v2.16b,v12.16b,v31.16b,#64-43
.inst	0xce9b9dac	//xar v12.16b,v13.16b,v27.16b,#64-25
.inst	0xce9ce26d	//xar v13.16b,v19.16b,v28.16b,#64-8
.inst	0xce9b22f3	//xar v19.16b,v23.16b,v27.16b,#64-56
.inst	0xce9d5df7	//xar v23.16b,v15.16b,v29.16b,#64-41

.inst	0xce9c948f	//xar v15.16b,v4.16b,v28.16b,#64-27

	eor	v0.16b,v0.16b,v29.16b
	ldr	x11,[x10],#8

.inst	0xce9bae5a	//xar v26.16b,   v18.16b,v27.16b,#64-21	// C[1]=A[0][3]
.inst	0xce9fc632	//xar v18.16b,v17.16b,v31.16b,#64-15
.inst	0xce9ed971	//xar v17.16b,v11.16b,v30.16b,#64-10
.inst	0xce9fe8eb	//xar v11.16b,v7.16b,v31.16b,#64-6
.inst	0xce9df547	//xar v7.16b,v10.16b,v29.16b,#64-3

.inst	0xce9efc2a	//xar v10.16b,v1.16b,v30.16b,#64-1	// *

.inst	0xce9ccb04	//xar v4.16b,v24.16b,v28.16b,#64-14
.inst	0xce9efab8	//xar v24.16b,v21.16b,v30.16b,#64-2
.inst	0xce9b2515	//xar v21.16b,v8.16b,v27.16b,#64-55
.inst	0xce9e4e08	//xar v8.16b,v16.16b,v30.16b,#64-45
.inst	0xce9d70b0	//xar v16.16b,v5.16b,v29.16b,#64-36

.inst	0xce9b907b	//xar v27.16b,   v3.16b,v27.16b,#64-28	// C[2]=A[1][0]

	////////////////////////////////////////////////// Chi+Iota
	dup	v31.2d,x11				// borrow C[6]
.inst	0xce22641c	//bcax v28.16b,   v0.16b,v2.16b,v25.16b	// *
.inst	0xce3a0b21	//bcax v1.16b,v25.16b,   v26.16b,   v2.16b	// *
.inst	0xce246842	//bcax v2.16b,v2.16b,v4.16b,v26.16b
.inst	0xce201343	//bcax v3.16b,v26.16b,   v0.16b,v4.16b
.inst	0xce390084	//bcax v4.16b,v4.16b,v25.16b,   v0.16b

.inst	0xce271b65	//bcax v5.16b,v27.16b,   v7.16b,v6.16b	// *
.inst	0xce281cd9	//bcax v25.16b,   v6.16b,v8.16b,v7.16b	// *
.inst	0xce2920e7	//bcax v7.16b,v7.16b,v9.16b,v8.16b
.inst	0xce3b2508	//bcax v8.16b,v8.16b,v27.16b,   v9.16b
.inst	0xce266d29	//bcax v9.16b,v9.16b,v6.16b,v27.16b

	eor	v0.16b,v28.16b,v31.16b			// Iota

.inst	0xce2c2d5a	//bcax v26.16b,   v10.16b,v12.16b,v11.16b	// *
.inst	0xce2d317b	//bcax v27.16b,   v11.16b,v13.16b,v12.16b	// *
.inst	0xce2e358c	//bcax v12.16b,v12.16b,v14.16b,v13.16b
.inst	0xce2a39ad	//bcax v13.16b,v13.16b,v10.16b,v14.16b
.inst	0xce2b29ce	//bcax v14.16b,v14.16b,v11.16b,v10.16b

.inst	0xce3141fc	//bcax v28.16b,   v15.16b,v17.16b,v16.16b	// *
.inst	0xce32461d	//bcax v29.16b,   v16.16b,v18.16b,v17.16b	// *
.inst	0xce334a31	//bcax v17.16b,v17.16b,v19.16b,v18.16b
.inst	0xce2f4e52	//bcax v18.16b,v18.16b,v15.16b,v19.16b
.inst	0xce303e73	//bcax v19.16b,v19.16b,v16.16b,v15.16b

.inst	0xce36569e	//bcax v30.16b,   v20.16b,v22.16b,v21.16b	// *
.inst	0xce375abf	//bcax v31.16b,   v21.16b,v23.16b,v22.16b	// *
.inst	0xce385ed6	//bcax v22.16b,v22.16b,v24.16b,v23.16b
.inst	0xce3462f7	//bcax v23.16b,v23.16b,v20.16b,v24.16b
.inst	0xce355318	//bcax v24.16b,v24.16b,v21.16b,v20.16b
	////////////////////////////////////////////////// Theta
.inst	0xce056806	//eor3 v6.16b,v0.16b,v5.16b,v26.16b
.inst	0xce196c2a	//eor3 v10.16b,v1.16b,v25.16b,v27.16b
.inst	0xce07304b	//eor3 v11.16b,v2.16b,v7.16b,v12.16b
.inst	0xce08346f	//eor3 v15.16b,v3.16b,v8.16b,v13.16b
.inst	0xce093890	//eor3 v16.16b,v4.16b,v9.16b,v14.16b
.inst	0xce1c78c6	//eor3 v6.16b,v6.16b,   v28.16b,v30.16b
.inst	0xce1d7d4a	//eor3 v10.16b,v10.16b,   v29.16b,v31.16b
.inst	0xce11596b	//eor3 v11.16b,v11.16b,   v17.16b,v22.16b
.inst	0xce125def	//eor3 v15.16b,v15.16b,   v18.16b,v23.16b
.inst	0xce136210	//eor3 v16.16b,v16.16b,   v19.16b,v24.16b

.inst	0xce6b8cd4	//rax1 v20.16b,v6.16b,v11.16b			// D[1]
.inst	0xce6f8d55	//rax1 v21.16b,v10.16b,v15.16b			// D[2]
.inst	0xce708d6b	//rax1 v11.16b,v11.16b,v16.16b			// D[3]
.inst	0xce668def	//rax1 v15.16b,v15.16b,v6.16b			// D[4]
.inst	0xce6a8e10	//rax1 v16.16b,v16.16b,v10.16b			// D[0]

	////////////////////////////////////////////////// Theta+Rho+Pi
.inst	0xce945326	//xar v6.16b,   v25.16b,v20.16b,#64-44	// C[0]=A[0][1]
.inst	0xce8fb139	//xar v25.16b,v9.16b,v15.16b,#64-20
.inst	0xce950ec9	//xar v9.16b,v22.16b,v21.16b,#64-61
.inst	0xce8f65d6	//xar v22.16b,v14.16b,v15.16b,#64-39
.inst	0xce90bbce	//xar v14.16b,v30.16b,v16.16b,#64-18

.inst	0xce95085e	//xar v30.16b,v2.16b,v21.16b,#64-62

.inst	0xce955582	//xar v2.16b,v12.16b,v21.16b,#64-43
.inst	0xce8b9dac	//xar v12.16b,v13.16b,v11.16b,#64-25
.inst	0xce8fe26d	//xar v13.16b,v19.16b,v15.16b,#64-8
.inst	0xce8b22f3	//xar v19.16b,v23.16b,v11.16b,#64-56
.inst	0xce905f97	//xar v23.16b,v28.16b,v16.16b,#64-41

.inst	0xce8f949c	//xar v28.16b,v4.16b,v15.16b,#64-27

	eor	v0.16b,v0.16b,v16.16b
	ldr	x11,[x10],#8

.inst	0xce8bae4a	//xar v10.16b,   v18.16b,v11.16b,#64-21	// C[1]=A[0][3]
.inst	0xce95c632	//xar v18.16b,v17.16b,v21.16b,#64-15
.inst	0xce94db71	//xar v17.16b,v27.16b,v20.16b,#64-10
.inst	0xce95e8fb	//xar v27.16b,v7.16b,v21.16b,#64-6
.inst	0xce90f747	//xar v7.16b,v26.16b,v16.16b,#64-3

.inst	0xce94fc3a	//xar v26.16b,v1.16b,v20.16b,#64-1	// *

.inst	0xce8fcb04	//xar v4.16b,v24.16b,v15.16b,#64-14
.inst	0xce94fbf8	//xar v24.16b,v31.16b,v20.16b,#64-2
.inst	0xce8b251f	//xar v31.16b,v8.16b,v11.16b,#64-55
.inst	0xce944fa8	//xar v8.16b,v29.16b,v20.16b,#64-45
.inst	0xce9070bd	//xar v29.16b,v5.16b,v16.16b,#64-36

.inst	0xce8b906b	//xar v11.16b,   v3.16b,v11.16b,#64-28	// C[2]=A[1][0]

	////////////////////////////////////////////////// Chi+Iota
	dup	v21.2d,x11				// borrow C[6]
.inst	0xce22180f	//bcax v15.16b,   v0.16b,v2.16b,v6.16b	// *
.inst	0xce2a08c1	//bcax v1.16b,v6.16b,   v10.16b,   v2.16b	// *
.inst	0xce242842	//bcax v2.16b,v2.16b,v4.16b,v10.16b
.inst	0xce201143	//bcax v3.16b,v10.16b,   v0.16b,v4.16b
.inst	0xce260084	//bcax v4.16b,v4.16b,v6.16b,   v0.16b

.inst	0xce276565	//bcax v5.16b,v11.16b,   v7.16b,v25.16b	// *
.inst	0xce281f26	//bcax v6.16b,   v25.16b,v8.16b,v7.16b	// *
.inst	0xce2920e7	//bcax v7.16b,v7.16b,v9.16b,v8.16b
.inst	0xce2b2508	//bcax v8.16b,v8.16b,v11.16b,   v9.16b
.inst	0xce392d29	//bcax v9.16b,v9.16b,v25.16b,v11.16b

	eor	v0.16b,v15.16b,v21.16b			// Iota

.inst	0xce2c6f4a	//bcax v10.16b,   v26.16b,v12.16b,v27.16b	// *
.inst	0xce2d336b	//bcax v11.16b,   v27.16b,v13.16b,v12.16b	// *
.inst	0xce2e358c	//bcax v12.16b,v12.16b,v14.16b,v13.16b
.inst	0xce3a39ad	//bcax v13.16b,v13.16b,v26.16b,v14.16b
.inst	0xce3b69ce	//bcax v14.16b,v14.16b,v27.16b,v26.16b

.inst	0xce31778f	//bcax v15.16b,   v28.16b,v17.16b,v29.16b	// *
.inst	0xce3247b0	//bcax v16.16b,   v29.16b,v18.16b,v17.16b	// *
.inst	0xce334a31	//bcax v17.16b,v17.16b,v19.16b,v18.16b
.inst	0xce3c4e52	//bcax v18.16b,v18.16b,v28.16b,v19.16b
.inst	0xce3d7273	//bcax v19.16b,v19.16b,v29.16b,v28.16b

.inst	0xce367fd4	//bcax v20.16b,   v30.16b,v22.16b,v31.16b	// *
.inst	0xce375bf5	//bcax v21.16b,   v31.16b,v23.16b,v22.16b	// *
.inst	0xce385ed6	//bcax v22.16b,v22.16b,v24.16b,v23.16b
.inst	0xce3e62f7	//bcax v23.16b,v23.16b,v30.16b,v24.16b
.inst	0xce3f7b18	//bcax v24.16b,v24.16b,v31.16b,v30.16b
	subs	x9,x9,#1
	bne	.Loop_ce

	ret
.size	KeccakF1600_ce,.-KeccakF1600_ce

//-----------------------------------------------------------------------
// KeccakF1600_cext - apply KeccakF1600_ce to a state held in memory
// (file-local).
// In:     x0 = pointer to the 25-lane state; x0 is left unchanged
// Saves:  x29, x30, d8-d15 in an 80-byte frame.
// Clobb:  whatever KeccakF1600_ce clobbers (x9-x11, v25-v31, flags)
//         plus v0-v7 and v16-v24, which carry state lanes.
// x30 is reloaded from the frame right after the call; the epilogue
// therefore only needs to pop x29.
//-----------------------------------------------------------------------
.type	KeccakF1600_cext,%function
.align	5
KeccakF1600_cext:
.inst	0xd503233f		// paciasp
	stp	x29,x30,[sp,#-80]!
	add	x29,sp,#0
	stp	d8,d9,[sp,#16]		// per ABI requirement
	stp	d10,d11,[sp,#32]
	stp	d12,d13,[sp,#48]
	stp	d14,d15,[sp,#64]
	ldp	d0,d1,[x0,#8*0]
	ldp	d2,d3,[x0,#8*2]
	ldp	d4,d5,[x0,#8*4]
	ldp	d6,d7,[x0,#8*6]
	ldp	d8,d9,[x0,#8*8]
	ldp	d10,d11,[x0,#8*10]
	ldp	d12,d13,[x0,#8*12]
	ldp	d14,d15,[x0,#8*14]
	ldp	d16,d17,[x0,#8*16]
	ldp	d18,d19,[x0,#8*18]
	ldp	d20,d21,[x0,#8*20]
	ldp	d22,d23,[x0,#8*22]
	ldr	d24,[x0,#8*24]
	bl	KeccakF1600_ce
	ldr	x30,[sp,#8]
	stp	d0,d1,[x0,#8*0]
	stp	d2,d3,[x0,#8*2]
	stp	d4,d5,[x0,#8*4]
	stp	d6,d7,[x0,#8*6]
	stp	d8,d9,[x0,#8*8]
	stp	d10,d11,[x0,#8*10]
	stp	d12,d13,[x0,#8*12]
	stp	d14,d15,[x0,#8*14]
	stp	d16,d17,[x0,#8*16]
	stp	d18,d19,[x0,#8*18]
	stp	d20,d21,[x0,#8*20]
	stp	d22,d23,[x0,#8*22]
	str	d24,[x0,#8*24]

	ldp	d8,d9,[sp,#16]
	ldp	d10,d11,[sp,#32]
	ldp	d12,d13,[sp,#48]
	ldp	d14,d15,[sp,#64]
	ldr	x29,[sp],#80
.inst	0xd50323bf		// autiasp
	ret
.size	KeccakF1600_cext,.-KeccakF1600_cext

//-----------------------------------------------------------------------
// SHA3_absorb_cext - same contract as SHA3_absorb, using KeccakF1600_ce.
// In:     x0 = state, x1 = inp, x2 = len, x3 = bsz
// Out:    x0 = bytes left over
// Saves:  x29, x30, d8-d15 in an 80-byte frame.
// x2 is decremented by bsz at the top of every pass, so at exit it has
// gone below zero (mod 2^64) by one block; "add x0,x2,x3" undoes that
// last subtraction to produce the return value.
// d31 is the input staging register; lane/exit structure mirrors
// SHA3_absorb (cmp/blo after even lanes, beq after odd lanes).
// NOTE(review): as in SHA3_absorb, the exits assume bsz is a non-zero
// multiple of 8 no larger than 200 - confirm against callers.
//-----------------------------------------------------------------------
.globl	SHA3_absorb_cext
.type	SHA3_absorb_cext,%function
.align	5
SHA3_absorb_cext:
.inst	0xd503233f		// paciasp
	stp	x29,x30,[sp,#-80]!
	add	x29,sp,#0
	stp	d8,d9,[sp,#16]		// per ABI requirement
	stp	d10,d11,[sp,#32]
	stp	d12,d13,[sp,#48]
	stp	d14,d15,[sp,#64]
	ldp	d0,d1,[x0,#8*0]
	ldp	d2,d3,[x0,#8*2]
	ldp	d4,d5,[x0,#8*4]
	ldp	d6,d7,[x0,#8*6]
	ldp	d8,d9,[x0,#8*8]
	ldp	d10,d11,[x0,#8*10]
	ldp	d12,d13,[x0,#8*12]
	ldp	d14,d15,[x0,#8*14]
	ldp	d16,d17,[x0,#8*16]
	ldp	d18,d19,[x0,#8*18]
	ldp	d20,d21,[x0,#8*20]
	ldp	d22,d23,[x0,#8*22]
	ldr	d24,[x0,#8*24]
	b	.Loop_absorb_ce

.align	4
.Loop_absorb_ce:
	subs	x2,x2,x3		// len - bsz
	blo	.Labsorbed_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v0.16b,v0.16b,v31.16b
	cmp	x3,#8*(0+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v1.16b,v1.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v2.16b,v2.16b,v31.16b
	cmp	x3,#8*(2+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v3.16b,v3.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v4.16b,v4.16b,v31.16b
	cmp	x3,#8*(4+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v5.16b,v5.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v6.16b,v6.16b,v31.16b
	cmp	x3,#8*(6+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v7.16b,v7.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v8.16b,v8.16b,v31.16b
	cmp	x3,#8*(8+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v9.16b,v9.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v10.16b,v10.16b,v31.16b
	cmp	x3,#8*(10+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v11.16b,v11.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v12.16b,v12.16b,v31.16b
	cmp	x3,#8*(12+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v13.16b,v13.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v14.16b,v14.16b,v31.16b
	cmp	x3,#8*(14+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v15.16b,v15.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v16.16b,v16.16b,v31.16b
	cmp	x3,#8*(16+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v17.16b,v17.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v18.16b,v18.16b,v31.16b
	cmp	x3,#8*(18+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v19.16b,v19.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v20.16b,v20.16b,v31.16b
	cmp	x3,#8*(20+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v21.16b,v21.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v22.16b,v22.16b,v31.16b
	cmp	x3,#8*(22+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v23.16b,v23.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v24.16b,v24.16b,v31.16b

.Lprocess_block_ce:

	bl	KeccakF1600_ce

	b	.Loop_absorb_ce

.align	4
.Labsorbed_ce:
	stp	d0,d1,[x0,#8*0]
	stp	d2,d3,[x0,#8*2]
	stp	d4,d5,[x0,#8*4]
	stp	d6,d7,[x0,#8*6]
	stp	d8,d9,[x0,#8*8]
	stp	d10,d11,[x0,#8*10]
	stp	d12,d13,[x0,#8*12]
	stp	d14,d15,[x0,#8*14]
	stp	d16,d17,[x0,#8*16]
	stp	d18,d19,[x0,#8*18]
	stp	d20,d21,[x0,#8*20]
	stp	d22,d23,[x0,#8*22]
	str	d24,[x0,#8*24]
	add	x0,x2,x3		// return value

	ldp	d8,d9,[sp,#16]
	ldp	d10,d11,[sp,#32]
	ldp	d12,d13,[sp,#48]
	ldp	d14,d15,[sp,#64]
	ldp	x29,x30,[sp],#80
.inst	0xd50323bf		// autiasp
	ret
.size	SHA3_absorb_cext,.-SHA3_absorb_cext

//-----------------------------------------------------------------------
// SHA3_squeeze_cext - same contract as SHA3_squeeze, using
// KeccakF1600_cext for the permutation.
// In:     x0 = state, x1 = output buffer, x2 = len, x3 = bsz
// Saves:  x29, x30 in a 16-byte frame.
// Live:   x0/x3 stay untouched as the state pointer and bsz; x9 = read
//         cursor in the state, x10 = bytes left in the current block,
//         x1 = output cursor, x2 = bytes still wanted.
// x9/x10 are re-derived after each call because the permutation
// clobbers them; x30 is reloaded from the frame after the call.
// A request of zero bytes returns without touching the output buffer.
//-----------------------------------------------------------------------
.globl	SHA3_squeeze_cext
.type	SHA3_squeeze_cext,%function
.align	5
SHA3_squeeze_cext:
.inst	0xd503233f		// paciasp
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	// len == 0 must not reach the tail below: it stores before it
	// decrements, so a zero count would wrap and emit 7 bytes.
	cbz	x2,.Lsqueeze_done_ce
	mov	x9,x0
	mov	x10,x3

.Loop_squeeze_ce:
	ldr	x4,[x9],#8
	cmp	x2,#8
	blo	.Lsqueeze_tail_ce
#ifdef	__AARCH64EB__
	rev	x4,x4
#endif
	str	x4,[x1],#8
	beq	.Lsqueeze_done_ce	// flags from cmp: exactly 8 were left

	sub	x2,x2,#8
	subs	x10,x10,#8
	bhi	.Loop_squeeze_ce

	bl	KeccakF1600_cext
	ldr	x30,[sp,#8]
	mov	x9,x0
	mov	x10,x3
	b	.Loop_squeeze_ce

.align	4
.Lsqueeze_tail_ce:
	// 1..7 bytes left: emit x4 one byte at a time, low byte first.
	strb	w4,[x1],#1
	lsr	x4,x4,#8
	subs	x2,x2,#1
	beq	.Lsqueeze_done_ce
	strb	w4,[x1],#1
	lsr	x4,x4,#8
	subs	x2,x2,#1
	beq	.Lsqueeze_done_ce
	strb	w4,[x1],#1
	lsr	x4,x4,#8
	subs	x2,x2,#1
	beq	.Lsqueeze_done_ce
	strb	w4,[x1],#1
	lsr	x4,x4,#8
	subs	x2,x2,#1
	beq	.Lsqueeze_done_ce
	strb	w4,[x1],#1
	lsr	x4,x4,#8
	subs	x2,x2,#1
	beq	.Lsqueeze_done_ce
	strb	w4,[x1],#1
	lsr	x4,x4,#8
	subs	x2,x2,#1
	beq	.Lsqueeze_done_ce
	strb	w4,[x1],#1

.Lsqueeze_done_ce:
	ldr	x29,[sp],#16
.inst	0xd50323bf		// autiasp
	ret
.size	SHA3_squeeze_cext,.-SHA3_squeeze_cext
// NUL-terminated ASCII identification string:
// "Keccak-1600 absorb and squeeze for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.byte	75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2