/* Do not modify. This file is auto-generated from keccak1600-armv8.pl. */
/*
 * NOTE(review): this file is generated, so the zero-length guards added to
 * SHA3_squeeze and SHA3_squeeze_cext below have to be mirrored in
 * keccak1600-armv8.pl or they will be lost on regeneration.
 *
 * Contents (GNU as syntax for AArch64, passed through the C preprocessor):
 *
 *   scalar path:  KeccakF1600_int, KeccakF1600, SHA3_absorb, SHA3_squeeze
 *   vector path:  KeccakF1600_ce, KeccakF1600_cext, SHA3_absorb_cext,
 *                 SHA3_squeeze_cext
 *
 * Only SHA3_absorb, SHA3_squeeze, SHA3_absorb_cext and SHA3_squeeze_cext
 * carry a .globl directive in this file.
 *
 * The state is 25 consecutive 64-bit lanes in memory.  Register mapping
 * used throughout:
 *
 *   scalar path:  lanes 0..17 -> x0..x17, lane 18 -> x25,
 *                 lanes 19..24 -> x19..x24   (x18 is never touched)
 *   vector path:  lanes 0..24 -> low 64 bits of v0..v24
 */
#include "arm_arch.h"

.section	.rodata

.align	8	// strategic alignment and padding that allows to use
		// address value as loop termination condition...
		// (.align 8 is a 2^8 = 256-byte boundary here; 64 bytes of
		// padding + 24*8 bytes of constants = 256, so the pointer
		// that has just stepped past the last constant has its low
		// 8 bits clear, which is what KeccakF1600_int tests.)
.quad	0,0,0,0,0,0,0,0
.type	iotas,%object
iotas:				// 24 round constants, one per round
.quad	0x0000000000000001
.quad	0x0000000000008082
.quad	0x800000000000808a
.quad	0x8000000080008000
.quad	0x000000000000808b
.quad	0x0000000080000001
.quad	0x8000000080008081
.quad	0x8000000000008009
.quad	0x000000000000008a
.quad	0x0000000000000088
.quad	0x0000000080008009
.quad	0x000000008000000a
.quad	0x000000008000808b
.quad	0x800000000000008b
.quad	0x8000000000008089
.quad	0x8000000000008003
.quad	0x8000000000008002
.quad	0x8000000000000080
.quad	0x000000000000800a
.quad	0x800000008000000a
.quad	0x8000000080008081
.quad	0x8000000000008080
.quad	0x0000000080000001
.quad	0x8000000080008008
.size	iotas,.-iotas
.text

//----------------------------------------------------------------------
// KeccakF1600_int -- 24 rounds of the permutation on a state held in
// general-purpose registers (file-internal helper).
//
// In/Out:  x0..x17, x25, x19..x24 = lanes 0..24 (see mapping at top).
// Scratch: x26, x27, x28, x30, condition flags.
// Stack:   uses the 32 bytes at [sp,#0] that the CALLER must have
//          reserved; sp itself is not moved:
//              [sp,#0]   temporary home for x4/x9 during Theta
//              [sp,#16]  current pointer into iotas
//              [sp,#24]  saved link register
// The loop ends when the iotas pointer, after its post-increment, has
// its low 8 bits clear (see the alignment note on the table).
//----------------------------------------------------------------------
.type	KeccakF1600_int,%function
.align	5
KeccakF1600_int:
	AARCH64_SIGN_LINK_REGISTER
	adrp	x28,iotas
	add	x28,x28,#:lo12:iotas
	stp	x28,x30,[sp,#16]		// 32 bytes on top are mine
	b	.Loop
.align	4
.Loop:
	////////////////////////////////////////// Theta
	// Column parities are accumulated in x26, x27, x28, x30 and x4.
	// x4 and x9 still hold live lanes, so they are parked on the
	// stack before being reused as temporaries.
	eor	x26,x0,x5
	stp	x4,x9,[sp,#0]			// offload pair...
	eor	x27,x1,x6
	eor	x28,x2,x7
	eor	x30,x3,x8
	eor	x4,x4,x9
	eor	x26,x26,x10
	eor	x27,x27,x11
	eor	x28,x28,x12
	eor	x30,x30,x13
	eor	x4,x4,x14
	eor	x26,x26,x15
	eor	x27,x27,x16
	eor	x28,x28,x17
	eor	x30,x30,x25
	eor	x4,x4,x19
	eor	x26,x26,x20
	eor	x28,x28,x22
	eor	x27,x27,x21
	eor	x30,x30,x23
	eor	x4,x4,x24

	eor	x9,x26,x28,ror#63		// ror#63 == rotate left by 1

	eor	x1,x1,x9
	eor	x6,x6,x9
	eor	x11,x11,x9
	eor	x16,x16,x9
	eor	x21,x21,x9

	eor	x9,x27,x30,ror#63
	eor	x28,x28,x4,ror#63
	eor	x30,x30,x26,ror#63
	eor	x4,x4,x27,ror#63

	// The updated lanes 2, 3 and 4 are written straight into scratch
	// registers (x27, x26, x28), which saves the moves shown in the
	// trailing comments.
	eor	x27,   x2,x9		// mov	x27,x2
	eor	x7,x7,x9
	eor	x12,x12,x9
	eor	x17,x17,x9
	eor	x22,x22,x9

	eor	x0,x0,x4
	eor	x5,x5,x4
	eor	x10,x10,x4
	eor	x15,x15,x4
	eor	x20,x20,x4
	ldp	x4,x9,[sp,#0]			// re-load offloaded data
	eor	x26,   x3,x28		// mov	x26,x3
	eor	x8,x8,x28
	eor	x13,x13,x28
	eor	x25,x25,x28
	eor	x23,x23,x28

	eor	x28,   x4,x30		// mov	x28,x4
	eor	x9,x9,x30
	eor	x14,x14,x30
	eor	x19,x19,x30
	eor	x24,x24,x30

	////////////////////////////////////////// Rho+Pi
	// Each ror by (64-n) is a rotate left by n combined with the
	// move to the destination lane.  x30 keeps the old x1.
	mov	x30,x1
	ror	x1,x6,#64-44
	//mov	x27,x2
	ror	x2,x12,#64-43
	//mov	x26,x3
	ror	x3,x25,#64-21
	//mov	x28,x4
	ror	x4,x24,#64-14

	ror	x6,x9,#64-20
	ror	x12,x13,#64-25
	ror	x25,x17,#64-15
	ror	x24,x21,#64-2

	ror	x9,x22,#64-61
	ror	x13,x19,#64-8
	ror	x17,x11,#64-10
	ror	x21,x8,#64-55

	ror	x22,x14,#64-39
	ror	x19,x23,#64-56
	ror	x11,x7,#64-6
	ror	x8,x16,#64-45

	ror	x14,x20,#64-18
	ror	x23,x15,#64-41
	ror	x7,x10,#64-3
	ror	x16,x5,#64-36

	ror	x5,x26,#64-28
	ror	x10,x30,#64-1
	ror	x15,x28,#64-27
	ror	x20,x27,#64-62

	////////////////////////////////////////// Chi+Iota
	// Five rows of five lanes each; bic computes (a AND NOT b).
	// The round-constant fetch and the termination test are
	// interleaved with the first two rows.
	bic	x26,x2,x1
	bic	x27,x3,x2
	bic	x28,x0,x4
	bic	x30,x1,x0
	eor	x0,x0,x26
	bic	x26,x4,x3
	eor	x1,x1,x27
	ldr	x27,[sp,#16]			// current iotas pointer
	eor	x3,x3,x28
	eor	x4,x4,x30
	eor	x2,x2,x26
	ldr	x30,[x27],#8			// Iota[i++]

	bic	x26,x7,x6
	tst	x27,#255			// are we done?
						// (nothing below changes the
						// flags before the bne)
	str	x27,[sp,#16]
	bic	x27,x8,x7
	bic	x28,x5,x9
	eor	x0,x0,x30			// A[0][0] ^= Iota
	bic	x30,x6,x5
	eor	x5,x5,x26
	bic	x26,x9,x8
	eor	x6,x6,x27
	eor	x8,x8,x28
	eor	x9,x9,x30
	eor	x7,x7,x26

	bic	x26,x12,x11
	bic	x27,x13,x12
	bic	x28,x10,x14
	bic	x30,x11,x10
	eor	x10,x10,x26
	bic	x26,x14,x13
	eor	x11,x11,x27
	eor	x13,x13,x28
	eor	x14,x14,x30
	eor	x12,x12,x26

	bic	x26,x17,x16
	bic	x27,x25,x17
	bic	x28,x15,x19
	bic	x30,x16,x15
	eor	x15,x15,x26
	bic	x26,x19,x25
	eor	x16,x16,x27
	eor	x25,x25,x28
	eor	x19,x19,x30
	eor	x17,x17,x26

	bic	x26,x22,x21
	bic	x27,x23,x22
	bic	x28,x20,x24
	bic	x30,x21,x20
	eor	x20,x20,x26
	bic	x26,x24,x23
	eor	x21,x21,x27
	eor	x23,x23,x28
	eor	x24,x24,x30
	eor	x22,x22,x26

	bne	.Loop

	ldr	x30,[sp,#24]			// x30 was used as scratch
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	KeccakF1600_int,.-KeccakF1600_int

//----------------------------------------------------------------------
// KeccakF1600 -- apply the permutation to a state in memory.
//
// In:    x0 = pointer to the 25-lane state (updated in place).
// Saves: x19..x28, x29, x30 in a 128-byte frame.
// Stack: a further 48 bytes below the frame: [sp,#0..31] is the scratch
//        area KeccakF1600_int expects, [sp,#32] holds the state pointer.
// On return x0..x17 hold state lanes, not the incoming argument.
//----------------------------------------------------------------------
.type	KeccakF1600,%function
.align	5
KeccakF1600:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0
	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#48

	str	x0,[sp,#32]			// offload argument
	mov	x26,x0
	ldp	x0,x1,[x0,#16*0]
	ldp	x2,x3,[x26,#16*1]
	ldp	x4,x5,[x26,#16*2]
	ldp	x6,x7,[x26,#16*3]
	ldp	x8,x9,[x26,#16*4]
	ldp	x10,x11,[x26,#16*5]
	ldp	x12,x13,[x26,#16*6]
	ldp	x14,x15,[x26,#16*7]
	ldp	x16,x17,[x26,#16*8]
	ldp	x25,x19,[x26,#16*9]		// lane 18 lives in x25
	ldp	x20,x21,[x26,#16*10]
	ldp	x22,x23,[x26,#16*11]
	ldr	x24,[x26,#16*12]

	bl	KeccakF1600_int

	ldr	x26,[sp,#32]
	stp	x0,x1,[x26,#16*0]
	stp	x2,x3,[x26,#16*1]
	stp	x4,x5,[x26,#16*2]
	stp	x6,x7,[x26,#16*3]
	stp	x8,x9,[x26,#16*4]
	stp	x10,x11,[x26,#16*5]
	stp	x12,x13,[x26,#16*6]
	stp	x14,x15,[x26,#16*7]
	stp	x16,x17,[x26,#16*8]
	stp	x25,x19,[x26,#16*9]
	stp	x20,x21,[x26,#16*10]
	stp	x22,x23,[x26,#16*11]
	str	x24,[x26,#16*12]

	ldp	x19,x20,[x29,#16]		// x29 still addresses the frame
	add	sp,sp,#48
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	KeccakF1600,.-KeccakF1600

//----------------------------------------------------------------------
// SHA3_absorb -- XOR whole input blocks into the state, permuting after
// each block.
//
// In:   x0 = uint64_t A[5][5]   state, updated in place
//       x1 = const void *inp
//       x2 = size_t len
//       x3 = size_t bsz         block size in bytes
// Out:  x0 = number of input bytes left over (always less than bsz)
//
// Stack below the 128-byte register-save frame (64 bytes):
//       [sp,#0..31]  scratch for KeccakF1600_int
//       [sp,#32] A   [sp,#40] inp   [sp,#48] len   [sp,#56] bsz
//
// Input is consumed 8 bytes at a time and treated as little-endian (a
// byte swap is applied when __AARCH64EB__ is defined).
// NOTE(review): the compare ladder only tells multiples of 8 apart and
// never stops on bsz == 0; presumably callers always pass a non-zero
// multiple of 8 that is at most 200 -- confirm against the callers.
//----------------------------------------------------------------------
.globl	SHA3_absorb
.type	SHA3_absorb,%function
.align	5
SHA3_absorb:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0
	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#64

	stp	x0,x1,[sp,#32]			// offload arguments
	stp	x2,x3,[sp,#48]

	mov	x26,x0				// uint64_t A[5][5]
	mov	x27,x1				// const void *inp
	mov	x28,x2				// size_t len
	mov	x30,x3				// size_t bsz
	ldp	x0,x1,[x26,#16*0]
	ldp	x2,x3,[x26,#16*1]
	ldp	x4,x5,[x26,#16*2]
	ldp	x6,x7,[x26,#16*3]
	ldp	x8,x9,[x26,#16*4]
	ldp	x10,x11,[x26,#16*5]
	ldp	x12,x13,[x26,#16*6]
	ldp	x14,x15,[x26,#16*7]
	ldp	x16,x17,[x26,#16*8]
	ldp	x25,x19,[x26,#16*9]
	ldp	x20,x21,[x26,#16*10]
	ldp	x22,x23,[x26,#16*11]
	ldr	x24,[x26,#16*12]
	b	.Loop_absorb

.align	4
.Loop_absorb:
	subs	x26,x28,x30			// len - bsz
	blo	.Labsorbed			// less than one block left

	str	x26,[sp,#48]			// save len - bsz
	// Absorb ladder.  Every cmp against 8*(k+2) serves two lanes: blo
	// leaves right after lane k when bsz is below that bound, beq
	// leaves right after lane k+1 when bsz equals it.  The ldr/rev/eor
	// in between do not touch the flags.
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x0,x0,x26
	cmp	x30,#8*(0+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x1,x1,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x2,x2,x26
	cmp	x30,#8*(2+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x3,x3,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x4,x4,x26
	cmp	x30,#8*(4+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x5,x5,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x6,x6,x26
	cmp	x30,#8*(6+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x7,x7,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x8,x8,x26
	cmp	x30,#8*(8+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x9,x9,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x10,x10,x26
	cmp	x30,#8*(10+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x11,x11,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x12,x12,x26
	cmp	x30,#8*(12+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x13,x13,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x14,x14,x26
	cmp	x30,#8*(14+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x15,x15,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x16,x16,x26
	cmp	x30,#8*(16+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x17,x17,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x25,x25,x26			// lane 18
	cmp	x30,#8*(18+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x19,x19,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x20,x20,x26
	cmp	x30,#8*(20+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x21,x21,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x22,x22,x26
	cmp	x30,#8*(22+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x23,x23,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x24,x24,x26

.Lprocess_block:
	str	x27,[sp,#40]			// save inp

	bl	KeccakF1600_int			// clobbers x26..x28, x30

	ldr	x27,[sp,#40]			// restore arguments
	ldp	x28,x30,[sp,#48]		// len (already reduced), bsz
	b	.Loop_absorb

.align	4
.Labsorbed:
	ldr	x27,[sp,#32]			// state pointer
	stp	x0,x1,[x27,#16*0]
	stp	x2,x3,[x27,#16*1]
	stp	x4,x5,[x27,#16*2]
	stp	x6,x7,[x27,#16*3]
	stp	x8,x9,[x27,#16*4]
	stp	x10,x11,[x27,#16*5]
	stp	x12,x13,[x27,#16*6]
	stp	x14,x15,[x27,#16*7]
	stp	x16,x17,[x27,#16*8]
	stp	x25,x19,[x27,#16*9]
	stp	x20,x21,[x27,#16*10]
	stp	x22,x23,[x27,#16*11]
	str	x24,[x27,#16*12]

	mov	x0,x28				// return value
	ldp	x19,x20,[x29,#16]
	add	sp,sp,#64
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	SHA3_absorb,.-SHA3_absorb

//----------------------------------------------------------------------
// SHA3_squeeze -- copy bytes out of the state, permuting between blocks.
//
// Argument roles as used by the code below:
//   x0 = pointer to the 25-lane state (also handed to KeccakF1600)
//   x1 = output pointer
//   x2 = number of bytes to write
//   x3 = block size in bytes (bytes taken between permutations)
//   w4 = "next" flag: non-zero requests a permutation before any
//        output is produced
// No return value is set up.
//
// Lanes are written out least-significant byte first.  A request of
// zero bytes returns at once without touching the state or the output.
//----------------------------------------------------------------------
.globl	SHA3_squeeze
.type	SHA3_squeeze,%function
.align	5
SHA3_squeeze:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-48]!
	add	x29,sp,#0
	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]

	mov	x19,x0			// put aside arguments
	mov	x20,x1
	mov	x21,x2
	mov	x22,x3
	// Zero-length guard.  Without it a count of 0 reaches
	// .Lsqueeze_tail, the first subs there wraps the count, none of
	// the beq exits is taken and 7 bytes get stored to the output.
	cbz	x21,.Lsqueeze_done
	cmp	w4, #0			// w4 = 'next' argument
	bne	.Lnext_block

.Loop_squeeze:
	ldr	x4,[x0],#8		// next lane of the state
	cmp	x21,#8
	blo	.Lsqueeze_tail		// fewer than 8 bytes wanted
#ifdef	__AARCH64EB__
	rev	x4,x4
#endif
	str	x4,[x20],#8
	subs	x21,x21,#8
	beq	.Lsqueeze_done

	subs	x3,x3,#8		// bytes left in this block
	bhi	.Loop_squeeze
.Lnext_block:
	mov	x0,x19
	bl	KeccakF1600
	mov	x0,x19			// restart at lane 0
	mov	x3,x22			// and at a full block
	b	.Loop_squeeze

.align	4
.Lsqueeze_tail:
	// 1..7 bytes remain; emit them one at a time, low byte first.
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1

.Lsqueeze_done:
	ldp	x19,x20,[sp,#16]
	ldp	x21,x22,[sp,#32]
	ldp	x29,x30,[sp],#48
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	SHA3_squeeze,.-SHA3_squeeze

//----------------------------------------------------------------------
// KeccakF1600_ce -- 24 rounds of the permutation on a state held in
// vector registers (file-internal leaf routine; no stack use).
//
// In/Out:  v0..v24 = lanes 0..24.
// Scratch: v25..v31, x9 (round counter), x10 (iotas pointer), flags.
//
// The EOR3, RAX1, XAR and BCAX instructions are emitted as raw .inst
// words; the comment after each word is the instruction it encodes.
// NOTE(review): presumably done so the file assembles with toolchains
// that lack these mnemonics -- confirm with the generator script.
//----------------------------------------------------------------------
.type	KeccakF1600_ce,%function
.align	5
KeccakF1600_ce:
	mov	x9,#24
	adrp	x10,iotas
	add	x10,x10,#:lo12:iotas
	b	.Loop_ce
.align	4
.Loop_ce:
	////////////////////////////////////////////////// Theta
.inst	0xce0f2a99	//eor3 v25.16b,v20.16b,v15.16b,v10.16b
.inst	0xce102eba	//eor3 v26.16b,v21.16b,v16.16b,v11.16b
.inst	0xce1132db	//eor3 v27.16b,v22.16b,v17.16b,v12.16b
.inst	0xce1236fc	//eor3 v28.16b,v23.16b,v18.16b,v13.16b
.inst	0xce133b1d	//eor3 v29.16b,v24.16b,v19.16b,v14.16b
.inst	0xce050339	//eor3 v25.16b,v25.16b,   v5.16b,v0.16b
.inst	0xce06075a	//eor3 v26.16b,v26.16b,   v6.16b,v1.16b
.inst	0xce070b7b	//eor3 v27.16b,v27.16b,   v7.16b,v2.16b
.inst	0xce080f9c	//eor3 v28.16b,v28.16b,   v8.16b,v3.16b
.inst	0xce0913bd	//eor3 v29.16b,v29.16b,   v9.16b,v4.16b

.inst	0xce7b8f3e	//rax1 v30.16b,v25.16b,v27.16b			// D[1]
.inst	0xce7c8f5f	//rax1 v31.16b,v26.16b,v28.16b			// D[2]
.inst	0xce7d8f7b	//rax1 v27.16b,v27.16b,v29.16b	// D[3]
.inst	0xce798f9c	//rax1 v28.16b,v28.16b,v25.16b	// D[4]
.inst	0xce7a8fbd	//rax1 v29.16b,v29.16b,v26.16b	// D[0]

	////////////////////////////////////////////////// Theta+Rho+Pi
.inst	0xce9efc39	//xar v25.16b,   v1.16b,v30.16b,#64-1 // C[0]=A[2][0]

.inst	0xce9e50c1	//xar v1.16b,v6.16b,v30.16b,#64-44
.inst	0xce9cb126	//xar v6.16b,v9.16b,v28.16b,#64-20
.inst	0xce9f0ec9	//xar v9.16b,v22.16b,v31.16b,#64-61
.inst	0xce9c65d6	//xar v22.16b,v14.16b,v28.16b,#64-39
.inst	0xce9dba8e	//xar v14.16b,v20.16b,v29.16b,#64-18

.inst	0xce9f085a	//xar v26.16b,   v2.16b,v31.16b,#64-62 // C[1]=A[4][0]

.inst	0xce9f5582	//xar v2.16b,v12.16b,v31.16b,#64-43
.inst	0xce9b9dac	//xar v12.16b,v13.16b,v27.16b,#64-25
.inst	0xce9ce26d	//xar v13.16b,v19.16b,v28.16b,#64-8
.inst	0xce9b22f3	//xar v19.16b,v23.16b,v27.16b,#64-56
.inst	0xce9d5df7	//xar v23.16b,v15.16b,v29.16b,#64-41

.inst	0xce9c948f	//xar v15.16b,v4.16b,v28.16b,#64-27

.inst	0xce9ccb1c	//xar v28.16b,   v24.16b,v28.16b,#64-14 // D[4]=A[0][4]
.inst	0xce9efab8	//xar v24.16b,v21.16b,v30.16b,#64-2
.inst	0xce9b2508	//xar v8.16b,v8.16b,v27.16b,#64-55 // A[1][3]=A[4][1]
.inst	0xce9e4e04	//xar v4.16b,v16.16b,v30.16b,#64-45 // A[0][4]=A[1][3]
.inst	0xce9d70b0	//xar v16.16b,v5.16b,v29.16b,#64-36

.inst	0xce9b9065	//xar v5.16b,v3.16b,v27.16b,#64-28

	eor	v0.16b,v0.16b,v29.16b

.inst	0xce9bae5b	//xar v27.16b,   v18.16b,v27.16b,#64-21 // D[3]=A[0][3]
.inst	0xce9fc623	//xar v3.16b,v17.16b,v31.16b,#64-15 // A[0][3]=A[3][3]
.inst	0xce9ed97e	//xar v30.16b,   v11.16b,v30.16b,#64-10 // D[1]=A[3][2]
.inst	0xce9fe8ff	//xar v31.16b,   v7.16b,v31.16b,#64-6 // D[2]=A[2][1]
.inst	0xce9df55d	//xar v29.16b,   v10.16b,v29.16b,#64-3 // D[0]=A[1][2]

	////////////////////////////////////////////////// Chi+Iota
.inst	0xce362354	//bcax v20.16b,v26.16b,   v22.16b,v8.16b // A[1][3]=A[4][1]
.inst	0xce375915	//bcax v21.16b,v8.16b,v23.16b,v22.16b // A[1][3]=A[4][1]
.inst	0xce385ed6	//bcax v22.16b,v22.16b,v24.16b,v23.16b
.inst	0xce3a62f7	//bcax v23.16b,v23.16b,v26.16b,   v24.16b
.inst	0xce286b18	//bcax v24.16b,v24.16b,v8.16b,v26.16b // A[1][3]=A[4][1]

	ld1r	{v26.2d},[x10],#8	// next round constant, then advance

.inst	0xce330fd1	//bcax v17.16b,v30.16b,   v19.16b,v3.16b // A[0][3]=A[3][3]
.inst	0xce2f4c72	//bcax v18.16b,v3.16b,v15.16b,v19.16b // A[0][3]=A[3][3]
.inst	0xce303e73	//bcax v19.16b,v19.16b,v16.16b,v15.16b
.inst	0xce3e41ef	//bcax v15.16b,v15.16b,v30.16b,   v16.16b
.inst	0xce237a10	//bcax v16.16b,v16.16b,v3.16b,v30.16b // A[0][3]=A[3][3]

.inst	0xce2c7f2a	//bcax v10.16b,v25.16b,   v12.16b,v31.16b
.inst	0xce2d33eb	//bcax v11.16b,v31.16b,   v13.16b,v12.16b
.inst	0xce2e358c	//bcax v12.16b,v12.16b,v14.16b,v13.16b
.inst	0xce3939ad	//bcax v13.16b,v13.16b,v25.16b,   v14.16b
.inst	0xce3f65ce	//bcax v14.16b,v14.16b,v31.16b,   v25.16b

.inst	0xce2913a7	//bcax v7.16b,v29.16b,   v9.16b,v4.16b // A[0][4]=A[1][3]
.inst	0xce252488	//bcax v8.16b,v4.16b,v5.16b,v9.16b // A[0][4]=A[1][3]
.inst	0xce261529	//bcax v9.16b,v9.16b,v6.16b,v5.16b
.inst	0xce3d18a5	//bcax v5.16b,v5.16b,v29.16b,   v6.16b
.inst	0xce2474c6	//bcax v6.16b,v6.16b,v4.16b,v29.16b // A[0][4]=A[1][3]

.inst	0xce207363	//bcax v3.16b,v27.16b,   v0.16b,v28.16b
.inst	0xce210384	//bcax v4.16b,v28.16b,   v1.16b,v0.16b
.inst	0xce220400	//bcax v0.16b,v0.16b,v2.16b,v1.16b
.inst	0xce3b0821	//bcax v1.16b,v1.16b,v27.16b,   v2.16b
.inst	0xce3c6c42	//bcax v2.16b,v2.16b,v28.16b,   v27.16b

	eor	v0.16b,v0.16b,v26.16b	// lane 0 ^= round constant

	subs	x9,x9,#1
	bne	.Loop_ce

	ret
.size	KeccakF1600_ce,.-KeccakF1600_ce

//----------------------------------------------------------------------
// KeccakF1600_cext -- apply the vector permutation to a state in memory.
//
// In:    x0 = pointer to the 25-lane state (updated in place; x0 itself
//        is left unchanged).
// Saves: x29, x30 and d8..d15 in an 80-byte frame.
// Clobbers whatever KeccakF1600_ce clobbers, plus d0..d7 and d16..d24.
//----------------------------------------------------------------------
.type	KeccakF1600_cext,%function
.align	5
KeccakF1600_cext:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-80]!
	add	x29,sp,#0
	stp	d8,d9,[sp,#16]		// per ABI requirement
	stp	d10,d11,[sp,#32]
	stp	d12,d13,[sp,#48]
	stp	d14,d15,[sp,#64]
	ldp	d0,d1,[x0,#8*0]
	ldp	d2,d3,[x0,#8*2]
	ldp	d4,d5,[x0,#8*4]
	ldp	d6,d7,[x0,#8*6]
	ldp	d8,d9,[x0,#8*8]
	ldp	d10,d11,[x0,#8*10]
	ldp	d12,d13,[x0,#8*12]
	ldp	d14,d15,[x0,#8*14]
	ldp	d16,d17,[x0,#8*16]
	ldp	d18,d19,[x0,#8*18]
	ldp	d20,d21,[x0,#8*20]
	ldp	d22,d23,[x0,#8*22]
	ldr	d24,[x0,#8*24]
	bl	KeccakF1600_ce
	ldr	x30,[sp,#8]		// bl overwrote the link register
	stp	d0,d1,[x0,#8*0]
	stp	d2,d3,[x0,#8*2]
	stp	d4,d5,[x0,#8*4]
	stp	d6,d7,[x0,#8*6]
	stp	d8,d9,[x0,#8*8]
	stp	d10,d11,[x0,#8*10]
	stp	d12,d13,[x0,#8*12]
	stp	d14,d15,[x0,#8*14]
	stp	d16,d17,[x0,#8*16]
	stp	d18,d19,[x0,#8*18]
	stp	d20,d21,[x0,#8*20]
	stp	d22,d23,[x0,#8*22]
	str	d24,[x0,#8*24]

	ldp	d8,d9,[sp,#16]
	ldp	d10,d11,[sp,#32]
	ldp	d12,d13,[sp,#48]
	ldp	d14,d15,[sp,#64]
	ldr	x29,[sp],#80		// x30 was already reloaded above
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	KeccakF1600_cext,.-KeccakF1600_cext

//----------------------------------------------------------------------
// SHA3_absorb_cext -- vector-register counterpart of SHA3_absorb.
//
// Argument roles as used by the code below:
//   x0 = pointer to the 25-lane state (updated in place)
//   x1 = input pointer
//   x2 = input length in bytes
//   x3 = block size in bytes
// Out:  x0 = number of input bytes left over (always less than bsz)
//
// The arguments stay in x0..x3 across the permutation because
// KeccakF1600_ce only uses x9 and x10 as general-purpose scratch.
// The compare ladder works exactly as in SHA3_absorb.
//----------------------------------------------------------------------
.globl	SHA3_absorb_cext
.type	SHA3_absorb_cext,%function
.align	5
SHA3_absorb_cext:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-80]!
	add	x29,sp,#0
	stp	d8,d9,[sp,#16]		// per ABI requirement
	stp	d10,d11,[sp,#32]
	stp	d12,d13,[sp,#48]
	stp	d14,d15,[sp,#64]
	ldp	d0,d1,[x0,#8*0]
	ldp	d2,d3,[x0,#8*2]
	ldp	d4,d5,[x0,#8*4]
	ldp	d6,d7,[x0,#8*6]
	ldp	d8,d9,[x0,#8*8]
	ldp	d10,d11,[x0,#8*10]
	ldp	d12,d13,[x0,#8*12]
	ldp	d14,d15,[x0,#8*14]
	ldp	d16,d17,[x0,#8*16]
	ldp	d18,d19,[x0,#8*18]
	ldp	d20,d21,[x0,#8*20]
	ldp	d22,d23,[x0,#8*22]
	ldr	d24,[x0,#8*24]
	b	.Loop_absorb_ce

.align	4
.Loop_absorb_ce:
	subs	x2,x2,x3		// len - bsz
	blo	.Labsorbed_ce		// less than one block left
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v0.16b,v0.16b,v31.16b
	cmp	x3,#8*(0+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v1.16b,v1.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v2.16b,v2.16b,v31.16b
	cmp	x3,#8*(2+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v3.16b,v3.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v4.16b,v4.16b,v31.16b
	cmp	x3,#8*(4+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v5.16b,v5.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v6.16b,v6.16b,v31.16b
	cmp	x3,#8*(6+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v7.16b,v7.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v8.16b,v8.16b,v31.16b
	cmp	x3,#8*(8+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v9.16b,v9.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v10.16b,v10.16b,v31.16b
	cmp	x3,#8*(10+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v11.16b,v11.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v12.16b,v12.16b,v31.16b
	cmp	x3,#8*(12+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v13.16b,v13.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v14.16b,v14.16b,v31.16b
	cmp	x3,#8*(14+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v15.16b,v15.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v16.16b,v16.16b,v31.16b
	cmp	x3,#8*(16+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v17.16b,v17.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v18.16b,v18.16b,v31.16b
	cmp	x3,#8*(18+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v19.16b,v19.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v20.16b,v20.16b,v31.16b
	cmp	x3,#8*(20+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v21.16b,v21.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v22.16b,v22.16b,v31.16b
	cmp	x3,#8*(22+2)
	blo	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v23.16b,v23.16b,v31.16b
	beq	.Lprocess_block_ce
	ldr	d31,[x1],#8		// *inp++
#ifdef	__AARCH64EB__
	rev64	v31.16b,v31.16b
#endif
	eor	v24.16b,v24.16b,v31.16b

.Lprocess_block_ce:

	bl	KeccakF1600_ce		// x30 is restored in the epilogue

	b	.Loop_absorb_ce

.align	4
.Labsorbed_ce:
	stp	d0,d1,[x0,#8*0]
	stp	d2,d3,[x0,#8*2]
	stp	d4,d5,[x0,#8*4]
	stp	d6,d7,[x0,#8*6]
	stp	d8,d9,[x0,#8*8]
	stp	d10,d11,[x0,#8*10]
	stp	d12,d13,[x0,#8*12]
	stp	d14,d15,[x0,#8*14]
	stp	d16,d17,[x0,#8*16]
	stp	d18,d19,[x0,#8*18]
	stp	d20,d21,[x0,#8*20]
	stp	d22,d23,[x0,#8*22]
	str	d24,[x0,#8*24]
	add	x0,x2,x3		// return value
					// (undoes the final len - bsz)

	ldp	d8,d9,[sp,#16]
	ldp	d10,d11,[sp,#32]
	ldp	d12,d13,[sp,#48]
	ldp	d14,d15,[sp,#64]
	ldp	x29,x30,[sp],#80
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	SHA3_absorb_cext,.-SHA3_absorb_cext

//----------------------------------------------------------------------
// SHA3_squeeze_cext -- counterpart of SHA3_squeeze that permutes with
// KeccakF1600_cext.
//
// Argument roles as used by the code below:
//   x0 = pointer to the 25-lane state
//   x1 = output pointer
//   x2 = number of bytes to write
//   x3 = block size in bytes
// Unlike SHA3_squeeze, this routine does not read a fifth argument.
// No return value is set up.
//
// x9 walks the state and x10 counts down the current block; both are
// re-initialised after every permutation because KeccakF1600_ce uses
// them.  A request of zero bytes returns at once.
//----------------------------------------------------------------------
.globl	SHA3_squeeze_cext
.type	SHA3_squeeze_cext,%function
.align	5
SHA3_squeeze_cext:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	// Zero-length guard.  Without it a count of 0 reaches
	// .Lsqueeze_tail_ce, the first subs there wraps the count, none
	// of the beq exits is taken and 7 bytes get stored to the output.
	cbz	x2,.Lsqueeze_done_ce
	mov	x9,x0
	mov	x10,x3

.Loop_squeeze_ce:
	ldr	x4,[x9],#8		// next lane of the state
	cmp	x2,#8
	blo	.Lsqueeze_tail_ce	// fewer than 8 bytes wanted
#ifdef	__AARCH64EB__
	rev	x4,x4
#endif
	str	x4,[x1],#8
	beq	.Lsqueeze_done_ce	// flags still from cmp: exactly 8

	sub	x2,x2,#8
	subs	x10,x10,#8		// bytes left in this block
	bhi	.Loop_squeeze_ce

	bl	KeccakF1600_cext
	ldr	x30,[sp,#8]		// bl overwrote the link register
	mov	x9,x0
	mov	x10,x3
	b	.Loop_squeeze_ce

.align	4
.Lsqueeze_tail_ce:
	// 1..7 bytes remain; emit them one at a time, low byte first.
	strb	w4,[x1],#1
	lsr	x4,x4,#8
	subs	x2,x2,#1
	beq	.Lsqueeze_done_ce
	strb	w4,[x1],#1
	lsr	x4,x4,#8
	subs	x2,x2,#1
	beq	.Lsqueeze_done_ce
	strb	w4,[x1],#1
	lsr	x4,x4,#8
	subs	x2,x2,#1
	beq	.Lsqueeze_done_ce
	strb	w4,[x1],#1
	lsr	x4,x4,#8
	subs	x2,x2,#1
	beq	.Lsqueeze_done_ce
	strb	w4,[x1],#1
	lsr	x4,x4,#8
	subs	x2,x2,#1
	beq	.Lsqueeze_done_ce
	strb	w4,[x1],#1
	lsr	x4,x4,#8
	subs	x2,x2,#1
	beq	.Lsqueeze_done_ce
	strb	w4,[x1],#1

.Lsqueeze_done_ce:
	ldr	x29,[sp],#16		// x30 is already valid here
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	SHA3_squeeze_cext,.-SHA3_squeeze_cext
// NUL-terminated ASCII: "Keccak-1600 absorb and squeeze for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.byte	75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2