#! /usr/bin/env perl
# Copyright 2023-2025 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# ====================================================================
# Written by Danny Tsen <dtsen@us.ibm.com> # for the OpenSSL project.
#
# Copyright 2025- IBM Corp.
# ====================================================================
#
# p384 lower-level primitives for PPC64.
#
# Usage: <this script> <flavour> [<output file>]
#
# The script builds the assembly text in $code and pipes it through
# ppc-xlate.pl, which adapts it to the requested flavour and writes the
# result to <output file> (or to standard output when none is given).
#

use strict;
use warnings;

# First argument is the perlasm flavour; the first later argument that looks
# like a file name (has an extension) is the output file.
my $flavour = shift;
my $output = "";
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
if (!$output) {
    $output = "-";
}

# Locate ppc-xlate.pl next to this script or in the shared perlasm directory.
# When $0 carries no directory component the search is relative to the
# current directory; default to "" so no undefined value is interpolated.
my ($xlate, $dir);
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT from here on goes through the translator.
# Fail loudly if the translator cannot be started.
open OUT,"| \"$^X\" $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

my $code = "";

$code.=<<___;
.machine	"any"
.text

#
# Conventions used throughout this file
#
#  - An felem is 7 doublewords (64-bit limbs) at offsets 0, 8, ..., 48.
#  - A widefelem is 13 quadwords (128-bit limbs) at offsets 0, 16, ..., 192,
#    each stored as a low doubleword followed by a high doubleword.
#  - mtvsrdd 32+N, RA, RB loads vector register N with RA in the high
#    doubleword and RB in the low doubleword (RA = 0 means the value zero).
#  - vmsumudm D, A, B, C computes D = A.hi*B.hi + A.lo*B.lo + C as a
#    128-bit sum, so one instruction accumulates two limb products.
#
# NOTE(review): the constant tables below are written low 32-bit word first
# and are read with ld, which appears to assume a little-endian target --
# confirm before enabling this file for big-endian builds.
#

#
# p384_felem_mul: r3 = out (widefelem), r4 = in1 (felem), r5 = in2 (felem)
#
# Wrapper that saves the non-volatile registers r14-r22 used by the core
# routine, keeps the return address in r0 and calls _p384_felem_mul_core.
#
.globl p384_felem_mul
.type	p384_felem_mul,\@function
.align 4
p384_felem_mul:

	stdu	1, -176(1)
	mflr	0
	std	14, 56(1)
	std	15, 64(1)
	std	16, 72(1)
	std	17, 80(1)
	std	18, 88(1)
	std	19, 96(1)
	std	20, 104(1)
	std	21, 112(1)
	std	22, 120(1)

	bl	_p384_felem_mul_core

	mtlr	0
	ld	14, 56(1)
	ld	15, 64(1)
	ld	16, 72(1)
	ld	17, 80(1)
	ld	18, 88(1)
	ld	19, 96(1)
	ld	20, 104(1)
	ld	21, 112(1)
	ld	22, 120(1)
	addi	1, 1, 176
	blr
.size	p384_felem_mul,.-p384_felem_mul

#
# p384_felem_square: r3 = out (widefelem), r4 = in (felem)
#
# Wrapper that saves the non-volatile registers r14-r17 used by the core
# routine, keeps the return address in r0 and calls _p384_felem_square_core.
#
.globl p384_felem_square
.type	p384_felem_square,\@function
.align 4
p384_felem_square:

	stdu	1, -176(1)
	mflr	0
	std	14, 56(1)
	std	15, 64(1)
	std	16, 72(1)
	std	17, 80(1)

	bl	_p384_felem_square_core

	mtlr	0
	ld	14, 56(1)
	ld	15, 64(1)
	ld	16, 72(1)
	ld	17, 80(1)
	addi	1, 1, 176
	blr
.size	p384_felem_square,.-p384_felem_square

#
# Felem mul core function -
# r3, r4 and r5 need to be pre-loaded.
#
# Loads in1[0..6] into r6-r12 and in2[0..6] into r14-r20, then computes
# out[k] = sum of in1[i]*in2[j] over i+j = k for k = 0..12.
# Clobbers r6-r12, r14-r22 and vector registers 0-19; r0 is left untouched
# so callers may keep the return address there.
#
.type	_p384_felem_mul_core,\@function
.align 4
_p384_felem_mul_core:

	ld	6,0(4)
	ld	14,0(5)
	ld	7,8(4)
	ld	15,8(5)
	ld	8,16(4)
	ld	16,16(5)
	ld	9,24(4)
	ld	17,24(5)
	ld	10,32(4)
	ld	18,32(5)
	ld	11,40(4)
	ld	19,40(5)
	ld	12,48(4)
	ld	20,48(5)

	# out0
	mulld	21, 14, 6
	mulhdu	22, 14, 6
	std	21, 0(3)
	std	22, 8(3)

	# v0 = 0, used as the zero accumulator below
	vxor	0, 0, 0

	# out1
	mtvsrdd	32+13, 14, 6
	mtvsrdd	32+14, 7, 15
	vmsumudm 1, 13, 14, 0

	# out2
	mtvsrdd	32+15, 15, 6
	mtvsrdd	32+16, 7, 16
	mtvsrdd	32+17, 0, 8
	mtvsrdd	32+18, 0, 14
	vmsumudm 19, 15, 16, 0
	vmsumudm 2, 17, 18, 19

	# out3
	mtvsrdd	32+13, 16, 6
	mtvsrdd	32+14, 7, 17
	mtvsrdd	32+15, 14, 8
	mtvsrdd	32+16, 9, 15
	vmsumudm 19, 13, 14, 0
	vmsumudm 3, 15, 16, 19

	# out4
	mtvsrdd	32+13, 17, 6
	mtvsrdd	32+14, 7, 18
	mtvsrdd	32+15, 15, 8
	mtvsrdd	32+16, 9, 16
	mtvsrdd	32+17, 0, 10
	mtvsrdd	32+18, 0, 14
	vmsumudm 19, 13, 14, 0
	vmsumudm 4, 15, 16, 19
	vmsumudm 4, 17, 18, 4

	# out5
	mtvsrdd	32+13, 18, 6
	mtvsrdd	32+14, 7, 19
	mtvsrdd	32+15, 16, 8
	mtvsrdd	32+16, 9, 17
	mtvsrdd	32+17, 14, 10
	mtvsrdd	32+18, 11, 15
	vmsumudm 19, 13, 14, 0
	vmsumudm 5, 15, 16, 19
	vmsumudm 5, 17, 18, 5

	stxv	32+1, 16(3)
	stxv	32+2, 32(3)
	stxv	32+3, 48(3)
	stxv	32+4, 64(3)
	stxv	32+5, 80(3)

	# out6
	mtvsrdd	32+13, 19, 6
	mtvsrdd	32+14, 7, 20
	mtvsrdd	32+15, 17, 8
	mtvsrdd	32+16, 9, 18
	mtvsrdd	32+17, 15, 10
	mtvsrdd	32+18, 11, 16
	vmsumudm 19, 13, 14, 0
	vmsumudm 6, 15, 16, 19
	mtvsrdd	32+13, 0, 12
	mtvsrdd	32+14, 0, 14
	vmsumudm 19, 17, 18, 6
	vmsumudm 6, 13, 14, 19

	# out7
	mtvsrdd	32+13, 19, 7
	mtvsrdd	32+14, 8, 20
	mtvsrdd	32+15, 17, 9
	mtvsrdd	32+16, 10, 18
	mtvsrdd	32+17, 15, 11
	mtvsrdd	32+18, 12, 16
	vmsumudm 19, 13, 14, 0
	vmsumudm 7, 15, 16, 19
	vmsumudm 7, 17, 18, 7

	# out8
	mtvsrdd	32+13, 19, 8
	mtvsrdd	32+14, 9, 20
	mtvsrdd	32+15, 17, 10
	mtvsrdd	32+16, 11, 18
	mtvsrdd	32+17, 0, 12
	mtvsrdd	32+18, 0, 16
	vmsumudm 19, 13, 14, 0
	vmsumudm 8, 15, 16, 19
	vmsumudm 8, 17, 18, 8

	# out9
	mtvsrdd	32+13, 19, 9
	mtvsrdd	32+14, 10, 20
	mtvsrdd	32+15, 17, 11
	mtvsrdd	32+16, 12, 18
	vmsumudm 19, 13, 14, 0
	vmsumudm 9, 15, 16, 19

	# out10
	mtvsrdd	32+13, 19, 10
	mtvsrdd	32+14, 11, 20
	mtvsrdd	32+15, 0, 12
	mtvsrdd	32+16, 0, 18
	vmsumudm 19, 13, 14, 0
	vmsumudm 10, 15, 16, 19

	# out11
	mtvsrdd	32+17, 19, 11
	mtvsrdd	32+18, 12, 20
	vmsumudm 11, 17, 18, 0

	stxv	32+6, 96(3)
	stxv	32+7, 112(3)
	stxv	32+8, 128(3)
	stxv	32+9, 144(3)
	stxv	32+10, 160(3)
	stxv	32+11, 176(3)

	# out12
	mulld	21, 20, 12
	mulhdu	22, 20, 12	# out12

	std	21, 192(3)
	std	22, 200(3)

	blr
.size	_p384_felem_mul_core,.-_p384_felem_mul_core

#
# Felem square core function -
# r3 and r4 need to be pre-loaded.
#
# Loads in[0..6] into r6-r12 and computes out[k] = sum of in[i]*in[j] over
# i+j = k.  Cross terms are formed once with a pre-doubled limb
# (r14 = 2*in[0], r15 = 2*in[1], r16 = 2*in[2], r17 = 2*in[3], and later
# r14 = 2*in[4]).  Clobbers r6-r9, r14-r17 and vector registers 0-19;
# r0 is left untouched so callers may keep the return address there.
#
.type	_p384_felem_square_core,\@function
.align 4
_p384_felem_square_core:

	ld	6, 0(4)
	ld	7, 8(4)
	ld	8, 16(4)
	ld	9, 24(4)
	ld	10, 32(4)
	ld	11, 40(4)
	ld	12, 48(4)

	# v0 = 0, used as the zero accumulator below
	vxor	0, 0, 0

	# out0
	mulld	14, 6, 6
	mulhdu	15, 6, 6
	std	14, 0(3)
	std	15, 8(3)

	# out1
	add	14, 6, 6
	mtvsrdd	32+13, 0, 14
	mtvsrdd	32+14, 0, 7
	vmsumudm 1, 13, 14, 0

	# out2
	mtvsrdd	32+15, 7, 14
	mtvsrdd	32+16, 7, 8
	vmsumudm 2, 15, 16, 0

	# out3
	add	15, 7, 7
	mtvsrdd	32+13, 8, 14
	mtvsrdd	32+14, 15, 9
	vmsumudm 3, 13, 14, 0

	# out4
	mtvsrdd	32+13, 9, 14
	mtvsrdd	32+14, 15, 10
	mtvsrdd	32+15, 0, 8
	vmsumudm 4, 13, 14, 0
	vmsumudm 4, 15, 15, 4

	# out5
	mtvsrdd	32+13, 10, 14
	mtvsrdd	32+14, 15, 11
	add	16, 8, 8
	mtvsrdd	32+15, 0, 16
	mtvsrdd	32+16, 0, 9
	vmsumudm 5, 13, 14, 0
	vmsumudm 5, 15, 16, 5

	stxv	32+1, 16(3)
	stxv	32+2, 32(3)
	stxv	32+3, 48(3)
	stxv	32+4, 64(3)

	# out6
	mtvsrdd	32+13, 11, 14
	mtvsrdd	32+14, 15, 12
	mtvsrdd	32+15, 9, 16
	mtvsrdd	32+16, 9, 10
	stxv	32+5, 80(3)
	vmsumudm 19, 13, 14, 0
	vmsumudm 6, 15, 16, 19

	# out7
	add	17, 9, 9
	mtvsrdd	32+13, 11, 15
	mtvsrdd	32+14, 16, 12
	mtvsrdd	32+15, 0, 17
	mtvsrdd	32+16, 0, 10
	vmsumudm 19, 13, 14, 0
	vmsumudm 7, 15, 16, 19

	# out8
	mtvsrdd	32+13, 11, 16
	mtvsrdd	32+14, 17, 12
	mtvsrdd	32+15, 0, 10
	vmsumudm 19, 13, 14, 0
	vmsumudm 8, 15, 15, 19

	# out9
	add	14, 10, 10
	mtvsrdd	32+13, 11, 17
	mtvsrdd	32+14, 14, 12
	vmsumudm 9, 13, 14, 0

	# out10
	mtvsrdd	32+13, 11, 14
	mtvsrdd	32+14, 11, 12
	vmsumudm 10, 13, 14, 0

	stxv	32+6, 96(3)
	stxv	32+7, 112(3)

	# out11
	# (vector form kept for reference; the scalar sequence below is used)
	#add	14, 11, 11
	#mtvsrdd	32+13, 0, 14
	#mtvsrdd	32+14, 0, 12
	#vmsumudm 11, 13, 14, 0

	# out11 = 2 * in[5] * in[6]: 128-bit product in (r7, r6), doubled
	# into (r9, r8).  r6-r9 are free here because in[0..3] are no
	# longer needed.
	mulld	6, 12, 11
	mulhdu	7, 12, 11
	addc	8, 6, 6
	adde	9, 7, 7

	stxv	32+8, 128(3)
	stxv	32+9, 144(3)
	stxv	32+10, 160(3)
	#stxv	32+11, 176(3)

	# out12
	mulld	14, 12, 12
	mulhdu	15, 12, 12

	std	8, 176(3)
	std	9, 184(3)
	std	14, 192(3)
	std	15, 200(3)

	blr
.size	_p384_felem_square_core,.-_p384_felem_square_core

#
# widefelem (128 bits) * 8
#
# Shifts the 128-bit limb whose low doubleword is at _off1(r3) and high
# doubleword at _off2(r3) left by 3 bits, in place.  Uses r8, r9 and r10.
#
.macro F128_X_8 _off1 _off2
	ld	9,\\_off1(3)
	ld	8,\\_off2(3)
	srdi	10,9,61
	rldimi	10,8,3,0
	sldi	9,9,3
	std	9,\\_off1(3)
	std	10,\\_off2(3)
.endm

#
# p384_felem128_mul_by_8: r3 = widefelem, every 128-bit limb is multiplied
# by 8 in place.
#
.globl p384_felem128_mul_by_8
.type	p384_felem128_mul_by_8, \@function
.align 4
p384_felem128_mul_by_8:

	F128_X_8 0, 8

	F128_X_8 16, 24

	F128_X_8 32, 40

	F128_X_8 48, 56

	F128_X_8 64, 72

	F128_X_8 80, 88

	F128_X_8 96, 104

	F128_X_8 112, 120

	F128_X_8 128, 136

	F128_X_8 144, 152

	F128_X_8 160, 168

	F128_X_8 176, 184

	F128_X_8 192, 200

	blr
.size	p384_felem128_mul_by_8,.-p384_felem128_mul_by_8

#
# widefelem (128 bits) * 2
#
# Shifts the 128-bit limb whose low doubleword is at _off1(r3) and high
# doubleword at _off2(r3) left by 1 bit, in place.  Uses r8, r9 and r10.
#
.macro F128_X_2 _off1 _off2
	ld	9,\\_off1(3)
	ld	8,\\_off2(3)
	srdi	10,9,63
	rldimi	10,8,1,0
	sldi	9,9,1
	std	9,\\_off1(3)
	std	10,\\_off2(3)
.endm

#
# p384_felem128_mul_by_2: r3 = widefelem, every 128-bit limb is multiplied
# by 2 in place.
#
.globl p384_felem128_mul_by_2
.type	p384_felem128_mul_by_2, \@function
.align 4
p384_felem128_mul_by_2:

	F128_X_2 0, 8

	F128_X_2 16, 24

	F128_X_2 32, 40

	F128_X_2 48, 56

	F128_X_2 64, 72

	F128_X_2 80, 88

	F128_X_2 96, 104

	F128_X_2 112, 120

	F128_X_2 128, 136

	F128_X_2 144, 152

	F128_X_2 160, 168

	F128_X_2 176, 184

	F128_X_2 192, 200

	blr
.size	p384_felem128_mul_by_2,.-p384_felem128_mul_by_2

#
# p384_felem_diff128: r3 = out (widefelem, updated in place),
#                     r4 = in (widefelem)
#
# For each of the 13 limbs: out[i] = out[i] + K[i] - in[i], where K[i] is
# the per-limb constant named in the comments (two127, two127m71, ...).
# Every K[i] has a zero low doubleword, so r9 = 0 supplies the low half and
# only the high doubleword is loaded from .LConst_two127 (or built inline).
# Uses r5-r12.
#
.globl p384_felem_diff128
.type	p384_felem_diff128, \@function
.align 4
p384_felem_diff128:

	addis	5, 2, .LConst_two127\@toc\@ha
	addi	5, 5, .LConst_two127\@toc\@l

	ld	10, 0(3)
	ld	8, 8(3)
	li	9, 0
	addc	10, 10, 9
	li	7, -1
	rldicr	7, 7, 0, 0	# two127
	adde	8, 8, 7
	ld	11, 0(4)
	ld	12, 8(4)
	subfc	11, 11, 10
	subfe	12, 12, 8
	std	11, 0(3)	# out0
	std	12, 8(3)

	# two127m71 = (r10, r9)
	ld	8, 16(3)
	ld	7, 24(3)
	ld	10, 24(5)	# two127m71
	addc	8, 8, 9
	adde	7, 7, 10
	ld	11, 16(4)
	ld	12, 24(4)
	subfc	11, 11, 8
	subfe	12, 12, 7
	std	11, 16(3)	# out1
	std	12, 24(3)

	ld	8, 32(3)
	ld	7, 40(3)
	addc	8, 8, 9
	adde	7, 7, 10
	ld	11, 32(4)
	ld	12, 40(4)
	subfc	11, 11, 8
	subfe	12, 12, 7
	std	11, 32(3)	# out2
	std	12, 40(3)

	ld	8, 48(3)
	ld	7, 56(3)
	addc	8, 8, 9
	adde	7, 7, 10
	ld	11, 48(4)
	ld	12, 56(4)
	subfc	11, 11, 8
	subfe	12, 12, 7
	std	11, 48(3)	# out3
	std	12, 56(3)

	ld	8, 64(3)
	ld	7, 72(3)
	addc	8, 8, 9
	adde	7, 7, 10
	ld	11, 64(4)
	ld	12, 72(4)
	subfc	11, 11, 8
	subfe	12, 12, 7
	std	11, 64(3)	# out4
	std	12, 72(3)

	ld	8, 80(3)
	ld	7, 88(3)
	addc	8, 8, 9
	adde	7, 7, 10
	ld	11, 80(4)
	ld	12, 88(4)
	subfc	11, 11, 8
	subfe	12, 12, 7
	std	11, 80(3)	# out5
	std	12, 88(3)

	ld	8, 96(3)
	ld	7, 104(3)
	ld	6, 40(5)	# two127p111m79m71
	addc	8, 8, 9
	adde	7, 7, 6
	ld	11, 96(4)
	ld	12, 104(4)
	subfc	11, 11, 8
	subfe	12, 12, 7
	std	11, 96(3)	# out6
	std	12, 104(3)

	ld	8, 112(3)
	ld	7, 120(3)
	ld	6, 56(5)	# two127m119m71
	addc	8, 8, 9
	adde	7, 7, 6
	ld	11, 112(4)
	ld	12, 120(4)
	subfc	11, 11, 8
	subfe	12, 12, 7
	std	11, 112(3)	# out7
	std	12, 120(3)

	ld	8, 128(3)
	ld	7, 136(3)
	ld	6, 72(5)	# two127m95m71
	addc	8, 8, 9
	adde	7, 7, 6
	ld	11, 128(4)
	ld	12, 136(4)
	subfc	11, 11, 8
	subfe	12, 12, 7
	std	11, 128(3)	# out8
	std	12, 136(3)

	ld	8, 144(3)
	ld	7, 152(3)
	addc	8, 8, 9
	adde	7, 7, 10
	ld	11, 144(4)
	ld	12, 152(4)
	subfc	11, 11, 8
	subfe	12, 12, 7
	std	11, 144(3)	# out9
	std	12, 152(3)

	ld	8, 160(3)
	ld	7, 168(3)
	addc	8, 8, 9
	adde	7, 7, 10
	ld	11, 160(4)
	ld	12, 168(4)
	subfc	11, 11, 8
	subfe	12, 12, 7
	std	11, 160(3)	# out10
	std	12, 168(3)

	ld	8, 176(3)
	ld	7, 184(3)
	addc	8, 8, 9
	adde	7, 7, 10
	ld	11, 176(4)
	ld	12, 184(4)
	subfc	11, 11, 8
	subfe	12, 12, 7
	std	11, 176(3)	# out11
	std	12, 184(3)

	ld	8, 192(3)
	ld	7, 200(3)
	addc	8, 8, 9
	adde	7, 7, 10
	ld	11, 192(4)
	ld	12, 200(4)
	subfc	11, 11, 8
	subfe	12, 12, 7
	std	11, 192(3)	# out12
	std	12, 200(3)

	blr
.size	p384_felem_diff128,.-p384_felem_diff128

# 128-bit constants for p384_felem_diff128, 16 bytes each, written as four
# 32-bit words starting with the least significant word.
.data
.align 4
.LConst_two127:
#two127
.long 0x00000000, 0x00000000, 0x00000000, 0x80000000
#two127m71
.long 0x00000000, 0x00000000, 0xffffff80, 0x7fffffff
#two127p111m79m71
.long 0x00000000, 0x00000000, 0xffff7f80, 0x80007fff
#two127m119m71
.long 0x00000000, 0x00000000, 0xffffff80, 0x7f7fffff
#two127m95m71
.long 0x00000000, 0x00000000, 0x7fffff80, 0x7fffffff

.text

#
# p384_felem_diff_128_64: r3 = out (widefelem, updated in place),
#                         r4 = in (felem)
#
# For each of the first 7 limbs: out[i] = out[i] + K[i] - in[i], where the
# 64-bit in[i] is zero-extended (r7 = 0 is its high doubleword) and K[i] is
# the constant named in the comments.  Uses r5-r12.
#
.globl p384_felem_diff_128_64
.type	p384_felem_diff_128_64, \@function
.align 4
p384_felem_diff_128_64:
	addis	5, 2, .LConst_128_two64\@toc\@ha
	addi	5, 5, .LConst_128_two64\@toc\@l

	ld	9, 0(3)
	ld	10, 8(3)
	ld	8, 48(5)	# two64p48m16
	li	7, 0
	addc	9, 9, 8
	li	6, 1
	adde	10, 10, 6
	ld	11, 0(4)
	subfc	8, 11, 9
	subfe	12, 7, 10
	std	8, 0(3)		# out0
	std	12, 8(3)

	ld	9, 16(3)
	ld	10, 24(3)
	ld	8, 0(5)		# two64m56m8
	addc	9, 9, 8
	addze	10, 10
	ld	11, 8(4)
	subfc	11, 11, 9
	subfe	12, 7, 10
	std	11, 16(3)	# out1
	std	12, 24(3)

	ld	9, 32(3)
	ld	10, 40(3)
	ld	8, 16(5)	# two64m32m8
	addc	9, 9, 8
	addze	10, 10
	ld	11, 16(4)
	subfc	11, 11, 9
	subfe	12, 7, 10
	std	11, 32(3)	# out2
	std	12, 40(3)

	ld	10, 48(3)
	ld	8, 56(3)
	#ld	9, 32(5)	# two64m8
	li	9, -256		# two64m8
	addc	10, 10, 9
	addze	8, 8
	ld	11, 24(4)
	subfc	11, 11, 10
	subfe	12, 7, 8
	std	11, 48(3)	# out3
	std	12, 56(3)

	ld	10, 64(3)
	ld	8, 72(3)
	addc	10, 10, 9
	addze	8, 8
	ld	11, 32(4)
	subfc	11, 11, 10
	subfe	12, 7, 8
	std	11, 64(3)	# out4
	std	12, 72(3)

	ld	10, 80(3)
	ld	8, 88(3)
	addc	10, 10, 9
	addze	8, 8
	ld	11, 40(4)
	subfc	11, 11, 10
	subfe	12, 7, 8
	std	11, 80(3)	# out5
	std	12, 88(3)

	ld	10, 96(3)
	ld	8, 104(3)
	addc	10, 10, 9
	addze	9, 8
	ld	11, 48(4)
	subfc	11, 11, 10
	subfe	12, 7, 9
	std	11, 96(3)	# out6
	std	12, 104(3)

	blr
.size	p384_felem_diff_128_64,.-p384_felem_diff_128_64

# Constants for p384_felem_diff_128_64 and p384_felem_diff64, 16 bytes each,
# written as four 32-bit words starting with the least significant word.
.data
.align 4
.LConst_128_two64:
#two64m56m8
.long 0xffffff00, 0xfeffffff, 0x00000000, 0x00000000
#two64m32m8
.long 0xffffff00, 0xfffffffe, 0x00000000, 0x00000000
#two64m8
.long 0xffffff00, 0xffffffff, 0x00000000, 0x00000000
#two64p48m16
.long 0xffff0000, 0x0000ffff, 0x00000001, 0x00000000

.LConst_two60:
#two60m52m4
.long 0xfffffff0, 0x0fefffff, 0x0, 0x0
#two60p44m12
.long 0xfffff000, 0x10000fff, 0x0, 0x0
#two60m28m4
.long 0xeffffff0, 0x0fffffff, 0x0, 0x0
#two60m4
.long 0xfffffff0, 0x0fffffff, 0x0, 0x0

.text
#
# static void felem_diff64(felem out, const felem in)
#
# r3 = out (updated in place), r4 = in.  For each of the 7 limbs:
# out[i] = out[i] + K[i] - in[i], with K[i] the constant named in the
# comments.  Uses r5 and r7-r12.
#
.globl p384_felem_diff64
.type	p384_felem_diff64, \@function
.align 4
p384_felem_diff64:
	addis	5, 2, .LConst_two60\@toc\@ha
	addi	5, 5, .LConst_two60\@toc\@l

	ld	9, 0(3)
	ld	8, 16(5)	# two60p44m12
	li	7, 0		# r7 is not read again in this routine
	add	9, 9, 8
	ld	11, 0(4)
	subf	8, 11, 9
	std	8, 0(3)		# out0

	ld	9, 8(3)
	ld	8, 0(5)		# two60m52m4
	add	9, 9, 8
	ld	11, 8(4)
	subf	11, 11, 9
	std	11, 8(3)	# out1

	ld	9, 16(3)
	ld	8, 32(5)	# two60m28m4
	add	9, 9, 8
	ld	11, 16(4)
	subf	11, 11, 9
	std	11, 16(3)	# out2

	ld	10, 24(3)
	ld	9, 48(5)	# two60m4
	add	10, 10, 9
	ld	12, 24(4)
	subf	12, 12, 10
	std	12, 24(3)	# out3

	ld	10, 32(3)
	add	10, 10, 9
	ld	11, 32(4)
	subf	11, 11, 10
	std	11, 32(3)	# out4

	ld	10, 40(3)
	add	10, 10, 9
	ld	12, 40(4)
	subf	12, 12, 10
	std	12, 40(3)	# out5

	ld	10, 48(3)
	add	10, 10, 9
	ld	11, 48(4)
	subf	11, 11, 10
	std	11, 48(3)	# out6

	blr
.size	p384_felem_diff64,.-p384_felem_diff64

.text
#
# Shift 128 bits right <nbits>
#
# (o_h, o_l) = (in_h, in_l) >> nbits.  o_l is written before in_h is read
# for the last time, so o_l must not be the same register as in_h.
#
.macro SHR o_h o_l in_h in_l nbits
	srdi	\\o_l, \\in_l, \\nbits		# shift lower right <nbits>
	rldimi	\\o_l, \\in_h, 64-\\nbits, 0	# insert <64-nbits> from hi
	srdi	\\o_h, \\in_h, \\nbits		# shift higher right <nbits>
.endm

#
# static void felem_reduce(felem out, const widefelem in)
#
# r3 = out, r4 = in.  Wrapper that saves the non-volatile registers r14-r31
# used by the core routine, keeps the return address in r0 and calls
# _p384_felem_reduce_core.
#
.globl p384_felem_reduce
.type	p384_felem_reduce,\@function
.align 4
p384_felem_reduce:

	stdu	1, -208(1)
	mflr	0
	std	14, 56(1)
	std	15, 64(1)
	std	16, 72(1)
	std	17, 80(1)
	std	18, 88(1)
	std	19, 96(1)
	std	20, 104(1)
	std	21, 112(1)
	std	22, 120(1)
	std	23, 128(1)
	std	24, 136(1)
	std	25, 144(1)
	std	26, 152(1)
	std	27, 160(1)
	std	28, 168(1)
	std	29, 176(1)
	std	30, 184(1)
	std	31, 192(1)

	bl	_p384_felem_reduce_core

	mtlr	0
	ld	14, 56(1)
	ld	15, 64(1)
	ld	16, 72(1)
	ld	17, 80(1)
	ld	18, 88(1)
	ld	19, 96(1)
	ld	20, 104(1)
	ld	21, 112(1)
	ld	22, 120(1)
	ld	23, 128(1)
	ld	24, 136(1)
	ld	25, 144(1)
	ld	26, 152(1)
	ld	27, 160(1)
	ld	28, 168(1)
	ld	29, 176(1)
	ld	30, 184(1)
	ld	31, 192(1)
	addi	1, 1, 208
	blr
.size	p384_felem_reduce,.-p384_felem_reduce

#
# Felem reduction core function -
# r3 and r4 need to be pre-loaded.
#
# Register use: acc[i] lives in (r15+2i, r14+2i) as (high, low) for
# i = 0..6, acc[7] in (r29, r28) and acc[8] in (r31, r30).  r5/r6 hold the
# limb being eliminated, r7 is the all-ones mask source, r9-r11 are
# scratch and r12 addresses .LConst.  r0 is left untouched so callers may
# keep the return address there.
#
.type	_p384_felem_reduce_core,\@function
.align 4
_p384_felem_reduce_core:
	addis	12, 2, .LConst\@toc\@ha
	addi	12, 12, .LConst\@toc\@l

	# load constant p
	ld	11, 8(12)	# hi - two124m68

	# acc[6] = in[6] + two124m68;
	ld	26, 96(4)	# in[6].l
	ld	27, 96+8(4)	# in[6].h
	add	27, 27, 11

	# acc[5] = in[5] + two124m68;
	ld	24, 80(4)	# in[5].l
	ld	25, 80+8(4)	# in[5].h
	add	25, 25, 11

	# acc[4] = in[4] + two124m68;
	ld	22, 64(4)	# in[4].l
	ld	23, 64+8(4)	# in[4].h
	add	23, 23, 11

	# acc[3] = in[3] + two124m68;
	ld	20, 48(4)	# in[3].l
	ld	21, 48+8(4)	# in[3].h
	add	21, 21, 11

	ld	11, 48+8(12)	# hi - two124m92m68

	# acc[2] = in[2] + two124m92m68;
	ld	18, 32(4)	# in[2].l
	ld	19, 32+8(4)	# in[2].h
	add	19, 19, 11

	ld	11, 16+8(12)	# high - two124m116m68

	# acc[1] = in[1] + two124m116m68;
	ld	16, 16(4)	# in[1].l
	ld	17, 16+8(4)	# in[1].h
	add	17, 17, 11

	ld	11, 32+8(12)	# high - two124p108m76

	# acc[0] = in[0] + two124p108m76;
	ld	14, 0(4)	# in[0].l
	ld	15, 0+8(4)	# in[0].h
	add	15, 15, 11

	# compute mask
	li	7, -1

	# Eliminate in[12]

	# acc[8] += in[12] >> 32;
	ld	5, 192(4)	# in[12].l
	ld	6, 192+8(4)	# in[12].h
	SHR 9, 10, 6, 5, 32
	ld	30, 128(4)	# in[8].l
	ld	31, 136(4)	# in[8].h
	addc	30, 30, 10
	adde	31, 31, 9

	# acc[7] += (in[12] & 0xffffffff) << 24;
	srdi	11, 7, 32	# 0xffffffff
	and	11, 11, 5
	sldi	11, 11, 24	# << 24
	ld	28, 112(4)	# in[7].l
	ld	29, 120(4)	# in[7].h
	addc	28, 28, 11
	addze	29, 29

	# acc[7] += in[12] >> 8;
	SHR 9, 10, 6, 5, 8
	addc	28, 28, 10
	adde	29, 29, 9

	# acc[6] += (in[12] & 0xff) << 48;
	andi.	11, 5, 0xff
	sldi	11, 11, 48
	addc	26, 26, 11
	addze	27, 27

	# acc[6] -= in[12] >> 16;
	SHR 9, 10, 6, 5, 16
	subfc	26, 10, 26
	subfe	27, 9, 27

	# acc[5] -= (in[12] & 0xffff) << 40;
	srdi	11, 7, 48	# 0xffff
	and	11, 11, 5
	sldi	11, 11, 40	# << 40
	li	9, 0
	subfc	24, 11, 24
	subfe	25, 9, 25

	# acc[6] += in[12] >> 48;
	SHR 9, 10, 6, 5, 48
	addc	26, 26, 10
	adde	27, 27, 9

	# acc[5] += (in[12] & 0xffffffffffff) << 8;
	srdi	11, 7, 16	# 0xffffffffffff
	and	11, 11, 5
	sldi	11, 11, 8	# << 8
	addc	24, 24, 11
	addze	25, 25

	# Eliminate in[11]

	# acc[7] += in[11] >> 32;
	ld	5, 176(4)	# in[11].l
	ld	6, 176+8(4)	# in[11].h
	SHR 9, 10, 6, 5, 32
	addc	28, 28, 10
	adde	29, 29, 9

	# acc[6] += (in[11] & 0xffffffff) << 24;
	srdi	11, 7, 32	# 0xffffffff
	and	11, 11, 5
	sldi	11, 11, 24	# << 24
	addc	26, 26, 11
	addze	27, 27

	# acc[6] += in[11] >> 8;
	SHR 9, 10, 6, 5, 8
	addc	26, 26, 10
	adde	27, 27, 9

	# acc[5] += (in[11] & 0xff) << 48;
	andi.	11, 5, 0xff
	sldi	11, 11, 48
	addc	24, 24, 11
	addze	25, 25

	# acc[5] -= in[11] >> 16;
	SHR 9, 10, 6, 5, 16
	subfc	24, 10, 24
	subfe	25, 9, 25

	# acc[4] -= (in[11] & 0xffff) << 40;
	srdi	11, 7, 48	# 0xffff
	and	11, 11, 5
	sldi	11, 11, 40	# << 40
	li	9, 0
	subfc	22, 11, 22
	subfe	23, 9, 23

	# acc[5] += in[11] >> 48;
	SHR 9, 10, 6, 5, 48
	addc	24, 24, 10
	adde	25, 25, 9

	# acc[4] += (in[11] & 0xffffffffffff) << 8;
	srdi	11, 7, 16	# 0xffffffffffff
	and	11, 11, 5
	sldi	11, 11, 8	# << 8
	addc	22, 22, 11
	addze	23, 23

	# Eliminate in[10]

	# acc[6] += in[10] >> 32;
	ld	5, 160(4)	# in[10].l
	ld	6, 160+8(4)	# in[10].h
	SHR 9, 10, 6, 5, 32
	addc	26, 26, 10
	adde	27, 27, 9

	# acc[5] += (in[10] & 0xffffffff) << 24;
	srdi	11, 7, 32	# 0xffffffff
	and	11, 11, 5
	sldi	11, 11, 24	# << 24
	addc	24, 24, 11
	addze	25, 25

	# acc[5] += in[10] >> 8;
	SHR 9, 10, 6, 5, 8
	addc	24, 24, 10
	adde	25, 25, 9

	# acc[4] += (in[10] & 0xff) << 48;
	andi.	11, 5, 0xff
	sldi	11, 11, 48
	addc	22, 22, 11
	addze	23, 23

	# acc[4] -= in[10] >> 16;
	SHR 9, 10, 6, 5, 16
	subfc	22, 10, 22
	subfe	23, 9, 23

	# acc[3] -= (in[10] & 0xffff) << 40;
	srdi	11, 7, 48	# 0xffff
	and	11, 11, 5
	sldi	11, 11, 40	# << 40
	li	9, 0
	subfc	20, 11, 20
	subfe	21, 9, 21

	# acc[4] += in[10] >> 48;
	SHR 9, 10, 6, 5, 48
	addc	22, 22, 10
	adde	23, 23, 9

	# acc[3] += (in[10] & 0xffffffffffff) << 8;
	srdi	11, 7, 16	# 0xffffffffffff
	and	11, 11, 5
	sldi	11, 11, 8	# << 8
	addc	20, 20, 11
	addze	21, 21

	# Eliminate in[9]

	# acc[5] += in[9] >> 32;
	ld	5, 144(4)	# in[9].l
	ld	6, 144+8(4)	# in[9].h
	SHR 9, 10, 6, 5, 32
	addc	24, 24, 10
	adde	25, 25, 9

	# acc[4] += (in[9] & 0xffffffff) << 24;
	srdi	11, 7, 32	# 0xffffffff
	and	11, 11, 5
	sldi	11, 11, 24	# << 24
	addc	22, 22, 11
	addze	23, 23

	# acc[4] += in[9] >> 8;
	SHR 9, 10, 6, 5, 8
	addc	22, 22, 10
	adde	23, 23, 9

	# acc[3] += (in[9] & 0xff) << 48;
	andi.	11, 5, 0xff
	sldi	11, 11, 48
	addc	20, 20, 11
	addze	21, 21

	# acc[3] -= in[9] >> 16;
	SHR 9, 10, 6, 5, 16
	subfc	20, 10, 20
	subfe	21, 9, 21

	# acc[2] -= (in[9] & 0xffff) << 40;
	srdi	11, 7, 48	# 0xffff
	and	11, 11, 5
	sldi	11, 11, 40	# << 40
	li	9, 0
	subfc	18, 11, 18
	subfe	19, 9, 19

	# acc[3] += in[9] >> 48;
	SHR 9, 10, 6, 5, 48
	addc	20, 20, 10
	adde	21, 21, 9

	# acc[2] += (in[9] & 0xffffffffffff) << 8;
	srdi	11, 7, 16	# 0xffffffffffff
	and	11, 11, 5
	sldi	11, 11, 8	# << 8
	addc	18, 18, 11
	addze	19, 19

	# Eliminate acc[8]

	# acc[4] += acc[8] >> 32;
	mr	5, 30		# acc[8].l
	mr	6, 31		# acc[8].h
	SHR 9, 10, 6, 5, 32
	addc	22, 22, 10
	adde	23, 23, 9

	# acc[3] += (acc[8] & 0xffffffff) << 24;
	srdi	11, 7, 32	# 0xffffffff
	and	11, 11, 5
	sldi	11, 11, 24	# << 24
	addc	20, 20, 11
	addze	21, 21

	# acc[3] += acc[8] >> 8;
	SHR 9, 10, 6, 5, 8
	addc	20, 20, 10
	adde	21, 21, 9

	# acc[2] += (acc[8] & 0xff) << 48;
	andi.	11, 5, 0xff
	sldi	11, 11, 48
	addc	18, 18, 11
	addze	19, 19

	# acc[2] -= acc[8] >> 16;
	SHR 9, 10, 6, 5, 16
	subfc	18, 10, 18
	subfe	19, 9, 19

	# acc[1] -= (acc[8] & 0xffff) << 40;
	srdi	11, 7, 48	# 0xffff
	and	11, 11, 5
	sldi	11, 11, 40	# << 40
	li	9, 0
	subfc	16, 11, 16
	subfe	17, 9, 17

	# acc[2] += acc[8] >> 48;
	SHR 9, 10, 6, 5, 48
	addc	18, 18, 10
	adde	19, 19, 9

	# acc[1] += (acc[8] & 0xffffffffffff) << 8;
	srdi	11, 7, 16	# 0xffffffffffff
	and	11, 11, 5
	sldi	11, 11, 8	# << 8
	addc	16, 16, 11
	addze	17, 17

	# Eliminate acc[7]

	# acc[3] += acc[7] >> 32;
	mr	5, 28		# acc[7].l
	mr	6, 29		# acc[7].h
	SHR 9, 10, 6, 5, 32
	addc	20, 20, 10
	adde	21, 21, 9

	# acc[2] += (acc[7] & 0xffffffff) << 24;
	srdi	11, 7, 32	# 0xffffffff
	and	11, 11, 5
	sldi	11, 11, 24	# << 24
	addc	18, 18, 11
	addze	19, 19

	# acc[2] += acc[7] >> 8;
	SHR 9, 10, 6, 5, 8
	addc	18, 18, 10
	adde	19, 19, 9

	# acc[1] += (acc[7] & 0xff) << 48;
	andi.	11, 5, 0xff
	sldi	11, 11, 48
	addc	16, 16, 11
	addze	17, 17

	# acc[1] -= acc[7] >> 16;
	SHR 9, 10, 6, 5, 16
	subfc	16, 10, 16
	subfe	17, 9, 17

	# acc[0] -= (acc[7] & 0xffff) << 40;
	srdi	11, 7, 48	# 0xffff
	and	11, 11, 5
	sldi	11, 11, 40	# << 40
	li	9, 0
	subfc	14, 11, 14
	subfe	15, 9, 15

	# acc[1] += acc[7] >> 48;
	SHR 9, 10, 6, 5, 48
	addc	16, 16, 10
	adde	17, 17, 9

	# acc[0] += (acc[7] & 0xffffffffffff) << 8;
	srdi	11, 7, 16	# 0xffffffffffff
	and	11, 11, 5
	sldi	11, 11, 8	# << 8
	addc	14, 14, 11
	addze	15, 15

	#
	# Carry 4 -> 5 -> 6
	#
	# acc[5] += acc[4] >> 56;
	# acc[4] &= 0x00ffffffffffffff;
	SHR 9, 10, 23, 22, 56
	addc	24, 24, 10
	adde	25, 25, 9
	srdi	11, 7, 8	# 0x00ffffffffffffff
	and	22, 22, 11
	li	23, 0

	# acc[6] += acc[5] >> 56;
	# acc[5] &= 0x00ffffffffffffff;
	SHR 9, 10, 25, 24, 56
	addc	26, 26, 10
	adde	27, 27, 9
	and	24, 24, 11
	li	25, 0

	# [3]: Eliminate high bits of acc[6]
	# temp = acc[6] >> 48;
	# acc[6] &= 0x0000ffffffffffff;
	SHR 31, 30, 27, 26, 48	# temp = acc[6] >> 48
	srdi	11, 7, 16	# 0x0000ffffffffffff
	and	26, 26, 11
	li	27, 0

	# temp < 2^80
	# acc[3] += temp >> 40;
	SHR 9, 10, 31, 30, 40
	addc	20, 20, 10
	adde	21, 21, 9

	# acc[2] += (temp & 0xffffffffff) << 16;
	srdi	11, 7, 24	# 0xffffffffff
	and	10, 30, 11
	sldi	10, 10, 16
	addc	18, 18, 10
	addze	19, 19

	# acc[2] += temp >> 16;
	SHR 9, 10, 31, 30, 16
	addc	18, 18, 10
	adde	19, 19, 9

	# acc[1] += (temp & 0xffff) << 40;
	srdi	11, 7, 48	# 0xffff
	and	10, 30, 11
	sldi	10, 10, 40
	addc	16, 16, 10
	addze	17, 17

	# acc[1] -= temp >> 24;
	SHR 9, 10, 31, 30, 24
	subfc	16, 10, 16
	subfe	17, 9, 17

	# acc[0] -= (temp & 0xffffff) << 32;
	srdi	11, 7, 40	# 0xffffff
	and	10, 30, 11
	sldi	10, 10, 32
	li	9, 0
	subfc	14, 10, 14
	subfe	15, 9, 15

	# acc[0] += temp;
	addc	14, 14, 30
	adde	15, 15, 31

	# Carry 0 -> 1 -> 2 -> 3 -> 4 -> 5 -> 6
	#
	# acc[1] += acc[0] >> 56; /* acc[1] < acc_old[1] + 2^72 */
	SHR 9, 10, 15, 14, 56
	addc	16, 16, 10
	adde	17, 17, 9

	# acc[0] &= 0x00ffffffffffffff;
	srdi	11, 7, 8	# 0x00ffffffffffffff
	and	14, 14, 11
	li	15, 0

	# acc[2] += acc[1] >> 56; /* acc[2] < acc_old[2] + 2^72 + 2^16 */
	SHR 9, 10, 17, 16, 56
	addc	18, 18, 10
	adde	19, 19, 9

	# acc[1] &= 0x00ffffffffffffff;
	and	16, 16, 11
	li	17, 0

	# acc[3] += acc[2] >> 56; /* acc[3] < acc_old[3] + 2^72 + 2^16 */
	SHR 9, 10, 19, 18, 56
	addc	20, 20, 10
	adde	21, 21, 9

	# acc[2] &= 0x00ffffffffffffff;
	and	18, 18, 11
	li	19, 0

	# acc[4] += acc[3] >> 56;
	SHR 9, 10, 21, 20, 56
	addc	22, 22, 10
	adde	23, 23, 9

	# acc[3] &= 0x00ffffffffffffff;
	and	20, 20, 11
	li	21, 0

	# acc[5] += acc[4] >> 56;
	SHR 9, 10, 23, 22, 56
	addc	24, 24, 10
	adde	25, 25, 9

	# acc[4] &= 0x00ffffffffffffff;
	and	22, 22, 11

	# acc[6] += acc[5] >> 56;
	SHR 9, 10, 25, 24, 56
	addc	26, 26, 10
	adde	27, 27, 9

	# acc[5] &= 0x00ffffffffffffff;
	and	24, 24, 11

	# out[i] = low doubleword of acc[i]
	std	14, 0(3)
	std	16, 8(3)
	std	18, 16(3)
	std	20, 24(3)
	std	22, 32(3)
	std	24, 40(3)
	std	26, 48(3)
	blr
.size	_p384_felem_reduce_core,.-_p384_felem_reduce_core

# 128-bit constants for _p384_felem_reduce_core, 16 bytes each, written as
# four 32-bit words starting with the least significant word.  Only the
# high doubleword (offset +8) of each entry is loaded.
.data
.align 4
.LConst:
# two124m68:
.long 0x0, 0x0, 0xfffffff0, 0xfffffff
# two124m116m68:
.long 0x0, 0x0, 0xfffffff0, 0xfefffff
#two124p108m76:
.long 0x0, 0x0, 0xfffff000, 0x10000fff
#two124m92m68:
.long 0x0, 0x0, 0xeffffff0, 0xfffffff

.text

#
# void p384_felem_square_reduce(felem out, const felem in)
#
# Squares in into a widefelem held on the stack at 208(r1), then reduces
# that temporary into out.  The caller's out pointer is parked at 496(r1)
# across the first call; the return address stays in r0 throughout.
#
.globl p384_felem_square_reduce
.type	p384_felem_square_reduce,\@function
.align 4
p384_felem_square_reduce:
	stdu	1, -512(1)
	mflr	0
	std	14, 56(1)
	std	15, 64(1)
	std	16, 72(1)
	std	17, 80(1)
	std	18, 88(1)
	std	19, 96(1)
	std	20, 104(1)
	std	21, 112(1)
	std	22, 120(1)
	std	23, 128(1)
	std	24, 136(1)
	std	25, 144(1)
	std	26, 152(1)
	std	27, 160(1)
	std	28, 168(1)
	std	29, 176(1)
	std	30, 184(1)
	std	31, 192(1)

	std	3, 496(1)
	addi	3, 1, 208
	bl	_p384_felem_square_core

	mr	4, 3
	ld	3, 496(1)
	bl	_p384_felem_reduce_core

	ld	14, 56(1)
	ld	15, 64(1)
	ld	16, 72(1)
	ld	17, 80(1)
	ld	18, 88(1)
	ld	19, 96(1)
	ld	20, 104(1)
	ld	21, 112(1)
	ld	22, 120(1)
	ld	23, 128(1)
	ld	24, 136(1)
	ld	25, 144(1)
	ld	26, 152(1)
	ld	27, 160(1)
	ld	28, 168(1)
	ld	29, 176(1)
	ld	30, 184(1)
	ld	31, 192(1)
	addi	1, 1, 512
	mtlr	0
	blr
.size	p384_felem_square_reduce,.-p384_felem_square_reduce

#
# void p384_felem_mul_reduce(felem out, const felem in1, const felem in2)
#
# Multiplies in1 by in2 into a widefelem held on the stack at 208(r1), then
# reduces that temporary into out.  The caller's out pointer is parked at
# 496(r1) across the first call; the return address stays in r0 throughout.
#
.globl p384_felem_mul_reduce
.type	p384_felem_mul_reduce,\@function
.align 5
p384_felem_mul_reduce:
	stdu	1, -512(1)
	mflr	0
	std	14, 56(1)
	std	15, 64(1)
	std	16, 72(1)
	std	17, 80(1)
	std	18, 88(1)
	std	19, 96(1)
	std	20, 104(1)
	std	21, 112(1)
	std	22, 120(1)
	std	23, 128(1)
	std	24, 136(1)
	std	25, 144(1)
	std	26, 152(1)
	std	27, 160(1)
	std	28, 168(1)
	std	29, 176(1)
	std	30, 184(1)
	std	31, 192(1)

	std	3, 496(1)
	addi	3, 1, 208
	bl	_p384_felem_mul_core

	mr	4, 3
	ld	3, 496(1)
	bl	_p384_felem_reduce_core

	ld	14, 56(1)
	ld	15, 64(1)
	ld	16, 72(1)
	ld	17, 80(1)
	ld	18, 88(1)
	ld	19, 96(1)
	ld	20, 104(1)
	ld	21, 112(1)
	ld	22, 120(1)
	ld	23, 128(1)
	ld	24, 136(1)
	ld	25, 144(1)
	ld	26, 152(1)
	ld	27, 160(1)
	ld	28, 168(1)
	ld	29, 176(1)
	ld	30, 184(1)
	ld	31, 192(1)
	addi	1, 1, 512
	mtlr	0
	blr
.size	p384_felem_mul_reduce,.-p384_felem_mul_reduce
___

# Expand any backtick-quoted Perl expressions embedded in the assembly text,
# then emit it through the translator pipe and make sure the pipe closed
# cleanly.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT or die "error closing STDOUT: $!";