/* SPDX-License-Identifier: GPL-2.0-or-later */
#
# This code is taken from CRYPTOGAMs[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://github.com/dot-asm/cryptogams/

# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#       * Redistributions of source code must retain copyright notices,
#         this list of conditions and the following disclaimer.
#
#       * Redistributions in binary form must reproduce the above
#         copyright notice, this list of conditions and the following
#         disclaimer in the documentation and/or other materials
#         provided with the distribution.
#
#       * Neither the name of the CRYPTOGAMS nor the names of its
#         copyright holder and contributors may be used to endorse or
#         promote products derived from this software without specific
#         prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================

#
# ====================================================================
# Written and Modified by Danny Tsen <dtsen@us.ibm.com>
# - Added x25519_fe51_sqr_times, x25519_fe51_frombytes, x25519_fe51_tobytes
#   and x25519_cswap
#
# Copyright 2024- IBM Corp.
#
# X25519 lower-level primitives for PPC64.
#

#include <linux/linkage.h>

/*
 * Conventions used by every routine in this file
 *
 * Registers are written as bare numbers (1 = r1 = stack pointer,
 * 3/4/5 = the registers the routines read their operands from).
 *
 * A field element is held as five 64-bit words ("limbs") at byte
 * offsets 0, 8, 16, 24 and 32.  The code treats limb i as having weight
 * 2^(51*i): carries are extracted with a shift by 51 and limbs are
 * masked with 2^51 - 1.  The carry out of limb 4 (weight 2^255) is
 * multiplied by 19 and added back into limb 0, i.e. values are reduced
 * modulo 2^255 - 19.
 *
 * Inside the multiply/square routines the five 128-bit column sums are
 * kept in register pairs (high:low):
 *
 *	h0 = 23:22   h1 = 25:24   h2 = 27:26   h3 = 29:28   h4 = 31:30
 *
 * Registers 12 and 21 (or 12 with 10/11) hold the low/high halves of
 * the product currently being accumulated.
 *
 * NOTE(review): the input limbs are presumably bounded so that the
 * 128-bit column sums cannot overflow; SOURCE does not state the bound,
 * confirm against the callers.
 */

.text

/*
 * x25519_fe51_mul
 *
 * In:   r4 = address of operand f (five limbs f0..f4)
 *       r5 = address of operand g (five limbs g0..g4)
 * Out:  five limbs of f*g, reduced as described above, stored at r3
 * Uses: r0, r4, r6-r12 as scratch; r21-r31 are saved in a 144-byte
 *       stack frame and restored before returning.
 *
 * Column sums computed (19*fi is formed in place once fi itself is no
 * longer needed):
 *
 *	h0 = f0*g0 + 19*(f4*g1 + f3*g2 + f2*g3 + f1*g4)
 *	h1 = f1*g0 + f0*g1 + 19*(f4*g2 + f3*g3 + f2*g4)
 *	h2 = f2*g0 + f1*g1 + f0*g2 + 19*(f4*g3 + f3*g4)
 *	h3 = f3*g0 + f2*g1 + f1*g2 + f0*g3 + 19*f4*g4
 *	h4 = f4*g0 + f3*g1 + f2*g2 + f1*g3 + f0*g4
 *
 * The code from .Lfe51_reduce to the blr is also the tail of
 * x25519_fe51_sqr and x25519_fe51_mul121666, which branch to it.  Those
 * routines therefore build the same stack frame and leave h0..h4 in the
 * same register pairs.
 */
.align 5
SYM_FUNC_START(x25519_fe51_mul)

	stdu	1,-144(1)		# new 144-byte frame, back chain stored at 0(1)
	std	21,56(1)		# save r21-r31, which are used below
	std	22,64(1)
	std	23,72(1)
	std	24,80(1)
	std	25,88(1)
	std	26,96(1)
	std	27,104(1)
	std	28,112(1)
	std	29,120(1)
	std	30,128(1)
	std	31,136(1)

	ld	6,0(5)			# r6  = g0
	ld	7,0(4)			# r7  = f0
	ld	8,8(4)			# r8  = f1
	ld	9,16(4)			# r9  = f2
	ld	10,24(4)		# r10 = f3
	ld	11,32(4)		# r11 = f4

	mulld	22,7,6			# h0 = f0*g0
	mulhdu	23,7,6

	mulld	24,8,6			# h1 = f1*g0
	mulhdu	25,8,6

	mulld	30,11,6			# h4 = f4*g0
	mulhdu	31,11,6
	ld	4,8(5)			# r4 = g1 (all of f is loaded, r4 is free)
	mulli	11,11,19		# r11 = 19*f4 from here on

	mulld	26,9,6			# h2 = f2*g0
	mulhdu	27,9,6

	mulld	28,10,6			# h3 = f3*g0
	mulhdu	29,10,6
	mulld	12,11,4			# h0 += 19*f4*g1
	mulhdu	21,11,4
	addc	22,22,12
	adde	23,23,21

	mulld	12,7,4			# h1 += f0*g1
	mulhdu	21,7,4
	addc	24,24,12
	adde	25,25,21

	mulld	12,10,4			# h4 += f3*g1
	mulhdu	21,10,4
	ld	6,16(5)			# r6 = g2
	mulli	10,10,19		# r10 = 19*f3 from here on
	addc	30,30,12
	adde	31,31,21

	mulld	12,8,4			# h2 += f1*g1
	mulhdu	21,8,4
	addc	26,26,12
	adde	27,27,21

	mulld	12,9,4			# h3 += f2*g1
	mulhdu	21,9,4
	addc	28,28,12
	adde	29,29,21
	mulld	12,10,6			# h0 += 19*f3*g2
	mulhdu	21,10,6
	addc	22,22,12
	adde	23,23,21

	mulld	12,11,6			# h1 += 19*f4*g2
	mulhdu	21,11,6
	addc	24,24,12
	adde	25,25,21

	mulld	12,9,6			# h4 += f2*g2
	mulhdu	21,9,6
	ld	4,24(5)			# r4 = g3
	mulli	9,9,19			# r9 = 19*f2 from here on
	addc	30,30,12
	adde	31,31,21

	mulld	12,7,6			# h2 += f0*g2
	mulhdu	21,7,6
	addc	26,26,12
	adde	27,27,21

	mulld	12,8,6			# h3 += f1*g2
	mulhdu	21,8,6
	addc	28,28,12
	adde	29,29,21
	mulld	12,9,4			# h0 += 19*f2*g3
	mulhdu	21,9,4
	addc	22,22,12
	adde	23,23,21

	mulld	12,10,4			# h1 += 19*f3*g3
	mulhdu	21,10,4
	addc	24,24,12
	adde	25,25,21

	mulld	12,8,4			# h4 += f1*g3
	mulhdu	21,8,4
	ld	6,32(5)			# r6 = g4
	mulli	8,8,19			# r8 = 19*f1 from here on
	addc	30,30,12
	adde	31,31,21

	mulld	12,11,4			# h2 += 19*f4*g3
	mulhdu	21,11,4
	addc	26,26,12
	adde	27,27,21

	mulld	12,7,4			# h3 += f0*g3
	mulhdu	21,7,4
	addc	28,28,12
	adde	29,29,21
	mulld	12,8,6			# h0 += 19*f1*g4
	mulhdu	21,8,6
	addc	22,22,12
	adde	23,23,21

	mulld	12,9,6			# h1 += 19*f2*g4
	mulhdu	21,9,6
	addc	24,24,12
	adde	25,25,21

	mulld	12,10,6			# h2 += 19*f3*g4
	mulhdu	21,10,6
	addc	26,26,12
	adde	27,27,21

	mulld	12,11,6			# h3 += 19*f4*g4
	mulhdu	21,11,6
	addc	28,28,12
	adde	29,29,21

	mulld	12,7,6			# h4 += f0*g4
	mulhdu	21,7,6
	addc	30,30,12
	adde	31,31,21

/*
 * Shared reduction and epilogue.
 *
 * Entry: h0..h4 in 23:22, 25:24, 27:26, 29:28, 31:30; r3 = output
 * address; the 144-byte frame built by the prologue above is live.
 *
 * Each srdi/insrdi pair forms (hi:lo) >> 51 truncated to 64 bits: srdi
 * leaves the top 13 bits of the low word, insrdi places the low 51 bits
 * of the high word above them.
 *
 * Result limbs end up in r7 (0), r8 (1), r9 (2), r10 (3), r11 (4).
 * Limbs 0, 2 and 4 are masked to 51 bits; limbs 1 and 3 receive one
 * last carry after masking and are stored without a further mask.
 */
.Lfe51_reduce:
	li	0,-1
	srdi	0,0,13			# r0 = 2^51 - 1 (limb mask)

	srdi	12,26,51		# r12 = h2 >> 51
	and	9,26,0			# r9  = limb 2 = h2 & mask
	insrdi	12,27,51,0
	srdi	21,22,51		# r21 = h0 >> 51
	and	7,22,0			# r7  = limb 0 = h0 & mask
	insrdi	21,23,51,0
	addc	28,28,12		# h3 += carry from h2
	addze	29,29
	addc	24,24,21		# h1 += carry from h0
	addze	25,25

	srdi	12,28,51		# r12 = h3 >> 51
	and	10,28,0			# r10 = limb 3 = h3 & mask
	insrdi	12,29,51,0
	srdi	21,24,51		# r21 = h1 >> 51
	and	8,24,0			# r8  = limb 1 = h1 & mask
	insrdi	21,25,51,0
	addc	30,30,12		# h4 += carry from h3
	addze	31,31
	add	9,9,21			# limb 2 += carry from h1

	srdi	12,30,51		# r12 = h4 >> 51
	and	11,30,0			# r11 = limb 4 = h4 & mask
	insrdi	12,31,51,0
	mulli	12,12,19		# carry out of limb 4 is worth 19 at limb 0

	add	7,7,12			# limb 0 += 19 * carry

	srdi	21,9,51			# second pass: limb 2 -> limb 3
	and	9,9,0
	add	10,10,21

	srdi	12,7,51			# second pass: limb 0 -> limb 1
	and	7,7,0
	add	8,8,12

	std	9,16(3)			# store the five result limbs
	std	10,24(3)
	std	11,32(3)
	std	7,0(3)
	std	8,8(3)

	ld	21,56(1)		# restore r21-r31 and pop the frame
	ld	22,64(1)
	ld	23,72(1)
	ld	24,80(1)
	ld	25,88(1)
	ld	26,96(1)
	ld	27,104(1)
	ld	28,112(1)
	ld	29,120(1)
	ld	30,128(1)
	ld	31,136(1)
	addi	1,1,144
	blr
SYM_FUNC_END(x25519_fe51_mul)

/*
 * x25519_fe51_sqr
 *
 * In:   r4 = address of operand f (five limbs f0..f4)
 * Out:  five limbs of f*f stored at r3 (written by .Lfe51_reduce)
 * Uses: r0, r5, r6-r12 as scratch; r21-r31 saved/restored via the same
 *       144-byte frame layout as x25519_fe51_mul.
 *
 * Column sums computed:
 *
 *	h0 = f0*f0 + 2*19*f1*f4 + 2*19*f2*f3
 *	h1 = 2*f0*f1 + 19*f3*f3 + 2*19*f2*f4
 *	h2 = 2*f0*f2 + f1*f1 + 2*19*f3*f4
 *	h3 = 2*f0*f3 + 2*f1*f2 + 19*f4*f4
 *	h4 = 2*f0*f4 + 2*f1*f3 + f2*f2
 *
 * r6 holds the current doubled limb, r21 = 19*f4, r5 = 19*f3.  r11 and
 * later r10 are reused as the high-half scratch once f4 and f3 have
 * been consumed, so the instruction order matters.
 *
 * Does not return by itself: it branches to .Lfe51_reduce, which
 * reduces, stores, restores r21-r31, pops the frame and returns.
 */
.align 5
SYM_FUNC_START(x25519_fe51_sqr)

	stdu	1,-144(1)		# same frame layout as x25519_fe51_mul
	std	21,56(1)
	std	22,64(1)
	std	23,72(1)
	std	24,80(1)
	std	25,88(1)
	std	26,96(1)
	std	27,104(1)
	std	28,112(1)
	std	29,120(1)
	std	30,128(1)
	std	31,136(1)

	ld	7,0(4)			# r7  = f0
	ld	8,8(4)			# r8  = f1
	ld	9,16(4)			# r9  = f2
	ld	10,24(4)		# r10 = f3
	ld	11,32(4)		# r11 = f4

	add	6,7,7			# r6  = 2*f0
	mulli	21,11,19		# r21 = 19*f4

	mulld	22,7,7			# h0 = f0*f0
	mulhdu	23,7,7
	mulld	24,8,6			# h1 = 2*f0*f1
	mulhdu	25,8,6
	mulld	26,9,6			# h2 = 2*f0*f2
	mulhdu	27,9,6
	mulld	28,10,6			# h3 = 2*f0*f3
	mulhdu	29,10,6
	mulld	30,11,6			# h4 = 2*f0*f4
	mulhdu	31,11,6
	add	6,8,8			# r6 = 2*f1
	mulld	12,11,21		# h3 += 19*f4*f4
	mulhdu	11,11,21		# r11 (f4) becomes high-half scratch
	addc	28,28,12
	adde	29,29,11

	mulli	5,10,19			# r5 = 19*f3

	mulld	12,8,8			# h2 += f1*f1
	mulhdu	11,8,8
	addc	26,26,12
	adde	27,27,11
	mulld	12,9,6			# h3 += 2*f1*f2
	mulhdu	11,9,6
	addc	28,28,12
	adde	29,29,11
	mulld	12,10,6			# h4 += 2*f1*f3
	mulhdu	11,10,6
	addc	30,30,12
	adde	31,31,11
	mulld	12,21,6			# h0 += 2*f1*19*f4
	mulhdu	11,21,6
	add	6,10,10			# r6 = 2*f3
	addc	22,22,12
	adde	23,23,11
	mulld	12,10,5			# h1 += 19*f3*f3
	mulhdu	10,10,5			# r10 (f3) becomes high-half scratch
	addc	24,24,12
	adde	25,25,10
	mulld	12,6,21			# h2 += 2*f3*19*f4
	mulhdu	10,6,21
	add	6,9,9			# r6 = 2*f2
	addc	26,26,12
	adde	27,27,10

	mulld	12,9,9			# h4 += f2*f2
	mulhdu	10,9,9
	addc	30,30,12
	adde	31,31,10
	mulld	12,5,6			# h0 += 2*f2*19*f3
	mulhdu	10,5,6
	addc	22,22,12
	adde	23,23,10
	mulld	12,21,6			# h1 += 2*f2*19*f4
	mulhdu	10,21,6
	addc	24,24,12
	adde	25,25,10

	b	.Lfe51_reduce		# reduce, store and return via the shared tail
SYM_FUNC_END(x25519_fe51_sqr)

/*
 * x25519_fe51_mul121666
 *
 * In:   r4 = address of operand f (five limbs f0..f4)
 * Out:  five limbs of 121666*f stored at r3 (written by .Lfe51_reduce)
 * Uses: r0, r6-r12 as scratch; r21-r31 saved/restored via the same
 *       144-byte frame layout as x25519_fe51_mul.
 *
 * Every limb is multiplied by the constant into its own 128-bit column
 * sum; there are no cross terms.  Returns through .Lfe51_reduce.
 */
.align 5
SYM_FUNC_START(x25519_fe51_mul121666)

	stdu	1,-144(1)		# same frame layout as x25519_fe51_mul
	std	21,56(1)
	std	22,64(1)
	std	23,72(1)
	std	24,80(1)
	std	25,88(1)
	std	26,96(1)
	std	27,104(1)
	std	28,112(1)
	std	29,120(1)
	std	30,128(1)
	std	31,136(1)

	lis	6,1			# r6 = 0x10000
	ori	6,6,56130		# r6 = 65536 + 56130 = 121666
	ld	7,0(4)			# r7..r11 = f0..f4
	ld	8,8(4)
	ld	9,16(4)
	ld	10,24(4)
	ld	11,32(4)

	mulld	22,7,6			# h0 = 121666*f0
	mulhdu	23,7,6
	mulld	24,8,6			# h1 = 121666*f1
	mulhdu	25,8,6
	mulld	26,9,6			# h2 = 121666*f2
	mulhdu	27,9,6
	mulld	28,10,6			# h3 = 121666*f3
	mulhdu	29,10,6
	mulld	30,11,6			# h4 = 121666*f4
	mulhdu	31,11,6

	b	.Lfe51_reduce		# reduce, store and return via the shared tail
SYM_FUNC_END(x25519_fe51_mul121666)

/*
 * x25519_fe51_sqr_times
 *
 * In:   r4 = address of operand f (five limbs)
 *       r5 = number of squarings, moved into CTR
 * Out:  five limbs of the repeatedly squared value stored at r3
 * Uses: r0, r5, r6-r12 as scratch; r21-r31 saved/restored via a
 *       144-byte frame.
 *
 * Each loop iteration is the x25519_fe51_sqr column computation
 * followed by an inline copy of the .Lfe51_reduce carry chain.  The
 * reduced limbs are left in r7..r11, which is where the next iteration
 * reads them, so nothing is stored to memory until the loop ends.
 *
 * The loop body executes before bdnz tests the counter, so one squaring
 * is always performed; a count of 0 does not mean "no squarings" (CTR
 * would wrap).  NOTE(review): callers presumably always pass a count of
 * at least 1 - confirm.
 *
 * r5 is overwritten inside the loop (19*f3); that is safe because the
 * count has already been copied to CTR.
 */
.align 5
SYM_FUNC_START(x25519_fe51_sqr_times)

	stdu	1,-144(1)		# same frame layout as x25519_fe51_mul
	std	21,56(1)
	std	22,64(1)
	std	23,72(1)
	std	24,80(1)
	std	25,88(1)
	std	26,96(1)
	std	27,104(1)
	std	28,112(1)
	std	29,120(1)
	std	30,128(1)
	std	31,136(1)

	ld	7,0(4)			# r7..r11 = f0..f4
	ld	8,8(4)
	ld	9,16(4)
	ld	10,24(4)
	ld	11,32(4)

	mtctr	5			# CTR = iteration count

.Lsqr_times_loop:
	add	6,7,7			# r6  = 2*f0
	mulli	21,11,19		# r21 = 19*f4

	mulld	22,7,7			# h0 = f0*f0
	mulhdu	23,7,7
	mulld	24,8,6			# h1 = 2*f0*f1
	mulhdu	25,8,6
	mulld	26,9,6			# h2 = 2*f0*f2
	mulhdu	27,9,6
	mulld	28,10,6			# h3 = 2*f0*f3
	mulhdu	29,10,6
	mulld	30,11,6			# h4 = 2*f0*f4
	mulhdu	31,11,6
	add	6,8,8			# r6 = 2*f1
	mulld	12,11,21		# h3 += 19*f4*f4
	mulhdu	11,11,21		# r11 (f4) becomes high-half scratch
	addc	28,28,12
	adde	29,29,11

	mulli	5,10,19			# r5 = 19*f3 (count already lives in CTR)

	mulld	12,8,8			# h2 += f1*f1
	mulhdu	11,8,8
	addc	26,26,12
	adde	27,27,11
	mulld	12,9,6			# h3 += 2*f1*f2
	mulhdu	11,9,6
	addc	28,28,12
	adde	29,29,11
	mulld	12,10,6			# h4 += 2*f1*f3
	mulhdu	11,10,6
	addc	30,30,12
	adde	31,31,11
	mulld	12,21,6			# h0 += 2*f1*19*f4
	mulhdu	11,21,6
	add	6,10,10			# r6 = 2*f3
	addc	22,22,12
	adde	23,23,11
	mulld	12,10,5			# h1 += 19*f3*f3
	mulhdu	10,10,5			# r10 (f3) becomes high-half scratch
	addc	24,24,12
	adde	25,25,10
	mulld	12,6,21			# h2 += 2*f3*19*f4
	mulhdu	10,6,21
	add	6,9,9			# r6 = 2*f2
	addc	26,26,12
	adde	27,27,10

	mulld	12,9,9			# h4 += f2*f2
	mulhdu	10,9,9
	addc	30,30,12
	adde	31,31,10
	mulld	12,5,6			# h0 += 2*f2*19*f3
	mulhdu	10,5,6
	addc	22,22,12
	adde	23,23,10
	mulld	12,21,6			# h1 += 2*f2*19*f4
	mulhdu	10,21,6
	addc	24,24,12
	adde	25,25,10

	# fe51_reduce
	li	0,-1
	srdi	0,0,13			# r0 = 2^51 - 1 (limb mask)

	srdi	12,26,51		# r12 = h2 >> 51
	and	9,26,0			# r9  = limb 2
	insrdi	12,27,51,0
	srdi	21,22,51		# r21 = h0 >> 51
	and	7,22,0			# r7  = limb 0
	insrdi	21,23,51,0
	addc	28,28,12		# h3 += carry from h2
	addze	29,29
	addc	24,24,21		# h1 += carry from h0
	addze	25,25

	srdi	12,28,51		# r12 = h3 >> 51
	and	10,28,0			# r10 = limb 3
	insrdi	12,29,51,0
	srdi	21,24,51		# r21 = h1 >> 51
	and	8,24,0			# r8  = limb 1
	insrdi	21,25,51,0
	addc	30,30,12		# h4 += carry from h3
	addze	31,31
	add	9,9,21			# limb 2 += carry from h1

	srdi	12,30,51		# r12 = h4 >> 51
	and	11,30,0			# r11 = limb 4
	insrdi	12,31,51,0
	mulli	12,12,19		# carry out of limb 4 is worth 19 at limb 0

	add	7,7,12			# limb 0 += 19 * carry

	srdi	21,9,51			# second pass: limb 2 -> limb 3
	and	9,9,0
	add	10,10,21

	srdi	12,7,51			# second pass: limb 0 -> limb 1
	and	7,7,0
	add	8,8,12

	bdnz	.Lsqr_times_loop	# --CTR; square again while CTR != 0

	std	9,16(3)			# store the five result limbs
	std	10,24(3)
	std	11,32(3)
	std	7,0(3)
	std	8,8(3)

	ld	21,56(1)		# restore r21-r31 and pop the frame
	ld	22,64(1)
	ld	23,72(1)
	ld	24,80(1)
	ld	25,88(1)
	ld	26,96(1)
	ld	27,104(1)
	ld	28,112(1)
	ld	29,120(1)
	ld	30,128(1)
	ld	31,136(1)
	addi	1,1,144
	blr
SYM_FUNC_END(x25519_fe51_sqr_times)

/*
 * x25519_fe51_frombytes
 *
 * In:   r4 = address of a 32-byte value, read as four 64-bit words
 *            s0..s3 at offsets 0, 8, 16, 24
 * Out:  five 51-bit limbs h0..h4 stored at r3
 * Uses: r5-r12 as scratch; no stack frame.
 *
 * Splits the 256-bit value s0 + s1*2^64 + s2*2^128 + s3*2^192 into
 * 51-bit pieces:
 *
 *	h0 =   s0                      & mask
 *	h1 = ((s0 >> 51) | (s1 << 13)) & mask
 *	h2 = ((s1 >> 38) | (s2 << 26)) & mask
 *	h3 = ((s2 >> 25) | (s3 << 39)) & mask
 *	h4 =  (s3 >> 12)               & mask
 *
 * The final mask discards the top bit of s3 (bit 255 of the input).
 *
 * NOTE(review): ld reads each word in the CPU's native byte order;
 * presumably this is built for little-endian so that s0 holds the
 * first eight input bytes as a little-endian integer - confirm.
 */
.align 5
SYM_FUNC_START(x25519_fe51_frombytes)

	li	12, -1
	srdi	12, 12, 13	# 0x7ffffffffffff

	ld	5, 0(4)		# s0
	ld	6, 8(4)		# s1
	ld	7, 16(4)	# s2
	ld	8, 24(4)	# s3

	srdi	10, 5, 51	# top 13 bits of s0
	and	5, 5, 12	# h0

	sldi	11, 6, 13
	or	11, 10, 11	# h1t
	srdi	10, 6, 38	# top 26 bits of s1
	and	6, 11, 12	# h1

	sldi	11, 7, 26
	or	10, 10, 11	# h2t

	srdi	11, 7, 25	# top 39 bits of s2
	and	7, 10, 12	# h2
	sldi	10, 8, 39
	or	11, 11, 10	# h3t

	srdi	9, 8, 12	# top 52 bits of s3
	and	8, 11, 12	# h3
	and	9, 9, 12	# h4

	std	5, 0(3)
	std	6, 8(3)
	std	7, 16(3)
	std	8, 24(3)
	std	9, 32(3)

	blr
SYM_FUNC_END(x25519_fe51_frombytes)

/*
 * x25519_fe51_tobytes
 *
 * In:   r4 = address of five limbs h0..h4
 * Out:  32 bytes stored at r3 as four 64-bit words s0..s3
 * Uses: r5-r12 as scratch; no stack frame.
 *
 * Step 1: q (r10) = carry out of limb 4 when 19 is added to h and the
 *         carries are rippled through all five limbs, i.e. the part of
 *         h + 19 at or above 2^255.
 * Step 2: add 19*q to limb 0, ripple the carries upward and mask every
 *         limb to 51 bits; masking limb 4 drops whatever reached
 *         bit 255.
 * Step 3: pack the five 51-bit limbs into four 64-bit words.
 *
 * NOTE(review): the input limbs are presumably only slightly above
 * 51 bits (as produced by the multiply/square routines), so that q is
 * 0 or 1 and a single conditional subtraction fully reduces the value -
 * confirm.  The byte-order note on x25519_fe51_frombytes applies to
 * the std stores here as well.
 */
.align 5
SYM_FUNC_START(x25519_fe51_tobytes)

	ld	5, 0(4)		# h0
	ld	6, 8(4)		# h1
	ld	7, 16(4)	# h2
	ld	8, 24(4)	# h3
	ld	9, 32(4)	# h4

	li	12, -1
	srdi	12, 12, 13	# 0x7ffffffffffff

	# Full reduction
	addi	10, 5, 19	# carry chain of (h + 19), limb by limb
	srdi	10, 10, 51
	add	10, 10, 6
	srdi	10, 10, 51
	add	10, 10, 7
	srdi	10, 10, 51
	add	10, 10, 8
	srdi	10, 10, 51
	add	10, 10, 9
	srdi	10, 10, 51	# q = carry out of limb 4

	mulli	10, 10, 19
	add	5, 5, 10	# h0 += 19*q
	srdi	11, 5, 51	# ripple carries h0 -> h1 -> h2 -> h3 -> h4
	add	6, 6, 11
	srdi	11, 6, 51
	add	7, 7, 11
	srdi	11, 7, 51
	add	8, 8, 11
	srdi	11, 8, 51
	add	9, 9, 11

	and	5, 5, 12	# keep 51 bits of every limb
	and	6, 6, 12
	and	7, 7, 12
	and	8, 8, 12
	and	9, 9, 12

	sldi	10, 6, 51
	or	5, 5, 10	# s0

	srdi	11, 6, 13
	sldi	10, 7, 38
	or	6, 11, 10	# s1

	srdi	11, 7, 26
	sldi	10, 8, 25
	or	7, 11, 10	# s2

	srdi	11, 8, 39
	sldi	10, 9, 12
	or	8, 11, 10	# s3

	std	5, 0(3)
	std	6, 8(3)
	std	7, 16(3)
	std	8, 24(3)

	blr
SYM_FUNC_END(x25519_fe51_tobytes)

/*
 * x25519_cswap
 *
 * In:   r3 = address of first  buffer (five 64-bit words)
 *       r4 = address of second buffer (five 64-bit words)
 *       r5 = swap selector
 * Out:  both buffers updated in place; r3 and r4 are advanced past the
 *       five words on return.
 * Uses: r6-r12 and CTR as scratch; no stack frame.
 *
 * mask = -r5.  For every word pair: t = (a ^ b) & mask; a ^= t; b ^= t.
 * With r5 = 1 the mask is all ones and the words are exchanged; with
 * r5 = 0 the mask is zero and both buffers are rewritten unchanged.
 * The selector is applied through the mask only - no branch depends on
 * it, and the same loads and stores happen either way.
 *
 * NOTE(review): presumably callers pass only 0 or 1; any other value
 * yields a partial mask and mixes bits of the two buffers - confirm.
 */
.align 5
SYM_FUNC_START(x25519_cswap)

	li	7, 5		# five words per buffer
	neg	6, 5		# r6 = mask = -selector
	mtctr	7

.Lswap_loop:
	ld	8, 0(3)		# a
	ld	9, 0(4)		# b
	xor	10, 8, 9
	and	10, 10, 6	# t = (a ^ b) & mask
	xor	11, 8, 10	# a ^ t
	xor	12, 9, 10	# b ^ t
	std	11, 0(3)
	addi	3, 3, 8
	std	12, 0(4)
	addi	4, 4, 8
	bdnz	.Lswap_loop

	blr
SYM_FUNC_END(x25519_cswap)