1#! /usr/bin/env perl 2# SPDX-License-Identifier: GPL-2.0 3 4# This code is taken from CRYPTOGAMs[1] and is included here using the option 5# in the license to distribute the code under the GPL. Therefore this program 6# is free software; you can redistribute it and/or modify it under the terms of 7# the GNU General Public License version 2 as published by the Free Software 8# Foundation. 9# 10# [1] https://www.openssl.org/~appro/cryptogams/ 11 12# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org> 13# All rights reserved. 14# 15# Redistribution and use in source and binary forms, with or without 16# modification, are permitted provided that the following conditions 17# are met: 18# 19# * Redistributions of source code must retain copyright notices, 20# this list of conditions and the following disclaimer. 21# 22# * Redistributions in binary form must reproduce the above 23# copyright notice, this list of conditions and the following 24# disclaimer in the documentation and/or other materials 25# provided with the distribution. 26# 27# * Neither the name of the CRYPTOGAMS nor the names of its 28# copyright holder and contributors may be used to endorse or 29# promote products derived from this software without specific 30# prior written permission. 31# 32# ALTERNATIVELY, provided that this notice is retained in full, this 33# product may be distributed under the terms of the GNU General Public 34# License (GPL), in which case the provisions of the GPL apply INSTEAD OF 35# those given above. 36# 37# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS 38# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 39# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 40# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 41# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 42# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 43# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 45# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 46# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 47# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 49# ==================================================================== 50# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 51# project. The module is, however, dual licensed under OpenSSL and 52# CRYPTOGAMS licenses depending on where you obtain it. For further 53# details see https://www.openssl.org/~appro/cryptogams/. 54# ==================================================================== 55# 56# This module implements support for AES instructions as per PowerISA 57# specification version 2.07, first implemented by POWER8 processor. 58# The module is endian-agnostic in sense that it supports both big- 59# and little-endian cases. Data alignment in parallelizable modes is 60# handled with VSX loads and stores, which implies MSR.VSX flag being 61# set. It should also be noted that ISA specification doesn't prohibit 62# alignment exceptions for these instructions on page boundaries. 63# Initially alignment was handled in pure AltiVec/VMX way [when data 64# is aligned programmatically, which in turn guarantees exception- 65# free execution], but it turned to hamper performance when vcipher 66# instructions are interleaved. It's reckoned that eventual 67# misalignment penalties at page boundaries are in average lower 68# than additional overhead in pure AltiVec approach. 69# 70# May 2016 71# 72# Add XTS subroutine, 9x on little- and 12x improvement on big-endian 73# systems were measured. 74# 75###################################################################### 76# Current large-block performance in cycles per byte processed with 77# 128-bit key (less is better). 78# 79# CBC en-/decrypt CTR XTS 80# POWER8[le] 3.96/0.72 0.74 1.1 81# POWER8[be] 3.75/0.65 0.66 1.0 82 83$flavour = shift; 84 85if ($flavour =~ /64/) { 86 $SIZE_T =8; 87 $LRSAVE =2*$SIZE_T; 88 $STU ="stdu"; 89 $POP ="ld"; 90 $PUSH ="std"; 91 $UCMP ="cmpld"; 92 $SHL ="sldi"; 93} elsif ($flavour =~ /32/) { 94 $SIZE_T =4; 95 $LRSAVE =$SIZE_T; 96 $STU ="stwu"; 97 $POP ="lwz"; 98 $PUSH ="stw"; 99 $UCMP ="cmplw"; 100 $SHL ="slwi"; 101} else { die "nonsense $flavour"; } 102 103$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; 104 105$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; 106( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or 107( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or 108die "can't locate ppc-xlate.pl"; 109 110open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; 111 112$FRAME=8*$SIZE_T; 113$prefix="aes_p8"; 114 115$sp="r1"; 116$vrsave="r12"; 117 118######################################################################### 119{{{ # Key setup procedures # 120my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); 121my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); 122my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); 123 124$code.=<<___; 125.machine "any" 126 127.text 128 129.align 7 130rcon: 131.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev 132.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev 133.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev 134.long 0,0,0,0 ?asis 135.long 0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe 136Lconsts: 137 mflr r0 138 bcl 20,31,\$+4 139 mflr $ptr #vvvvv "distance between . and rcon 140 addi $ptr,$ptr,-0x58 141 mtlr r0 142 blr 143 .long 0 144 .byte 0,12,0x14,0,0,0,0,0 145.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" 146 147.globl .${prefix}_set_encrypt_key 148Lset_encrypt_key: 149 mflr r11 150 $PUSH r11,$LRSAVE($sp) 151 152 li $ptr,-1 153 ${UCMP}i $inp,0 154 beq- Lenc_key_abort # if ($inp==0) return -1; 155 ${UCMP}i $out,0 156 beq- Lenc_key_abort # if ($out==0) return -1; 157 li $ptr,-2 158 cmpwi $bits,128 159 blt- Lenc_key_abort 160 cmpwi $bits,256 161 bgt- Lenc_key_abort 162 andi. r0,$bits,0x3f 163 bne- Lenc_key_abort 164 165 lis r0,0xfff0 166 mfspr $vrsave,256 167 mtspr 256,r0 168 169 bl Lconsts 170 mtlr r11 171 172 neg r9,$inp 173 lvx $in0,0,$inp 174 addi $inp,$inp,15 # 15 is not typo 175 lvsr $key,0,r9 # borrow $key 176 li r8,0x20 177 cmpwi $bits,192 178 lvx $in1,0,$inp 179 le?vspltisb $mask,0x0f # borrow $mask 180 lvx $rcon,0,$ptr 181 le?vxor $key,$key,$mask # adjust for byte swap 182 lvx $mask,r8,$ptr 183 addi $ptr,$ptr,0x10 184 vperm $in0,$in0,$in1,$key # align [and byte swap in LE] 185 li $cnt,8 186 vxor $zero,$zero,$zero 187 mtctr $cnt 188 189 ?lvsr $outperm,0,$out 190 vspltisb $outmask,-1 191 lvx $outhead,0,$out 192 ?vperm $outmask,$zero,$outmask,$outperm 193 194 blt Loop128 195 addi $inp,$inp,8 196 beq L192 197 addi $inp,$inp,8 198 b L256 199 200.align 4 201Loop128: 202 vperm $key,$in0,$in0,$mask # rotate-n-splat 203 vsldoi $tmp,$zero,$in0,12 # >>32 204 vperm $outtail,$in0,$in0,$outperm # rotate 205 vsel $stage,$outhead,$outtail,$outmask 206 vmr $outhead,$outtail 207 vcipherlast $key,$key,$rcon 208 stvx $stage,0,$out 209 addi $out,$out,16 210 211 vxor $in0,$in0,$tmp 212 vsldoi $tmp,$zero,$tmp,12 # >>32 213 vxor $in0,$in0,$tmp 214 vsldoi $tmp,$zero,$tmp,12 # >>32 215 vxor $in0,$in0,$tmp 216 vadduwm $rcon,$rcon,$rcon 217 vxor $in0,$in0,$key 218 bdnz Loop128 219 220 lvx $rcon,0,$ptr # last two round keys 221 222 vperm $key,$in0,$in0,$mask # rotate-n-splat 223 vsldoi $tmp,$zero,$in0,12 # >>32 224 vperm $outtail,$in0,$in0,$outperm # rotate 225 vsel $stage,$outhead,$outtail,$outmask 226 vmr $outhead,$outtail 227 vcipherlast $key,$key,$rcon 228 stvx $stage,0,$out 229 addi $out,$out,16 230 231 vxor $in0,$in0,$tmp 232 vsldoi $tmp,$zero,$tmp,12 # >>32 233 vxor $in0,$in0,$tmp 234 vsldoi $tmp,$zero,$tmp,12 # >>32 235 vxor $in0,$in0,$tmp 236 vadduwm $rcon,$rcon,$rcon 237 vxor $in0,$in0,$key 238 239 vperm $key,$in0,$in0,$mask # rotate-n-splat 240 vsldoi $tmp,$zero,$in0,12 # >>32 241 vperm $outtail,$in0,$in0,$outperm # rotate 242 vsel $stage,$outhead,$outtail,$outmask 243 vmr $outhead,$outtail 244 vcipherlast $key,$key,$rcon 245 stvx $stage,0,$out 246 addi $out,$out,16 247 248 vxor $in0,$in0,$tmp 249 vsldoi $tmp,$zero,$tmp,12 # >>32 250 vxor $in0,$in0,$tmp 251 vsldoi $tmp,$zero,$tmp,12 # >>32 252 vxor $in0,$in0,$tmp 253 vxor $in0,$in0,$key 254 vperm $outtail,$in0,$in0,$outperm # rotate 255 vsel $stage,$outhead,$outtail,$outmask 256 vmr $outhead,$outtail 257 stvx $stage,0,$out 258 259 addi $inp,$out,15 # 15 is not typo 260 addi $out,$out,0x50 261 262 li $rounds,10 263 b Ldone 264 265.align 4 266L192: 267 lvx $tmp,0,$inp 268 li $cnt,4 269 vperm $outtail,$in0,$in0,$outperm # rotate 270 vsel $stage,$outhead,$outtail,$outmask 271 vmr $outhead,$outtail 272 stvx $stage,0,$out 273 addi $out,$out,16 274 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] 275 vspltisb $key,8 # borrow $key 276 mtctr $cnt 277 vsububm $mask,$mask,$key # adjust the mask 278 279Loop192: 280 vperm $key,$in1,$in1,$mask # roate-n-splat 281 vsldoi $tmp,$zero,$in0,12 # >>32 282 vcipherlast $key,$key,$rcon 283 284 vxor $in0,$in0,$tmp 285 vsldoi $tmp,$zero,$tmp,12 # >>32 286 vxor $in0,$in0,$tmp 287 vsldoi $tmp,$zero,$tmp,12 # >>32 288 vxor $in0,$in0,$tmp 289 290 vsldoi $stage,$zero,$in1,8 291 vspltw $tmp,$in0,3 292 vxor $tmp,$tmp,$in1 293 vsldoi $in1,$zero,$in1,12 # >>32 294 vadduwm $rcon,$rcon,$rcon 295 vxor $in1,$in1,$tmp 296 vxor $in0,$in0,$key 297 vxor $in1,$in1,$key 298 vsldoi $stage,$stage,$in0,8 299 300 vperm $key,$in1,$in1,$mask # rotate-n-splat 301 vsldoi $tmp,$zero,$in0,12 # >>32 302 vperm $outtail,$stage,$stage,$outperm # rotate 303 vsel $stage,$outhead,$outtail,$outmask 304 vmr $outhead,$outtail 305 vcipherlast $key,$key,$rcon 306 stvx $stage,0,$out 307 addi $out,$out,16 308 309 vsldoi $stage,$in0,$in1,8 310 vxor $in0,$in0,$tmp 311 vsldoi $tmp,$zero,$tmp,12 # >>32 312 vperm $outtail,$stage,$stage,$outperm # rotate 313 vsel $stage,$outhead,$outtail,$outmask 314 vmr $outhead,$outtail 315 vxor $in0,$in0,$tmp 316 vsldoi $tmp,$zero,$tmp,12 # >>32 317 vxor $in0,$in0,$tmp 318 stvx $stage,0,$out 319 addi $out,$out,16 320 321 vspltw $tmp,$in0,3 322 vxor $tmp,$tmp,$in1 323 vsldoi $in1,$zero,$in1,12 # >>32 324 vadduwm $rcon,$rcon,$rcon 325 vxor $in1,$in1,$tmp 326 vxor $in0,$in0,$key 327 vxor $in1,$in1,$key 328 vperm $outtail,$in0,$in0,$outperm # rotate 329 vsel $stage,$outhead,$outtail,$outmask 330 vmr $outhead,$outtail 331 stvx $stage,0,$out 332 addi $inp,$out,15 # 15 is not typo 333 addi $out,$out,16 334 bdnz Loop192 335 336 li $rounds,12 337 addi $out,$out,0x20 338 b Ldone 339 340.align 4 341L256: 342 lvx $tmp,0,$inp 343 li $cnt,7 344 li $rounds,14 345 vperm $outtail,$in0,$in0,$outperm # rotate 346 vsel $stage,$outhead,$outtail,$outmask 347 vmr $outhead,$outtail 348 stvx $stage,0,$out 349 addi $out,$out,16 350 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] 351 mtctr $cnt 352 353Loop256: 354 vperm $key,$in1,$in1,$mask # rotate-n-splat 355 vsldoi $tmp,$zero,$in0,12 # >>32 356 vperm $outtail,$in1,$in1,$outperm # rotate 357 vsel $stage,$outhead,$outtail,$outmask 358 vmr $outhead,$outtail 359 vcipherlast $key,$key,$rcon 360 stvx $stage,0,$out 361 addi $out,$out,16 362 363 vxor $in0,$in0,$tmp 364 vsldoi $tmp,$zero,$tmp,12 # >>32 365 vxor $in0,$in0,$tmp 366 vsldoi $tmp,$zero,$tmp,12 # >>32 367 vxor $in0,$in0,$tmp 368 vadduwm $rcon,$rcon,$rcon 369 vxor $in0,$in0,$key 370 vperm $outtail,$in0,$in0,$outperm # rotate 371 vsel $stage,$outhead,$outtail,$outmask 372 vmr $outhead,$outtail 373 stvx $stage,0,$out 374 addi $inp,$out,15 # 15 is not typo 375 addi $out,$out,16 376 bdz Ldone 377 378 vspltw $key,$in0,3 # just splat 379 vsldoi $tmp,$zero,$in1,12 # >>32 380 vsbox $key,$key 381 382 vxor $in1,$in1,$tmp 383 vsldoi $tmp,$zero,$tmp,12 # >>32 384 vxor $in1,$in1,$tmp 385 vsldoi $tmp,$zero,$tmp,12 # >>32 386 vxor $in1,$in1,$tmp 387 388 vxor $in1,$in1,$key 389 b Loop256 390 391.align 4 392Ldone: 393 lvx $in1,0,$inp # redundant in aligned case 394 vsel $in1,$outhead,$in1,$outmask 395 stvx $in1,0,$inp 396 li $ptr,0 397 mtspr 256,$vrsave 398 stw $rounds,0($out) 399 400Lenc_key_abort: 401 mr r3,$ptr 402 blr 403 .long 0 404 .byte 0,12,0x14,1,0,0,3,0 405 .long 0 406.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key 407 408.globl .${prefix}_set_decrypt_key 409 $STU $sp,-$FRAME($sp) 410 mflr r10 411 $PUSH r10,$FRAME+$LRSAVE($sp) 412 bl Lset_encrypt_key 413 mtlr r10 414 415 cmpwi r3,0 416 bne- Ldec_key_abort 417 418 slwi $cnt,$rounds,4 419 subi $inp,$out,240 # first round key 420 srwi $rounds,$rounds,1 421 add $out,$inp,$cnt # last round key 422 mtctr $rounds 423 424Ldeckey: 425 lwz r0, 0($inp) 426 lwz r6, 4($inp) 427 lwz r7, 8($inp) 428 lwz r8, 12($inp) 429 addi $inp,$inp,16 430 lwz r9, 0($out) 431 lwz r10,4($out) 432 lwz r11,8($out) 433 lwz r12,12($out) 434 stw r0, 0($out) 435 stw r6, 4($out) 436 stw r7, 8($out) 437 stw r8, 12($out) 438 subi $out,$out,16 439 stw r9, -16($inp) 440 stw r10,-12($inp) 441 stw r11,-8($inp) 442 stw r12,-4($inp) 443 bdnz Ldeckey 444 445 xor r3,r3,r3 # return value 446Ldec_key_abort: 447 addi $sp,$sp,$FRAME 448 blr 449 .long 0 450 .byte 0,12,4,1,0x80,0,3,0 451 .long 0 452.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key 453___ 454}}} 455######################################################################### 456{{{ # Single block en- and decrypt procedures # 457sub gen_block () { 458my $dir = shift; 459my $n = $dir eq "de" ? "n" : ""; 460my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); 461 462$code.=<<___; 463.globl .${prefix}_${dir}crypt 464 lwz $rounds,240($key) 465 lis r0,0xfc00 466 mfspr $vrsave,256 467 li $idx,15 # 15 is not typo 468 mtspr 256,r0 469 470 lvx v0,0,$inp 471 neg r11,$out 472 lvx v1,$idx,$inp 473 lvsl v2,0,$inp # inpperm 474 le?vspltisb v4,0x0f 475 ?lvsl v3,0,r11 # outperm 476 le?vxor v2,v2,v4 477 li $idx,16 478 vperm v0,v0,v1,v2 # align [and byte swap in LE] 479 lvx v1,0,$key 480 ?lvsl v5,0,$key # keyperm 481 srwi $rounds,$rounds,1 482 lvx v2,$idx,$key 483 addi $idx,$idx,16 484 subi $rounds,$rounds,1 485 ?vperm v1,v1,v2,v5 # align round key 486 487 vxor v0,v0,v1 488 lvx v1,$idx,$key 489 addi $idx,$idx,16 490 mtctr $rounds 491 492Loop_${dir}c: 493 ?vperm v2,v2,v1,v5 494 v${n}cipher v0,v0,v2 495 lvx v2,$idx,$key 496 addi $idx,$idx,16 497 ?vperm v1,v1,v2,v5 498 v${n}cipher v0,v0,v1 499 lvx v1,$idx,$key 500 addi $idx,$idx,16 501 bdnz Loop_${dir}c 502 503 ?vperm v2,v2,v1,v5 504 v${n}cipher v0,v0,v2 505 lvx v2,$idx,$key 506 ?vperm v1,v1,v2,v5 507 v${n}cipherlast v0,v0,v1 508 509 vspltisb v2,-1 510 vxor v1,v1,v1 511 li $idx,15 # 15 is not typo 512 ?vperm v2,v1,v2,v3 # outmask 513 le?vxor v3,v3,v4 514 lvx v1,0,$out # outhead 515 vperm v0,v0,v0,v3 # rotate [and byte swap in LE] 516 vsel v1,v1,v0,v2 517 lvx v4,$idx,$out 518 stvx v1,0,$out 519 vsel v0,v0,v4,v2 520 stvx v0,$idx,$out 521 522 mtspr 256,$vrsave 523 blr 524 .long 0 525 .byte 0,12,0x14,0,0,0,3,0 526 .long 0 527.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt 528___ 529} 530&gen_block("en"); 531&gen_block("de"); 532}}} 533######################################################################### 534{{{ # CBC en- and decrypt procedures # 535my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); 536my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); 537my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= 538 map("v$_",(4..10)); 539$code.=<<___; 540.globl .${prefix}_cbc_encrypt 541 ${UCMP}i $len,16 542 bltlr- 543 544 cmpwi $enc,0 # test direction 545 lis r0,0xffe0 546 mfspr $vrsave,256 547 mtspr 256,r0 548 549 li $idx,15 550 vxor $rndkey0,$rndkey0,$rndkey0 551 le?vspltisb $tmp,0x0f 552 553 lvx $ivec,0,$ivp # load [unaligned] iv 554 lvsl $inpperm,0,$ivp 555 lvx $inptail,$idx,$ivp 556 le?vxor $inpperm,$inpperm,$tmp 557 vperm $ivec,$ivec,$inptail,$inpperm 558 559 neg r11,$inp 560 ?lvsl $keyperm,0,$key # prepare for unaligned key 561 lwz $rounds,240($key) 562 563 lvsr $inpperm,0,r11 # prepare for unaligned load 564 lvx $inptail,0,$inp 565 addi $inp,$inp,15 # 15 is not typo 566 le?vxor $inpperm,$inpperm,$tmp 567 568 ?lvsr $outperm,0,$out # prepare for unaligned store 569 vspltisb $outmask,-1 570 lvx $outhead,0,$out 571 ?vperm $outmask,$rndkey0,$outmask,$outperm 572 le?vxor $outperm,$outperm,$tmp 573 574 srwi $rounds,$rounds,1 575 li $idx,16 576 subi $rounds,$rounds,1 577 beq Lcbc_dec 578 579Lcbc_enc: 580 vmr $inout,$inptail 581 lvx $inptail,0,$inp 582 addi $inp,$inp,16 583 mtctr $rounds 584 subi $len,$len,16 # len-=16 585 586 lvx $rndkey0,0,$key 587 vperm $inout,$inout,$inptail,$inpperm 588 lvx $rndkey1,$idx,$key 589 addi $idx,$idx,16 590 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 591 vxor $inout,$inout,$rndkey0 592 lvx $rndkey0,$idx,$key 593 addi $idx,$idx,16 594 vxor $inout,$inout,$ivec 595 596Loop_cbc_enc: 597 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 598 vcipher $inout,$inout,$rndkey1 599 lvx $rndkey1,$idx,$key 600 addi $idx,$idx,16 601 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 602 vcipher $inout,$inout,$rndkey0 603 lvx $rndkey0,$idx,$key 604 addi $idx,$idx,16 605 bdnz Loop_cbc_enc 606 607 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 608 vcipher $inout,$inout,$rndkey1 609 lvx $rndkey1,$idx,$key 610 li $idx,16 611 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 612 vcipherlast $ivec,$inout,$rndkey0 613 ${UCMP}i $len,16 614 615 vperm $tmp,$ivec,$ivec,$outperm 616 vsel $inout,$outhead,$tmp,$outmask 617 vmr $outhead,$tmp 618 stvx $inout,0,$out 619 addi $out,$out,16 620 bge Lcbc_enc 621 622 b Lcbc_done 623 624.align 4 625Lcbc_dec: 626 ${UCMP}i $len,128 627 bge _aesp8_cbc_decrypt8x 628 vmr $tmp,$inptail 629 lvx $inptail,0,$inp 630 addi $inp,$inp,16 631 mtctr $rounds 632 subi $len,$len,16 # len-=16 633 634 lvx $rndkey0,0,$key 635 vperm $tmp,$tmp,$inptail,$inpperm 636 lvx $rndkey1,$idx,$key 637 addi $idx,$idx,16 638 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 639 vxor $inout,$tmp,$rndkey0 640 lvx $rndkey0,$idx,$key 641 addi $idx,$idx,16 642 643Loop_cbc_dec: 644 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 645 vncipher $inout,$inout,$rndkey1 646 lvx $rndkey1,$idx,$key 647 addi $idx,$idx,16 648 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 649 vncipher $inout,$inout,$rndkey0 650 lvx $rndkey0,$idx,$key 651 addi $idx,$idx,16 652 bdnz Loop_cbc_dec 653 654 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 655 vncipher $inout,$inout,$rndkey1 656 lvx $rndkey1,$idx,$key 657 li $idx,16 658 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 659 vncipherlast $inout,$inout,$rndkey0 660 ${UCMP}i $len,16 661 662 vxor $inout,$inout,$ivec 663 vmr $ivec,$tmp 664 vperm $tmp,$inout,$inout,$outperm 665 vsel $inout,$outhead,$tmp,$outmask 666 vmr $outhead,$tmp 667 stvx $inout,0,$out 668 addi $out,$out,16 669 bge Lcbc_dec 670 671Lcbc_done: 672 addi $out,$out,-1 673 lvx $inout,0,$out # redundant in aligned case 674 vsel $inout,$outhead,$inout,$outmask 675 stvx $inout,0,$out 676 677 neg $enc,$ivp # write [unaligned] iv 678 li $idx,15 # 15 is not typo 679 vxor $rndkey0,$rndkey0,$rndkey0 680 vspltisb $outmask,-1 681 le?vspltisb $tmp,0x0f 682 ?lvsl $outperm,0,$enc 683 ?vperm $outmask,$rndkey0,$outmask,$outperm 684 le?vxor $outperm,$outperm,$tmp 685 lvx $outhead,0,$ivp 686 vperm $ivec,$ivec,$ivec,$outperm 687 vsel $inout,$outhead,$ivec,$outmask 688 lvx $inptail,$idx,$ivp 689 stvx $inout,0,$ivp 690 vsel $inout,$ivec,$inptail,$outmask 691 stvx $inout,$idx,$ivp 692 693 mtspr 256,$vrsave 694 blr 695 .long 0 696 .byte 0,12,0x14,0,0,0,6,0 697 .long 0 698___ 699######################################################################### 700{{ # Optimized CBC decrypt procedure # 701my $key_="r11"; 702my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); 703my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13)); 704my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21)); 705my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 706 # v26-v31 last 6 round keys 707my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment 708 709$code.=<<___; 710.align 5 711_aesp8_cbc_decrypt8x: 712 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 713 li r10,`$FRAME+8*16+15` 714 li r11,`$FRAME+8*16+31` 715 stvx v20,r10,$sp # ABI says so 716 addi r10,r10,32 717 stvx v21,r11,$sp 718 addi r11,r11,32 719 stvx v22,r10,$sp 720 addi r10,r10,32 721 stvx v23,r11,$sp 722 addi r11,r11,32 723 stvx v24,r10,$sp 724 addi r10,r10,32 725 stvx v25,r11,$sp 726 addi r11,r11,32 727 stvx v26,r10,$sp 728 addi r10,r10,32 729 stvx v27,r11,$sp 730 addi r11,r11,32 731 stvx v28,r10,$sp 732 addi r10,r10,32 733 stvx v29,r11,$sp 734 addi r11,r11,32 735 stvx v30,r10,$sp 736 stvx v31,r11,$sp 737 li r0,-1 738 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 739 li $x10,0x10 740 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 741 li $x20,0x20 742 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 743 li $x30,0x30 744 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 745 li $x40,0x40 746 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 747 li $x50,0x50 748 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 749 li $x60,0x60 750 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 751 li $x70,0x70 752 mtspr 256,r0 753 754 subi $rounds,$rounds,3 # -4 in total 755 subi $len,$len,128 # bias 756 757 lvx $rndkey0,$x00,$key # load key schedule 758 lvx v30,$x10,$key 759 addi $key,$key,0x20 760 lvx v31,$x00,$key 761 ?vperm $rndkey0,$rndkey0,v30,$keyperm 762 addi $key_,$sp,$FRAME+15 763 mtctr $rounds 764 765Load_cbc_dec_key: 766 ?vperm v24,v30,v31,$keyperm 767 lvx v30,$x10,$key 768 addi $key,$key,0x20 769 stvx v24,$x00,$key_ # off-load round[1] 770 ?vperm v25,v31,v30,$keyperm 771 lvx v31,$x00,$key 772 stvx v25,$x10,$key_ # off-load round[2] 773 addi $key_,$key_,0x20 774 bdnz Load_cbc_dec_key 775 776 lvx v26,$x10,$key 777 ?vperm v24,v30,v31,$keyperm 778 lvx v27,$x20,$key 779 stvx v24,$x00,$key_ # off-load round[3] 780 ?vperm v25,v31,v26,$keyperm 781 lvx v28,$x30,$key 782 stvx v25,$x10,$key_ # off-load round[4] 783 addi $key_,$sp,$FRAME+15 # rewind $key_ 784 ?vperm v26,v26,v27,$keyperm 785 lvx v29,$x40,$key 786 ?vperm v27,v27,v28,$keyperm 787 lvx v30,$x50,$key 788 ?vperm v28,v28,v29,$keyperm 789 lvx v31,$x60,$key 790 ?vperm v29,v29,v30,$keyperm 791 lvx $out0,$x70,$key # borrow $out0 792 ?vperm v30,v30,v31,$keyperm 793 lvx v24,$x00,$key_ # pre-load round[1] 794 ?vperm v31,v31,$out0,$keyperm 795 lvx v25,$x10,$key_ # pre-load round[2] 796 797 #lvx $inptail,0,$inp # "caller" already did this 798 #addi $inp,$inp,15 # 15 is not typo 799 subi $inp,$inp,15 # undo "caller" 800 801 le?li $idx,8 802 lvx_u $in0,$x00,$inp # load first 8 "words" 803 le?lvsl $inpperm,0,$idx 804 le?vspltisb $tmp,0x0f 805 lvx_u $in1,$x10,$inp 806 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u 807 lvx_u $in2,$x20,$inp 808 le?vperm $in0,$in0,$in0,$inpperm 809 lvx_u $in3,$x30,$inp 810 le?vperm $in1,$in1,$in1,$inpperm 811 lvx_u $in4,$x40,$inp 812 le?vperm $in2,$in2,$in2,$inpperm 813 vxor $out0,$in0,$rndkey0 814 lvx_u $in5,$x50,$inp 815 le?vperm $in3,$in3,$in3,$inpperm 816 vxor $out1,$in1,$rndkey0 817 lvx_u $in6,$x60,$inp 818 le?vperm $in4,$in4,$in4,$inpperm 819 vxor $out2,$in2,$rndkey0 820 lvx_u $in7,$x70,$inp 821 addi $inp,$inp,0x80 822 le?vperm $in5,$in5,$in5,$inpperm 823 vxor $out3,$in3,$rndkey0 824 le?vperm $in6,$in6,$in6,$inpperm 825 vxor $out4,$in4,$rndkey0 826 le?vperm $in7,$in7,$in7,$inpperm 827 vxor $out5,$in5,$rndkey0 828 vxor $out6,$in6,$rndkey0 829 vxor $out7,$in7,$rndkey0 830 831 mtctr $rounds 832 b Loop_cbc_dec8x 833.align 5 834Loop_cbc_dec8x: 835 vncipher $out0,$out0,v24 836 vncipher $out1,$out1,v24 837 vncipher $out2,$out2,v24 838 vncipher $out3,$out3,v24 839 vncipher $out4,$out4,v24 840 vncipher $out5,$out5,v24 841 vncipher $out6,$out6,v24 842 vncipher $out7,$out7,v24 843 lvx v24,$x20,$key_ # round[3] 844 addi $key_,$key_,0x20 845 846 vncipher $out0,$out0,v25 847 vncipher $out1,$out1,v25 848 vncipher $out2,$out2,v25 849 vncipher $out3,$out3,v25 850 vncipher $out4,$out4,v25 851 vncipher $out5,$out5,v25 852 vncipher $out6,$out6,v25 853 vncipher $out7,$out7,v25 854 lvx v25,$x10,$key_ # round[4] 855 bdnz Loop_cbc_dec8x 856 857 subic $len,$len,128 # $len-=128 858 vncipher $out0,$out0,v24 859 vncipher $out1,$out1,v24 860 vncipher $out2,$out2,v24 861 vncipher $out3,$out3,v24 862 vncipher $out4,$out4,v24 863 vncipher $out5,$out5,v24 864 vncipher $out6,$out6,v24 865 vncipher $out7,$out7,v24 866 867 subfe. r0,r0,r0 # borrow?-1:0 868 vncipher $out0,$out0,v25 869 vncipher $out1,$out1,v25 870 vncipher $out2,$out2,v25 871 vncipher $out3,$out3,v25 872 vncipher $out4,$out4,v25 873 vncipher $out5,$out5,v25 874 vncipher $out6,$out6,v25 875 vncipher $out7,$out7,v25 876 877 and r0,r0,$len 878 vncipher $out0,$out0,v26 879 vncipher $out1,$out1,v26 880 vncipher $out2,$out2,v26 881 vncipher $out3,$out3,v26 882 vncipher $out4,$out4,v26 883 vncipher $out5,$out5,v26 884 vncipher $out6,$out6,v26 885 vncipher $out7,$out7,v26 886 887 add $inp,$inp,r0 # $inp is adjusted in such 888 # way that at exit from the 889 # loop inX-in7 are loaded 890 # with last "words" 891 vncipher $out0,$out0,v27 892 vncipher $out1,$out1,v27 893 vncipher $out2,$out2,v27 894 vncipher $out3,$out3,v27 895 vncipher $out4,$out4,v27 896 vncipher $out5,$out5,v27 897 vncipher $out6,$out6,v27 898 vncipher $out7,$out7,v27 899 900 addi $key_,$sp,$FRAME+15 # rewind $key_ 901 vncipher $out0,$out0,v28 902 vncipher $out1,$out1,v28 903 vncipher $out2,$out2,v28 904 vncipher $out3,$out3,v28 905 vncipher $out4,$out4,v28 906 vncipher $out5,$out5,v28 907 vncipher $out6,$out6,v28 908 vncipher $out7,$out7,v28 909 lvx v24,$x00,$key_ # re-pre-load round[1] 910 911 vncipher $out0,$out0,v29 912 vncipher $out1,$out1,v29 913 vncipher $out2,$out2,v29 914 vncipher $out3,$out3,v29 915 vncipher $out4,$out4,v29 916 vncipher $out5,$out5,v29 917 vncipher $out6,$out6,v29 918 vncipher $out7,$out7,v29 919 lvx v25,$x10,$key_ # re-pre-load round[2] 920 921 vncipher $out0,$out0,v30 922 vxor $ivec,$ivec,v31 # xor with last round key 923 vncipher $out1,$out1,v30 924 vxor $in0,$in0,v31 925 vncipher $out2,$out2,v30 926 vxor $in1,$in1,v31 927 vncipher $out3,$out3,v30 928 vxor $in2,$in2,v31 929 vncipher $out4,$out4,v30 930 vxor $in3,$in3,v31 931 vncipher $out5,$out5,v30 932 vxor $in4,$in4,v31 933 vncipher $out6,$out6,v30 934 vxor $in5,$in5,v31 935 vncipher $out7,$out7,v30 936 vxor $in6,$in6,v31 937 938 vncipherlast $out0,$out0,$ivec 939 vncipherlast $out1,$out1,$in0 940 lvx_u $in0,$x00,$inp # load next input block 941 vncipherlast $out2,$out2,$in1 942 lvx_u $in1,$x10,$inp 943 vncipherlast $out3,$out3,$in2 944 le?vperm $in0,$in0,$in0,$inpperm 945 lvx_u $in2,$x20,$inp 946 vncipherlast $out4,$out4,$in3 947 le?vperm $in1,$in1,$in1,$inpperm 948 lvx_u $in3,$x30,$inp 949 vncipherlast $out5,$out5,$in4 950 le?vperm $in2,$in2,$in2,$inpperm 951 lvx_u $in4,$x40,$inp 952 vncipherlast $out6,$out6,$in5 953 le?vperm $in3,$in3,$in3,$inpperm 954 lvx_u $in5,$x50,$inp 955 vncipherlast $out7,$out7,$in6 956 le?vperm $in4,$in4,$in4,$inpperm 957 lvx_u $in6,$x60,$inp 958 vmr $ivec,$in7 959 le?vperm $in5,$in5,$in5,$inpperm 960 lvx_u $in7,$x70,$inp 961 addi $inp,$inp,0x80 962 963 le?vperm $out0,$out0,$out0,$inpperm 964 le?vperm $out1,$out1,$out1,$inpperm 965 stvx_u $out0,$x00,$out 966 le?vperm $in6,$in6,$in6,$inpperm 967 vxor $out0,$in0,$rndkey0 968 le?vperm $out2,$out2,$out2,$inpperm 969 stvx_u $out1,$x10,$out 970 le?vperm $in7,$in7,$in7,$inpperm 971 vxor $out1,$in1,$rndkey0 972 le?vperm $out3,$out3,$out3,$inpperm 973 stvx_u $out2,$x20,$out 974 vxor $out2,$in2,$rndkey0 975 le?vperm $out4,$out4,$out4,$inpperm 976 stvx_u $out3,$x30,$out 977 vxor $out3,$in3,$rndkey0 978 le?vperm $out5,$out5,$out5,$inpperm 979 stvx_u $out4,$x40,$out 980 vxor $out4,$in4,$rndkey0 981 le?vperm $out6,$out6,$out6,$inpperm 982 stvx_u $out5,$x50,$out 983 vxor $out5,$in5,$rndkey0 984 le?vperm $out7,$out7,$out7,$inpperm 985 stvx_u $out6,$x60,$out 986 vxor $out6,$in6,$rndkey0 987 stvx_u $out7,$x70,$out 988 addi $out,$out,0x80 989 vxor $out7,$in7,$rndkey0 990 991 mtctr $rounds 992 beq Loop_cbc_dec8x # did $len-=128 borrow? 993 994 addic. $len,$len,128 995 beq Lcbc_dec8x_done 996 nop 997 nop 998 999Loop_cbc_dec8x_tail: # up to 7 "words" tail... 1000 vncipher $out1,$out1,v24 1001 vncipher $out2,$out2,v24 1002 vncipher $out3,$out3,v24 1003 vncipher $out4,$out4,v24 1004 vncipher $out5,$out5,v24 1005 vncipher $out6,$out6,v24 1006 vncipher $out7,$out7,v24 1007 lvx v24,$x20,$key_ # round[3] 1008 addi $key_,$key_,0x20 1009 1010 vncipher $out1,$out1,v25 1011 vncipher $out2,$out2,v25 1012 vncipher $out3,$out3,v25 1013 vncipher $out4,$out4,v25 1014 vncipher $out5,$out5,v25 1015 vncipher $out6,$out6,v25 1016 vncipher $out7,$out7,v25 1017 lvx v25,$x10,$key_ # round[4] 1018 bdnz Loop_cbc_dec8x_tail 1019 1020 vncipher $out1,$out1,v24 1021 vncipher $out2,$out2,v24 1022 vncipher $out3,$out3,v24 1023 vncipher $out4,$out4,v24 1024 vncipher $out5,$out5,v24 1025 vncipher $out6,$out6,v24 1026 vncipher $out7,$out7,v24 1027 1028 vncipher $out1,$out1,v25 1029 vncipher $out2,$out2,v25 1030 vncipher $out3,$out3,v25 1031 vncipher $out4,$out4,v25 1032 vncipher $out5,$out5,v25 1033 vncipher $out6,$out6,v25 1034 vncipher $out7,$out7,v25 1035 1036 vncipher $out1,$out1,v26 1037 vncipher $out2,$out2,v26 1038 vncipher $out3,$out3,v26 1039 vncipher $out4,$out4,v26 1040 vncipher $out5,$out5,v26 1041 vncipher $out6,$out6,v26 1042 vncipher $out7,$out7,v26 1043 1044 vncipher $out1,$out1,v27 1045 vncipher $out2,$out2,v27 1046 vncipher $out3,$out3,v27 1047 vncipher $out4,$out4,v27 1048 vncipher $out5,$out5,v27 1049 vncipher $out6,$out6,v27 1050 vncipher $out7,$out7,v27 1051 1052 vncipher $out1,$out1,v28 1053 vncipher $out2,$out2,v28 1054 vncipher $out3,$out3,v28 1055 vncipher $out4,$out4,v28 1056 vncipher $out5,$out5,v28 1057 vncipher $out6,$out6,v28 1058 vncipher $out7,$out7,v28 1059 1060 vncipher $out1,$out1,v29 1061 vncipher $out2,$out2,v29 1062 vncipher $out3,$out3,v29 1063 vncipher $out4,$out4,v29 1064 vncipher $out5,$out5,v29 1065 vncipher $out6,$out6,v29 1066 vncipher $out7,$out7,v29 1067 1068 vncipher $out1,$out1,v30 1069 vxor $ivec,$ivec,v31 # last round key 1070 vncipher $out2,$out2,v30 1071 vxor $in1,$in1,v31 1072 vncipher $out3,$out3,v30 1073 vxor $in2,$in2,v31 1074 vncipher $out4,$out4,v30 1075 vxor $in3,$in3,v31 1076 vncipher $out5,$out5,v30 1077 vxor $in4,$in4,v31 1078 vncipher $out6,$out6,v30 1079 vxor $in5,$in5,v31 1080 vncipher $out7,$out7,v30 1081 vxor $in6,$in6,v31 1082 1083 cmplwi $len,32 # switch($len) 1084 blt Lcbc_dec8x_one 1085 nop 1086 beq Lcbc_dec8x_two 1087 cmplwi $len,64 1088 blt Lcbc_dec8x_three 1089 nop 1090 beq Lcbc_dec8x_four 1091 cmplwi $len,96 1092 blt Lcbc_dec8x_five 1093 nop 1094 beq Lcbc_dec8x_six 1095 1096Lcbc_dec8x_seven: 1097 vncipherlast $out1,$out1,$ivec 1098 vncipherlast $out2,$out2,$in1 1099 vncipherlast $out3,$out3,$in2 1100 vncipherlast $out4,$out4,$in3 1101 vncipherlast $out5,$out5,$in4 1102 vncipherlast $out6,$out6,$in5 1103 vncipherlast $out7,$out7,$in6 1104 vmr $ivec,$in7 1105 1106 le?vperm $out1,$out1,$out1,$inpperm 1107 le?vperm $out2,$out2,$out2,$inpperm 1108 stvx_u $out1,$x00,$out 1109 le?vperm $out3,$out3,$out3,$inpperm 1110 stvx_u $out2,$x10,$out 1111 le?vperm $out4,$out4,$out4,$inpperm 1112 stvx_u $out3,$x20,$out 1113 le?vperm $out5,$out5,$out5,$inpperm 1114 stvx_u $out4,$x30,$out 1115 le?vperm $out6,$out6,$out6,$inpperm 1116 stvx_u $out5,$x40,$out 1117 le?vperm $out7,$out7,$out7,$inpperm 1118 stvx_u $out6,$x50,$out 1119 stvx_u $out7,$x60,$out 1120 addi $out,$out,0x70 1121 b Lcbc_dec8x_done 1122 1123.align 5 1124Lcbc_dec8x_six: 1125 vncipherlast $out2,$out2,$ivec 1126 vncipherlast $out3,$out3,$in2 1127 vncipherlast $out4,$out4,$in3 1128 vncipherlast $out5,$out5,$in4 1129 vncipherlast $out6,$out6,$in5 1130 vncipherlast $out7,$out7,$in6 1131 vmr $ivec,$in7 1132 1133 le?vperm $out2,$out2,$out2,$inpperm 1134 le?vperm $out3,$out3,$out3,$inpperm 1135 stvx_u $out2,$x00,$out 1136 le?vperm $out4,$out4,$out4,$inpperm 1137 stvx_u $out3,$x10,$out 1138 le?vperm $out5,$out5,$out5,$inpperm 1139 stvx_u $out4,$x20,$out 1140 le?vperm $out6,$out6,$out6,$inpperm 1141 stvx_u $out5,$x30,$out 1142 le?vperm $out7,$out7,$out7,$inpperm 1143 stvx_u $out6,$x40,$out 1144 stvx_u $out7,$x50,$out 1145 addi $out,$out,0x60 1146 b Lcbc_dec8x_done 1147 1148.align 5 1149Lcbc_dec8x_five: 1150 vncipherlast $out3,$out3,$ivec 1151 vncipherlast $out4,$out4,$in3 1152 vncipherlast $out5,$out5,$in4 1153 vncipherlast $out6,$out6,$in5 1154 vncipherlast $out7,$out7,$in6 1155 vmr $ivec,$in7 1156 1157 le?vperm $out3,$out3,$out3,$inpperm 1158 le?vperm $out4,$out4,$out4,$inpperm 1159 stvx_u $out3,$x00,$out 1160 le?vperm $out5,$out5,$out5,$inpperm 1161 stvx_u $out4,$x10,$out 1162 le?vperm $out6,$out6,$out6,$inpperm 1163 stvx_u $out5,$x20,$out 1164 le?vperm $out7,$out7,$out7,$inpperm 1165 stvx_u $out6,$x30,$out 1166 stvx_u $out7,$x40,$out 1167 addi $out,$out,0x50 1168 b Lcbc_dec8x_done 1169 1170.align 5 1171Lcbc_dec8x_four: 1172 vncipherlast $out4,$out4,$ivec 1173 vncipherlast $out5,$out5,$in4 1174 vncipherlast $out6,$out6,$in5 1175 vncipherlast $out7,$out7,$in6 1176 vmr $ivec,$in7 1177 1178 le?vperm $out4,$out4,$out4,$inpperm 1179 le?vperm $out5,$out5,$out5,$inpperm 1180 stvx_u $out4,$x00,$out 1181 le?vperm $out6,$out6,$out6,$inpperm 1182 stvx_u $out5,$x10,$out 1183 le?vperm $out7,$out7,$out7,$inpperm 1184 stvx_u $out6,$x20,$out 1185 stvx_u $out7,$x30,$out 1186 addi $out,$out,0x40 1187 b Lcbc_dec8x_done 1188 1189.align 5 1190Lcbc_dec8x_three: 1191 vncipherlast $out5,$out5,$ivec 1192 vncipherlast $out6,$out6,$in5 1193 vncipherlast $out7,$out7,$in6 1194 vmr $ivec,$in7 1195 1196 le?vperm $out5,$out5,$out5,$inpperm 1197 le?vperm $out6,$out6,$out6,$inpperm 1198 stvx_u $out5,$x00,$out 1199 le?vperm $out7,$out7,$out7,$inpperm 1200 stvx_u $out6,$x10,$out 1201 stvx_u $out7,$x20,$out 1202 addi $out,$out,0x30 1203 b Lcbc_dec8x_done 1204 1205.align 5 1206Lcbc_dec8x_two: 1207 vncipherlast $out6,$out6,$ivec 1208 vncipherlast $out7,$out7,$in6 1209 vmr $ivec,$in7 1210 1211 le?vperm $out6,$out6,$out6,$inpperm 1212 le?vperm $out7,$out7,$out7,$inpperm 1213 stvx_u $out6,$x00,$out 1214 stvx_u $out7,$x10,$out 1215 addi $out,$out,0x20 1216 b Lcbc_dec8x_done 1217 1218.align 5 1219Lcbc_dec8x_one: 1220 vncipherlast $out7,$out7,$ivec 1221 vmr $ivec,$in7 1222 1223 le?vperm $out7,$out7,$out7,$inpperm 1224 stvx_u $out7,0,$out 1225 addi $out,$out,0x10 1226 1227Lcbc_dec8x_done: 1228 le?vperm $ivec,$ivec,$ivec,$inpperm 1229 stvx_u $ivec,0,$ivp # write [unaligned] iv 1230 1231 li r10,`$FRAME+15` 1232 li r11,`$FRAME+31` 1233 stvx $inpperm,r10,$sp # wipe copies of round keys 1234 addi r10,r10,32 1235 stvx $inpperm,r11,$sp 1236 addi r11,r11,32 1237 stvx $inpperm,r10,$sp 1238 addi r10,r10,32 1239 stvx $inpperm,r11,$sp 1240 addi r11,r11,32 1241 stvx $inpperm,r10,$sp 1242 addi r10,r10,32 1243 stvx $inpperm,r11,$sp 1244 addi r11,r11,32 1245 stvx $inpperm,r10,$sp 1246 addi r10,r10,32 1247 stvx $inpperm,r11,$sp 1248 addi r11,r11,32 1249 1250 mtspr 256,$vrsave 1251 lvx v20,r10,$sp # ABI says so 1252 addi r10,r10,32 1253 lvx v21,r11,$sp 1254 addi r11,r11,32 1255 lvx v22,r10,$sp 1256 addi r10,r10,32 1257 lvx v23,r11,$sp 1258 addi r11,r11,32 1259 lvx v24,r10,$sp 1260 addi r10,r10,32 1261 lvx v25,r11,$sp 1262 addi r11,r11,32 1263 lvx v26,r10,$sp 1264 addi r10,r10,32 1265 lvx v27,r11,$sp 1266 addi r11,r11,32 1267 lvx v28,r10,$sp 1268 addi r10,r10,32 1269 lvx v29,r11,$sp 1270 addi r11,r11,32 1271 lvx v30,r10,$sp 1272 lvx v31,r11,$sp 1273 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1274 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1275 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1276 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1277 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1278 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1279 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 1280 blr 1281 .long 0 1282 .byte 0,12,0x14,0,0x80,6,6,0 1283 .long 0 1284.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt 1285___ 1286}} }}} 1287 1288######################################################################### 1289{{{ # CTR procedure[s] # 1290 1291####################### WARNING: Here be dragons! ####################### 1292# 1293# This code is written as 'ctr32', based on a 32-bit counter used 1294# upstream. The kernel does *not* use a 32-bit counter. The kernel uses 1295# a 128-bit counter. 1296# 1297# This leads to subtle changes from the upstream code: the counter 1298# is incremented with vaddu_q_m rather than vaddu_w_m. This occurs in 1299# both the bulk (8 blocks at a time) path, and in the individual block 1300# path. Be aware of this when doing updates. 1301# 1302# See: 1303# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug") 1304# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword") 1305# https://github.com/openssl/openssl/pull/8942 1306# 1307######################################################################### 1308my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); 1309my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); 1310my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)= 1311 map("v$_",(4..11)); 1312my $dat=$tmp; 1313 1314$code.=<<___; 1315.globl .${prefix}_ctr32_encrypt_blocks 1316 ${UCMP}i $len,1 1317 bltlr- 1318 1319 lis r0,0xfff0 1320 mfspr $vrsave,256 1321 mtspr 256,r0 1322 1323 li $idx,15 1324 vxor $rndkey0,$rndkey0,$rndkey0 1325 le?vspltisb $tmp,0x0f 1326 1327 lvx $ivec,0,$ivp # load [unaligned] iv 1328 lvsl $inpperm,0,$ivp 1329 lvx $inptail,$idx,$ivp 1330 vspltisb $one,1 1331 le?vxor $inpperm,$inpperm,$tmp 1332 vperm $ivec,$ivec,$inptail,$inpperm 1333 vsldoi $one,$rndkey0,$one,1 1334 1335 neg r11,$inp 1336 ?lvsl $keyperm,0,$key # prepare for unaligned key 1337 lwz $rounds,240($key) 1338 1339 lvsr $inpperm,0,r11 # prepare for unaligned load 1340 lvx $inptail,0,$inp 1341 addi $inp,$inp,15 # 15 is not typo 1342 le?vxor $inpperm,$inpperm,$tmp 1343 1344 srwi $rounds,$rounds,1 1345 li $idx,16 1346 subi $rounds,$rounds,1 1347 1348 ${UCMP}i $len,8 1349 bge _aesp8_ctr32_encrypt8x 1350 1351 ?lvsr $outperm,0,$out # prepare for unaligned store 1352 vspltisb $outmask,-1 1353 lvx $outhead,0,$out 1354 ?vperm $outmask,$rndkey0,$outmask,$outperm 1355 le?vxor $outperm,$outperm,$tmp 1356 1357 lvx $rndkey0,0,$key 1358 mtctr $rounds 1359 lvx $rndkey1,$idx,$key 1360 addi $idx,$idx,16 1361 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1362 vxor $inout,$ivec,$rndkey0 1363 lvx $rndkey0,$idx,$key 1364 addi $idx,$idx,16 1365 b Loop_ctr32_enc 1366 1367.align 5 1368Loop_ctr32_enc: 1369 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1370 vcipher $inout,$inout,$rndkey1 1371 lvx $rndkey1,$idx,$key 1372 addi $idx,$idx,16 1373 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1374 vcipher $inout,$inout,$rndkey0 1375 lvx $rndkey0,$idx,$key 1376 addi $idx,$idx,16 1377 bdnz Loop_ctr32_enc 1378 1379 vadduqm $ivec,$ivec,$one # Kernel change for 128-bit 1380 vmr $dat,$inptail 1381 lvx $inptail,0,$inp 1382 addi $inp,$inp,16 1383 subic. $len,$len,1 # blocks-- 1384 1385 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1386 vcipher $inout,$inout,$rndkey1 1387 lvx $rndkey1,$idx,$key 1388 vperm $dat,$dat,$inptail,$inpperm 1389 li $idx,16 1390 ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm 1391 lvx $rndkey0,0,$key 1392 vxor $dat,$dat,$rndkey1 # last round key 1393 vcipherlast $inout,$inout,$dat 1394 1395 lvx $rndkey1,$idx,$key 1396 addi $idx,$idx,16 1397 vperm $inout,$inout,$inout,$outperm 1398 vsel $dat,$outhead,$inout,$outmask 1399 mtctr $rounds 1400 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1401 vmr $outhead,$inout 1402 vxor $inout,$ivec,$rndkey0 1403 lvx $rndkey0,$idx,$key 1404 addi $idx,$idx,16 1405 stvx $dat,0,$out 1406 addi $out,$out,16 1407 bne Loop_ctr32_enc 1408 1409 addi $out,$out,-1 1410 lvx $inout,0,$out # redundant in aligned case 1411 vsel $inout,$outhead,$inout,$outmask 1412 stvx $inout,0,$out 1413 1414 mtspr 256,$vrsave 1415 blr 1416 .long 0 1417 .byte 0,12,0x14,0,0,0,6,0 1418 .long 0 1419___ 1420######################################################################### 1421{{ # Optimized CTR procedure # 1422my $key_="r11"; 1423my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); 1424my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14)); 1425my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); 1426my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 1427 # v26-v31 last 6 round keys 1428my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment 1429my ($two,$three,$four)=($outhead,$outperm,$outmask); 1430 1431$code.=<<___; 1432.align 5 1433_aesp8_ctr32_encrypt8x: 1434 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 1435 li r10,`$FRAME+8*16+15` 1436 li r11,`$FRAME+8*16+31` 1437 stvx v20,r10,$sp # ABI says so 1438 addi r10,r10,32 1439 stvx v21,r11,$sp 1440 addi r11,r11,32 1441 stvx v22,r10,$sp 1442 addi r10,r10,32 1443 stvx v23,r11,$sp 1444 addi r11,r11,32 1445 stvx v24,r10,$sp 1446 addi r10,r10,32 1447 stvx v25,r11,$sp 1448 addi r11,r11,32 1449 stvx v26,r10,$sp 1450 addi r10,r10,32 1451 stvx v27,r11,$sp 1452 addi r11,r11,32 1453 stvx v28,r10,$sp 1454 addi r10,r10,32 1455 stvx v29,r11,$sp 1456 addi r11,r11,32 1457 stvx v30,r10,$sp 1458 stvx v31,r11,$sp 1459 li r0,-1 1460 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 1461 li $x10,0x10 1462 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1463 li $x20,0x20 1464 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1465 li $x30,0x30 1466 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1467 li $x40,0x40 1468 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1469 li $x50,0x50 1470 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1471 li $x60,0x60 1472 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1473 li $x70,0x70 1474 mtspr 256,r0 1475 1476 subi $rounds,$rounds,3 # -4 in total 1477 1478 lvx $rndkey0,$x00,$key # load key schedule 1479 lvx v30,$x10,$key 1480 addi $key,$key,0x20 1481 lvx v31,$x00,$key 1482 ?vperm $rndkey0,$rndkey0,v30,$keyperm 1483 addi $key_,$sp,$FRAME+15 1484 mtctr $rounds 1485 1486Load_ctr32_enc_key: 1487 ?vperm v24,v30,v31,$keyperm 1488 lvx v30,$x10,$key 1489 addi $key,$key,0x20 1490 stvx v24,$x00,$key_ # off-load round[1] 1491 ?vperm v25,v31,v30,$keyperm 1492 lvx v31,$x00,$key 1493 stvx v25,$x10,$key_ # off-load round[2] 1494 addi $key_,$key_,0x20 1495 bdnz Load_ctr32_enc_key 1496 1497 lvx v26,$x10,$key 1498 ?vperm v24,v30,v31,$keyperm 1499 lvx v27,$x20,$key 1500 stvx v24,$x00,$key_ # off-load round[3] 1501 ?vperm v25,v31,v26,$keyperm 1502 lvx v28,$x30,$key 1503 stvx v25,$x10,$key_ # off-load round[4] 1504 addi $key_,$sp,$FRAME+15 # rewind $key_ 1505 ?vperm v26,v26,v27,$keyperm 1506 lvx v29,$x40,$key 1507 ?vperm v27,v27,v28,$keyperm 1508 lvx v30,$x50,$key 1509 ?vperm v28,v28,v29,$keyperm 1510 lvx v31,$x60,$key 1511 ?vperm v29,v29,v30,$keyperm 1512 lvx $out0,$x70,$key # borrow $out0 1513 ?vperm v30,v30,v31,$keyperm 1514 lvx v24,$x00,$key_ # pre-load round[1] 1515 ?vperm v31,v31,$out0,$keyperm 1516 lvx v25,$x10,$key_ # pre-load round[2] 1517 1518 vadduqm $two,$one,$one 1519 subi $inp,$inp,15 # undo "caller" 1520 $SHL $len,$len,4 1521 1522 vadduqm $out1,$ivec,$one # counter values ... 1523 vadduqm $out2,$ivec,$two # (do all ctr adds as 128-bit) 1524 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] 1525 le?li $idx,8 1526 vadduqm $out3,$out1,$two 1527 vxor $out1,$out1,$rndkey0 1528 le?lvsl $inpperm,0,$idx 1529 vadduqm $out4,$out2,$two 1530 vxor $out2,$out2,$rndkey0 1531 le?vspltisb $tmp,0x0f 1532 vadduqm $out5,$out3,$two 1533 vxor $out3,$out3,$rndkey0 1534 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u 1535 vadduqm $out6,$out4,$two 1536 vxor $out4,$out4,$rndkey0 1537 vadduqm $out7,$out5,$two 1538 vxor $out5,$out5,$rndkey0 1539 vadduqm $ivec,$out6,$two # next counter value 1540 vxor $out6,$out6,$rndkey0 1541 vxor $out7,$out7,$rndkey0 1542 1543 mtctr $rounds 1544 b Loop_ctr32_enc8x 1545.align 5 1546Loop_ctr32_enc8x: 1547 vcipher $out0,$out0,v24 1548 vcipher $out1,$out1,v24 1549 vcipher $out2,$out2,v24 1550 vcipher $out3,$out3,v24 1551 vcipher $out4,$out4,v24 1552 vcipher $out5,$out5,v24 1553 vcipher $out6,$out6,v24 1554 vcipher $out7,$out7,v24 1555Loop_ctr32_enc8x_middle: 1556 lvx v24,$x20,$key_ # round[3] 1557 addi $key_,$key_,0x20 1558 1559 vcipher $out0,$out0,v25 1560 vcipher $out1,$out1,v25 1561 vcipher $out2,$out2,v25 1562 vcipher $out3,$out3,v25 1563 vcipher $out4,$out4,v25 1564 vcipher $out5,$out5,v25 1565 vcipher $out6,$out6,v25 1566 vcipher $out7,$out7,v25 1567 lvx v25,$x10,$key_ # round[4] 1568 bdnz Loop_ctr32_enc8x 1569 1570 subic r11,$len,256 # $len-256, borrow $key_ 1571 vcipher $out0,$out0,v24 1572 vcipher $out1,$out1,v24 1573 vcipher $out2,$out2,v24 1574 vcipher $out3,$out3,v24 1575 vcipher $out4,$out4,v24 1576 vcipher $out5,$out5,v24 1577 vcipher $out6,$out6,v24 1578 vcipher $out7,$out7,v24 1579 1580 subfe r0,r0,r0 # borrow?-1:0 1581 vcipher $out0,$out0,v25 1582 vcipher $out1,$out1,v25 1583 vcipher $out2,$out2,v25 1584 vcipher $out3,$out3,v25 1585 vcipher $out4,$out4,v25 1586 vcipher $out5,$out5,v25 1587 vcipher $out6,$out6,v25 1588 vcipher $out7,$out7,v25 1589 1590 and r0,r0,r11 1591 addi $key_,$sp,$FRAME+15 # rewind $key_ 1592 vcipher $out0,$out0,v26 1593 vcipher $out1,$out1,v26 1594 vcipher $out2,$out2,v26 1595 vcipher $out3,$out3,v26 1596 vcipher $out4,$out4,v26 1597 vcipher $out5,$out5,v26 1598 vcipher $out6,$out6,v26 1599 vcipher $out7,$out7,v26 1600 lvx v24,$x00,$key_ # re-pre-load round[1] 1601 1602 subic $len,$len,129 # $len-=129 1603 vcipher $out0,$out0,v27 1604 addi $len,$len,1 # $len-=128 really 1605 vcipher $out1,$out1,v27 1606 vcipher $out2,$out2,v27 1607 vcipher $out3,$out3,v27 1608 vcipher $out4,$out4,v27 1609 vcipher $out5,$out5,v27 1610 vcipher $out6,$out6,v27 1611 vcipher $out7,$out7,v27 1612 lvx v25,$x10,$key_ # re-pre-load round[2] 1613 1614 vcipher $out0,$out0,v28 1615 lvx_u $in0,$x00,$inp # load input 1616 vcipher $out1,$out1,v28 1617 lvx_u $in1,$x10,$inp 1618 vcipher $out2,$out2,v28 1619 lvx_u $in2,$x20,$inp 1620 vcipher $out3,$out3,v28 1621 lvx_u $in3,$x30,$inp 1622 vcipher $out4,$out4,v28 1623 lvx_u $in4,$x40,$inp 1624 vcipher $out5,$out5,v28 1625 lvx_u $in5,$x50,$inp 1626 vcipher $out6,$out6,v28 1627 lvx_u $in6,$x60,$inp 1628 vcipher $out7,$out7,v28 1629 lvx_u $in7,$x70,$inp 1630 addi $inp,$inp,0x80 1631 1632 vcipher $out0,$out0,v29 1633 le?vperm $in0,$in0,$in0,$inpperm 1634 vcipher $out1,$out1,v29 1635 le?vperm $in1,$in1,$in1,$inpperm 1636 vcipher $out2,$out2,v29 1637 le?vperm $in2,$in2,$in2,$inpperm 1638 vcipher $out3,$out3,v29 1639 le?vperm $in3,$in3,$in3,$inpperm 1640 vcipher $out4,$out4,v29 1641 le?vperm $in4,$in4,$in4,$inpperm 1642 vcipher $out5,$out5,v29 1643 le?vperm $in5,$in5,$in5,$inpperm 1644 vcipher $out6,$out6,v29 1645 le?vperm $in6,$in6,$in6,$inpperm 1646 vcipher $out7,$out7,v29 1647 le?vperm $in7,$in7,$in7,$inpperm 1648 1649 add $inp,$inp,r0 # $inp is adjusted in such 1650 # way that at exit from the 1651 # loop inX-in7 are loaded 1652 # with last "words" 1653 subfe. r0,r0,r0 # borrow?-1:0 1654 vcipher $out0,$out0,v30 1655 vxor $in0,$in0,v31 # xor with last round key 1656 vcipher $out1,$out1,v30 1657 vxor $in1,$in1,v31 1658 vcipher $out2,$out2,v30 1659 vxor $in2,$in2,v31 1660 vcipher $out3,$out3,v30 1661 vxor $in3,$in3,v31 1662 vcipher $out4,$out4,v30 1663 vxor $in4,$in4,v31 1664 vcipher $out5,$out5,v30 1665 vxor $in5,$in5,v31 1666 vcipher $out6,$out6,v30 1667 vxor $in6,$in6,v31 1668 vcipher $out7,$out7,v30 1669 vxor $in7,$in7,v31 1670 1671 bne Lctr32_enc8x_break # did $len-129 borrow? 1672 1673 vcipherlast $in0,$out0,$in0 1674 vcipherlast $in1,$out1,$in1 1675 vadduqm $out1,$ivec,$one # counter values ... 1676 vcipherlast $in2,$out2,$in2 1677 vadduqm $out2,$ivec,$two 1678 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] 1679 vcipherlast $in3,$out3,$in3 1680 vadduqm $out3,$out1,$two 1681 vxor $out1,$out1,$rndkey0 1682 vcipherlast $in4,$out4,$in4 1683 vadduqm $out4,$out2,$two 1684 vxor $out2,$out2,$rndkey0 1685 vcipherlast $in5,$out5,$in5 1686 vadduqm $out5,$out3,$two 1687 vxor $out3,$out3,$rndkey0 1688 vcipherlast $in6,$out6,$in6 1689 vadduqm $out6,$out4,$two 1690 vxor $out4,$out4,$rndkey0 1691 vcipherlast $in7,$out7,$in7 1692 vadduqm $out7,$out5,$two 1693 vxor $out5,$out5,$rndkey0 1694 le?vperm $in0,$in0,$in0,$inpperm 1695 vadduqm $ivec,$out6,$two # next counter value 1696 vxor $out6,$out6,$rndkey0 1697 le?vperm $in1,$in1,$in1,$inpperm 1698 vxor $out7,$out7,$rndkey0 1699 mtctr $rounds 1700 1701 vcipher $out0,$out0,v24 1702 stvx_u $in0,$x00,$out 1703 le?vperm $in2,$in2,$in2,$inpperm 1704 vcipher $out1,$out1,v24 1705 stvx_u $in1,$x10,$out 1706 le?vperm $in3,$in3,$in3,$inpperm 1707 vcipher $out2,$out2,v24 1708 stvx_u $in2,$x20,$out 1709 le?vperm $in4,$in4,$in4,$inpperm 1710 vcipher $out3,$out3,v24 1711 stvx_u $in3,$x30,$out 1712 le?vperm $in5,$in5,$in5,$inpperm 1713 vcipher $out4,$out4,v24 1714 stvx_u $in4,$x40,$out 1715 le?vperm $in6,$in6,$in6,$inpperm 1716 vcipher $out5,$out5,v24 1717 stvx_u $in5,$x50,$out 1718 le?vperm $in7,$in7,$in7,$inpperm 1719 vcipher $out6,$out6,v24 1720 stvx_u $in6,$x60,$out 1721 vcipher $out7,$out7,v24 1722 stvx_u $in7,$x70,$out 1723 addi $out,$out,0x80 1724 1725 b Loop_ctr32_enc8x_middle 1726 1727.align 5 1728Lctr32_enc8x_break: 1729 cmpwi $len,-0x60 1730 blt Lctr32_enc8x_one 1731 nop 1732 beq Lctr32_enc8x_two 1733 cmpwi $len,-0x40 1734 blt Lctr32_enc8x_three 1735 nop 1736 beq Lctr32_enc8x_four 1737 cmpwi $len,-0x20 1738 blt Lctr32_enc8x_five 1739 nop 1740 beq Lctr32_enc8x_six 1741 cmpwi $len,0x00 1742 blt Lctr32_enc8x_seven 1743 1744Lctr32_enc8x_eight: 1745 vcipherlast $out0,$out0,$in0 1746 vcipherlast $out1,$out1,$in1 1747 vcipherlast $out2,$out2,$in2 1748 vcipherlast $out3,$out3,$in3 1749 vcipherlast $out4,$out4,$in4 1750 vcipherlast $out5,$out5,$in5 1751 vcipherlast $out6,$out6,$in6 1752 vcipherlast $out7,$out7,$in7 1753 1754 le?vperm $out0,$out0,$out0,$inpperm 1755 le?vperm $out1,$out1,$out1,$inpperm 1756 stvx_u $out0,$x00,$out 1757 le?vperm $out2,$out2,$out2,$inpperm 1758 stvx_u $out1,$x10,$out 1759 le?vperm $out3,$out3,$out3,$inpperm 1760 stvx_u $out2,$x20,$out 1761 le?vperm $out4,$out4,$out4,$inpperm 1762 stvx_u $out3,$x30,$out 1763 le?vperm $out5,$out5,$out5,$inpperm 1764 stvx_u $out4,$x40,$out 1765 le?vperm $out6,$out6,$out6,$inpperm 1766 stvx_u $out5,$x50,$out 1767 le?vperm $out7,$out7,$out7,$inpperm 1768 stvx_u $out6,$x60,$out 1769 stvx_u $out7,$x70,$out 1770 addi $out,$out,0x80 1771 b Lctr32_enc8x_done 1772 1773.align 5 1774Lctr32_enc8x_seven: 1775 vcipherlast $out0,$out0,$in1 1776 vcipherlast $out1,$out1,$in2 1777 vcipherlast $out2,$out2,$in3 1778 vcipherlast $out3,$out3,$in4 1779 vcipherlast $out4,$out4,$in5 1780 vcipherlast $out5,$out5,$in6 1781 vcipherlast $out6,$out6,$in7 1782 1783 le?vperm $out0,$out0,$out0,$inpperm 1784 le?vperm $out1,$out1,$out1,$inpperm 1785 stvx_u $out0,$x00,$out 1786 le?vperm $out2,$out2,$out2,$inpperm 1787 stvx_u $out1,$x10,$out 1788 le?vperm $out3,$out3,$out3,$inpperm 1789 stvx_u $out2,$x20,$out 1790 le?vperm $out4,$out4,$out4,$inpperm 1791 stvx_u $out3,$x30,$out 1792 le?vperm $out5,$out5,$out5,$inpperm 1793 stvx_u $out4,$x40,$out 1794 le?vperm $out6,$out6,$out6,$inpperm 1795 stvx_u $out5,$x50,$out 1796 stvx_u $out6,$x60,$out 1797 addi $out,$out,0x70 1798 b Lctr32_enc8x_done 1799 1800.align 5 1801Lctr32_enc8x_six: 1802 vcipherlast $out0,$out0,$in2 1803 vcipherlast $out1,$out1,$in3 1804 vcipherlast $out2,$out2,$in4 1805 vcipherlast $out3,$out3,$in5 1806 vcipherlast $out4,$out4,$in6 1807 vcipherlast $out5,$out5,$in7 1808 1809 le?vperm $out0,$out0,$out0,$inpperm 1810 le?vperm $out1,$out1,$out1,$inpperm 1811 stvx_u $out0,$x00,$out 1812 le?vperm $out2,$out2,$out2,$inpperm 1813 stvx_u $out1,$x10,$out 1814 le?vperm $out3,$out3,$out3,$inpperm 1815 stvx_u $out2,$x20,$out 1816 le?vperm $out4,$out4,$out4,$inpperm 1817 stvx_u $out3,$x30,$out 1818 le?vperm $out5,$out5,$out5,$inpperm 1819 stvx_u $out4,$x40,$out 1820 stvx_u $out5,$x50,$out 1821 addi $out,$out,0x60 1822 b Lctr32_enc8x_done 1823 1824.align 5 1825Lctr32_enc8x_five: 1826 vcipherlast $out0,$out0,$in3 1827 vcipherlast $out1,$out1,$in4 1828 vcipherlast $out2,$out2,$in5 1829 vcipherlast $out3,$out3,$in6 1830 vcipherlast $out4,$out4,$in7 1831 1832 le?vperm $out0,$out0,$out0,$inpperm 1833 le?vperm $out1,$out1,$out1,$inpperm 1834 stvx_u $out0,$x00,$out 1835 le?vperm $out2,$out2,$out2,$inpperm 1836 stvx_u $out1,$x10,$out 1837 le?vperm $out3,$out3,$out3,$inpperm 1838 stvx_u $out2,$x20,$out 1839 le?vperm $out4,$out4,$out4,$inpperm 1840 stvx_u $out3,$x30,$out 1841 stvx_u $out4,$x40,$out 1842 addi $out,$out,0x50 1843 b Lctr32_enc8x_done 1844 1845.align 5 1846Lctr32_enc8x_four: 1847 vcipherlast $out0,$out0,$in4 1848 vcipherlast $out1,$out1,$in5 1849 vcipherlast $out2,$out2,$in6 1850 vcipherlast $out3,$out3,$in7 1851 1852 le?vperm $out0,$out0,$out0,$inpperm 1853 le?vperm $out1,$out1,$out1,$inpperm 1854 stvx_u $out0,$x00,$out 1855 le?vperm $out2,$out2,$out2,$inpperm 1856 stvx_u $out1,$x10,$out 1857 le?vperm $out3,$out3,$out3,$inpperm 1858 stvx_u $out2,$x20,$out 1859 stvx_u $out3,$x30,$out 1860 addi $out,$out,0x40 1861 b Lctr32_enc8x_done 1862 1863.align 5 1864Lctr32_enc8x_three: 1865 vcipherlast $out0,$out0,$in5 1866 vcipherlast $out1,$out1,$in6 1867 vcipherlast $out2,$out2,$in7 1868 1869 le?vperm $out0,$out0,$out0,$inpperm 1870 le?vperm $out1,$out1,$out1,$inpperm 1871 stvx_u $out0,$x00,$out 1872 le?vperm $out2,$out2,$out2,$inpperm 1873 stvx_u $out1,$x10,$out 1874 stvx_u $out2,$x20,$out 1875 addi $out,$out,0x30 1876 b Lctr32_enc8x_done 1877 1878.align 5 1879Lctr32_enc8x_two: 1880 vcipherlast $out0,$out0,$in6 1881 vcipherlast $out1,$out1,$in7 1882 1883 le?vperm $out0,$out0,$out0,$inpperm 1884 le?vperm $out1,$out1,$out1,$inpperm 1885 stvx_u $out0,$x00,$out 1886 stvx_u $out1,$x10,$out 1887 addi $out,$out,0x20 1888 b Lctr32_enc8x_done 1889 1890.align 5 1891Lctr32_enc8x_one: 1892 vcipherlast $out0,$out0,$in7 1893 1894 le?vperm $out0,$out0,$out0,$inpperm 1895 stvx_u $out0,0,$out 1896 addi $out,$out,0x10 1897 1898Lctr32_enc8x_done: 1899 li r10,`$FRAME+15` 1900 li r11,`$FRAME+31` 1901 stvx $inpperm,r10,$sp # wipe copies of round keys 1902 addi r10,r10,32 1903 stvx $inpperm,r11,$sp 1904 addi r11,r11,32 1905 stvx $inpperm,r10,$sp 1906 addi r10,r10,32 1907 stvx $inpperm,r11,$sp 1908 addi r11,r11,32 1909 stvx $inpperm,r10,$sp 1910 addi r10,r10,32 1911 stvx $inpperm,r11,$sp 1912 addi r11,r11,32 1913 stvx $inpperm,r10,$sp 1914 addi r10,r10,32 1915 stvx $inpperm,r11,$sp 1916 addi r11,r11,32 1917 1918 mtspr 256,$vrsave 1919 lvx v20,r10,$sp # ABI says so 1920 addi r10,r10,32 1921 lvx v21,r11,$sp 1922 addi r11,r11,32 1923 lvx v22,r10,$sp 1924 addi r10,r10,32 1925 lvx v23,r11,$sp 1926 addi r11,r11,32 1927 lvx v24,r10,$sp 1928 addi r10,r10,32 1929 lvx v25,r11,$sp 1930 addi r11,r11,32 1931 lvx v26,r10,$sp 1932 addi r10,r10,32 1933 lvx v27,r11,$sp 1934 addi r11,r11,32 1935 lvx v28,r10,$sp 1936 addi r10,r10,32 1937 lvx v29,r11,$sp 1938 addi r11,r11,32 1939 lvx v30,r10,$sp 1940 lvx v31,r11,$sp 1941 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1942 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1943 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1944 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1945 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1946 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1947 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 1948 blr 1949 .long 0 1950 .byte 0,12,0x14,0,0x80,6,6,0 1951 .long 0 1952.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks 1953___ 1954}} }}} 1955 1956######################################################################### 1957{{{ # XTS procedures # 1958# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, # 1959# const AES_KEY *key1, const AES_KEY *key2, # 1960# [const] unsigned char iv[16]); # 1961# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which # 1962# input tweak value is assumed to be encrypted already, and last tweak # 1963# value, one suitable for consecutive call on same chunk of data, is # 1964# written back to original buffer. In addition, in "tweak chaining" # 1965# mode only complete input blocks are processed. # 1966 1967my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10)); 1968my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2)); 1969my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7)); 1970my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12)); 1971my $taillen = $key2; 1972 1973 ($inp,$idx) = ($idx,$inp); # reassign 1974 1975$code.=<<___; 1976.globl .${prefix}_xts_encrypt 1977 mr $inp,r3 # reassign 1978 li r3,-1 1979 ${UCMP}i $len,16 1980 bltlr- 1981 1982 lis r0,0xfff0 1983 mfspr r12,256 # save vrsave 1984 li r11,0 1985 mtspr 256,r0 1986 1987 vspltisb $seven,0x07 # 0x070707..07 1988 le?lvsl $leperm,r11,r11 1989 le?vspltisb $tmp,0x0f 1990 le?vxor $leperm,$leperm,$seven 1991 1992 li $idx,15 1993 lvx $tweak,0,$ivp # load [unaligned] iv 1994 lvsl $inpperm,0,$ivp 1995 lvx $inptail,$idx,$ivp 1996 le?vxor $inpperm,$inpperm,$tmp 1997 vperm $tweak,$tweak,$inptail,$inpperm 1998 1999 neg r11,$inp 2000 lvsr $inpperm,0,r11 # prepare for unaligned load 2001 lvx $inout,0,$inp 2002 addi $inp,$inp,15 # 15 is not typo 2003 le?vxor $inpperm,$inpperm,$tmp 2004 2005 ${UCMP}i $key2,0 # key2==NULL? 2006 beq Lxts_enc_no_key2 2007 2008 ?lvsl $keyperm,0,$key2 # prepare for unaligned key 2009 lwz $rounds,240($key2) 2010 srwi $rounds,$rounds,1 2011 subi $rounds,$rounds,1 2012 li $idx,16 2013 2014 lvx $rndkey0,0,$key2 2015 lvx $rndkey1,$idx,$key2 2016 addi $idx,$idx,16 2017 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2018 vxor $tweak,$tweak,$rndkey0 2019 lvx $rndkey0,$idx,$key2 2020 addi $idx,$idx,16 2021 mtctr $rounds 2022 2023Ltweak_xts_enc: 2024 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2025 vcipher $tweak,$tweak,$rndkey1 2026 lvx $rndkey1,$idx,$key2 2027 addi $idx,$idx,16 2028 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2029 vcipher $tweak,$tweak,$rndkey0 2030 lvx $rndkey0,$idx,$key2 2031 addi $idx,$idx,16 2032 bdnz Ltweak_xts_enc 2033 2034 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2035 vcipher $tweak,$tweak,$rndkey1 2036 lvx $rndkey1,$idx,$key2 2037 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2038 vcipherlast $tweak,$tweak,$rndkey0 2039 2040 li $ivp,0 # don't chain the tweak 2041 b Lxts_enc 2042 2043Lxts_enc_no_key2: 2044 li $idx,-16 2045 and $len,$len,$idx # in "tweak chaining" 2046 # mode only complete 2047 # blocks are processed 2048Lxts_enc: 2049 lvx $inptail,0,$inp 2050 addi $inp,$inp,16 2051 2052 ?lvsl $keyperm,0,$key1 # prepare for unaligned key 2053 lwz $rounds,240($key1) 2054 srwi $rounds,$rounds,1 2055 subi $rounds,$rounds,1 2056 li $idx,16 2057 2058 vslb $eighty7,$seven,$seven # 0x808080..80 2059 vor $eighty7,$eighty7,$seven # 0x878787..87 2060 vspltisb $tmp,1 # 0x010101..01 2061 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 2062 2063 ${UCMP}i $len,96 2064 bge _aesp8_xts_encrypt6x 2065 2066 andi. $taillen,$len,15 2067 subic r0,$len,32 2068 subi $taillen,$taillen,16 2069 subfe r0,r0,r0 2070 and r0,r0,$taillen 2071 add $inp,$inp,r0 2072 2073 lvx $rndkey0,0,$key1 2074 lvx $rndkey1,$idx,$key1 2075 addi $idx,$idx,16 2076 vperm $inout,$inout,$inptail,$inpperm 2077 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2078 vxor $inout,$inout,$tweak 2079 vxor $inout,$inout,$rndkey0 2080 lvx $rndkey0,$idx,$key1 2081 addi $idx,$idx,16 2082 mtctr $rounds 2083 b Loop_xts_enc 2084 2085.align 5 2086Loop_xts_enc: 2087 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2088 vcipher $inout,$inout,$rndkey1 2089 lvx $rndkey1,$idx,$key1 2090 addi $idx,$idx,16 2091 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2092 vcipher $inout,$inout,$rndkey0 2093 lvx $rndkey0,$idx,$key1 2094 addi $idx,$idx,16 2095 bdnz Loop_xts_enc 2096 2097 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2098 vcipher $inout,$inout,$rndkey1 2099 lvx $rndkey1,$idx,$key1 2100 li $idx,16 2101 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2102 vxor $rndkey0,$rndkey0,$tweak 2103 vcipherlast $output,$inout,$rndkey0 2104 2105 le?vperm $tmp,$output,$output,$leperm 2106 be?nop 2107 le?stvx_u $tmp,0,$out 2108 be?stvx_u $output,0,$out 2109 addi $out,$out,16 2110 2111 subic. $len,$len,16 2112 beq Lxts_enc_done 2113 2114 vmr $inout,$inptail 2115 lvx $inptail,0,$inp 2116 addi $inp,$inp,16 2117 lvx $rndkey0,0,$key1 2118 lvx $rndkey1,$idx,$key1 2119 addi $idx,$idx,16 2120 2121 subic r0,$len,32 2122 subfe r0,r0,r0 2123 and r0,r0,$taillen 2124 add $inp,$inp,r0 2125 2126 vsrab $tmp,$tweak,$seven # next tweak value 2127 vaddubm $tweak,$tweak,$tweak 2128 vsldoi $tmp,$tmp,$tmp,15 2129 vand $tmp,$tmp,$eighty7 2130 vxor $tweak,$tweak,$tmp 2131 2132 vperm $inout,$inout,$inptail,$inpperm 2133 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2134 vxor $inout,$inout,$tweak 2135 vxor $output,$output,$rndkey0 # just in case $len<16 2136 vxor $inout,$inout,$rndkey0 2137 lvx $rndkey0,$idx,$key1 2138 addi $idx,$idx,16 2139 2140 mtctr $rounds 2141 ${UCMP}i $len,16 2142 bge Loop_xts_enc 2143 2144 vxor $output,$output,$tweak 2145 lvsr $inpperm,0,$len # $inpperm is no longer needed 2146 vxor $inptail,$inptail,$inptail # $inptail is no longer needed 2147 vspltisb $tmp,-1 2148 vperm $inptail,$inptail,$tmp,$inpperm 2149 vsel $inout,$inout,$output,$inptail 2150 2151 subi r11,$out,17 2152 subi $out,$out,16 2153 mtctr $len 2154 li $len,16 2155Loop_xts_enc_steal: 2156 lbzu r0,1(r11) 2157 stb r0,16(r11) 2158 bdnz Loop_xts_enc_steal 2159 2160 mtctr $rounds 2161 b Loop_xts_enc # one more time... 2162 2163Lxts_enc_done: 2164 ${UCMP}i $ivp,0 2165 beq Lxts_enc_ret 2166 2167 vsrab $tmp,$tweak,$seven # next tweak value 2168 vaddubm $tweak,$tweak,$tweak 2169 vsldoi $tmp,$tmp,$tmp,15 2170 vand $tmp,$tmp,$eighty7 2171 vxor $tweak,$tweak,$tmp 2172 2173 le?vperm $tweak,$tweak,$tweak,$leperm 2174 stvx_u $tweak,0,$ivp 2175 2176Lxts_enc_ret: 2177 mtspr 256,r12 # restore vrsave 2178 li r3,0 2179 blr 2180 .long 0 2181 .byte 0,12,0x04,0,0x80,6,6,0 2182 .long 0 2183.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt 2184 2185.globl .${prefix}_xts_decrypt 2186 mr $inp,r3 # reassign 2187 li r3,-1 2188 ${UCMP}i $len,16 2189 bltlr- 2190 2191 lis r0,0xfff8 2192 mfspr r12,256 # save vrsave 2193 li r11,0 2194 mtspr 256,r0 2195 2196 andi. r0,$len,15 2197 neg r0,r0 2198 andi. r0,r0,16 2199 sub $len,$len,r0 2200 2201 vspltisb $seven,0x07 # 0x070707..07 2202 le?lvsl $leperm,r11,r11 2203 le?vspltisb $tmp,0x0f 2204 le?vxor $leperm,$leperm,$seven 2205 2206 li $idx,15 2207 lvx $tweak,0,$ivp # load [unaligned] iv 2208 lvsl $inpperm,0,$ivp 2209 lvx $inptail,$idx,$ivp 2210 le?vxor $inpperm,$inpperm,$tmp 2211 vperm $tweak,$tweak,$inptail,$inpperm 2212 2213 neg r11,$inp 2214 lvsr $inpperm,0,r11 # prepare for unaligned load 2215 lvx $inout,0,$inp 2216 addi $inp,$inp,15 # 15 is not typo 2217 le?vxor $inpperm,$inpperm,$tmp 2218 2219 ${UCMP}i $key2,0 # key2==NULL? 2220 beq Lxts_dec_no_key2 2221 2222 ?lvsl $keyperm,0,$key2 # prepare for unaligned key 2223 lwz $rounds,240($key2) 2224 srwi $rounds,$rounds,1 2225 subi $rounds,$rounds,1 2226 li $idx,16 2227 2228 lvx $rndkey0,0,$key2 2229 lvx $rndkey1,$idx,$key2 2230 addi $idx,$idx,16 2231 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2232 vxor $tweak,$tweak,$rndkey0 2233 lvx $rndkey0,$idx,$key2 2234 addi $idx,$idx,16 2235 mtctr $rounds 2236 2237Ltweak_xts_dec: 2238 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2239 vcipher $tweak,$tweak,$rndkey1 2240 lvx $rndkey1,$idx,$key2 2241 addi $idx,$idx,16 2242 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2243 vcipher $tweak,$tweak,$rndkey0 2244 lvx $rndkey0,$idx,$key2 2245 addi $idx,$idx,16 2246 bdnz Ltweak_xts_dec 2247 2248 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2249 vcipher $tweak,$tweak,$rndkey1 2250 lvx $rndkey1,$idx,$key2 2251 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2252 vcipherlast $tweak,$tweak,$rndkey0 2253 2254 li $ivp,0 # don't chain the tweak 2255 b Lxts_dec 2256 2257Lxts_dec_no_key2: 2258 neg $idx,$len 2259 andi. $idx,$idx,15 2260 add $len,$len,$idx # in "tweak chaining" 2261 # mode only complete 2262 # blocks are processed 2263Lxts_dec: 2264 lvx $inptail,0,$inp 2265 addi $inp,$inp,16 2266 2267 ?lvsl $keyperm,0,$key1 # prepare for unaligned key 2268 lwz $rounds,240($key1) 2269 srwi $rounds,$rounds,1 2270 subi $rounds,$rounds,1 2271 li $idx,16 2272 2273 vslb $eighty7,$seven,$seven # 0x808080..80 2274 vor $eighty7,$eighty7,$seven # 0x878787..87 2275 vspltisb $tmp,1 # 0x010101..01 2276 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 2277 2278 ${UCMP}i $len,96 2279 bge _aesp8_xts_decrypt6x 2280 2281 lvx $rndkey0,0,$key1 2282 lvx $rndkey1,$idx,$key1 2283 addi $idx,$idx,16 2284 vperm $inout,$inout,$inptail,$inpperm 2285 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2286 vxor $inout,$inout,$tweak 2287 vxor $inout,$inout,$rndkey0 2288 lvx $rndkey0,$idx,$key1 2289 addi $idx,$idx,16 2290 mtctr $rounds 2291 2292 ${UCMP}i $len,16 2293 blt Ltail_xts_dec 2294 be?b Loop_xts_dec 2295 2296.align 5 2297Loop_xts_dec: 2298 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2299 vncipher $inout,$inout,$rndkey1 2300 lvx $rndkey1,$idx,$key1 2301 addi $idx,$idx,16 2302 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2303 vncipher $inout,$inout,$rndkey0 2304 lvx $rndkey0,$idx,$key1 2305 addi $idx,$idx,16 2306 bdnz Loop_xts_dec 2307 2308 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2309 vncipher $inout,$inout,$rndkey1 2310 lvx $rndkey1,$idx,$key1 2311 li $idx,16 2312 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2313 vxor $rndkey0,$rndkey0,$tweak 2314 vncipherlast $output,$inout,$rndkey0 2315 2316 le?vperm $tmp,$output,$output,$leperm 2317 be?nop 2318 le?stvx_u $tmp,0,$out 2319 be?stvx_u $output,0,$out 2320 addi $out,$out,16 2321 2322 subic. $len,$len,16 2323 beq Lxts_dec_done 2324 2325 vmr $inout,$inptail 2326 lvx $inptail,0,$inp 2327 addi $inp,$inp,16 2328 lvx $rndkey0,0,$key1 2329 lvx $rndkey1,$idx,$key1 2330 addi $idx,$idx,16 2331 2332 vsrab $tmp,$tweak,$seven # next tweak value 2333 vaddubm $tweak,$tweak,$tweak 2334 vsldoi $tmp,$tmp,$tmp,15 2335 vand $tmp,$tmp,$eighty7 2336 vxor $tweak,$tweak,$tmp 2337 2338 vperm $inout,$inout,$inptail,$inpperm 2339 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2340 vxor $inout,$inout,$tweak 2341 vxor $inout,$inout,$rndkey0 2342 lvx $rndkey0,$idx,$key1 2343 addi $idx,$idx,16 2344 2345 mtctr $rounds 2346 ${UCMP}i $len,16 2347 bge Loop_xts_dec 2348 2349Ltail_xts_dec: 2350 vsrab $tmp,$tweak,$seven # next tweak value 2351 vaddubm $tweak1,$tweak,$tweak 2352 vsldoi $tmp,$tmp,$tmp,15 2353 vand $tmp,$tmp,$eighty7 2354 vxor $tweak1,$tweak1,$tmp 2355 2356 subi $inp,$inp,16 2357 add $inp,$inp,$len 2358 2359 vxor $inout,$inout,$tweak # :-( 2360 vxor $inout,$inout,$tweak1 # :-) 2361 2362Loop_xts_dec_short: 2363 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2364 vncipher $inout,$inout,$rndkey1 2365 lvx $rndkey1,$idx,$key1 2366 addi $idx,$idx,16 2367 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2368 vncipher $inout,$inout,$rndkey0 2369 lvx $rndkey0,$idx,$key1 2370 addi $idx,$idx,16 2371 bdnz Loop_xts_dec_short 2372 2373 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2374 vncipher $inout,$inout,$rndkey1 2375 lvx $rndkey1,$idx,$key1 2376 li $idx,16 2377 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2378 vxor $rndkey0,$rndkey0,$tweak1 2379 vncipherlast $output,$inout,$rndkey0 2380 2381 le?vperm $tmp,$output,$output,$leperm 2382 be?nop 2383 le?stvx_u $tmp,0,$out 2384 be?stvx_u $output,0,$out 2385 2386 vmr $inout,$inptail 2387 lvx $inptail,0,$inp 2388 #addi $inp,$inp,16 2389 lvx $rndkey0,0,$key1 2390 lvx $rndkey1,$idx,$key1 2391 addi $idx,$idx,16 2392 vperm $inout,$inout,$inptail,$inpperm 2393 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2394 2395 lvsr $inpperm,0,$len # $inpperm is no longer needed 2396 vxor $inptail,$inptail,$inptail # $inptail is no longer needed 2397 vspltisb $tmp,-1 2398 vperm $inptail,$inptail,$tmp,$inpperm 2399 vsel $inout,$inout,$output,$inptail 2400 2401 vxor $rndkey0,$rndkey0,$tweak 2402 vxor $inout,$inout,$rndkey0 2403 lvx $rndkey0,$idx,$key1 2404 addi $idx,$idx,16 2405 2406 subi r11,$out,1 2407 mtctr $len 2408 li $len,16 2409Loop_xts_dec_steal: 2410 lbzu r0,1(r11) 2411 stb r0,16(r11) 2412 bdnz Loop_xts_dec_steal 2413 2414 mtctr $rounds 2415 b Loop_xts_dec # one more time... 2416 2417Lxts_dec_done: 2418 ${UCMP}i $ivp,0 2419 beq Lxts_dec_ret 2420 2421 vsrab $tmp,$tweak,$seven # next tweak value 2422 vaddubm $tweak,$tweak,$tweak 2423 vsldoi $tmp,$tmp,$tmp,15 2424 vand $tmp,$tmp,$eighty7 2425 vxor $tweak,$tweak,$tmp 2426 2427 le?vperm $tweak,$tweak,$tweak,$leperm 2428 stvx_u $tweak,0,$ivp 2429 2430Lxts_dec_ret: 2431 mtspr 256,r12 # restore vrsave 2432 li r3,0 2433 blr 2434 .long 0 2435 .byte 0,12,0x04,0,0x80,6,6,0 2436 .long 0 2437.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt 2438___ 2439######################################################################### 2440{{ # Optimized XTS procedures # 2441my $key_=$key2; 2442my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31)); 2443 $x00=0 if ($flavour =~ /osx/); 2444my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5)); 2445my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16)); 2446my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22)); 2447my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 2448 # v26-v31 last 6 round keys 2449my ($keyperm)=($out0); # aliases with "caller", redundant assignment 2450my $taillen=$x70; 2451 2452$code.=<<___; 2453.align 5 2454_aesp8_xts_encrypt6x: 2455 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 2456 mflr r11 2457 li r7,`$FRAME+8*16+15` 2458 li r3,`$FRAME+8*16+31` 2459 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) 2460 stvx v20,r7,$sp # ABI says so 2461 addi r7,r7,32 2462 stvx v21,r3,$sp 2463 addi r3,r3,32 2464 stvx v22,r7,$sp 2465 addi r7,r7,32 2466 stvx v23,r3,$sp 2467 addi r3,r3,32 2468 stvx v24,r7,$sp 2469 addi r7,r7,32 2470 stvx v25,r3,$sp 2471 addi r3,r3,32 2472 stvx v26,r7,$sp 2473 addi r7,r7,32 2474 stvx v27,r3,$sp 2475 addi r3,r3,32 2476 stvx v28,r7,$sp 2477 addi r7,r7,32 2478 stvx v29,r3,$sp 2479 addi r3,r3,32 2480 stvx v30,r7,$sp 2481 stvx v31,r3,$sp 2482 li r0,-1 2483 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 2484 li $x10,0x10 2485 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 2486 li $x20,0x20 2487 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 2488 li $x30,0x30 2489 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 2490 li $x40,0x40 2491 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 2492 li $x50,0x50 2493 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 2494 li $x60,0x60 2495 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 2496 li $x70,0x70 2497 mtspr 256,r0 2498 2499 xxlor 2, 32+$eighty7, 32+$eighty7 2500 vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87 2501 xxlor 1, 32+$eighty7, 32+$eighty7 2502 2503 # Load XOR Lconsts. 2504 mr $x70, r6 2505 bl Lconsts 2506 lxvw4x 0, $x40, r6 # load XOR contents 2507 mr r6, $x70 2508 li $x70,0x70 2509 2510 subi $rounds,$rounds,3 # -4 in total 2511 2512 lvx $rndkey0,$x00,$key1 # load key schedule 2513 lvx v30,$x10,$key1 2514 addi $key1,$key1,0x20 2515 lvx v31,$x00,$key1 2516 ?vperm $rndkey0,$rndkey0,v30,$keyperm 2517 addi $key_,$sp,$FRAME+15 2518 mtctr $rounds 2519 2520Load_xts_enc_key: 2521 ?vperm v24,v30,v31,$keyperm 2522 lvx v30,$x10,$key1 2523 addi $key1,$key1,0x20 2524 stvx v24,$x00,$key_ # off-load round[1] 2525 ?vperm v25,v31,v30,$keyperm 2526 lvx v31,$x00,$key1 2527 stvx v25,$x10,$key_ # off-load round[2] 2528 addi $key_,$key_,0x20 2529 bdnz Load_xts_enc_key 2530 2531 lvx v26,$x10,$key1 2532 ?vperm v24,v30,v31,$keyperm 2533 lvx v27,$x20,$key1 2534 stvx v24,$x00,$key_ # off-load round[3] 2535 ?vperm v25,v31,v26,$keyperm 2536 lvx v28,$x30,$key1 2537 stvx v25,$x10,$key_ # off-load round[4] 2538 addi $key_,$sp,$FRAME+15 # rewind $key_ 2539 ?vperm v26,v26,v27,$keyperm 2540 lvx v29,$x40,$key1 2541 ?vperm v27,v27,v28,$keyperm 2542 lvx v30,$x50,$key1 2543 ?vperm v28,v28,v29,$keyperm 2544 lvx v31,$x60,$key1 2545 ?vperm v29,v29,v30,$keyperm 2546 lvx $twk5,$x70,$key1 # borrow $twk5 2547 ?vperm v30,v30,v31,$keyperm 2548 lvx v24,$x00,$key_ # pre-load round[1] 2549 ?vperm v31,v31,$twk5,$keyperm 2550 lvx v25,$x10,$key_ # pre-load round[2] 2551 2552 # Switch to use the following codes with 0x010101..87 to generate tweak. 2553 # eighty7 = 0x010101..87 2554 # vsrab tmp, tweak, seven # next tweak value, right shift 7 bits 2555 # vand tmp, tmp, eighty7 # last byte with carry 2556 # vaddubm tweak, tweak, tweak # left shift 1 bit (x2) 2557 # xxlor vsx, 0, 0 2558 # vpermxor tweak, tweak, tmp, vsx 2559 2560 vperm $in0,$inout,$inptail,$inpperm 2561 subi $inp,$inp,31 # undo "caller" 2562 vxor $twk0,$tweak,$rndkey0 2563 vsrab $tmp,$tweak,$seven # next tweak value 2564 vaddubm $tweak,$tweak,$tweak 2565 vand $tmp,$tmp,$eighty7 2566 vxor $out0,$in0,$twk0 2567 xxlor 32+$in1, 0, 0 2568 vpermxor $tweak, $tweak, $tmp, $in1 2569 2570 lvx_u $in1,$x10,$inp 2571 vxor $twk1,$tweak,$rndkey0 2572 vsrab $tmp,$tweak,$seven # next tweak value 2573 vaddubm $tweak,$tweak,$tweak 2574 le?vperm $in1,$in1,$in1,$leperm 2575 vand $tmp,$tmp,$eighty7 2576 vxor $out1,$in1,$twk1 2577 xxlor 32+$in2, 0, 0 2578 vpermxor $tweak, $tweak, $tmp, $in2 2579 2580 lvx_u $in2,$x20,$inp 2581 andi. $taillen,$len,15 2582 vxor $twk2,$tweak,$rndkey0 2583 vsrab $tmp,$tweak,$seven # next tweak value 2584 vaddubm $tweak,$tweak,$tweak 2585 le?vperm $in2,$in2,$in2,$leperm 2586 vand $tmp,$tmp,$eighty7 2587 vxor $out2,$in2,$twk2 2588 xxlor 32+$in3, 0, 0 2589 vpermxor $tweak, $tweak, $tmp, $in3 2590 2591 lvx_u $in3,$x30,$inp 2592 sub $len,$len,$taillen 2593 vxor $twk3,$tweak,$rndkey0 2594 vsrab $tmp,$tweak,$seven # next tweak value 2595 vaddubm $tweak,$tweak,$tweak 2596 le?vperm $in3,$in3,$in3,$leperm 2597 vand $tmp,$tmp,$eighty7 2598 vxor $out3,$in3,$twk3 2599 xxlor 32+$in4, 0, 0 2600 vpermxor $tweak, $tweak, $tmp, $in4 2601 2602 lvx_u $in4,$x40,$inp 2603 subi $len,$len,0x60 2604 vxor $twk4,$tweak,$rndkey0 2605 vsrab $tmp,$tweak,$seven # next tweak value 2606 vaddubm $tweak,$tweak,$tweak 2607 le?vperm $in4,$in4,$in4,$leperm 2608 vand $tmp,$tmp,$eighty7 2609 vxor $out4,$in4,$twk4 2610 xxlor 32+$in5, 0, 0 2611 vpermxor $tweak, $tweak, $tmp, $in5 2612 2613 lvx_u $in5,$x50,$inp 2614 addi $inp,$inp,0x60 2615 vxor $twk5,$tweak,$rndkey0 2616 vsrab $tmp,$tweak,$seven # next tweak value 2617 vaddubm $tweak,$tweak,$tweak 2618 le?vperm $in5,$in5,$in5,$leperm 2619 vand $tmp,$tmp,$eighty7 2620 vxor $out5,$in5,$twk5 2621 xxlor 32+$in0, 0, 0 2622 vpermxor $tweak, $tweak, $tmp, $in0 2623 2624 vxor v31,v31,$rndkey0 2625 mtctr $rounds 2626 b Loop_xts_enc6x 2627 2628.align 5 2629Loop_xts_enc6x: 2630 vcipher $out0,$out0,v24 2631 vcipher $out1,$out1,v24 2632 vcipher $out2,$out2,v24 2633 vcipher $out3,$out3,v24 2634 vcipher $out4,$out4,v24 2635 vcipher $out5,$out5,v24 2636 lvx v24,$x20,$key_ # round[3] 2637 addi $key_,$key_,0x20 2638 2639 vcipher $out0,$out0,v25 2640 vcipher $out1,$out1,v25 2641 vcipher $out2,$out2,v25 2642 vcipher $out3,$out3,v25 2643 vcipher $out4,$out4,v25 2644 vcipher $out5,$out5,v25 2645 lvx v25,$x10,$key_ # round[4] 2646 bdnz Loop_xts_enc6x 2647 2648 xxlor 32+$eighty7, 1, 1 # 0x010101..87 2649 2650 subic $len,$len,96 # $len-=96 2651 vxor $in0,$twk0,v31 # xor with last round key 2652 vcipher $out0,$out0,v24 2653 vcipher $out1,$out1,v24 2654 vsrab $tmp,$tweak,$seven # next tweak value 2655 vxor $twk0,$tweak,$rndkey0 2656 vaddubm $tweak,$tweak,$tweak 2657 vcipher $out2,$out2,v24 2658 vcipher $out3,$out3,v24 2659 vcipher $out4,$out4,v24 2660 vcipher $out5,$out5,v24 2661 2662 subfe. r0,r0,r0 # borrow?-1:0 2663 vand $tmp,$tmp,$eighty7 2664 vcipher $out0,$out0,v25 2665 vcipher $out1,$out1,v25 2666 xxlor 32+$in1, 0, 0 2667 vpermxor $tweak, $tweak, $tmp, $in1 2668 vcipher $out2,$out2,v25 2669 vcipher $out3,$out3,v25 2670 vxor $in1,$twk1,v31 2671 vsrab $tmp,$tweak,$seven # next tweak value 2672 vxor $twk1,$tweak,$rndkey0 2673 vcipher $out4,$out4,v25 2674 vcipher $out5,$out5,v25 2675 2676 and r0,r0,$len 2677 vaddubm $tweak,$tweak,$tweak 2678 vcipher $out0,$out0,v26 2679 vcipher $out1,$out1,v26 2680 vand $tmp,$tmp,$eighty7 2681 vcipher $out2,$out2,v26 2682 vcipher $out3,$out3,v26 2683 xxlor 32+$in2, 0, 0 2684 vpermxor $tweak, $tweak, $tmp, $in2 2685 vcipher $out4,$out4,v26 2686 vcipher $out5,$out5,v26 2687 2688 add $inp,$inp,r0 # $inp is adjusted in such 2689 # way that at exit from the 2690 # loop inX-in5 are loaded 2691 # with last "words" 2692 vxor $in2,$twk2,v31 2693 vsrab $tmp,$tweak,$seven # next tweak value 2694 vxor $twk2,$tweak,$rndkey0 2695 vaddubm $tweak,$tweak,$tweak 2696 vcipher $out0,$out0,v27 2697 vcipher $out1,$out1,v27 2698 vcipher $out2,$out2,v27 2699 vcipher $out3,$out3,v27 2700 vand $tmp,$tmp,$eighty7 2701 vcipher $out4,$out4,v27 2702 vcipher $out5,$out5,v27 2703 2704 addi $key_,$sp,$FRAME+15 # rewind $key_ 2705 xxlor 32+$in3, 0, 0 2706 vpermxor $tweak, $tweak, $tmp, $in3 2707 vcipher $out0,$out0,v28 2708 vcipher $out1,$out1,v28 2709 vxor $in3,$twk3,v31 2710 vsrab $tmp,$tweak,$seven # next tweak value 2711 vxor $twk3,$tweak,$rndkey0 2712 vcipher $out2,$out2,v28 2713 vcipher $out3,$out3,v28 2714 vaddubm $tweak,$tweak,$tweak 2715 vcipher $out4,$out4,v28 2716 vcipher $out5,$out5,v28 2717 lvx v24,$x00,$key_ # re-pre-load round[1] 2718 vand $tmp,$tmp,$eighty7 2719 2720 vcipher $out0,$out0,v29 2721 vcipher $out1,$out1,v29 2722 xxlor 32+$in4, 0, 0 2723 vpermxor $tweak, $tweak, $tmp, $in4 2724 vcipher $out2,$out2,v29 2725 vcipher $out3,$out3,v29 2726 vxor $in4,$twk4,v31 2727 vsrab $tmp,$tweak,$seven # next tweak value 2728 vxor $twk4,$tweak,$rndkey0 2729 vcipher $out4,$out4,v29 2730 vcipher $out5,$out5,v29 2731 lvx v25,$x10,$key_ # re-pre-load round[2] 2732 vaddubm $tweak,$tweak,$tweak 2733 2734 vcipher $out0,$out0,v30 2735 vcipher $out1,$out1,v30 2736 vand $tmp,$tmp,$eighty7 2737 vcipher $out2,$out2,v30 2738 vcipher $out3,$out3,v30 2739 xxlor 32+$in5, 0, 0 2740 vpermxor $tweak, $tweak, $tmp, $in5 2741 vcipher $out4,$out4,v30 2742 vcipher $out5,$out5,v30 2743 vxor $in5,$twk5,v31 2744 vsrab $tmp,$tweak,$seven # next tweak value 2745 vxor $twk5,$tweak,$rndkey0 2746 2747 vcipherlast $out0,$out0,$in0 2748 lvx_u $in0,$x00,$inp # load next input block 2749 vaddubm $tweak,$tweak,$tweak 2750 vcipherlast $out1,$out1,$in1 2751 lvx_u $in1,$x10,$inp 2752 vcipherlast $out2,$out2,$in2 2753 le?vperm $in0,$in0,$in0,$leperm 2754 lvx_u $in2,$x20,$inp 2755 vand $tmp,$tmp,$eighty7 2756 vcipherlast $out3,$out3,$in3 2757 le?vperm $in1,$in1,$in1,$leperm 2758 lvx_u $in3,$x30,$inp 2759 vcipherlast $out4,$out4,$in4 2760 le?vperm $in2,$in2,$in2,$leperm 2761 lvx_u $in4,$x40,$inp 2762 xxlor 10, 32+$in0, 32+$in0 2763 xxlor 32+$in0, 0, 0 2764 vpermxor $tweak, $tweak, $tmp, $in0 2765 xxlor 32+$in0, 10, 10 2766 vcipherlast $tmp,$out5,$in5 # last block might be needed 2767 # in stealing mode 2768 le?vperm $in3,$in3,$in3,$leperm 2769 lvx_u $in5,$x50,$inp 2770 addi $inp,$inp,0x60 2771 le?vperm $in4,$in4,$in4,$leperm 2772 le?vperm $in5,$in5,$in5,$leperm 2773 2774 le?vperm $out0,$out0,$out0,$leperm 2775 le?vperm $out1,$out1,$out1,$leperm 2776 stvx_u $out0,$x00,$out # store output 2777 vxor $out0,$in0,$twk0 2778 le?vperm $out2,$out2,$out2,$leperm 2779 stvx_u $out1,$x10,$out 2780 vxor $out1,$in1,$twk1 2781 le?vperm $out3,$out3,$out3,$leperm 2782 stvx_u $out2,$x20,$out 2783 vxor $out2,$in2,$twk2 2784 le?vperm $out4,$out4,$out4,$leperm 2785 stvx_u $out3,$x30,$out 2786 vxor $out3,$in3,$twk3 2787 le?vperm $out5,$tmp,$tmp,$leperm 2788 stvx_u $out4,$x40,$out 2789 vxor $out4,$in4,$twk4 2790 le?stvx_u $out5,$x50,$out 2791 be?stvx_u $tmp, $x50,$out 2792 vxor $out5,$in5,$twk5 2793 addi $out,$out,0x60 2794 2795 mtctr $rounds 2796 beq Loop_xts_enc6x # did $len-=96 borrow? 2797 2798 xxlor 32+$eighty7, 2, 2 # 0x010101..87 2799 2800 addic. $len,$len,0x60 2801 beq Lxts_enc6x_zero 2802 cmpwi $len,0x20 2803 blt Lxts_enc6x_one 2804 nop 2805 beq Lxts_enc6x_two 2806 cmpwi $len,0x40 2807 blt Lxts_enc6x_three 2808 nop 2809 beq Lxts_enc6x_four 2810 2811Lxts_enc6x_five: 2812 vxor $out0,$in1,$twk0 2813 vxor $out1,$in2,$twk1 2814 vxor $out2,$in3,$twk2 2815 vxor $out3,$in4,$twk3 2816 vxor $out4,$in5,$twk4 2817 2818 bl _aesp8_xts_enc5x 2819 2820 le?vperm $out0,$out0,$out0,$leperm 2821 vmr $twk0,$twk5 # unused tweak 2822 le?vperm $out1,$out1,$out1,$leperm 2823 stvx_u $out0,$x00,$out # store output 2824 le?vperm $out2,$out2,$out2,$leperm 2825 stvx_u $out1,$x10,$out 2826 le?vperm $out3,$out3,$out3,$leperm 2827 stvx_u $out2,$x20,$out 2828 vxor $tmp,$out4,$twk5 # last block prep for stealing 2829 le?vperm $out4,$out4,$out4,$leperm 2830 stvx_u $out3,$x30,$out 2831 stvx_u $out4,$x40,$out 2832 addi $out,$out,0x50 2833 bne Lxts_enc6x_steal 2834 b Lxts_enc6x_done 2835 2836.align 4 2837Lxts_enc6x_four: 2838 vxor $out0,$in2,$twk0 2839 vxor $out1,$in3,$twk1 2840 vxor $out2,$in4,$twk2 2841 vxor $out3,$in5,$twk3 2842 vxor $out4,$out4,$out4 2843 2844 bl _aesp8_xts_enc5x 2845 2846 le?vperm $out0,$out0,$out0,$leperm 2847 vmr $twk0,$twk4 # unused tweak 2848 le?vperm $out1,$out1,$out1,$leperm 2849 stvx_u $out0,$x00,$out # store output 2850 le?vperm $out2,$out2,$out2,$leperm 2851 stvx_u $out1,$x10,$out 2852 vxor $tmp,$out3,$twk4 # last block prep for stealing 2853 le?vperm $out3,$out3,$out3,$leperm 2854 stvx_u $out2,$x20,$out 2855 stvx_u $out3,$x30,$out 2856 addi $out,$out,0x40 2857 bne Lxts_enc6x_steal 2858 b Lxts_enc6x_done 2859 2860.align 4 2861Lxts_enc6x_three: 2862 vxor $out0,$in3,$twk0 2863 vxor $out1,$in4,$twk1 2864 vxor $out2,$in5,$twk2 2865 vxor $out3,$out3,$out3 2866 vxor $out4,$out4,$out4 2867 2868 bl _aesp8_xts_enc5x 2869 2870 le?vperm $out0,$out0,$out0,$leperm 2871 vmr $twk0,$twk3 # unused tweak 2872 le?vperm $out1,$out1,$out1,$leperm 2873 stvx_u $out0,$x00,$out # store output 2874 vxor $tmp,$out2,$twk3 # last block prep for stealing 2875 le?vperm $out2,$out2,$out2,$leperm 2876 stvx_u $out1,$x10,$out 2877 stvx_u $out2,$x20,$out 2878 addi $out,$out,0x30 2879 bne Lxts_enc6x_steal 2880 b Lxts_enc6x_done 2881 2882.align 4 2883Lxts_enc6x_two: 2884 vxor $out0,$in4,$twk0 2885 vxor $out1,$in5,$twk1 2886 vxor $out2,$out2,$out2 2887 vxor $out3,$out3,$out3 2888 vxor $out4,$out4,$out4 2889 2890 bl _aesp8_xts_enc5x 2891 2892 le?vperm $out0,$out0,$out0,$leperm 2893 vmr $twk0,$twk2 # unused tweak 2894 vxor $tmp,$out1,$twk2 # last block prep for stealing 2895 le?vperm $out1,$out1,$out1,$leperm 2896 stvx_u $out0,$x00,$out # store output 2897 stvx_u $out1,$x10,$out 2898 addi $out,$out,0x20 2899 bne Lxts_enc6x_steal 2900 b Lxts_enc6x_done 2901 2902.align 4 2903Lxts_enc6x_one: 2904 vxor $out0,$in5,$twk0 2905 nop 2906Loop_xts_enc1x: 2907 vcipher $out0,$out0,v24 2908 lvx v24,$x20,$key_ # round[3] 2909 addi $key_,$key_,0x20 2910 2911 vcipher $out0,$out0,v25 2912 lvx v25,$x10,$key_ # round[4] 2913 bdnz Loop_xts_enc1x 2914 2915 add $inp,$inp,$taillen 2916 cmpwi $taillen,0 2917 vcipher $out0,$out0,v24 2918 2919 subi $inp,$inp,16 2920 vcipher $out0,$out0,v25 2921 2922 lvsr $inpperm,0,$taillen 2923 vcipher $out0,$out0,v26 2924 2925 lvx_u $in0,0,$inp 2926 vcipher $out0,$out0,v27 2927 2928 addi $key_,$sp,$FRAME+15 # rewind $key_ 2929 vcipher $out0,$out0,v28 2930 lvx v24,$x00,$key_ # re-pre-load round[1] 2931 2932 vcipher $out0,$out0,v29 2933 lvx v25,$x10,$key_ # re-pre-load round[2] 2934 vxor $twk0,$twk0,v31 2935 2936 le?vperm $in0,$in0,$in0,$leperm 2937 vcipher $out0,$out0,v30 2938 2939 vperm $in0,$in0,$in0,$inpperm 2940 vcipherlast $out0,$out0,$twk0 2941 2942 vmr $twk0,$twk1 # unused tweak 2943 vxor $tmp,$out0,$twk1 # last block prep for stealing 2944 le?vperm $out0,$out0,$out0,$leperm 2945 stvx_u $out0,$x00,$out # store output 2946 addi $out,$out,0x10 2947 bne Lxts_enc6x_steal 2948 b Lxts_enc6x_done 2949 2950.align 4 2951Lxts_enc6x_zero: 2952 cmpwi $taillen,0 2953 beq Lxts_enc6x_done 2954 2955 add $inp,$inp,$taillen 2956 subi $inp,$inp,16 2957 lvx_u $in0,0,$inp 2958 lvsr $inpperm,0,$taillen # $in5 is no more 2959 le?vperm $in0,$in0,$in0,$leperm 2960 vperm $in0,$in0,$in0,$inpperm 2961 vxor $tmp,$tmp,$twk0 2962Lxts_enc6x_steal: 2963 vxor $in0,$in0,$twk0 2964 vxor $out0,$out0,$out0 2965 vspltisb $out1,-1 2966 vperm $out0,$out0,$out1,$inpperm 2967 vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember? 2968 2969 subi r30,$out,17 2970 subi $out,$out,16 2971 mtctr $taillen 2972Loop_xts_enc6x_steal: 2973 lbzu r0,1(r30) 2974 stb r0,16(r30) 2975 bdnz Loop_xts_enc6x_steal 2976 2977 li $taillen,0 2978 mtctr $rounds 2979 b Loop_xts_enc1x # one more time... 2980 2981.align 4 2982Lxts_enc6x_done: 2983 ${UCMP}i $ivp,0 2984 beq Lxts_enc6x_ret 2985 2986 vxor $tweak,$twk0,$rndkey0 2987 le?vperm $tweak,$tweak,$tweak,$leperm 2988 stvx_u $tweak,0,$ivp 2989 2990Lxts_enc6x_ret: 2991 mtlr r11 2992 li r10,`$FRAME+15` 2993 li r11,`$FRAME+31` 2994 stvx $seven,r10,$sp # wipe copies of round keys 2995 addi r10,r10,32 2996 stvx $seven,r11,$sp 2997 addi r11,r11,32 2998 stvx $seven,r10,$sp 2999 addi r10,r10,32 3000 stvx $seven,r11,$sp 3001 addi r11,r11,32 3002 stvx $seven,r10,$sp 3003 addi r10,r10,32 3004 stvx $seven,r11,$sp 3005 addi r11,r11,32 3006 stvx $seven,r10,$sp 3007 addi r10,r10,32 3008 stvx $seven,r11,$sp 3009 addi r11,r11,32 3010 3011 mtspr 256,$vrsave 3012 lvx v20,r10,$sp # ABI says so 3013 addi r10,r10,32 3014 lvx v21,r11,$sp 3015 addi r11,r11,32 3016 lvx v22,r10,$sp 3017 addi r10,r10,32 3018 lvx v23,r11,$sp 3019 addi r11,r11,32 3020 lvx v24,r10,$sp 3021 addi r10,r10,32 3022 lvx v25,r11,$sp 3023 addi r11,r11,32 3024 lvx v26,r10,$sp 3025 addi r10,r10,32 3026 lvx v27,r11,$sp 3027 addi r11,r11,32 3028 lvx v28,r10,$sp 3029 addi r10,r10,32 3030 lvx v29,r11,$sp 3031 addi r11,r11,32 3032 lvx v30,r10,$sp 3033 lvx v31,r11,$sp 3034 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3035 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3036 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3037 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3038 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3039 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3040 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 3041 blr 3042 .long 0 3043 .byte 0,12,0x04,1,0x80,6,6,0 3044 .long 0 3045 3046.align 5 3047_aesp8_xts_enc5x: 3048 vcipher $out0,$out0,v24 3049 vcipher $out1,$out1,v24 3050 vcipher $out2,$out2,v24 3051 vcipher $out3,$out3,v24 3052 vcipher $out4,$out4,v24 3053 lvx v24,$x20,$key_ # round[3] 3054 addi $key_,$key_,0x20 3055 3056 vcipher $out0,$out0,v25 3057 vcipher $out1,$out1,v25 3058 vcipher $out2,$out2,v25 3059 vcipher $out3,$out3,v25 3060 vcipher $out4,$out4,v25 3061 lvx v25,$x10,$key_ # round[4] 3062 bdnz _aesp8_xts_enc5x 3063 3064 add $inp,$inp,$taillen 3065 cmpwi $taillen,0 3066 vcipher $out0,$out0,v24 3067 vcipher $out1,$out1,v24 3068 vcipher $out2,$out2,v24 3069 vcipher $out3,$out3,v24 3070 vcipher $out4,$out4,v24 3071 3072 subi $inp,$inp,16 3073 vcipher $out0,$out0,v25 3074 vcipher $out1,$out1,v25 3075 vcipher $out2,$out2,v25 3076 vcipher $out3,$out3,v25 3077 vcipher $out4,$out4,v25 3078 vxor $twk0,$twk0,v31 3079 3080 vcipher $out0,$out0,v26 3081 lvsr $inpperm,r0,$taillen # $in5 is no more 3082 vcipher $out1,$out1,v26 3083 vcipher $out2,$out2,v26 3084 vcipher $out3,$out3,v26 3085 vcipher $out4,$out4,v26 3086 vxor $in1,$twk1,v31 3087 3088 vcipher $out0,$out0,v27 3089 lvx_u $in0,0,$inp 3090 vcipher $out1,$out1,v27 3091 vcipher $out2,$out2,v27 3092 vcipher $out3,$out3,v27 3093 vcipher $out4,$out4,v27 3094 vxor $in2,$twk2,v31 3095 3096 addi $key_,$sp,$FRAME+15 # rewind $key_ 3097 vcipher $out0,$out0,v28 3098 vcipher $out1,$out1,v28 3099 vcipher $out2,$out2,v28 3100 vcipher $out3,$out3,v28 3101 vcipher $out4,$out4,v28 3102 lvx v24,$x00,$key_ # re-pre-load round[1] 3103 vxor $in3,$twk3,v31 3104 3105 vcipher $out0,$out0,v29 3106 le?vperm $in0,$in0,$in0,$leperm 3107 vcipher $out1,$out1,v29 3108 vcipher $out2,$out2,v29 3109 vcipher $out3,$out3,v29 3110 vcipher $out4,$out4,v29 3111 lvx v25,$x10,$key_ # re-pre-load round[2] 3112 vxor $in4,$twk4,v31 3113 3114 vcipher $out0,$out0,v30 3115 vperm $in0,$in0,$in0,$inpperm 3116 vcipher $out1,$out1,v30 3117 vcipher $out2,$out2,v30 3118 vcipher $out3,$out3,v30 3119 vcipher $out4,$out4,v30 3120 3121 vcipherlast $out0,$out0,$twk0 3122 vcipherlast $out1,$out1,$in1 3123 vcipherlast $out2,$out2,$in2 3124 vcipherlast $out3,$out3,$in3 3125 vcipherlast $out4,$out4,$in4 3126 blr 3127 .long 0 3128 .byte 0,12,0x14,0,0,0,0,0 3129 3130.align 5 3131_aesp8_xts_decrypt6x: 3132 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 3133 mflr r11 3134 li r7,`$FRAME+8*16+15` 3135 li r3,`$FRAME+8*16+31` 3136 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) 3137 stvx v20,r7,$sp # ABI says so 3138 addi r7,r7,32 3139 stvx v21,r3,$sp 3140 addi r3,r3,32 3141 stvx v22,r7,$sp 3142 addi r7,r7,32 3143 stvx v23,r3,$sp 3144 addi r3,r3,32 3145 stvx v24,r7,$sp 3146 addi r7,r7,32 3147 stvx v25,r3,$sp 3148 addi r3,r3,32 3149 stvx v26,r7,$sp 3150 addi r7,r7,32 3151 stvx v27,r3,$sp 3152 addi r3,r3,32 3153 stvx v28,r7,$sp 3154 addi r7,r7,32 3155 stvx v29,r3,$sp 3156 addi r3,r3,32 3157 stvx v30,r7,$sp 3158 stvx v31,r3,$sp 3159 li r0,-1 3160 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 3161 li $x10,0x10 3162 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3163 li $x20,0x20 3164 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3165 li $x30,0x30 3166 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3167 li $x40,0x40 3168 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3169 li $x50,0x50 3170 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3171 li $x60,0x60 3172 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3173 li $x70,0x70 3174 mtspr 256,r0 3175 3176 xxlor 2, 32+$eighty7, 32+$eighty7 3177 vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87 3178 xxlor 1, 32+$eighty7, 32+$eighty7 3179 3180 # Load XOR Lconsts. 3181 mr $x70, r6 3182 bl Lconsts 3183 lxvw4x 0, $x40, r6 # load XOR contents 3184 mr r6, $x70 3185 li $x70,0x70 3186 3187 subi $rounds,$rounds,3 # -4 in total 3188 3189 lvx $rndkey0,$x00,$key1 # load key schedule 3190 lvx v30,$x10,$key1 3191 addi $key1,$key1,0x20 3192 lvx v31,$x00,$key1 3193 ?vperm $rndkey0,$rndkey0,v30,$keyperm 3194 addi $key_,$sp,$FRAME+15 3195 mtctr $rounds 3196 3197Load_xts_dec_key: 3198 ?vperm v24,v30,v31,$keyperm 3199 lvx v30,$x10,$key1 3200 addi $key1,$key1,0x20 3201 stvx v24,$x00,$key_ # off-load round[1] 3202 ?vperm v25,v31,v30,$keyperm 3203 lvx v31,$x00,$key1 3204 stvx v25,$x10,$key_ # off-load round[2] 3205 addi $key_,$key_,0x20 3206 bdnz Load_xts_dec_key 3207 3208 lvx v26,$x10,$key1 3209 ?vperm v24,v30,v31,$keyperm 3210 lvx v27,$x20,$key1 3211 stvx v24,$x00,$key_ # off-load round[3] 3212 ?vperm v25,v31,v26,$keyperm 3213 lvx v28,$x30,$key1 3214 stvx v25,$x10,$key_ # off-load round[4] 3215 addi $key_,$sp,$FRAME+15 # rewind $key_ 3216 ?vperm v26,v26,v27,$keyperm 3217 lvx v29,$x40,$key1 3218 ?vperm v27,v27,v28,$keyperm 3219 lvx v30,$x50,$key1 3220 ?vperm v28,v28,v29,$keyperm 3221 lvx v31,$x60,$key1 3222 ?vperm v29,v29,v30,$keyperm 3223 lvx $twk5,$x70,$key1 # borrow $twk5 3224 ?vperm v30,v30,v31,$keyperm 3225 lvx v24,$x00,$key_ # pre-load round[1] 3226 ?vperm v31,v31,$twk5,$keyperm 3227 lvx v25,$x10,$key_ # pre-load round[2] 3228 3229 vperm $in0,$inout,$inptail,$inpperm 3230 subi $inp,$inp,31 # undo "caller" 3231 vxor $twk0,$tweak,$rndkey0 3232 vsrab $tmp,$tweak,$seven # next tweak value 3233 vaddubm $tweak,$tweak,$tweak 3234 vand $tmp,$tmp,$eighty7 3235 vxor $out0,$in0,$twk0 3236 xxlor 32+$in1, 0, 0 3237 vpermxor $tweak, $tweak, $tmp, $in1 3238 3239 lvx_u $in1,$x10,$inp 3240 vxor $twk1,$tweak,$rndkey0 3241 vsrab $tmp,$tweak,$seven # next tweak value 3242 vaddubm $tweak,$tweak,$tweak 3243 le?vperm $in1,$in1,$in1,$leperm 3244 vand $tmp,$tmp,$eighty7 3245 vxor $out1,$in1,$twk1 3246 xxlor 32+$in2, 0, 0 3247 vpermxor $tweak, $tweak, $tmp, $in2 3248 3249 lvx_u $in2,$x20,$inp 3250 andi. $taillen,$len,15 3251 vxor $twk2,$tweak,$rndkey0 3252 vsrab $tmp,$tweak,$seven # next tweak value 3253 vaddubm $tweak,$tweak,$tweak 3254 le?vperm $in2,$in2,$in2,$leperm 3255 vand $tmp,$tmp,$eighty7 3256 vxor $out2,$in2,$twk2 3257 xxlor 32+$in3, 0, 0 3258 vpermxor $tweak, $tweak, $tmp, $in3 3259 3260 lvx_u $in3,$x30,$inp 3261 sub $len,$len,$taillen 3262 vxor $twk3,$tweak,$rndkey0 3263 vsrab $tmp,$tweak,$seven # next tweak value 3264 vaddubm $tweak,$tweak,$tweak 3265 le?vperm $in3,$in3,$in3,$leperm 3266 vand $tmp,$tmp,$eighty7 3267 vxor $out3,$in3,$twk3 3268 xxlor 32+$in4, 0, 0 3269 vpermxor $tweak, $tweak, $tmp, $in4 3270 3271 lvx_u $in4,$x40,$inp 3272 subi $len,$len,0x60 3273 vxor $twk4,$tweak,$rndkey0 3274 vsrab $tmp,$tweak,$seven # next tweak value 3275 vaddubm $tweak,$tweak,$tweak 3276 le?vperm $in4,$in4,$in4,$leperm 3277 vand $tmp,$tmp,$eighty7 3278 vxor $out4,$in4,$twk4 3279 xxlor 32+$in5, 0, 0 3280 vpermxor $tweak, $tweak, $tmp, $in5 3281 3282 lvx_u $in5,$x50,$inp 3283 addi $inp,$inp,0x60 3284 vxor $twk5,$tweak,$rndkey0 3285 vsrab $tmp,$tweak,$seven # next tweak value 3286 vaddubm $tweak,$tweak,$tweak 3287 le?vperm $in5,$in5,$in5,$leperm 3288 vand $tmp,$tmp,$eighty7 3289 vxor $out5,$in5,$twk5 3290 xxlor 32+$in0, 0, 0 3291 vpermxor $tweak, $tweak, $tmp, $in0 3292 3293 vxor v31,v31,$rndkey0 3294 mtctr $rounds 3295 b Loop_xts_dec6x 3296 3297.align 5 3298Loop_xts_dec6x: 3299 vncipher $out0,$out0,v24 3300 vncipher $out1,$out1,v24 3301 vncipher $out2,$out2,v24 3302 vncipher $out3,$out3,v24 3303 vncipher $out4,$out4,v24 3304 vncipher $out5,$out5,v24 3305 lvx v24,$x20,$key_ # round[3] 3306 addi $key_,$key_,0x20 3307 3308 vncipher $out0,$out0,v25 3309 vncipher $out1,$out1,v25 3310 vncipher $out2,$out2,v25 3311 vncipher $out3,$out3,v25 3312 vncipher $out4,$out4,v25 3313 vncipher $out5,$out5,v25 3314 lvx v25,$x10,$key_ # round[4] 3315 bdnz Loop_xts_dec6x 3316 3317 xxlor 32+$eighty7, 1, 1 # 0x010101..87 3318 3319 subic $len,$len,96 # $len-=96 3320 vxor $in0,$twk0,v31 # xor with last round key 3321 vncipher $out0,$out0,v24 3322 vncipher $out1,$out1,v24 3323 vsrab $tmp,$tweak,$seven # next tweak value 3324 vxor $twk0,$tweak,$rndkey0 3325 vaddubm $tweak,$tweak,$tweak 3326 vncipher $out2,$out2,v24 3327 vncipher $out3,$out3,v24 3328 vncipher $out4,$out4,v24 3329 vncipher $out5,$out5,v24 3330 3331 subfe. r0,r0,r0 # borrow?-1:0 3332 vand $tmp,$tmp,$eighty7 3333 vncipher $out0,$out0,v25 3334 vncipher $out1,$out1,v25 3335 xxlor 32+$in1, 0, 0 3336 vpermxor $tweak, $tweak, $tmp, $in1 3337 vncipher $out2,$out2,v25 3338 vncipher $out3,$out3,v25 3339 vxor $in1,$twk1,v31 3340 vsrab $tmp,$tweak,$seven # next tweak value 3341 vxor $twk1,$tweak,$rndkey0 3342 vncipher $out4,$out4,v25 3343 vncipher $out5,$out5,v25 3344 3345 and r0,r0,$len 3346 vaddubm $tweak,$tweak,$tweak 3347 vncipher $out0,$out0,v26 3348 vncipher $out1,$out1,v26 3349 vand $tmp,$tmp,$eighty7 3350 vncipher $out2,$out2,v26 3351 vncipher $out3,$out3,v26 3352 xxlor 32+$in2, 0, 0 3353 vpermxor $tweak, $tweak, $tmp, $in2 3354 vncipher $out4,$out4,v26 3355 vncipher $out5,$out5,v26 3356 3357 add $inp,$inp,r0 # $inp is adjusted in such 3358 # way that at exit from the 3359 # loop inX-in5 are loaded 3360 # with last "words" 3361 vxor $in2,$twk2,v31 3362 vsrab $tmp,$tweak,$seven # next tweak value 3363 vxor $twk2,$tweak,$rndkey0 3364 vaddubm $tweak,$tweak,$tweak 3365 vncipher $out0,$out0,v27 3366 vncipher $out1,$out1,v27 3367 vncipher $out2,$out2,v27 3368 vncipher $out3,$out3,v27 3369 vand $tmp,$tmp,$eighty7 3370 vncipher $out4,$out4,v27 3371 vncipher $out5,$out5,v27 3372 3373 addi $key_,$sp,$FRAME+15 # rewind $key_ 3374 xxlor 32+$in3, 0, 0 3375 vpermxor $tweak, $tweak, $tmp, $in3 3376 vncipher $out0,$out0,v28 3377 vncipher $out1,$out1,v28 3378 vxor $in3,$twk3,v31 3379 vsrab $tmp,$tweak,$seven # next tweak value 3380 vxor $twk3,$tweak,$rndkey0 3381 vncipher $out2,$out2,v28 3382 vncipher $out3,$out3,v28 3383 vaddubm $tweak,$tweak,$tweak 3384 vncipher $out4,$out4,v28 3385 vncipher $out5,$out5,v28 3386 lvx v24,$x00,$key_ # re-pre-load round[1] 3387 vand $tmp,$tmp,$eighty7 3388 3389 vncipher $out0,$out0,v29 3390 vncipher $out1,$out1,v29 3391 xxlor 32+$in4, 0, 0 3392 vpermxor $tweak, $tweak, $tmp, $in4 3393 vncipher $out2,$out2,v29 3394 vncipher $out3,$out3,v29 3395 vxor $in4,$twk4,v31 3396 vsrab $tmp,$tweak,$seven # next tweak value 3397 vxor $twk4,$tweak,$rndkey0 3398 vncipher $out4,$out4,v29 3399 vncipher $out5,$out5,v29 3400 lvx v25,$x10,$key_ # re-pre-load round[2] 3401 vaddubm $tweak,$tweak,$tweak 3402 3403 vncipher $out0,$out0,v30 3404 vncipher $out1,$out1,v30 3405 vand $tmp,$tmp,$eighty7 3406 vncipher $out2,$out2,v30 3407 vncipher $out3,$out3,v30 3408 xxlor 32+$in5, 0, 0 3409 vpermxor $tweak, $tweak, $tmp, $in5 3410 vncipher $out4,$out4,v30 3411 vncipher $out5,$out5,v30 3412 vxor $in5,$twk5,v31 3413 vsrab $tmp,$tweak,$seven # next tweak value 3414 vxor $twk5,$tweak,$rndkey0 3415 3416 vncipherlast $out0,$out0,$in0 3417 lvx_u $in0,$x00,$inp # load next input block 3418 vaddubm $tweak,$tweak,$tweak 3419 vncipherlast $out1,$out1,$in1 3420 lvx_u $in1,$x10,$inp 3421 vncipherlast $out2,$out2,$in2 3422 le?vperm $in0,$in0,$in0,$leperm 3423 lvx_u $in2,$x20,$inp 3424 vand $tmp,$tmp,$eighty7 3425 vncipherlast $out3,$out3,$in3 3426 le?vperm $in1,$in1,$in1,$leperm 3427 lvx_u $in3,$x30,$inp 3428 vncipherlast $out4,$out4,$in4 3429 le?vperm $in2,$in2,$in2,$leperm 3430 lvx_u $in4,$x40,$inp 3431 xxlor 10, 32+$in0, 32+$in0 3432 xxlor 32+$in0, 0, 0 3433 vpermxor $tweak, $tweak, $tmp, $in0 3434 xxlor 32+$in0, 10, 10 3435 vncipherlast $out5,$out5,$in5 3436 le?vperm $in3,$in3,$in3,$leperm 3437 lvx_u $in5,$x50,$inp 3438 addi $inp,$inp,0x60 3439 le?vperm $in4,$in4,$in4,$leperm 3440 le?vperm $in5,$in5,$in5,$leperm 3441 3442 le?vperm $out0,$out0,$out0,$leperm 3443 le?vperm $out1,$out1,$out1,$leperm 3444 stvx_u $out0,$x00,$out # store output 3445 vxor $out0,$in0,$twk0 3446 le?vperm $out2,$out2,$out2,$leperm 3447 stvx_u $out1,$x10,$out 3448 vxor $out1,$in1,$twk1 3449 le?vperm $out3,$out3,$out3,$leperm 3450 stvx_u $out2,$x20,$out 3451 vxor $out2,$in2,$twk2 3452 le?vperm $out4,$out4,$out4,$leperm 3453 stvx_u $out3,$x30,$out 3454 vxor $out3,$in3,$twk3 3455 le?vperm $out5,$out5,$out5,$leperm 3456 stvx_u $out4,$x40,$out 3457 vxor $out4,$in4,$twk4 3458 stvx_u $out5,$x50,$out 3459 vxor $out5,$in5,$twk5 3460 addi $out,$out,0x60 3461 3462 mtctr $rounds 3463 beq Loop_xts_dec6x # did $len-=96 borrow? 3464 3465 xxlor 32+$eighty7, 2, 2 # 0x010101..87 3466 3467 addic. $len,$len,0x60 3468 beq Lxts_dec6x_zero 3469 cmpwi $len,0x20 3470 blt Lxts_dec6x_one 3471 nop 3472 beq Lxts_dec6x_two 3473 cmpwi $len,0x40 3474 blt Lxts_dec6x_three 3475 nop 3476 beq Lxts_dec6x_four 3477 3478Lxts_dec6x_five: 3479 vxor $out0,$in1,$twk0 3480 vxor $out1,$in2,$twk1 3481 vxor $out2,$in3,$twk2 3482 vxor $out3,$in4,$twk3 3483 vxor $out4,$in5,$twk4 3484 3485 bl _aesp8_xts_dec5x 3486 3487 le?vperm $out0,$out0,$out0,$leperm 3488 vmr $twk0,$twk5 # unused tweak 3489 vxor $twk1,$tweak,$rndkey0 3490 le?vperm $out1,$out1,$out1,$leperm 3491 stvx_u $out0,$x00,$out # store output 3492 vxor $out0,$in0,$twk1 3493 le?vperm $out2,$out2,$out2,$leperm 3494 stvx_u $out1,$x10,$out 3495 le?vperm $out3,$out3,$out3,$leperm 3496 stvx_u $out2,$x20,$out 3497 le?vperm $out4,$out4,$out4,$leperm 3498 stvx_u $out3,$x30,$out 3499 stvx_u $out4,$x40,$out 3500 addi $out,$out,0x50 3501 bne Lxts_dec6x_steal 3502 b Lxts_dec6x_done 3503 3504.align 4 3505Lxts_dec6x_four: 3506 vxor $out0,$in2,$twk0 3507 vxor $out1,$in3,$twk1 3508 vxor $out2,$in4,$twk2 3509 vxor $out3,$in5,$twk3 3510 vxor $out4,$out4,$out4 3511 3512 bl _aesp8_xts_dec5x 3513 3514 le?vperm $out0,$out0,$out0,$leperm 3515 vmr $twk0,$twk4 # unused tweak 3516 vmr $twk1,$twk5 3517 le?vperm $out1,$out1,$out1,$leperm 3518 stvx_u $out0,$x00,$out # store output 3519 vxor $out0,$in0,$twk5 3520 le?vperm $out2,$out2,$out2,$leperm 3521 stvx_u $out1,$x10,$out 3522 le?vperm $out3,$out3,$out3,$leperm 3523 stvx_u $out2,$x20,$out 3524 stvx_u $out3,$x30,$out 3525 addi $out,$out,0x40 3526 bne Lxts_dec6x_steal 3527 b Lxts_dec6x_done 3528 3529.align 4 3530Lxts_dec6x_three: 3531 vxor $out0,$in3,$twk0 3532 vxor $out1,$in4,$twk1 3533 vxor $out2,$in5,$twk2 3534 vxor $out3,$out3,$out3 3535 vxor $out4,$out4,$out4 3536 3537 bl _aesp8_xts_dec5x 3538 3539 le?vperm $out0,$out0,$out0,$leperm 3540 vmr $twk0,$twk3 # unused tweak 3541 vmr $twk1,$twk4 3542 le?vperm $out1,$out1,$out1,$leperm 3543 stvx_u $out0,$x00,$out # store output 3544 vxor $out0,$in0,$twk4 3545 le?vperm $out2,$out2,$out2,$leperm 3546 stvx_u $out1,$x10,$out 3547 stvx_u $out2,$x20,$out 3548 addi $out,$out,0x30 3549 bne Lxts_dec6x_steal 3550 b Lxts_dec6x_done 3551 3552.align 4 3553Lxts_dec6x_two: 3554 vxor $out0,$in4,$twk0 3555 vxor $out1,$in5,$twk1 3556 vxor $out2,$out2,$out2 3557 vxor $out3,$out3,$out3 3558 vxor $out4,$out4,$out4 3559 3560 bl _aesp8_xts_dec5x 3561 3562 le?vperm $out0,$out0,$out0,$leperm 3563 vmr $twk0,$twk2 # unused tweak 3564 vmr $twk1,$twk3 3565 le?vperm $out1,$out1,$out1,$leperm 3566 stvx_u $out0,$x00,$out # store output 3567 vxor $out0,$in0,$twk3 3568 stvx_u $out1,$x10,$out 3569 addi $out,$out,0x20 3570 bne Lxts_dec6x_steal 3571 b Lxts_dec6x_done 3572 3573.align 4 3574Lxts_dec6x_one: 3575 vxor $out0,$in5,$twk0 3576 nop 3577Loop_xts_dec1x: 3578 vncipher $out0,$out0,v24 3579 lvx v24,$x20,$key_ # round[3] 3580 addi $key_,$key_,0x20 3581 3582 vncipher $out0,$out0,v25 3583 lvx v25,$x10,$key_ # round[4] 3584 bdnz Loop_xts_dec1x 3585 3586 subi r0,$taillen,1 3587 vncipher $out0,$out0,v24 3588 3589 andi. r0,r0,16 3590 cmpwi $taillen,0 3591 vncipher $out0,$out0,v25 3592 3593 sub $inp,$inp,r0 3594 vncipher $out0,$out0,v26 3595 3596 lvx_u $in0,0,$inp 3597 vncipher $out0,$out0,v27 3598 3599 addi $key_,$sp,$FRAME+15 # rewind $key_ 3600 vncipher $out0,$out0,v28 3601 lvx v24,$x00,$key_ # re-pre-load round[1] 3602 3603 vncipher $out0,$out0,v29 3604 lvx v25,$x10,$key_ # re-pre-load round[2] 3605 vxor $twk0,$twk0,v31 3606 3607 le?vperm $in0,$in0,$in0,$leperm 3608 vncipher $out0,$out0,v30 3609 3610 mtctr $rounds 3611 vncipherlast $out0,$out0,$twk0 3612 3613 vmr $twk0,$twk1 # unused tweak 3614 vmr $twk1,$twk2 3615 le?vperm $out0,$out0,$out0,$leperm 3616 stvx_u $out0,$x00,$out # store output 3617 addi $out,$out,0x10 3618 vxor $out0,$in0,$twk2 3619 bne Lxts_dec6x_steal 3620 b Lxts_dec6x_done 3621 3622.align 4 3623Lxts_dec6x_zero: 3624 cmpwi $taillen,0 3625 beq Lxts_dec6x_done 3626 3627 lvx_u $in0,0,$inp 3628 le?vperm $in0,$in0,$in0,$leperm 3629 vxor $out0,$in0,$twk1 3630Lxts_dec6x_steal: 3631 vncipher $out0,$out0,v24 3632 lvx v24,$x20,$key_ # round[3] 3633 addi $key_,$key_,0x20 3634 3635 vncipher $out0,$out0,v25 3636 lvx v25,$x10,$key_ # round[4] 3637 bdnz Lxts_dec6x_steal 3638 3639 add $inp,$inp,$taillen 3640 vncipher $out0,$out0,v24 3641 3642 cmpwi $taillen,0 3643 vncipher $out0,$out0,v25 3644 3645 lvx_u $in0,0,$inp 3646 vncipher $out0,$out0,v26 3647 3648 lvsr $inpperm,0,$taillen # $in5 is no more 3649 vncipher $out0,$out0,v27 3650 3651 addi $key_,$sp,$FRAME+15 # rewind $key_ 3652 vncipher $out0,$out0,v28 3653 lvx v24,$x00,$key_ # re-pre-load round[1] 3654 3655 vncipher $out0,$out0,v29 3656 lvx v25,$x10,$key_ # re-pre-load round[2] 3657 vxor $twk1,$twk1,v31 3658 3659 le?vperm $in0,$in0,$in0,$leperm 3660 vncipher $out0,$out0,v30 3661 3662 vperm $in0,$in0,$in0,$inpperm 3663 vncipherlast $tmp,$out0,$twk1 3664 3665 le?vperm $out0,$tmp,$tmp,$leperm 3666 le?stvx_u $out0,0,$out 3667 be?stvx_u $tmp,0,$out 3668 3669 vxor $out0,$out0,$out0 3670 vspltisb $out1,-1 3671 vperm $out0,$out0,$out1,$inpperm 3672 vsel $out0,$in0,$tmp,$out0 3673 vxor $out0,$out0,$twk0 3674 3675 subi r30,$out,1 3676 mtctr $taillen 3677Loop_xts_dec6x_steal: 3678 lbzu r0,1(r30) 3679 stb r0,16(r30) 3680 bdnz Loop_xts_dec6x_steal 3681 3682 li $taillen,0 3683 mtctr $rounds 3684 b Loop_xts_dec1x # one more time... 3685 3686.align 4 3687Lxts_dec6x_done: 3688 ${UCMP}i $ivp,0 3689 beq Lxts_dec6x_ret 3690 3691 vxor $tweak,$twk0,$rndkey0 3692 le?vperm $tweak,$tweak,$tweak,$leperm 3693 stvx_u $tweak,0,$ivp 3694 3695Lxts_dec6x_ret: 3696 mtlr r11 3697 li r10,`$FRAME+15` 3698 li r11,`$FRAME+31` 3699 stvx $seven,r10,$sp # wipe copies of round keys 3700 addi r10,r10,32 3701 stvx $seven,r11,$sp 3702 addi r11,r11,32 3703 stvx $seven,r10,$sp 3704 addi r10,r10,32 3705 stvx $seven,r11,$sp 3706 addi r11,r11,32 3707 stvx $seven,r10,$sp 3708 addi r10,r10,32 3709 stvx $seven,r11,$sp 3710 addi r11,r11,32 3711 stvx $seven,r10,$sp 3712 addi r10,r10,32 3713 stvx $seven,r11,$sp 3714 addi r11,r11,32 3715 3716 mtspr 256,$vrsave 3717 lvx v20,r10,$sp # ABI says so 3718 addi r10,r10,32 3719 lvx v21,r11,$sp 3720 addi r11,r11,32 3721 lvx v22,r10,$sp 3722 addi r10,r10,32 3723 lvx v23,r11,$sp 3724 addi r11,r11,32 3725 lvx v24,r10,$sp 3726 addi r10,r10,32 3727 lvx v25,r11,$sp 3728 addi r11,r11,32 3729 lvx v26,r10,$sp 3730 addi r10,r10,32 3731 lvx v27,r11,$sp 3732 addi r11,r11,32 3733 lvx v28,r10,$sp 3734 addi r10,r10,32 3735 lvx v29,r11,$sp 3736 addi r11,r11,32 3737 lvx v30,r10,$sp 3738 lvx v31,r11,$sp 3739 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3740 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3741 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3742 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3743 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3744 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3745 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 3746 blr 3747 .long 0 3748 .byte 0,12,0x04,1,0x80,6,6,0 3749 .long 0 3750 3751.align 5 3752_aesp8_xts_dec5x: 3753 vncipher $out0,$out0,v24 3754 vncipher $out1,$out1,v24 3755 vncipher $out2,$out2,v24 3756 vncipher $out3,$out3,v24 3757 vncipher $out4,$out4,v24 3758 lvx v24,$x20,$key_ # round[3] 3759 addi $key_,$key_,0x20 3760 3761 vncipher $out0,$out0,v25 3762 vncipher $out1,$out1,v25 3763 vncipher $out2,$out2,v25 3764 vncipher $out3,$out3,v25 3765 vncipher $out4,$out4,v25 3766 lvx v25,$x10,$key_ # round[4] 3767 bdnz _aesp8_xts_dec5x 3768 3769 subi r0,$taillen,1 3770 vncipher $out0,$out0,v24 3771 vncipher $out1,$out1,v24 3772 vncipher $out2,$out2,v24 3773 vncipher $out3,$out3,v24 3774 vncipher $out4,$out4,v24 3775 3776 andi. r0,r0,16 3777 cmpwi $taillen,0 3778 vncipher $out0,$out0,v25 3779 vncipher $out1,$out1,v25 3780 vncipher $out2,$out2,v25 3781 vncipher $out3,$out3,v25 3782 vncipher $out4,$out4,v25 3783 vxor $twk0,$twk0,v31 3784 3785 sub $inp,$inp,r0 3786 vncipher $out0,$out0,v26 3787 vncipher $out1,$out1,v26 3788 vncipher $out2,$out2,v26 3789 vncipher $out3,$out3,v26 3790 vncipher $out4,$out4,v26 3791 vxor $in1,$twk1,v31 3792 3793 vncipher $out0,$out0,v27 3794 lvx_u $in0,0,$inp 3795 vncipher $out1,$out1,v27 3796 vncipher $out2,$out2,v27 3797 vncipher $out3,$out3,v27 3798 vncipher $out4,$out4,v27 3799 vxor $in2,$twk2,v31 3800 3801 addi $key_,$sp,$FRAME+15 # rewind $key_ 3802 vncipher $out0,$out0,v28 3803 vncipher $out1,$out1,v28 3804 vncipher $out2,$out2,v28 3805 vncipher $out3,$out3,v28 3806 vncipher $out4,$out4,v28 3807 lvx v24,$x00,$key_ # re-pre-load round[1] 3808 vxor $in3,$twk3,v31 3809 3810 vncipher $out0,$out0,v29 3811 le?vperm $in0,$in0,$in0,$leperm 3812 vncipher $out1,$out1,v29 3813 vncipher $out2,$out2,v29 3814 vncipher $out3,$out3,v29 3815 vncipher $out4,$out4,v29 3816 lvx v25,$x10,$key_ # re-pre-load round[2] 3817 vxor $in4,$twk4,v31 3818 3819 vncipher $out0,$out0,v30 3820 vncipher $out1,$out1,v30 3821 vncipher $out2,$out2,v30 3822 vncipher $out3,$out3,v30 3823 vncipher $out4,$out4,v30 3824 3825 vncipherlast $out0,$out0,$twk0 3826 vncipherlast $out1,$out1,$in1 3827 vncipherlast $out2,$out2,$in2 3828 vncipherlast $out3,$out3,$in3 3829 vncipherlast $out4,$out4,$in4 3830 mtctr $rounds 3831 blr 3832 .long 0 3833 .byte 0,12,0x14,0,0,0,0,0 3834___ 3835}} }}} 3836 3837my $consts=1; 3838foreach(split("\n",$code)) { 3839 s/\`([^\`]*)\`/eval($1)/geo; 3840 3841 # constants table endian-specific conversion 3842 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) { 3843 my $conv=$3; 3844 my @bytes=(); 3845 3846 # convert to endian-agnostic format 3847 if ($1 eq "long") { 3848 foreach (split(/,\s*/,$2)) { 3849 my $l = /^0/?oct:int; 3850 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; 3851 } 3852 } else { 3853 @bytes = map(/^0/?oct:int,split(/,\s*/,$2)); 3854 } 3855 3856 # little-endian conversion 3857 if ($flavour =~ /le$/o) { 3858 SWITCH: for($conv) { 3859 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; 3860 /\?rev/ && do { @bytes=reverse(@bytes); last; }; 3861 } 3862 } 3863 3864 #emit 3865 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; 3866 next; 3867 } 3868 $consts=0 if (m/Lconsts:/o); # end of table 3869 3870 # instructions prefixed with '?' are endian-specific and need 3871 # to be adjusted accordingly... 3872 if ($flavour =~ /le$/o) { # little-endian 3873 s/le\?//o or 3874 s/be\?/#be#/o or 3875 s/\?lvsr/lvsl/o or 3876 s/\?lvsl/lvsr/o or 3877 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or 3878 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or 3879 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; 3880 } else { # big-endian 3881 s/le\?/#le#/o or 3882 s/be\?//o or 3883 s/\?([a-z]+)/$1/o; 3884 } 3885 3886 print $_,"\n"; 3887} 3888 3889close STDOUT; 3890