#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from CRYPTOGAMs[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://www.openssl.org/~appro/cryptogams/

# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#       * Redistributions of source code must retain copyright notices,
#         this list of conditions and the following disclaimer.
#
#       * Redistributions in binary form must reproduce the above
#         copyright notice, this list of conditions and the following
#         disclaimer in the documentation and/or other materials
#         provided with the distribution.
#
#       * Neither the name of the CRYPTOGAMS nor the names of its
#         copyright holder and contributors may be used to endorse or
#         promote products derived from this software without specific
#         prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies the MSR.VSX flag
# being set. It should also be noted that the ISA specification doesn't
# prohibit alignment exceptions for these instructions on page
# boundaries. Initially alignment was handled in a pure AltiVec/VMX way
# [with data aligned programmatically, which in turn guarantees
# exception-free execution], but that turned out to hamper performance
# when vcipher instructions are interleaved. It's reckoned that the
# eventual misalignment penalties at page boundaries are on average
# lower than the additional overhead of the pure AltiVec approach.
#
# May 2016
#
# Add XTS subroutine; a 9x improvement on little- and a 12x improvement
# on big-endian systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#		CBC en-/decrypt	CTR	XTS
# POWER8[le]	3.96/0.72	0.74	1.1
# POWER8[be]	3.75/0.65	0.66	1.0

$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
( $xlate="${dir}../../../arch/powerpc/crypto/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
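
# The $flavour argument (e.g. "linux-ppc64le") selects the ABI and
# endianness above, and the generated assembly is piped through the
# ppc-xlate.pl translator. An illustrative invocation (the actual
# arguments come from the build system):
#
#	perl aesp8-ppc.pl linux-ppc64le aesp8-ppc.S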

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{	# Key setup procedures						#
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
.long	0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x58
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
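# Note: "bcl 20,31,\$+4" above is the usual position-independent idiom,
# an always-taken branch-and-link to the next instruction, so the second
# mflr picks up its own address; rcon then sits 0x58 bytes behind it
# (five 16-byte constant rows plus the two preceding instructions).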

.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone
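# The loop above is the FIPS-197 AES-128 key schedule done entirely in
# vector registers: vperm with the mask at rcon+0x20 rotates the last
# word and splats it into all four word positions, which makes the
# ShiftRows step of vcipherlast a no-op, so vcipherlast computes
# SubWord(RotWord(w)) xor Rcon in one shot; the vsldoi/vxor ladder then
# propagates W[i] = W[i-4] xor W[i-1] through the remaining words of
# the new round key.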

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	stvx		$stage,0,$out
	addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in1,$in1,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
Ldec_key_abort:
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
	.long		0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{	# Single block en- and decrypt procedures			#
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz		$rounds,240($key)
	lis		r0,0xfc00
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl		v3,0,r11		# outperm
	le?vxor		v2,v2,v4
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

Loop_${dir}c:
	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_${dir}c

	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not typo
	?vperm		v2,v1,v2,v3		# outmask
	le?vxor		v3,v3,v4
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
	.long		0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
#########################################################################
{{{	# CBC en- and decrypt procedures				#
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
	${UCMP}i	$len,16
	bltlr-

	cmpwi		$enc,0			# test direction
	lis		r0,0xffe0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1
	beq		Lcbc_dec

Lcbc_enc:
	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	vperm		$inout,$inout,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	vxor		$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm		$tmp,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_enc

	b		Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge		_aesp8_cbc_decrypt8x
	vmr		$tmp,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	vperm		$tmp,$tmp,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$tmp,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16

Loop_cbc_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor		$inout,$inout,$ivec
	vmr		$ivec,$tmp
	vperm		$tmp,$inout,$inout,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_dec

Lcbc_done:
	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	neg		$enc,$ivp		# write [unaligned] iv
	li		$idx,15			# 15 is not typo
	vxor		$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl		$outperm,0,$enc
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp
	lvx		$outhead,0,$ivp
	vperm		$ivec,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$ivec,$outmask
	lvx		$inptail,$idx,$ivp
	stvx		$inout,0,$ivp
	vsel		$inout,$ivec,$inptail,$outmask
	stvx		$inout,$idx,$ivp

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
#########################################################################
{{	# Optimized CBC decrypt procedure				#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment

$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li		r10,`$FRAME+8*16+15`
	li		r11,`$FRAME+8*16+31`
	stvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	stvx		v21,r11,$sp
	addi		r11,r11,32
	stvx		v22,r10,$sp
	addi		r10,r10,32
	stvx		v23,r11,$sp
	addi		r11,r11,32
	stvx		v24,r10,$sp
	addi		r10,r10,32
	stvx		v25,r11,$sp
	addi		r11,r11,32
	stvx		v26,r10,$sp
	addi		r10,r10,32
	stvx		v27,r11,$sp
	addi		r11,r11,32
	stvx		v28,r10,$sp
	addi		r10,r10,32
	stvx		v29,r11,$sp
	addi		r11,r11,32
	stvx		v30,r10,$sp
	stvx		v31,r11,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total
	subi		$len,$len,128		# bias

	lvx		$rndkey0,$x00,$key	# load key schedule
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	lvx		v31,$x00,$key
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_cbc_dec_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_cbc_dec_key

	lvx		v26,$x10,$key
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key
	?vperm		v29,v29,v30,$keyperm
	lvx		$out0,$x70,$key		# borrow $out0
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$out0,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	#lvx		$inptail,0,$inp		# "caller" already did this
	#addi		$inp,$inp,15		# 15 is not typo
	subi		$inp,$inp,15		# undo "caller"

	le?li		$idx,8
	lvx_u		$in0,$x00,$inp		# load first 8 "words"
	le?lvsl		$inpperm,0,$idx
	le?vspltisb	$tmp,0x0f
	lvx_u		$in1,$x10,$inp
	le?vxor		$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u		$in2,$x20,$inp
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u		$in3,$x30,$inp
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u		$in4,$x40,$inp
	le?vperm	$in2,$in2,$in2,$inpperm
	vxor		$out0,$in0,$rndkey0
	lvx_u		$in5,$x50,$inp
	le?vperm	$in3,$in3,$in3,$inpperm
	vxor		$out1,$in1,$rndkey0
	lvx_u		$in6,$x60,$inp
	le?vperm	$in4,$in4,$in4,$inpperm
	vxor		$out2,$in2,$rndkey0
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80
	le?vperm	$in5,$in5,$in5,$inpperm
	vxor		$out3,$in3,$rndkey0
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor		$out4,$in4,$rndkey0
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor		$out5,$in5,$rndkey0
	vxor		$out6,$in6,$rndkey0
	vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	b		Loop_cbc_dec8x
.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x

	subic		$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and		r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in7 are loaded
						# with last "words"
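# The subic/subfe./and/add sequence above is a branch-free select:
# subic sets CA only when len >= 128, subfe. then yields r0 = 0 in that
# case or r0 = -1 on borrow, so the "and" leaves either 0 or the
# (negative) remaining length in r0 and the "add" rewinds inp just
# enough for the final pass to load the last blocks. The dot form also
# leaves CR0.EQ set when no borrow occurred, which steers the beq back
# to Loop_cbc_dec8x further down.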
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	vxor		$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	vxor		$in0,$in0,v31
	vncipher	$out2,$out2,v30
	vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor		$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	lvx_u		$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	lvx_u		$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u		$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u		$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	le?vperm	$in2,$in2,$in2,$inpperm
	lvx_u		$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	le?vperm	$in3,$in3,$in3,$inpperm
	lvx_u		$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	le?vperm	$in4,$in4,$in4,$inpperm
	lvx_u		$in6,$x60,$inp
	vmr		$ivec,$in7
	le?vperm	$in5,$in5,$in5,$inpperm
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor		$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor		$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	vxor		$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	vxor		$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	vxor		$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	vxor		$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x60,$out
	vxor		$out6,$in6,$rndkey0
	stvx_u		$out7,$x70,$out
	addi		$out,$out,0x80
	vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	beq		Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.		$len,$len,128
	beq		Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	vxor		$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor		$in6,$in6,v31

	cmplwi		$len,32			# switch($len)
	blt		Lcbc_dec8x_one
	nop
	beq		Lcbc_dec8x_two
	cmplwi		$len,64
	blt		Lcbc_dec8x_three
	nop
	beq		Lcbc_dec8x_four
	cmplwi		$len,96
	blt		Lcbc_dec8x_five
	nop
	beq		Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x50,$out
	stvx_u		$out7,$x60,$out
	addi		$out,$out,0x70
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x40,$out
	stvx_u		$out7,$x50,$out
	addi		$out,$out,0x60
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x30,$out
	stvx_u		$out7,$x40,$out
	addi		$out,$out,0x50
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x20,$out
	stvx_u		$out7,$x30,$out
	addi		$out,$out,0x40
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x10,$out
	stvx_u		$out7,$x20,$out
	addi		$out,$out,0x30
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x00,$out
	stvx_u		$out7,$x10,$out
	addi		$out,$out,0x20
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr		$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out7,0,$out
	addi		$out,$out,0x10

Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u		$ivec,0,$ivp		# write [unaligned] iv

	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$inpperm,r10,$sp	# wipe copies of round keys
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x14,0,0x80,6,6,0
	.long		0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}

#########################################################################
{{{	# CTR procedure[s]						#

####################### WARNING: Here be dragons! #######################
#
# This code is written as 'ctr32', based on a 32-bit counter used
# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
# a 128-bit counter.
#
# This leads to subtle changes from the upstream code: the counter is
# incremented with vadduqm rather than vadduwm. This occurs in both the
# bulk (8 blocks at a time) path and in the individual block path. Be
# aware of this when doing updates.
#
# See:
# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
# https://github.com/openssl/openssl/pull/8942
#
#########################################################################
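# A concrete illustration of the difference, with the low 32 bits of
# the counter at 0xffffffff:
#
#   vadduwm: every 32-bit word wraps independently, so the low word
#            becomes 0 and no carry reaches bit 32 (upstream ctr32).
#   vadduqm: the carry propagates across the whole 128-bit quadword,
#            which is what the kernel's 128-bit CTR convention needs.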
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
	${UCMP}i	$len,1
	bltlr-

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	vspltisb	$one,1
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm
	vsldoi		$one,$rndkey0,$one,1

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1

	${UCMP}i	$len,8
	bge		_aesp8_ctr32_encrypt8x

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	lvx		$rndkey0,0,$key
	mtctr		$rounds
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$ivec,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	b		Loop_ctr32_enc

.align	5
Loop_ctr32_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_ctr32_enc

	vadduqm		$ivec,$ivec,$one	# Kernel change for 128-bit
	vmr		$dat,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	subic.		$len,$len,1		# blocks--

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	vperm		$dat,$dat,$inptail,$inpperm
	li		$idx,16
	?vperm		$rndkey1,$rndkey0,$rndkey1,$keyperm
	lvx		$rndkey0,0,$key
	vxor		$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inout,$outperm
	vsel		$dat,$outhead,$inout,$outmask
	mtctr		$rounds
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr		$outhead,$inout
	vxor		$inout,$ivec,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	stvx		$dat,0,$out
	addi		$out,$out,16
	bne		Loop_ctr32_enc

	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
#########################################################################
{{	# Optimized CTR procedure					#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);

$code.=<<___;
.align	5
_aesp8_ctr32_encrypt8x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li		r10,`$FRAME+8*16+15`
	li		r11,`$FRAME+8*16+31`
	stvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	stvx		v21,r11,$sp
	addi		r11,r11,32
	stvx		v22,r10,$sp
	addi		r10,r10,32
	stvx		v23,r11,$sp
	addi		r11,r11,32
	stvx		v24,r10,$sp
	addi		r10,r10,32
	stvx		v25,r11,$sp
	addi		r11,r11,32
	stvx		v26,r10,$sp
	addi		r10,r10,32
	stvx		v27,r11,$sp
	addi		r11,r11,32
	stvx		v28,r10,$sp
	addi		r10,r10,32
	stvx		v29,r11,$sp
	addi		r11,r11,32
	stvx		v30,r10,$sp
	stvx		v31,r11,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total

	lvx		$rndkey0,$x00,$key	# load key schedule
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	lvx		v31,$x00,$key
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_ctr32_enc_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_ctr32_enc_key

	lvx		v26,$x10,$key
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key
	?vperm		v29,v29,v30,$keyperm
	lvx		$out0,$x70,$key		# borrow $out0
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$out0,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	vadduqm		$two,$one,$one
	subi		$inp,$inp,15		# undo "caller"
	$SHL		$len,$len,4

	vadduqm		$out1,$ivec,$one	# counter values ...
	vadduqm		$out2,$ivec,$two	# (do all ctr adds as 128-bit)
	vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	le?li		$idx,8
	vadduqm		$out3,$out1,$two
	vxor		$out1,$out1,$rndkey0
	le?lvsl		$inpperm,0,$idx
	vadduqm		$out4,$out2,$two
	vxor		$out2,$out2,$rndkey0
	le?vspltisb	$tmp,0x0f
	vadduqm		$out5,$out3,$two
	vxor		$out3,$out3,$rndkey0
	le?vxor		$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	vadduqm		$out6,$out4,$two
	vxor		$out4,$out4,$rndkey0
	vadduqm		$out7,$out5,$two
	vxor		$out5,$out5,$rndkey0
	vadduqm		$ivec,$out6,$two	# next counter value
	vxor		$out6,$out6,$rndkey0
	vxor		$out7,$out7,$rndkey0

	mtctr		$rounds
	b		Loop_ctr32_enc8x
.align	5
Loop_ctr32_enc8x:
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24
	vcipher		$out6,$out6,v24
	vcipher		$out7,$out7,v24
Loop_ctr32_enc8x_middle:
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25
	vcipher		$out6,$out6,v25
	vcipher		$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_ctr32_enc8x

	subic		r11,$len,256		# $len-256, borrow $key_
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24
	vcipher		$out6,$out6,v24
	vcipher		$out7,$out7,v24

	subfe		r0,r0,r0		# borrow?-1:0
	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25
	vcipher		$out6,$out6,v25
	vcipher		$out7,$out7,v25

	and		r0,r0,r11
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vcipher		$out0,$out0,v26
	vcipher		$out1,$out1,v26
	vcipher		$out2,$out2,v26
	vcipher		$out3,$out3,v26
	vcipher		$out4,$out4,v26
	vcipher		$out5,$out5,v26
	vcipher		$out6,$out6,v26
	vcipher		$out7,$out7,v26
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	subic		$len,$len,129		# $len-=129
	vcipher		$out0,$out0,v27
	addi		$len,$len,1		# $len-=128 really
	vcipher		$out1,$out1,v27
	vcipher		$out2,$out2,v27
	vcipher		$out3,$out3,v27
	vcipher		$out4,$out4,v27
	vcipher		$out5,$out5,v27
	vcipher		$out6,$out6,v27
	vcipher		$out7,$out7,v27
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vcipher		$out0,$out0,v28
	lvx_u		$in0,$x00,$inp		# load input
	vcipher		$out1,$out1,v28
	lvx_u		$in1,$x10,$inp
	vcipher		$out2,$out2,v28
	lvx_u		$in2,$x20,$inp
	vcipher		$out3,$out3,v28
	lvx_u		$in3,$x30,$inp
	vcipher		$out4,$out4,v28
	lvx_u		$in4,$x40,$inp
	vcipher		$out5,$out5,v28
	lvx_u		$in5,$x50,$inp
	vcipher		$out6,$out6,v28
	lvx_u		$in6,$x60,$inp
	vcipher		$out7,$out7,v28
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80

	vcipher		$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$inpperm
	vcipher		$out1,$out1,v29
	le?vperm	$in1,$in1,$in1,$inpperm
	vcipher		$out2,$out2,v29
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher		$out3,$out3,v29
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher		$out4,$out4,v29
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher		$out5,$out5,v29
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher		$out6,$out6,v29
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher		$out7,$out7,v29
	le?vperm	$in7,$in7,$in7,$inpperm

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in7 are loaded
						# with last "words"
	subfe.		r0,r0,r0		# borrow?-1:0
	vcipher		$out0,$out0,v30
	vxor		$in0,$in0,v31		# xor with last round key
	vcipher		$out1,$out1,v30
	vxor		$in1,$in1,v31
	vcipher		$out2,$out2,v30
	vxor		$in2,$in2,v31
	vcipher		$out3,$out3,v30
	vxor		$in3,$in3,v31
	vcipher		$out4,$out4,v30
	vxor		$in4,$in4,v31
	vcipher		$out5,$out5,v30
	vxor		$in5,$in5,v31
	vcipher		$out6,$out6,v30
	vxor		$in6,$in6,v31
	vcipher		$out7,$out7,v30
	vxor		$in7,$in7,v31

	bne		Lctr32_enc8x_break	# did $len-129 borrow?

	vcipherlast	$in0,$out0,$in0
	vcipherlast	$in1,$out1,$in1
	vadduqm		$out1,$ivec,$one	# counter values ...
	vcipherlast	$in2,$out2,$in2
	vadduqm		$out2,$ivec,$two
	vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	vcipherlast	$in3,$out3,$in3
	vadduqm		$out3,$out1,$two
	vxor		$out1,$out1,$rndkey0
	vcipherlast	$in4,$out4,$in4
	vadduqm		$out4,$out2,$two
	vxor		$out2,$out2,$rndkey0
	vcipherlast	$in5,$out5,$in5
	vadduqm		$out5,$out3,$two
	vxor		$out3,$out3,$rndkey0
	vcipherlast	$in6,$out6,$in6
	vadduqm		$out6,$out4,$two
	vxor		$out4,$out4,$rndkey0
	vcipherlast	$in7,$out7,$in7
	vadduqm		$out7,$out5,$two
	vxor		$out5,$out5,$rndkey0
	le?vperm	$in0,$in0,$in0,$inpperm
	vadduqm		$ivec,$out6,$two	# next counter value
	vxor		$out6,$out6,$rndkey0
	le?vperm	$in1,$in1,$in1,$inpperm
	vxor		$out7,$out7,$rndkey0
	mtctr		$rounds

	vcipher		$out0,$out0,v24
	stvx_u		$in0,$x00,$out
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher		$out1,$out1,v24
	stvx_u		$in1,$x10,$out
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher		$out2,$out2,v24
	stvx_u		$in2,$x20,$out
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher		$out3,$out3,v24
	stvx_u		$in3,$x30,$out
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher		$out4,$out4,v24
	stvx_u		$in4,$x40,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher		$out5,$out5,v24
	stvx_u		$in5,$x50,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vcipher		$out6,$out6,v24
	stvx_u		$in6,$x60,$out
	vcipher		$out7,$out7,v24
	stvx_u		$in7,$x70,$out
	addi		$out,$out,0x80

	b		Loop_ctr32_enc8x_middle

.align	5
Lctr32_enc8x_break:
	cmpwi		$len,-0x60
	blt		Lctr32_enc8x_one
	nop
	beq		Lctr32_enc8x_two
	cmpwi		$len,-0x40
	blt		Lctr32_enc8x_three
	nop
	beq		Lctr32_enc8x_four
	cmpwi		$len,-0x20
	blt		Lctr32_enc8x_five
	nop
	beq		Lctr32_enc8x_six
	cmpwi		$len,0x00
	blt		Lctr32_enc8x_seven

Lctr32_enc8x_eight:
	vcipherlast	$out0,$out0,$in0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	vcipherlast	$out5,$out5,$in5
	vcipherlast	$out6,$out6,$in6
	vcipherlast	$out7,$out7,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x60,$out
	stvx_u		$out7,$x70,$out
	addi		$out,$out,0x80
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_seven:
	vcipherlast	$out0,$out0,$in1
	vcipherlast	$out1,$out1,$in2
	vcipherlast	$out2,$out2,$in3
	vcipherlast	$out3,$out3,$in4
	vcipherlast	$out4,$out4,$in5
	vcipherlast	$out5,$out5,$in6
	vcipherlast	$out6,$out6,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	stvx_u		$out6,$x60,$out
	addi		$out,$out,0x70
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_six:
	vcipherlast	$out0,$out0,$in2
	vcipherlast	$out1,$out1,$in3
	vcipherlast	$out2,$out2,$in4
	vcipherlast	$out3,$out3,$in5
	vcipherlast	$out4,$out4,$in6
	vcipherlast	$out5,$out5,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	stvx_u		$out5,$x50,$out
	addi		$out,$out,0x60
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_five:
	vcipherlast	$out0,$out0,$in3
	vcipherlast	$out1,$out1,$in4
	vcipherlast	$out2,$out2,$in5
	vcipherlast	$out3,$out3,$in6
	vcipherlast	$out4,$out4,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_four:
	vcipherlast	$out0,$out0,$in4
	vcipherlast	$out1,$out1,$in5
	vcipherlast	$out2,$out2,$in6
	vcipherlast	$out3,$out3,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_one:
	vcipherlast	$out0,$out0,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	stvx_u		$out0,0,$out
	addi		$out,$out,0x10

Lctr32_enc8x_done:
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$inpperm,r10,$sp	# wipe copies of round keys
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x14,0,0x80,6,6,0
	.long		0
.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}}	}}}

#########################################################################
{{{	# XTS procedures						#
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
#                             const AES_KEY *key1, const AES_KEY *key2,#
#                             [const] unsigned char iv[16]);		#
# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which	#
# the input tweak value is assumed to be encrypted already, and the	#
# last tweak value, suitable for a consecutive call on the same chunk	#
# of data, is written back to the original buffer. In addition, in	#
# "tweak chaining" mode only complete input blocks are processed.	#
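
# An illustrative "tweak chaining" call sequence (a sketch, assuming
# the caller encrypts the iv under key2 once, e.g. with aes_p8_encrypt,
# and that n is a multiple of 16 so each call resumes the chained
# tweak):
#
#	aes_p8_encrypt(iv, tweak, key2);
#	aes_p8_xts_encrypt(inp,     out,     n, key1, NULL, tweak);
#	aes_p8_xts_encrypt(inp + n, out + n, m, key1, NULL, tweak);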

my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =	map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout) =				map("v$_",(0..2));
my ($output,$inptail,$inpperm,$leperm,$keyperm) =	map("v$_",(3..7));
my ($tweak,$seven,$eighty7,$tmp,$tweak1) =		map("v$_",(8..12));
my $taillen = $key2;

   ($inp,$idx) = ($idx,$inp);				# reassign

$code.=<<___;
.globl	.${prefix}_xts_encrypt
	mr		$inp,r3			# reassign
	li		r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis		r0,0xfff0
	mfspr		r12,256			# save vrsave
	li		r11,0
	mtspr		256,r0

	vspltisb	$seven,0x07		# 0x070707..07
	le?lvsl		$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor		$leperm,$leperm,$seven

	li		$idx,15
	lvx		$tweak,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$tweak,$tweak,$inptail,$inpperm

	neg		r11,$inp
	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inout,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0			# key2==NULL?
	beq		Lxts_enc_no_key2

	?lvsl		$keyperm,0,$key2	# prepare for unaligned key
	lwz		$rounds,240($key2)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	lvx		$rndkey0,0,$key2
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	mtctr		$rounds

Ltweak_xts_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	bdnz		Ltweak_xts_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li		$ivp,0			# don't chain the tweak
	b		Lxts_enc

Lxts_enc_no_key2:
	li		$idx,-16
	and		$len,$len,$idx		# in "tweak chaining"
						# mode only complete
						# blocks are processed
Lxts_enc:
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16

	?lvsl		$keyperm,0,$key1	# prepare for unaligned key
	lwz		$rounds,240($key1)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	vslb		$eighty7,$seven,$seven	# 0x808080..80
	vor		$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1			# 0x010101..01
	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01
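# The 0x870101..01 constant drives the XTS tweak update further down:
# vaddubm doubles every byte (with no carry between bytes), the
# vsrab/vsldoi pair rotates each byte's sign bit to its neighbour, and
# the vand with this mask turns those bits into the per-byte carries
# plus the 0x87 reduction of GF(2^128) with polynomial
# x^128 + x^7 + x^2 + x + 1.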
	andi.	$taillen,$len,15
	subic	r0,$len,32
	subi	$taillen,$taillen,16
	subfe	r0,r0,r0
	and	r0,r0,$taillen
	add	$inp,$inp,r0

	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	mtctr	$rounds
	b	Loop_xts_enc

.align	5
Loop_xts_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	bdnz	Loop_xts_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$rndkey0,$rndkey0,$tweak
	vcipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi	$out,$out,16

	subic.	$len,$len,16
	beq	Lxts_enc_done

	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16

	subic	r0,$len,32
	subfe	r0,r0,r0
	and	r0,r0,$taillen
	add	$inp,$inp,r0

	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp

	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$output,$output,$rndkey0	# just in case $len<16
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16

	mtctr	$rounds
	${UCMP}i	$len,16
	bge	Loop_xts_enc

	vxor	$output,$output,$tweak
	lvsr	$inpperm,0,$len		# $inpperm is no longer needed
	vxor	$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm	$inptail,$inptail,$tmp,$inpperm
	vsel	$inout,$inout,$output,$inptail

	subi	r11,$out,17
	subi	$out,$out,16
	mtctr	$len
	li	$len,16
Loop_xts_enc_steal:
	lbzu	r0,1(r11)
	stb	r0,16(r11)
	bdnz	Loop_xts_enc_steal

	mtctr	$rounds
	b	Loop_xts_enc		# one more time...

Lxts_enc_done:
	${UCMP}i	$ivp,0
	beq	Lxts_enc_ret

	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp

	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u	$tweak,0,$ivp

Lxts_enc_ret:
	mtspr	256,r12			# restore vrsave
	li	r3,0
	blr
	.long	0
	.byte	0,12,0x04,0,0x80,6,6,0
	.long	0
.size	.${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt

.globl	.${prefix}_xts_decrypt
	mr	$inp,r3			# reassign
	li	r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis	r0,0xfff8
	mfspr	r12,256			# save vrsave
	li	r11,0
	mtspr	256,r0

	andi.	r0,$len,15
	neg	r0,r0
	andi.	r0,r0,16
	sub	$len,$len,r0

	vspltisb	$seven,0x07	# 0x070707..07
	le?lvsl	$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor	$leperm,$leperm,$seven

	li	$idx,15
	lvx	$tweak,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$tweak,$tweak,$inptail,$inpperm

	neg	r11,$inp
	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inout,0,$inp
	addi	$inp,$inp,15		# 15 is not a typo
	le?vxor	$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0		# key2==NULL?
	beq	Lxts_dec_no_key2

	?lvsl	$keyperm,0,$key2	# prepare for unaligned key
	lwz	$rounds,240($key2)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	lvx	$rndkey0,0,$key2
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	mtctr	$rounds

Ltweak_xts_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	bdnz	Ltweak_xts_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li	$ivp,0			# don't chain the tweak
	b	Lxts_dec

Lxts_dec_no_key2:
	neg	$idx,$len
	andi.	$idx,$idx,15
	add	$len,$len,$idx		# in "tweak chaining"
					# mode only complete
					# blocks are processed
Lxts_dec:
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16

	?lvsl	$keyperm,0,$key1	# prepare for unaligned key
	lwz	$rounds,240($key1)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	vslb	$eighty7,$seven,$seven		# 0x808080..80
	vor	$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1			# 0x010101..01
	vsldoi	$eighty7,$eighty7,$tmp,15	# 0x870101..01

	${UCMP}i	$len,96
	bge	_aesp8_xts_decrypt6x

	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	mtctr	$rounds

	${UCMP}i	$len,16
	blt	Ltail_xts_dec
	be?b	Loop_xts_dec

.align	5
Loop_xts_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	bdnz	Loop_xts_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$rndkey0,$rndkey0,$tweak
	vncipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi	$out,$out,16

	subic.	$len,$len,16
	beq	Lxts_dec_done

	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16

	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp

	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16

	mtctr	$rounds
	${UCMP}i	$len,16
	bge	Loop_xts_dec

Ltail_xts_dec:
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak1,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak1,$tweak1,$tmp

	subi	$inp,$inp,16
	add	$inp,$inp,$len

	vxor	$inout,$inout,$tweak	# :-(
	vxor	$inout,$inout,$tweak1	# :-)

Loop_xts_dec_short:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	bdnz	Loop_xts_dec_short

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$rndkey0,$rndkey0,$tweak1
	vncipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out

	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	#addi	$inp,$inp,16
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm

	lvsr	$inpperm,0,$len		# $inpperm is no longer needed
	vxor	$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm	$inptail,$inptail,$tmp,$inpperm
	vsel	$inout,$inout,$output,$inptail

	vxor	$rndkey0,$rndkey0,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16

	subi	r11,$out,1
	mtctr	$len
	li	$len,16
Loop_xts_dec_steal:
	lbzu	r0,1(r11)
	stb	r0,16(r11)
	bdnz	Loop_xts_dec_steal

	mtctr	$rounds
	b	Loop_xts_dec		# one more time...
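	# Note on the tail path above: XTS ciphertext stealing on the
	# decrypt side processes the last complete block with the *next*
	# tweak ($tweak1) while the partial tail keeps the current one,
	# hence the back-to-back xors at Ltail_xts_dec that swap the
	# tweak folded into $inout.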

Lxts_dec_done:
	${UCMP}i	$ivp,0
	beq	Lxts_dec_ret

	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp

	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u	$tweak,0,$ivp

Lxts_dec_ret:
	mtspr	256,r12			# restore vrsave
	li	r3,0
	blr
	.long	0
	.byte	0,12,0x04,0,0x80,6,6,0
	.long	0
.size	.${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
___
#########################################################################
{{	# Optimized XTS procedures					#
my $key_=$key2;
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
	$x00=0 if ($flavour =~ /osx/);
my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($keyperm)=($out0);	# aliases with "caller", redundant assignment
my $taillen=$x70;

$code.=<<___;
.align	5
_aesp8_xts_encrypt6x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	mflr	r11
	li	r7,`$FRAME+8*16+15`
	li	r3,`$FRAME+8*16+31`
	$PUSH	r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
	stvx	v20,r7,$sp		# ABI says so
	addi	r7,r7,32
	stvx	v21,r3,$sp
	addi	r3,r3,32
	stvx	v22,r7,$sp
	addi	r7,r7,32
	stvx	v23,r3,$sp
	addi	r3,r3,32
	stvx	v24,r7,$sp
	addi	r7,r7,32
	stvx	v25,r3,$sp
	addi	r3,r3,32
	stvx	v26,r7,$sp
	addi	r7,r7,32
	stvx	v27,r3,$sp
	addi	r3,r3,32
	stvx	v28,r7,$sp
	addi	r7,r7,32
	stvx	v29,r3,$sp
	addi	r3,r3,32
	stvx	v30,r7,$sp
	stvx	v31,r3,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	xxlor	2, 32+$eighty7, 32+$eighty7
	vsldoi	$eighty7,$tmp,$eighty7,1	# 0x010101..87
	xxlor	1, 32+$eighty7, 32+$eighty7

	# Load XOR Lconsts.
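	# (Lconsts returns the constant-table address in r6, which holds
	# the key1 pointer here, so r6 is parked in $x70 across the call;
	# the vpermxor permutation mask sits at offset 0x40 of the table
	# and is loaded into VSX register 0.)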
	mr	$x70, r6
	bl	Lconsts
	lxvw4x	0, $x40, r6		# load XOR contents
	mr	r6, $x70
	li	$x70,0x70

	subi	$rounds,$rounds,3	# -4 in total

	lvx	$rndkey0,$x00,$key1	# load key schedule
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	lvx	v31,$x00,$key1
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_xts_enc_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key1
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_xts_enc_key

	lvx	v26,$x10,$key1
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key1
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key1
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key1
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key1
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key1
	?vperm	v29,v29,v30,$keyperm
	lvx	$twk5,$x70,$key1	# borrow $twk5
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$twk5,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	# From here on the tweak is advanced with the 0x010101..87
	# constant and vpermxor, i.e. for each block:
	#     eighty7 = 0x010101..87
	#     vsrab	tmp, tweak, seven	# next tweak value, right shift 7 bits
	#     vand	tmp, tmp, eighty7	# last byte with carry
	#     vaddubm	tweak, tweak, tweak	# left shift 1 bit (x2)
	#     xxlor	vsx, 0, 0
	#     vpermxor	tweak, tweak, tmp, vsx

	vperm	$in0,$inout,$inptail,$inpperm
	subi	$inp,$inp,31		# undo "caller"
	vxor	$twk0,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vand	$tmp,$tmp,$eighty7
	vxor	$out0,$in0,$twk0
	xxlor	32+$in1, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in1

	lvx_u	$in1,$x10,$inp
	vxor	$twk1,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	le?vperm	$in1,$in1,$in1,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out1,$in1,$twk1
	xxlor	32+$in2, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in2

	lvx_u	$in2,$x20,$inp
	andi.	$taillen,$len,15
	vxor	$twk2,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	le?vperm	$in2,$in2,$in2,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out2,$in2,$twk2
	xxlor	32+$in3, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in3

	lvx_u	$in3,$x30,$inp
	sub	$len,$len,$taillen
	vxor	$twk3,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	le?vperm	$in3,$in3,$in3,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out3,$in3,$twk3
	xxlor	32+$in4, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in4

	lvx_u	$in4,$x40,$inp
	subi	$len,$len,0x60
	vxor	$twk4,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	le?vperm	$in4,$in4,$in4,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out4,$in4,$twk4
	xxlor	32+$in5, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in5

	lvx_u	$in5,$x50,$inp
	addi	$inp,$inp,0x60
	vxor	$twk5,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	le?vperm	$in5,$in5,$in5,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out5,$in5,$twk5
	xxlor	32+$in0, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in0

	vxor	v31,v31,$rndkey0
	mtctr	$rounds
	b	Loop_xts_enc6x

.align	5
Loop_xts_enc6x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_xts_enc6x

	xxlor	32+$eighty7, 1, 1	# 0x010101..87

	subic	$len,$len,96		# $len-=96
	vxor	$in0,$twk0,v31		# xor with last round key
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk0,$tweak,$rndkey0
	vaddubm	$tweak,$tweak,$tweak
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24

	subfe.	r0,r0,r0		# borrow?-1:0
	vand	$tmp,$tmp,$eighty7
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	xxlor	32+$in1, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in1
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vxor	$in1,$twk1,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk1,$tweak,$rndkey0
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25

	and	r0,r0,$len
	vaddubm	$tweak,$tweak,$tweak
	vcipher	$out0,$out0,v26
	vcipher	$out1,$out1,v26
	vand	$tmp,$tmp,$eighty7
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	xxlor	32+$in2, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in2
	vcipher	$out4,$out4,v26
	vcipher	$out5,$out5,v26

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in5 are loaded
					# with last "words"
	vxor	$in2,$twk2,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk2,$tweak,$rndkey0
	vaddubm	$tweak,$tweak,$tweak
	vcipher	$out0,$out0,v27
	vcipher	$out1,$out1,v27
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vand	$tmp,$tmp,$eighty7
	vcipher	$out4,$out4,v27
	vcipher	$out5,$out5,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	xxlor	32+$in3, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in3
	vcipher	$out0,$out0,v28
	vcipher	$out1,$out1,v28
	vxor	$in3,$twk3,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk3,$tweak,$rndkey0
	vcipher	$out2,$out2,v28
	vcipher	$out3,$out3,v28
	vaddubm	$tweak,$tweak,$tweak
	vcipher	$out4,$out4,v28
	vcipher	$out5,$out5,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vand	$tmp,$tmp,$eighty7

	vcipher	$out0,$out0,v29
	vcipher	$out1,$out1,v29
	xxlor	32+$in4, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in4
	vcipher	$out2,$out2,v29
	vcipher	$out3,$out3,v29
	vxor	$in4,$twk4,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk4,$tweak,$rndkey0
	vcipher	$out4,$out4,v29
	vcipher	$out5,$out5,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vaddubm	$tweak,$tweak,$tweak

	vcipher	$out0,$out0,v30
	vcipher	$out1,$out1,v30
	vand	$tmp,$tmp,$eighty7
	vcipher	$out2,$out2,v30
	vcipher	$out3,$out3,v30
	xxlor	32+$in5, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in5
	vcipher	$out4,$out4,v30
	vcipher	$out5,$out5,v30
	vxor	$in5,$twk5,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk5,$tweak,$rndkey0

	vcipherlast	$out0,$out0,$in0
	lvx_u	$in0,$x00,$inp		# load next input block
	vaddubm	$tweak,$tweak,$tweak
	vcipherlast	$out1,$out1,$in1
	lvx_u	$in1,$x10,$inp
	vcipherlast	$out2,$out2,$in2
	le?vperm	$in0,$in0,$in0,$leperm
	lvx_u	$in2,$x20,$inp
	vand	$tmp,$tmp,$eighty7
	vcipherlast	$out3,$out3,$in3
	le?vperm	$in1,$in1,$in1,$leperm
	lvx_u	$in3,$x30,$inp
	vcipherlast	$out4,$out4,$in4
	le?vperm	$in2,$in2,$in2,$leperm
	lvx_u	$in4,$x40,$inp
	xxlor	10, 32+$in0, 32+$in0
	xxlor	32+$in0, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in0
	xxlor	32+$in0, 10, 10
	vcipherlast	$tmp,$out5,$in5	# last block might be needed
					# in stealing mode
	le?vperm	$in3,$in3,$in3,$leperm
	lvx_u	$in5,$x50,$inp
	addi	$inp,$inp,0x60
	le?vperm	$in4,$in4,$in4,$leperm
	le?vperm	$in5,$in5,$in5,$leperm

	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	vxor	$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	vxor	$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u	$out3,$x30,$out
	vxor	$out3,$in3,$twk3
	le?vperm	$out5,$tmp,$tmp,$leperm
	stvx_u	$out4,$x40,$out
	vxor	$out4,$in4,$twk4
	le?stvx_u	$out5,$x50,$out
	be?stvx_u	$tmp, $x50,$out
	vxor	$out5,$in5,$twk5
	addi	$out,$out,0x60

	mtctr	$rounds
	beq	Loop_xts_enc6x		# did $len-=96 borrow?

	xxlor	32+$eighty7, 2, 2	# 0x010101..87

	addic.	$len,$len,0x60
	beq	Lxts_enc6x_zero
	cmpwi	$len,0x20
	blt	Lxts_enc6x_one
	nop
	beq	Lxts_enc6x_two
	cmpwi	$len,0x40
	blt	Lxts_enc6x_three
	nop
	beq	Lxts_enc6x_four

Lxts_enc6x_five:
	vxor	$out0,$in1,$twk0
	vxor	$out1,$in2,$twk1
	vxor	$out2,$in3,$twk2
	vxor	$out3,$in4,$twk3
	vxor	$out4,$in5,$twk4

	bl	_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk5		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	vxor	$tmp,$out4,$twk5	# last block prep for stealing
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done

.align	4
Lxts_enc6x_four:
	vxor	$out0,$in2,$twk0
	vxor	$out1,$in3,$twk1
	vxor	$out2,$in4,$twk2
	vxor	$out3,$in5,$twk3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk4		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	vxor	$tmp,$out3,$twk4	# last block prep for stealing
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done

.align	4
Lxts_enc6x_three:
	vxor	$out0,$in3,$twk0
	vxor	$out1,$in4,$twk1
	vxor	$out2,$in5,$twk2
	vxor	$out3,$out3,$out3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk3		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$tmp,$out2,$twk3	# last block prep for stealing
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done

.align	4
Lxts_enc6x_two:
	vxor	$out0,$in4,$twk0
	vxor	$out1,$in5,$twk1
	vxor	$out2,$out2,$out2
	vxor	$out3,$out3,$out3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk2		# unused tweak
	vxor	$tmp,$out1,$twk2	# last block prep for stealing
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done

.align	4
Lxts_enc6x_one:
	vxor	$out0,$in5,$twk0
	nop
Loop_xts_enc1x:
	vcipher	$out0,$out0,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vcipher	$out0,$out0,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_xts_enc1x

	add	$inp,$inp,$taillen
	cmpwi	$taillen,0
	vcipher	$out0,$out0,v24

	subi	$inp,$inp,16
	vcipher	$out0,$out0,v25

	lvsr	$inpperm,0,$taillen
	vcipher	$out0,$out0,v26

	lvx_u	$in0,0,$inp
	vcipher	$out0,$out0,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	vcipher	$out0,$out0,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vxor	$twk0,$twk0,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vcipher	$out0,$out0,v30

	vperm	$in0,$in0,$in0,$inpperm
	vcipherlast	$out0,$out0,$twk0

	vmr	$twk0,$twk1		# unused tweak
	vxor	$tmp,$out0,$twk1	# last block prep for stealing
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u	$out0,$x00,$out		# store output
	addi	$out,$out,0x10
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done

.align	4
Lxts_enc6x_zero:
	cmpwi	$taillen,0
	beq	Lxts_enc6x_done

	add	$inp,$inp,$taillen
	subi	$inp,$inp,16
	lvx_u	$in0,0,$inp
	lvsr	$inpperm,0,$taillen	# $in5 is no more
	le?vperm	$in0,$in0,$in0,$leperm
	vperm	$in0,$in0,$in0,$inpperm
	vxor	$tmp,$tmp,$twk0
Lxts_enc6x_steal:
	vxor	$in0,$in0,$twk0
	vxor	$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm	$out0,$out0,$out1,$inpperm
	vsel	$out0,$in0,$tmp,$out0	# $tmp is last block, remember?

	subi	r30,$out,17
	subi	$out,$out,16
	mtctr	$taillen
Loop_xts_enc6x_steal:
	lbzu	r0,1(r30)
	stb	r0,16(r30)
	bdnz	Loop_xts_enc6x_steal

	li	$taillen,0
	mtctr	$rounds
	b	Loop_xts_enc1x		# one more time...

.align	4
Lxts_enc6x_done:
	${UCMP}i	$ivp,0
	beq	Lxts_enc6x_ret

	vxor	$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u	$tweak,0,$ivp

Lxts_enc6x_ret:
	mtlr	r11
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$seven,r10,$sp		# wipe copies of round keys
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x04,1,0x80,6,6,0
	.long	0

.align	5
_aesp8_xts_enc5x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	_aesp8_xts_enc5x

	add	$inp,$inp,$taillen
	cmpwi	$taillen,0
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24

	subi	$inp,$inp,16
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vxor	$twk0,$twk0,v31

	vcipher	$out0,$out0,v26
	lvsr	$inpperm,r0,$taillen	# $in5 is no more
	vcipher	$out1,$out1,v26
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vcipher	$out4,$out4,v26
	vxor	$in1,$twk1,v31

	vcipher	$out0,$out0,v27
	lvx_u	$in0,0,$inp
	vcipher	$out1,$out1,v27
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vcipher	$out4,$out4,v27
	vxor	$in2,$twk2,v31

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v28
	vcipher	$out1,$out1,v28
	vcipher	$out2,$out2,v28
	vcipher	$out3,$out3,v28
	vcipher	$out4,$out4,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vxor	$in3,$twk3,v31

	vcipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vcipher	$out1,$out1,v29
	vcipher	$out2,$out2,v29
	vcipher	$out3,$out3,v29
	vcipher	$out4,$out4,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vxor	$in4,$twk4,v31

	vcipher	$out0,$out0,v30
	vperm	$in0,$in0,$in0,$inpperm
	vcipher	$out1,$out1,v30
	vcipher	$out2,$out2,v30
	vcipher	$out3,$out3,v30
	vcipher	$out4,$out4,v30

	vcipherlast	$out0,$out0,$twk0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

.align	5
_aesp8_xts_decrypt6x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	mflr	r11
	li	r7,`$FRAME+8*16+15`
	li	r3,`$FRAME+8*16+31`
	$PUSH	r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
	stvx	v20,r7,$sp		# ABI says so
	addi	r7,r7,32
	stvx	v21,r3,$sp
	addi	r3,r3,32
	stvx	v22,r7,$sp
	addi	r7,r7,32
	stvx	v23,r3,$sp
	addi	r3,r3,32
	stvx	v24,r7,$sp
	addi	r7,r7,32
	stvx	v25,r3,$sp
	addi	r3,r3,32
	stvx	v26,r7,$sp
	addi	r7,r7,32
	stvx	v27,r3,$sp
	addi	r3,r3,32
	stvx	v28,r7,$sp
	addi	r7,r7,32
	stvx	v29,r3,$sp
	addi	r3,r3,32
	stvx	v30,r7,$sp
	stvx	v31,r3,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	xxlor	2, 32+$eighty7, 32+$eighty7
	vsldoi	$eighty7,$tmp,$eighty7,1	# 0x010101..87
	xxlor	1, 32+$eighty7, 32+$eighty7

	# Load XOR Lconsts.
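	# (same dance as in _aesp8_xts_encrypt6x: the key1 pointer in r6
	# is preserved around the Lconsts call below.)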
	mr	$x70, r6
	bl	Lconsts
	lxvw4x	0, $x40, r6		# load XOR contents
	mr	r6, $x70
	li	$x70,0x70

	subi	$rounds,$rounds,3	# -4 in total

	lvx	$rndkey0,$x00,$key1	# load key schedule
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	lvx	v31,$x00,$key1
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_xts_dec_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key1
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_xts_dec_key

	lvx	v26,$x10,$key1
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key1
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key1
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key1
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key1
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key1
	?vperm	v29,v29,v30,$keyperm
	lvx	$twk5,$x70,$key1	# borrow $twk5
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$twk5,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	vperm	$in0,$inout,$inptail,$inpperm
	subi	$inp,$inp,31		# undo "caller"
	vxor	$twk0,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vand	$tmp,$tmp,$eighty7
	vxor	$out0,$in0,$twk0
	xxlor	32+$in1, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in1

	lvx_u	$in1,$x10,$inp
	vxor	$twk1,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	le?vperm	$in1,$in1,$in1,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out1,$in1,$twk1
	xxlor	32+$in2, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in2

	lvx_u	$in2,$x20,$inp
	andi.	$taillen,$len,15
	vxor	$twk2,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	le?vperm	$in2,$in2,$in2,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out2,$in2,$twk2
	xxlor	32+$in3, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in3

	lvx_u	$in3,$x30,$inp
	sub	$len,$len,$taillen
	vxor	$twk3,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	le?vperm	$in3,$in3,$in3,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out3,$in3,$twk3
	xxlor	32+$in4, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in4

	lvx_u	$in4,$x40,$inp
	subi	$len,$len,0x60
	vxor	$twk4,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	le?vperm	$in4,$in4,$in4,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out4,$in4,$twk4
	xxlor	32+$in5, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in5

	lvx_u	$in5,$x50,$inp
	addi	$inp,$inp,0x60
	vxor	$twk5,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	le?vperm	$in5,$in5,$in5,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out5,$in5,$twk5
	xxlor	32+$in0, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in0

	vxor	v31,v31,$rndkey0
	mtctr	$rounds
	b	Loop_xts_dec6x

.align	5
Loop_xts_dec6x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_xts_dec6x

	xxlor	32+$eighty7, 1, 1	# 0x010101..87

	subic	$len,$len,96		# $len-=96
	vxor	$in0,$twk0,v31		# xor with last round key
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk0,$tweak,$rndkey0
	vaddubm	$tweak,$tweak,$tweak
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24

	subfe.	r0,r0,r0		# borrow?-1:0
	vand	$tmp,$tmp,$eighty7
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	xxlor	32+$in1, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in1
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vxor	$in1,$twk1,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk1,$tweak,$rndkey0
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25

	and	r0,r0,$len
	vaddubm	$tweak,$tweak,$tweak
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vand	$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	xxlor	32+$in2, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in2
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in5 are loaded
					# with last "words"
	vxor	$in2,$twk2,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk2,$tweak,$rndkey0
	vaddubm	$tweak,$tweak,$tweak
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vand	$tmp,$tmp,$eighty7
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	xxlor	32+$in3, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in3
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vxor	$in3,$twk3,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk3,$tweak,$rndkey0
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vaddubm	$tweak,$tweak,$tweak
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vand	$tmp,$tmp,$eighty7

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	xxlor	32+$in4, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in4
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vxor	$in4,$twk4,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk4,$tweak,$rndkey0
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vaddubm	$tweak,$tweak,$tweak

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vand	$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	xxlor	32+$in5, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in5
	vncipher	$out4,$out4,v30
	vncipher	$out5,$out5,v30
	vxor	$in5,$twk5,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk5,$tweak,$rndkey0

	vncipherlast	$out0,$out0,$in0
	lvx_u	$in0,$x00,$inp		# load next input block
	vaddubm	$tweak,$tweak,$tweak
	vncipherlast	$out1,$out1,$in1
	lvx_u	$in1,$x10,$inp
	vncipherlast	$out2,$out2,$in2
	le?vperm	$in0,$in0,$in0,$leperm
	lvx_u	$in2,$x20,$inp
	vand	$tmp,$tmp,$eighty7
	vncipherlast	$out3,$out3,$in3
	le?vperm	$in1,$in1,$in1,$leperm
	lvx_u	$in3,$x30,$inp
	vncipherlast	$out4,$out4,$in4
	le?vperm	$in2,$in2,$in2,$leperm
	lvx_u	$in4,$x40,$inp
	xxlor	10, 32+$in0, 32+$in0
	xxlor	32+$in0, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in0
	xxlor	32+$in0, 10, 10
	vncipherlast	$out5,$out5,$in5
	le?vperm	$in3,$in3,$in3,$leperm
	lvx_u	$in5,$x50,$inp
	addi	$inp,$inp,0x60
	le?vperm	$in4,$in4,$in4,$leperm
	le?vperm	$in5,$in5,$in5,$leperm

	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	vxor	$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	vxor	$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u	$out3,$x30,$out
	vxor	$out3,$in3,$twk3
	le?vperm	$out5,$out5,$out5,$leperm
	stvx_u	$out4,$x40,$out
	vxor	$out4,$in4,$twk4
	stvx_u	$out5,$x50,$out
	vxor	$out5,$in5,$twk5
	addi	$out,$out,0x60

	mtctr	$rounds
	beq	Loop_xts_dec6x		# did $len-=96 borrow?

	xxlor	32+$eighty7, 2, 2	# 0x010101..87

	addic.	$len,$len,0x60
	beq	Lxts_dec6x_zero
	cmpwi	$len,0x20
	blt	Lxts_dec6x_one
	nop
	beq	Lxts_dec6x_two
	cmpwi	$len,0x40
	blt	Lxts_dec6x_three
	nop
	beq	Lxts_dec6x_four

Lxts_dec6x_five:
	vxor	$out0,$in1,$twk0
	vxor	$out1,$in2,$twk1
	vxor	$out2,$in3,$twk2
	vxor	$out3,$in4,$twk3
	vxor	$out4,$in5,$twk4

	bl	_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk5		# unused tweak
	vxor	$twk1,$tweak,$rndkey0
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk1
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	bne	Lxts_dec6x_steal
	b	Lxts_dec6x_done

.align	4
Lxts_dec6x_four:
	vxor	$out0,$in2,$twk0
	vxor	$out1,$in3,$twk1
	vxor	$out2,$in4,$twk2
	vxor	$out3,$in5,$twk3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk4		# unused tweak
	vmr	$twk1,$twk5
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk5
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	bne	Lxts_dec6x_steal
	b	Lxts_dec6x_done

.align	4
Lxts_dec6x_three:
	vxor	$out0,$in3,$twk0
	vxor	$out1,$in4,$twk1
	vxor	$out2,$in5,$twk2
	vxor	$out3,$out3,$out3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk3		# unused tweak
	vmr	$twk1,$twk4
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk4
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	bne	Lxts_dec6x_steal
	b	Lxts_dec6x_done

.align	4
Lxts_dec6x_two:
	vxor	$out0,$in4,$twk0
	vxor	$out1,$in5,$twk1
	vxor	$out2,$out2,$out2
	vxor	$out3,$out3,$out3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk2		# unused tweak
	vmr	$twk1,$twk3
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk3
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	bne	Lxts_dec6x_steal
	b	Lxts_dec6x_done

.align	4
Lxts_dec6x_one:
	vxor	$out0,$in5,$twk0
	nop
Loop_xts_dec1x:
	vncipher	$out0,$out0,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_xts_dec1x

	subi	r0,$taillen,1
	vncipher	$out0,$out0,v24

	andi.	r0,r0,16
	cmpwi	$taillen,0
	vncipher	$out0,$out0,v25

	sub	$inp,$inp,r0
	vncipher	$out0,$out0,v26

	lvx_u	$in0,0,$inp
	vncipher	$out0,$out0,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vxor	$twk0,$twk0,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	mtctr	$rounds
	vncipherlast	$out0,$out0,$twk0

	vmr	$twk0,$twk1		# unused tweak
	vmr	$twk1,$twk2
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u	$out0,$x00,$out		# store output
	addi	$out,$out,0x10
	vxor	$out0,$in0,$twk2
	bne	Lxts_dec6x_steal
	b	Lxts_dec6x_done

.align	4
Lxts_dec6x_zero:
	cmpwi	$taillen,0
	beq	Lxts_dec6x_done

	lvx_u	$in0,0,$inp
	le?vperm	$in0,$in0,$in0,$leperm
	vxor	$out0,$in0,$twk1
Lxts_dec6x_steal:
	vncipher	$out0,$out0,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Lxts_dec6x_steal

	add	$inp,$inp,$taillen
	vncipher	$out0,$out0,v24

	cmpwi	$taillen,0
	vncipher	$out0,$out0,v25

	lvx_u	$in0,0,$inp
	vncipher	$out0,$out0,v26

	lvsr	$inpperm,0,$taillen	# $in5 is no more
	vncipher	$out0,$out0,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vxor	$twk1,$twk1,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	vperm	$in0,$in0,$in0,$inpperm
	vncipherlast	$tmp,$out0,$twk1

	le?vperm	$out0,$tmp,$tmp,$leperm
	le?stvx_u	$out0,0,$out
	be?stvx_u	$tmp,0,$out

	vxor	$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm	$out0,$out0,$out1,$inpperm
	vsel	$out0,$in0,$tmp,$out0
	vxor	$out0,$out0,$twk0

	subi	r30,$out,1
	mtctr	$taillen
Loop_xts_dec6x_steal:
	lbzu	r0,1(r30)
	stb	r0,16(r30)
	bdnz	Loop_xts_dec6x_steal

	li	$taillen,0
	mtctr	$rounds
	b	Loop_xts_dec1x		# one more time...
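	# After stealing, the reassembled block (tail bytes merged under
	# the $inpperm mask, then xored with $twk0) goes back through
	# Loop_xts_dec1x with $taillen zeroed, so the single-block path
	# finishes it off.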

.align	4
Lxts_dec6x_done:
	${UCMP}i	$ivp,0
	beq	Lxts_dec6x_ret

	vxor	$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u	$tweak,0,$ivp

Lxts_dec6x_ret:
	mtlr	r11
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$seven,r10,$sp		# wipe copies of round keys
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x04,1,0x80,6,6,0
	.long	0

.align	5
_aesp8_xts_dec5x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	_aesp8_xts_dec5x

	subi	r0,$taillen,1
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24

	andi.	r0,r0,16
	cmpwi	$taillen,0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vxor	$twk0,$twk0,v31

	sub	$inp,$inp,r0
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vxor	$in1,$twk1,v31

	vncipher	$out0,$out0,v27
	lvx_u	$in0,0,$inp
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vxor	$in2,$twk2,v31

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vxor	$in3,$twk3,v31

	vncipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vxor	$in4,$twk4,v31

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vncipher	$out4,$out4,v30

	vncipherlast	$out0,$out0,$twk0
	vncipherlast	$out1,$out1,$in1
	vncipherlast	$out2,$out2,$in2
	vncipherlast	$out3,$out3,$in3
	vncipherlast	$out4,$out4,$in4
	mtctr	$rounds
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
___
}}	}}}

my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
	      foreach (split(/,\s*/,$2)) {
		my $l = /^0/?oct:int;
		push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
	      }
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv)  {
		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
		}
	    }

	    # emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

close STDOUT;
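# A concrete example of the '?'-rewriting above, for a little-endian
# flavour: "?vperm v24,v30,v31,v7" is emitted with its two middle source
# operands swapped, i.e. "vperm v24,v31,v30,v7"; "?lvsl" and "?lvsr" trade
# places; and the "le?"/"be?" prefixes keep or comment out whole lines
# depending on the target endianness.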